aifeifei798 committed on
Commit
03ab9bd
·
verified ·
1 Parent(s): e19cee7

Upload feifeiflorence.py

Browse files
Files changed (1) hide show
  1. feifeilib/feifeiflorence.py +61 -61
feifeilib/feifeiflorence.py CHANGED
@@ -1,61 +1,61 @@
1
- from PIL import Image
2
- import spaces
3
- import gradio as gr
4
- from transformers import (
5
- AutoProcessor,
6
- AutoModelForCausalLM,
7
- )
8
- import torch
9
- import subprocess
10
- from feifeilib.feifeitexttoimg import feifeitexttoimg
11
-
12
- subprocess.run(
13
- "pip install flash-attn --no-build-isolation",
14
- env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
15
- shell=True,
16
- )
17
-
18
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
19
- torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
20
-
21
- Florence_models = AutoModelForCausalLM.from_pretrained(
22
- "microsoft/Florence-2-large",
23
- torch_dtype=torch_dtype,
24
- trust_remote_code=True).to(device)
25
-
26
- Florence_processors = AutoProcessor.from_pretrained(
27
- "microsoft/Florence-2-large", trust_remote_code=True)
28
-
29
-
30
- @spaces.GPU
31
- def feifeiflorence(
32
- image,
33
- progress=gr.Progress(track_tqdm=True),
34
- ):
35
-
36
- image = Image.fromarray(image)
37
- task_prompt = "<MORE_DETAILED_CAPTION>"
38
-
39
- if image.mode != "RGB":
40
- image = image.convert("RGB")
41
-
42
- inputs = Florence_processors(text=task_prompt,
43
- images=image,
44
- return_tensors="pt").to(device, torch_dtype)
45
-
46
- generated_ids = Florence_models.generate(
47
- input_ids=inputs["input_ids"],
48
- pixel_values=inputs["pixel_values"],
49
- max_new_tokens=1024,
50
- num_beams=3,
51
- do_sample=False,
52
- )
53
- generated_text = Florence_processors.batch_decode(
54
- generated_ids, skip_special_tokens=False)[0]
55
- parsed_answer = Florence_processors.post_process_generation(
56
- generated_text,
57
- task=task_prompt,
58
- image_size=(image.width, image.height))
59
- out_text=parsed_answer["<MORE_DETAILED_CAPTION>"]
60
- out_img, speed = feifeitexttoimg(out_text)
61
- return out_text,out_img
 
1
+ from PIL import Image
2
+ import spaces
3
+ import gradio as gr
4
+ from transformers import (
5
+ AutoProcessor,
6
+ AutoModelForCausalLM,
7
+ )
8
+ import torch
9
+ import subprocess
10
+ from feifeilib.feifeitexttoimg import feifeitexttoimg
11
+
12
+ subprocess.run(
13
+ "pip install flash-attn --no-build-isolation",
14
+ env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
15
+ shell=True,
16
+ )
17
+
18
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
19
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
20
+
21
+ Florence_models = AutoModelForCausalLM.from_pretrained(
22
+ "microsoft/Florence-2-large",
23
+ torch_dtype=torch_dtype,
24
+ trust_remote_code=True).to(device)
25
+
26
+ Florence_processors = AutoProcessor.from_pretrained(
27
+ "microsoft/Florence-2-large", trust_remote_code=True)
28
+
29
+
30
+ @spaces.GPU
31
+ def feifeiflorence(
32
+ image,
33
+ progress=gr.Progress(track_tqdm=True),
34
+ ):
35
+ image = Image.fromarray(image)
36
+ task_prompt = "<MORE_DETAILED_CAPTION>"
37
+
38
+ if image.mode != "RGB":
39
+ image = image.convert("RGB")
40
+
41
+ inputs = Florence_processors(text=task_prompt,
42
+ images=image,
43
+ return_tensors="pt").to(device, torch_dtype)
44
+
45
+ generated_ids = Florence_models.generate(
46
+ input_ids=inputs["input_ids"],
47
+ pixel_values=inputs["pixel_values"],
48
+ max_new_tokens=1024,
49
+ num_beams=3,
50
+ do_sample=False,
51
+ )
52
+ generated_text = Florence_processors.batch_decode(
53
+ generated_ids, skip_special_tokens=False)[0]
54
+ parsed_answer = Florence_processors.post_process_generation(
55
+ generated_text,
56
+ task=task_prompt,
57
+ image_size=(image.width, image.height))
58
+ out_text=parsed_answer["<MORE_DETAILED_CAPTION>"]
59
+ width, height = image.size
60
+ out_img, speed = feifeitexttoimg(out_text,width=width,height=height)
61
+ return out_text,f"width={width} height={height}",out_img