Update README.md
Browse files
README.md
CHANGED
@@ -39,8 +39,8 @@ The original code can be found [here](https://github.com/Ucas-HaoranWei/GOT-OCR2
|
|
39 |
>>> from transformers import AutoProcessor, AutoModelForImageTextToText
|
40 |
|
41 |
>>> device = "cuda" if torch.cuda.is_available() else "cpu"
|
42 |
-
>>> model = AutoModelForImageTextToText.from_pretrained("yonigozlan/GOT-OCR-2.0-hf", device_map=device)
|
43 |
-
>>> processor = AutoProcessor.from_pretrained("yonigozlan/GOT-OCR-2.0-hf")
|
44 |
|
45 |
>>> image = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/image_ocr.jpg"
|
46 |
>>> inputs = processor(image, return_tensors="pt").to(device)
|
@@ -63,8 +63,8 @@ The original code can be found [here](https://github.com/Ucas-HaoranWei/GOT-OCR2
|
|
63 |
>>> from transformers import AutoProcessor, AutoModelForImageTextToText
|
64 |
|
65 |
>>> device = "cuda" if torch.cuda.is_available() else "cpu"
|
66 |
-
>>> model = AutoModelForImageTextToText.from_pretrained("yonigozlan/GOT-OCR-2.0-hf", device_map=device)
|
67 |
-
>>> processor = AutoProcessor.from_pretrained("yonigozlan/GOT-OCR-2.0-hf")
|
68 |
|
69 |
>>> image1 = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/multi_box.png"
|
70 |
>>> image2 = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/image_ocr.jpg"
|
@@ -91,8 +91,8 @@ GOT-OCR2 can also generate formatted text, such as markdown or LaTeX. Here is an
|
|
91 |
>>> from transformers import AutoProcessor, AutoModelForImageTextToText
|
92 |
|
93 |
>>> device = "cuda" if torch.cuda.is_available() else "cpu"
|
94 |
-
>>> model = AutoModelForImageTextToText.from_pretrained("yonigozlan/GOT-OCR-2.0-hf", device_map=device)
|
95 |
-
>>> processor = AutoProcessor.from_pretrained("yonigozlan/GOT-OCR-2.0-hf")
|
96 |
|
97 |
>>> image = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/latex.png"
|
98 |
>>> inputs = processor(image, return_tensors="pt", format=True).to(device)
|
@@ -119,8 +119,8 @@ Here is an example of how to process multiple pages at once:
|
|
119 |
>>> from transformers import AutoProcessor, AutoModelForImageTextToText
|
120 |
|
121 |
>>> device = "cuda" if torch.cuda.is_available() else "cpu"
|
122 |
-
>>> model = AutoModelForImageTextToText.from_pretrained("yonigozlan/GOT-OCR-2.0-hf", device_map=device)
|
123 |
-
>>> processor = AutoProcessor.from_pretrained("yonigozlan/GOT-OCR-2.0-hf")
|
124 |
|
125 |
>>> image1 = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/page1.png"
|
126 |
>>> image2 = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/page2.png"
|
@@ -148,8 +148,8 @@ Here is an example of how to process cropped patches:
|
|
148 |
>>> from transformers import AutoProcessor, AutoModelForImageTextToText
|
149 |
|
150 |
>>> device = "cuda" if torch.cuda.is_available() else "cpu"
|
151 |
-
>>> model = AutoModelForImageTextToText.from_pretrained("yonigozlan/GOT-OCR-2.0-hf", torch_dtype=torch.bfloat16, device_map=device)
|
152 |
-
>>> processor = AutoProcessor.from_pretrained("yonigozlan/GOT-OCR-2.0-hf")
|
153 |
|
154 |
>>> image = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/one_column.png"
|
155 |
>>> inputs = processor(image, return_tensors="pt", format=True, crop_to_patches=True, max_patches=3).to(device)
|
@@ -174,8 +174,8 @@ GOT supports interactive OCR, where the user can specify the region to be recogn
|
|
174 |
>>> from transformers import AutoProcessor, AutoModelForImageTextToText
|
175 |
|
176 |
>>> device = "cuda" if torch.cuda.is_available() else "cpu"
|
177 |
-
>>> model = AutoModelForImageTextToText.from_pretrained("yonigozlan/GOT-OCR-2.0-hf", device_map=device)
|
178 |
-
>>> processor = AutoProcessor.from_pretrained("yonigozlan/GOT-OCR-2.0-hf")
|
179 |
|
180 |
>>> image = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/multi_box.png"
|
181 |
>>> inputs = processor(image, return_tensors="pt", color="green").to(device) # or box=[x1, y1, x2, y2] for coordinates (image pixels)
|
@@ -202,8 +202,8 @@ Here is an example of how to process sheet music:
|
|
202 |
>>> import verovio
|
203 |
|
204 |
>>> device = "cuda" if torch.cuda.is_available() else "cpu"
|
205 |
-
>>> model = AutoModelForImageTextToText.from_pretrained("yonigozlan/GOT-OCR-2.0-hf", device_map=device)
|
206 |
-
>>> processor = AutoProcessor.from_pretrained("yonigozlan/GOT-OCR-2.0-hf")
|
207 |
|
208 |
>>> image = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/sheet_music.png"
|
209 |
>>> inputs = processor(image, return_tensors="pt", format=True).to(device)
|
|
|
39 |
>>> from transformers import AutoProcessor, AutoModelForImageTextToText
|
40 |
|
41 |
>>> device = "cuda" if torch.cuda.is_available() else "cpu"
|
42 |
+
>>> model = AutoModelForImageTextToText.from_pretrained("stepfun-ai/GOT-OCR-2.0-hf", device_map=device)
|
43 |
+
>>> processor = AutoProcessor.from_pretrained("stepfun-ai/GOT-OCR-2.0-hf")
|
44 |
|
45 |
>>> image = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/image_ocr.jpg"
|
46 |
>>> inputs = processor(image, return_tensors="pt").to(device)
|
|
|
63 |
>>> from transformers import AutoProcessor, AutoModelForImageTextToText
|
64 |
|
65 |
>>> device = "cuda" if torch.cuda.is_available() else "cpu"
|
66 |
+
>>> model = AutoModelForImageTextToText.from_pretrained("stepfun-ai/GOT-OCR-2.0-hf", device_map=device)
|
67 |
+
>>> processor = AutoProcessor.from_pretrained("stepfun-ai/GOT-OCR-2.0-hf")
|
68 |
|
69 |
>>> image1 = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/multi_box.png"
|
70 |
>>> image2 = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/image_ocr.jpg"
|
|
|
91 |
>>> from transformers import AutoProcessor, AutoModelForImageTextToText
|
92 |
|
93 |
>>> device = "cuda" if torch.cuda.is_available() else "cpu"
|
94 |
+
>>> model = AutoModelForImageTextToText.from_pretrained("stepfun-ai/GOT-OCR-2.0-hf", device_map=device)
|
95 |
+
>>> processor = AutoProcessor.from_pretrained("stepfun-ai/GOT-OCR-2.0-hf")
|
96 |
|
97 |
>>> image = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/latex.png"
|
98 |
>>> inputs = processor(image, return_tensors="pt", format=True).to(device)
|
|
|
119 |
>>> from transformers import AutoProcessor, AutoModelForImageTextToText
|
120 |
|
121 |
>>> device = "cuda" if torch.cuda.is_available() else "cpu"
|
122 |
+
>>> model = AutoModelForImageTextToText.from_pretrained("stepfun-ai/GOT-OCR-2.0-hf", device_map=device)
|
123 |
+
>>> processor = AutoProcessor.from_pretrained("stepfun-ai/GOT-OCR-2.0-hf")
|
124 |
|
125 |
>>> image1 = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/page1.png"
|
126 |
>>> image2 = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/page2.png"
|
|
|
148 |
>>> from transformers import AutoProcessor, AutoModelForImageTextToText
|
149 |
|
150 |
>>> device = "cuda" if torch.cuda.is_available() else "cpu"
|
151 |
+
>>> model = AutoModelForImageTextToText.from_pretrained("stepfun-ai/GOT-OCR-2.0-hf", torch_dtype=torch.bfloat16, device_map=device)
|
152 |
+
>>> processor = AutoProcessor.from_pretrained("stepfun-ai/GOT-OCR-2.0-hf")
|
153 |
|
154 |
>>> image = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/one_column.png"
|
155 |
>>> inputs = processor(image, return_tensors="pt", format=True, crop_to_patches=True, max_patches=3).to(device)
|
|
|
174 |
>>> from transformers import AutoProcessor, AutoModelForImageTextToText
|
175 |
|
176 |
>>> device = "cuda" if torch.cuda.is_available() else "cpu"
|
177 |
+
>>> model = AutoModelForImageTextToText.from_pretrained("stepfun-ai/GOT-OCR-2.0-hf", device_map=device)
|
178 |
+
>>> processor = AutoProcessor.from_pretrained("stepfun-ai/GOT-OCR-2.0-hf")
|
179 |
|
180 |
>>> image = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/multi_box.png"
|
181 |
>>> inputs = processor(image, return_tensors="pt", color="green").to(device) # or box=[x1, y1, x2, y2] for coordinates (image pixels)
|
|
|
202 |
>>> import verovio
|
203 |
|
204 |
>>> device = "cuda" if torch.cuda.is_available() else "cpu"
|
205 |
+
>>> model = AutoModelForImageTextToText.from_pretrained("stepfun-ai/GOT-OCR-2.0-hf", device_map=device)
|
206 |
+
>>> processor = AutoProcessor.from_pretrained("stepfun-ai/GOT-OCR-2.0-hf")
|
207 |
|
208 |
>>> image = "https://huggingface.co/datasets/hf-internal-testing/fixtures_got_ocr/resolve/main/sheet_music.png"
|
209 |
>>> inputs = processor(image, return_tensors="pt", format=True).to(device)
|