Update README.md
README.md
CHANGED
@@ -13,6 +13,43 @@ A CLIP ViT-L/14 model trained using [OpenCLIP](https://github.com/mlfoundations/
# How to Use

## Installation

```bash
$ pip install open_clip_torch
```

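A quick way to confirm the installation is to import the package and print its version (this assumes the installed `open_clip_torch` release exposes `open_clip.__version__`, as recent releases do):

```python
# Minimal install sanity check (assumption: the package exposes __version__).
import open_clip

print(open_clip.__version__)
```
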
## Zero-shot Image Classification
```python
import open_clip
import requests
import torch
from PIL import Image

# Load the pretrained model, its image preprocessing transform, and the
# matching tokenizer from the Hugging Face Hub.
model, preprocess = open_clip.create_model_from_pretrained('hf-hub:speed/llm-jp-roberta-ViT-L-14-relaion-1.5B-lr5e-4-bs8k-accum4-20241218-epoch90')
tokenizer = open_clip.get_tokenizer('hf-hub:speed/llm-jp-roberta-ViT-L-14-relaion-1.5B-lr5e-4-bs8k-accum4-20241218-epoch90')

# Download a test image and prepare a single-image batch plus three candidate
# labels ("cat", "dog", "bird" in Japanese).
url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
image = Image.open(requests.get(url, stream=True).raw)
image = preprocess(image).unsqueeze(0)
text = tokenizer(["猫", "犬", "鳥"])

with torch.no_grad(), torch.cuda.amp.autocast():
    # Encode both modalities and L2-normalize the embeddings.
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

# Scaled cosine similarities turned into a probability distribution over the
# three labels; text_probs has shape (1, 3).
text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)

print("Label probs:", text_probs)
```
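
As a minimal follow-up sketch (not part of the original snippet), the probabilities can be mapped back to the candidate labels to report the top prediction; it continues from the `text_probs` tensor computed above:

```python
# Follow-up sketch: map the (1, 3) probability tensor back to the candidate
# labels and print the most likely one. `text_probs` comes from the snippet above.
labels = ["猫", "犬", "鳥"]  # cat, dog, bird
top_idx = text_probs.argmax(dim=-1).item()
print(f"Top label: {labels[top_idx]} (p={text_probs[0, top_idx].item():.3f})")
```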

References:
- [Using OpenCLIP at Hugging Face](https://huggingface.co/docs/hub/en/open_clip), Hugging Face docs
- OpenCLIP [repository](https://github.com/mlfoundations/open_clip) on GitHub

# Training Details