RaushanTurganbay HF staff commited on
Commit
cb42003
·
verified ·
1 Parent(s): 4cf9d8c

Add chat template usage examples

Browse files
Files changed (1) hide show
  1. README.md +31 -5
README.md CHANGED
@@ -70,7 +70,22 @@ def read_video_pyav(container, indices):
70
  model = VideoLlavaForConditionalGeneration.from_pretrained("LanguageBind/Video-LLaVA-7B-hf")
71
  processor = VideoLlavaProcessor.from_pretrained("LanguageBind/Video-LLaVA-7B-hf")
72
 
73
- prompt = "USER: <video>Why is this video funny? ASSISTANT:"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  video_path = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="sample_demo_1.mp4", repo_type="dataset")
75
  container = av.open(video_path)
76
 
@@ -89,11 +104,22 @@ print(processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_to
89
  # Generate from images and videos mix
90
  url = "http://images.cocodataset.org/val2017/000000039769.jpg"
91
  image = Image.open(requests.get(url, stream=True).raw)
92
- prompt = [
93
- "USER: <image> How many cats are there in the image? ASSISTANT:",
94
- "USER: <video>Why is this video funny? ASSISTANT:"
 
 
 
 
 
 
 
 
 
95
  ]
96
- inputs = processor(text=prompt, images=image, videos=clip, padding=True, return_tensors="pt")
 
 
97
 
98
  # Generate
99
  generate_ids = model.generate(**inputs, max_length=50)
 
70
  model = VideoLlavaForConditionalGeneration.from_pretrained("LanguageBind/Video-LLaVA-7B-hf")
71
  processor = VideoLlavaProcessor.from_pretrained("LanguageBind/Video-LLaVA-7B-hf")
72
 
73
+ # Define a chat history and use `apply_chat_template` to get correctly formatted prompt
74
+ # Each value in "content" has to be a list of dicts with types ("text", "image", "video")
75
+ conversation = [
76
+ {
77
+
78
+ "role": "user",
79
+ "content": [
80
+ {"type": "text", "text": "Why is this video funny?"},
81
+ {"type": "video"},
82
+ ],
83
+ },
84
+ ]
85
+
86
+ # will be formatted as "USER: <video>\nWhy is this video funny? ASSISTANT:"
87
+ prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
88
+
89
  video_path = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="sample_demo_1.mp4", repo_type="dataset")
90
  container = av.open(video_path)
91
 
 
104
  # Generate from images and videos mix
105
  url = "http://images.cocodataset.org/val2017/000000039769.jpg"
106
  image = Image.open(requests.get(url, stream=True).raw)
107
+
108
+ # Define a chat history and use `apply_chat_template` to get correctly formatted prompt
109
+ # Each value in "content" has to be a list of dicts with types ("text", "image", "video")
110
+ conversation_image = [
111
+ {
112
+
113
+ "role": "user",
114
+ "content": [
115
+ {"type": "text", "text": "How many cats are there in the image?"},
116
+ {"type": "image"},
117
+ ],
118
+ },
119
  ]
120
+ prompt_image = processor.apply_chat_template(conversation_image, add_generation_prompt=True)
121
+
122
+ inputs = processor(text=[prompt_image, prompt], images=image, videos=clip, padding=True, return_tensors="pt")
123
 
124
  # Generate
125
  generate_ids = model.generate(**inputs, max_length=50)