Spaces:

lixin4ever
/

VideoLLaMA3

Running on Zero

App Files Files Community

lixin4ever committed 15 days ago

Commit

593a1aa

verified ·

1 Parent(s): 0911cc7

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -15

app.py CHANGED Viewed

@@ -11,19 +11,21 @@ HEADER = """
 """
 class VideoLLaMA3GradioInterface(object):
-    def __init__(self, model_name, device="cpu", example_dir=None, **server_kwargs):
         self.device = device
-        self.model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            trust_remote_code=True,
-            torch_dtype=torch.bfloat16,
-            attn_implementation="flash_attention_2",
-        )
-        self.model.to(self.device)
-        self.processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
         self.server_kwargs = server_kwargs
         self.image_formats = ("png", "jpg", "jpeg")
@@ -138,7 +140,7 @@ class VideoLLaMA3GradioInterface(object):
             "max_new_tokens": max_new_tokens
         }
-        inputs = self.processor(
             conversation=new_messages,
             add_system_prompt=True,
             add_generation_prompt=True,
@@ -148,14 +150,14 @@ class VideoLLaMA3GradioInterface(object):
         if "pixel_values" in inputs:
             inputs["pixel_values"] = inputs["pixel_values"].to(torch.bfloat16)
-        streamer = TextIteratorStreamer(self.processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
         generation_kwargs = {
             **inputs,
             **generation_config,
             "streamer": streamer,
         }
-        thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
         thread.start()
         messages.append({"role": "assistant", "content": ""})
@@ -169,8 +171,7 @@ class VideoLLaMA3GradioInterface(object):
 if __name__ == "__main__":
     interface = VideoLLaMA3GradioInterface(
-        model_name="DAMO-NLP-SG/VideoLLaMA3-7B",
-        device="cuda",
         example_dir="./examples",
     )
     interface.launch()

 """
+# Model and processor are created once at module scope; the interface class
+# below reads these globals (`model`, `processor`) instead of instance state.
+device = "cuda"
+# The checkpoint id is named once so the model and its processor cannot drift
+# apart. It used to be an __init__ parameter; without a module-level
+# definition the processor line below would raise NameError at import.
+model_name = "DAMO-NLP-SG/VideoLLaMA3-7B"
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    trust_remote_code=True,
+    torch_dtype=torch.bfloat16,
+    attn_implementation="flash_attention_2",
+)
+model.to(device)
+processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
 class VideoLLaMA3GradioInterface(object):
+    def __init__(self, device="cpu", example_dir=None, **server_kwargs):
         self.device = device
         self.server_kwargs = server_kwargs
         self.image_formats = ("png", "jpg", "jpeg")
             "max_new_tokens": max_new_tokens
         }
+        inputs = processor(
             conversation=new_messages,
             add_system_prompt=True,
             add_generation_prompt=True,
         if "pixel_values" in inputs:
             inputs["pixel_values"] = inputs["pixel_values"].to(torch.bfloat16)
+        streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
         generation_kwargs = {
             **inputs,
             **generation_config,
             "streamer": streamer,
         }
+        thread = Thread(target=model.generate, kwargs=generation_kwargs)
         thread.start()
         messages.append({"role": "assistant", "content": ""})
 if __name__ == "__main__":
+    # Pass the module-level `device` (the one the model was moved to) so the
+    # interface does not fall back to its own "cpu" default.
     interface = VideoLLaMA3GradioInterface(
+        device=device,
         example_dir="./examples",
     )
     interface.launch()