Spaces:

ajeetraina
/

demollm

Runtime error

ajeetsraina committed on Sep 22, 2023

Commit

26faa32

1 Parent(s): 2bdbc80

Added

Files changed (2) hide show

Dockerfile ADDED Viewed

+# Start from an official Python image. "python:3.9-transformers" is not a tag of the
+# official python image, so the build cannot pull it; use the slim 3.9 image instead.
+FROM python:3.9-slim
+# Set the working directory to the directory where the application files will live.
+WORKDIR /app
+# Install the packages inference.py imports (transformers, flask) plus the PyTorch
+# backend the model runs on; sentencepiece is required by SentencePiece-based tokenizers.
+RUN pip install --no-cache-dir transformers torch flask sentencepiece
+# Keep the Hugging Face model cache in a world-writable location so downloads work
+# even when the container runs as a non-root user.
+ENV HF_HOME=/tmp/huggingface
+# Copy the application files into the container (after the install step so the
+# dependency layer is reused when only the code changes).
+COPY . /app
+# Expose port 8000, the port inference.py listens on.
+# NOTE: Docker Spaces route traffic to `app_port` (7860 by default); set
+# `app_port: 8000` in the Space's README metadata to match.
+EXPOSE 8000
+# Start the inference server.
+CMD ["python", "inference.py"]

inference.py ADDED Viewed

+import transformers
+class LLMInferenceServer:
+    """Wrap a Hugging Face causal language model and its tokenizer for text generation."""
+    def __init__(self, model_name):
+        """Load the tokenizer and causal-LM weights identified by ``model_name``."""
+        # The tokenizer is needed to turn the prompt into input ids and to turn the
+        # generated ids back into text.
+        self.tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
+        self.model = transformers.AutoModelForCausalLM.from_pretrained(model_name)
+    def generate(self, prompt, max_length=100):
+        """Generate a continuation of ``prompt`` and return it as a string.
+
+        Args:
+            prompt: Text to condition the generation on.
+            max_length: Maximum total length, in tokens, of prompt plus continuation.
+
+        Returns:
+            The decoded text of the first generated sequence, special tokens removed.
+        """
+        # Encode the actual prompt; the model's generate() accepts tensors, not raw text.
+        inputs = self.tokenizer(prompt, return_tensors="pt")
+        output_ids = self.model.generate(**inputs, max_length=max_length)
+        # Decode to a plain string so callers can serialize the result (e.g. to JSON).
+        return self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
+if __name__ == '__main__':
+    # Flask is only needed when the file is run as a server, so it is imported here.
+    from flask import Flask, request, jsonify
+    # NOTE(review): this checkpoint is published as an encoder-style model; confirm it
+    # is the intended model for causal text generation.
+    model_name = "google/bigbird-roberta-base"
+    server = LLMInferenceServer(model_name)
+    app = Flask(__name__)
+    @app.route("/generate", methods=["POST"])
+    def generate():
+        """Handle POST /generate.
+
+        Expects a JSON object with a string "prompt" and an optional positive integer
+        "max_length" (default 100). Responds with {"response": ...} on success, or
+        {"error": ...} with status 400 when the body is malformed.
+        """
+        # silent=True yields None instead of raising on a missing or invalid JSON body,
+        # so bad requests get a clear 400 rather than an unhandled exception.
+        payload = request.get_json(silent=True)
+        if not isinstance(payload, dict) or not isinstance(payload.get("prompt"), str):
+            return jsonify({"error": "JSON body must include a string 'prompt'"}), 400
+        # Fall back to the same default LLMInferenceServer.generate uses.
+        max_length = payload.get("max_length", 100)
+        # bool is a subclass of int, so reject it explicitly.
+        if isinstance(max_length, bool) or not isinstance(max_length, int) or max_length <= 0:
+            return jsonify({"error": "'max_length' must be a positive integer"}), 400
+        response = server.generate(payload["prompt"], max_length)
+        return jsonify({"response": response})
+    # Bind to all interfaces so the server is reachable from outside the container.
+    app.run(host="0.0.0.0", port=8000)