Spaces:

Streetmarkets
/

openFashionClip

Running

App Files Community

im committed 22 days ago

Commit

4bd62d7

1 Parent(s): d933c69

change

Browse files

Files changed (4) hide show

app copy.py +257 -0
app.py +136 -147
debug_image.jpg +0 -0
temp_image.jpg +0 -0

app copy.py ADDED Viewed

	@@ -0,0 +1,257 @@

+import uuid
+import requests
+from PIL import Image
+import numpy as np
+import gradio as gr
+from encoder import FashionCLIPEncoder
+# Constants
+REQUESTS_HEADERS = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+}
+BATCH_SIZE = 30  # Define batch size for processing
+# Initialize encoder
+encoder = FashionCLIPEncoder()
+# Helper function to download images
def download_image_as_pil(url: str, timeout: int = 10) -> "Image.Image | None":
    """Download ``url`` and return it as an RGB PIL image, or ``None`` on failure.

    Args:
        url: Address of the image to fetch.
        timeout: Value passed to ``requests.get`` as its ``timeout``.

    Returns:
        The decoded image converted to RGB, or ``None`` when the response
        status is not 200 or any exception is raised while fetching or
        decoding (the exception is printed, not propagated).
    """
    from io import BytesIO

    try:
        # The context manager releases the streamed connection on every path,
        # including the early return for non-200 responses.
        with requests.get(url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout) as response:
            if response.status_code != 200:
                return None
            # Read through ``response.content`` rather than ``response.raw``:
            # the raw stream is not Content-Encoding decoded, so PIL cannot
            # parse gzip/deflate-compressed responses read from it.
            payload = response.content
        return Image.open(BytesIO(payload)).convert("RGB")  # Ensure consistent format
    except Exception as e:
        print(f"Error downloading image: {e}")
        return None
+# Embedding function for a batch of images
def batch_process_images(image_urls: str):
    """Embed every image named in a comma- or newline-separated URL string.

    Args:
        image_urls: URLs separated by commas and/or line breaks. ``None`` and
            blank input are treated as "no URLs".

    Returns:
        ``{"error": ...}`` when no URL is supplied; otherwise a list holding
        one result dict per URL (a download/processing error entry, or the
        entry appended by ``process_batch``).
    """
    import re

    # The input box is multi-line, so accept line breaks as separators too;
    # splitting on commas alone would fuse one-URL-per-line input into a
    # single invalid URL.
    pieces = re.split(r"[,\r\n]+", image_urls or "")
    urls = [piece.strip() for piece in pieces if piece.strip()]
    if not urls:
        return {"error": "No valid image URLs provided."}

    results = []
    batch_urls, batch_images = [], []
    for url in urls:
        try:
            image = download_image_as_pil(url)
            if image is None:
                results.append({"image_url": url, "error": "Failed to download image"})
                continue
            batch_urls.append(url)
            batch_images.append(image)
            # Flush as soon as a full batch has been collected.
            if len(batch_images) >= BATCH_SIZE:
                process_batch(batch_urls, batch_images, results)
                batch_urls, batch_images = [], []
        except Exception as e:
            results.append({"image_url": url, "error": str(e)})

    # Process whatever is left over in the final, partial batch.
    if batch_images:
        process_batch(batch_urls, batch_images, results)
    return results
+# Helper function to process a batch
def process_batch(batch_urls, batch_images, results):
    """Embed one batch of images and append the outcome for each URL.

    Args:
        batch_urls: URLs of the images, in the same order as ``batch_images``.
        batch_images: Images handed to ``encoder.encode_images``.
        results: List that receives the result dicts; mutated in place.

    A successful entry carries the first five values of the L2-normalized
    embedding. If anything raises while the batch is being handled, every URL
    in the batch gets a single error entry instead.
    """
    if not batch_images:
        return
    try:
        embeddings = encoder.encode_images(batch_images)
        # Build the entries locally and publish them only once the whole batch
        # has been handled; appending straight to ``results`` would leave
        # success entries behind when a later item raises, and the handler
        # below would then add error entries for those same URLs.
        batch_results = []
        for url, embedding in zip(batch_urls, embeddings):
            vector = np.asarray(embedding)
            norm = np.linalg.norm(vector)
            if norm == 0 or not np.isfinite(norm):
                # Dividing by such a norm would yield NaN/inf values.
                batch_results.append({
                    "image_url": url,
                    "error": "Embedding has zero or non-finite norm",
                })
                continue
            embedding_normalized = vector / norm
            batch_results.append({
                "image_url": url,
                "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
                "success": True
            })
    except Exception as e:
        batch_results = [{"image_url": url, "error": str(e)} for url in batch_urls]
    results.extend(batch_results)
+# Gradio Interface
+iface = gr.Interface(
+    fn=batch_process_images,
+    inputs=gr.Textbox(
+        lines=5,
+        placeholder="Enter image URLs separated by commas",
+        label="Batch Image URLs",
+    ),
+    outputs=gr.JSON(label="Embedding Results"),
+    title="Batch Fashion CLIP Embedding API",
+    description="Enter multiple image URLs (separated by commas) to generate embeddings for the batch. Each embedding preview includes the first 5 values.",
+    examples=[
+        ["https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp, https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"]
+    ],
+)
+# Launch Gradio App
+if __name__ == "__main__":
+    iface.launch()
+# import os
+# import requests
+# from PIL import Image
+# import numpy as np
+# from encoder import FashionCLIPEncoder
+# from pinecone import Pinecone
+# from dotenv import load_dotenv
+# # Load environment variables
+# load_dotenv()
+# # Constants
+# PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
+# PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
+# REQUESTS_HEADERS = {
+#     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+# }
+# BATCH_SIZE = 30  # Define batch size for processing
+# # Ensure API key and index name are set
+# if not PINECONE_API_KEY or not PINECONE_INDEX_NAME:
+#     raise ValueError("PINECONE_API_KEY and PINECONE_INDEX_NAME must be set in environment variables.")
+# # Initialize Pinecone
+# pc = Pinecone(api_key=PINECONE_API_KEY)
+# # Connect to the existing index
+# if PINECONE_INDEX_NAME not in pc.list_indexes().names():
+#     raise ValueError(f"Index '{PINECONE_INDEX_NAME}' does not exist. Please create it in your Pinecone account.")
+# index = pc.Index(PINECONE_INDEX_NAME)
+# print(f"Connected to Pinecone index '{PINECONE_INDEX_NAME}'.")
+# # Initialize encoder
+# encoder = FashionCLIPEncoder()
+# # Helper function to download images
+# def download_image_as_pil(url: str, timeout: int = 10) -> Image.Image:
+#     """
+#     Downloads an image from a URL and converts it to a PIL Image in RGB format.
+#     """
+#     try:
+#         response = requests.get(url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout)
+#         if response.status_code == 200:
+#             return Image.open(response.raw).convert("RGB")  # Ensure consistent format
+#         return None
+#     except Exception as e:
+#         print(f"Error downloading image from {url}: {e}")
+#         return None
+# # Function to process a batch of images
+# def batch_process_images(image_data: list, namespace: str = None):
+#     """
+#     Processes a batch of images, generates embeddings, and uploads them to Pinecone.
+#     Args:
+#         image_data (list): A list of dictionaries with "id" and "url" keys.
+#         namespace (str): Namespace for the Pinecone index.
+#     Returns:
+#         list: A list of results containing the embedding preview or error information.
+#     """
+#     results = []
+#     batch_ids, batch_urls, batch_images = [], [], []
+#     for data in image_data:
+#         try:
+#             image_id = data["id"]
+#             image_url = data["url"]
+#             # Download the image
+#             image = download_image_as_pil(image_url)
+#             if not image:
+#                 results.append({"id": image_id, "url": image_url, "error": "Failed to download image"})
+#                 continue
+#             batch_ids.append(image_id)
+#             batch_urls.append(image_url)
+#             batch_images.append(image)
+#             # Process batch when reaching batch size
+#             if len(batch_images) == BATCH_SIZE:
+#                 process_batch(batch_ids, batch_urls, batch_images, results, namespace)
+#                 batch_ids, batch_urls, batch_images = [], [], []
+#         except Exception as e:
+#             results.append({"id": data.get("id"), "url": data.get("url"), "error": str(e)})
+#     # Process remaining images in the last batch
+#     if batch_images:
+#         process_batch(batch_ids, batch_urls, batch_images, results, namespace)
+#     return results
+# # Function to process a batch and upload to Pinecone
+# def process_batch(batch_ids, batch_urls, batch_images, results, namespace):
+#     """
+#     Processes a batch of images and generates embeddings, uploading them to Pinecone.
+#     Args:
+#         batch_ids (list): List of IDs for the images.
+#         batch_urls (list): List of image URLs.
+#         batch_images (list): List of PIL images.
+#         results (list): List to store results for each image.
+#         namespace (str): Namespace for the Pinecone index.
+#     """
+#     try:
+#         # Generate embeddings
+#         embeddings = encoder.encode_images(batch_images)
+#         vectors = []
+#         for image_id, url, embedding in zip(batch_ids, batch_urls, embeddings):
+#             # Normalize embedding
+#             embedding_normalized = embedding / np.linalg.norm(embedding)
+#             # Append results
+#             result = {
+#                 "id": image_id,
+#                 "url": url,
+#                 "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
+#                 "success": True
+#             }
+#             results.append(result)
+#             # Prepare vector for upserting
+#             vectors.append({
+#                 "id": str(image_id),
+#                 "values": embedding_normalized.tolist(),
+#                 "metadata": {"url": url}
+#             })
+#         # Upload vectors to Pinecone
+#         index.upsert(vectors=vectors, namespace=namespace)
+#     except Exception as e:
+#         for image_id, url in zip(batch_ids, batch_urls):
+#             results.append({"id": image_id, "url": url, "error": str(e)})
+# # Example usage
+# if __name__ == "__main__":
+#     # Example input data
+#     image_data = [
+#         {"id": "1", "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp"},
+#         {"id": "2", "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"}
+#     ]
+#     # Process images and upload to Pinecone under namespace "ns1"
+#     results = batch_process_images(image_data, namespace="ns1")
+#     # Print results
+#     for result in results:
+#         print(result)

app.py CHANGED Viewed

@@ -1,188 +1,177 @@
-# import gradio as gr
-# from transformers import AutoProcessor, AutoModelForZeroShotImageClassification
-# from PIL import Image
-# import requests
-# import torch
-# # Load the FashionCLIP processor and model
-# processor = AutoProcessor.from_pretrained("patrickjohncyh/fashion-clip")
-# model = AutoModelForZeroShotImageClassification.from_pretrained("patrickjohncyh/fashion-clip")
-# # Define the function to process both text and image inputs
-# def generate_embeddings(input_text=None, input_image_url=None):
-#     try:
-#         if input_image_url:
-#             # Process image with accompanying text
-#             response = requests.get(input_image_url, stream=True)
-#             response.raise_for_status()
-#             image = Image.open(response.raw)
-#             # Use a default text if none is provided
-#             if not input_text:
-#                 input_text = "this is an image"
-#             # Prepare inputs for the model
-#             inputs = processor(
-#                 text=[input_text],
-#                 images=image,
-#                 return_tensors="pt",
-#                 padding=True
-#             )
-#             with torch.no_grad():
-#                 outputs = model(**inputs)
-#             image_embedding = outputs.logits_per_image.cpu().numpy().tolist()
-#             return {
-#                 "type": "image_embedding",
-#                 "input_image_url": input_image_url,
-#                 "input_text": input_text,
-#                 "embedding": image_embedding
-#             }
-#         elif input_text:
-#             # Process text input only
-#             inputs = processor(
-#                 text=[input_text],
-#                 images=None,
-#                 return_tensors="pt",
-#                 padding=True
-#             )
-#             with torch.no_grad():
-#                 outputs = model(**inputs)
-#             text_embedding = outputs.logits_per_text.cpu().numpy().tolist()
-#             return {
-#                 "type": "text_embedding",
-#                 "input_text": input_text,
-#                 "embedding": text_embedding
-#             }
-#         else:
-#             return {"error": "Please provide either a text query or an image URL."}
-#     except Exception as e:
-#         return {"error": str(e)}
-# # Create the Gradio interface
-# interface = gr.Interface(
-#     fn=generate_embeddings,
-#     inputs=[
-#         gr.Textbox(label="Text Query (Optional)", placeholder="e.g., red dress (used with image or for text embedding)"),
-#         gr.Textbox(label="Image URL", placeholder="e.g., https://example.com/image.jpg (used with or without text query)")
-#     ],
-#     outputs="json",
-#     title="FashionCLIP Combined Embedding API",
-#     description="Provide a text query and/or an image URL to compute embeddings for vector search."
-# )
-# # Launch the app
-# if __name__ == "__main__":
-#     interface.launch()
-# print(generate_embeddings("red dress"))
-import uuid
 import requests
-from PIL import Image
 import numpy as np
-import gradio as gr
 from encoder import FashionCLIPEncoder
 # Constants
 REQUESTS_HEADERS = {
     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
 }
-BATCH_SIZE = 30  # Define batch size for processing
-# Initialize encoder
 encoder = FashionCLIPEncoder()
-# Helper function to download images
 def download_image_as_pil(url: str, timeout: int = 10) -> Image.Image:
     try:
         response = requests.get(url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout)
-        if response.status_code == 200:
-            return Image.open(response.raw).convert("RGB")  # Ensure consistent format
         return None
     except Exception as e:
         print(f"Error downloading image: {e}")
         return None
-# Embedding function for a batch of images
-def batch_process_images(image_urls: str):
-    # Split the input string by commas and strip whitespace
-    urls = [url.strip() for url in image_urls.split(",") if url.strip()]
-    if not urls:
-        return {"error": "No valid image URLs provided."}
     results = []
-    batch_urls, batch_images = [], []
-    for url in urls:
         try:
             # Download image
-            image = download_image_as_pil(url)
             if not image:
-                results.append({"image_url": url, "error": "Failed to download image"})
                 continue
-            batch_urls.append(url)
             batch_images.append(image)
             # Process batch when reaching batch size
             if len(batch_images) == BATCH_SIZE:
-                process_batch(batch_urls, batch_images, results)
-                batch_urls, batch_images = [], []
         except Exception as e:
-            results.append({"image_url": url, "error": str(e)})
     # Process remaining images in the last batch
     if batch_images:
-        process_batch(batch_urls, batch_images, results)
     return results
-# Helper function to process a batch
-def process_batch(batch_urls, batch_images, results):
-    try:
-        # Generate embeddings
-        embeddings = encoder.encode_images(batch_images)
-        for url, embedding in zip(batch_urls, embeddings):
-            # Normalize embedding
-            embedding_normalized = embedding / np.linalg.norm(embedding)
-            # Append results
-            results.append({
-                "image_url": url,
-                "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
-                "success": True
-            })
-    except Exception as e:
-        for url in batch_urls:
-            results.append({"image_url": url, "error": str(e)})
-# Gradio Interface
-iface = gr.Interface(
-    fn=batch_process_images,
-    inputs=gr.Textbox(
-        lines=5,
-        placeholder="Enter image URLs separated by commas",
-        label="Batch Image URLs",
-    ),
-    outputs=gr.JSON(label="Embedding Results"),
-    title="Batch Fashion CLIP Embedding API",
-    description="Enter multiple image URLs (separated by commas) to generate embeddings for the batch. Each embedding preview includes the first 5 values.",
-    examples=[
-        ["https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp, https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"]
-    ],
-)
-# Launch Gradio App
 if __name__ == "__main__":
-    iface.launch()

+# app.py
+import os
+from fastapi import FastAPI
+from pydantic import BaseModel
+from typing import List
 import requests
+from PIL import Image, UnidentifiedImageError
 import numpy as np
 from encoder import FashionCLIPEncoder
+from pinecone import Pinecone
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+# Initialize FastAPI app
+app = FastAPI()
 # Constants
+PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
+PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
+PINECONE_NAMESPACE = os.getenv("PINECONE_NAMESPACE")
 REQUESTS_HEADERS = {
     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
 }
+BATCH_SIZE = 30
+# Initialize services
+pc = Pinecone(api_key=PINECONE_API_KEY)
+index = pc.Index(PINECONE_INDEX_NAME)
 encoder = FashionCLIPEncoder()
+class ProductData(BaseModel):
+    product_id: str
+    url: str
+class ProcessRequest(BaseModel):
+    products: List[ProductData]
+    upload_to_pinecone: bool = True
 def download_image_as_pil(url: str, timeout: int = 10) -> Image.Image:
     try:
         response = requests.get(url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout)
+        print(f"URL: {url}, Status Code: {response.status_code}, Content-Type: {response.headers.get('Content-Type')}")
+        if response.status_code == 200 and 'image' in response.headers.get('Content-Type', ''):
+            try:
+                with open("temp_image.jpg", "wb") as f:
+                    f.write(response.content)  # Save the image to a temporary file
+                return Image.open("temp_image.jpg").convert("RGB")
+            except UnidentifiedImageError:
+                print(f"Unidentified image file from URL: {url}")
+        else:
+            print(f"Non-image content for URL: {url}")
         return None
     except Exception as e:
         print(f"Error downloading image: {e}")
         return None
def process_batch(batch_products, batch_images, results):
    """Embed one batch of product images and append the outcome per product.

    Args:
        batch_products: Dicts with ``"product_id"`` and ``"url"`` keys, in the
            same order as ``batch_images``.
        batch_images: Images handed to ``encoder.encode_images``.
        results: List that receives the result dicts; mutated in place.

    A successful entry carries the full L2-normalized embedding plus a preview
    of its first five values. If anything raises while the batch is being
    handled, every product in the batch gets a single error entry instead.
    """
    if not batch_images:
        return
    try:
        embeddings = encoder.encode_images(batch_images)
        # Build the entries locally and publish them only once the whole batch
        # has been handled; appending straight to ``results`` would leave
        # success entries behind when a later item raises, and the handler
        # below would then add error entries for those same products.
        batch_results = []
        for product, embedding in zip(batch_products, embeddings):
            vector = np.asarray(embedding)
            norm = np.linalg.norm(vector)
            if norm == 0 or not np.isfinite(norm):
                # Dividing by such a norm would yield NaN/inf values, which
                # are not valid JSON numbers.
                batch_results.append({
                    "product_id": product["product_id"],
                    "image_url": product["url"],
                    "error": "Embedding has zero or non-finite norm"
                })
                continue
            embedding_normalized = vector / norm
            batch_results.append({
                "product_id": product["product_id"],
                "image_url": product["url"],
                "embedding": embedding_normalized.tolist(),
                "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
                "success": True
            })
    except Exception as e:
        batch_results = [
            {
                "product_id": product["product_id"],
                "image_url": product["url"],
                "error": str(e)
            }
            for product in batch_products
        ]
    results.extend(batch_results)
def batch_process_images(products):
    """Download and embed the image of every product, in batches.

    Args:
        products: Sequence of dicts with ``"product_id"`` and ``"url"`` keys.

    Returns:
        ``{"error": ...}`` when ``products`` is empty; otherwise a list with a
        result dict per product (an error entry created here, or the entry
        appended by ``process_batch``).
    """
    if not products:
        return {"error": "No products provided."}

    results = []
    batch_products, batch_images = [], []
    for product in products:
        try:
            # Read both keys up front so a malformed product is reported here
            # instead of failing later inside the batch.
            url = product["url"]
            product_id = product["product_id"]
            image = download_image_as_pil(url)
            if image is None:
                results.append({
                    "product_id": product_id,
                    "image_url": url,
                    "error": "Failed to download image"
                })
                continue
            batch_products.append(product)
            batch_images.append(image)
            # Flush as soon as a full batch has been collected.
            if len(batch_images) >= BATCH_SIZE:
                process_batch(batch_products, batch_images, results)
                batch_products, batch_images = [], []
        except Exception as e:
            # Use tolerant lookups: indexing the product again would re-raise
            # the very KeyError being handled and abort the whole request.
            info = product if isinstance(product, dict) else {}
            results.append({
                "product_id": info.get("product_id"),
                "image_url": info.get("url"),
                "error": str(e)
            })

    # Process whatever is left over in the final, partial batch.
    if batch_images:
        process_batch(batch_products, batch_images, results)
    return results
def upload_to_pinecone(processed_results, batch_size: int = 100):
    """Upload the successful embeddings in ``processed_results`` to Pinecone.

    Args:
        processed_results: Result dicts as produced by ``batch_process_images``.
            Entries that contain ``"error"`` or lack ``"embedding"`` are
            skipped. The ``{"error": ...}`` dict returned for empty input is
            accepted and uploads nothing.
        batch_size: Maximum number of vectors sent per upsert request.

    Returns:
        ``{"uploaded_count": n}`` where ``n`` is the number of vectors sent.
    """
    if isinstance(processed_results, dict):
        # Error payload rather than a list of per-product results.
        return {"uploaded_count": 0}

    vectors_to_upsert = [
        {
            'id': result['product_id'],
            'values': result['embedding'],
            'metadata': {
                'image_url': result['image_url']
            }
        }
        for result in processed_results
        if isinstance(result, dict) and 'error' not in result and 'embedding' in result
    ]

    # Send bounded chunks instead of one request that grows with the number of
    # products; Pinecone limits the size of a single upsert request.
    step = max(1, int(batch_size))
    for start in range(0, len(vectors_to_upsert), step):
        index.upsert(vectors=vectors_to_upsert[start:start + step], namespace=PINECONE_NAMESPACE)
    return {"uploaded_count": len(vectors_to_upsert)}
@app.post("/process")
async def process_images(request: ProcessRequest):
    """
    Process product images and optionally upload their embeddings to Pinecone
    Parameters:
    - products: List of products with product_id and url
    - upload_to_pinecone: Boolean flag to determine if embeddings should be uploaded to Pinecone
    """
    # Flatten the request models into the plain dicts the batch helper expects.
    products_data = []
    for item in request.products:
        products_data.append({"product_id": item.product_id, "url": item.url})

    results = batch_process_images(products_data)

    # The upload summary is attached only when the caller asked for an upload.
    response = {"processing_results": results}
    if request.upload_to_pinecone:
        response["pinecone_upload"] = upload_to_pinecone(results)
    return response
@app.get("/health")
async def health_check():
    # Liveness probe: always answers with a fixed payload.
    payload = {"status": "healthy"}
    return payload
 if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)

debug_image.jpg ADDED Viewed

temp_image.jpg ADDED Viewed