"""Gradio app that downloads images by URL and previews Fashion-CLIP embeddings."""

import uuid
from io import BytesIO
from typing import Optional

import gradio as gr
import numpy as np
import requests
from PIL import Image

from encoder import FashionCLIPEncoder

# Constants
REQUESTS_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
BATCH_SIZE = 30  # Number of images sent to the encoder in a single call

# Initialize encoder
encoder = FashionCLIPEncoder()


def download_image_as_pil(url: str, timeout: int = 10) -> Optional[Image.Image]:
    """Download an image and return it as an RGB PIL image.

    Args:
        url: Address of the image to fetch.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The decoded image converted to RGB, or ``None`` when the server does
        not answer with status 200 or when any error occurs while fetching or
        decoding (the error is printed, never raised).
    """
    try:
        # The context manager releases the connection on every path, including
        # non-200 responses whose body is never read (stream=True defers it).
        with requests.get(
            url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout
        ) as response:
            if response.status_code != 200:
                return None
            # response.content has transfer encodings (gzip/deflate) undone by
            # requests; response.raw would hand PIL the still-compressed bytes.
            return Image.open(BytesIO(response.content)).convert("RGB")
    except Exception as e:
        print(f"Error downloading image: {e}")
        return None


def batch_process_images(image_urls: str):
    """Download every URL in ``image_urls`` and embed the images in batches.

    Args:
        image_urls: URLs separated by commas and/or line breaks. Surrounding
            whitespace is ignored and empty entries are skipped.

    Returns:
        ``{"error": ...}`` when no URL is supplied; otherwise a list with one
        dict per URL, holding either an ``"embedding_preview"`` or an
        ``"error"``. Download failures are recorded immediately while
        successes are recorded when their batch is encoded, so the list is not
        guaranteed to follow input order.
    """
    # The textbox is multi-line, so accept one URL per line as well as commas.
    urls = [
        part.strip()
        for line in (image_urls or "").splitlines()
        for part in line.split(",")
        if part.strip()
    ]
    if not urls:
        return {"error": "No valid image URLs provided."}

    results = []
    batch_urls, batch_images = [], []
    for url in urls:
        try:
            image = download_image_as_pil(url)
            if image is None:
                results.append({"image_url": url, "error": "Failed to download image"})
                continue

            batch_urls.append(url)
            batch_images.append(image)

            # Flush as soon as a full batch has been collected.
            if len(batch_images) >= BATCH_SIZE:
                process_batch(batch_urls, batch_images, results)
                batch_urls, batch_images = [], []
        except Exception as e:
            # Last-resort guard: one bad URL must not abort the whole request.
            results.append({"image_url": url, "error": str(e)})

    # Encode whatever is left over in the final, partially filled batch.
    if batch_images:
        process_batch(batch_urls, batch_images, results)

    return results


def process_batch(batch_urls, batch_images, results):
    """Encode one batch of images and append one entry per URL to ``results``.

    Args:
        batch_urls: URLs of the images, in the same order as ``batch_images``.
        batch_images: PIL images handed to ``encoder.encode_images``.
        results: List mutated in place. On success each URL gets an entry with
            the first five values of its L2-normalized embedding; if anything
            fails, every URL of the batch gets an ``"error"`` entry instead.
    """
    try:
        embeddings = encoder.encode_images(batch_images)
        # Collected locally so a failure midway cannot leave a URL reported
        # both as a success and as an error.
        batch_results = []
        for url, embedding in zip(batch_urls, embeddings):
            # NOTE(review): assumes each embedding is a 1-D numeric vector.
            vector = np.asarray(embedding)
            norm = np.linalg.norm(vector)
            # A zero vector is kept as-is instead of dividing by zero, which
            # would put NaN values into the JSON output.
            normalized = vector / norm if norm > 0 else vector
            batch_results.append({
                "image_url": url,
                "embedding_preview": normalized[:5].tolist(),  # First 5 values for preview
                "success": True,
            })
    except Exception as e:
        results.extend({"image_url": url, "error": str(e)} for url in batch_urls)
    else:
        results.extend(batch_results)


# Gradio Interface
iface = gr.Interface(
    fn=batch_process_images,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Enter image URLs separated by commas",
        label="Batch Image URLs",
    ),
    outputs=gr.JSON(label="Embedding Results"),
    title="Batch Fashion CLIP Embedding API",
    description="Enter multiple image URLs (separated by commas) to generate embeddings for the batch. Each embedding preview includes the first 5 values.",
    examples=[
        ["https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp, https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"]
    ],
)

# Launch Gradio App
if __name__ == "__main__":
    iface.launch()


# ---------------------------------------------------------------------------
# Earlier variant of this script that also upserts the embeddings to Pinecone.
# It is disabled (commented out) and kept here for reference only.
# ---------------------------------------------------------------------------

# import os
# import requests
# from PIL import Image
# import numpy as np
# from encoder import FashionCLIPEncoder
# from pinecone import Pinecone
# from dotenv import load_dotenv

# # Load environment variables
# load_dotenv()

# # Constants
# PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
# PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
# REQUESTS_HEADERS = {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
# }
# BATCH_SIZE = 30  # Define batch size for processing

# # Ensure API key and index name are set
# if not PINECONE_API_KEY or not PINECONE_INDEX_NAME:
#     raise ValueError("PINECONE_API_KEY and PINECONE_INDEX_NAME must be set in environment variables.")

# # Initialize Pinecone
# pc = Pinecone(api_key=PINECONE_API_KEY)

# # Connect to the existing index
# if PINECONE_INDEX_NAME not in pc.list_indexes().names():
#     raise ValueError(f"Index '{PINECONE_INDEX_NAME}' does not exist. Please create it in your Pinecone account.")
# index = pc.Index(PINECONE_INDEX_NAME)
# print(f"Connected to Pinecone index '{PINECONE_INDEX_NAME}'.")

# # Initialize encoder
# encoder = FashionCLIPEncoder()

# # Helper function to download images
# def download_image_as_pil(url: str, timeout: int = 10) -> Image.Image:
#     """
#     Downloads an image from a URL and converts it to a PIL Image in RGB format.
#     """
#     try:
#         response = requests.get(url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout)
#         if response.status_code == 200:
#             return Image.open(response.raw).convert("RGB")  # Ensure consistent format
#         return None
#     except Exception as e:
#         print(f"Error downloading image from {url}: {e}")
#         return None

# # Function to process a batch of images
# def batch_process_images(image_data: list, namespace: str = None):
#     """
#     Processes a batch of images, generates embeddings, and uploads them to Pinecone.
#
#     Args:
#         image_data (list): A list of dictionaries with "id" and "url" keys.
#         namespace (str): Namespace for the Pinecone index.
#
#     Returns:
#         list: A list of results containing the embedding preview or error information.
#     """
#     results = []
#     batch_ids, batch_urls, batch_images = [], [], []

#     for data in image_data:
#         try:
#             image_id = data["id"]
#             image_url = data["url"]

#             # Download the image
#             image = download_image_as_pil(image_url)
#             if not image:
#                 results.append({"id": image_id, "url": image_url, "error": "Failed to download image"})
#                 continue

#             batch_ids.append(image_id)
#             batch_urls.append(image_url)
#             batch_images.append(image)

#             # Process batch when reaching batch size
#             if len(batch_images) == BATCH_SIZE:
#                 process_batch(batch_ids, batch_urls, batch_images, results, namespace)
#                 batch_ids, batch_urls, batch_images = [], [], []

#         except Exception as e:
#             results.append({"id": data.get("id"), "url": data.get("url"), "error": str(e)})

#     # Process remaining images in the last batch
#     if batch_images:
#         process_batch(batch_ids, batch_urls, batch_images, results, namespace)

#     return results

# # Function to process a batch and upload to Pinecone
# def process_batch(batch_ids, batch_urls, batch_images, results, namespace):
#     """
#     Processes a batch of images and generates embeddings, uploading them to Pinecone.
#
#     Args:
#         batch_ids (list): List of IDs for the images.
#         batch_urls (list): List of image URLs.
#         batch_images (list): List of PIL images.
#         results (list): List to store results for each image.
#         namespace (str): Namespace for the Pinecone index.
#     """
#     try:
#         # Generate embeddings
#         embeddings = encoder.encode_images(batch_images)

#         vectors = []
#         for image_id, url, embedding in zip(batch_ids, batch_urls, embeddings):
#             # Normalize embedding
#             embedding_normalized = embedding / np.linalg.norm(embedding)

#             # Append results
#             result = {
#                 "id": image_id,
#                 "url": url,
#                 "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
#                 "success": True
#             }
#             results.append(result)

#             # Prepare vector for upserting
#             vectors.append({
#                 "id": str(image_id),
#                 "values": embedding_normalized.tolist(),
#                 "metadata": {"url": url}
#             })

#         # Upload vectors to Pinecone
#         index.upsert(vectors=vectors, namespace=namespace)

#     except Exception as e:
#         for image_id, url in zip(batch_ids, batch_urls):
#             results.append({"id": image_id, "url": url, "error": str(e)})

# # Example usage
# if __name__ == "__main__":
#     # Example input data
#     image_data = [
#         {"id": "1", "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp"},
#         {"id": "2", "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"}
#     ]

#     # Process images and upload to Pinecone under namespace "ns1"
#     results = batch_process_images(image_data, namespace="ns1")

#     # Print results
#     for result in results:
#         print(result)