im committed on
Commit
0116945
·
1 Parent(s): ae547d4
Files changed (2) hide show
  1. app copy 2.py +104 -0
  2. app.py +106 -80
app copy 2.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ import requests
3
+ from PIL import Image
4
+ import numpy as np
5
+ import gradio as gr
6
+ from encoder import FashionCLIPEncoder
7
+
8
+ # Constants
9
+ REQUESTS_HEADERS = {
10
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
11
+ }
12
+ BATCH_SIZE = 30 # Define batch size for processing
13
+
14
+ # Initialize encoder
15
+ encoder = FashionCLIPEncoder()
16
+
17
+ # Helper function to download images
18
def download_image_as_pil(url: str, timeout: int = 10) -> "Image.Image | None":
    """Download an image over HTTP and return it as an RGB PIL image.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait on the server before giving up.

    Returns:
        The decoded image converted to RGB, or ``None`` when the server does
        not answer with status 200 or when downloading/decoding fails (the
        failure is printed, not raised).
    """
    try:
        # The context manager releases the streamed connection even when
        # decoding fails part-way through.
        with requests.get(
            url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout
        ) as response:
            if response.status_code != 200:
                return None

            # The raw urllib3 stream is not decompressed by default; without
            # this flag a gzip/deflate-encoded body reaches PIL still
            # compressed and cannot be identified as an image.
            response.raw.decode_content = True

            # convert() reads the pixel data, so the image is fully loaded
            # before the connection is closed on leaving the ``with`` block.
            return Image.open(response.raw).convert("RGB")
    except Exception as e:
        # Best-effort helper: callers treat ``None`` as "download failed".
        print(f"Error downloading image: {e}")
        return None
27
+
28
+ # Embedding function for a batch of images
29
def batch_process_images(image_urls: str):
    """Embed every image referenced in a list of URLs typed by the user.

    Args:
        image_urls: Image URLs separated by commas and/or line breaks.

    Returns:
        A list holding one dict per URL: the record appended by
        ``process_batch`` for downloaded images, or
        ``{"image_url": ..., "error": ...}`` for URLs that could not be
        downloaded. When the input contains no URL at all, a single
        ``{"error": ...}`` dict is returned instead of a list.
    """
    # The input comes from a multi-line textbox, so treat line breaks like
    # commas; a URL pasted on its own line would otherwise be fused with its
    # neighbour into one invalid URL.
    normalized = image_urls.replace("\r", "\n").replace("\n", ",")
    urls = [part.strip() for part in normalized.split(",") if part.strip()]

    if not urls:
        return {"error": "No valid image URLs provided."}

    results = []
    batch_urls, batch_images = [], []

    for url in urls:
        # Only the download is guarded here; process_batch reports its own
        # failures per URL.
        try:
            image = download_image_as_pil(url)
        except Exception as e:
            results.append({"image_url": url, "error": str(e)})
            continue

        if image is None:
            results.append({"image_url": url, "error": "Failed to download image"})
            continue

        batch_urls.append(url)
        batch_images.append(image)

        # Flush as soon as a full batch has been collected.
        if len(batch_images) >= BATCH_SIZE:
            process_batch(batch_urls, batch_images, results)
            batch_urls, batch_images = [], []

    # Flush the final, partially filled batch.
    if batch_images:
        process_batch(batch_urls, batch_images, results)

    return results
63
+
64
+
65
+ # Helper function to process a batch
66
def process_batch(batch_urls, batch_images, results):
    """Encode one batch of images and append one record per URL to ``results``.

    Args:
        batch_urls: URLs of the images, in the same order as ``batch_images``.
        batch_images: Images handed to ``encoder.encode_images``.
        results: List that is extended in place. On success each URL gets
            ``{"image_url", "embedding_preview", "success"}``; if anything in
            the batch fails, every URL of the batch gets
            ``{"image_url", "error"}`` instead.
    """
    if not batch_images:
        # Nothing to encode; avoid calling the encoder with an empty batch.
        return

    try:
        embeddings = encoder.encode_images(batch_images)

        # Collect into a local list first so that a failure half-way through
        # cannot leave some URLs with both a success and an error record.
        batch_results = []
        for url, embedding in zip(batch_urls, embeddings):
            norm = np.linalg.norm(embedding)
            # A zero vector cannot be scaled to unit length; dividing would
            # produce NaNs, which are not valid JSON output.
            embedding_normalized = embedding / norm if norm > 0 else embedding

            batch_results.append({
                "image_url": url,
                "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
                "success": True
            })
    except Exception as e:
        results.extend({"image_url": url, "error": str(e)} for url in batch_urls)
    else:
        results.extend(batch_results)
84
+
85
+
86
+ # Gradio Interface
87
+ iface = gr.Interface(
88
+ fn=batch_process_images,
89
+ inputs=gr.Textbox(
90
+ lines=5,
91
+ placeholder="Enter image URLs separated by commas",
92
+ label="Batch Image URLs",
93
+ ),
94
+ outputs=gr.JSON(label="Embedding Results"),
95
+ title="Batch Fashion CLIP Embedding API",
96
+ description="Enter multiple image URLs (separated by commas) to generate embeddings for the batch. Each embedding preview includes the first 5 values.",
97
+ examples=[
98
+ ["https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp, https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"]
99
+ ],
100
+ )
101
+
102
+ # Launch Gradio App
103
+ if __name__ == "__main__":
104
+ iface.launch()
app.py CHANGED
@@ -1,21 +1,16 @@
1
- # app.py
2
  import os
3
- from fastapi import FastAPI
4
- from pydantic import BaseModel
5
- from typing import List
6
  import requests
7
  from PIL import Image, UnidentifiedImageError
8
  import numpy as np
 
9
  from encoder import FashionCLIPEncoder
10
  from pinecone import Pinecone
11
  from dotenv import load_dotenv
 
12
 
13
  # Load environment variables
14
  load_dotenv()
15
 
16
- # Initialize FastAPI app
17
- app = FastAPI()
18
-
19
  # Constants
20
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
21
  PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
@@ -30,14 +25,6 @@ pc = Pinecone(api_key=PINECONE_API_KEY)
30
  index = pc.Index(PINECONE_INDEX_NAME)
31
  encoder = FashionCLIPEncoder()
32
 
33
- class ProductData(BaseModel):
34
- product_id: str
35
- url: str
36
-
37
- class ProcessRequest(BaseModel):
38
- products: List[ProductData]
39
- upload_to_pinecone: bool = True
40
-
41
  def download_image_as_pil(url: str, timeout: int = 10) -> Image.Image:
42
  try:
43
  response = requests.get(url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout)
@@ -51,7 +38,6 @@ def download_image_as_pil(url: str, timeout: int = 10) -> Image.Image:
51
  print(f"Error downloading image: {e}")
52
  return None
53
 
54
-
55
  def process_batch(batch_products, batch_images, results):
56
  try:
57
  # Generate embeddings
@@ -66,58 +52,77 @@ def process_batch(batch_products, batch_images, results):
66
  "product_id": product["product_id"],
67
  "image_url": product["url"],
68
  "embedding": embedding_normalized.tolist(),
69
- "embedding_preview": embedding_normalized[:5].tolist(), # First 5 values for preview
70
  "success": True
71
  })
72
  except Exception as e:
73
  for product in batch_products:
74
  results.append({
75
- "product_id": product["product_id"],
76
- "image_url": product["url"],
77
  "error": str(e)
78
  })
79
 
80
- def batch_process_images(products):
81
- if not products:
82
- return {"error": "No products provided."}
 
 
 
 
 
 
83
 
84
- results = []
85
- batch_products, batch_images = [], []
86
 
87
- for product in products:
88
- try:
89
- # Download image
90
- image = download_image_as_pil(product["url"])
91
- if not image:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  results.append({
93
  "product_id": product["product_id"],
94
  "image_url": product["url"],
95
- "error": "Failed to download image"
96
  })
97
- continue
98
-
99
- batch_products.append(product)
100
- batch_images.append(image)
101
-
102
- # Process batch when reaching batch size
103
- if len(batch_images) == BATCH_SIZE:
104
- process_batch(batch_products, batch_images, results)
105
- batch_products, batch_images = [], []
106
-
107
- except Exception as e:
108
- results.append({
109
- "product_id": product["product_id"],
110
- "image_url": product["url"],
111
- "error": str(e)
112
- })
113
 
114
- # Process remaining images in the last batch
115
- if batch_images:
116
- process_batch(batch_products, batch_images, results)
117
 
118
- return results
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
- def upload_to_pinecone(processed_results):
121
  """Upload embeddings to Pinecone"""
122
  vectors_to_upsert = []
123
  for result in processed_results:
@@ -136,35 +141,56 @@ def upload_to_pinecone(processed_results):
136
 
137
  return {"uploaded_count": len(vectors_to_upsert)}
138
 
139
- @app.post("/process")
140
- async def process_images(request: ProcessRequest):
141
- """
142
- Process product images and optionally upload their embeddings to Pinecone
143
-
144
- Parameters:
145
- - products: List of products with product_id and url
146
- - upload_to_pinecone: Boolean flag to determine if embeddings should be uploaded to Pinecone
147
- """
148
- # Convert products to list of dicts
149
- products_data = [{"product_id": p.product_id, "url": p.url} for p in request.products]
150
-
151
- # Process images
152
- results = batch_process_images(products_data)
153
-
154
- # Upload to Pinecone if requested
155
- if request.upload_to_pinecone:
156
- upload_result = upload_to_pinecone(results)
157
- return {
158
- "processing_results": results,
159
- "pinecone_upload": upload_result
160
  }
161
-
162
- return {"processing_results": results}
 
163
 
164
- @app.get("/health")
165
- async def health_check():
166
- return {"status": "healthy"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
 
168
- # if __name__ == "__main__":
169
- # import uvicorn
170
- # uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
1
  import os
 
 
 
2
  import requests
3
  from PIL import Image, UnidentifiedImageError
4
  import numpy as np
5
+ import gradio as gr
6
  from encoder import FashionCLIPEncoder
7
  from pinecone import Pinecone
8
  from dotenv import load_dotenv
9
+ import json
10
 
11
  # Load environment variables
12
  load_dotenv()
13
 
 
 
 
14
  # Constants
15
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
16
  PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
 
25
  index = pc.Index(PINECONE_INDEX_NAME)
26
  encoder = FashionCLIPEncoder()
27
 
 
 
 
 
 
 
 
 
28
  def download_image_as_pil(url: str, timeout: int = 10) -> Image.Image:
29
  try:
30
  response = requests.get(url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout)
 
38
  print(f"Error downloading image: {e}")
39
  return None
40
 
 
41
  def process_batch(batch_products, batch_images, results):
42
  try:
43
  # Generate embeddings
 
52
  "product_id": product["product_id"],
53
  "image_url": product["url"],
54
  "embedding": embedding_normalized.tolist(),
55
+ "embedding_preview": embedding_normalized[:5].tolist(),
56
  "success": True
57
  })
58
  except Exception as e:
59
  for product in batch_products:
60
  results.append({
61
+ "product_id": product["product_id"],
62
+ "image_url": product["url"],
63
  "error": str(e)
64
  })
65
 
66
def batch_process_images(json_input: str):
    """Generate embeddings for the products described by a JSON request.

    Args:
        json_input: JSON text shaped like
            ``{"products": [{"product_id": ..., "url": ...}],
            "upload_to_pinecone": bool}``. ``upload_to_pinecone`` defaults
            to ``False`` when omitted.

    Returns:
        ``{"processing_results": [...]}`` with one record per product, plus a
        ``"pinecone_upload"`` entry when an upload was requested. Malformed
        requests yield ``{"error": ...}`` instead; this function reports
        problems in its return value rather than raising.
    """
    try:
        data = json.loads(json_input)
        if not isinstance(data, dict):
            # Valid JSON, but a list/number/string has no "products" key.
            return {"error": "JSON input must be an object with a \"products\" list."}

        products = data.get("products", [])
        upload_requested = data.get("upload_to_pinecone", False)

        if not products:
            return {"error": "No products provided in JSON input."}
        if not isinstance(products, list):
            return {"error": "\"products\" must be a list."}

        results = []
        batch_products, batch_images = [], []

        for product in products:
            # Validate up front: later code indexes both keys directly, and a
            # missing key would otherwise abort the whole request.
            is_object = isinstance(product, dict)
            if not (is_object and "product_id" in product and "url" in product):
                results.append({
                    "product_id": product.get("product_id") if is_object else None,
                    "image_url": product.get("url") if is_object else None,
                    "error": "Each product must be an object with \"product_id\" and \"url\""
                })
                continue

            try:
                image = download_image_as_pil(product["url"])
            except Exception as e:
                results.append({
                    "product_id": product["product_id"],
                    "image_url": product["url"],
                    "error": str(e)
                })
                continue

            if image is None:
                results.append({
                    "product_id": product["product_id"],
                    "image_url": product["url"],
                    "error": "Failed to download image"
                })
                continue

            batch_products.append(product)
            batch_images.append(image)

            # Flush as soon as a full batch has been collected.
            if len(batch_images) >= BATCH_SIZE:
                process_batch(batch_products, batch_images, results)
                batch_products, batch_images = [], []

        # Flush the final, partially filled batch.
        if batch_images:
            process_batch(batch_products, batch_images, results)

        response = {"processing_results": results}
        if upload_requested:
            response["pinecone_upload"] = upload_vector_to_pinecone(results)
        return response

    except json.JSONDecodeError:
        return {"error": "Invalid JSON format"}
    except Exception as e:
        # Last-resort boundary so the UI always receives a JSON-serialisable
        # answer instead of a traceback.
        return {"error": f"Unexpected error: {str(e)}"}
124
 
125
+ def upload_vector_to_pinecone(processed_results):
126
  """Upload embeddings to Pinecone"""
127
  vectors_to_upsert = []
128
  for result in processed_results:
 
141
 
142
  return {"uploaded_count": len(vectors_to_upsert)}
143
 
144
+ # Example JSON input
145
+ EXAMPLE_INPUT = {
146
+ "products": [
147
+ {
148
+ "product_id": "1",
149
+ "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  }
151
+ ],
152
+ "upload_to_pinecone": False
153
+ }
154
 
155
+ # Gradio Interface
156
+ iface = gr.Interface(
157
+ fn=batch_process_images,
158
+ inputs=gr.Code(
159
+ label="Input JSON",
160
+ language="json",
161
+ value=json.dumps(EXAMPLE_INPUT, indent=4) # Changed from default to value
162
+ ),
163
+ outputs=gr.JSON(label="Processing Results"),
164
+ title="Fashion CLIP Embedding Generator",
165
+ description="Provide JSON input with product IDs, URLs, and Pinecone upload preference to generate embeddings.",
166
+ article="""
167
+ ### Input JSON Format:
168
+ ```json
169
+ {
170
+ "products": [
171
+ {
172
+ "product_id": "string",
173
+ "url": "string"
174
+ }
175
+ ],
176
+ "upload_to_pinecone": boolean
177
+ }
178
+ ```
179
+
180
+ ### Features:
181
+ - Batch processing of multiple images
182
+ - Custom product ID support
183
+ - Embedding generation using Fashion CLIP
184
+ - Optional Pinecone database integration
185
+ - Error handling and detailed results
186
+
187
+ Make sure to set up your environment variables in a .env file:
188
+ - PINECONE_API_KEY
189
+ - PINECONE_INDEX_NAME
190
+ - PINECONE_NAMESPACE
191
+ """
192
+ )
193
 
194
+ # Launch Gradio App
195
+ if __name__ == "__main__":
196
+ iface.launch()