Spaces:
Running
Running
im
committed on
Commit
·
4bd62d7
1
Parent(s):
d933c69
change
Browse files- app copy.py +257 -0
- app.py +136 -147
- debug_image.jpg +0 -0
- temp_image.jpg +0 -0
app copy.py
ADDED
@@ -0,0 +1,257 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import uuid
|
2 |
+
import requests
|
3 |
+
from PIL import Image
|
4 |
+
import numpy as np
|
5 |
+
import gradio as gr
|
6 |
+
from encoder import FashionCLIPEncoder
|
7 |
+
|
8 |
+
# Constants
# Headers sent with every image request; the User-Agent is a desktop Chrome string.
REQUESTS_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
BATCH_SIZE = 30  # Number of downloaded images collected before one encoder call

# Initialize encoder
# Created once at import time; process_batch uses this module-level instance.
encoder = FashionCLIPEncoder()
|
16 |
+
|
17 |
+
# Helper function to download images
|
18 |
+
def download_image_as_pil(url: str, timeout: int = 10) -> "Image.Image | None":
    """Download an image over HTTP and return it as an RGB PIL image.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        The decoded image converted to RGB, or ``None`` when the response
        status is not 200 or when downloading/decoding raises.
    """
    from io import BytesIO  # local import keeps this block self-contained

    try:
        # The context manager releases the connection on every path; the
        # previous streamed response was never closed.
        with requests.get(url, headers=REQUESTS_HEADERS, timeout=timeout) as response:
            if response.status_code != 200:
                return None
            # response.content has transfer encodings (gzip/deflate) already
            # undone, whereas response.raw hands PIL the bytes as sent.
            return Image.open(BytesIO(response.content)).convert("RGB")  # Ensure consistent format
    except Exception as e:
        # Best-effort helper: report the problem and let the caller record a failure.
        print(f"Error downloading image: {e}")
        return None
|
27 |
+
|
28 |
+
# Embedding function for a batch of images
|
29 |
+
def batch_process_images(image_urls: str):
    """Embed every image referenced in a delimited string of URLs.

    Args:
        image_urls: URLs separated by commas and/or line breaks. Blank
            entries and surrounding whitespace are ignored; ``None`` is
            treated like an empty string.

    Returns:
        ``{"error": ...}`` when no URL is supplied. Otherwise a list of
        dicts: entries appended by ``process_batch`` for downloaded images
        and ``{"image_url", "error"}`` entries for failed downloads.
        Failed downloads are appended immediately, so the list order may
        differ from the input order.
    """
    import re  # local import keeps this block self-contained

    # The input box is multi-line, so line breaks are accepted as separators
    # in addition to commas.
    pieces = re.split(r"[,\r\n]+", image_urls or "")
    urls = [piece.strip() for piece in pieces if piece.strip()]

    if not urls:
        return {"error": "No valid image URLs provided."}

    results = []
    batch_urls, batch_images = [], []

    for url in urls:
        try:
            # Download image
            image = download_image_as_pil(url)
            if image is None:
                results.append({"image_url": url, "error": "Failed to download image"})
                continue

            batch_urls.append(url)
            batch_images.append(image)

            # Process batch when reaching batch size
            if len(batch_images) >= BATCH_SIZE:
                process_batch(batch_urls, batch_images, results)
                batch_urls, batch_images = [], []

        except Exception as e:
            results.append({"image_url": url, "error": str(e)})

    # Process remaining images in the last batch
    if batch_images:
        process_batch(batch_urls, batch_images, results)

    return results
|
63 |
+
|
64 |
+
|
65 |
+
# Helper function to process a batch
|
66 |
+
def process_batch(batch_urls, batch_images, results):
    """Embed one batch of images and append one result dict per URL.

    Args:
        batch_urls: URLs, parallel to ``batch_images``.
        batch_images: Images passed to ``encoder.encode_images``.
        results: List mutated in place. Each URL receives exactly one entry:
            ``{"image_url", "embedding_preview", "success"}`` on success or
            ``{"image_url", "error"}`` on failure.
    """
    batch_results = []
    try:
        # Generate embeddings
        embeddings = list(encoder.encode_images(batch_images))
        # zip() would silently drop URLs if the counts disagreed.
        if len(embeddings) != len(batch_urls):
            raise ValueError(
                f"Encoder returned {len(embeddings)} embeddings for {len(batch_urls)} images"
            )

        for url, embedding in zip(batch_urls, embeddings):
            vector = np.asarray(embedding)
            norm = np.linalg.norm(vector)
            # Dividing by a zero or non-finite norm would produce NaN/inf values.
            if not np.isfinite(norm) or norm == 0:
                batch_results.append({
                    "image_url": url,
                    "error": "Embedding has zero or non-finite norm",
                })
                continue

            # Normalize embedding
            embedding_normalized = vector / norm

            batch_results.append({
                "image_url": url,
                "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
                "success": True
            })
    except Exception as e:
        # Replace any partial output so a URL never ends up with both a
        # success entry and an error entry.
        batch_results = [{"image_url": url, "error": str(e)} for url in batch_urls]

    results.extend(batch_results)
|
84 |
+
|
85 |
+
|
86 |
+
# Gradio Interface
# One multi-line textbox is passed to batch_process_images; its return value
# is shown in the JSON output component.
iface = gr.Interface(
    fn=batch_process_images,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Enter image URLs separated by commas",
        label="Batch Image URLs",
    ),
    outputs=gr.JSON(label="Embedding Results"),
    title="Batch Fashion CLIP Embedding API",
    description="Enter multiple image URLs (separated by commas) to generate embeddings for the batch. Each embedding preview includes the first 5 values.",
    # Single example row: two comma-separated image URLs in one textbox value.
    examples=[
        ["https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp, https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"]
    ],
)

# Launch Gradio App
# Only when run as a script; importing this module does not start the UI.
if __name__ == "__main__":
    iface.launch()
|
105 |
+
|
106 |
+
|
107 |
+
# import os
|
108 |
+
# import requests
|
109 |
+
# from PIL import Image
|
110 |
+
# import numpy as np
|
111 |
+
# from encoder import FashionCLIPEncoder
|
112 |
+
# from pinecone import Pinecone
|
113 |
+
# from dotenv import load_dotenv
|
114 |
+
|
115 |
+
# # Load environment variables
|
116 |
+
# load_dotenv()
|
117 |
+
|
118 |
+
# # Constants
|
119 |
+
# PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
|
120 |
+
# PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
|
121 |
+
# REQUESTS_HEADERS = {
|
122 |
+
# 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
123 |
+
# }
|
124 |
+
# BATCH_SIZE = 30 # Define batch size for processing
|
125 |
+
|
126 |
+
# # Ensure API key and index name are set
|
127 |
+
# if not PINECONE_API_KEY or not PINECONE_INDEX_NAME:
|
128 |
+
# raise ValueError("PINECONE_API_KEY and PINECONE_INDEX_NAME must be set in environment variables.")
|
129 |
+
|
130 |
+
# # Initialize Pinecone
|
131 |
+
# pc = Pinecone(api_key=PINECONE_API_KEY)
|
132 |
+
|
133 |
+
# # Connect to the existing index
|
134 |
+
# if PINECONE_INDEX_NAME not in pc.list_indexes().names():
|
135 |
+
# raise ValueError(f"Index '{PINECONE_INDEX_NAME}' does not exist. Please create it in your Pinecone account.")
|
136 |
+
|
137 |
+
# index = pc.Index(PINECONE_INDEX_NAME)
|
138 |
+
# print(f"Connected to Pinecone index '{PINECONE_INDEX_NAME}'.")
|
139 |
+
|
140 |
+
# # Initialize encoder
|
141 |
+
# encoder = FashionCLIPEncoder()
|
142 |
+
|
143 |
+
# # Helper function to download images
|
144 |
+
# def download_image_as_pil(url: str, timeout: int = 10) -> Image.Image:
|
145 |
+
# """
|
146 |
+
# Downloads an image from a URL and converts it to a PIL Image in RGB format.
|
147 |
+
# """
|
148 |
+
# try:
|
149 |
+
# response = requests.get(url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout)
|
150 |
+
# if response.status_code == 200:
|
151 |
+
# return Image.open(response.raw).convert("RGB") # Ensure consistent format
|
152 |
+
# return None
|
153 |
+
# except Exception as e:
|
154 |
+
# print(f"Error downloading image from {url}: {e}")
|
155 |
+
# return None
|
156 |
+
|
157 |
+
# # Function to process a batch of images
|
158 |
+
# def batch_process_images(image_data: list, namespace: str = None):
|
159 |
+
# """
|
160 |
+
# Processes a batch of images, generates embeddings, and uploads them to Pinecone.
|
161 |
+
|
162 |
+
# Args:
|
163 |
+
# image_data (list): A list of dictionaries with "id" and "url" keys.
|
164 |
+
# namespace (str): Namespace for the Pinecone index.
|
165 |
+
|
166 |
+
# Returns:
|
167 |
+
# list: A list of results containing the embedding preview or error information.
|
168 |
+
# """
|
169 |
+
# results = []
|
170 |
+
# batch_ids, batch_urls, batch_images = [], [], []
|
171 |
+
|
172 |
+
# for data in image_data:
|
173 |
+
# try:
|
174 |
+
# image_id = data["id"]
|
175 |
+
# image_url = data["url"]
|
176 |
+
|
177 |
+
# # Download the image
|
178 |
+
# image = download_image_as_pil(image_url)
|
179 |
+
# if not image:
|
180 |
+
# results.append({"id": image_id, "url": image_url, "error": "Failed to download image"})
|
181 |
+
# continue
|
182 |
+
|
183 |
+
# batch_ids.append(image_id)
|
184 |
+
# batch_urls.append(image_url)
|
185 |
+
# batch_images.append(image)
|
186 |
+
|
187 |
+
# # Process batch when reaching batch size
|
188 |
+
# if len(batch_images) == BATCH_SIZE:
|
189 |
+
# process_batch(batch_ids, batch_urls, batch_images, results, namespace)
|
190 |
+
# batch_ids, batch_urls, batch_images = [], [], []
|
191 |
+
|
192 |
+
# except Exception as e:
|
193 |
+
# results.append({"id": data.get("id"), "url": data.get("url"), "error": str(e)})
|
194 |
+
|
195 |
+
# # Process remaining images in the last batch
|
196 |
+
# if batch_images:
|
197 |
+
# process_batch(batch_ids, batch_urls, batch_images, results, namespace)
|
198 |
+
|
199 |
+
# return results
|
200 |
+
|
201 |
+
# # Function to process a batch and upload to Pinecone
|
202 |
+
# def process_batch(batch_ids, batch_urls, batch_images, results, namespace):
|
203 |
+
# """
|
204 |
+
# Processes a batch of images and generates embeddings, uploading them to Pinecone.
|
205 |
+
|
206 |
+
# Args:
|
207 |
+
# batch_ids (list): List of IDs for the images.
|
208 |
+
# batch_urls (list): List of image URLs.
|
209 |
+
# batch_images (list): List of PIL images.
|
210 |
+
# results (list): List to store results for each image.
|
211 |
+
# namespace (str): Namespace for the Pinecone index.
|
212 |
+
# """
|
213 |
+
# try:
|
214 |
+
# # Generate embeddings
|
215 |
+
# embeddings = encoder.encode_images(batch_images)
|
216 |
+
|
217 |
+
# vectors = []
|
218 |
+
# for image_id, url, embedding in zip(batch_ids, batch_urls, embeddings):
|
219 |
+
# # Normalize embedding
|
220 |
+
# embedding_normalized = embedding / np.linalg.norm(embedding)
|
221 |
+
|
222 |
+
# # Append results
|
223 |
+
# result = {
|
224 |
+
# "id": image_id,
|
225 |
+
# "url": url,
|
226 |
+
# "embedding_preview": embedding_normalized[:5].tolist(), # First 5 values for preview
|
227 |
+
# "success": True
|
228 |
+
# }
|
229 |
+
# results.append(result)
|
230 |
+
|
231 |
+
# # Prepare vector for upserting
|
232 |
+
# vectors.append({
|
233 |
+
# "id": str(image_id),
|
234 |
+
# "values": embedding_normalized.tolist(),
|
235 |
+
# "metadata": {"url": url}
|
236 |
+
# })
|
237 |
+
|
238 |
+
# # Upload vectors to Pinecone
|
239 |
+
# index.upsert(vectors=vectors, namespace=namespace)
|
240 |
+
# except Exception as e:
|
241 |
+
# for image_id, url in zip(batch_ids, batch_urls):
|
242 |
+
# results.append({"id": image_id, "url": url, "error": str(e)})
|
243 |
+
|
244 |
+
# # Example usage
|
245 |
+
# if __name__ == "__main__":
|
246 |
+
# # Example input data
|
247 |
+
# image_data = [
|
248 |
+
# {"id": "1", "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp"},
|
249 |
+
# {"id": "2", "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"}
|
250 |
+
# ]
|
251 |
+
|
252 |
+
# # Process images and upload to Pinecone under namespace "ns1"
|
253 |
+
# results = batch_process_images(image_data, namespace="ns1")
|
254 |
+
|
255 |
+
# # Print results
|
256 |
+
# for result in results:
|
257 |
+
# print(result)
|
app.py
CHANGED
@@ -1,188 +1,177 @@
|
|
1 |
-
#
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
# # Load the FashionCLIP processor and model
|
8 |
-
# processor = AutoProcessor.from_pretrained("patrickjohncyh/fashion-clip")
|
9 |
-
# model = AutoModelForZeroShotImageClassification.from_pretrained("patrickjohncyh/fashion-clip")
|
10 |
-
|
11 |
-
# # Define the function to process both text and image inputs
|
12 |
-
# def generate_embeddings(input_text=None, input_image_url=None):
|
13 |
-
# try:
|
14 |
-
# if input_image_url:
|
15 |
-
# # Process image with accompanying text
|
16 |
-
# response = requests.get(input_image_url, stream=True)
|
17 |
-
# response.raise_for_status()
|
18 |
-
# image = Image.open(response.raw)
|
19 |
-
|
20 |
-
# # Use a default text if none is provided
|
21 |
-
# if not input_text:
|
22 |
-
# input_text = "this is an image"
|
23 |
-
|
24 |
-
# # Prepare inputs for the model
|
25 |
-
# inputs = processor(
|
26 |
-
# text=[input_text],
|
27 |
-
# images=image,
|
28 |
-
# return_tensors="pt",
|
29 |
-
# padding=True
|
30 |
-
# )
|
31 |
-
|
32 |
-
# with torch.no_grad():
|
33 |
-
# outputs = model(**inputs)
|
34 |
-
|
35 |
-
# image_embedding = outputs.logits_per_image.cpu().numpy().tolist()
|
36 |
-
# return {
|
37 |
-
# "type": "image_embedding",
|
38 |
-
# "input_image_url": input_image_url,
|
39 |
-
# "input_text": input_text,
|
40 |
-
# "embedding": image_embedding
|
41 |
-
# }
|
42 |
-
|
43 |
-
# elif input_text:
|
44 |
-
# # Process text input only
|
45 |
-
# inputs = processor(
|
46 |
-
# text=[input_text],
|
47 |
-
# images=None,
|
48 |
-
# return_tensors="pt",
|
49 |
-
# padding=True
|
50 |
-
# )
|
51 |
-
# with torch.no_grad():
|
52 |
-
# outputs = model(**inputs)
|
53 |
-
|
54 |
-
# text_embedding = outputs.logits_per_text.cpu().numpy().tolist()
|
55 |
-
# return {
|
56 |
-
# "type": "text_embedding",
|
57 |
-
# "input_text": input_text,
|
58 |
-
# "embedding": text_embedding
|
59 |
-
# }
|
60 |
-
# else:
|
61 |
-
# return {"error": "Please provide either a text query or an image URL."}
|
62 |
-
|
63 |
-
# except Exception as e:
|
64 |
-
# return {"error": str(e)}
|
65 |
-
|
66 |
-
# # Create the Gradio interface
|
67 |
-
# interface = gr.Interface(
|
68 |
-
# fn=generate_embeddings,
|
69 |
-
# inputs=[
|
70 |
-
# gr.Textbox(label="Text Query (Optional)", placeholder="e.g., red dress (used with image or for text embedding)"),
|
71 |
-
# gr.Textbox(label="Image URL", placeholder="e.g., https://example.com/image.jpg (used with or without text query)")
|
72 |
-
# ],
|
73 |
-
# outputs="json",
|
74 |
-
# title="FashionCLIP Combined Embedding API",
|
75 |
-
# description="Provide a text query and/or an image URL to compute embeddings for vector search."
|
76 |
-
# )
|
77 |
-
|
78 |
-
# # Launch the app
|
79 |
-
# if __name__ == "__main__":
|
80 |
-
# interface.launch()
|
81 |
-
# print(generate_embeddings("red dress"))
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
import uuid
|
86 |
import requests
|
87 |
-
from PIL import Image
|
88 |
import numpy as np
|
89 |
-
import gradio as gr
|
90 |
from encoder import FashionCLIPEncoder
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
|
92 |
# Constants
|
|
|
|
|
|
|
93 |
REQUESTS_HEADERS = {
|
94 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
95 |
}
|
96 |
-
BATCH_SIZE = 30
|
97 |
|
98 |
-
# Initialize
|
|
|
|
|
99 |
encoder = FashionCLIPEncoder()
|
100 |
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
def download_image_as_pil(url: str, timeout: int = 10) -> Image.Image:
|
103 |
try:
|
104 |
response = requests.get(url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout)
|
105 |
-
|
106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
return None
|
108 |
except Exception as e:
|
109 |
print(f"Error downloading image: {e}")
|
110 |
return None
|
111 |
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
|
120 |
results = []
|
121 |
-
|
122 |
|
123 |
-
for
|
124 |
try:
|
125 |
# Download image
|
126 |
-
image = download_image_as_pil(url)
|
127 |
if not image:
|
128 |
-
results.append({
|
|
|
|
|
|
|
|
|
129 |
continue
|
130 |
|
131 |
-
|
132 |
batch_images.append(image)
|
133 |
|
134 |
# Process batch when reaching batch size
|
135 |
if len(batch_images) == BATCH_SIZE:
|
136 |
-
process_batch(
|
137 |
-
|
138 |
|
139 |
except Exception as e:
|
140 |
-
results.append({
|
|
|
|
|
|
|
|
|
141 |
|
142 |
# Process remaining images in the last batch
|
143 |
if batch_images:
|
144 |
-
process_batch(
|
145 |
|
146 |
return results
|
147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
-
# Helper function to process a batch
|
150 |
-
def process_batch(batch_urls, batch_images, results):
|
151 |
-
try:
|
152 |
-
# Generate embeddings
|
153 |
-
embeddings = encoder.encode_images(batch_images)
|
154 |
-
|
155 |
-
for url, embedding in zip(batch_urls, embeddings):
|
156 |
-
# Normalize embedding
|
157 |
-
embedding_normalized = embedding / np.linalg.norm(embedding)
|
158 |
-
|
159 |
-
# Append results
|
160 |
-
results.append({
|
161 |
-
"image_url": url,
|
162 |
-
"embedding_preview": embedding_normalized[:5].tolist(), # First 5 values for preview
|
163 |
-
"success": True
|
164 |
-
})
|
165 |
-
except Exception as e:
|
166 |
-
for url in batch_urls:
|
167 |
-
results.append({"image_url": url, "error": str(e)})
|
168 |
-
|
169 |
-
|
170 |
-
# Gradio Interface
|
171 |
-
iface = gr.Interface(
|
172 |
-
fn=batch_process_images,
|
173 |
-
inputs=gr.Textbox(
|
174 |
-
lines=5,
|
175 |
-
placeholder="Enter image URLs separated by commas",
|
176 |
-
label="Batch Image URLs",
|
177 |
-
),
|
178 |
-
outputs=gr.JSON(label="Embedding Results"),
|
179 |
-
title="Batch Fashion CLIP Embedding API",
|
180 |
-
description="Enter multiple image URLs (separated by commas) to generate embeddings for the batch. Each embedding preview includes the first 5 values.",
|
181 |
-
examples=[
|
182 |
-
["https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp, https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"]
|
183 |
-
],
|
184 |
-
)
|
185 |
-
|
186 |
-
# Launch Gradio App
|
187 |
if __name__ == "__main__":
|
188 |
-
|
|
|
|
1 |
+
# app.py
|
2 |
+
import os
|
3 |
+
from fastapi import FastAPI
|
4 |
+
from pydantic import BaseModel
|
5 |
+
from typing import List
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
import requests
|
7 |
+
from PIL import Image, UnidentifiedImageError
|
8 |
import numpy as np
|
|
|
9 |
from encoder import FashionCLIPEncoder
|
10 |
+
from pinecone import Pinecone
|
11 |
+
from dotenv import load_dotenv
|
12 |
+
|
13 |
+
# Load environment variables
load_dotenv()

# Initialize FastAPI app
app = FastAPI()

# Constants
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
# Optional; when unset, None is passed as the namespace on upsert.
PINECONE_NAMESPACE = os.getenv("PINECONE_NAMESPACE")
# Headers sent with every image request; the User-Agent is a desktop Chrome string.
REQUESTS_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
BATCH_SIZE = 30  # Number of downloaded images collected before one encoder call

# Fail fast with an actionable message instead of an opaque client error
# when the required Pinecone settings are missing.
if not PINECONE_API_KEY or not PINECONE_INDEX_NAME:
    raise ValueError("PINECONE_API_KEY and PINECONE_INDEX_NAME must be set in environment variables.")

# Initialize services
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(PINECONE_INDEX_NAME)
encoder = FashionCLIPEncoder()
|
32 |
|
33 |
+
class ProductData(BaseModel):
    # One product image to embed.
    product_id: str  # Used as the vector id when results are uploaded to Pinecone
    url: str  # Address the product image is downloaded from
|
36 |
+
|
37 |
+
class ProcessRequest(BaseModel):
    # Request body of the POST /process endpoint.
    products: List[ProductData]  # Products whose images should be embedded
    upload_to_pinecone: bool = True  # When true, embeddings are upserted after processing
|
40 |
+
|
41 |
def download_image_as_pil(url: str, timeout: int = 10) -> "Image.Image | None":
    """Download an image over HTTP and return it as an RGB PIL image.

    The image is decoded from memory. Earlier revisions wrote every download
    to the fixed path ``temp_image.jpg``, which let overlapping calls
    overwrite each other's data and left stray files in the working directory.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        The decoded image converted to RGB, or ``None`` when the response is
        not a 200 with an image Content-Type, when PIL cannot identify the
        data, or when the request raises.
    """
    from io import BytesIO  # local import keeps this block self-contained

    try:
        # The context manager releases the connection on every path.
        with requests.get(url, headers=REQUESTS_HEADERS, timeout=timeout) as response:
            content_type = response.headers.get('Content-Type')
            print(f"URL: {url}, Status Code: {response.status_code}, Content-Type: {content_type}")

            if response.status_code == 200 and 'image' in (content_type or ''):
                try:
                    return Image.open(BytesIO(response.content)).convert("RGB")
                except UnidentifiedImageError:
                    print(f"Unidentified image file from URL: {url}")
            else:
                print(f"Non-image content for URL: {url}")
            return None
    except Exception as e:
        # Best-effort helper: report the problem and let the caller record a failure.
        print(f"Error downloading image: {e}")
        return None
|
59 |
|
60 |
+
|
61 |
+
|
62 |
+
def process_batch(batch_products, batch_images, results):
    """Embed one batch of product images and append one result per product.

    Args:
        batch_products: Dicts with ``"product_id"`` and ``"url"`` keys,
            parallel to ``batch_images``.
        batch_images: Images passed to ``encoder.encode_images``.
        results: List mutated in place. Each product receives exactly one
            entry: a success dict carrying ``"embedding"`` and
            ``"embedding_preview"``, or a dict with an ``"error"`` message.
    """
    batch_results = []
    try:
        # Generate embeddings
        embeddings = list(encoder.encode_images(batch_images))
        # zip() would silently drop products if the counts disagreed.
        if len(embeddings) != len(batch_products):
            raise ValueError(
                f"Encoder returned {len(embeddings)} embeddings for {len(batch_products)} images"
            )

        for product, embedding in zip(batch_products, embeddings):
            vector = np.asarray(embedding)
            norm = np.linalg.norm(vector)
            # Dividing by a zero or non-finite norm would produce NaN/inf
            # values, which must not reach the JSON response or the index.
            if not np.isfinite(norm) or norm == 0:
                batch_results.append({
                    "product_id": product["product_id"],
                    "image_url": product["url"],
                    "error": "Embedding has zero or non-finite norm"
                })
                continue

            # Normalize embedding
            embedding_normalized = vector / norm

            batch_results.append({
                "product_id": product["product_id"],
                "image_url": product["url"],
                "embedding": embedding_normalized.tolist(),
                "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
                "success": True
            })
    except Exception as e:
        # Replace any partial output so a product never ends up with both a
        # success entry and an error entry.
        batch_results = [
            {
                "product_id": product["product_id"],
                "image_url": product["url"],
                "error": str(e)
            }
            for product in batch_products
        ]

    results.extend(batch_results)
|
86 |
+
|
87 |
+
def batch_process_images(products):
    """Download and embed the image of every product, in batches.

    Args:
        products: Sequence of dicts with ``"product_id"`` and ``"url"`` keys.

    Returns:
        ``{"error": ...}`` when ``products`` is empty or ``None``. Otherwise
        a list of dicts: entries appended by ``process_batch`` for downloaded
        images and error entries for products that could not be handled.
        Error entries are appended immediately, so the list order may differ
        from the input order.
    """
    if not products:
        return {"error": "No products provided."}

    results = []
    batch_products, batch_images = [], []

    for product in products:
        try:
            # Read the keys first so a malformed entry fails before any work is done.
            url = product["url"]
            product_id = product["product_id"]

            # Download image
            image = download_image_as_pil(url)
            if image is None:
                results.append({
                    "product_id": product_id,
                    "image_url": url,
                    "error": "Failed to download image"
                })
                continue

            batch_products.append(product)
            batch_images.append(image)

            # Process batch when reaching batch size
            if len(batch_images) >= BATCH_SIZE:
                process_batch(batch_products, batch_images, results)
                batch_products, batch_images = [], []

        except Exception as e:
            # Use .get() here: indexing again could raise from inside the
            # handler and abort the whole request over one bad entry.
            entry = product if isinstance(product, dict) else {}
            results.append({
                "product_id": entry.get("product_id"),
                "image_url": entry.get("url"),
                "error": str(e)
            })

    # Process remaining images in the last batch
    if batch_images:
        process_batch(batch_products, batch_images, results)

    return results
|
126 |
|
127 |
+
def upload_to_pinecone(processed_results, batch_size: int = 100):
    """Upload embeddings to Pinecone.

    Args:
        processed_results: Result dicts as produced by image processing.
            Only dict entries that have an ``"embedding"`` and no ``"error"``
            are uploaded; anything else (including the keys of an error
            dict) is ignored.
        batch_size: Maximum number of vectors sent per upsert request, so a
            large job is not sent as one unbounded request.

    Returns:
        ``{"uploaded_count": n}`` where ``n`` is the number of vectors sent.
    """
    vectors_to_upsert = [
        {
            'id': result['product_id'],
            'values': result['embedding'],
            'metadata': {
                'image_url': result['image_url']
            }
        }
        for result in (processed_results or [])
        if isinstance(result, dict) and 'error' not in result and 'embedding' in result
    ]

    chunk = max(1, batch_size)
    for start in range(0, len(vectors_to_upsert), chunk):
        index.upsert(
            vectors=vectors_to_upsert[start:start + chunk],
            namespace=PINECONE_NAMESPACE,
        )

    return {"uploaded_count": len(vectors_to_upsert)}
|
145 |
+
|
146 |
+
@app.post("/process")
async def process_images(request: ProcessRequest):
    """
    Embed the requested product images and, when asked, store the embeddings in Pinecone

    Parameters:
    - products: List of products with product_id and url
    - upload_to_pinecone: Boolean flag to determine if embeddings should be uploaded to Pinecone
    """
    # Flatten the request models into the plain dicts the batch code expects.
    products_data = []
    for item in request.products:
        products_data.append({"product_id": item.product_id, "url": item.url})

    # NOTE(review): this call is synchronous inside a coroutine; if it blocks
    # on downloads or model work it holds the event loop for its duration -
    # confirm whether that is acceptable for this service.
    results = batch_process_images(products_data)

    response_body = {"processing_results": results}
    if request.upload_to_pinecone:
        # The upload summary is only included when an upload was requested.
        response_body["pinecone_upload"] = upload_to_pinecone(results)
    return response_body
|
170 |
+
|
171 |
+
@app.get("/health")
async def health_check():
    """Report a fixed healthy status; performs no checks of its own."""
    status_payload = {"status": "healthy"}
    return status_payload
|
174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
# Script entry point: serve the FastAPI app with uvicorn on all interfaces, port 8000.
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the file is run directly.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
debug_image.jpg
ADDED
![]() |
temp_image.jpg
ADDED
![]() |