Spaces:
Running
on
Zero
Detector only finds one cat.
Hi there
I tried to run the script locally with minor modifications, but only one cat is ever detected. Depending on the threshold setting, it is either the one in the front or the one in the back, but never both. Do you have any advice?
Here is my modified copy of the code:
from transformers import pipeline, SamModel, SamProcessor
import torch
import numpy as np
from PIL import Image, ImageDraw
import os
# Model setup, all on the CPU: the zero-shot detection pipeline supplies the
# boxes, and the SAM model/processor pair turns each box into a mask.
checkpoint = "google/owlv2-base-patch16-ensemble"
sam_processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
sam_model = SamModel.from_pretrained("facebook/sam-vit-base")
sam_model = sam_model.to("cpu")
detector = pipeline(
    task="zero-shot-object-detection",
    model=checkpoint,
    device="cpu",
)
# Tint colours cycled over the detections so adjacent masks stay distinguishable.
_OVERLAY_COLORS = (
    (255, 0, 0),
    (0, 160, 255),
    (0, 200, 0),
    (255, 160, 0),
    (200, 0, 200),
)
# Opacity (0-255) of the tint inside a mask; below 255 keeps the object visible.
_OVERLAY_ALPHA = 128


def query(image_path, texts, threshold, sam_threshold):
    """
    Detect the objects named in ``texts`` and save an image with their masks
    overlaid.

    Every detection returned by the zero-shot detector is handed to SAM as a
    box prompt; the first mask SAM returns for that box is tinted onto a copy
    of the image. All detections end up in the same output image.

    Args:
        image_path: Path to the image file.
        texts: Comma-separated list of object labels to search for in the
            image. Whitespace around each label is ignored and empty labels
            are dropped.
        threshold: Confidence threshold for the object detection pipeline.
        sam_threshold: Passed to SAM's ``filter_masks`` as ``pred_iou_thresh``.
            NOTE(review): the filtered result is currently discarded, so this
            value does not influence the saved image.
    Returns:
        Path to the annotated image file (``<name>_annotated<ext>``).
    Raises:
        ValueError: If ``texts`` contains no non-empty label.
    """
    print("Pfad")
    print(image_path)

    labels = [label.strip() for label in texts.split(",")]
    labels = [label for label in labels if label]
    if not labels:
        raise ValueError("texts must contain at least one non-empty label")

    image = Image.open(image_path).convert("RGB")

    # Pass the image directly as an argument to the detector
    predictions = detector(
        image,
        candidate_labels=labels,
        threshold=threshold,
    )

    result_labels = []
    for pred in predictions:
        raw_box = pred["box"]
        label = pred["label"]
        box = [
            round(raw_box["xmin"], 2),
            round(raw_box["ymin"], 2),
            round(raw_box["xmax"], 2),
            round(raw_box["ymax"], 2),
        ]
        inputs = sam_processor(
            images=image,
            input_boxes=[[box]],
            return_tensors="pt",
        ).to("cpu")
        with torch.no_grad():
            outputs = sam_model(**inputs)
        mask = sam_processor.image_processor.post_process_masks(
            outputs.pred_masks.cpu(),
            inputs["original_sizes"].cpu(),
            inputs["reshaped_input_sizes"].cpu(),
        )
        iou_scores = outputs["iou_scores"]
        # NOTE(review): the filtered masks/scores/boxes are never used below;
        # the call is kept as-is, but either use its output or drop it.
        _masks, _scores, _boxes = sam_processor.image_processor.filter_masks(
            mask[0],
            iou_scores[0].cpu(),
            inputs["original_sizes"][0].cpu(),
            box,
            pred_iou_thresh=sam_threshold,
        )
        # Keep the first mask of the first (only) box prompt.
        result_labels.append((mask[0][0][0].numpy(), label))

    # Create and save the annotated image
    annotated = image.copy()
    print(result_labels)
    for index, (mask, label) in enumerate(result_labels):
        color = _OVERLAY_COLORS[index % len(_OVERLAY_COLORS)]
        # Use the mask itself as the alpha channel: transparent outside the
        # object, semi-opaque inside. Converting an "L" mask to "RGBA" and
        # pasting it with itself as mask yields a fully opaque alpha, so every
        # paste would replace the whole picture and only the last detection
        # would remain visible.
        alpha_values = np.where(mask, _OVERLAY_ALPHA, 0).astype(np.uint8)
        alpha = Image.fromarray(alpha_values).resize(annotated.size)
        annotated.paste(color, (0, 0), alpha)

    base_name, ext = os.path.splitext(image_path)
    output_path = f"{base_name}_annotated{ext}"
    annotated.save(output_path)
    return output_path
# Example usage: annotate every "cat" in cats.png (detector threshold 0.6,
# SAM IoU threshold 0.88) and report where the result was written.
output_path = query("cats.png", "cat", 0.6, 0.88)
print(f"Annotated image saved as {output_path}")