|
import cv2 |
|
import numpy as np |
|
from collections import Counter |
|
|
|
|
|
|
|
from paddleocr import PaddleOCR, draw_ocr |
|
|
|
|
|
|
|
|
|
# Module-level PaddleOCR engine shared by detect_text: English model with the
# angle classifier enabled. Built once at import time and reused across calls.
ocr = PaddleOCR(use_angle_cls=True, lang='en')


def detect_text(image):
    """Run OCR on *image* and return all recognized text as a single string.

    Args:
        image: Passed unchanged to ``ocr.ocr``.

    Returns:
        The recognized text fragments joined by single spaces, in the order
        the OCR engine reported them. An empty string when nothing was
        recognized.
    """
    result = ocr.ocr(image, cls=True)

    fragments = []
    # The result holds one entry per processed image. Some PaddleOCR releases
    # report an image without any detected text as None instead of an empty
    # list, so skip falsy entries rather than iterating over them.
    for page in result or []:
        if not page:
            continue
        # line[1][0] is read as the recognized text of one detection
        # (presumably [box, (text, score)] -- confirm against the installed
        # PaddleOCR version).
        fragments.extend(line[1][0] for line in page)

    # Separate fragments with a space so words from adjacent text lines do not
    # fuse together (callers split the result on whitespace).
    return " ".join(fragments)
|
def analyze_text(text):
    """Classify text as advertising copy based on marketing keywords.

    The text is lower-cased and broken into alphanumeric words, so keywords
    are matched regardless of case or adjacent punctuation ("SALE!",
    "offer,", "buy-now").

    Args:
        text: Text to inspect. ``None`` or an empty string is treated as
            containing no keywords.

    Returns:
        "Advertisement" when more than two keyword occurrences are found
        (repeated keywords count each time), otherwise
        "Normal Product Image".
    """
    marketing_keywords = ('sale', 'offer', 'discount', 'promotion', 'limited', 'buy', 'now')

    if not text:
        return "Normal Product Image"

    # Turn every non-alphanumeric character into a space before splitting;
    # a plain split() would keep tokens such as "sale!" that never equal a
    # keyword.
    normalized = ''.join(ch if ch.isalnum() else ' ' for ch in text.lower())
    word_count = Counter(normalized.split())

    keyword_count = sum(word_count[keyword] for keyword in marketing_keywords)

    return "Advertisement" if keyword_count > 2 else "Normal Product Image"
|
|
|
|
|
|
|
|
|
def analyze_layout(image_path):
    """Flag an image as an advertisement based on the shapes of its contours.

    The image is converted to grayscale, inverted and binarized with Otsu's
    threshold; the external contours of the result are then inspected.

    Args:
        image_path: Path handed to ``cv2.imread``.

    Returns:
        True when more than one contour has a bounding-box width/height ratio
        outside [0.8, 1.2], or more than one contour (with more than five
        points) has a fitted-ellipse angle strictly between 30 and 150;
        False otherwise.

    Note:
        ``cv2.imread`` reports an unreadable path by returning None rather
        than raising, so such a path fails in the color conversion below.
    """
    bgr = cv2.imread(image_path)
    grayscale = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

    # Invert first, then let Otsu pick the binarization threshold.
    _, binary = cv2.threshold(
        cv2.bitwise_not(grayscale), 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )
    outlines, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    elongated = 0
    tilted = 0
    for outline in outlines:
        # Bounding boxes that are clearly not square count as asymmetric.
        _, _, width, height = cv2.boundingRect(outline)
        ratio = float(width) / height
        if ratio > 1.2 or ratio < 0.8:
            elongated += 1

        # Only contours with more than five points get an ellipse fit.
        if len(outline) <= 5:
            continue
        _, _, tilt = cv2.fitEllipse(outline)
        if 30 < tilt < 150:
            tilted += 1

    return elongated > 1 or tilted > 1
|
|
|
|
|
|
|
|
|
|
|
|
|
def analyze_color(image_path):
    """Classify an image by how saturated and bright it is on average.

    Args:
        image_path: Path handed to ``cv2.imread``.

    Returns:
        "Advertisement" when both the mean of HSV channel 1 (saturation) and
        the mean of HSV channel 2 (value) exceed 150, otherwise
        "Not Advertisement".

    Note:
        ``cv2.imread`` reports an unreadable path by returning None rather
        than raising, so such a path fails in the color conversion below.
    """
    hsv = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2HSV)

    saturation_avg = np.mean(hsv[..., 1])
    brightness_avg = np.mean(hsv[..., 2])

    is_vivid = saturation_avg > 150 and brightness_avg > 150
    return "Advertisement" if is_vivid else "Not Advertisement"
|
|
|
|
|
|
|
|
|
def analyze_shapes(image_path):
    """Report whether any edge contour approximates a 3-, 5- or 7-vertex polygon.

    Canny edges (thresholds 100/200) are extracted from the grayscale image
    and each external contour is simplified with ``cv2.approxPolyDP`` using a
    tolerance of 1% of its closed arc length.

    Args:
        image_path: Path handed to ``cv2.imread``.

    Returns:
        True as soon as one simplified contour has 3, 5 or 7 vertices;
        False when none does.

    Note:
        ``cv2.imread`` reports an unreadable path by returning None rather
        than raising, so such a path fails in the color conversion below.
    """
    grayscale = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2GRAY)
    edge_map = cv2.Canny(grayscale, 100, 200)
    outlines, _ = cv2.findContours(edge_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    def vertex_count(outline):
        # Simplify the contour, allowing 1% of its perimeter as deviation.
        tolerance = 0.01 * cv2.arcLength(outline, True)
        return len(cv2.approxPolyDP(outline, tolerance, True))

    # any() stops at the first matching contour, like an early return.
    return any(vertex_count(outline) in (3, 5, 7) for outline in outlines)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|