ljh838 committed
Commit 9adfe8b · 1 Parent(s): 7078aed

create application file

.gitignore ADDED
@@ -0,0 +1,3 @@
+
+__pycache__
+/cartoonset10k*
anime_face_detector/__init__.py ADDED
@@ -0,0 +1,55 @@
+import pathlib
+
+import torch
+
+from .detector import LandmarkDetector
+
+
+def get_config_path(model_name: str) -> pathlib.Path:
+    assert model_name in ['faster-rcnn', 'yolov3', 'hrnetv2']
+
+    package_path = pathlib.Path(__file__).parent.resolve()
+    if model_name in ['faster-rcnn', 'yolov3']:
+        config_dir = package_path / 'configs' / 'mmdet'
+    else:
+        config_dir = package_path / 'configs' / 'mmpose'
+    return config_dir / f'{model_name}.py'
+
+
+def get_checkpoint_path(model_name: str) -> pathlib.Path:
+    assert model_name in ['faster-rcnn', 'yolov3', 'hrnetv2']
+    if model_name in ['faster-rcnn', 'yolov3']:
+        file_name = f'mmdet_anime-face_{model_name}.pth'
+    else:
+        file_name = f'mmpose_anime-face_{model_name}.pth'
+
+    model_dir = pathlib.Path(torch.hub.get_dir()) / 'checkpoints'
+    model_dir.mkdir(exist_ok=True, parents=True)
+    model_path = model_dir / file_name
+    if not model_path.exists():
+        url = f'https://github.com/hysts/anime-face-detector/releases/download/v0.0.1/{file_name}'
+        torch.hub.download_url_to_file(url, model_path.as_posix())
+
+    return model_path
+
+
+def create_detector(face_detector_name: str = 'yolov3',
+                    landmark_model_name='hrnetv2',
+                    device: str = 'cuda:0',
+                    flip_test: bool = True,
+                    box_scale_factor: float = 1.1) -> LandmarkDetector:
+    print("loading model...")
+    assert face_detector_name in ['yolov3', 'faster-rcnn']
+    assert landmark_model_name in ['hrnetv2']
+    detector_config_path = get_config_path(face_detector_name)
+    landmark_config_path = get_config_path(landmark_model_name)
+    detector_checkpoint_path = get_checkpoint_path(face_detector_name)
+    landmark_checkpoint_path = get_checkpoint_path(landmark_model_name)
+    model = LandmarkDetector(landmark_config_path,
+                             landmark_checkpoint_path,
+                             detector_config_path,
+                             detector_checkpoint_path,
+                             device=device,
+                             flip_test=flip_test,
+                             box_scale_factor=box_scale_factor)
+    return model
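
Note: a minimal usage sketch of the factory above. The image file name is a placeholder; on first use, get_checkpoint_path downloads the weights into torch.hub's checkpoint cache exactly as implemented in this file.

    # Sketch only: assumes the package is installed and 'sample.png' exists.
    from anime_face_detector import create_detector

    detector = create_detector('yolov3', device='cpu')  # or 'faster-rcnn', device='cuda:0'
    preds = detector('sample.png')                       # image path or BGR ndarray
    for face in preds:
        # each result carries a box [x0, y0, x1, y1, score] and 28 landmarks [x, y, score]
        print(face['bbox'], face['keypoints'].shape)
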
anime_face_detector/configs/mmdet/faster-rcnn.py ADDED
@@ -0,0 +1,66 @@
+model = dict(type='FasterRCNN',
+             backbone=dict(type='ResNet',
+                           depth=50,
+                           num_stages=4,
+                           out_indices=(0, 1, 2, 3),
+                           frozen_stages=1,
+                           norm_cfg=dict(type='BN', requires_grad=True),
+                           norm_eval=True,
+                           style='pytorch'),
+             neck=dict(type='FPN',
+                       in_channels=[256, 512, 1024, 2048],
+                       out_channels=256,
+                       num_outs=5),
+             rpn_head=dict(type='RPNHead',
+                           in_channels=256,
+                           feat_channels=256,
+                           anchor_generator=dict(type='AnchorGenerator',
+                                                 scales=[8],
+                                                 ratios=[0.5, 1.0, 2.0],
+                                                 strides=[4, 8, 16, 32, 64]),
+                           bbox_coder=dict(type='DeltaXYWHBBoxCoder',
+                                           target_means=[0.0, 0.0, 0.0, 0.0],
+                                           target_stds=[1.0, 1.0, 1.0, 1.0])),
+             roi_head=dict(
+                 type='StandardRoIHead',
+                 bbox_roi_extractor=dict(type='SingleRoIExtractor',
+                                         roi_layer=dict(type='RoIAlign',
+                                                        output_size=7,
+                                                        sampling_ratio=0),
+                                         out_channels=256,
+                                         featmap_strides=[4, 8, 16, 32]),
+                 bbox_head=dict(type='Shared2FCBBoxHead',
+                                in_channels=256,
+                                fc_out_channels=1024,
+                                roi_feat_size=7,
+                                num_classes=1,
+                                bbox_coder=dict(
+                                    type='DeltaXYWHBBoxCoder',
+                                    target_means=[0.0, 0.0, 0.0, 0.0],
+                                    target_stds=[0.1, 0.1, 0.2, 0.2]),
+                                reg_class_agnostic=False)),
+             test_cfg=dict(rpn=dict(nms_pre=1000,
+                                    max_per_img=1000,
+                                    nms=dict(type='nms', iou_threshold=0.7),
+                                    min_bbox_size=0),
+                           rcnn=dict(score_thr=0.05,
+                                     nms=dict(type='nms', iou_threshold=0.5),
+                                     max_per_img=100)))
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='MultiScaleFlipAug',
+         img_scale=(1333, 800),
+         flip=False,
+         transforms=[
+             dict(type='Resize', keep_ratio=True),
+             dict(type='RandomFlip'),
+             dict(type='Normalize',
+                  mean=[123.675, 116.28, 103.53],
+                  std=[58.395, 57.12, 57.375],
+                  to_rgb=True),
+             dict(type='Pad', size_divisor=32),
+             dict(type='ImageToTensor', keys=['img']),
+             dict(type='Collect', keys=['img'])
+         ])
+]
+data = dict(test=dict(pipeline=test_pipeline))
anime_face_detector/configs/mmdet/yolov3.py ADDED
@@ -0,0 +1,47 @@
+model = dict(type='YOLOV3',
+             backbone=dict(type='Darknet', depth=53, out_indices=(3, 4, 5)),
+             neck=dict(type='YOLOV3Neck',
+                       num_scales=3,
+                       in_channels=[1024, 512, 256],
+                       out_channels=[512, 256, 128]),
+             bbox_head=dict(type='YOLOV3Head',
+                            num_classes=1,
+                            in_channels=[512, 256, 128],
+                            out_channels=[1024, 512, 256],
+                            anchor_generator=dict(type='YOLOAnchorGenerator',
+                                                  base_sizes=[[(116, 90),
+                                                               (156, 198),
+                                                               (373, 326)],
+                                                              [(30, 61),
+                                                               (62, 45),
+                                                               (59, 119)],
+                                                              [(10, 13),
+                                                               (16, 30),
+                                                               (33, 23)]],
+                                                  strides=[32, 16, 8]),
+                            bbox_coder=dict(type='YOLOBBoxCoder'),
+                            featmap_strides=[32, 16, 8]),
+             test_cfg=dict(nms_pre=1000,
+                           min_bbox_size=0,
+                           score_thr=0.05,
+                           conf_thr=0.005,
+                           nms=dict(type='nms', iou_threshold=0.45),
+                           max_per_img=100))
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='MultiScaleFlipAug',
+         img_scale=(608, 608),
+         flip=False,
+         transforms=[
+             dict(type='Resize', keep_ratio=True),
+             dict(type='RandomFlip'),
+             dict(type='Normalize',
+                  mean=[0, 0, 0],
+                  std=[255.0, 255.0, 255.0],
+                  to_rgb=True),
+             dict(type='Pad', size_divisor=32),
+             dict(type='ImageToTensor', keys=['img']),
+             dict(type='Collect', keys=['img'])
+         ])
+]
+data = dict(test=dict(pipeline=test_pipeline))
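
Note: both mmdet configs above are plain config files; the package resolves them by name. A small sketch (assumption: run from the repository root, with network access for the checkpoint download) of how they are located by the helpers defined in anime_face_detector/__init__.py earlier in this commit:

    # Sketch: resolve the config and checkpoint for the YOLOv3 face detector.
    from anime_face_detector import get_config_path, get_checkpoint_path

    print(get_config_path('yolov3'))      # .../anime_face_detector/configs/mmdet/yolov3.py
    print(get_checkpoint_path('yolov3'))  # downloads mmdet_anime-face_yolov3.pth on first use
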
anime_face_detector/configs/mmpose/hrnetv2.py ADDED
@@ -0,0 +1,250 @@
+channel_cfg = dict(num_output_channels=28,
+                   dataset_joints=28,
+                   dataset_channel=[
+                       list(range(28)),
+                   ],
+                   inference_channel=list(range(28)))
+
+model = dict(
+    type='TopDown',
+    backbone=dict(type='HRNet',
+                  in_channels=3,
+                  extra=dict(stage1=dict(num_modules=1,
+                                         num_branches=1,
+                                         block='BOTTLENECK',
+                                         num_blocks=(4, ),
+                                         num_channels=(64, )),
+                             stage2=dict(num_modules=1,
+                                         num_branches=2,
+                                         block='BASIC',
+                                         num_blocks=(4, 4),
+                                         num_channels=(18, 36)),
+                             stage3=dict(num_modules=4,
+                                         num_branches=3,
+                                         block='BASIC',
+                                         num_blocks=(4, 4, 4),
+                                         num_channels=(18, 36, 72)),
+                             stage4=dict(num_modules=3,
+                                         num_branches=4,
+                                         block='BASIC',
+                                         num_blocks=(4, 4, 4, 4),
+                                         num_channels=(18, 36, 72, 144),
+                                         multiscale_output=True),
+                             upsample=dict(mode='bilinear',
+                                           align_corners=False))),
+    keypoint_head=dict(type='TopdownHeatmapSimpleHead',
+                       in_channels=[18, 36, 72, 144],
+                       in_index=(0, 1, 2, 3),
+                       input_transform='resize_concat',
+                       out_channels=channel_cfg['num_output_channels'],
+                       num_deconv_layers=0,
+                       extra=dict(final_conv_kernel=1,
+                                  num_conv_layers=1,
+                                  num_conv_kernels=(1, )),
+                       loss_keypoint=dict(type='JointsMSELoss',
+                                          use_target_weight=True)),
+    test_cfg=dict(flip_test=True,
+                  post_process='unbiased',
+                  shift_heatmap=True,
+                  modulate_kernel=11))
+
+data_cfg = dict(image_size=[256, 256],
+                heatmap_size=[64, 64],
+                num_output_channels=channel_cfg['num_output_channels'],
+                num_joints=channel_cfg['dataset_joints'],
+                dataset_channel=channel_cfg['dataset_channel'],
+                inference_channel=channel_cfg['inference_channel'])
+
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='TopDownAffine'),
+    dict(type='ToTensor'),
+    dict(type='NormalizeTensor',
+         mean=[0.485, 0.456, 0.406],
+         std=[0.229, 0.224, 0.225]),
+    dict(type='Collect',
+         keys=['img'],
+         meta_keys=['image_file', 'center', 'scale', 'rotation',
+                    'flip_pairs']),
+]
+
+dataset_info = dict(dataset_name='anime_face',
+                    paper_info=dict(),
+                    keypoint_info={
+                        0:
+                        dict(name='kpt-0',
+                             id=0,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-4'),
+                        1:
+                        dict(name='kpt-1',
+                             id=1,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-3'),
+                        2:
+                        dict(name='kpt-2',
+                             id=2,
+                             color=[255, 255, 255],
+                             type='',
+                             swap=''),
+                        3:
+                        dict(name='kpt-3',
+                             id=3,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-1'),
+                        4:
+                        dict(name='kpt-4',
+                             id=4,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-0'),
+                        5:
+                        dict(name='kpt-5',
+                             id=5,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-10'),
+                        6:
+                        dict(name='kpt-6',
+                             id=6,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-9'),
+                        7:
+                        dict(name='kpt-7',
+                             id=7,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-8'),
+                        8:
+                        dict(name='kpt-8',
+                             id=8,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-7'),
+                        9:
+                        dict(name='kpt-9',
+                             id=9,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-6'),
+                        10:
+                        dict(name='kpt-10',
+                             id=10,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-5'),
+                        11:
+                        dict(name='kpt-11',
+                             id=11,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-19'),
+                        12:
+                        dict(name='kpt-12',
+                             id=12,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-18'),
+                        13:
+                        dict(name='kpt-13',
+                             id=13,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-17'),
+                        14:
+                        dict(name='kpt-14',
+                             id=14,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-22'),
+                        15:
+                        dict(name='kpt-15',
+                             id=15,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-21'),
+                        16:
+                        dict(name='kpt-16',
+                             id=16,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-20'),
+                        17:
+                        dict(name='kpt-17',
+                             id=17,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-13'),
+                        18:
+                        dict(name='kpt-18',
+                             id=18,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-12'),
+                        19:
+                        dict(name='kpt-19',
+                             id=19,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-11'),
+                        20:
+                        dict(name='kpt-20',
+                             id=20,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-16'),
+                        21:
+                        dict(name='kpt-21',
+                             id=21,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-15'),
+                        22:
+                        dict(name='kpt-22',
+                             id=22,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-14'),
+                        23:
+                        dict(name='kpt-23',
+                             id=23,
+                             color=[255, 255, 255],
+                             type='',
+                             swap=''),
+                        24:
+                        dict(name='kpt-24',
+                             id=24,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-26'),
+                        25:
+                        dict(name='kpt-25',
+                             id=25,
+                             color=[255, 255, 255],
+                             type='',
+                             swap=''),
+                        26:
+                        dict(name='kpt-26',
+                             id=26,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='kpt-24'),
+                        27:
+                        dict(name='kpt-27',
+                             id=27,
+                             color=[255, 255, 255],
+                             type='',
+                             swap='')
+                    },
+                    skeleton_info={},
+                    joint_weights=[1.] * 28,
+                    sigmas=[])
+
+data = dict(test=dict(type='',
+                      data_cfg=data_cfg,
+                      pipeline=test_pipeline,
+                      dataset_info=dataset_info), )
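
Note: the landmark model predicts 28 keypoints, and the `swap` fields above encode the left/right symmetric pairs used for flip-test augmentation. A small sketch (assumptions: mmcv installed, run from the repository root) that lists those pairs directly from this config:

    # Sketch: derive the symmetric keypoint pairs from the `swap` fields above.
    import mmcv

    cfg = mmcv.Config.fromfile('anime_face_detector/configs/mmpose/hrnetv2.py')
    kpt_info = cfg.dataset_info['keypoint_info']
    name_to_id = {v['name']: k for k, v in kpt_info.items()}
    flip_pairs = sorted({tuple(sorted((k, name_to_id[v['swap']])))
                         for k, v in kpt_info.items() if v['swap']})
    print(flip_pairs)  # expected to include pairs such as (0, 4), (1, 3), (5, 10), ...
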
anime_face_detector/detector.py ADDED
@@ -0,0 +1,147 @@
+from __future__ import annotations
+
+import pathlib
+import warnings
+from typing import Optional, Union
+
+import cv2
+import mmcv
+import numpy as np
+import torch.nn as nn
+from mmdet.apis import inference_detector, init_detector
+from mmpose.apis import inference_top_down_pose_model, init_pose_model
+from mmpose.datasets import DatasetInfo
+
+
+class LandmarkDetector:
+    def __init__(
+            self,
+            landmark_detector_config_or_path: Union[mmcv.Config, str,
+                                                    pathlib.Path],
+            landmark_detector_checkpoint_path: Union[str, pathlib.Path],
+            face_detector_config_or_path: Optional[Union[mmcv.Config, str,
+                                                         pathlib.Path]] = None,
+            face_detector_checkpoint_path: Optional[Union[
+                str, pathlib.Path]] = None,
+            device: str = 'cuda:0',
+            flip_test: bool = True,
+            box_scale_factor: float = 1.1):
+        landmark_config = self._load_config(landmark_detector_config_or_path)
+        self.dataset_info = DatasetInfo(
+            landmark_config.dataset_info)  # type: ignore
+        face_detector_config = self._load_config(face_detector_config_or_path)
+
+        self.landmark_detector = self._init_pose_model(
+            landmark_config, landmark_detector_checkpoint_path, device,
+            flip_test)
+        self.face_detector = self._init_face_detector(
+            face_detector_config, face_detector_checkpoint_path, device)
+
+        self.box_scale_factor = box_scale_factor
+
+    @staticmethod
+    def _load_config(
+        config_or_path: Optional[Union[mmcv.Config, str, pathlib.Path]]
+    ) -> Optional[mmcv.Config]:
+        if config_or_path is None or isinstance(config_or_path, mmcv.Config):
+            return config_or_path
+        return mmcv.Config.fromfile(config_or_path)
+
+    @staticmethod
+    def _init_pose_model(config: mmcv.Config,
+                         checkpoint_path: Union[str, pathlib.Path],
+                         device: str, flip_test: bool) -> nn.Module:
+        if isinstance(checkpoint_path, pathlib.Path):
+            checkpoint_path = checkpoint_path.as_posix()
+        model = init_pose_model(config, checkpoint_path, device=device)
+        model.cfg.model.test_cfg.flip_test = flip_test
+        return model
+
+    @staticmethod
+    def _init_face_detector(config: Optional[mmcv.Config],
+                            checkpoint_path: Optional[Union[str,
+                                                            pathlib.Path]],
+                            device: str) -> Optional[nn.Module]:
+        if config is not None:
+            if isinstance(checkpoint_path, pathlib.Path):
+                checkpoint_path = checkpoint_path.as_posix()
+            model = init_detector(config, checkpoint_path, device=device)
+        else:
+            model = None
+        return model
+
+    def _detect_faces(self, image: np.ndarray) -> list[np.ndarray]:
+        # predicted boxes using mmdet model have the format of
+        # [x0, y0, x1, y1, score]
+        boxes = inference_detector(self.face_detector, image)[0]
+        # scale boxes by `self.box_scale_factor`
+        boxes = self._update_pred_box(boxes)
+        return boxes
+
+    def _update_pred_box(self, pred_boxes: np.ndarray) -> list[np.ndarray]:
+        boxes = []
+        for pred_box in pred_boxes:
+            box = pred_box[:4]
+            size = box[2:] - box[:2] + 1
+            new_size = size * self.box_scale_factor
+            center = (box[:2] + box[2:]) / 2
+            tl = center - new_size / 2
+            br = tl + new_size
+            pred_box[:4] = np.concatenate([tl, br])
+            boxes.append(pred_box)
+        return boxes
+
+    def _detect_landmarks(
+            self, image: np.ndarray,
+            boxes: list[dict[str, np.ndarray]]) -> list[dict[str, np.ndarray]]:
+        preds, _ = inference_top_down_pose_model(
+            self.landmark_detector,
+            image,
+            boxes,
+            format='xyxy',
+            dataset_info=self.dataset_info,
+            return_heatmap=False)
+        return preds
+
+    @staticmethod
+    def _load_image(
+            image_or_path: Union[np.ndarray, str, pathlib.Path]) -> np.ndarray:
+        if isinstance(image_or_path, np.ndarray):
+            image = image_or_path
+        elif isinstance(image_or_path, str):
+            image = cv2.imread(image_or_path)
+        elif isinstance(image_or_path, pathlib.Path):
+            image = cv2.imread(image_or_path.as_posix())
+        else:
+            raise ValueError
+        return image
+
+    def __call__(
+        self,
+        image_or_path: Union[np.ndarray, str, pathlib.Path],
+        boxes: Optional[list[np.ndarray]] = None
+    ) -> list[dict[str, np.ndarray]]:
+        """Detect face landmarks.
+
+        Args:
+            image_or_path: An image with BGR channel order or an image path.
+            boxes: A list of bounding boxes for faces. Each bounding box
+                should be of the form [x0, y0, x1, y1, [score]].
+
+        Returns: A list of detection results. Each detection result has
+            bounding box of the form [x0, y0, x1, y1, [score]], and landmarks
+            of the form [x, y, score].
+        """
+        image = self._load_image(image_or_path)
+        if boxes is None:
+            if self.face_detector is not None:
+                boxes = self._detect_faces(image)
+            else:
+                warnings.warn(
+                    'Neither the face detector nor the bounding box is '
+                    'specified. So the entire image is treated as the face '
+                    'region.')
+                h, w = image.shape[:2]
+                boxes = [np.array([0, 0, w - 1, h - 1, 1])]
+        box_list = [{'bbox': box} for box in boxes]
+        return self._detect_landmarks(image, box_list)
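
Note: a hedged sketch of calling the class above with a precomputed box (the file name and box coordinates are illustrative); when `boxes` is omitted the face detector is used, and if neither is available the whole image is treated as a single face region, as implemented in __call__:

    # Sketch only: 'face.png' and the box values are placeholders.
    import cv2
    import numpy as np
    from anime_face_detector import create_detector

    detector = create_detector('yolov3', device='cpu')
    image = cv2.imread('face.png')             # BGR, as expected by __call__
    box = np.array([30, 40, 220, 230, 1.0])    # [x0, y0, x1, y1, score]
    preds = detector(image, boxes=[box])
    print(preds[0]['keypoints'][:3])           # first few landmarks as [x, y, score]
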
app.py ADDED
@@ -0,0 +1,15 @@
+from io import BytesIO
+import streamlit as st
+import main
+
+f = st.file_uploader('Source', ['png', 'jpg', 'jpeg'], False, help="Only .png, .jpeg, and .jpg files are supported.")
+
+if f is not None:
+    img = BytesIO(f.read())
+    img.seek(0)
+    resultbytes = main.generate(img)
+    result = BytesIO(resultbytes)
+    result.seek(0)
+    st.image(result, caption="Generated Image")
+
+st.markdown("by [이재희](https://github.com/ij5)")
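
Note: the UI can be started locally with `streamlit run app.py` (assuming streamlit, the model dependencies, and gg.png are available; importing main loads the detector). A minimal sketch of exercising main.generate outside Streamlit, with a placeholder file name:

    # Sketch: call the same generate() pipeline the Streamlit app uses.
    from io import BytesIO
    import main

    with open('sample.png', 'rb') as f:
        png_bytes = main.generate(BytesIO(f.read()))
    if png_bytes:                       # generate returns False when no face is found
        with open('result.png', 'wb') as out:
            out.write(png_bytes)
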
gg.png ADDED
main.ipynb ADDED
@@ -0,0 +1,170 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import cv2\n",
+    "from anime_face_detector import create_detector\n",
+    "from PIL import Image, ImageDraw, ImageFont\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import math"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_deg(arr):\n",
+    "    rad = math.atan2(arr[3]-arr[1],arr[2]-arr[0])\n",
+    "    PI = math.pi\n",
+    "    deg = (rad*180)/PI\n",
+    "    return deg"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "detector = create_detector('yolov3', device='cpu')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# gg = Image.open('gg.png')\n",
+    "# ggdraw = ImageDraw.Draw(gg)\n",
+    "# ggdraw.rectangle((5,5,gg.width-5,gg.height-5), outline=(255, 0,0), width=5)\n",
+    "gg = cv2.imread('gg.png', cv2.IMREAD_UNCHANGED)\n",
+    "gg = cv2.cvtColor(gg, cv2.COLOR_BGRA2RGBA)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 168,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(604, 1074, 3)\n"
+     ]
+    }
+   ],
+   "source": [
+    "img = cv2.imread('test.png')\n",
+    "preds = detector(img)\n",
+    "img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))\n",
+    "# draw = ImageDraw.Draw(image)\n",
+    "\n",
+    "# for face in preds:\n",
+    "#     draw.rectangle((face['bbox'][0], face['bbox'][1], face['bbox'][2], face['bbox'][3]), outline=(255, 0, 0), width=5)\n",
+    "#     x = face['bbox'][0]\n",
+    "#     y = face['bbox'][1]\n",
+    "#     for i, point in enumerate(face['keypoints']):\n",
+    "#         # draw.ellipse((point[0]-2, point[1]-2, point[0]+2, point[1]+2), fill=(255, 0, 0))\n",
+    "#         draw.text((point[0], point[1]), str(i), font=ImageFont.truetype('arial.ttf', 10), fill=(255, 0, 0))\n",
+    "\n",
+    "for face in preds:\n",
+    "    points = face['keypoints']\n",
+    "    color = img[int(points[27][1]), int(points[27][0])+10]\n",
+    "    polygon = np.array([\n",
+    "        [points[0][0], points[0][1]],\n",
+    "        [points[1][0], points[1][1]],\n",
+    "        [points[2][0], points[2][1]],\n",
+    "        [points[3][0], points[4][1]],\n",
+    "        [points[4][0], points[4][1]],\n",
+    "        [points[10][0], points[10][1]],\n",
+    "        [points[9][0], points[9][1]],\n",
+    "        [points[8][0], points[8][1]],\n",
+    "        [points[7][0], points[7][1]],\n",
+    "        [points[6][0], points[6][1]],\n",
+    "        [points[5][0], points[5][1]]\n",
+    "    ], np.int32)\n",
+    "    cv2.fillConvexPoly(img, polygon, color=(int(color[0]), int(color[1]), int(color[2]), 255))\n",
+    "    deg = get_deg([points[0][0], points[0][1], points[4][0], points[4][1]])\n",
+    "    rotated = gg.copy()\n",
+    "    resize = math.sqrt((points[10][0] - points[5][0])**2 + (points[10][1] - points[5][1])**2)\n",
+    "    rotated = cv2.resize(rotated, (int(resize), int(resize*1.12)))\n",
+    "    matrix = cv2.getPerspectiveTransform(\n",
+    "        np.float32([[0, 0], [rotated.shape[0],0], [0, rotated.shape[1]], [rotated.shape[0],rotated.shape[1]]]),\n",
+    "        np.float32([[points[5][0], points[5][1]], [points[10][0], points[10][1]], [points[1][0], points[1][1]], [points[3][0], points[3][1]]]))\n",
+    "    rotated = cv2.warpPerspective(rotated, matrix, (img.shape[1], img.shape[0]))\n",
+    "\n",
+    "    alpha = rotated[:, :, 3] / 255.\n",
+    "    for i in range(3):\n",
+    "        w, h = img.shape[:2]\n",
+    "        rw, rh = rotated.shape[:2]\n",
+    "        pointx, pointy = points[5][:2]\n",
+    "        pointx, pointy = int(pointx), int(pointy)\n",
+    "        img[:, :, i] = (1. - alpha) * img[0:, 0:, i] + alpha * rotated[:, :, i]\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.imshow(np.asarray(img))\n",
+    "cv2.imwrite('result.png', cv2.cvtColor(img, cv2.COLOR_RGBA2BGR))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.10.4 ('bot')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.4"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "1230fa9187aff02e6ebfc79b73c9c8422b1bc4886baebd37c443f3278ff8d769"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
main.py ADDED
@@ -0,0 +1,92 @@
+from io import BytesIO, StringIO
+import cv2
+from anime_face_detector import create_detector
+from werkzeug.wsgi import FileWrapper
+from flask import Flask, request, Response, send_file
+import math
+import numpy as np
+
+def get_deg(arr):
+    rad = math.atan2(arr[3]-arr[1],arr[2]-arr[0])
+    PI = math.pi
+    deg = (rad*180)/PI
+    return deg
+
+detector = create_detector('yolov3', device='cpu')
+
+gg = cv2.imread('gg.png', cv2.IMREAD_UNCHANGED)
+gg = cv2.cvtColor(gg, cv2.COLOR_BGRA2RGBA)
+
+def generate(image_file: BytesIO) -> bytes:
+    encoded = np.asarray(bytearray(image_file.read()), dtype=np.uint8)
+    img = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
+    preds = detector(img)
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA)
+
+    # for face in preds:
+    #     draw.rectangle((face['bbox'][0], face['bbox'][1], face['bbox'][2], face['bbox'][3]), outline=(255, 0, 0), width=5)
+    #     x = face['bbox'][0]
+    #     y = face['bbox'][1]
+    #     for i, point in enumerate(face['keypoints']):
+    #         # draw.ellipse((point[0]-2, point[1]-2, point[0]+2, point[1]+2), fill=(255, 0, 0))
+    #         draw.text((point[0], point[1]), str(i), font=ImageFont.truetype('arial.ttf', 10), fill=(255, 0, 0))
+
+    if len(preds) == 0:
+        return False
+
+    for face in preds:
+        points = face['keypoints']
+        color = img[int(points[27][1]), int(points[27][0])+10]
+        polygon = np.array([
+            [points[0][0], points[0][1]],
+            [points[1][0], points[1][1]],
+            [points[2][0], points[2][1]],
+            [points[3][0], points[4][1]],
+            [points[4][0], points[4][1]],
+            [points[10][0], points[10][1]],
+            [points[9][0], points[9][1]],
+            [points[8][0], points[8][1]],
+            [points[7][0], points[7][1]],
+            [points[6][0], points[6][1]],
+            [points[5][0], points[5][1]]
+        ], np.int32)
+        cv2.fillConvexPoly(img, polygon, color=(int(color[0]), int(color[1]), int(color[2]), 255))
+        deg = get_deg([points[0][0], points[0][1], points[4][0], points[4][1]])
+        rotated = gg.copy()
+        resize = math.sqrt((points[10][0] - points[5][0])**2 + (points[10][1] - points[5][1])**2)
+        rotated = cv2.resize(rotated, (int(resize), int(resize*1.12)))
+        matrix = cv2.getPerspectiveTransform(
+            np.float32([[0, 0], [rotated.shape[0],0], [0, rotated.shape[1]], [rotated.shape[0],rotated.shape[1]]]),
+            np.float32([[points[5][0], points[5][1]], [points[10][0], points[10][1]], [points[1][0], points[1][1]], [points[3][0], points[3][1]]]))
+        rotated = cv2.warpPerspective(rotated, matrix, (img.shape[1], img.shape[0]))
+
+        alpha = rotated[:, :, 3] / 255.
+        for i in range(3):
+            pointx, pointy = points[5][:2]
+            pointx, pointy = int(pointx), int(pointy)
+            img[:, :, i] = (1. - alpha) * img[0:, 0:, i] + alpha * rotated[:, :, i]
+
+
+
+    buffer = cv2.imencode('.png', cv2.cvtColor(img, cv2.COLOR_RGBA2BGRA))[1]
+
+    return buffer.tobytes()
+
+
+app = Flask(__name__)
+
+@app.post('/generate')
+def index():
+    if request.files.get('file') is None:
+        return "no file", 400
+    file = request.files.get('file')
+    dst = BytesIO()
+    file.save(dst)
+    dst.seek(0)
+    result = generate(dst)
+    if not result:
+        return {"status": 400}, 400
+    return Response(result, mimetype='image/png', direct_passthrough=True)
+
+if __name__ == "__main__":
+    app.run("0.0.0.0", 8080, debug=False)
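
Note: a client-side sketch for the Flask route above (assumptions: the server is already running on port 8080 and 'input.png' is a placeholder file name):

    # Sketch: POST an image to the /generate endpoint defined above.
    import requests

    with open('input.png', 'rb') as f:
        resp = requests.post('http://localhost:8080/generate', files={'file': f})
    if resp.status_code == 200:
        with open('result.png', 'wb') as out:
            out.write(resp.content)         # PNG bytes returned by generate()
    else:
        print(resp.status_code, resp.text)  # 400 when no file was sent or no face was found
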
packages.txt ADDED
@@ -0,0 +1 @@
+libgl1
requirements.txt ADDED
@@ -0,0 +1,9 @@
+opencv-python-headless
+pillow
+flask
+numpy
+mmdet
+mmpose
+git+https://github.com/open-mmlab/mmcv
+torch
+torchvision