hank1996 committed on
Commit
de444ec
·
1 Parent(s): 6104520

Create new file

Browse files
Files changed (1) hide show
  1. lib/models/YOLOP.py +552 -0
lib/models/YOLOP.py ADDED
@@ -0,0 +1,552 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import tensor
3
+ import torch.nn as nn
4
+ import sys,os
5
+ import math
6
+ import sys
7
+ sys.path.append(os.getcwd())
8
+ #sys.path.append("lib/models")
9
+ #sys.path.append("lib/utils")
10
+ #sys.path.append("/workspace/wh/projects/DaChuang")
11
+ from lib.utils import initialize_weights
12
+ # from lib.models.common2 import DepthSeperabelConv2d as Conv
13
+ # from lib.models.common2 import SPP, Bottleneck, BottleneckCSP, Focus, Concat, Detect
14
+ from lib.models.common import Conv, SPP, Bottleneck, BottleneckCSP, Focus, Concat, Detect, SharpenConv
15
+ from torch.nn import Upsample
16
+ from lib.utils import check_anchor_order
17
+ from lib.core.evaluate import SegmentationMetric
18
+ from lib.utils.utils import time_synchronized
19
+
20
+ """
21
+ MCnet_SPP = [
22
+ [ -1, Focus, [3, 32, 3]],
23
+ [ -1, Conv, [32, 64, 3, 2]],
24
+ [ -1, BottleneckCSP, [64, 64, 1]],
25
+ [ -1, Conv, [64, 128, 3, 2]],
26
+ [ -1, BottleneckCSP, [128, 128, 3]],
27
+ [ -1, Conv, [128, 256, 3, 2]],
28
+ [ -1, BottleneckCSP, [256, 256, 3]],
29
+ [ -1, Conv, [256, 512, 3, 2]],
30
+ [ -1, SPP, [512, 512, [5, 9, 13]]],
31
+ [ -1, BottleneckCSP, [512, 512, 1, False]],
32
+ [ -1, Conv,[512, 256, 1, 1]],
33
+ [ -1, Upsample, [None, 2, 'nearest']],
34
+ [ [-1, 6], Concat, [1]],
35
+ [ -1, BottleneckCSP, [512, 256, 1, False]],
36
+ [ -1, Conv, [256, 128, 1, 1]],
37
+ [ -1, Upsample, [None, 2, 'nearest']],
38
+ [ [-1,4], Concat, [1]],
39
+ [ -1, BottleneckCSP, [256, 128, 1, False]],
40
+ [ -1, Conv, [128, 128, 3, 2]],
41
+ [ [-1, 14], Concat, [1]],
42
+ [ -1, BottleneckCSP, [256, 256, 1, False]],
43
+ [ -1, Conv, [256, 256, 3, 2]],
44
+ [ [-1, 10], Concat, [1]],
45
+ [ -1, BottleneckCSP, [512, 512, 1, False]],
46
+ # [ [17, 20, 23], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]],
47
+ [ [17, 20, 23], Detect, [13, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]],
48
+ [ 17, Conv, [128, 64, 3, 1]],
49
+ [ -1, Upsample, [None, 2, 'nearest']],
50
+ [ [-1,2], Concat, [1]],
51
+ [ -1, BottleneckCSP, [128, 64, 1, False]],
52
+ [ -1, Conv, [64, 32, 3, 1]],
53
+ [ -1, Upsample, [None, 2, 'nearest']],
54
+ [ -1, Conv, [32, 16, 3, 1]],
55
+ [ -1, BottleneckCSP, [16, 8, 1, False]],
56
+ [ -1, Upsample, [None, 2, 'nearest']],
57
+ [ -1, SPP, [8, 2, [5, 9, 13]]] #segmentation output
58
+ ]
59
+ # [2,6,3,9,5,13], [7,19,11,26,17,39], [28,64,44,103,61,183]
60
+ MCnet_0 = [
61
+ [ -1, Focus, [3, 32, 3]],
62
+ [ -1, Conv, [32, 64, 3, 2]],
63
+ [ -1, BottleneckCSP, [64, 64, 1]],
64
+ [ -1, Conv, [64, 128, 3, 2]],
65
+ [ -1, BottleneckCSP, [128, 128, 3]],
66
+ [ -1, Conv, [128, 256, 3, 2]],
67
+ [ -1, BottleneckCSP, [256, 256, 3]],
68
+ [ -1, Conv, [256, 512, 3, 2]],
69
+ [ -1, SPP, [512, 512, [5, 9, 13]]],
70
+ [ -1, BottleneckCSP, [512, 512, 1, False]],
71
+ [ -1, Conv,[512, 256, 1, 1]],
72
+ [ -1, Upsample, [None, 2, 'nearest']],
73
+ [ [-1, 6], Concat, [1]],
74
+ [ -1, BottleneckCSP, [512, 256, 1, False]],
75
+ [ -1, Conv, [256, 128, 1, 1]],
76
+ [ -1, Upsample, [None, 2, 'nearest']],
77
+ [ [-1,4], Concat, [1]],
78
+ [ -1, BottleneckCSP, [256, 128, 1, False]],
79
+ [ -1, Conv, [128, 128, 3, 2]],
80
+ [ [-1, 14], Concat, [1]],
81
+ [ -1, BottleneckCSP, [256, 256, 1, False]],
82
+ [ -1, Conv, [256, 256, 3, 2]],
83
+ [ [-1, 10], Concat, [1]],
84
+ [ -1, BottleneckCSP, [512, 512, 1, False]],
85
+ [ [17, 20, 23], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]], #Detect output 24
86
+ [ 16, Conv, [128, 64, 3, 1]],
87
+ [ -1, Upsample, [None, 2, 'nearest']],
88
+ [ [-1,2], Concat, [1]],
89
+ [ -1, BottleneckCSP, [128, 64, 1, False]],
90
+ [ -1, Conv, [64, 32, 3, 1]],
91
+ [ -1, Upsample, [None, 2, 'nearest']],
92
+ [ -1, Conv, [32, 16, 3, 1]],
93
+ [ -1, BottleneckCSP, [16, 8, 1, False]],
94
+ [ -1, Upsample, [None, 2, 'nearest']],
95
+ [ -1, Conv, [8, 2, 3, 1]], #Driving area segmentation output
96
+ [ 16, Conv, [128, 64, 3, 1]],
97
+ [ -1, Upsample, [None, 2, 'nearest']],
98
+ [ [-1,2], Concat, [1]],
99
+ [ -1, BottleneckCSP, [128, 64, 1, False]],
100
+ [ -1, Conv, [64, 32, 3, 1]],
101
+ [ -1, Upsample, [None, 2, 'nearest']],
102
+ [ -1, Conv, [32, 16, 3, 1]],
103
+ [ -1, BottleneckCSP, [16, 8, 1, False]],
104
+ [ -1, Upsample, [None, 2, 'nearest']],
105
+ [ -1, Conv, [8, 2, 3, 1]], #Lane line segmentation output
106
+ ]
107
+ # The lane line and the driving area segment branches share information with each other
108
+ MCnet_share = [
109
+ [ -1, Focus, [3, 32, 3]], #0
110
+ [ -1, Conv, [32, 64, 3, 2]], #1
111
+ [ -1, BottleneckCSP, [64, 64, 1]], #2
112
+ [ -1, Conv, [64, 128, 3, 2]], #3
113
+ [ -1, BottleneckCSP, [128, 128, 3]], #4
114
+ [ -1, Conv, [128, 256, 3, 2]], #5
115
+ [ -1, BottleneckCSP, [256, 256, 3]], #6
116
+ [ -1, Conv, [256, 512, 3, 2]], #7
117
+ [ -1, SPP, [512, 512, [5, 9, 13]]], #8
118
+ [ -1, BottleneckCSP, [512, 512, 1, False]], #9
119
+ [ -1, Conv,[512, 256, 1, 1]], #10
120
+ [ -1, Upsample, [None, 2, 'nearest']], #11
121
+ [ [-1, 6], Concat, [1]], #12
122
+ [ -1, BottleneckCSP, [512, 256, 1, False]], #13
123
+ [ -1, Conv, [256, 128, 1, 1]], #14
124
+ [ -1, Upsample, [None, 2, 'nearest']], #15
125
+ [ [-1,4], Concat, [1]], #16
126
+ [ -1, BottleneckCSP, [256, 128, 1, False]], #17
127
+ [ -1, Conv, [128, 128, 3, 2]], #18
128
+ [ [-1, 14], Concat, [1]], #19
129
+ [ -1, BottleneckCSP, [256, 256, 1, False]], #20
130
+ [ -1, Conv, [256, 256, 3, 2]], #21
131
+ [ [-1, 10], Concat, [1]], #22
132
+ [ -1, BottleneckCSP, [512, 512, 1, False]], #23
133
+ [ [17, 20, 23], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]], #Detect output 24
134
+ [ 16, Conv, [256, 64, 3, 1]], #25
135
+ [ -1, Upsample, [None, 2, 'nearest']], #26
136
+ [ [-1,2], Concat, [1]], #27
137
+ [ -1, BottleneckCSP, [128, 64, 1, False]], #28
138
+ [ -1, Conv, [64, 32, 3, 1]], #29
139
+ [ -1, Upsample, [None, 2, 'nearest']], #30
140
+ [ -1, Conv, [32, 16, 3, 1]], #31
141
+ [ -1, BottleneckCSP, [16, 8, 1, False]], #32 driving area segment neck
142
+ [ 16, Conv, [256, 64, 3, 1]], #33
143
+ [ -1, Upsample, [None, 2, 'nearest']], #34
144
+ [ [-1,2], Concat, [1]], #35
145
+ [ -1, BottleneckCSP, [128, 64, 1, False]], #36
146
+ [ -1, Conv, [64, 32, 3, 1]], #37
147
+ [ -1, Upsample, [None, 2, 'nearest']], #38
148
+ [ -1, Conv, [32, 16, 3, 1]], #39
149
+ [ -1, BottleneckCSP, [16, 8, 1, False]], #40 lane line segment neck
150
+ [ [31,39], Concat, [1]], #41
151
+ [ -1, Conv, [32, 8, 3, 1]], #42 Share_Block
152
+ [ [32,42], Concat, [1]], #43
153
+ [ -1, Upsample, [None, 2, 'nearest']], #44
154
+ [ -1, Conv, [16, 2, 3, 1]], #45 Driving area segmentation output
155
+ [ [40,42], Concat, [1]], #46
156
+ [ -1, Upsample, [None, 2, 'nearest']], #47
157
+ [ -1, Conv, [16, 2, 3, 1]] #48Lane line segmentation output
158
+ ]
159
+ # The lane line and the driving area segment branches without share information with each other
160
+ MCnet_no_share = [
161
+ [ -1, Focus, [3, 32, 3]], #0
162
+ [ -1, Conv, [32, 64, 3, 2]], #1
163
+ [ -1, BottleneckCSP, [64, 64, 1]], #2
164
+ [ -1, Conv, [64, 128, 3, 2]], #3
165
+ [ -1, BottleneckCSP, [128, 128, 3]], #4
166
+ [ -1, Conv, [128, 256, 3, 2]], #5
167
+ [ -1, BottleneckCSP, [256, 256, 3]], #6
168
+ [ -1, Conv, [256, 512, 3, 2]], #7
169
+ [ -1, SPP, [512, 512, [5, 9, 13]]], #8
170
+ [ -1, BottleneckCSP, [512, 512, 1, False]], #9
171
+ [ -1, Conv,[512, 256, 1, 1]], #10
172
+ [ -1, Upsample, [None, 2, 'nearest']], #11
173
+ [ [-1, 6], Concat, [1]], #12
174
+ [ -1, BottleneckCSP, [512, 256, 1, False]], #13
175
+ [ -1, Conv, [256, 128, 1, 1]], #14
176
+ [ -1, Upsample, [None, 2, 'nearest']], #15
177
+ [ [-1,4], Concat, [1]], #16
178
+ [ -1, BottleneckCSP, [256, 128, 1, False]], #17
179
+ [ -1, Conv, [128, 128, 3, 2]], #18
180
+ [ [-1, 14], Concat, [1]], #19
181
+ [ -1, BottleneckCSP, [256, 256, 1, False]], #20
182
+ [ -1, Conv, [256, 256, 3, 2]], #21
183
+ [ [-1, 10], Concat, [1]], #22
184
+ [ -1, BottleneckCSP, [512, 512, 1, False]], #23
185
+ [ [17, 20, 23], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]], #Detect output 24
186
+ [ 16, Conv, [256, 64, 3, 1]], #25
187
+ [ -1, Upsample, [None, 2, 'nearest']], #26
188
+ [ [-1,2], Concat, [1]], #27
189
+ [ -1, BottleneckCSP, [128, 64, 1, False]], #28
190
+ [ -1, Conv, [64, 32, 3, 1]], #29
191
+ [ -1, Upsample, [None, 2, 'nearest']], #30
192
+ [ -1, Conv, [32, 16, 3, 1]], #31
193
+ [ -1, BottleneckCSP, [16, 8, 1, False]], #32 driving area segment neck
194
+ [ -1, Upsample, [None, 2, 'nearest']], #33
195
+ [ -1, Conv, [8, 3, 3, 1]], #34 Driving area segmentation output
196
+ [ 16, Conv, [256, 64, 3, 1]], #35
197
+ [ -1, Upsample, [None, 2, 'nearest']], #36
198
+ [ [-1,2], Concat, [1]], #37
199
+ [ -1, BottleneckCSP, [128, 64, 1, False]], #38
200
+ [ -1, Conv, [64, 32, 3, 1]], #39
201
+ [ -1, Upsample, [None, 2, 'nearest']], #40
202
+ [ -1, Conv, [32, 16, 3, 1]], #41
203
+ [ -1, BottleneckCSP, [16, 8, 1, False]], #42 lane line segment neck
204
+ [ -1, Upsample, [None, 2, 'nearest']], #43
205
+ [ -1, Conv, [8, 2, 3, 1]] #44 Lane line segmentation output
206
+ ]
207
+ MCnet_feedback = [
208
+ [ -1, Focus, [3, 32, 3]], #0
209
+ [ -1, Conv, [32, 64, 3, 2]], #1
210
+ [ -1, BottleneckCSP, [64, 64, 1]], #2
211
+ [ -1, Conv, [64, 128, 3, 2]], #3
212
+ [ -1, BottleneckCSP, [128, 128, 3]], #4
213
+ [ -1, Conv, [128, 256, 3, 2]], #5
214
+ [ -1, BottleneckCSP, [256, 256, 3]], #6
215
+ [ -1, Conv, [256, 512, 3, 2]], #7
216
+ [ -1, SPP, [512, 512, [5, 9, 13]]], #8
217
+ [ -1, BottleneckCSP, [512, 512, 1, False]], #9
218
+ [ -1, Conv,[512, 256, 1, 1]], #10
219
+ [ -1, Upsample, [None, 2, 'nearest']], #11
220
+ [ [-1, 6], Concat, [1]], #12
221
+ [ -1, BottleneckCSP, [512, 256, 1, False]], #13
222
+ [ -1, Conv, [256, 128, 1, 1]], #14
223
+ [ -1, Upsample, [None, 2, 'nearest']], #15
224
+ [ [-1,4], Concat, [1]], #16
225
+ [ -1, BottleneckCSP, [256, 128, 1, False]], #17
226
+ [ -1, Conv, [128, 128, 3, 2]], #18
227
+ [ [-1, 14], Concat, [1]], #19
228
+ [ -1, BottleneckCSP, [256, 256, 1, False]], #20
229
+ [ -1, Conv, [256, 256, 3, 2]], #21
230
+ [ [-1, 10], Concat, [1]], #22
231
+ [ -1, BottleneckCSP, [512, 512, 1, False]], #23
232
+ [ [17, 20, 23], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]], #Detect output 24
233
+ [ 16, Conv, [256, 128, 3, 1]], #25
234
+ [ -1, Upsample, [None, 2, 'nearest']], #26
235
+ [ -1, BottleneckCSP, [128, 64, 1, False]], #28
236
+ [ -1, Conv, [64, 32, 3, 1]], #29
237
+ [ -1, Upsample, [None, 2, 'nearest']], #30
238
+ [ -1, Conv, [32, 16, 3, 1]], #31
239
+ [ -1, BottleneckCSP, [16, 8, 1, False]], #32 driving area segment neck
240
+ [ -1, Upsample, [None, 2, 'nearest']], #33
241
+ [ -1, Conv, [8, 2, 3, 1]], #34 Driving area segmentation output
242
+ [ 16, Conv, [256, 128, 3, 1]], #35
243
+ [ -1, Upsample, [None, 2, 'nearest']], #36
244
+ [ -1, BottleneckCSP, [128, 64, 1, False]], #38
245
+ [ -1, Conv, [64, 32, 3, 1]], #39
246
+ [ -1, Upsample, [None, 2, 'nearest']], #40
247
+ [ -1, Conv, [32, 16, 3, 1]], #41
248
+ [ -1, BottleneckCSP, [16, 8, 1, False]], #42 lane line segment neck
249
+ [ -1, Upsample, [None, 2, 'nearest']], #43
250
+ [ -1, Conv, [8, 2, 3, 1]] #44 Lane line segmentation output
251
+ ]
252
+ MCnet_Da_feedback1 = [
253
+ [46, 26, 35], #Det_out_idx, Da_Segout_idx, LL_Segout_idx
254
+ [ -1, Focus, [3, 32, 3]], #0
255
+ [ -1, Conv, [32, 64, 3, 2]], #1
256
+ [ -1, BottleneckCSP, [64, 64, 1]], #2
257
+ [ -1, Conv, [64, 128, 3, 2]], #3
258
+ [ -1, BottleneckCSP, [128, 128, 3]], #4
259
+ [ -1, Conv, [128, 256, 3, 2]], #5
260
+ [ -1, BottleneckCSP, [256, 256, 3]], #6
261
+ [ -1, Conv, [256, 512, 3, 2]], #7
262
+ [ -1, SPP, [512, 512, [5, 9, 13]]], #8
263
+ [ -1, BottleneckCSP, [512, 512, 1, False]], #9
264
+ [ -1, Conv,[512, 256, 1, 1]], #10
265
+ [ -1, Upsample, [None, 2, 'nearest']], #11
266
+ [ [-1, 6], Concat, [1]], #12
267
+ [ -1, BottleneckCSP, [512, 256, 1, False]], #13
268
+ [ -1, Conv, [256, 128, 1, 1]], #14
269
+ [ -1, Upsample, [None, 2, 'nearest']], #15
270
+ [ [-1,4], Concat, [1]], #16 backbone+fpn
271
+ [ -1,Conv,[256,256,1,1]], #17
272
+ [ 16, Conv, [256, 128, 3, 1]], #18
273
+ [ -1, Upsample, [None, 2, 'nearest']], #19
274
+ [ -1, BottleneckCSP, [128, 64, 1, False]], #20
275
+ [ -1, Conv, [64, 32, 3, 1]], #21
276
+ [ -1, Upsample, [None, 2, 'nearest']], #22
277
+ [ -1, Conv, [32, 16, 3, 1]], #23
278
+ [ -1, BottleneckCSP, [16, 8, 1, False]], #24 driving area segment neck
279
+ [ -1, Upsample, [None, 2, 'nearest']], #25
280
+ [ -1, Conv, [8, 2, 3, 1]], #26 Driving area segmentation output
281
+ [ 16, Conv, [256, 128, 3, 1]], #27
282
+ [ -1, Upsample, [None, 2, 'nearest']], #28
283
+ [ -1, BottleneckCSP, [128, 64, 1, False]], #29
284
+ [ -1, Conv, [64, 32, 3, 1]], #30
285
+ [ -1, Upsample, [None, 2, 'nearest']], #31
286
+ [ -1, Conv, [32, 16, 3, 1]], #32
287
+ [ -1, BottleneckCSP, [16, 8, 1, False]], #33 lane line segment neck
288
+ [ -1, Upsample, [None, 2, 'nearest']], #34
289
+ [ -1, Conv, [8, 2, 3, 1]], #35Lane line segmentation output
290
+ [ 23, Conv, [16, 16, 3, 2]], #36
291
+ [ -1, Conv, [16, 32, 3, 2]], #2 times 2xdownsample 37
292
+ [ [-1,17], Concat, [1]], #38
293
+ [ -1, BottleneckCSP, [288, 128, 1, False]], #39
294
+ [ -1, Conv, [128, 128, 3, 2]], #40
295
+ [ [-1, 14], Concat, [1]], #41
296
+ [ -1, BottleneckCSP, [256, 256, 1, False]], #42
297
+ [ -1, Conv, [256, 256, 3, 2]], #43
298
+ [ [-1, 10], Concat, [1]], #44
299
+ [ -1, BottleneckCSP, [512, 512, 1, False]], #45
300
+ [ [39, 42, 45], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]] #Detect output 46
301
+ ]
302
+ # The lane line and the driving area segment branches share information with each other and feedback to det_head
303
+ MCnet_Da_feedback2 = [
304
+ [47, 26, 35], #Det_out_idx, Da_Segout_idx, LL_Segout_idx
305
+ [25, 28, 31, 33], #layer in Da_branch to do SAD
306
+ [34, 37, 40, 42], #layer in LL_branch to do SAD
307
+ [ -1, Focus, [3, 32, 3]], #0
308
+ [ -1, Conv, [32, 64, 3, 2]], #1
309
+ [ -1, BottleneckCSP, [64, 64, 1]], #2
310
+ [ -1, Conv, [64, 128, 3, 2]], #3
311
+ [ -1, BottleneckCSP, [128, 128, 3]], #4
312
+ [ -1, Conv, [128, 256, 3, 2]], #5
313
+ [ -1, BottleneckCSP, [256, 256, 3]], #6
314
+ [ -1, Conv, [256, 512, 3, 2]], #7
315
+ [ -1, SPP, [512, 512, [5, 9, 13]]], #8
316
+ [ -1, BottleneckCSP, [512, 512, 1, False]], #9
317
+ [ -1, Conv,[512, 256, 1, 1]], #10
318
+ [ -1, Upsample, [None, 2, 'nearest']], #11
319
+ [ [-1, 6], Concat, [1]], #12
320
+ [ -1, BottleneckCSP, [512, 256, 1, False]], #13
321
+ [ -1, Conv, [256, 128, 1, 1]], #14
322
+ [ -1, Upsample, [None, 2, 'nearest']], #15
323
+ [ [-1,4], Concat, [1]], #16 backbone+fpn
324
+ [ -1,Conv,[256,256,1,1]], #17
325
+ [ 16, Conv, [256, 128, 3, 1]], #18
326
+ [ -1, Upsample, [None, 2, 'nearest']], #19
327
+ [ -1, BottleneckCSP, [128, 64, 1, False]], #20
328
+ [ -1, Conv, [64, 32, 3, 1]], #21
329
+ [ -1, Upsample, [None, 2, 'nearest']], #22
330
+ [ -1, Conv, [32, 16, 3, 1]], #23
331
+ [ -1, BottleneckCSP, [16, 8, 1, False]], #24 driving area segment neck
332
+ [ -1, Upsample, [None, 2, 'nearest']], #25
333
+ [ -1, Conv, [8, 2, 3, 1]], #26 Driving area segmentation output
334
+ [ 16, Conv, [256, 128, 3, 1]], #27
335
+ [ -1, Upsample, [None, 2, 'nearest']], #28
336
+ [ -1, BottleneckCSP, [128, 64, 1, False]], #29
337
+ [ -1, Conv, [64, 32, 3, 1]], #30
338
+ [ -1, Upsample, [None, 2, 'nearest']], #31
339
+ [ -1, Conv, [32, 16, 3, 1]], #32
340
+ [ -1, BottleneckCSP, [16, 8, 1, False]], #33 lane line segment neck
341
+ [ -1, Upsample, [None, 2, 'nearest']], #34
342
+ [ -1, Conv, [8, 2, 3, 1]], #35Lane line segmentation output
343
+ [ 23, Conv, [16, 64, 3, 2]], #36
344
+ [ -1, Conv, [64, 256, 3, 2]], #2 times 2xdownsample 37
345
+ [ [-1,17], Concat, [1]], #38
346
+ [-1, Conv, [512, 256, 3, 1]], #39
347
+ [ -1, BottleneckCSP, [256, 128, 1, False]], #40
348
+ [ -1, Conv, [128, 128, 3, 2]], #41
349
+ [ [-1, 14], Concat, [1]], #42
350
+ [ -1, BottleneckCSP, [256, 256, 1, False]], #43
351
+ [ -1, Conv, [256, 256, 3, 2]], #44
352
+ [ [-1, 10], Concat, [1]], #45
353
+ [ -1, BottleneckCSP, [512, 512, 1, False]], #46
354
+ [ [40, 42, 45], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]] #Detect output 47
355
+ ]
356
+ MCnet_share1 = [
357
+ [24, 33, 45], #Det_out_idx, Da_Segout_idx, LL_Segout_idx
358
+ [25, 28, 31, 33], #layer in Da_branch to do SAD
359
+ [34, 37, 40, 42], #layer in LL_branch to do SAD
360
+ [ -1, Focus, [3, 32, 3]], #0
361
+ [ -1, Conv, [32, 64, 3, 2]], #1
362
+ [ -1, BottleneckCSP, [64, 64, 1]], #2
363
+ [ -1, Conv, [64, 128, 3, 2]], #3
364
+ [ -1, BottleneckCSP, [128, 128, 3]], #4
365
+ [ -1, Conv, [128, 256, 3, 2]], #5
366
+ [ -1, BottleneckCSP, [256, 256, 3]], #6
367
+ [ -1, Conv, [256, 512, 3, 2]], #7
368
+ [ -1, SPP, [512, 512, [5, 9, 13]]], #8
369
+ [ -1, BottleneckCSP, [512, 512, 1, False]], #9
370
+ [ -1, Conv,[512, 256, 1, 1]], #10
371
+ [ -1, Upsample, [None, 2, 'nearest']], #11
372
+ [ [-1, 6], Concat, [1]], #12
373
+ [ -1, BottleneckCSP, [512, 256, 1, False]], #13
374
+ [ -1, Conv, [256, 128, 1, 1]], #14
375
+ [ -1, Upsample, [None, 2, 'nearest']], #15
376
+ [ [-1,4], Concat, [1]], #16
377
+ [ -1, BottleneckCSP, [256, 128, 1, False]], #17
378
+ [ -1, Conv, [128, 128, 3, 2]], #18
379
+ [ [-1, 14], Concat, [1]], #19
380
+ [ -1, BottleneckCSP, [256, 256, 1, False]], #20
381
+ [ -1, Conv, [256, 256, 3, 2]], #21
382
+ [ [-1, 10], Concat, [1]], #22
383
+ [ -1, BottleneckCSP, [512, 512, 1, False]], #23
384
+ [ [17, 20, 23], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]], #Detect output 24
385
+ [ 16, Conv, [256, 128, 3, 1]], #25
386
+ [ -1, Upsample, [None, 2, 'nearest']], #26
387
+ [ -1, BottleneckCSP, [128, 64, 1, False]], #27
388
+ [ -1, Conv, [64, 32, 3, 1]], #28
389
+ [ -1, Upsample, [None, 2, 'nearest']], #29
390
+ [ -1, Conv, [32, 16, 3, 1]], #30
391
+ [ -1, BottleneckCSP, [16, 8, 1, False]], #31 driving area segment neck
392
+ [ -1, Upsample, [None, 2, 'nearest']], #32
393
+ [ -1, Conv, [8, 2, 3, 1]], #33 Driving area segmentation output
394
+ [ 16, Conv, [256, 128, 3, 1]], #34
395
+ [ -1, Upsample, [None, 2, 'nearest']], #35
396
+ [ -1, BottleneckCSP, [128, 64, 1, False]], #36
397
+ [ -1, Conv, [64, 32, 3, 1]], #37
398
+ [ -1, Upsample, [None, 2, 'nearest']], #38
399
+ [ -1, Conv, [32, 16, 3, 1]], #39
400
+ [ 30, SharpenConv, [16,16, 3, 1]], #40
401
+ [ -1, Conv, [16, 16, 3, 1]], #41
402
+ [ [-1, 39], Concat, [1]], #42
403
+ [ -1, BottleneckCSP, [32, 8, 1, False]], #43 lane line segment neck
404
+ [ -1, Upsample, [None, 2, 'nearest']], #44
405
+ [ -1, Conv, [8, 2, 3, 1]] #45 Lane line segmentation output
406
+ ]"""
407
+
408
+
409
# YOLOP layer table: the drivable-area and lane-line segmentation branches do
# not share information with each other and have no link between them.
#
# Row 0 is a header: [Det_out_idx, Da_Segout_idx, LL_Segout_idx], i.e. the
# layer indices whose outputs MCnet returns.
# Every following row is [from, block, args]:
#   from  - index (or list of indices) of the layer(s) feeding this one;
#           -1 means "output of the previous layer"
#   block - module class to instantiate
#   args  - positional arguments passed to the module constructor
# The trailing "#N" comments are the layer indices (header row excluded).
YOLOP = [
[24, 33, 42],   #Det_out_idx, Da_Segout_idx, LL_Segout_idx
[ -1, Focus, [3, 32, 3]],   #0
[ -1, Conv, [32, 64, 3, 2]],    #1
[ -1, BottleneckCSP, [64, 64, 1]],  #2
[ -1, Conv, [64, 128, 3, 2]],   #3
[ -1, BottleneckCSP, [128, 128, 3]],    #4
[ -1, Conv, [128, 256, 3, 2]],  #5
[ -1, BottleneckCSP, [256, 256, 3]],    #6
[ -1, Conv, [256, 512, 3, 2]],  #7
[ -1, SPP, [512, 512, [5, 9, 13]]],     #8
[ -1, BottleneckCSP, [512, 512, 1, False]],     #9
[ -1, Conv,[512, 256, 1, 1]],   #10
[ -1, Upsample, [None, 2, 'nearest']],  #11
[ [-1, 6], Concat, [1]],    #12
[ -1, BottleneckCSP, [512, 256, 1, False]], #13
[ -1, Conv, [256, 128, 1, 1]],  #14
[ -1, Upsample, [None, 2, 'nearest']],  #15
[ [-1,4], Concat, [1]],     #16         # end of the encoder; both segmentation branches start from this layer

[ -1, BottleneckCSP, [256, 128, 1, False]],     #17
[ -1, Conv, [128, 128, 3, 2]],      #18
[ [-1, 14], Concat, [1]],       #19
[ -1, BottleneckCSP, [256, 256, 1, False]],     #20
[ -1, Conv, [256, 256, 3, 2]],      #21
[ [-1, 10], Concat, [1]],   #22
[ -1, BottleneckCSP, [512, 512, 1, False]],     #23
[ [17, 20, 23], Detect,  [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]], #24 detection head (fed by layers 17, 20 and 23)

[ 16, Conv, [256, 128, 3, 1]],   #25
[ -1, Upsample, [None, 2, 'nearest']],  #26
[ -1, BottleneckCSP, [128, 64, 1, False]],  #27
[ -1, Conv, [64, 32, 3, 1]],    #28
[ -1, Upsample, [None, 2, 'nearest']],  #29
[ -1, Conv, [32, 16, 3, 1]],    #30
[ -1, BottleneckCSP, [16, 8, 1, False]],    #31
[ -1, Upsample, [None, 2, 'nearest']],  #32
[ -1, Conv, [8, 2, 3, 1]], #33 drivable-area segmentation head

[ 16, Conv, [256, 128, 3, 1]],   #34
[ -1, Upsample, [None, 2, 'nearest']],  #35
[ -1, BottleneckCSP, [128, 64, 1, False]],  #36
[ -1, Conv, [64, 32, 3, 1]],    #37
[ -1, Upsample, [None, 2, 'nearest']],  #38
[ -1, Conv, [32, 16, 3, 1]],    #39
[ -1, BottleneckCSP, [16, 8, 1, False]],    #40
[ -1, Upsample, [None, 2, 'nearest']],  #41
[ -1, Conv, [8, 2, 3, 1]] #42 lane-line segmentation head
]
459
+
460
+
461
class MCnet(nn.Module):
    """Multi-task network assembled from a layer table.

    ``block_cfg`` is a list whose first row is
    ``[det_out_idx, seg_out_idx_0, seg_out_idx_1, ...]`` and whose remaining
    rows are ``[from_, block, args]`` triples:

    * ``from_`` - ``-1`` (use the previous layer's output), a single layer
      index, or a list of indices / ``-1`` entries for multi-input blocks.
    * ``block`` - a module class (or a string naming one).
    * ``args``  - positional arguments for the module constructor.

    ``forward`` returns ``[det_out, seg_out_0, seg_out_1, ...]`` where every
    segmentation output has been passed through a sigmoid.
    """

    def __init__(self, block_cfg, **kwargs):
        """Instantiate every layer of ``block_cfg`` and calibrate the detector.

        Args:
            block_cfg: layer table as described in the class docstring.
            **kwargs: accepted for interface compatibility; not read here.

        Raises:
            AssertionError: if the position of the ``Detect`` layer does not
                match the detector index declared in ``block_cfg[0][0]``.
        """
        super(MCnet, self).__init__()
        layers, save = [], []
        self.nc = 1
        self.detector_index = -1
        self.det_out_idx = block_cfg[0][0]
        self.seg_out_idx = block_cfg[0][1:]

        # Build model
        for i, (from_, block, args) in enumerate(block_cfg[1:]):
            # NOTE(review): eval() executes arbitrary code; only pass layer
            # tables that come from trusted sources.
            block = eval(block) if isinstance(block, str) else block  # eval strings
            if block is Detect:
                self.detector_index = i
            block_ = block(*args)
            block_.index, block_.from_ = i, from_
            layers.append(block_)
            # Remember which layer outputs are consumed later so that
            # forward() only keeps those tensors alive.
            sources = [from_] if isinstance(from_, int) else from_
            save.extend(x % i for x in sources if x != -1)  # append to savelist
        assert self.detector_index == block_cfg[0][0], (
            "Detect layer index does not match block_cfg[0][0]"
        )

        self.model, self.save = nn.Sequential(*layers), sorted(save)
        self.names = [str(i) for i in range(self.nc)]

        # Set stride and anchors for the detector.
        Detector = self.model[self.detector_index]  # detector
        if isinstance(Detector, Detect):
            s = 128  # side length of the dummy input used to measure strides
            with torch.no_grad():
                model_out = self.forward(torch.zeros(1, 3, s, s))
                detects, _, _ = model_out
                # stride of each detection output = input size / output size
                Detector.stride = torch.tensor([s / x.shape[-2] for x in detects])
            # Rescale the anchors by the stride of their detection output.
            Detector.anchors /= Detector.stride.view(-1, 1, 1)
            check_anchor_order(Detector)
            self.stride = Detector.stride
            self._initialize_biases()

        initialize_weights(self)

    def forward(self, x):
        """Run all layers and collect the detection and segmentation outputs.

        Args:
            x: input passed to the first layer.

        Returns:
            list: ``[det_out, *seg_outs]``. ``det_out`` is the raw output of
            the layer at ``self.detector_index`` (``None`` if that layer was
            never reached); each segmentation output is the sigmoid of the
            output of a layer listed in ``self.seg_out_idx``, in layer order.
        """
        cache = []
        out = []
        det_out = None
        for i, block in enumerate(self.model):
            if block.from_ != -1:
                if isinstance(block.from_, int):
                    x = cache[block.from_]
                else:
                    # Multi-input block: -1 stands for the running tensor,
                    # anything else is looked up among the saved outputs.
                    x = [x if j == -1 else cache[j] for j in block.from_]
            x = block(x)
            if i in self.seg_out_idx:  # save segmentation result
                # torch.sigmoid avoids building a new nn.Sigmoid module on
                # every call while producing the same values.
                out.append(torch.sigmoid(x))
            if i == self.detector_index:
                det_out = x
            # Keep only outputs that a later layer reads; drop the rest.
            cache.append(x if block.index in self.save else None)
        out.insert(0, det_out)
        return out

    def _initialize_biases(self, cf=None):
        """Initialize the biases of the Detect() convolutions.

        Follows https://arxiv.org/abs/1708.02002 section 3.3.

        Args:
            cf: optional class-frequency tensor. When given, class biases are
                shifted by ``log(cf / cf.sum())``; otherwise by a constant
                derived from the detector's class count.
        """
        m = self.model[self.detector_index]  # Detect() module
        for mi, s in zip(m.m, m.stride):  # from
            b = mi.bias.view(m.na, -1)  # one row per anchor
            b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
533
+
534
def get_net(cfg, **kwargs):
    """Create an MCnet built from the module-level ``YOLOP`` layer table.

    Args:
        cfg: accepted for interface compatibility; not read by this function.
        **kwargs: forwarded unchanged to the ``MCnet`` constructor.

    Returns:
        The newly constructed ``MCnet`` instance.
    """
    return MCnet(YOLOP, **kwargs)
538
+
539
+
540
if __name__ == "__main__":
    # Smoke test: build the network, push one random image through it and
    # print the shape of every output.
    model = get_net(False)
    input_ = torch.randn((1, 3, 256, 256))
    # MCnet.forward returns a flat list [det_out, da_seg, ll_seg]; unpacking
    # it into two names (as the old "model_out, SAD_out" did) raises ValueError.
    detects, driving_area_seg, lane_line_seg = model(input_)
    for det in detects:
        print(det.shape)
    print(driving_area_seg.shape)
    print(lane_line_seg.shape)