# pse_r50_icdar15.yaml
# Global runtime settings.
system:
  mode: 0  # 0 for graph mode, 1 for pynative mode in MindSpore
  distribute: False
  amp_level: 'O0'
  seed: 42
  val_while_train: True
  drop_overflow_update: False
# Network definition (type `det`): backbone, neck and head.
model:
  type: det
  transform: null
  backbone:
    name: det_resnet50
    pretrained: True
  neck:
    name: PSEFPN
    out_channels: 256
  head:
    name: PSEHead
    hidden_size: 256
    # NOTE(review): same value as PSEGtDecode.kernel_num (7) in the train
    # transform pipeline — presumably the two must stay in sync; confirm.
    out_channels: 7
# Post-processing step and its thresholds.
postprocess:
  name: PSEPostprocess
  binary_thresh: 0
  box_thresh: 0.85
  min_area: 16
  box_type: 'quad'
  scale: 1
# Evaluation metric; `main_indicator` names the headline score.
metric:
  name: DetMetric
  main_indicator: f-score
# Loss function settings.
loss:
  name: PSEDiceLoss
  ohem_ratio: 4
# Learning-rate schedule and training length.
scheduler:
  scheduler: step_decay
  lr: 0.0001
  decay_rate: 0.9
  num_epochs: 1000
# Optimizer settings.
optimizer:
  opt: adam
  filter_bias_and_bn: False
  weight_decay: 0.0005
# only used for mixed precision training
loss_scaler:
  type: dynamic
  loss_scale: 512
  scale_factor: 2
  scale_window: 1000
# Training settings: checkpoint directory, dataset pipeline and loader.
train:  # ema: True
  ema: True
  ckpt_save_dir: './tmp_det'
  dataset_sink_mode: True
  dataset:
    type: DetDataset
    dataset_root: ../ocr_datasets
    data_dir: ic15/ch4_training_images
    label_file: ic15/train_det_gt.txt
    sample_ratio: 1.0
    # Transforms are listed in sequence; entries with no value take no
    # arguments.
    transform_pipeline:
      - DecodeImage:
          img_mode: RGB
          to_float32: False
      - DetLabelEncode:
      - RandomColorAdjust:
          brightness: 0.1255  # 32.0 / 255
          saturation: 0.5
      - RandomScale:
          scale_range: [0.1, 2.0]  # [0.2, 3.0]
          p: 1.0
      - RandomRotate:
          degrees: [-10, 10]  # [-12, 12]
          expand_canvas: False
          p: 1.0
      - PSEGtDecode:
          # NOTE(review): same value as model.head.out_channels (7) —
          # presumably the two must stay in sync; confirm.
          kernel_num: 7
          min_shrink_ratio: 0.4
          min_shortest_edge: 640
      - RandomCropWithMask:
          size: [640, 640]  # target crop size
          main_key: gt_text
          crop_keys: ['image', 'gt_text', 'gt_kernels', 'mask']
      - NormalizeImage:
          bgr_to_rgb: False
          is_hwc: True
          mean: imagenet
          std: imagenet
      - ToCHWImage:
    # the order of the dataloader list, matching the network input and the
    # input labels for the loss function, and optional data for
    # debug/visualize
    output_columns: ['image', 'gt_text', 'gt_kernels', 'mask']  # 'img_path']
    # input indices for network forward func in output_columns
    net_input_column_index: [0]
  loader:
    shuffle: True
    batch_size: 8
    drop_remainder: True
    num_workers: 2
# Evaluation settings: checkpoint to load, dataset pipeline and loader.
eval:
  ckpt_load_path: 'tmp_det/best.ckpt'
  dataset_sink_mode: False
  dataset:
    type: DetDataset
    dataset_root: ../ocr_datasets
    data_dir: ic15/ch4_test_images
    label_file: ic15/test_det_gt.txt
    sample_ratio: 1.0
    # Transforms are listed in sequence; entries with no value take no
    # arguments.
    transform_pipeline:
      - DecodeImage:
          img_mode: RGB
          to_float32: False
      - DetLabelEncode:
      - DetResize:
          limit_side_len: 736  # 736
          limit_type: min
      - NormalizeImage:
          bgr_to_rgb: False
          is_hwc: True
          mean: imagenet
          std: imagenet
      - ToCHWImage:
    # the order of the dataloader list, matching the network input and the
    # labels for evaluation
    output_columns: ['image', 'polys', 'ignore_tags', 'shape_list']
    net_input_column_index: [0]
    label_column_index: [1, 2]  # input indices marked as label
  loader:
    shuffle: False
    batch_size: 1
    drop_remainder: False
    num_workers: 2