# Text-detection config: a DBNet detector (CLIPResNet backbone, FPNC neck,
# DBHead) initialised from a published checkpoint, plus the test-time
# pipeline and a local visualizer.
#
# NOTE: this file must stay multi-line. The inline `#` comments below would
# swallow the remainder of the config if it were collapsed onto one line.

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
model = dict(
    type="DBNet",
    backbone=dict(
        type="CLIPResNet",
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type="BN", requires_grad=True),
        norm_eval=False,
        style="pytorch",
        dcn=dict(type="DCNv2", deform_groups=1, fallback_on_stride=False),
        # Backbone-only pretrained weights are disabled; the whole model is
        # initialised through the top-level `init_cfg` further down instead.
        # init_cfg=dict(
        #     type='Pretrained',
        #     checkpoint='https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth'),
        # One flag per stage (4 stages): DCN off for the first, on for the rest.
        stage_with_dcn=(False, True, True, True),
    ),
    neck=dict(
        type="FPNC",
        # One entry per backbone output listed in `out_indices`.
        in_channels=[256, 512, 1024, 2048],
        lateral_channels=256,
        asf_cfg=dict(attention_type="ScaleChannelSpatial"),
    ),
    det_head=dict(
        type="DBHead",
        in_channels=256,
        module_loss=dict(type="DBModuleLoss"),
        postprocessor=dict(
            type="DBPostprocessor",
            text_repr_type="quad",
            epsilon_ratio=0.002,
        ),
    ),
    data_preprocessor=dict(
        type="TextDetDataPreprocessor",
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_size_divisor=32,
    ),
    # Full-model weights; the URL is assembled from adjacent string literals.
    init_cfg=dict(
        type="Pretrained",
        checkpoint="https://download.openmmlab.com/mmocr/textdet/dbnetpp/"
        "dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015/"
        "dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015_20221101_124139-4ecb39ac.pth",
    ),
)

# ---------------------------------------------------------------------------
# Test-time pipeline
# ---------------------------------------------------------------------------
test_pipeline = [
    # Image loading is intentionally left out of this pipeline.
    # NOTE(review): presumably the caller passes already-decoded images - confirm.
    # dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(type="Resize", scale=(4068, 1024), keep_ratio=True),
    dict(
        type="PackTextDetInputs",
        # Reduced key set; the fuller variant is kept here for reference.
        # meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'),
        meta_keys=("img_shape", "scale_factor"),
    ),
]

# ---------------------------------------------------------------------------
# Visualization
# ---------------------------------------------------------------------------
vis_backends = [dict(type="LocalVisBackend")]
visualizer = dict(
    type="TextDetLocalVisualizer",
    name="visualizer",
    vis_backends=vis_backends,
)