8th: [Training] MMDetection + Cascade R-CNN + Weights & Biases
Cowboy outfits detection with MMDetection, Cascade R-CNN, and Weights & Biases
References:
- Some amazing notebooks on MMDetection from Sreevishnu Damodaran
- https://www.kaggle.com/c/cowboyoutfits/discussion/254656
- https://www.kaggle.com/c/cowboyoutfits/discussion/254354
Preparation of MMDetection Config
sixxtools/split_dataset_sixx copy.py
```python
import sys
sys.path.insert(0, "./mmdetection")
#os.chdir('/home/oschung_skcc/my/git/mmdetection')
#data_path = os.path.join(os.getcwd(), 'data', 'cowboy')
#!mkdir new_anno

# Imports
import os
import copy
import json
import random
from pathlib import Path

import numpy as np
import torch
from pycocotools.coco import COCO

import mmdet
from mmdet.apis import set_random_seed, train_detector
from mmdet.datasets import build_dataset
from mmdet.models import build_detector

# Set the random seeds for reproducibility.
seed = 1234
set_random_seed(seed, deterministic=False)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
os.environ['PYTHONHASHSEED'] = str(seed)

def create_subset(trn_coco, cats, test_n=180):
    """Split the competition annotations into new train/valid subsets."""
    new_coco = {}
    new_coco['info'] = [{
        "description": "CowboySuit",
        "url": "http://github.com/dmlc/gluon-cv",
        "version": "1.0",
        "year": 2021,
        "contributor": "GluonCV/AutoGluon",
        "date_created": "2021/07/01"
    }]
    new_coco["licenses"] = [{
        "url": "http://creativecommons.org/licenses/by/2.0/",
        "id": 4,
        "name": "Attribution License"
    }]
    cat_ids = trn_coco.getCatIds(cats)
    train_img_ids = set()
    test_img_ids = set()
    # Walk the categories in reverse so the rarer classes are split first.
    for cat in cat_ids[::-1]:
        img_ids = copy.copy(trn_coco.getImgIds(catIds=[cat]))
        random.shuffle(img_ids)
        tn = min(test_n, int(len(img_ids) * 0.5))
        new_test = set(img_ids[:tn])
        exist_test_ids = new_test.intersection(train_img_ids)
        test_ids = new_test.difference(exist_test_ids)
        train_ids = set(img_ids).difference(test_ids)
        print(f'{cat} all: {len(img_ids)}, tn: {tn}, new_test: {len(new_test)}, '
              f'==> train_ids: {len(train_ids)}, test_ids: {len(test_ids)}')
        train_img_ids.update(train_ids)
        test_img_ids.update(test_ids)

    # Prune duplicates.
    dup = train_img_ids.intersection(test_img_ids)
    train_img_ids = train_img_ids - dup

    train_anno_ids = set()
    test_anno_ids = set()
    for cat in cat_ids:
        train_anno_ids.update(trn_coco.getAnnIds(imgIds=list(train_img_ids), catIds=[cat]))
        test_anno_ids.update(trn_coco.getAnnIds(imgIds=list(test_img_ids), catIds=[cat]))

    assert len(train_img_ids.intersection(test_img_ids)) == 0, \
        'img id conflicts, {}'.format(train_img_ids.intersection(test_img_ids))
    assert len(train_anno_ids.intersection(test_anno_ids)) == 0, 'anno id conflicts'
    print('train img ids #:', len(train_img_ids), 'train anno #:', len(train_anno_ids))
    print('valid img ids #:', len(test_img_ids), 'test anno #:', len(test_anno_ids))

    new_coco_test = copy.deepcopy(new_coco)

    new_coco["images"] = trn_coco.loadImgs(list(train_img_ids))
    new_coco["annotations"] = trn_coco.loadAnns(list(train_anno_ids))
    for ann in new_coco["annotations"]:
        ann.pop('segmentation', None)
    new_coco["categories"] = trn_coco.loadCats(cat_ids)

    new_coco_test["images"] = trn_coco.loadImgs(list(test_img_ids))
    new_coco_test["annotations"] = trn_coco.loadAnns(list(test_anno_ids))
    for ann in new_coco_test["annotations"]:
        ann.pop('segmentation', None)
    new_coco_test["categories"] = trn_coco.loadCats(cat_ids)

    print('new train split, images:', len(new_coco["images"]), 'annos:', len(new_coco["annotations"]))
    print('new valid split, images:', len(new_coco_test["images"]), 'annos:', len(new_coco_test["annotations"]))
    return new_coco, new_coco_test

coco = COCO('data/cowboy/train.json')
nc, nc_test = create_subset(coco, ['belt', 'sunglasses', 'boot', 'cowboy_hat', 'jacket'])

with open('data/cowboy/new_anno/new_train.json', 'w') as f:
    json.dump(nc, f)
with open('data/cowboy/new_anno/new_valid.json', 'w') as f:
    json.dump(nc_test, f)
```
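After the two JSON files are written, a quick sanity check (a minimal sketch, reusing pycocotools) confirms the splits load cleanly and the counts match what `create_subset()` printed:

```python
from pycocotools.coco import COCO

# Reload the new splits and confirm the image/annotation counts.
new_train = COCO('data/cowboy/new_anno/new_train.json')
new_valid = COCO('data/cowboy/new_anno/new_valid.json')
print('train:', len(new_train.imgs), 'images /', len(new_train.anns), 'annotations')
print('valid:', len(new_valid.imgs), 'images /', len(new_valid.anns), 'annotations')
```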
```bash
$ python sixxtools/makeConfig_sixx.py \
    -i 'configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py' \
    -o 'sixxconfigs/cascade_rcnn_r50_fpn_1x_coco.py'
```
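`makeConfig_sixx.py` itself is not listed in this post. As a rough sketch of the idea (load the base config, resolving its `_base_` inheritance chain, then dump one flattened file that can be edited directly), something like the following would do; the argument handling here is an assumption:

```python
# sixxtools/makeConfig_sixx.py -- hypothetical sketch, not the original tool
import argparse
from mmcv import Config

parser = argparse.ArgumentParser(description='Flatten an MMDetection base config')
parser.add_argument('-i', '--input', required=True, help='base config under configs/')
parser.add_argument('-o', '--output', required=True, help='path for the flattened config')
args = parser.parse_args()

cfg = Config.fromfile(args.input)  # resolves the _base_ chain
cfg.dump(args.output)              # writes a single self-contained .py config
```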
Download the checkpoint
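The pretrained Cascade R-CNN weights referenced by `load_from` below can be fetched from the MMDetection model zoo; the URL here follows the standard v2.0 download layout, so verify it against the model zoo page:

```bash
$ mkdir -p checkpoints
$ wget -P checkpoints \
    https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco/cascade_rcnn_r50_fpn_1x_coco_20200316-3dc56deb.pth
```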
General Training Settings
```python
model = dict(
    roi_head=dict(
        bbox_head=[
            dict(num_classes=5),  # Cascade R-CNN has three bbox heads;
            dict(num_classes=5),  # each one must be set to the
            dict(num_classes=5),  # 5 competition classes.
        ]))

# Epochs for the runner that runs the workflow
runner = dict(max_epochs=12)

# Learning rate of the optimizer. The LR is divided by 8 since the config
# file was originally written for 8 GPUs.
optimizer = dict(lr=0.02 / 8)

# Learning rate scheduler config used to register the LrUpdater hook
lr_config = dict(
    #policy='step',
    #step=[8, 11],
    policy='CosineAnnealing',  # Scheduler policy; Step, Cyclic, etc. are also supported. See https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py#L9 for the supported LrUpdater hooks.
    by_epoch=False,
    min_lr=1e-07,
    warmup='linear',     # The warmup policy; `exp` and `constant` are also supported.
    warmup_iters=500,    # The number of iterations for warmup
    warmup_ratio=0.001,  # The ratio of the starting learning rate used for warmup
)

# Config for the checkpoint hook. See https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py for the implementation.
checkpoint_config = dict(interval=1)  # Save a checkpoint every epoch

load_from = 'checkpoints/cascade_rcnn_r50_fpn_1x_coco_20200316-3dc56deb.pth'
workflow = [('train', 1), ('val', 1)]
```
Configure Datasets for Training and Evaluation
```python
dataset_type = 'CocoDataset'
classes = ('belt', 'sunglasses', 'boot', 'cowboy_hat', 'jacket')
data_images = 'data/cowboy/images'

data = dict(
    samples_per_gpu=4,  # Batch size of a single GPU
    workers_per_gpu=2,  # Workers to pre-fetch data for each single GPU
    train=dict(
        type='CocoDataset',
        ann_file='data/cowboy/new_anno/new_train.json',
        img_prefix=data_images,
        classes=classes),
    val=dict(
        type='CocoDataset',
        ann_file='data/cowboy/new_anno/new_valid.json',
        img_prefix=data_images,
        classes=classes),
    test=dict(
        type='CocoDataset',
        ann_file='data/cowboy/new_anno/new_valid.json',
        img_prefix=data_images,
        classes=classes))
```
Setting Metric for Evaluation
```python
# Config for the evaluation hook; see
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/evaluation/eval_hooks.py#L7
# for details. `interval` sets the epoch interval at which evaluation runs,
# and `metric` selects the metrics computed during evaluation.
evaluation = dict(interval=1, metric='bbox', save_best='bbox_mAP')
```
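Once these edits are saved into `sixxconfigs/cascade_rcnn_r50_fpn_1x_coco.py`, the resolved config can be loaded and spot-checked before launching a run; a minimal sketch:

```python
from mmcv import Config

cfg = Config.fromfile('sixxconfigs/cascade_rcnn_r50_fpn_1x_coco.py')
print(cfg.data.train.ann_file)                      # data/cowboy/new_anno/new_train.json
print(cfg.model.roi_head.bbox_head[0].num_classes)  # 5
print(cfg.pretty_text)                              # the fully resolved config
```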
Prepare the Pre-processing & Augmentation Pipelines
An alternative data augmentation strategy can also be chosen; the Albumentations transforms below are one option.
```python
albu_train_transforms = [
    dict(type='ShiftScaleRotate', shift_limit=0.0625, scale_limit=0.15, rotate_limit=15, p=0.4),
    dict(type='RandomBrightnessContrast', brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    dict(type='IAAAffine', shear=(-10.0, 10.0), p=0.4),
    dict(type='Blur', p=1.0, blur_limit=7),
    dict(type='CLAHE', p=0.5),
    dict(type='Equalize', mode='cv', p=0.4),
    dict(
        type='OneOf',
        transforms=[
            dict(type='GaussianBlur', p=1.0, blur_limit=7),
            dict(type='MedianBlur', p=1.0, blur_limit=7),
        ],
        p=0.4,
    ),
]

# The uncommented Albu wrapper is the element spliced into the default
# train_pipeline (shown commented out for context):
# train_pipeline = [
#     dict(type='LoadImageFromFile'),
#     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
#     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
#     dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='Albu',
        transforms=albu_train_transforms,
        bbox_params=dict(
            type='BboxParams',
            format='pascal_voc',  # MMDetection boxes are absolute [x1, y1, x2, y2]
            label_fields=['gt_labels'],
            min_visibility=0.0,
            filter_lost_elements=True),
        keymap=dict(img='image', gt_bboxes='bboxes'),
        update_pad_shape=False,
        skip_img_without_anno=True),
#     dict(
#         type='Normalize',
#         mean=[123.675, 116.28, 103.53],
#         std=[58.395, 57.12, 57.375],
#         to_rgb=True),
#     dict(type='Pad', size_divisor=32),
#     dict(type='DefaultFormatBundle'),
#     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
# ]
```
WandB (Weights & Biases) Integration
Connect the run to WandB for experiment tracking and logging.
```python
# Change the wandb username and project name below
wnb_username = 'onesixx'
wnb_project_name = 'kaggle_cowboy_outfits'
model_name = 'cascade_rcnn_r50_fpn_1x'
job = 1  # 2, 3, ...

log_config = dict(
    interval=40,  # Interval (in iterations) to print the log
    hooks=[
        dict(type='TextLoggerHook', interval=1),
        dict(type='MMDetWandbHook',
            interval=10,
            init_kwargs=dict(
                project=wnb_project_name,
                name=f'exp-{model_name}-job{job}',
                entity=wnb_username),
            log_checkpoint=True,
            log_checkpoint_metadata=True,
            num_eval_images=100,  # Number of validation images logged to W&B
            bbox_score_thr=0.6)   # Score threshold for logged predicted boxes
    ])
```
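The MMDetWandbHook assumes the machine is already authenticated with W&B; a one-time login (with an API key from https://wandb.ai/authorize) is enough:

```bash
$ pip install wandb
$ wandb login   # paste your API key when prompted
```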
Training and Evaluation
Experiment Tracking and Logging with Weights & Biases
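With the config in place, training can be launched through MMDetection's Python API. The sketch below follows the standard mmdet 2.x flow; `work_dir`, `gpu_ids`, and `seed` are assumed values that `tools/train.py` would normally fill in:

```python
import os.path as osp

import mmcv
from mmcv import Config
from mmdet.apis import train_detector
from mmdet.datasets import build_dataset
from mmdet.models import build_detector

cfg = Config.fromfile('sixxconfigs/cascade_rcnn_r50_fpn_1x_coco.py')
cfg.work_dir = 'work_dirs/cascade_rcnn_r50_fpn_1x_cowboy'  # assumed output dir
cfg.gpu_ids = [0]  # runtime fields the train script normally sets
cfg.seed = 1234
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

# The [('train', 1), ('val', 1)] workflow expects both datasets.
datasets = [build_dataset(cfg.data.train), build_dataset(cfg.data.val)]

model = build_detector(cfg.model)
model.CLASSES = datasets[0].CLASSES

# validate=True registers the bbox-mAP evaluation hook configured above;
# the W&B hook picks up the run automatically from log_config.
train_detector(model, datasets, cfg, distributed=False, validate=True)
```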
Inference
Inference and Visualize Output
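For inference, mmdet's high-level API is enough. A minimal sketch, where the checkpoint filename (written by `save_best='bbox_mAP'`) and the test image path are assumptions:

```python
from mmdet.apis import init_detector, inference_detector, show_result_pyplot

config_file = 'sixxconfigs/cascade_rcnn_r50_fpn_1x_coco.py'
# Assumed filename; save_best writes the best checkpoint into work_dir.
checkpoint_file = 'work_dirs/cascade_rcnn_r50_fpn_1x_cowboy/best_bbox_mAP.pth'

model = init_detector(config_file, checkpoint_file, device='cuda:0')

img = 'data/cowboy/images/000001.jpg'  # hypothetical test image
# result holds one array of [x1, y1, x2, y2, score] rows per class.
result = inference_detector(model, img)
show_result_pyplot(model, img, result, score_thr=0.6)
```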