git clone https://github.com/open-mmlab/mmaction2.git
cd mmaction2
mmaction2
|- mmaction
|- tools
|- configs
|- data
| |- kinetics400
| | |- rawframes_train
| | |- rawframes_val
| | |- kinetics_train_list.txt
| | |- kinetics_val_list.txt
| |- ucf101
| | ...
| ...
# single-gpu test
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
# multi-gpu test
bash ./tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [optional arguments]
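For example, evaluating a recognizer with top-k accuracy on a single GPU might look like this (config and checkpoint names are illustrative, not prescribed by this note):
python tools/test.py configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py \
    checkpoints/tsn_r50.pth --eval top_k_accuracy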
Create a checkpoints directory and save the ckpt file there, then create a Python file:
import torch
from mmaction.apis import init_recognizer, inference_recognizer
import mmcv
cfgs = 'configs/model1/cfgs1.py'
ckpt = 'checkpoints/ckpt.pth'
device = torch.device('cuda')
# build the model from the config and checkpoint
model = init_recognizer(cfgs, ckpt, device=device)
# inference
video = 'path/to/video'
labels = 'path/to/label_file'
results = inference_recognizer(model, video)
labels = open(labels).readlines()
labels = [x.strip() for x in labels]
results = [(labels[k[0]], k[1]) for k in results]
print('The top-5 labels with corresponding scores are:')
for result in results:
    print(f'{result[0]}: ', result[1])
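The label file is expected to hold one class name per line; for illustration, the first few UCF101 classes would look like:
ApplyEyeMakeup
ApplyLipstick
Archery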
Create the file mmaction/models/backbones/resnet_tsm.py:
from ..builder import BACKBONES
from .resnet import ResNet

@BACKBONES.register_module()
class ResNetTSM(ResNet):
    def __init__(self, depth, num_segments=8):
        pass

    def forward(self, x):
        pass
Add the following line to mmaction/models/backbones/__init__.py:
from .resnet_tsm import ResNetTSM
Then modify the configuration file as follows:
# before
backbone = dict(type='ResNetTSM', pretrained='torchvision://resnet50', depth=50, norm_eval=False)
# after
backbone = dict(type='ResNetTSM', pretrained='torchvision://resnet50', depth=50, norm_eval=False, shift_div=8)
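What the skeleton above leaves out is the temporal shift itself. As a rough sketch (not the library code, which lives in mmaction/models/backbones/resnet_tsm.py), shift_div=8 means 1/8 of the channels are shifted one segment back in time, 1/8 one segment forward, and the rest are left untouched:

import torch

def temporal_shift(x, num_segments, shift_div=8):
    # x: (N * num_segments, C, H, W) -- frames stacked in the batch dimension
    n, c, h, w = x.size()
    x = x.view(-1, num_segments, c, h, w)
    fold = c // shift_div
    out = torch.zeros_like(x)
    out[:, :-1, :fold] = x[:, 1:, :fold]                  # shift left: frame t sees t+1
    out[:, 1:, fold:2 * fold] = x[:, :-1, fold:2 * fold]  # shift right: frame t sees t-1
    out[:, :, 2 * fold:] = x[:, :, 2 * fold:]             # remaining channels unchanged
    return out.view(n, c, h, w)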
During training the model runs forward_train(), and during testing forward_test(). Both distributed and non-distributed training are supported, backed by MMDistributedDataParallel and MMDataParallel respectively.
# Single GPU
python tools/train.py ${CONFIG_FILE} [optional arguments]
# Multi GPU
bash ./tools/dist_train.sh ${CONFIG_FILE} ${GPU_NUM} [optional arguments]
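For instance, fine-tuning TSN on UCF101 with 8 GPUs and periodic validation could look like this (config name is illustrative):
bash ./tools/dist_train.sh configs/recognition/tsn/tsn_r50_1x1x3_75e_ucf101_rgb.py 8 --validate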
Modify the model configuration:
model = dict(
    type='Recognizer2D',
    backbone=dict(
        type='ResNet',
        pretrained='torchvision://resnet50',
        depth=50,
        norm_eval=False
    ),
    cls_head=dict(
        type='TSNHead',
        num_classes=101,
        in_channels=2048,
        spatial_type='avg',
        consensus=dict(type='AvgConsensus', dim=1),
        dropout_ratio=0.4,
        init_std=0.01
    ),
    train_cfg=None,
    test_cfg=dict(average_clips=None)
)
Modify the dataset settings:
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/ucf101/rawframes_train/'
data_root_val = 'data/ucf101/rawframes_val/'
ann_file_train = 'data/ucf101/ucf101_train_list.txt'
ann_file_val = 'data/ucf101/ucf101_val_list.txt'
ann_file_test = 'data/ucf101/ucf101_val_list.txt'
Modify Training Schedule
Fine-tuning usually requires a smaller learning rate and fewer epochs.
# optimizer
optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
lr_config = dict(policy='step', step=[20,40])
total_epochs = 50
checkpoint_config = dict(interval=5)
Use a pre-trained model:
# add to the configuration
load_from = '~~/*.pth'
The easiest way to use a custom dataset is to convert it into an existing format: RawframeDataset or VideoDataset. Three kinds of annotation files exist.
RawframeDataset uses a text file in which each line lists "frame directory", "total frames", and "label":
some/directory-1 163 1
some/directory-2 122 1
some/directory-3 258 2
some/directory-4 234 2
some/directory-5 295 3
some/directory-6 121 3
VideoDataset uses a text file in which each line lists "filepath" and "label":
some/path/000.mp4 1
some/path/001.mp4 1
some/path/002.mp4 2
some/path/003.mp4 2
some/path/004.mp4 3
some/path/005.mp4 3
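A small helper can generate such a list when videos are stored in per-class folders. A sketch, under the assumption of a data/custom/videos/<class_name>/*.mp4 layout (all paths and names here are hypothetical):

import os
import os.path as osp

video_root = 'data/custom/videos'
classes = sorted(os.listdir(video_root))  # folder name -> label index
with open('data/custom/custom_train_list.txt', 'w') as fout:
    for label, cls in enumerate(classes):
        for name in sorted(os.listdir(osp.join(video_root, cls))):
            fout.write(f'{osp.join(cls, name)} {label}\n')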
ActivityNetDataset uses JSON-format annotations:
{
    "video1": {
        "duration_second": 211.53,
        "duration_frame": 6337,
        "annotations": [
            {
                ...
            }
        ],
        ...
    },
    "video2": ...
}
To create a custom dataset class, override the load_annotations(self), evaluate(self, results, metrics, logger), and dump_results(self, results, out) methods. Once that is done, modify the configuration:
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'path/to/your/root'
data_root_val = 'path/to/your/root_val'
ann_file_train = 'data/custom/custom_train_list.txt'
ann_file_val = 'data/custom/custom_val_list.txt'
ann_file_test = 'data/custom/custom_val_list.txt'
data = dict(
    videos_per_gpu=32,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        ...),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        ...),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        ...))
Example:
import copy
import os.path as osp

import mmcv

from .base import BaseDataset
from .builder import DATASETS


@DATASETS.register_module()
class MyDataset(BaseDataset):
    def __init__(self,
                 ann_file,
                 pipeline,
                 data_prefix=None,
                 test_mode=False,
                 filename_tmpl='img_{:05}.jpg'):
        # pass data_prefix and test_mode by keyword so they bind to the
        # right BaseDataset parameters
        super(MyDataset, self).__init__(
            ann_file, pipeline, data_prefix=data_prefix, test_mode=test_mode)
        self.filename_tmpl = filename_tmpl

    def load_annotations(self):
        video_infos = []
        with open(self.ann_file, 'r') as fin:
            for line in fin:
                if line.startswith("directory"):
                    continue
                frame_dir, total_frames, label = line.split(',')
                if self.data_prefix is not None:
                    frame_dir = osp.join(self.data_prefix, frame_dir)
                video_infos.append(
                    dict(
                        frame_dir=frame_dir,
                        total_frames=int(total_frames),
                        label=int(label)))
        return video_infos

    def prepare_train_frames(self, idx):
        results = copy.deepcopy(self.video_infos[idx])
        results['filename_tmpl'] = self.filename_tmpl
        return self.pipeline(results)

    def prepare_test_frames(self, idx):
        results = copy.deepcopy(self.video_infos[idx])
        results['filename_tmpl'] = self.filename_tmpl
        return self.pipeline(results)

    def evaluate(self,
                 results,
                 metrics='top_k_accuracy',
                 topk=(1, 5),
                 logger=None):
        pass
dataset_A_train = dict(
    type='MyDataset',
    ann_file=ann_file_train,
    pipeline=train_pipeline
)
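If the dataset should be traversed several times per epoch, the custom class can be combined with the existing RepeatDataset wrapper; an illustrative config (times=N is a placeholder):
dataset_A_train = dict(
    type='RepeatDataset',
    times=N,
    dataset=dict(type='MyDataset', ann_file=ann_file_train, pipeline=train_pipeline)
)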
To add a custom optimizer, create a new module:
# mmaction/core/optimizer/my_optimizer.py
from mmcv.runner import OPTIMIZERS
from torch.optim import Optimizer

@OPTIMIZERS.register_module()
class MyOptimizer(Optimizer):
    def __init__(self, a, b, c):
        pass
Then add the import to mmaction/core/optimizer/__init__.py:
from .my_optimizer import MyOptimizer
Then use it in the configuration:
optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value)
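For reference, the skeleton fleshed out into something runnable might look as follows. This is a hedged sketch: a bare-bones SGD, with the a/b/c arguments above replaced by a single illustrative lr, and the class name MySGD is hypothetical:

import torch
from mmcv.runner import OPTIMIZERS

@OPTIMIZERS.register_module()
class MySGD(torch.optim.Optimizer):
    def __init__(self, params, lr=0.01):
        defaults = dict(lr=lr)
        super().__init__(params, defaults)

    @torch.no_grad()
    def step(self, closure=None):
        loss = closure() if closure is not None else None
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is not None:
                    # plain gradient descent update: p <- p - lr * grad
                    p.add_(p.grad, alpha=-group['lr'])
        return loss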
To add a new backbone, write a new module (mmaction/models/backbones/resnet.py):
import torch.nn as nn

from ..builder import BACKBONES

@BACKBONES.register_module()
class ResNet(nn.Module):
    def __init__(self, arg1, arg2):
        pass

    def forward(self, x):
        pass

    def init_weights(self, pretrained=None):
        pass
Add the import to mmaction/models/backbones/__init__.py:
from .resnet import ResNet
Then modify the configuration:
model = dict(
    ...
    backbone=dict(
        type='ResNet',
        arg1=xxx,
        arg2=xxx),
    ...
)
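To make the interface concrete, here is the same skeleton filled in with a toy backbone (purely illustrative; TinyBackbone and its arguments are hypothetical names):

import torch.nn as nn
from ..builder import BACKBONES

@BACKBONES.register_module()
class TinyBackbone(nn.Module):
    def __init__(self, in_channels=3, out_channels=64):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, 3, stride=2, padding=1)

    def forward(self, x):
        # return the feature map consumed by the cls_head
        return self.conv(x)

    def init_weights(self, pretrained=None):
        # load `pretrained` weights here; otherwise initialize from scratch
        if pretrained is None:
            nn.init.kaiming_normal_(self.conv.weight)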
To add a new head, write a new module (mmaction/models/heads/tsn_head.py):
from .base import BaseHead
from ..builder import HEADS

@HEADS.register_module()
class TSNHead(BaseHead):
    def __init__(self, arg1, arg2):
        pass

    def forward(self, x):
        pass

    def init_weights(self, pretrained=None):
        pass
Add the import to mmaction/models/heads/__init__.py:
from .tsn_head import TSNHead
Then modify the configuration:
model = dict(
    ...
    cls_head=dict(
        type='TSNHead',
        num_classes=400,
        in_channels=2048,
        arg1=xxx,
        arg2=xxx),
    ...
)
To add a new loss, write a new module (mmaction/models/losses/my_loss.py):
import torch
import torch.nn as nn

from ..builder import LOSSES

def my_loss(pred, target):
    assert pred.size() == target.size() and target.numel() > 0
    loss = torch.abs(pred - target)
    return loss

@LOSSES.register_module()
class MyLoss(nn.Module):
    def forward(self, pred, target):
        loss = my_loss(pred, target)
        return loss
Add the import to mmaction/models/losses/__init__.py:
from .my_loss import MyLoss, my_loss
Then use it in the configuration, e.g. in the head's classification loss field:
loss_cls=dict(type='MyLoss')
# modify the configuration
lr_config = dict(policy='step', step=[20, 40])
In train.py, the training hooks are registered as follows:
runner.register_training_hooks(
    cfg.lr_config,
    optimizer_config,
    cfg.checkpoint_config,
    cfg.log_config,
    cfg.get('momentum_config', None)
)
To add a custom learning-rate scheduler, write a new module (mmaction/core/scheduler.py):
from mmcv.runner import HOOKS, LrUpdaterHook

# mmcv resolves policy='RelativeStep' to the class name 'RelativeStepLrUpdaterHook'
@HOOKS.register_module()
class RelativeStepLrUpdaterHook(LrUpdaterHook):
    def __init__(self, steps, lrs, **kwargs):
        super().__init__(**kwargs)
        assert len(steps) == len(lrs)
        self.steps = steps
        self.lrs = lrs

    def get_lr(self, runner, base_lr):
        progress = runner.epoch if self.by_epoch else runner.iter
        for i in range(len(self.steps)):
            if progress < self.steps[i]:
                return self.lrs[i]
Then modify the configuration:
lr_config=dict(policy='RelativeStep', steps=[20, 40, 60], lrs=[0.1, 0.01, 0.001])
pip install onnx onnxruntime
python tools/deployment/pytorch2onnx.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
# export a recognizer
python tools/deployment/pytorch2onnx.py $CONFIG_PATH $CHECKPOINT_PATH --shape $SHAPE --verify
# export a localizer
python tools/deployment/pytorch2onnx.py $CONFIG_PATH $CHECKPOINT_PATH --is-localizer --shape $SHAPE --verify
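Beyond --verify, the exported file can be sanity-checked by hand with onnxruntime. A sketch, where the file name and input shape are illustrative and must match the --shape used at export time:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('model.onnx')
input_name = sess.get_inputs()[0].name
dummy = np.random.randn(1, 3, 8, 224, 224).astype(np.float32)  # illustrative shape
outputs = sess.run(None, {input_name: dummy})
print(outputs[0].shape)  # class scores produced by the recognizer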
References
https://mmaction2.readthedocs.io/en/latest/getting_started.html
https://mmaction2.readthedocs.io/en/latest/tutorials/2_finetune.html
https://mmaction2.readthedocs.io/en/latest/tutorials/3_new_dataset.html
https://mmaction2.readthedocs.io/en/latest/tutorials/5_new_modules.html
https://mmaction2.readthedocs.io/en/latest/tutorials/6_export_model.html