DDRNet custom dataset학습 시도.

김찬우·2025년 9월 3일

https://github.com/ydhongHIT/DDRNet
DDRNet연동 깃허브

https://github.com/chenjun2hao/DDRNet.pytorch
해당 깃허브의 코드로 custom dataset 학습 시도

ETRI dataset에서 다운받은 format은 images -> jpg와 labels -> txt형태로 구성.

해당 깃허브의 코드는 label을 mask 이미지 형태로 받아 학습하도록 구성되어 있다.
(data loader에 따라서 mask를 그대로 사용하기도 하고 txt형태의 데이터를 받기도 한다)

label을 불러오는데에는 lst파일을 이용하여 경로를 받는다.

1. txt를 png형태로 데이터셋을 변형

2. 해당 경로에 따른 lst파일 생성

3. 파일 변환이 정상적으로 되었는지 테스트.

txt2png 코드

import os
import cv2
import numpy as np

# Dataset split to convert. Exactly one IMAGE_DIR / TXT_LABEL_DIR /
# OUTPUT_PNG_DIR triple is active at a time; to process another split,
# comment out the active lines and uncomment the matching ones.

# Directory holding the source images.
#IMAGE_DIR = r"C:\DDRNet\data\ETRI\train\images"
#IMAGE_DIR = r"C:\DDRNet\data\ETRI\val\images"
IMAGE_DIR = r"C:\DDRNet\data\ETRI\test\images"

# Directory holding the .txt label files.
#TXT_LABEL_DIR = r"C:\DDRNet\data\ETRI\train\labels"
#TXT_LABEL_DIR = r"C:\DDRNet\data\ETRI\val\labels"
TXT_LABEL_DIR = r"C:\DDRNet\data\ETRI\test\labels"

# Directory the generated .png masks are written to.
#OUTPUT_PNG_DIR = r"C:\DDRNet\data\ETRI\train\masks_png"
#OUTPUT_PNG_DIR = r"C:\DDRNet\data\ETRI\val\masks_png"
OUTPUT_PNG_DIR = r"C:\DDRNet\data\ETRI\test\masks_png"

# Maps the class token at the start of a label line ("0" .. "41") to the
# pixel value painted into the mask.
# NOTE(review): class "0" maps to pixel value 0, which is also the value the
# mask is initialised with - confirm that class 0 is meant to be background.
CLASS_TO_ID = {str(i): i for i in range(42)}

def _parse_polygon_line(line, width, height):
    """Parse one label line into ``(mask_value, pixel_coords)``.

    A line is ``<class> x1 y1 x2 y2 ...`` with coordinates normalised to the
    image size. Returns ``None`` when the line cannot describe a polygon:
    unknown class token, odd number of coordinates, fewer than three
    vertices, or non-numeric coordinates.
    """
    parts = line.split()
    # Class token + at least three (x, y) pairs. Shorter lines (for example
    # bounding-box style "cls cx cy w h") would otherwise be rasterised as a
    # stray pixel or line instead of a region.
    if len(parts) < 7 or len(parts) % 2 == 0:
        return None

    mask_value = CLASS_TO_ID.get(parts[0])
    if mask_value is None:
        return None

    try:
        normalized_coords = np.array(parts[1:], dtype=np.float32).reshape((-1, 2))
    except ValueError:
        return None

    # Scale x by the image width and y by the image height, then truncate to
    # integer pixel positions as cv2.fillPoly requires int32 points.
    pixel_coords = (normalized_coords * np.array([width, height])).astype(np.int32)
    return mask_value, pixel_coords


def convert_txt_to_png():
    """Rasterise every polygon label file in TXT_LABEL_DIR into a PNG mask.

    For each ``<name>.txt`` the matching image ``<name>.jpg/.jpeg/.png`` in
    IMAGE_DIR is read only to obtain the mask size. Each valid label line is
    filled into a single-channel uint8 mask with the value from CLASS_TO_ID,
    and the mask is written to ``OUTPUT_PNG_DIR/<name>.png``. Polygons are
    drawn in file order, so later lines overwrite earlier ones where they
    overlap.

    Files whose image is missing or unreadable, and masks that fail to be
    written, are reported with their path and skipped.
    """
    os.makedirs(OUTPUT_PNG_DIR, exist_ok=True)

    txt_files = [f for f in os.listdir(TXT_LABEL_DIR) if f.endswith('.txt')]
    print(f"{len(txt_files)}개의 .txt 파일을 변환.")

    processed_count = 0
    for txt_filename in txt_files:
        base_filename = os.path.splitext(txt_filename)[0]

        # First existing candidate wins; None when no image accompanies the label.
        candidates = (
            os.path.join(IMAGE_DIR, base_filename + ext)
            for ext in ('.jpg', '.jpeg', '.png')
        )
        img_path = next((p for p in candidates if os.path.exists(p)), None)
        if img_path is None:
            print(f"원본 이미지가 존재하지 않음: {base_filename}")
            continue

        img = cv2.imread(img_path)
        if img is None:
            print(f"경로 오류: {img_path}")
            continue
        height, width = img.shape[:2]

        mask = np.zeros((height, width), dtype=np.uint8)

        txt_path = os.path.join(TXT_LABEL_DIR, txt_filename)
        with open(txt_path, 'r') as f:
            for line in f:
                parsed = _parse_polygon_line(line, width, height)
                if parsed is None:
                    continue
                mask_value, pixel_coords = parsed
                cv2.fillPoly(mask, [pixel_coords], color=mask_value)

        output_path = os.path.join(OUTPUT_PNG_DIR, base_filename + '.png')
        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(output_path, mask):
            print(f"마스크 저장 실패: {output_path}")
            continue

        processed_count += 1
        if processed_count % 100 == 0:
            print(f"{processed_count}/{len(txt_files)} 파일 처리 완료...")

    print("변환 완료")


if __name__ == '__main__':
    convert_txt_to_png()

lst 파일 생성

import os

def create_lst_files(base_path, label_subdir='labels', label_ext='.txt'):
    """Write ``<split>.lst`` index files for the train/val/test splits.

    For every split whose ``<base_path>/<split>/images`` folder exists, the
    ``.jpg`` files in it (extension matched case-insensitively) are listed in
    sorted order, one per line, as::

        <split>/images/<image file> <split>/<label_subdir>/<stem><label_ext>

    The image path uses the file name exactly as it appears on disk, so an
    image stored as ``X.JPG`` is not rewritten to ``X.jpg``. The label files
    themselves are not checked for existence. A split with no matching
    images produces no ``.lst`` file.

    Args:
        base_path: Dataset root containing the split folders.
        label_subdir: Folder name, inside each split, used for the label
            column (for example ``'masks_png'``).
        label_ext: Extension appended to the image stem for the label column
            (for example ``'.png'``).
    """
    splits = ['train', 'val', 'test']

    for split in splits:
        # e.g. C:\DDRNet\data\ETRI\train\images
        image_dir = os.path.join(base_path, split, 'images')

        if not os.path.isdir(image_dir):
            print(f"폴더의 위치 찾을 수 없음: {image_dir}")
            continue

        jpg_files = sorted(
            f for f in os.listdir(image_dir) if f.lower().endswith('.jpg')
        )

        lst_content = []
        for image_file in jpg_files:
            base_name = os.path.splitext(image_file)[0]
            # Forward slashes are written regardless of the host OS.
            lst_content.append(
                f"{split}/images/{image_file} "
                f"{split}/{label_subdir}/{base_name}{label_ext}"
            )

        if lst_content:
            lst_file_path = os.path.join(base_path, f"{split}.lst")
            with open(lst_file_path, 'w') as f:
                f.write('\n'.join(lst_content))
            print(f"✅ '{lst_file_path}' (총 {len(lst_content)} 줄)")

# Dataset root; create_lst_files looks for <root>/<split>/images under it for
# the train, val and test splits and writes <root>/<split>.lst.
dataset_base_path = r'C:\DDRNet\data\ETRI'
create_lst_files(dataset_base_path)

파일 변환이 정상적으로 되었는지 테스트.

import os
import cv2
import numpy as np
from PIL import Image

# Folder whose .png class-index masks are visualised.
PNG_MASK_DIR = r"C:\DDRNet\data\ETRI\train\masks_png" 
# Folder created for colourised masks.
OUTPUT_VIS_DIR = r"C:\DDRNet\data\ETRI\train\masks_visualized"

# The dataset has 42 classes in total, so class ids 0..41 each get their own
# colour for visual inspection. The list index is the class id.
# NOTE(review): the colour names below (e.g. (128, 0, 0) = Dark Red) imply
# the tuples are in (R, G, B) component order.
COLOR_PALETTE = [
    (0, 0, 0),       # 0: background (Black)
    (128, 0, 0),     # 1: Dark Red
    (0, 128, 0),     # 2: Dark Green
    (128, 128, 0),   # 3: Dark Yellow
    (0, 0, 128),     # 4: Dark Blue
    (128, 0, 128),   # 5: Dark Magenta
    (0, 128, 128),   # 6: Dark Cyan
    (128, 128, 128), # 7: Gray
    (64, 0, 0),      # 8:
    (192, 0, 0),     # 9:
    (64, 128, 0),    # 10:
    (192, 128, 0),   # 11:
    (64, 0, 128),    # 12:
    (192, 0, 128),   # 13:
    (64, 128, 128),  # 14:
    (192, 128, 128), # 15:
    (0, 64, 0),      # 16:
    (128, 64, 0),    # 17:
    (0, 192, 0),     # 18:
    (128, 192, 0),   # 19:
    (0, 64, 128),    # 20:
    (128, 64, 128),  # 21:
    (0, 192, 128),   # 22:
    (128, 192, 128), # 23:
    (64, 64, 0),     # 24:
    (192, 64, 0),    # 25:
    (64, 192, 0),    # 26:
    (192, 192, 0),   # 27:
    (64, 64, 128),   # 28:
    (192, 64, 128),  # 29:
    (64, 192, 128),  # 30:
    (192, 192, 128), # 31:
    (0, 0, 64),      # 32:
    (128, 0, 64),    # 33:
    (0, 128, 64),    # 34:
    (128, 128, 64),  # 35:
    (0, 0, 192),     # 36:
    (128, 0, 192),   # 37:
    (0, 128, 192),   # 38:
    (128, 128, 192), # 39:
    (64, 0, 64),     # 40:
    (192, 0, 64),    # 41:
]


def visualize_masks(show=True, save=False):
    """Colourise every class-index mask in PNG_MASK_DIR for inspection.

    Each ``.png`` in PNG_MASK_DIR is read unchanged and expected to be a
    single-channel image of class ids. Every pixel is replaced by the
    COLOR_PALETTE entry for its class id; ids outside the palette stay black.

    Args:
        show: Display each colourised mask in an OpenCV window and wait for a
            key press before moving on (the original behaviour).
        save: Also write each colourised mask to OUTPUT_VIS_DIR under the
            same file name.

    Unreadable files and masks that are not single-channel are reported and
    skipped.
    """
    os.makedirs(OUTPUT_VIS_DIR, exist_ok=True)

    png_files = [f for f in os.listdir(PNG_MASK_DIR) if f.endswith('.png')]
    print(f"총 {len(png_files)}개의 .png 마스크를 시각화합니다.")

    # COLOR_PALETTE entries are (R, G, B) while OpenCV displays and writes
    # images as (B, G, R); reverse the components once so the colours shown
    # match the names documented in the palette.
    palette_bgr = np.ascontiguousarray(
        np.array(COLOR_PALETTE, dtype=np.uint8)[:, ::-1]
    )
    num_classes = len(palette_bgr)

    processed_count = 0
    for png_filename in png_files:
        png_path = os.path.join(PNG_MASK_DIR, png_filename)

        mask = cv2.imread(png_path, cv2.IMREAD_UNCHANGED)

        if mask is None:
            print(f"경고: 마스크 파일 {png_path}를 읽을 수 없습니다. 건너뜁니다.")
            continue
        if mask.ndim != 2:
            print(f"경고: 마스크 파일 {png_path}는 단일 채널이 아닙니다. 건너뜁니다.")
            continue

        # Look up all pixels at once; ids without a palette entry are routed
        # to index 0 for the lookup and then forced to black.
        known = mask < num_classes
        colored_mask = palette_bgr[np.where(known, mask, 0)]
        colored_mask[~known] = 0

        if save:
            output_path = os.path.join(OUTPUT_VIS_DIR, png_filename)
            if not cv2.imwrite(output_path, colored_mask):
                print(f"경고: {output_path} 저장에 실패했습니다.")
        if show:
            cv2.imshow("visualize", colored_mask)
            cv2.waitKey()

        processed_count += 1
        if processed_count % 100 == 0:
            print(f"{processed_count}/{len(png_files)} 파일 시각화 완료...")


if __name__ == '__main__':
    visualize_masks()

작동 영상

데이터셋 조정을 마치고 학습을 진행한 결과.

명령어
python tools/train.py --cfg experiments/cityscapes/ddrnet_39.yaml
prompt 내용

loss값이 0으로 고정되고 Acc값도 소수점의 자리에 위치하는 것을 확인.

dataset에 255와 같은 mask 값이 존재하는지 데이터 결함 테스트도 진행했으나 dataset 자체에는 문제가 없다는 것을 확인.
numpy 버전에 문제가 있는 것으로 추정되나, 현재 GPU에 맞는 pytorch 버전으로는 환경을 맞출 수 없기에 다른 github code를 사용하는 방향으로 선회하여 학습을 시도해 보기로 함.

학습 관련 기능이 들어있는 github
DDRNet.pytorch, deci.ai 시도
Segmentation-Pytorch 학습 시도 중

Cityscapes dataset이 아닌 custom dataset을 학습하기에 data loader와 pkl등의 코드 생성중

김찬우

이전 포스트

DDRNet(Deep Dual-resolution Networks)

다음 포스트

DDRNet custom dataset학습 시도.

DDRNet(Deep Dual-resolution Networks)

DDRNet 학습.

0개의 댓글