*메소드
def _crop(img, labels, max_loop=250):
    """Randomly crop a square patch and keep only the boxes centred inside it.

    Every attempt draws a scale from a fixed set, sizes a square from the
    image's shorter side, and places it at a random offset. The attempt is
    accepted only when at least one box lies entirely inside the square and
    at least one box centre falls strictly inside it. On success the patch
    is returned with the surviving boxes shifted into patch coordinates;
    if every attempt fails, the inputs are returned unchanged.

    Args:
        img: image tensor indexed as [row, column, channel]; the loop's
            shape invariant fixes the channel count at 3.
        labels: [N, 5] float tensor. Columns 0-3 are box corners
            (x_min, y_min, x_max, y_max) in pixels; column 4 is carried
            through untouched.
        max_loop: loop bound. The counter starts at -1, so up to
            ``max_loop + 1`` attempts are made.

    Returns:
        Tuple ``(img, labels)``.
    """
    shape = tf.shape(img)

    def matrix_iof(a, b):
        """Intersection area divided by the area of `a`, for each (a, b) pair."""
        top_left = tf.math.maximum(a[:, tf.newaxis, :2], b[:, :2])
        bottom_right = tf.math.minimum(a[:, tf.newaxis, 2:], b[:, 2:])
        # Pairs that do not overlap on both axes contribute zero area.
        overlaps = tf.cast(
            tf.reduce_all(top_left < bottom_right, axis=2), tf.float32)
        inter_area = tf.math.reduce_prod(
            bottom_right - top_left, axis=2) * overlaps
        a_area = tf.math.reduce_prod(a[:, 2:] - a[:, :2], axis=1)
        return inter_area / tf.math.maximum(a_area[:, tf.newaxis], 1)

    def crop_loop_body(i, img, labels):
        # Draw the square's side length, then its top-left corner.
        scale_choices = tf.constant(
            [0.3, 0.45, 0.6, 0.8, 1.0], dtype=tf.float32)
        scale = scale_choices[tf.random.uniform([], 0, 5, dtype=tf.int32)]
        short_side = tf.cast(tf.minimum(shape[0], shape[1]), tf.float32)
        side = tf.cast(scale * short_side, tf.int32)
        top = tf.random.uniform([], 0, shape[0] - side + 1, dtype=tf.int32)
        left = tf.random.uniform([], 0, shape[1] - side + 1, dtype=tf.int32)

        roi = tf.cast(
            tf.stack([left, top, left + side, top + side]), tf.float32)

        # Condition 1: some box is completely covered by the region.
        iof = matrix_iof(labels[:, :4], roi[tf.newaxis])
        has_full_box = tf.math.reduce_any(iof >= 1)

        # Condition 2: some box centre lies strictly inside the region.
        centers = (labels[:, :2] + labels[:, 2:4]) / 2
        center_inside = tf.reduce_all(
            tf.math.logical_and(roi[:2] < centers, centers < roi[2:]),
            axis=1)
        is_valid = tf.math.logical_and(
            has_full_box, tf.reduce_any(center_inside))

        patch = img[top:top + side, left:left + side, :]

        # Keep the boxes whose centre is inside and move them into the
        # patch's coordinate frame.
        kept = tf.boolean_mask(labels, center_inside)
        left_f = tf.cast(left, tf.float32)
        top_f = tf.cast(top, tf.float32)
        offsets = (left_f, top_f, left_f, top_f)
        shifted = [kept[:, k] - offsets[k] for k in range(4)]
        kept = tf.stack(shifted + [kept[:, 4]], axis=1)

        # Returning `max_loop` as the counter ends the loop on success.
        return tf.cond(is_valid,
                       lambda: (max_loop, patch, kept),
                       lambda: (i + 1, img, labels))

    _, img, labels = tf.while_loop(
        cond=lambda i, img, labels: tf.less(i, max_loop),
        body=crop_loop_body,
        loop_vars=[tf.constant(-1), img, labels],
        shape_invariants=[tf.TensorShape([]),
                          tf.TensorShape([None, None, 3]),
                          tf.TensorShape([None, 5])])
    return img, labels


print('슝=3')
def _pad_to_square(img):
    """Pad `img` with its per-channel mean colour until it is square.

    Rows are appended at the bottom when the image is wider than tall, and
    columns are appended on the right when it is taller than wide. Existing
    pixels keep their positions, so box coordinates need no adjustment.

    Args:
        img: image tensor indexed as [row, column, channel] with 3 channels.
            The fill is built with ``tf.ones`` (float32), so `img` is
            expected to be float32 as well.

    Returns:
        The padded image, or `img` itself when it is already square.
    """
    height = tf.shape(img)[0]
    width = tf.shape(img)[1]

    def mean_fill(rows, cols):
        # A [rows, cols, 3] block filled with the image's mean colour.
        mean_color = tf.reduce_mean(img, axis=[0, 1], keepdims=True)
        return tf.ones([rows, cols, 3]) * mean_color

    def pad_h():
        return tf.concat([img, mean_fill(width - height, width)], axis=0)

    def pad_w():
        return tf.concat([img, mean_fill(height, height - width)], axis=1)

    return tf.cond(
        tf.greater(height, width),
        pad_w,
        lambda: tf.cond(tf.less(height, width), pad_h, lambda: img))


print('슝=3')
def _resize(img, labels):
    """Normalise box coordinates and resize `img` to the configured input size.

    Box x coordinates are divided by the image width and y coordinates by
    the image height, then clipped to [0, 1]. The image is resized to
    ``[IMAGE_HEIGHT, IMAGE_WIDTH]`` (module-level constants defined outside
    this function) using an interpolation method picked uniformly at random
    from bicubic, area, nearest, lanczos3 and bilinear.

    Args:
        img: image tensor indexed as [row, column, channel].
        labels: [N, 5] tensor; columns 0-3 are (x_min, y_min, x_max, y_max)
            in pixels, column 4 is passed through.

    Returns:
        Tuple ``(img, labels)`` with the resized image and normalised boxes.
    """
    img_w = tf.cast(tf.shape(img)[1], tf.float32)
    img_h = tf.cast(tf.shape(img)[0], tf.float32)

    # Divide (x, y, x, y) by (w, h, w, h) in one broadcast step.
    locs = labels[:, :4] / tf.stack([img_w, img_h, img_w, img_h])
    locs = tf.clip_by_value(locs, 0, 1.0)
    labels = tf.concat([locs, labels[:, 4:5]], axis=1)

    resize_case = tf.random.uniform([], 0, 5, dtype=tf.int32)

    def resizer(method):
        def apply():
            # size is given as [height, width]
            return tf.image.resize(
                img, [IMAGE_HEIGHT, IMAGE_WIDTH],
                method=method, antialias=True)
        return apply

    # Cases 0-3 map to the methods below; case 4 falls through to bilinear.
    methods = ('bicubic', 'area', 'nearest', 'lanczos3')
    branches = [(tf.equal(resize_case, idx), resizer(method))
                for idx, method in enumerate(methods)]
    img = tf.case(branches, default=resizer('bilinear'))
    return img, labels


print('슝=3')
def _flip(img, labels):
    """Mirror the image and its boxes left-to-right with probability 1/2.

    The x coordinates are mirrored as ``1 - x``, so `labels` must already be
    normalised to [0, 1] (as `_resize` produces). The min and max x columns
    are swapped so that ``x_min <= x_max`` still holds after mirroring.

    Args:
        img: image tensor accepted by ``tf.image.flip_left_right``.
        labels: [N, 5] tensor; columns 0-3 are normalised
            (x_min, y_min, x_max, y_max), column 4 is passed through.

    Returns:
        Tuple ``(img, labels)``, flipped or untouched.
    """
    flip_case = tf.random.uniform([], 0, 2, dtype=tf.int32)

    def flipped():
        mirrored_labels = tf.stack(
            [1 - labels[:, 2], labels[:, 1],
             1 - labels[:, 0], labels[:, 3],
             labels[:, 4]],
            axis=1)
        return tf.image.flip_left_right(img), mirrored_labels

    def unchanged():
        return img, labels

    img, labels = tf.cond(tf.equal(flip_case, 0), flipped, unchanged)
    return img, labels


print('슝=3')
def _distort(img):
    """Apply random photometric jitter: brightness, contrast, saturation, hue.

    The four adjustments are applied in that fixed order. Box coordinates
    are unaffected because no pixel changes position.

    NOTE(review): the visible caller passes pixel values in the 0-255 range
    (it divides by 255 only afterwards), so a brightness delta of 0.4 is very
    small on that scale — confirm this is intended.

    Args:
        img: RGB image tensor.

    Returns:
        The jittered image.
    """
    jittered = tf.image.random_brightness(img, max_delta=0.4)
    jittered = tf.image.random_contrast(jittered, lower=0.5, upper=1.5)
    jittered = tf.image.random_saturation(jittered, lower=0.5, upper=1.5)
    jittered = tf.image.random_hue(jittered, max_delta=0.1)
    return jittered


print('슝=3')
Prior box 적용
SSD모델에 사용
데이터셋에 반영되어야한다.
아래의 메소드: prior box와 bounding box 사이의 IoU를 계산한다.
자카드 유사도(Jaccard index): 두 box의 교집합 넓이를 합집합 넓이로 나눈 값으로, 유사도를 측정한다.
def _jaccard(box_a, box_b):
    """Pairwise Jaccard index (IoU) between two sets of corner-form boxes.

    Args:
        box_a: [A, 4] boxes as (x_min, y_min, x_max, y_max).
        box_b: [B, 4] boxes in the same layout.

    Returns:
        [A, B] tensor of intersection-over-union values. Relies on
        `_intersect` (defined outside this block) for the pairwise
        intersection areas — presumably shaped [A, B]; verify there.
    """
    inter = _intersect(box_a, box_b)
    pair_shape = tf.shape(inter)

    areas_a = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])
    areas_b = (box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1])

    # Expand each area vector to the full [A, B] pair grid.
    area_a = tf.broadcast_to(tf.expand_dims(areas_a, 1), pair_shape)
    area_b = tf.broadcast_to(tf.expand_dims(areas_b, 0), pair_shape)

    return inter / (area_a + area_b - inter)  # [A, B]


print('슝=3')
자카드 유사도 계산 메소드
encode_tf: TFRecord의 데이터셋의 라벨 가공
jaccard 메소드: label의 ground truth bbox와 가장 overlap 비율이 높은 matched prior를 구한다
_encode_bbox 메소드: bbox의 scale을 동일하게 보정
ground truth bbox 존재 여부를 concat하여 새로운 label로 업데이트
def _encode_bbox(matched, priors, variances=(0.1, 0.2)):
    """Encode matched ground-truth boxes as offsets relative to their priors.

    The centre offset is divided by ``variances[0] * prior_size`` and the
    log of the size ratio is divided by ``variances[1]``.

    Args:
        matched: [num_priors, 4] ground-truth boxes in corner form
            (x_min, y_min, x_max, y_max), one per prior.
        priors: [num_priors, 4] prior boxes in centre form (cx, cy, w, h).
        variances: two scaling factors, for the centre term and the size
            term respectively. The default is a tuple rather than a list so
            it cannot be mutated across calls.

    Returns:
        [num_priors, 4] tensor: encoded (dx, dy) followed by encoded (dw, dh).
    """
    prior_center = priors[:, :2]
    prior_size = priors[:, 2:]

    # Offset of the ground-truth centre from the prior centre.
    g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - prior_center
    g_cxcy = g_cxcy / (variances[0] * prior_size)

    # Log-ratio of ground-truth size to prior size.
    g_wh = (matched[:, 2:] - matched[:, :2]) / prior_size
    g_wh = tf.math.log(g_wh) / variances[1]

    return tf.concat([g_cxcy, g_wh], 1)


print('슝=3')
def encode_tf(labels, priors):
    """Match ground-truth boxes to priors and build per-prior training targets.

    Each prior is assigned the ground-truth box it overlaps most. In
    addition, every ground-truth box claims its single best prior regardless
    of the threshold, so no box is left unmatched. Priors whose best overlap
    is below the 0.45 matching threshold get a confidence of 0.

    Args:
        labels: [num_boxes, 5] tensor; columns 0-3 are corner-form boxes
            (x_min, y_min, x_max, y_max) and the last column is the
            confidence/class value.
        priors: [num_priors, 4] prior boxes in centre form (cx, cy, w, h),
            the layout `_encode_bbox` requires — presumably in the same
            coordinate scale as `labels`; verify against the prior generator.

    Returns:
        [num_priors, 5] tensor: four encoded box offsets followed by the
        matched confidence.
    """
    match_threshold = 0.45
    priors = tf.cast(priors, tf.float32)
    bbox = labels[:, :4]
    conf = labels[:, -1]

    # `_jaccard` computes areas from corner coordinates, while the priors are
    # centre-form (that is how `_encode_bbox` reads them below). Convert to
    # corners first so the IoU is computed on matching box layouts.
    half_wh = priors[:, 2:] / 2
    priors_corner = tf.concat(
        [priors[:, :2] - half_wh, priors[:, :2] + half_wh], axis=1)

    # jaccard index, shape [num_boxes, num_priors]
    overlaps = _jaccard(bbox, priors_corner)

    # Best prior for each ground-truth box.
    best_prior_idx = tf.argmax(overlaps, 1, tf.int32)
    # Best ground-truth box for each prior.
    best_truth_overlap = tf.reduce_max(overlaps, 0)
    best_truth_idx = tf.argmax(overlaps, 0, tf.int32)

    # Force each ground-truth box's best prior to match that box: an overlap
    # of 2 always clears the threshold, and the index points at the box.
    forced = tf.expand_dims(best_prior_idx, 1)
    best_truth_overlap = tf.tensor_scatter_nd_update(
        best_truth_overlap, forced,
        tf.ones_like(best_prior_idx, tf.float32) * 2.)
    best_truth_idx = tf.tensor_scatter_nd_update(
        best_truth_idx, forced,
        tf.range(tf.size(best_prior_idx), dtype=tf.int32))

    # Scale Ground-Truth Boxes
    matches_bbox = tf.gather(bbox, best_truth_idx)  # [num_priors, 4]
    loc_t = _encode_bbox(matches_bbox, priors)
    conf_t = tf.gather(conf, best_truth_idx)  # [num_priors]
    conf_t = tf.where(tf.less(best_truth_overlap, match_threshold),
                      tf.zeros_like(conf_t), conf_t)

    return tf.concat([loc_t, conf_t[..., tf.newaxis]], axis=1)


print('슝=3')
load_dataset
TFRecord 데이터셋에 적용하여 SSD 학습을 위한 데이터셋을 생성하는 최종 load_dataset을 구현한다.
def _transform_data(train, priors):
    """Build the per-example preprocessing function for the dataset pipeline.

    Args:
        train: when truthy, the random crop, flip and colour distortion
            steps are applied; otherwise only padding, resizing and label
            encoding run.
        priors: prior boxes forwarded to `encode_tf`.

    Returns:
        A function ``(img, labels) -> (img, labels)`` that returns the image
        divided by 255 and the labels encoded against `priors`.
    """
    def transform_data(img, labels):
        img = tf.cast(img, tf.float32)

        # Geometric steps: these change the boxes as well as the image.
        if train:
            img, labels = _crop(img, labels)
        img, labels = _resize(_pad_to_square(img), labels)

        if train:
            img, labels = _flip(img, labels)
            # Photometric step: image only.
            img = _distort(img)

        labels = encode_tf(labels=labels, priors=priors)
        return img / 255.0, labels

    return transform_data


print('슝=3')
def _parse_tfrecord(train, priors):
    """Build the function that decodes one serialized example and preprocesses it.

    Args:
        train: forwarded to `_transform_data` to toggle augmentation.
        priors: forwarded to `_transform_data` for label encoding.

    Returns:
        A function ``tfrecord -> (img, labels)`` suitable for
        ``Dataset.map``.
    """
    def parse_tfrecord(tfrecord):
        # The full schema is kept as-is even though only the image, box and
        # class fields are read below.
        features = {
            'filename': tf.io.FixedLenFeature([], tf.string),
            'height': tf.io.FixedLenFeature([], tf.int64),
            'width': tf.io.FixedLenFeature([], tf.int64),
            'classes': tf.io.VarLenFeature(tf.int64),
            'x_mins': tf.io.VarLenFeature(tf.float32),
            'y_mins': tf.io.VarLenFeature(tf.float32),
            'x_maxes': tf.io.VarLenFeature(tf.float32),
            'y_maxes': tf.io.VarLenFeature(tf.float32),
            'difficult': tf.io.VarLenFeature(tf.int64),
            'image_raw': tf.io.FixedLenFeature([], tf.string),
        }
        parsed_example = tf.io.parse_single_example(tfrecord, features)
        img = tf.image.decode_jpeg(parsed_example['image_raw'], channels=3)

        # One row per box: (x_min, y_min, x_max, y_max, class).
        classes = tf.cast(
            tf.sparse.to_dense(parsed_example['classes']), tf.float32)
        box_columns = [
            tf.sparse.to_dense(parsed_example[key])
            for key in ('x_mins', 'y_mins', 'x_maxes', 'y_maxes')
        ]
        labels = tf.stack(box_columns + [classes], axis=1)

        img, labels = _transform_data(train, priors)(img, labels)
        return img, labels

    return parse_tfrecord


print('슝=3')
def load_tfrecord_dataset(tfrecord_name, train=True, priors=None, buffer_size=1024):
    """Create the batched, prefetched dataset from a TFRecord file.

    The raw records are cached. In training mode they are then repeated
    indefinitely and shuffled. Each record is parsed and preprocessed in
    parallel, and the results are grouped into batches of ``BATCH_SIZE``
    (a constant defined outside this function), dropping any incomplete
    final batch.

    Args:
        tfrecord_name: path(s) passed to ``tf.data.TFRecordDataset``.
        train: enables repeat/shuffle here and augmentation downstream.
        priors: prior boxes forwarded to the parser for label encoding.
        buffer_size: shuffle buffer size (used only when `train` is truthy).

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    autotune = tf.data.experimental.AUTOTUNE

    records = tf.data.TFRecordDataset(tfrecord_name).cache()
    if train:
        records = records.repeat().shuffle(buffer_size=buffer_size)

    return (
        records
        .map(_parse_tfrecord(train, priors), num_parallel_calls=autotune)
        .batch(BATCH_SIZE, drop_remainder=True)
        .prefetch(buffer_size=autotune)
    )


print('슝=3')
def load_dataset(priors, train=True, buffer_size=1024):
    """Load the training or validation dataset.

    Args:
        priors: prior boxes used to encode the labels.
        train: selects ``TRAIN_TFRECORD_PATH`` when truthy, otherwise
            ``VALID_TFRECORD_PATH`` (both defined outside this function),
            and is forwarded to the loader.
        buffer_size: shuffle buffer size forwarded to the loader.

    Returns:
        The dataset produced by `load_tfrecord_dataset`.
    """
    tfrecord_path = TRAIN_TFRECORD_PATH if train else VALID_TFRECORD_PATH
    return load_tfrecord_dataset(
        tfrecord_name=tfrecord_path,
        train=train,
        priors=priors,
        buffer_size=buffer_size)


print('슝=3')