OpenCV 해보기 3

세바님·2024년 4월 1일

opencv

선형 회귀

먼저 회귀란, 한 개 이상의 변수를 다룰 때 특정 변수와 다른 변수와의 관계를 분석하고 이를 토대로 모델을 정의해 값을 예측하는 방법이다.

이중 선형 회귀는 통계학에서 가장 기본적인 회귀 분석 기법으로, 데이터를 가장 잘 대변하는 최적의 선을 찾는 과정이다.

선형 회귀의 목적은 두 가지 이다.

데이터 예측: 새로운 데이터 포인트의 종속 변수 값을 예측한다.
변수 간 관계 이해: 독립 변수가 종속 변수에 어떤 영향을 미치는지 이해한다.

CNN

기존의 영상 인식은 학습한 내용만을 가지고 수행되었다. 다시 말해 자신이 학습한 것과 조금만 달라져도 인식을 할 수 없다는 이야기다.

사람이라면 이 객체는 ~한 특징을 가지니깐 개가 맞아"가 아니라 그냥 경험적인 측면에서 어떻게 구체적으로 설명할 수는 없지만 개라는 것을 확신할 수 있게 된다.
하지만 컴퓨터는 저 사진 중 하나만을 학습했을 경우 나머지는 개라고 인식하지 못한다는 것이다. 그래서 등장한 것이 합성곱 신경망(CNN)이다.

문제를 해결한 CNN의 원리는 다음과 같다.

뇌 속 시각 피질의 신경 세포들은 물체의 방향과 장소가 바뀌어도 별문제 없이 인식할 수 있다는 점에서 착안
객체 고유의 특징을 학습하여 물체의 위치와 방향에 관계없이 객체를 인식
이미지의 픽셀 값으로부터 직접 시각 패턴을 학습
각 레이어의 입출력 데이터 형상 유지
이미지의 공간 정보를 유지하면서 이미지간 상관관계를 인식

전이학습

일반적으로 모든 층을 쌓아 모델을 설계하는 것은 비용이 많이 들고, 또 많은 수의 데이터를 모아 가중치를 얻는 것도 비용이 많이 든다.
이 문제를 해결하기 위해 아주 큰 데이터셋에 훈련된 가중치를 들고 와서 우리 데이터셋에 맞게 보정하여 사용하는 것을 전이학습이라고 한다.

예제

사물(동물) 구분

import sys
import numpy as np
import cv2


filename = 'data/beagle.jpg'

img = cv2.imread(filename)

if img is None:
    print('Image load failed!')
    exit()

# Load network

net = cv2.dnn.readNet('data/bvlc_googlenet.caffemodel', 'data/deploy.prototxt')

if net.empty():
    print('Network load failed!')
    exit()

# Load class names

classNames = None
with open('data/classification_classes_ILSVRC2012.txt', 'rt') as f:
    classNames = f.read().rstrip('\n').split('\n')

# Inference

inputBlob = cv2.dnn.blobFromImage(img, 1, (224, 224), (104, 117, 123))
net.setInput(inputBlob)
prob = net.forward()

# Check results & Display

out = prob.flatten()
classId = np.argmax(out)
confidence = out[classId]

text = '%s (%4.2f%%)' % (classNames[classId], confidence * 100)
cv2.putText(img, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 1, cv2.LINE_AA)

cv2.imshow('img', img)
cv2.waitKey()
cv2.destroyAllWindows()

얼굴 검출

import cv2


model = 'data/dnn/res10_300x300_ssd_iter_140000_fp16.caffemodel'
config = 'data/dnn/deploy.prototxt'
#model = 'opencv_face_detector_uint8.pb'
#config = 'opencv_face_detector.pbtxt'

cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print('Camera open failed!')
    exit()

net = cv2.dnn.readNet(model, config)

if net.empty():
    print('Net open failed!')
    exit()

while True:
    _, frame = cap.read()
    if frame is None:
        break

    blob = cv2.dnn.blobFromImage(frame, 1, (300, 300), (104, 177, 123))
    net.setInput(blob)
    detect = net.forward()

    detect = detect[0, 0, :, :]
    (h, w) = frame.shape[:2]

    for i in range(detect.shape[0]):
        confidence = detect[i, 2]
        if confidence < 0.5:
            break

        x1 = int(detect[i, 3] * w)
        y1 = int(detect[i, 4] * h)
        x2 = int(detect[i, 5] * w)
        y2 = int(detect[i, 6] * h)

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0))

        label = 'Face: %4.3f' % confidence
        cv2.putText(frame, label, (x1, y1 - 1), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 1, cv2.LINE_AA)

    cv2.imshow('frame', frame)

    if cv2.waitKey(1) == ord('q'):
        break

cv2.destroyAllWindows()

욕 제스처 모자이크 프로그램 만들기

import cv2
import mediapipe as mp
import numpy as np

max_num_hands = 1
gesture = {
    0:'fist', 1:'one', 2:'two', 3:'three', 4:'four', 5:'five',
    6:'six', 7:'rock', 8:'spiderman', 9:'yeah', 10:'ok', 11:'fy'
}

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    max_num_hands=max_num_hands,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5)

file = np.genfromtxt('data/gesture_train_fy.csv', delimiter=',')
angle = file[:,:-1].astype(np.float32)
label = file[:, -1].astype(np.float32)
knn = cv2.ml.KNearest_create()
knn.train(angle, cv2.ml.ROW_SAMPLE, label)

cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, img = cap.read()
    if not ret:
        continue

    img = cv2.flip(img, 1)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    result = hands.process(img)

    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    if result.multi_hand_landmarks is not None:
        for res in result.multi_hand_landmarks:
            joint = np.zeros((21, 3))
            for j, lm in enumerate(res.landmark):
                joint[j] = [lm.x, lm.y, lm.z]

            v1 = joint[[0,1,2,3,0,5,6,7,0,9,10,11,0,13,14,15,0,17,18,19],:]
            v2 = joint[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],:]
            v = v2 - v1
            v = v / np.linalg.norm(v, axis=1)[:, np.newaxis]

            angle = np.arccos(np.einsum('nt,nt->n',
                v[[0,1,2,4,5,6,8,9,10,12,13,14,16,17,18],:], 
                v[[1,2,3,5,6,7,9,10,11,13,14,15,17,18,19],:]))

            angle = np.degrees(angle) 

            data = np.array([angle], dtype=np.float32)
            ret, results, neighbours, dist = knn.findNearest(data, 3)
            idx = int(results[0][0])

            if idx == 11:
                hand_size = np.linalg.norm(joint[0] - joint[12])
                mosaic_size = int(hand_size * 0.03) 
                x1, y1 = tuple((joint.min(axis=0)[:2] * [img.shape[1], img.shape[0]] * 0.95).astype(int))
                x2, y2 = tuple((joint.max(axis=0)[:2] * [img.shape[1], img.shape[0]] * 1.05).astype(int))
                fy_img = img[y1:y2, x1:x2].copy()
                fy_img = cv2.resize(fy_img, dsize=None, fx=0.05, fy=0.05, interpolation=cv2.INTER_NEAREST)
                fy_img = cv2.resize(fy_img, dsize=(x2 - x1, y2 - y1), interpolation=cv2.INTER_NEAREST)
                img[y1:y2, x1:x2] = cv2.resize(fy_img, (x2 - x1, y2 - y1), interpolation=cv2.INTER_NEAREST)

       

    cv2.imshow('Filter', img)
    if cv2.waitKey(1) == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

새벽두시에이거하고앉았네잠도못자고이게뭐하는짓이냐,,,,,

세바님

꼴리는대로 사는게 꿈입니다

이전 포스트

OpenCV 해보기 2

다음 포스트