Deep Learning - OCR - Image, Story, Invoice, Sudoku🦔

화이티 · 2024년 1월 24일
0

Deep Learning

λͺ©λ‘ 보기
24/24

https://github.com/UB-Mannheim/tesseract/wiki
Download Tesseract

What is OCR?

OCR - Optical Character Recognition 광학 문자 인식

  • μ΄λ―Έμ§€μ—μ„œ 글씨 μ°ΎκΈ°
  • 문자 μ˜μ—­μ„ μ°Ύμ•„λ‚΄κΈ°> μ˜μ—­ μ†μ—μ„œ > 문자 μΈμ‹ν•˜κΈ°

Tesseract

  1. 1980년대 최초로 개발된 OCR 알고리즘 중 하나
  • 빠르지만 성능이 별로다
  2. EasyOCR: 간단하고 직관적인 OCR 알고리즘
  • 결과를 bbox 좌표로 반환
  • python library
  3. PaddleOCR:
  • 텍스트 감지, 텍스트 방향 분류기 등 텍스트 인식 제공
  • 외국 API library > 환경설정이 필요함
  4. 구글 OCR API: https://cloud.google.com/vision/docs/quickstart
  5. Naver 클로바 OCR: https://clova.ai/ocr?lang=ko
!pip install pytesseract
import pytesseract

# Point pytesseract at the Tesseract executable. The r-prefix makes this a raw
# string, so the backslashes in the Windows path are kept literally instead of
# being interpreted as escape sequences.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Echo the configured path. Printing the stored value avoids retyping the path
# as a non-raw literal, where "\t" would be turned into a TAB character and
# "\P" / "\T" are invalid escape sequences.
print(pytesseract.pytesseract.tesseract_cmd)

Image

import cv2

# Load the sample image and run English OCR on it.
image_path = './data/text.png'
img = cv2.imread(image_path)
# cv2.imread signals a missing or unreadable file by returning None instead of
# raising, so fail here with a clear message rather than inside the OCR call.
if img is None:
    raise FileNotFoundError(f"could not read image: {image_path}")
text = pytesseract.image_to_string(img, lang='eng')
print(text)

    1. 가지런한 방향
    2. 글씨와 배경의 뚜렷한 차이
    3. 큰 글씨
  • 사용자: 핸드폰에서 화면에 고정위치 찍다, 사진의 형태를 제한시켜서 데이터를 받게하기
  • 관리자:
    -- 방향을 가지런하게 한다
    -- 글씨와 배경의 차이를 만든다
    -- 글씨를 키운다

    Story

import cv2

# Load the story image and run Korean OCR on it.
image_path = './data/story.png'
img = cv2.imread(image_path)
# cv2.imread signals a missing or unreadable file by returning None instead of
# raising, so fail here with a clear message rather than inside the OCR call.
if img is None:
    raise FileNotFoundError(f"could not read image: {image_path}")
text = pytesseract.image_to_string(img, lang='kor')
print(text)

Invoice

import cv2

# Load the invoice photo and run Korean OCR on the unprocessed image.
image_path = './data/invoice.jpg'
img = cv2.imread(image_path)
# cv2.imread signals a missing or unreadable file by returning None instead of
# raising, so fail here with a clear message rather than inside the OCR call.
if img is None:
    raise FileNotFoundError(f"could not read image: {image_path}")
text = pytesseract.image_to_string(img, lang='kor')
print(text)

흑백 배경 만들기

# ν‘λ°±μœΌλ‘œ λ§Œλ“€κΈ°

import cv2
img = cv2.imread('./data/invoice.jpg')
img_gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

cv2.imshow('img',img)
cv2.imshow('img_gray',img_gray)
cv2.waitKey(0)
cv2.destroyAllWindows()


# ν‘λ°±μœΌλ‘œ λ§Œλ“€κΈ°

import cv2
img = cv2.imread('./data/invoice.jpg')
img_gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
_, img_binary = cv2.threshold(img_gray,150,255,cv2.THRESH_BINARY)
text = pytesseract.image_to_string(img_binary, lang = 'kor')

cv2.imshow('img',img)
cv2.imshow('img_gray',img_gray)
cv2.imshow('img_binary',img_binary)
cv2.waitKey(0)
cv2.destroyAllWindows()

sudoku

# Binarization, next step: apply a fixed global threshold to the sudoku photo.
import cv2

img = cv2.imread('./data/sodoku.png')
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Pixels brighter than 50 become 255, all others become 0.
_, img_binary = cv2.threshold(img_gray, 50, 255, cv2.THRESH_BINARY)

# Show the original, grayscale and binarized images in separate windows.
for window_name, frame in (
    ('img', img),
    ('img_gray', img_gray),
    ('img_binary', img_binary),
):
    cv2.imshow(window_name, frame)

# Wait for a key press, then close every window.
cv2.waitKey(0)
cv2.destroyAllWindows()

# Improved binarization: adaptive thresholding.
# Useful when lighting makes pixel values uneven across the image: the
# threshold is computed per neighbourhood instead of once for the whole image.
import cv2

img = cv2.imread('./data/sodoku.png')
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Fixed global threshold at 50, kept for comparison.
_, img_binary = cv2.threshold(img_gray, 50, 255, cv2.THRESH_BINARY)

# Adaptive threshold: Gaussian-weighted 9x9 neighbourhood, constant C = 5.
binary = cv2.adaptiveThreshold(
    img_gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 9, 5
)

# Show all stages, with the adaptive result last.
for window_name, frame in (
    ('img', img),
    ('img_gray', img_gray),
    ('img_binary', img_binary),
    ('binary', binary),
):
    cv2.imshow(window_name, frame)

# Wait for a key press, then close every window.
cv2.waitKey(0)
cv2.destroyAllWindows()

Blur

# Blur the whole webcam frame except a central rectangle.
# Press "1" to quit.
import cv2

video = cv2.VideoCapture(0)
try:
    # isOpened must be *called*: the bare method object is always truthy, so
    # the loop condition would never detect a camera that failed to open.
    while video.isOpened():
        ret, img = video.read()
        if not ret:
            break
        height, width = img.shape[:2]
        # Mirror the frame horizontally.
        img = cv2.flip(img, 1)
        # Bounds of the central rectangle: middle third horizontally,
        # from 1/6 to 5/6 of the height vertically.
        top, bottom = int(height / 6), int(height / 6 * 5)
        left, right = int(width / 3), int(width / 3 * 2)
        # Keep a reference to the sharp pixels inside the rectangle. cv2.blur
        # returns a new array, so this view is not affected by the blur.
        tmp = img[top:bottom, left:right]
        # Blur the entire frame ...
        img = cv2.blur(img, (20, 20))
        # ... then paste the sharp centre back on top.
        img[top:bottom, left:right] = tmp
        k = cv2.waitKey(30)
        if k == ord('1'):  # key code 49
            break
        cv2.imshow('img', img)
finally:
    # Always free the camera and close the window, even if a frame fails.
    video.release()
    cv2.destroyAllWindows()

# Blur the webcam frame outside a central rectangle and OCR the rectangle on
# demand. Press "1" to quit, "2" to run OCR on the current frame.
import cv2
import matplotlib.pyplot as plt
import numpy as np

video = cv2.VideoCapture(0)
# 3x3 structuring element for the morphological step; constant, so it is
# built once instead of on every key press.
kernel = np.ones((3, 3), np.uint8)
try:
    # isOpened must be *called*: the bare method object is always truthy, so
    # the loop condition would never detect a camera that failed to open.
    while video.isOpened():
        ret, img = video.read()
        if not ret:
            break
        height, width = img.shape[:2]
        # No horizontal flip here - presumably because mirrored text would
        # not be readable by OCR.
        # Bounds of the central rectangle: middle third horizontally,
        # from 1/6 to 5/6 of the height vertically.
        top, bottom = int(height / 6), int(height / 6 * 5)
        left, right = int(width / 3), int(width / 3 * 2)
        # Keep a reference to the sharp pixels inside the rectangle. cv2.blur
        # returns a new array, so this view is not affected by the blur.
        tmp = img[top:bottom, left:right]
        # Blur the entire frame, then paste the sharp centre back.
        img = cv2.blur(img, (20, 20))
        img[top:bottom, left:right] = tmp
        # Outline the sharp region: top-left corner, bottom-right corner,
        # white colour, thickness 2.
        cv2.rectangle(img, (left, top), (right, bottom), (255, 255, 255), 2)

        k = cv2.waitKey(30)
        if k == ord('1'):  # key code 49
            break
        elif k == ord('2'):  # key code 50
            # Binarization needs a single-channel image, so go to grayscale.
            gray = cv2.cvtColor(tmp, cv2.COLOR_BGR2GRAY)
            # Adaptive threshold: Gaussian-weighted 9x9 neighbourhood, C = 5.
            binary = cv2.adaptiveThreshold(
                gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY, 9, 5,
            )
            # NOTE(review): the morphological closing result is computed but
            # OCR and the preview below still use `binary` - confirm whether
            # `close` was meant to be used instead.
            close = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
            text = pytesseract.image_to_string(binary, lang='kor')
            print(text)
            # Show the image that was passed to OCR; plt.show() blocks until
            # the figure window is closed.
            plt.imshow(binary, cmap='gray')
            plt.show()
        cv2.imshow('img', img)
finally:
    # Always free the camera and close the window, even if OCR raises.
    video.release()
    cv2.destroyAllWindows()

# Blur the webcam frame outside a central rectangle, OCR the rectangle on
# demand, and stop the camera once the expected name is recognised.
# Press "1" to quit, "2" to run OCR on the current frame.
import cv2
import numpy as np

video = cv2.VideoCapture(0)
# 3x3 structuring element for the morphological steps; constant, so it is
# built once instead of on every key press.
kernel = np.ones((3, 3), np.uint8)
try:
    # isOpened must be *called*: the bare method object is always truthy, so
    # the loop condition would never detect a camera that failed to open.
    while video.isOpened():
        ret, img = video.read()
        if not ret:
            break
        height, width = img.shape[:2]
        # No horizontal flip here - presumably because mirrored text would
        # not be readable by OCR.
        # Bounds of the central rectangle: middle third horizontally,
        # from 1/6 to 5/6 of the height vertically.
        top, bottom = int(height / 6), int(height / 6 * 5)
        left, right = int(width / 3), int(width / 3 * 2)
        # Keep a reference to the sharp pixels inside the rectangle. cv2.blur
        # returns a new array, so this view is not affected by the blur.
        tmp = img[top:bottom, left:right]
        # Blur the entire frame, then paste the sharp centre back.
        img = cv2.blur(img, (20, 20))
        img[top:bottom, left:right] = tmp
        # Outline the sharp region: top-left corner, bottom-right corner,
        # white colour, thickness 2.
        cv2.rectangle(img, (left, top), (right, bottom), (255, 255, 255), 2)

        k = cv2.waitKey(30)
        if k == ord('1'):  # key code 49
            break
        elif k == ord('2'):  # key code 50
            # Binarization needs a single-channel image, so go to grayscale.
            gray = cv2.cvtColor(tmp, cv2.COLOR_BGR2GRAY)
            # Adaptive threshold: Gaussian-weighted 9x9 neighbourhood, C = 5.
            binary = cv2.adaptiveThreshold(
                gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY, 9, 5,
            )
            # Dilation grows the white areas, which removes small black noise
            # but also thins the black strokes; the erosion that follows
            # thickens the strokes again.
            # NOTE(review): `erosion` is computed but OCR below still reads
            # `binary` - confirm whether the cleaned image was meant to be
            # used instead.
            dilation = cv2.dilate(binary, kernel, iterations=1)
            erosion = cv2.erode(dilation, kernel, iterations=2)
            text = pytesseract.image_to_string(binary, lang='kor')
            print(text)
            # Stop the camera once the wanted name appears in the OCR text.
            # The literal had been corrupted by an encoding error and could
            # never match Korean OCR output; it is restored to the Hangul
            # it decodes to.
            if '티티' in text:
                break
        cv2.imshow('img', img)
finally:
    # Always free the camera and close the window, even if OCR raises.
    video.release()
    cv2.destroyAllWindows()

profile
열심히 공부합시다! The best is yet to come! 💜

0개의 댓글

관련 채용 정보