Deep Learning - OCR - Image, Story, Invoice, Sudoku🦔

화이티 ·2024년 1월 24일

Deep Learning OCR

Deep Learning

목록 보기

24/24

https://github.com/UB-Mannheim/tesseract/wiki
Download Tesseract

What is OCR?

OCR - Optical Character Recognition 광학 문자 인식

이미지에서 글씨 찾기
문자 영역을 찾아내기> 영역 속에서 > 문자 인식하기

Tesseract

1980년대 최초로 개발된 OCR 알고리즘중 하나

빠르지만 성능이 별로다

EasyOCR: 간단하고 직관적인 OCR 알고리즘

결과를 bbox 좌표로 반환
python library

PaddleOCR:

텍스트 감지, 텍스트 방향 분류기 등 텍스트 인식 제공
외국 API library > 환경설정이 필요함

구글 OCR API:https://cloud.google.com/vision/docs/quickstart
Naver 클로버 OCR :https://clova.ai/ocr?lang=ko

!pip install pytesseract

import pytesseract
# The r prefix makes a raw string: backslashes are kept literally instead of
# being read as escape sequences, which is what a Windows path needs.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Same path WITHOUT the r prefix: the "\t" before "esseract" is read as a tab
# character, so the printed path comes out mangled.
# NOTE(review): presumably kept on purpose to show why the raw string above is
# needed -- confirm before "fixing" this literal.
print("C:\Program Files\Tesseract-OCR\tesseract.exe")

Image

import cv2

# Run English OCR on the sample text image and print the recognised text.
img = cv2.imread('./data/text.png')
# cv2.imread does not raise on a missing/unreadable file; it returns None.
if img is None:
    raise FileNotFoundError("could not read image: ./data/text.png")
# OpenCV loads pixels in BGR order, while pytesseract assumes RGB for arrays.
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
text = pytesseract.image_to_string(img_rgb, lang='eng')
print(text)

1. 가지런한 방향
1. 글씨와 배경의 뚜렷한 차이
1. 큰 글씨
사용자: 핸드폰 화면의 고정된 위치에서 찍게 하여, 사진의 형태를 제한시켜서 데이터를 받게 하기
관리자:
-- 방향을 가지런하게 한다
-- 글씨와 배경의 차이를 만든다
-- 글씨를 키운다
Story

import cv2

# Run Korean OCR on the story image and print the recognised text.
img = cv2.imread('./data/story.png')
# cv2.imread does not raise on a missing/unreadable file; it returns None.
if img is None:
    raise FileNotFoundError("could not read image: ./data/story.png")
# OpenCV loads pixels in BGR order, while pytesseract assumes RGB for arrays.
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
text = pytesseract.image_to_string(img_rgb, lang='kor')
print(text)

Invoice

import cv2

# Run Korean OCR on the invoice photo and print the recognised text.
img = cv2.imread('./data/invoice.jpg')
# cv2.imread does not raise on a missing/unreadable file; it returns None.
if img is None:
    raise FileNotFoundError("could not read image: ./data/invoice.jpg")
# OpenCV loads pixels in BGR order, while pytesseract assumes RGB for arrays.
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
text = pytesseract.image_to_string(img_rgb, lang='kor')
print(text)

흑백 배경 만들기

# Convert the invoice image to grayscale and show it next to the original.

import cv2

img = cv2.imread('./data/invoice.jpg')
img_gray = cv2.cvtColor(src=img, code=cv2.COLOR_BGR2GRAY)

# One window per image, titled after the variable it displays.
for window_title, picture in (('img', img), ('img_gray', img_gray)):
    cv2.imshow(window_title, picture)

# Block until a key is pressed, then close every window.
cv2.waitKey(0)
cv2.destroyAllWindows()


# Binarize the invoice with a fixed threshold, OCR the binary image, and show
# the original, grayscale and binary versions side by side.

import cv2

img = cv2.imread('./data/invoice.jpg')
# cv2.imread does not raise on a missing/unreadable file; it returns None.
if img is None:
    raise FileNotFoundError("could not read image: ./data/invoice.jpg")
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Pixels brighter than 150 become 255 (white); everything else becomes 0.
_, img_binary = cv2.threshold(img_gray, 150, 255, cv2.THRESH_BINARY)
text = pytesseract.image_to_string(img_binary, lang='kor')
# Print the OCR result so the effect of binarization can actually be seen
# (previously it was computed and then discarded).
print(text)

cv2.imshow('img', img)
cv2.imshow('img_gray', img_gray)
cv2.imshow('img_binary', img_binary)
cv2.waitKey(0)
cv2.destroyAllWindows()

sudoku

# Binarization, improved: starting point -- one fixed threshold (50) applied
# to the whole sudoku image.
import cv2

img = cv2.imread('./data/sodoku.png')
img_gray = cv2.cvtColor(src=img, code=cv2.COLOR_BGR2GRAY)
_, img_binary = cv2.threshold(
    src=img_gray, thresh=50, maxval=255, type=cv2.THRESH_BINARY
)

# Show each stage in its own window, in processing order.
stages = (('img', img), ('img_gray', img_gray), ('img_binary', img_binary))
for window_title, picture in stages:
    cv2.imshow(window_title, picture)

# Block until a key is pressed, then close every window.
cv2.waitKey(0)
cv2.destroyAllWindows()

# Binarization, improved = adaptive thresholding.
# Used when lighting makes the image brightness uneven: the image is handled
# region by region and the threshold for each region is computed automatically.
import cv2

img = cv2.imread('./data/sodoku.png')
img_gray = cv2.cvtColor(src=img, code=cv2.COLOR_BGR2GRAY)

# Fixed global threshold, kept for comparison with the adaptive result.
_, img_binary = cv2.threshold(
    src=img_gray, thresh=50, maxval=255, type=cv2.THRESH_BINARY
)

# Per-pixel threshold taken from a Gaussian-weighted 9x9 neighbourhood,
# lowered by the constant 5.
binary = cv2.adaptiveThreshold(
    src=img_gray,
    maxValue=255,
    adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    thresholdType=cv2.THRESH_BINARY,
    blockSize=9,
    C=5,
)

# Show each stage in its own window, in processing order.
stages = (
    ('img', img),
    ('img_gray', img_gray),
    ('img_binary', img_binary),
    ('binary', binary),
)
for window_title, picture in stages:
    cv2.imshow(window_title, picture)

# Block until a key is pressed, then close every window.
cv2.waitKey(0)
cv2.destroyAllWindows()

Blur

# Webcam preview that blurs everything except a central rectangle.
# Press "1" to quit.
import cv2

video = cv2.VideoCapture(0)
try:
    # isOpened is a method: it must be called. The bare attribute is always
    # truthy, so the loop condition never actually checked the camera.
    while video.isOpened():
        ret, img = video.read()
        if not ret:
            break
        height, width = img.shape[:2]
        # Mirror horizontally.
        img = cv2.flip(img, 1)
        # Bounds of the sharp central rectangle: rows 1/6..5/6 of the height,
        # columns 1/3..2/3 of the width.
        top, bottom = int(height / 6), int(height / 6 * 5)
        left, right = int(width / 3), int(width / 3 * 2)
        # Keep a reference to the un-blurred pixels of the central region.
        tmp = img[top:bottom, left:right]
        # Blur the whole frame (returns a new array, so tmp stays sharp).
        img = cv2.blur(img, (20, 20))
        # Paste the sharp central region back over the blurred frame.
        img[top:bottom, left:right] = tmp
        k = cv2.waitKey(30)
        if k == ord('1'):
            break
        cv2.imshow('img', img)
finally:
    # Always free the camera and close the window, even after an error.
    video.release()
    cv2.destroyAllWindows()

# Webcam preview that blurs everything except a framed central rectangle.
# Press "2" to OCR the central region (Korean), press "1" to quit.
import cv2
import matplotlib.pyplot as plt
import numpy as np

# 3x3 structuring element for the morphological clean-up; loop-invariant.
kernel = np.ones((3, 3), np.uint8)

video = cv2.VideoCapture(0)
try:
    # isOpened is a method: it must be called. The bare attribute is always
    # truthy, so the loop condition never actually checked the camera.
    while video.isOpened():
        ret, img = video.read()
        if not ret:
            break
        height, width = img.shape[:2]
        # The frame is not mirrored here (presumably because mirrored text
        # could not be read by OCR).
        # Bounds of the sharp central rectangle: rows 1/6..5/6 of the height,
        # columns 1/3..2/3 of the width.
        top, bottom = int(height / 6), int(height / 6 * 5)
        left, right = int(width / 3), int(width / 3 * 2)
        # Keep a reference to the un-blurred pixels of the central region.
        tmp = img[top:bottom, left:right]
        # Blur the whole frame (returns a new array, so tmp stays sharp).
        img = cv2.blur(img, (20, 20))
        # Paste the sharp central region back over the blurred frame.
        img[top:bottom, left:right] = tmp
        # Outline the sharp region: top-left corner, bottom-right corner,
        # white colour, thickness 2.
        cv2.rectangle(img, (left, top), (right, bottom), (255, 255, 255), 2)

        k = cv2.waitKey(30)
        if k == ord('1'):
            break
        elif k == ord('2'):
            # Binarization needs a single-channel image.
            gray = cv2.cvtColor(tmp, cv2.COLOR_BGR2GRAY)
            # Adaptive binarization copes with uneven lighting.
            binary = cv2.adaptiveThreshold(gray, 255,
                                           cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                           cv2.THRESH_BINARY, 9, 5)
            # Closing removes small dark specks from the white background.
            closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
            # OCR the cleaned image (previously the clean-up result was
            # computed but the raw binary image was passed to OCR).
            text = pytesseract.image_to_string(closed, lang='kor')
            print(text)
            # Show exactly what was fed to the OCR engine.
            plt.imshow(closed, cmap='gray')
            plt.show()
        cv2.imshow('img', img)
finally:
    # Always free the camera and close the window, even after an error.
    video.release()
    cv2.destroyAllWindows()

# Webcam preview that blurs everything except a framed central rectangle.
# Press "2" to OCR the central region (Korean); the camera stops when the
# wanted name appears in the OCR text. Press "1" to quit manually.
import cv2
import numpy as np

# 3x3 structuring element for dilation/erosion; loop-invariant.
kernel = np.ones((3, 3), np.uint8)

video = cv2.VideoCapture(0)
try:
    # isOpened is a method: it must be called. The bare attribute is always
    # truthy, so the loop condition never actually checked the camera.
    while video.isOpened():
        ret, img = video.read()
        if not ret:
            break
        height, width = img.shape[:2]
        # The frame is not mirrored here (presumably because mirrored text
        # could not be read by OCR).
        # Bounds of the sharp central rectangle: rows 1/6..5/6 of the height,
        # columns 1/3..2/3 of the width.
        top, bottom = int(height / 6), int(height / 6 * 5)
        left, right = int(width / 3), int(width / 3 * 2)
        # Keep a reference to the un-blurred pixels of the central region.
        tmp = img[top:bottom, left:right]
        # Blur the whole frame (returns a new array, so tmp stays sharp).
        img = cv2.blur(img, (20, 20))
        # Paste the sharp central region back over the blurred frame.
        img[top:bottom, left:right] = tmp
        # Outline the sharp region: top-left corner, bottom-right corner,
        # white colour, thickness 2.
        cv2.rectangle(img, (left, top), (right, bottom), (255, 255, 255), 2)

        k = cv2.waitKey(30)
        if k == ord('1'):
            break
        elif k == ord('2'):
            # Binarization needs a single-channel image.
            gray = cv2.cvtColor(tmp, cv2.COLOR_BGR2GRAY)
            # Adaptive binarization copes with uneven lighting.
            binary = cv2.adaptiveThreshold(gray, 255,
                                           cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                           cv2.THRESH_BINARY, 9, 5)
            # Dilating the white area shrinks dark noise, but also thins the
            # dark glyph strokes ...
            dilation = cv2.dilate(binary, kernel, iterations=1)
            # ... so erode afterwards to thicken the strokes again.
            erosion = cv2.erode(dilation, kernel, iterations=2)
            # OCR the cleaned image (previously the clean-up result was
            # computed but the raw binary image was passed to OCR).
            text = pytesseract.image_to_string(erosion, lang='kor')
            print(text)
            # Stop the camera once the wanted value (a name) is recognised.
            if '티티' in text:
                break
        cv2.imshow('img', img)
finally:
    # Always free the camera and close the window, even after an error.
    video.release()
    cv2.destroyAllWindows()

화이티

열심히 공부합시다! The best is yet to come! 💜

이전 포스트

Deep Learning - OCR - Image, Story, Invoice, Sudoku🦔

Deep Learning

What is OCR?

Tesseract

Image

Story

Invoice

흑백 배경 만들기

sudoku

Blur

Deep Learning - 이미지처리하기 🐩

0개의 댓글

관련 채용 정보