Python OCR tesseract

agnusdei·2023년 7월 5일
0

%matplotlib inline

!sudo apt install tesseract-ocr
!pip3 install pytesseract
!pip3 install opencv-contrib-python # 주요 및 추가 모듈 설치
!pip3 install Pillow==9.1.0
!pip3 install coolsms_python_sdk

import pytesseract
import shutil
import os
import random
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import numpy as np
import sys
from sdk.api.message import Message
from sdk.exceptions import CoolsmsException
from datetime import datetime
from flask import Flask

# Locate text-like regions in the screenshot and select one region to OCR.
# Pipeline: downscale -> grayscale -> morphological gradient -> Otsu
# threshold -> horizontal closing (joins neighbouring glyphs into one blob)
# -> external contours -> bounding rectangles.
large = cv2.imread('/content/sample_data/zzz.png', cv2.IMREAD_COLOR)
if large is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, which would otherwise surface as an opaque
    # error inside pyrDown.
    raise FileNotFoundError("cannot read image: /content/sample_data/zzz.png")
rgb = cv2.pyrDown(large)  # despite the name, the channel order is still BGR
small = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
grad = cv2.morphologyEx(small, cv2.MORPH_GRADIENT, kernel)
_, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# A wide, 1-pixel-tall kernel connects characters on the same text line.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)
# using RETR_EXTERNAL instead of RETR_CCOMP
contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
if not contours:
    # Without this guard the code below fails with a NameError on x/y/w/h.
    raise ValueError("no contours found in /content/sample_data/zzz.png")

mask = np.zeros(bw.shape, dtype=np.uint8)
for idx, contour in enumerate(contours):
    x, y, w, h = cv2.boundingRect(contour)
    mask[y:y+h, x:x+w] = 0
    # Paint the contour as a filled white shape so that countNonZero
    # measures how much of the bounding box the contour actually covers.
    # (Drawing it in black left the mask empty and the ratio always 0.)
    cv2.drawContours(mask, contours, idx, (255, 255, 255), cv2.FILLED)
    r = cv2.countNonZero(mask[y:y+h, x:x+w]) / (w * h)
    # Keep boxes that are reasonably filled and not tiny specks.
    if r > 0.45 and w > 8 and h > 8:
        cv2.rectangle(rgb, (x, y), (x+w-1, y+h-1), (0, 0, 0), 0)

# From here on x, y, w, h describe the LAST contour visited by the loop;
# that region is the one cropped and passed to OCR.
cv2.rectangle(rgb, (x, y), (x+w-1, y+h-1), (0, 0, 0), 0)

# Add a margin on all four sides so the text is not cut off, and clamp the
# box to the image so the coordinates never go negative or out of range.
pad = 5
img_h, img_w = rgb.shape[:2]
left = max(x - pad, 0)
top = max(y - pad, 0)
right = min(x + w + pad, img_w)
bottom = min(y + h + pad, img_h)
x, y, w, h = left, top, right - left, bottom - top
print(x,y, x+w, y+h) # check the coordinate values

rec = cv2.rectangle(rgb, (x, y), (x+w, y+h), (0, 255, 0), 1)
# matplotlib expects RGB; convert a copy for display only and leave `rec`
# itself in OpenCV's BGR order.
plt.imshow(cv2.cvtColor(rec, cv2.COLOR_BGR2RGB))

# Crop the selected region out of the annotated frame, preview it, and save
# it as the input file for OCR.
# `rec` was produced by OpenCV and is in BGR channel order, while PIL and
# matplotlib assume RGB; convert first so the preview and the saved PNG do
# not have red and blue swapped.
img = Image.fromarray(cv2.cvtColor(rec, cv2.COLOR_BGR2RGB))
area = (x, y, x+w, y+h)  # crop box as (left, upper, right, lower)
cropped_img = img.crop(area)

np_array_cropped_img = np.array(cropped_img)

plt.imshow(np_array_cropped_img)
plt.axis("on")
plt.show()

# The crop is already a PIL image, so it can be saved directly without a
# round trip through NumPy; `im` is kept as a name for the saved image.
im = cropped_img
im.save("stream_check.png")

# Run Tesseract on the cropped image and report whether the stream is down.
# -l kor+eng : recognise Korean and English
# --oem 3    : default OCR engine mode
# --psm 11   : sparse-text page segmentation
config = '-l kor+eng --oem 3 --psm 11'
msg = pytesseract.image_to_string("stream_check.png", config=config)

now = datetime.now()
print("OCR 추출된 텍스트 : ",msg)

# Use a substring test rather than `find(...) == 0`: the OCR output may
# carry leading whitespace or noise, in which case the phrase is present
# but does not start at index 0 and a stopped stream would be reported as
# healthy.
# NOTE: this block only prints the alert; it does not call sms().
if 'Stream unavailable' in msg:
    print("스트리밍 중지!! 현재 시각 : ", now)
else:
    print("스트리밍 정상 작동중")

def sms():
    """Send the fixed "stream stopped" text message through the CoolSMS SDK.

    Builds the four mandatory parameters (type, to, from, text), sends them
    with ``Message.send`` and prints the success count, error count and
    group id from the response, plus the error list when the response
    contains one.  If the SDK raises ``CoolsmsException`` the error code and
    message are printed and the process exits via ``sys.exit()``.
    """
    # API credentials (placeholders).
    api_key = "..."
    api_secret = "..."

    # All four keys are mandatory for the send call.
    params = {
        'type': 'sms',  # message type: sms, lms, mms or ata
        'to': '...',
        'from': '...',
        'text': '스트림이 중지됐습니다. 확인하세요!',
    }
    cool = Message(api_key, api_secret)

    summary_fields = (
        ("Success Count", 'success_count'),
        ("Error Count", 'error_count'),
        ("Group ID", 'group_id'),
    )
    try:
        response = cool.send(params)
        for label, key in summary_fields:
            print("%s : %s" % (label, response[key]))
        if "error_list" in response:
            print("Error List : {!s}".format(response['error_list']))
    except CoolsmsException as e:
        print(f"Error Code : {e.code!s}")
        print(f"Error Message : {e.msg!s}")
        sys.exit()

스트림 중지가 감지되었을 때 위의 sms() 함수를 호출하면, 문자 도착까지 잘 작동하는 것을 확인할 수 있습니다.

0개의 댓글