Azure OCR의 인식률이 가장 좋아서 Azure에서 제공하는 OCR을 사용했다.
Easy OCR도 인식률이 나쁘진 않지만 Azure보다는 좋지 않았다.
학습을 시키면 좀 더 좋아질까 싶기도 하다.
기존 코드에서 자잘하게 코드가 바뀌었다.
import os
import warnings
# Set before torch is imported below; presumably works around the duplicate
# OpenMP runtime error some torch installs raise -- TODO confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
# Hide UserWarning messages emitted by the libraries imported below.
warnings.filterwarnings("ignore", category=UserWarning)
import torch
from PIL import Image, ImageFilter
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from msrest.authentication import CognitiveServicesCredentials
import time
# Directory the cropped plate images are saved into (and later read back from for OCR).
dir_path = './crops'
def azure_ocr(path):
    """Run the Azure Computer Vision Read API on an image file.

    The image is uploaded with Korean ("ko") as the language, the
    asynchronous read operation is polled once per second until it leaves
    the 'notStarted'/'running' states, and the recognized text is returned.

    Args:
        path: Path of the image file to send to the Read API.

    Returns:
        The text of the last recognized line, or ``None`` when the operation
        did not succeed or no line was recognized.
    """
    # NOTE(review): placeholder credentials; supply the real key and endpoint
    # from configuration rather than committing them to source.
    subscription_key = "azure key"
    endpoint = "end point"
    computervision_client = ComputerVisionClient(
        endpoint, CognitiveServicesCredentials(subscription_key)
    )

    # Close the file handle as soon as the upload request has been sent.
    with open(path, "rb") as img:
        read_response = computervision_client.read_in_stream(img, language="ko", raw=True)

    # The operation id is the last path segment of the Operation-Location header.
    read_operation_location = read_response.headers["Operation-Location"]
    operation_id = read_operation_location.split("/")[-1]

    # Poll until the service reports a terminal status.
    while True:
        read_result = computervision_client.get_read_result(operation_id)
        if read_result.status not in ('notStarted', 'running'):
            break
        time.sleep(1)

    # Start from None so a failed or empty read returns a value instead of
    # raising UnboundLocalError at the return statement.
    result = None
    if read_result.status == OperationStatusCodes.succeeded:
        for text_result in read_result.analyze_result.read_results:
            for line in text_result.lines:
                # Each line overwrites the previous one: the last line wins.
                result = line.text
    return result
# Load the custom-trained YOLOv5 weights through torch.hub.
# force_reload=True is passed, so the hub cache is not reused between runs.
model = torch.hub.load(
    'ultralytics/yolov5',
    'custom',
    path='yolov5n_best.pt',
    force_reload=True,
)

# Read the test image, convert it to grayscale ("L") and apply a light Gaussian blur.
img = (
    Image.open('../dataset/test/img_753950_1.jpg')  # PIL image
    .convert("L")
    .filter(ImageFilter.GaussianBlur(radius=1))
)

# Feed the preprocessed image to the model.
results = model(img, size=640)

# Tabulate the detections with pandas (first entry of the xyxy list) and show them.
detections_per_image = results.pandas().xyxy
df = detections_per_image[0]
print(df)
'''
xmin ymin xmax ymax confidence class name
0 116.761169 208.202805 1044.926025 767.356384 0.976881 0 car
1 359.405090 608.750916 566.712891 660.803223 0.876195 1 plate
2 1051.030151 484.713440 1102.585327 516.971680 0.750280 1 plate
'''
# Crop every detection in memory (save=False: nothing is written by this call).
crops = results.crop(save=False)

# Keep only the plate detections. Each crop's label is matched by substring,
# as before; its 'conf' entry is assumed to be a scalar confidence -- confirm
# against the installed YOLOv5 version.
plate_crops = [crop for crop in crops if 'plate' in crop['label']]

if not plate_crops:
    # Without this guard the OCR step below would fail on an undefined file name.
    print("No plate detected; skipping OCR.")
else:
    # Use the most confident plate so the saved image, the reported confidence
    # and the OCR input all refer to the same detection. Previously several
    # plates saved within the same second overwrote one file, and the reported
    # confidence was guessed from the first two DataFrame rows.
    best_plate = max(plate_crops, key=lambda crop: float(crop['conf']))

    # Make sure the output directory exists before saving into it.
    os.makedirs(dir_path, exist_ok=True)
    t = time.strftime("%Y%m%d-%H%M%S", time.localtime())
    file_name = f'plate_{t}.png'
    # One path is used for both saving and OCR so the two cannot diverge.
    plate_path = os.path.join(dir_path, file_name)
    im = Image.fromarray(best_plate['im'])
    im.save(plate_path, 'png')

    plate_name = 'plate'
    # Scale before rounding: int(round(c, 2) * 100) truncates values such as
    # 0.58 to 57 because 0.58 * 100 is 57.99999999999999 in floating point.
    plate_conf = round(float(best_plate['conf']) * 100)

    print("====== Crop Image Plate predict =======")
    print(f'{plate_name} 예측 확률 : {plate_conf}%')
    print("=======================================")

    azure = azure_ocr(plate_path)
    print("\n===== Crop Image OCR Read - Azure =====")
    print(f'Azure OCR 결과 : {azure}')
    print("=======================================")
Easy OCR을 학습시키면 인식률이 조금 좋아진다고는 하는데,
폰트가 하나뿐이라서 그렇게 많이 차이 날 것 같지는 않다.
추후 고도화 작업을 진행한다면 classification으로 학습을 진행해 보고 싶다.