1. PDF to PNG
!pip install PyMuPDF
import fitz
def pdf_to_png(pdf_file, output_folder, pdf_page_number = 0):
    """Render pages of a PDF file to PNG images inside ``output_folder``.

    Args:
        pdf_file: Path of the PDF to convert. Both ``\\`` and ``/`` are
            accepted as directory separators.
        output_folder: Directory the PNG files are written to.
        pdf_page_number: 1-based number of the single page to convert.
            The default ``0`` converts every page of the document.

    Output files are named ``<pdf name>_<i>_output.png`` when the whole
    document is converted (``i`` is the 0-based index from ``enumerate``)
    and ``<pdf name>_<n>_only_output.png`` for a single page (``n`` is the
    1-based number that was passed in).

    A ``ValueError`` raised during the conversion is not propagated; it is
    reported on stdout as ``Error: page not in document``.
    """
    # Function-scope imports keep this block self-contained.
    import ntpath
    import os

    # ntpath splits on both '\\' and '/', so Windows-style paths are parsed
    # the same way on every platform; splitext removes the real extension
    # instead of blindly dropping the last four characters.
    file_name = ntpath.splitext(ntpath.basename(pdf_file))[0]

    # The context manager closes the document even if rendering fails.
    with fitz.open(pdf_file) as doc:
        try:
            if pdf_page_number == 0:
                # No specific page requested: convert the whole document.
                for i, page in enumerate(doc):
                    img = page.get_pixmap()  # render the page to a pixmap
                    img.save(os.path.join(output_folder, f'{file_name}_{i}_output.png'))
                print('전체 변환')
            else:
                # load_page is 0-based, the public argument is 1-based.
                page = doc.load_page(pdf_page_number - 1)
                img = page.get_pixmap()  # render the page to a pixmap
                img.save(os.path.join(output_folder, f'{file_name}_{pdf_page_number}_only_output.png'))
                print(pdf_page_number, '페이지 변환')
        except ValueError:
            print('Error: page not in document')
# Example run: convert page 100 of test_2.pdf, writing the PNG next to it.
# NOTE(review): page 100 presumably exercises the "page not in document"
# path for a short sample PDF - confirm against the actual file.
pdf_file = r'C:\Users\tmax\Desktop\OCR_QA\000_test\image\png\test_2.pdf'
output_folder = r'C:\Users\tmax\Desktop\OCR_QA\000_test\image\png'
pdf_to_png(pdf_file, output_folder, pdf_page_number=100)
2. PNG to JPG
! pip install Pillow
from PIL import Image
def png_to_jpg(input_path, output_path):
    """Convert the PNG image at ``input_path`` to a JPEG at ``output_path``.

    Args:
        input_path: Path of the PNG file to read.
        output_path: Path of the JPEG file to write.

    The image is converted to RGB mode before it is saved as JPEG.
    Prints ``성공`` on success. Any exception is caught and reported on
    stdout instead of being raised, so a batch of conversions keeps going
    after a single bad file.
    """
    try:
        # The context manager releases the underlying file handle as soon
        # as the conversion is done, instead of leaving it open.
        with Image.open(input_path) as png_image:
            png_image.convert("RGB").save(output_path, "JPEG")
        print("성공")
    except Exception as e:
        # Deliberately broad: this is a best-effort, report-and-continue
        # boundary for the caller's loop.
        print(f"오류 발생: {e}")
# Base names of the receipt images to convert: receipt1 ... receipt7.
data_list = [f'receipt{n}' for n in range(1, 8)]

# Source (PNG) and destination (JPG) folders, each ending with a backslash.
# They do not change between iterations, so they are built once up front.
input_path = r'C:\Users\OCR_QA\000_test\image\png\receipt' + '\\'
output_path = r'C:\Users\OCR_QA\000_test\image\jpg\receipt' + '\\'

for a in data_list:
    # Same base name on both sides; only the folder and extension differ.
    input_file = f'{input_path}{a}.png'
    output_file = f'{output_path}{a}.jpg'
    png_to_jpg(input_file, output_file)
Every day I work with different documents, and right now I am writing my thesis. There is less than a month left before the defense, and I am very worried because I need a high score on it. I found a lot of useful information, but it was in PDF format, which does not suit me at all. So I found a company that offers document typing services, https://www.typingservice.org/, and I received the documents in the required format quickly and, most importantly, without errors. I recommend this site to everyone.