kleverocr / ocr.py
pmkhanh7890's picture
fix gradio app
555cf98
"""
Author: Khanh Phan
Date: 2023-11-01
"""
import time
from os import listdir
from os.path import (
isfile,
join,
)
import cv2
from src.postprocessing import postprocess_result
from src.settings import (
IMAGE_FORMAT,
OCR_JA,
OCR_ML,
)
from src.visualization import visualize_result
def paddleOCR(path):
    """
    Run OCR on every image file located directly inside a folder.

    For each image the function calls ``OCR_JA.ocr``, passes the output
    through ``postprocess_result`` (together with ``OCR_ML``), hands the
    result to ``visualize_result`` and prints one tab-separated line:
    file name, ``len(result[0])``, seconds spent in the OCR call, and
    seconds spent in post-processing plus visualization.

    Files that OpenCV cannot read are reported and skipped so that one
    bad file does not abort the whole batch.

    args:
        path(str): path to input folder
    return:
        None. Results are only visualized and printed.
    """
    # Keep regular files whose name ends with IMAGE_FORMAT (sub-folders are
    # ignored). Sorting makes the processing order independent of the
    # arbitrary order returned by listdir.
    imgs = sorted(
        file
        for file in listdir(path)
        if isfile(join(path, file)) and file.endswith(IMAGE_FORMAT)
    )

    for img_file in imgs:
        img_path = join(path, img_file)
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals an unreadable/undecodable file by returning
            # None instead of raising, so it has to be checked explicitly.
            print(f"{img_file}\tskipped: cannot read image")
            continue

        # NOTE(review): an earlier note here recommended cls=False to improve
        # performance (recognizing text only from -90 to 90 degrees), but the
        # call keeps cls=True. Left unchanged; confirm which is intended.
        time_start = time.perf_counter()
        result = OCR_JA.ocr(image, cls=True, det=True, rec=True)
        time_ocr = time.perf_counter()

        result = postprocess_result(image, result, OCR_ML)
        visualize_result(result, img_path)

        # The second duration covers both post-processing and visualization.
        print(
            f"{img_file}\t{len(result[0])}\t"
            f"{time_ocr - time_start}\t{time.perf_counter() - time_ocr}",
        )
if __name__ == "__main__":
    # Script entry point: run OCR over the images in the default data folder.
    input_dir = "data/"
    paddleOCR(input_dir)