""" Author: Khanh Phan Date: 2023-11-01 """ import time from os import listdir from os.path import ( isfile, join, ) import cv2 from src.postprocessing import postprocess_result from src.settings import ( IMAGE_FORMAT, OCR_JA, OCR_ML, ) from src.visualization import visualize_result def paddleOCR(path): """ perform ocr args: path(str): path to input folder return(str): text in markdown format """ # imgs = [f for f in listdir(path) if isfile(join(path, f))] imgs = [] for file in listdir(path): if isfile(join(path, file)) and file.endswith(IMAGE_FORMAT): imgs.append(file) for img_file in imgs: img_path = join(path, img_file) image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) """ cls = False: to improve the performance recognize text only from -90 to 90 degree """ time_start = time.time() result = OCR_JA.ocr(image, cls=True, det=True, rec=True) time_ocr = time.time() result = postprocess_result(image, result, OCR_ML) visualize_result(result, img_path) print( f"{img_file}\t{len(result[0])}\t" f"{time_ocr - time_start}\t{time.time() - time_ocr}", ) if __name__ == "__main__": path = "data/" paddleOCR(path)