Spaces:
Sleeping
Sleeping
| """ | |
| Author: Khanh Phan | |
| Date: 2023-11-01 | |
| """ | |
| import time | |
| from os import listdir | |
| from os.path import ( | |
| isfile, | |
| join, | |
| ) | |
| import cv2 | |
| from src.postprocessing import postprocess_result | |
| from src.settings import ( | |
| IMAGE_FORMAT, | |
| OCR_JA, | |
| OCR_ML, | |
| ) | |
| from src.visualization import visualize_result | |
def paddleOCR(path: str) -> None:
    """
    Run OCR on every image located directly inside a folder.

    For each regular file in ``path`` whose name ends with ``IMAGE_FORMAT``
    the image is loaded in grayscale, passed to ``OCR_JA.ocr``, refined by
    ``postprocess_result`` (which also receives ``OCR_ML``), handed to
    ``visualize_result``, and a tab-separated summary line is printed:
    file name, ``len(result[0])``, OCR time, post-processing + visualization
    time (both in seconds).

    args:
        path(str): path to input folder
    return:
        None. Output is produced through ``visualize_result`` and stdout.
    """
    imgs = [
        name
        for name in listdir(path)
        if isfile(join(path, name)) and name.endswith(IMAGE_FORMAT)
    ]

    for img_file in imgs:
        img_path = join(path, img_file)
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports an unreadable or corrupt file by returning
            # None instead of raising; skip it so one bad file does not
            # abort the whole batch.
            print(f"{img_file}\tskipped: could not be read as an image")
            continue

        # NOTE(review): an earlier note here recommended cls=False for
        # speed ("recognize text only from -90 to 90 degree"), but the call
        # below passes cls=True. Confirm which setting is intended.
        time_start = time.perf_counter()
        result = OCR_JA.ocr(image, cls=True, det=True, rec=True)
        time_ocr = time.perf_counter()

        result = postprocess_result(image, result, OCR_ML)
        visualize_result(result, img_path)

        # perf_counter is monotonic, so the elapsed values cannot go
        # negative if the system clock is adjusted mid-run.
        print(
            f"{img_file}\t{len(result[0])}\t"
            f"{time_ocr - time_start}\t{time.perf_counter() - time_ocr}",
        )
if __name__ == "__main__":
    # Script entry point: run OCR over the images in the "data/" folder,
    # resolved relative to the current working directory.
    paddleOCR("data/")