Bravo6B9 committed on
Commit
dbecf47
·
verified ·
1 Parent(s): 0c4f888

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -1,13 +1,20 @@
1
  import gradio as gr
 
2
  from PIL import Image
3
- import pytesseract
4
- from transformers import pipeline
5
 
6
- # Load LLM pipeline (small model for demo)
 
 
 
 
7
  llm = pipeline("text-generation", model="distilgpt2")
8
 
9
  def process_image(image):
10
- text = pytesseract.image_to_string(Image.fromarray(image))
 
 
 
 
11
  llm_output = llm(text, max_length=100, do_sample=True)[0]["generated_text"]
12
  return f"OCR Text:\n{text}\n\nLLM Response:\n{llm_output}"
13
 
 
1
  import gradio as gr
2
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel, pipeline
3
  from PIL import Image
 
 
4
 
5
+ # Load OCR model
6
+ processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-stage1')
7
+ model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-stage1')
8
+
9
+ # Load LLM
10
  llm = pipeline("text-generation", model="distilgpt2")
11
 
12
  def process_image(image):
13
+ pil_image = Image.fromarray(image)
14
+ pixel_values = processor(images=pil_image, return_tensors="pt").pixel_values
15
+ generated_ids = model.generate(pixel_values)
16
+ text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
17
+
18
  llm_output = llm(text, max_length=100, do_sample=True)[0]["generated_text"]
19
  return f"OCR Text:\n{text}\n\nLLM Response:\n{llm_output}"
20