Seth0330 committed on
Commit
f08e772
·
verified ·
1 Parent(s): d24a0cf

Update backend/app/main.py

Browse files
Files changed (1) hide show
  1. backend/app/main.py +171 -7
backend/app/main.py CHANGED
@@ -1,9 +1,18 @@
1
  import os
2
- from fastapi import FastAPI
 
 
 
3
  from fastapi.middleware.cors import CORSMiddleware
4
- from .db import Base, engine
 
 
 
 
 
 
5
 
6
- # Ensure data directory exists (for SQLite file)
7
  os.makedirs("data", exist_ok=True)
8
 
9
  # Create tables
@@ -11,7 +20,7 @@ Base.metadata.create_all(bind=engine)
11
 
12
  app = FastAPI(title="Document Capture Demo – Backend")
13
 
14
- # Allow CORS from anywhere (fine for a demo Space)
15
  app.add_middleware(
16
  CORSMiddleware,
17
  allow_origins=["*"],
@@ -21,10 +30,165 @@ app.add_middleware(
21
  )
22
 
23
 
 
 
 
 
 
 
 
 
24
@app.get("/ping")
def ping():
    """
    Basic health check.

    Open this route in a browser to verify that the backend is up and
    answering requests.
    """
    health = {"status": "ok", "message": "backend alive"}
    return health
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import time
3
+ from typing import List, Dict
4
+
5
+ from fastapi import FastAPI, UploadFile, File, Depends
6
  from fastapi.middleware.cors import CORSMiddleware
7
+ from fastapi.staticfiles import StaticFiles
8
+ from sqlalchemy.orm import Session
9
+
10
+ from .db import Base, engine, SessionLocal
11
+ from .models import ExtractionRecord
12
+ from .schemas import ExtractionRecordBase, ExtractionStage
13
+ from .openrouter_client import extract_fields_from_document
14
 
15
+ # Ensure data dir exists for SQLite
16
  os.makedirs("data", exist_ok=True)
17
 
18
  # Create tables
 
20
 
21
  app = FastAPI(title="Document Capture Demo – Backend")
22
 
23
+ # CORS (for convenience we allow all origins; tighten this before production)
24
  app.add_middleware(
25
  CORSMiddleware,
26
  allow_origins=["*"],
 
30
  )
31
 
32
 
33
def get_db():
    """
    Yield a database session and close it once the consumer is finished.

    Intended for use with ``Depends(get_db)``, as the endpoints in this
    file do.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        # Runs even if the consumer raises, so the session is always closed.
        session.close()
39
+
40
+
41
@app.get("/ping")
def ping():
    """Liveness probe: report that the backend is up."""
    payload = {"status": "ok", "message": "backend alive"}
    return payload
45
+
46
+
47
def make_stages(total_ms: int, status: str) -> Dict[str, ExtractionStage]:
    """
    Fabricate per-stage timing entries for the History UI.

    No per-stage measurements are taken, so ``total_ms`` is divided across
    four fixed stages using constant shares (15% / 55% / 20% / 10%), each
    truncated to an int. A non-positive ``total_ms`` is replaced by 1000
    before splitting.

    When ``status`` is anything other than "completed", the AI analysis
    stage is reported as "failed" and the two later stages as "skipped";
    the uploading stage is always reported as "completed".
    """
    duration_ms = 1000 if total_ms <= 0 else total_ms
    succeeded = status == "completed"

    # (stage key, share of total, status when the run did not complete, variation)
    layout = (
        ("uploading", 0.15, "completed", "normal"),
        ("aiAnalysis", 0.55, "failed", "normal"),
        ("dataExtraction", 0.2, "skipped", "fast"),
        ("outputRendering", 0.1, "skipped", "normal"),
    )

    return {
        key: ExtractionStage(
            time=int(duration_ms * share),
            status="completed" if succeeded else fallback_status,
            variation=variation,
        )
        for key, share, fallback_status, variation in layout
    }
77
+
78
+
79
@app.post("/api/extract")
async def extract_document(
    file: UploadFile = File(...),
    db: Session = Depends(get_db),
):
    """
    Main extraction endpoint used by the Dashboard.

    Steps:
      1) Read the uploaded file into memory.
      2) Call OpenRouter + Qwen3-VL through ``extract_fields_from_document``,
         passing the file bytes, content type and filename.
      3) Store an ``ExtractionRecord`` describing the attempt.
      4) Return the extraction result, metadata and synthetic stage timings.

    Any exception raised by the extraction call, or while reading its
    result, is caught: the attempt is stored and returned with status
    "failed", zero confidence, no fields, and the exception text in
    ``errorMessage``.
    """
    # perf_counter() is monotonic, so the measured duration cannot go negative
    # or jump when the system clock is adjusted (time.time() can).
    started = time.perf_counter()

    def elapsed_ms() -> int:
        """Milliseconds elapsed since the request started being handled."""
        return int((time.perf_counter() - started) * 1000)

    content = await file.read()
    content_type = file.content_type or "application/octet-stream"
    size_mb = len(content) / 1024 / 1024
    size_str = f"{size_mb:.2f} MB"

    try:
        extracted = await extract_fields_from_document(content, content_type, file.filename)
        total_ms = elapsed_ms()

        # 90 is the fallback confidence when the result has no "confidence" key.
        confidence = float(extracted.get("confidence", 90))
        fields = extracted.get("fields", {})
        # Only a dict of fields is counted; any other shape counts as zero.
        fields_extracted = len(fields) if isinstance(fields, dict) else 0

        status = "completed"
        error_message = None
    except Exception as e:
        # Deliberately broad: every failure is recorded in the history
        # instead of surfacing as an unhandled server error.
        total_ms = elapsed_ms()
        confidence = 0.0
        fields = {}
        fields_extracted = 0
        status = "failed"
        error_message = str(e)

    # Save record to DB.
    # NOTE(review): raw_output stores the Python repr of ``fields``, not JSON;
    # confirm that is what readers of this column expect.
    # NOTE(review): these Session calls look synchronous inside an async
    # handler; confirm blocking here is acceptable.
    rec = ExtractionRecord(
        file_name=file.filename,
        file_type=content_type,
        file_size=size_str,
        status=status,
        confidence=confidence,
        fields_extracted=fields_extracted,
        total_time_ms=total_ms,
        raw_output=str(fields),
        error_message=error_message,
    )
    db.add(rec)
    db.commit()
    # Reload the row so generated values such as ``id`` are available.
    db.refresh(rec)

    stages = make_stages(total_ms, status)

    # Response shape that the frontend will consume.
    return {
        "id": rec.id,
        "fileName": rec.file_name,
        "fileType": rec.file_type,
        "fileSize": rec.file_size,
        "status": status,
        "confidence": confidence,
        "fieldsExtracted": fields_extracted,
        "totalTime": total_ms,
        "fields": fields,
        "stages": {k: v.dict() for k, v in stages.items()},
        "errorMessage": error_message,
    }
147
+
148
+
149
@app.get("/api/history", response_model=List[ExtractionRecordBase])
def get_history(db: Session = Depends(get_db)):
    """
    Serve the History page with up to 100 extraction records, newest first.

    Stage data is synthetic: it is rebuilt for each record by
    ``make_stages`` from the stored total time and status. Falsy column
    values are replaced with defaults (empty string, 0, or "completed").
    """
    newest_first = ExtractionRecord.created_at.desc()
    rows = db.query(ExtractionRecord).order_by(newest_first).limit(100).all()

    return [
        ExtractionRecordBase(
            id=row.id,
            fileName=row.file_name,
            fileType=row.file_type or "",
            fileSize=row.file_size or "",
            extractedAt=row.created_at,
            status=row.status or "completed",
            confidence=row.confidence or 0.0,
            fieldsExtracted=row.fields_extracted or 0,
            totalTime=row.total_time_ms or 0,
            # Stages are derived with a 1000 ms fallback, whereas totalTime
            # itself falls back to 0.
            stages=make_stages(row.total_time_ms or 1000, row.status or "completed"),
            errorMessage=row.error_message,
        )
        for row in rows
    ]
181
+
182
+
183
# Serve the built React frontend, when present.
# The Dockerfile copies the Vite build output into backend/frontend_dist,
# which is the "frontend_dist" folder one level above this file's directory.
frontend_dir = os.path.join(
    os.path.dirname(os.path.dirname(__file__)),
    "frontend_dist",
)

# Mount only if the build exists, so the app still starts without it.
if os.path.isdir(frontend_dir):
    app.mount("/", StaticFiles(directory=frontend_dir, html=True), name="frontend")