|
|
|
|
|
from __future__ import annotations |
|
|
|
|
|
import os |
|
|
from pathlib import Path |
|
|
from typing import Dict |
|
|
|
|
|
|
|
|
# Preferred root for persisted data. base_dir() uses it only when the
# _is_writable() probe succeeds; otherwise it falls back to ./.persist.
_SPACE_PERSIST_ROOT = Path("/data") |
|
|
|
|
|
def _is_writable(p: Path) -> bool:
    """Report whether directory `p` can be created (if absent) and written to.

    The check is empirical: the directory is created together with any
    missing parents, a small probe file named `.write_test` is written inside
    it, and the probe is removed again. Any exception raised along the way is
    treated as "not writable" rather than propagated.

    Args:
        p: Directory to probe. It is created as a side effect when missing.

    Returns:
        True when every step succeeded, False otherwise.
    """
    try:
        p.mkdir(parents=True, exist_ok=True)
        probe = p.joinpath(".write_test")
        probe.write_text("ok", encoding="utf-8")
        probe.unlink(missing_ok=True)
    except Exception:
        # Best-effort probe: any failure simply means "do not use this path".
        writable = False
    else:
        writable = True
    return writable
|
|
|
|
|
def base_dir() -> Path:
    """Return the app's base persistence directory, creating it when missing.

    When `/data` passes the writability probe (expected on Hugging Face
    Spaces) the result is `/data/econsult`. Otherwise, e.g. for local/dev runs
    where `/data` is unavailable, the result is `./.persist/econsult`,
    relative to the current working directory.
    """
    parent = (
        _SPACE_PERSIST_ROOT
        if _is_writable(_SPACE_PERSIST_ROOT)
        else Path(".") / ".persist"
    )
    app_root = parent / "econsult"
    app_root.mkdir(parents=True, exist_ok=True)
    return app_root
|
|
|
|
|
def guidelines_dir() -> Path:
    """Return the `guidelines` directory under `base_dir()`, creating it if needed."""
    target = base_dir().joinpath("guidelines")
    target.mkdir(parents=True, exist_ok=True)
    return target
|
|
|
|
|
def faiss_index_dir() -> Path:
    """Return the `faiss_index` directory under `base_dir()`, creating it if needed."""
    target = base_dir().joinpath("faiss_index")
    target.mkdir(parents=True, exist_ok=True)
    return target
|
|
|
|
|
def exports_dir() -> Path:
    """Return the `exports` directory under `base_dir()`, creating it if needed."""
    target = base_dir().joinpath("exports")
    target.mkdir(parents=True, exist_ok=True)
    return target
|
|
|
|
|
def cases_dir() -> Path:
    """Return the `cases` directory under `base_dir()`, creating it if needed."""
    target = base_dir().joinpath("cases")
    target.mkdir(parents=True, exist_ok=True)
    return target
|
|
|
|
|
def audit_dir() -> Path:
    """Return the `audit` directory under `base_dir()`, creating it if needed."""
    target = base_dir().joinpath("audit")
    target.mkdir(parents=True, exist_ok=True)
    return target
|
|
|
|
|
def hf_cache_dir() -> Path:
    """Return the local Hugging Face cache directory, creating it if needed.

    The directory is `hf_cache` under `base_dir()`, i.e.
    `/data/econsult/hf_cache` when `/data` is writable and
    `./.persist/econsult/hf_cache` otherwise.
    """
    target = base_dir().joinpath("hf_cache")
    target.mkdir(parents=True, exist_ok=True)
    return target
|
|
|
|
|
def initialize_environment() -> Dict[str, str]:
    """Point the Hugging Face cache variables at the app cache and report settings.

    `HF_HOME`, `HF_DATASETS_CACHE` and `TRANSFORMERS_CACHE` are set to the
    directory returned by `hf_cache_dir()` only when they are not already
    present in the environment; existing values are left untouched.

    Returns:
        A mapping of the key environment settings (`HF_HOME`, `MODEL_ID`,
        `MODEL_FALLBACK_ID`, `FORCE_CPU_LLM`, `E2E_STUB`, `QUANT_MODE`), each
        read from the environment with the fallback listed below.
    """
    cache = hf_cache_dir()
    cache_text = str(cache)
    for variable in ("HF_HOME", "HF_DATASETS_CACHE", "TRANSFORMERS_CACHE"):
        # setdefault keeps any value the deployment has already configured.
        os.environ.setdefault(variable, cache_text)

    # (variable name, fallback used when the variable is unset), in report order.
    reported = (
        ("HF_HOME", ""),
        ("MODEL_ID", "google/medgemma-27b-text-it"),
        ("MODEL_FALLBACK_ID", "google/medgemma-4b-it"),
        ("FORCE_CPU_LLM", ""),
        ("E2E_STUB", ""),
        ("QUANT_MODE", "4bit"),
    )
    env = {name: os.environ.get(name, fallback) for name, fallback in reported}

    print(f"[paths] base_dir={base_dir()} hf_cache_dir={cache} HF_HOME={env['HF_HOME']}")
    return env
|
|
|
|
|
def describe_paths() -> Dict[str, str]:
    """Collect every important app path as a string, keyed by its helper's name.

    Intended as a convenience for the health page. Each helper is called in
    the order listed, so the corresponding directories are created as a side
    effect.
    """
    providers = (
        ("base_dir", base_dir),
        ("guidelines_dir", guidelines_dir),
        ("faiss_index_dir", faiss_index_dir),
        ("cases_dir", cases_dir),
        ("exports_dir", exports_dir),
        ("audit_dir", audit_dir),
        ("hf_cache_dir", hf_cache_dir),
    )
    return {label: str(provider()) for label, provider in providers}
|
|
|
|
|
|