caokai1073 committed on
Commit
374d0bb
·
verified ·
1 Parent(s): c7acc8d

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +41 -20
src/streamlit_app.py CHANGED
@@ -18,56 +18,64 @@ Upload a **CSV** file with columns `Peptide` and `HLA`,
18
  or a **FASTA** file containing peptide sequences (headers optionally include HLA type).
19
  """)
20
 
21
- import os
 
 
 
 
 
 
 
 
 
 
22
  os.environ["HF_HOME"] = "/data/huggingface"
23
  os.environ["TRANSFORMERS_CACHE"] = "/data/huggingface"
24
  os.environ["TORCH_HOME"] = "/data/huggingface"
25
- os.environ["ESM_CACHE_DIR"] = "/data/phla_cache"
26
- os.makedirs("/data/phla_cache", exist_ok=True)
27
 
28
  # ==============================
29
- # 模型加载函数(缓存)
30
  # ==============================
31
  @st.cache_resource
32
- def get_model():
33
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
34
 
35
- # 尝试从本地加载,如果失败则从 HF Hub 下载
36
- local_path = "/app/src/model.pt"
37
  if not os.path.exists(local_path):
38
- st.warning("Model not found locally. Downloading from Hugging Face Hub...")
 
39
  local_path = hf_hub_download(
40
- repo_id="caokai1073/StriMap", # 替换为你的 Space repo
41
- filename="src/model.pt"
 
42
  )
43
 
44
  model, device = load_model(local_path, device=device)
45
  return model, device
46
 
47
- model, device = get_model()
48
 
49
  # ==============================
50
- # 文件上传(使用 /tmp 临时目录)
51
  # ==============================
52
- uploaded_file = st.file_uploader("Upload CSV or FASTA", type=["csv", "fasta"])
53
 
54
  if uploaded_file:
55
- # 将上传文件保存到可写的 /tmp 路径
56
- temp_path = os.path.join("/tmp", uploaded_file.name)
57
  with open(temp_path, "wb") as f:
58
  f.write(uploaded_file.getbuffer())
59
 
60
  # ==============================
61
  # 文件解析
62
  # ==============================
63
- if uploaded_file.name.endswith(".csv"):
64
  df = pd.read_csv(temp_path)
65
  else:
66
  seqs = []
67
  for rec in SeqIO.parse(temp_path, "fasta"):
68
  header = rec.id
69
  seq = str(rec.seq)
70
- # 尝试从header提取HLA,比如 ">HLA-A*02:01|SLLMWITQC"
71
  if "|" in header:
72
  hla, _ = header.split("|", 1)
73
  else:
@@ -79,10 +87,13 @@ if uploaded_file:
79
  st.dataframe(df.head())
80
 
81
  # ==============================
82
- # 模型预测
83
  # ==============================
84
  if st.button("🚀 Run Prediction"):
85
- with st.spinner("Running model inference..."):
 
 
 
86
  result_df = predict_from_df(df, model)
87
 
88
  st.success("✅ Prediction complete!")
@@ -97,4 +108,14 @@ if uploaded_file:
97
  data=csv,
98
  file_name="hla_binding_predictions.csv",
99
  mime="text/csv",
100
- )
 
 
 
 
 
 
 
 
 
 
 
18
  or a **FASTA** file containing peptide sequences (headers optionally include HLA type).
19
  """)
20
 
21
# ==============================
# Global path configuration
# ==============================
CACHE_DIR = "/data/phla_cache"
MODEL_DIR = "/app/src"
UPLOAD_DIR = "/data/uploads"

# Make sure every writable location exists before anything touches it.
for _writable_dir in (CACHE_DIR, MODEL_DIR, UPLOAD_DIR):
    os.makedirs(_writable_dir, exist_ok=True)

# Environment variables (route all model and ESM caches into /data).
for _cache_var in ("HF_HOME", "TRANSFORMERS_CACHE", "TORCH_HOME"):
    os.environ[_cache_var] = "/data/huggingface"
os.environ["ESM_CACHE_DIR"] = CACHE_DIR
 
36
 
37
# ==============================
# Model loading (lazy load + cache)
# ==============================
@st.cache_resource
def load_model_cached():
    """Resolve the StriMap checkpoint and load it once per session.

    The checkpoint is looked up at ``MODEL_DIR/model.pt`` first; when it
    is missing, ``model.pt`` is fetched from the dedicated model repo on
    the Hugging Face Hub. ``st.cache_resource`` keeps the loaded model
    alive across Streamlit reruns.

    Returns:
        tuple: ``(model, device)`` as produced by ``load_model``.
    """
    local_path = os.path.join(MODEL_DIR, "model.pt")
    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    if not os.path.exists(local_path):
        st.warning("🔄 Model not found locally. Downloading from Hugging Face model repo...")
        # ⚠️ Pull from the dedicated model repo, not the Space repo.
        # NOTE(review): with cache_dir set, hf_hub_download stores the file
        # in a hub-cache directory layout under MODEL_DIR, so the plain
        # MODEL_DIR/model.pt existence check above will not see it on a
        # later cold start — confirm this re-download is acceptable.
        local_path = hf_hub_download(
            repo_id="caokai1073/StriMap-model",
            filename="model.pt",
            cache_dir=MODEL_DIR,
        )

    model, device = load_model(local_path, device=device)
    return model, device
56
 
 
57
 
58
  # ==============================
59
+ # 上传文件(安全写入 /data/uploads)
60
  # ==============================
61
+ uploaded_file = st.file_uploader("📤 Upload CSV or FASTA", type=["csv", "fasta"])
62
 
63
  if uploaded_file:
64
+ safe_name = uploaded_file.name.replace(" ", "_")
65
+ temp_path = os.path.join(UPLOAD_DIR, safe_name)
66
  with open(temp_path, "wb") as f:
67
  f.write(uploaded_file.getbuffer())
68
 
69
  # ==============================
70
  # 文件解析
71
  # ==============================
72
+ if safe_name.endswith(".csv"):
73
  df = pd.read_csv(temp_path)
74
  else:
75
  seqs = []
76
  for rec in SeqIO.parse(temp_path, "fasta"):
77
  header = rec.id
78
  seq = str(rec.seq)
 
79
  if "|" in header:
80
  hla, _ = header.split("|", 1)
81
  else:
 
87
  st.dataframe(df.head())
88
 
89
  # ==============================
90
+ # 模型预测(延迟加载)
91
  # ==============================
92
  if st.button("🚀 Run Prediction"):
93
+ with st.spinner("🔄 Loading model (this may take ~1 min first time)..."):
94
+ model, device = load_model_cached()
95
+
96
+ with st.spinner("Running inference..."):
97
  result_df = predict_from_df(df, model)
98
 
99
  st.success("✅ Prediction complete!")
 
108
  data=csv,
109
  file_name="hla_binding_predictions.csv",
110
  mime="text/csv",
111
+ )
112
+
113
# ==============================
# Debug / data check (optional)
# ==============================
if st.sidebar.button("📁 List /data files"):
    # Walk the persistent volume and show every file path, so the
    # operator can verify that caches and uploads actually land in /data.
    # (Comprehension replaces the manual append loop — same traversal
    # order, same output.)
    files = [
        os.path.join(root, name)
        for root, _, names in os.walk("/data")
        for name in names
    ]
    st.sidebar.write(files)