Merve Noyan committed · Commit 521af34
1 Parent(s): dc0d637

fixes
app.py CHANGED
@@ -17,8 +17,8 @@ api = HfApi()
 paper_list = PaperList()
 
 path = api.hf_hub_download(repo_id=REPO_ID, filename=FILENAME, repo_type="dataset")
-
-paper_id_to_index = {str(row["id"]): i for i, row in
+actual_df = pd.read_csv(path)
+paper_id_to_index = {str(row["id"]): i for i, row in actual_df.iterrows()}
 
 
 with gr.Blocks() as demo_search:
@@ -65,17 +65,23 @@ def load_data(paper_id: str) -> tuple[str, str, str, str, str, str, str, str, st
         index = paper_id_to_index[paper_id]
     except KeyError:
         raise gr.Error(f"Paper ID {paper_id} not found.")
-
-    paper = raw_data[index]
+
+    paper = actual_df.iloc[index]
 
     return (
         paper["id"],
        paper["title"],
-
+        paper["authors"],
         paper["arxiv_id"],
-
-        "\n".join(
-
-
+        "\n".join([PaperList.create_link("GitHub", url) for url in paper["GitHub"]] if paper["GitHub"]!="[]" else " "),
+        "\n".join([PaperList.create_link(repo_id, f"https://huggingface.co/spaces/{repo_id}")
+                   for repo_id in paper["Space"]
+                   ] if paper["Space"] != "[]" else [" "]),
+        "\n".join([PaperList.create_link(repo_id, f"https://huggingface.co/{repo_id}") for repo_id in paper["Model"]]
+                  if paper["Model"] != "[]" else [" "]),
+        "\n".join([PaperList.create_link(repo_id, f"https://huggingface.co/datasets/{repo_id}") for repo_id in paper["Dataset"]
+                   ] if paper["Dataset"] != "[]" else [" "]
+        )
     )
@@ -105,12 +111,12 @@ def create_pr(
 
     data = copy.deepcopy(df)
     data[index]["title"] = title.strip()
-    data[index]["authors"] =
+    data[index]["authors"] = authors
     data[index]["arxiv_id"] = arxiv_id.strip()
-    data[index]["GitHub"] =
-    data[index]["Space"] =
-    data[index]["Model"] =
-    data[index]["Dataset"] =
+    data[index]["GitHub"] = github_links
+    data[index]["Space"] = space_ids
+    data[index]["Model"] = model_ids
+    data[index]["Dataset"] = dataset_ids
     with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
         data.to_csv(f)
 
@@ -201,4 +207,4 @@ with gr.Blocks(css="style.css") as demo:
     demo_edit.render()
 
 if __name__ == "__main__":
-    demo.queue(api_open=False).launch(show_api=False)
+    demo.queue(api_open=False).launch(show_api=False, debug=True)
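For orientation, the app.py hunks replace the old raw_data lookup with a CSV read plus an id-to-index map: the downloaded file is read once into actual_df, paper IDs are mapped to positional indices, and load_data fetches a row with iloc. A minimal runnable sketch of that pattern, with a hypothetical in-memory CSV standing in for the file fetched via api.hf_hub_download:

import io

import pandas as pd

# Hypothetical stand-in for the CSV fetched via api.hf_hub_download.
csv_file = io.StringIO("id,title\n42,Paper A\n57,Paper B\n")
actual_df = pd.read_csv(csv_file)

# Build the id -> positional index map once at startup, as the diff does.
paper_id_to_index = {str(row["id"]): i for i, row in actual_df.iterrows()}

# A lookup is then a dict hit plus an iloc fetch; a missing ID raises
# KeyError, which load_data converts into gr.Error.
paper = actual_df.iloc[paper_id_to_index["57"]]
assert paper["title"] == "Paper B"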
papers.py CHANGED
@@ -3,6 +3,7 @@ import operator
 import datasets
 import pandas as pd
 from huggingface_hub import HfApi
+from math import isnan
 
 api = HfApi()
 
@@ -26,9 +27,8 @@ class PaperList:
     @staticmethod
     def get_df() -> pd.DataFrame:
         df = datasets.load_dataset("CVPR2024/CVPR2024-papers", split="train").to_pandas()
-        df["authors_str"] = df["authors"].apply(lambda x: ", ".join(x))
         df["paper_page"] = df["arxiv_id"].apply(
-            lambda arxiv_id: f"https://huggingface.co/papers/{arxiv_id}" if arxiv_id else ""
+            lambda arxiv_id: f"https://huggingface.co/papers/{arxiv_id}" if not isnan(arxiv_id) else ""
         )
         return df
 
@@ -43,23 +43,22 @@ class PaperList:
             new_row = {
                 "ID": row["id"],
                 "Title": row["title"],
-                "Authors":
-                "Paper page": PaperList.create_link(row["arxiv_id"], row["paper_page"]),
-                "GitHub": "\n".join([PaperList.create_link("GitHub", url) for url in row["GitHub"]]),
+                "Authors": row["authors"],
+                "Paper page": PaperList.create_link(row["arxiv_id"], row["paper_page"]) if not isnan(row["arxiv_id"]) else " ",
+                "GitHub": "\n".join([PaperList.create_link("GitHub", url) for url in row["GitHub"]] if row["GitHub"]!="[]" else " "),
                 "Spaces": "\n".join(
                     [
                         PaperList.create_link(repo_id, f"https://huggingface.co/spaces/{repo_id}")
                         for repo_id in row["Space"]
-                    ]
-                ),
+                    ] if row["Space"] != "[]" else [" "]),
                 "Models": "\n".join(
                     [PaperList.create_link(repo_id, f"https://huggingface.co/{repo_id}") for repo_id in row["Model"]]
-
+                    if row["Model"] != "[]" else [" "]) ,
                 "Datasets": "\n".join(
                     [
                         PaperList.create_link(repo_id, f"https://huggingface.co/datasets/{repo_id}")
                         for repo_id in row["Dataset"]
-                    ]
+                    ] if row["Dataset"] != "[]" else [" "]
                 ),
             }
             rows.append(new_row)
@@ -80,5 +79,5 @@ class PaperList:
     ) -> pd.DataFrame:
         df = self.df_raw.copy()
         df = df[df["title"].str.contains(title_search_query, case=False)]
-        df = df[df["
+        df = df[df["authors"].str.contains(author_search_query, case=False)]
         return self.prettify(df)
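Two readings are implicit in the papers.py hunks: comparing cells such as row["GitHub"] against the literal "[]" suggests the link columns arrive as stringified lists, and guarding arxiv_id with isnan suggests missing IDs surface as float NaN after to_pandas(). Both are inferences from the comparisons in the diff, not documented schema. A hedged sketch, using two hypothetical helpers, of how cells could be handled under those assumptions:

import ast
from math import isnan

def parse_repo_ids(cell: str) -> list[str]:
    # Assumes the stringified-list format implied by the "[]" checks above;
    # literal_eval turns a non-empty cell like "['org/demo']" into a list.
    return [] if cell == "[]" else ast.literal_eval(cell)

def paper_page_url(arxiv_id) -> str:
    # math.isnan only accepts numbers, so test the type first; a present
    # (string) arxiv_id falls through to the URL branch.
    if isinstance(arxiv_id, float) and isnan(arxiv_id):
        return ""
    return f"https://huggingface.co/papers/{arxiv_id}"

assert parse_repo_ids("[]") == []
assert parse_repo_ids("['org/demo']") == ["org/demo"]
assert paper_page_url(float("nan")) == ""
assert paper_page_url("2401.12345").endswith("2401.12345")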
|