Merve Noyan committed
Commit 521af34 · Parent: dc0d637
Files changed (2):
  1. app.py +20 -14
  2. papers.py +9 -10
app.py CHANGED
@@ -17,8 +17,8 @@ api = HfApi()
 paper_list = PaperList()
 
 path = api.hf_hub_download(repo_id=REPO_ID, filename=FILENAME, repo_type="dataset")
-df = pd.read_csv(path)
-paper_id_to_index = {str(row["id"]): i for i, row in df.iterrows()}
+actual_df = pd.read_csv(path)
+paper_id_to_index = {str(row["id"]): i for i, row in actual_df.iterrows()}
 
 
 with gr.Blocks() as demo_search:
@@ -65,17 +65,23 @@ def load_data(paper_id: str) -> tuple[str, str, str, str, str, str, str, str, st
         index = paper_id_to_index[paper_id]
     except KeyError:
         raise gr.Error(f"Paper ID {paper_id} not found.")
+
+    paper = actual_df.iloc[index]
 
-    paper = raw_data[index]
     return (
         paper["id"],
         paper["title"],
-        "\n".join(paper["authors"]),
+        paper["authors"],
         paper["arxiv_id"],
-        "\n".join(paper["GitHub"]),
-        "\n".join(paper["Space"]),
-        "\n".join(paper["Model"]),
-        "\n".join(paper["Dataset"]),
+        "\n".join([PaperList.create_link("GitHub", url) for url in paper["GitHub"]] if paper["GitHub"] != "[]" else " "),
+        "\n".join([PaperList.create_link(repo_id, f"https://huggingface.co/spaces/{repo_id}")
+                   for repo_id in paper["Space"]
+                   ] if paper["Space"] != "[]" else [" "]),
+        "\n".join([PaperList.create_link(repo_id, f"https://huggingface.co/{repo_id}") for repo_id in paper["Model"]]
+                  if paper["Model"] != "[]" else [" "]),
+        "\n".join([PaperList.create_link(repo_id, f"https://huggingface.co/datasets/{repo_id}") for repo_id in paper["Dataset"]
+                   ] if paper["Dataset"] != "[]" else [" "]
+                  )
     )
 
 
@@ -105,12 +111,12 @@ def create_pr(
 
     data = copy.deepcopy(df)
     data[index]["title"] = title.strip()
-    data[index]["authors"] = split_and_strip(authors)
+    data[index]["authors"] = authors
    data[index]["arxiv_id"] = arxiv_id.strip()
-    data[index]["GitHub"] = split_and_strip(github_links)
-    data[index]["Space"] = split_and_strip(space_ids)
-    data[index]["Model"] = split_and_strip(model_ids)
-    data[index]["Dataset"] = split_and_strip(dataset_ids)
+    data[index]["GitHub"] = github_links
+    data[index]["Space"] = space_ids
+    data[index]["Model"] = model_ids
+    data[index]["Dataset"] = dataset_ids
     with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
         data.to_csv(f)
 
@@ -201,4 +207,4 @@ with gr.Blocks(css="style.css") as demo:
     demo_edit.render()
 
 if __name__ == "__main__":
-    demo.queue(api_open=False).launch(show_api=False)
+    demo.queue(api_open=False).launch(show_api=False, debug=True)
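
Note on the != "[]" guards above: the GitHub/Space/Model/Dataset columns appear to round-trip through the CSV as stringified Python lists, so an empty cell compares equal to the literal string "[]". A minimal sketch of decoding such a cell back into a real list before iterating; parse_list_column is a hypothetical helper, not part of this commit:

    import ast

    def parse_list_column(value: str) -> list[str]:
        # Decode a cell like '["https://github.com/foo/bar"]' into a list;
        # empty ("[]") or malformed cells fall back to an empty list.
        try:
            parsed = ast.literal_eval(value)
        except (ValueError, SyntaxError):
            return []
        return parsed if isinstance(parsed, list) else []

    # parse_list_column('["https://github.com/foo/bar"]') -> ["https://github.com/foo/bar"]
    # parse_list_column("[]") -> []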
papers.py CHANGED
@@ -3,6 +3,7 @@ import operator
 import datasets
 import pandas as pd
 from huggingface_hub import HfApi
+from math import isnan
 
 api = HfApi()
 
@@ -26,9 +27,8 @@ class PaperList:
     @staticmethod
     def get_df() -> pd.DataFrame:
         df = datasets.load_dataset("CVPR2024/CVPR2024-papers", split="train").to_pandas()
-        df["authors_str"] = df["authors"].apply(lambda x: ", ".join(x))
         df["paper_page"] = df["arxiv_id"].apply(
-            lambda arxiv_id: f"https://huggingface.co/papers/{arxiv_id}" if arxiv_id else ""
+            lambda arxiv_id: f"https://huggingface.co/papers/{arxiv_id}" if not isnan(arxiv_id) else ""
         )
         return df
 
@@ -43,23 +43,22 @@ class PaperList:
             new_row = {
                 "ID": row["id"],
                 "Title": row["title"],
-                "Authors": ", ".join(row["authors"]),
-                "Paper page": PaperList.create_link(row["arxiv_id"], row["paper_page"]),
-                "GitHub": "\n".join([PaperList.create_link("GitHub", url) for url in row["GitHub"]]),
+                "Authors": row["authors"],
+                "Paper page": PaperList.create_link(row["arxiv_id"], row["paper_page"]) if not isnan(row["arxiv_id"]) else " ",
+                "GitHub": "\n".join([PaperList.create_link("GitHub", url) for url in row["GitHub"]] if row["GitHub"] != "[]" else " "),
                 "Spaces": "\n".join(
                     [
                         PaperList.create_link(repo_id, f"https://huggingface.co/spaces/{repo_id}")
                         for repo_id in row["Space"]
-                    ]
-                ),
+                    ] if row["Space"] != "[]" else [" "]),
                 "Models": "\n".join(
                     [PaperList.create_link(repo_id, f"https://huggingface.co/{repo_id}") for repo_id in row["Model"]]
-                ),
+                    if row["Model"] != "[]" else [" "]),
                 "Datasets": "\n".join(
                     [
                         PaperList.create_link(repo_id, f"https://huggingface.co/datasets/{repo_id}")
                         for repo_id in row["Dataset"]
-                    ]
+                    ] if row["Dataset"] != "[]" else [" "]
                 ),
             }
             rows.append(new_row)
@@ -80,5 +79,5 @@ class PaperList:
     ) -> pd.DataFrame:
         df = self.df_raw.copy()
         df = df[df["title"].str.contains(title_search_query, case=False)]
-        df = df[df["authors_str"].str.contains(author_search_query, case=False)]
+        df = df[df["authors"].str.contains(author_search_query, case=False)]
         return self.prettify(df)
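
Note on the isnan guards above: papers without an arXiv ID apparently load as NaN after to_pandas(), which is why the plain truthiness check was replaced with math.isnan. An equivalent guard can be written with pd.isna, which accepts floats, strings, and None alike and so avoids math.isnan's float-only restriction; this is a sketch under that NaN assumption, not what the commit ships:

    import pandas as pd

    def paper_page_url(arxiv_id) -> str:
        # Build the Hugging Face paper-page URL, or return "" when the ID
        # is missing (NaN/None). pd.isna handles non-float scalars too.
        if pd.isna(arxiv_id):
            return ""
        return f"https://huggingface.co/papers/{arxiv_id}"

    # Usage (hypothetical): df["paper_page"] = df["arxiv_id"].apply(paper_page_url)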