import ast
import operator
import re
from math import isnan

import datasets
import pandas as pd
from huggingface_hub import HfApi

# Hugging Face Hub API client, instantiated once at import time.
# NOTE(review): nothing in the visible part of this module references `api`;
# presumably it is used by code that imports this module — confirm before removing.
api = HfApi()


class PaperList:
    """Table of CVPR 2024 papers with link formatting and text search.

    ``df_raw`` holds the dataset as loaded (plus a derived ``paper_page``
    column); ``df_prettified`` holds the display version whose link columns
    contain HTML anchors. ``search`` filters ``df_raw`` and returns a freshly
    prettified frame.
    """

    # (display column name, datatype label) pairs, in display order.
    COLUMN_INFO = [
        ["ID", "str"],
        ["Title", "str"],
        ["Authors", "str"],
        ["Paper page", "markdown"],
        ["GitHub", "markdown"],
        ["Spaces", "markdown"],
        ["Models", "markdown"],
        ["Datasets", "markdown"],
    ]

    # Rendered in a link cell that has nothing to link to.
    _EMPTY_CELL = " "

    def __init__(self):
        """Load the dataset and build the prettified display frame."""
        self.df_raw = self.get_df()
        self.df_prettified = self.prettify(self.df_raw)

    @staticmethod
    def _is_missing(value: object) -> bool:
        """Return True for None, pd.NA, float NaN, or a blank string.

        Unlike ``math.isnan`` this never raises on strings or None, so it is
        safe whichever dtype the column happens to have.
        """
        if value is None or value is pd.NA:
            return True
        if isinstance(value, str):
            return not value.strip()
        if isinstance(value, float):
            return isnan(value)
        return False

    @staticmethod
    def _as_list(value: object) -> list:
        """Normalise a multi-valued cell to a plain list of its entries.

        Accepts real sequences (list, tuple, array), a stringified list such
        as ``"['a', 'b']"`` or ``"[]"``, a single bare string (treated as one
        entry), and missing values (treated as no entries).
        """
        if PaperList._is_missing(value):
            return []
        if isinstance(value, str):
            text = value.strip()
            if text.startswith("["):
                try:
                    parsed = ast.literal_eval(text)
                except (ValueError, SyntaxError):
                    # Not a valid literal: keep the text as a single entry
                    # rather than iterating over its characters.
                    return [text]
                return [str(item) for item in parsed if not PaperList._is_missing(item)]
            return [text]
        try:
            items = list(value)
        except TypeError:
            # Non-iterable scalar: nothing sensible to link to.
            return []
        return [item for item in items if not PaperList._is_missing(item)]

    @staticmethod
    def _join_links(links: list) -> str:
        """Join anchors with newlines, or return the empty-cell placeholder."""
        return "\n".join(links) if links else PaperList._EMPTY_CELL

    @staticmethod
    def get_df() -> pd.DataFrame:
        """Load the ``CVPR2024/CVPR2024-papers`` train split as a DataFrame.

        Adds a ``paper_page`` column holding the Hugging Face papers URL for
        rows that have an ``arxiv_id`` and an empty string otherwise.
        """
        df = datasets.load_dataset("CVPR2024/CVPR2024-papers", split="train").to_pandas()
        df["paper_page"] = df["arxiv_id"].apply(
            lambda arxiv_id: ""
            if PaperList._is_missing(arxiv_id)
            else f"https://huggingface.co/papers/{arxiv_id}"
        )
        return df

    @staticmethod
    def create_link(text: str, url: str) -> str:
        """Return an HTML anchor for ``url`` that opens in a new tab.

        Neither argument is HTML-escaped; both are interpolated verbatim.
        """
        return f'<a href="{url}" target="_blank">{text}</a>'

    @staticmethod
    def prettify(df: pd.DataFrame) -> pd.DataFrame:
        """Build the display frame (columns per ``COLUMN_INFO``) from raw rows.

        ``df`` must provide the columns ``id``, ``title``, ``authors``,
        ``arxiv_id``, ``paper_page``, ``GitHub``, ``Space``, ``Model`` and
        ``Dataset``. Link cells with no entries contain a single space.
        """
        rows = []
        for row in df.to_dict("records"):
            arxiv_id = row["arxiv_id"]
            if PaperList._is_missing(arxiv_id):
                paper_page = PaperList._EMPTY_CELL
            else:
                paper_page = PaperList.create_link(arxiv_id, row["paper_page"])

            github_links = [
                PaperList.create_link("GitHub", url) for url in PaperList._as_list(row["GitHub"])
            ]
            space_links = [
                PaperList.create_link(repo_id, f"https://huggingface.co/spaces/{repo_id}")
                for repo_id in PaperList._as_list(row["Space"])
            ]
            model_links = [
                PaperList.create_link(repo_id, f"https://huggingface.co/{repo_id}")
                for repo_id in PaperList._as_list(row["Model"])
            ]
            dataset_links = [
                PaperList.create_link(repo_id, f"https://huggingface.co/datasets/{repo_id}")
                for repo_id in PaperList._as_list(row["Dataset"])
            ]

            rows.append(
                {
                    "ID": row["id"],
                    "Title": row["title"],
                    "Authors": row["authors"],
                    "Paper page": paper_page,
                    "GitHub": PaperList._join_links(github_links),
                    "Spaces": PaperList._join_links(space_links),
                    "Models": PaperList._join_links(model_links),
                    "Datasets": PaperList._join_links(dataset_links),
                }
            )
        return pd.DataFrame(rows, columns=PaperList.get_column_names())

    @staticmethod
    def get_column_names():
        """Return the display column names in ``COLUMN_INFO`` order."""
        return list(map(operator.itemgetter(0), PaperList.COLUMN_INFO))

    def get_column_datatypes(self, column_names: list[str]) -> list[str]:
        """Return the datatype label for each name in ``column_names``.

        Raises ``KeyError`` for a name that is not in ``COLUMN_INFO``.
        """
        mapping = dict(self.COLUMN_INFO)
        return [mapping[name] for name in column_names]

    @staticmethod
    def _contains(series: pd.Series, query: str) -> pd.Series:
        """Case-insensitive containment mask that tolerates bad input.

        ``query`` is used as a regular expression when it compiles; otherwise
        (e.g. ``"C++"`` or ``"("``) it is matched literally instead of
        raising. Missing values in ``series`` never match.
        """
        try:
            re.compile(query)
        except re.error:
            use_regex = False
        else:
            use_regex = True
        return series.str.contains(query, case=False, na=False, regex=use_regex)

    def search(
        self,
        title_search_query: str,
        author_search_query: str,
    ) -> pd.DataFrame:
        """Return prettified rows whose title and authors match the queries.

        Matching is case-insensitive. An empty query places no restriction on
        its column, so two empty queries return every row.
        """
        df = self.df_raw
        if title_search_query:
            df = df[self._contains(df["title"], title_search_query)]
        if author_search_query:
            df = df[self._contains(df["authors"], author_search_query)]
        return self.prettify(df)