Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- README.md +7 -7
- assets/__init__.py +0 -0
- assets/custom_css.css +87 -0
- assets/leaderboard_data.json +0 -0
- requirements.txt +2 -0
- run.ipynb +1 -0
- run.py +244 -0
README.md
CHANGED
@@ -1,12 +1,12 @@
|
|
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.25.0
|
8 |
-
app_file:
|
9 |
pinned: false
|
|
|
10 |
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
|
2 |
---
|
3 |
+
title: mini_leaderboard_main
|
4 |
+
emoji: 🔥
|
5 |
+
colorFrom: indigo
|
6 |
+
colorTo: indigo
|
7 |
sdk: gradio
|
8 |
sdk_version: 4.25.0
|
9 |
+
app_file: run.py
|
10 |
pinned: false
|
11 |
+
hf_oauth: true
|
12 |
---
|
|
|
|
assets/__init__.py
ADDED
File without changes
|
assets/custom_css.css
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Hide the final AutoEvalColumn */
#llm-benchmark-tab-table table td:last-child,
#llm-benchmark-tab-table table th:last-child {
    display: none;
}

/* Cap the width of the first AutoEvalColumn so model names don't stretch the table */
table td:first-child,
table th:first-child {
    max-width: 400px;
    overflow: auto;
    white-space: nowrap;
}

/* Let the Space use (almost) the full viewport width */
.gradio-container {
    max-width: 95% !important;
}

/* Text style and margins */
.markdown-text {
    font-size: 16px !important;
}

#models-to-add-text {
    font-size: 18px !important;
}

#citation-button span {
    font-size: 16px !important;
}

#citation-button textarea {
    font-size: 16px !important;
}

#citation-button > label > button {
    margin: 6px;
    transform: scale(1.3);
}

#search-bar-table-box > div:first-child {
    background: none;
    border: none;
}

#search-bar {
    padding: 0px;
}

.tab-buttons button {
    font-size: 20px;
}

/* Filter widgets */
#filter_type {
    border: 0;
    padding-left: 0;
    padding-top: 0;
}
#filter_type label {
    display: flex;
}
#filter_type label > span {
    margin-top: var(--spacing-lg);
    margin-right: 0.5em;
}
#filter_type label > .wrap {
    width: 103px;
}
#filter_type label > .wrap .wrap-inner {
    padding: 2px;
}
#filter_type label > .wrap .wrap-inner input {
    width: 1px;
}
#filter-columns-type {
    border: 0;
    /* NOTE(review): `padding: 0.5` is unit-less and ignored by browsers — confirm intent */
    padding: 0.5;
}
#filter-columns-size {
    border: 0;
    padding: 0.5;
}
#box-filter > .form {
    border: 0;
}
assets/leaderboard_data.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
gradio-client @ git+https://github.com/gradio-app/gradio@efd95245081f5657b1d13f34038205fc8791c1f7#subdirectory=client/python
|
2 |
+
https://gradio-builds.s3.amazonaws.com/efd95245081f5657b1d13f34038205fc8791c1f7/gradio-4.25.0-py3-none-any.whl
|
run.ipynb
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: mini_leaderboard"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "os.mkdir('assets')\n", "!wget -q -O assets/__init__.py https://github.com/gradio-app/gradio/raw/main/demo/mini_leaderboard/assets/__init__.py\n", "!wget -q -O assets/custom_css.css https://github.com/gradio-app/gradio/raw/main/demo/mini_leaderboard/assets/custom_css.css\n", "!wget -q -O assets/leaderboard_data.json https://github.com/gradio-app/gradio/raw/main/demo/mini_leaderboard/assets/leaderboard_data.json"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import pandas as pd\n", "from pathlib import Path\n", "\n", "abs_path = Path(__file__).parent.absolute()\n", "\n", "df = pd.read_json(str(abs_path / \"assets/leaderboard_data.json\"))\n", "invisible_df = df.copy()\n", "\n", "\n", "COLS = [\n", " \"T\",\n", " \"Model\",\n", " \"Average \u2b06\ufe0f\",\n", " \"ARC\",\n", " \"HellaSwag\",\n", " \"MMLU\",\n", " \"TruthfulQA\",\n", " \"Winogrande\",\n", " \"GSM8K\",\n", " \"Type\",\n", " \"Architecture\",\n", " \"Precision\",\n", " \"Merged\",\n", " \"Hub License\",\n", " \"#Params (B)\",\n", " \"Hub \u2764\ufe0f\",\n", " \"Model sha\",\n", " \"model_name_for_query\",\n", "]\n", "ON_LOAD_COLS = [\n", " \"T\",\n", " \"Model\",\n", " \"Average \u2b06\ufe0f\",\n", " \"ARC\",\n", " \"HellaSwag\",\n", " \"MMLU\",\n", " \"TruthfulQA\",\n", " \"Winogrande\",\n", " \"GSM8K\",\n", " \"model_name_for_query\",\n", "]\n", "TYPES = [\n", " \"str\",\n", " 
\"markdown\",\n", " \"number\",\n", " \"number\",\n", " \"number\",\n", " \"number\",\n", " \"number\",\n", " \"number\",\n", " \"number\",\n", " \"str\",\n", " \"str\",\n", " \"str\",\n", " \"str\",\n", " \"bool\",\n", " \"str\",\n", " \"number\",\n", " \"number\",\n", " \"bool\",\n", " \"str\",\n", " \"bool\",\n", " \"bool\",\n", " \"str\",\n", "]\n", "NUMERIC_INTERVALS = {\n", " \"?\": pd.Interval(-1, 0, closed=\"right\"),\n", " \"~1.5\": pd.Interval(0, 2, closed=\"right\"),\n", " \"~3\": pd.Interval(2, 4, closed=\"right\"),\n", " \"~7\": pd.Interval(4, 9, closed=\"right\"),\n", " \"~13\": pd.Interval(9, 20, closed=\"right\"),\n", " \"~35\": pd.Interval(20, 45, closed=\"right\"),\n", " \"~60\": pd.Interval(45, 70, closed=\"right\"),\n", " \"70+\": pd.Interval(70, 10000, closed=\"right\"),\n", "}\n", "MODEL_TYPE = [str(s) for s in df[\"T\"].unique()]\n", "Precision = [str(s) for s in df[\"Precision\"].unique()]\n", "\n", "\n", "# Searching and filtering\n", "def update_table(\n", " hidden_df: pd.DataFrame,\n", " columns: list,\n", " type_query: list,\n", " precision_query: str,\n", " size_query: list,\n", " query: str,\n", "):\n", " filtered_df = filter_models(hidden_df, type_query, size_query, precision_query)\n", " filtered_df = filter_queries(query, filtered_df)\n", " df = select_columns(filtered_df, columns)\n", " return df\n", "\n", "\n", "def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:\n", " return df[(df[\"model_name_for_query\"].str.contains(query, case=False))]\n", "\n", "\n", "def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:\n", " # We use COLS to maintain sorting\n", " filtered_df = df[[c for c in COLS if c in df.columns and c in columns]]\n", " return filtered_df\n", "\n", "\n", "def filter_queries(query: str, filtered_df: pd.DataFrame) -> pd.DataFrame:\n", " final_df = []\n", " if query != \"\":\n", " queries = [q.strip() for q in query.split(\";\")]\n", " for _q in queries:\n", " _q = _q.strip()\n", " if _q != 
\"\":\n", " temp_filtered_df = search_table(filtered_df, _q)\n", " if len(temp_filtered_df) > 0:\n", " final_df.append(temp_filtered_df)\n", " if len(final_df) > 0:\n", " filtered_df = pd.concat(final_df)\n", " filtered_df = filtered_df.drop_duplicates(\n", " subset=[\"Model\", \"Precision\", \"Model sha\"]\n", " )\n", "\n", " return filtered_df\n", "\n", "\n", "def filter_models(\n", " df: pd.DataFrame,\n", " type_query: list,\n", " size_query: list,\n", " precision_query: list,\n", ") -> pd.DataFrame:\n", " # Show all models\n", " filtered_df = df\n", "\n", " type_emoji = [t[0] for t in type_query]\n", " filtered_df = filtered_df.loc[df[\"T\"].isin(type_emoji)]\n", " filtered_df = filtered_df.loc[df[\"Precision\"].isin(precision_query + [\"None\"])]\n", "\n", " numeric_interval = pd.IntervalIndex(\n", " sorted([NUMERIC_INTERVALS[s] for s in size_query])\n", " )\n", " params_column = pd.to_numeric(df[\"#Params (B)\"], errors=\"coerce\")\n", " mask = params_column.apply(lambda x: any(numeric_interval.contains(x)))\n", " filtered_df = filtered_df.loc[mask]\n", "\n", " return filtered_df\n", "\n", "\n", "demo = gr.Blocks(css=str(abs_path / \"assets/leaderboard_data.json\"))\n", "with demo:\n", " gr.Markdown(\"\"\"Test Space of the LLM Leaderboard\"\"\", elem_classes=\"markdown-text\")\n", "\n", " with gr.Tabs(elem_classes=\"tab-buttons\") as tabs:\n", " with gr.TabItem(\"\ud83c\udfc5 LLM Benchmark\", elem_id=\"llm-benchmark-tab-table\", id=0):\n", " with gr.Row():\n", " with gr.Column():\n", " with gr.Row():\n", " search_bar = gr.Textbox(\n", " placeholder=\" \ud83d\udd0d Search for your model (separate multiple queries with `;`) and press ENTER...\",\n", " show_label=False,\n", " elem_id=\"search-bar\",\n", " )\n", " with gr.Row():\n", " shown_columns = gr.CheckboxGroup(\n", " choices=COLS,\n", " value=ON_LOAD_COLS,\n", " label=\"Select columns to show\",\n", " elem_id=\"column-select\",\n", " interactive=True,\n", " )\n", " with gr.Column(min_width=320):\n", " 
filter_columns_type = gr.CheckboxGroup(\n", " label=\"Model types\",\n", " choices=MODEL_TYPE,\n", " value=MODEL_TYPE,\n", " interactive=True,\n", " elem_id=\"filter-columns-type\",\n", " )\n", " filter_columns_precision = gr.CheckboxGroup(\n", " label=\"Precision\",\n", " choices=Precision,\n", " value=Precision,\n", " interactive=True,\n", " elem_id=\"filter-columns-precision\",\n", " )\n", " filter_columns_size = gr.CheckboxGroup(\n", " label=\"Model sizes (in billions of parameters)\",\n", " choices=list(NUMERIC_INTERVALS.keys()),\n", " value=list(NUMERIC_INTERVALS.keys()),\n", " interactive=True,\n", " elem_id=\"filter-columns-size\",\n", " )\n", "\n", " leaderboard_table = gr.components.Dataframe(\n", " value=df[ON_LOAD_COLS],\n", " headers=ON_LOAD_COLS,\n", " datatype=TYPES,\n", " elem_id=\"leaderboard-table\",\n", " interactive=False,\n", " visible=True,\n", " column_widths=[\"2%\", \"33%\"],\n", " )\n", "\n", " # Dummy leaderboard for handling the case when the user uses backspace key\n", " hidden_leaderboard_table_for_search = gr.components.Dataframe(\n", " value=invisible_df[COLS],\n", " headers=COLS,\n", " datatype=TYPES,\n", " visible=False,\n", " )\n", " search_bar.submit(\n", " update_table,\n", " [\n", " hidden_leaderboard_table_for_search,\n", " shown_columns,\n", " filter_columns_type,\n", " filter_columns_precision,\n", " filter_columns_size,\n", " search_bar,\n", " ],\n", " leaderboard_table,\n", " )\n", " for selector in [\n", " shown_columns,\n", " filter_columns_type,\n", " filter_columns_precision,\n", " filter_columns_size,\n", " ]:\n", " selector.change(\n", " update_table,\n", " [\n", " hidden_leaderboard_table_for_search,\n", " shown_columns,\n", " filter_columns_type,\n", " filter_columns_precision,\n", " filter_columns_size,\n", " search_bar,\n", " ],\n", " leaderboard_table,\n", " queue=True,\n", " )\n", "\n", "\n", "if __name__ == \"__main__\":\n", " demo.queue(default_concurrency_limit=40).launch()\n"]}], "metadata": {}, "nbformat": 
4, "nbformat_minor": 5}
|
run.py
ADDED
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr
import pandas as pd
from pathlib import Path

# All asset paths are resolved relative to this script's directory.
abs_path = Path(__file__).parent.absolute()

# Leaderboard rows. `invisible_df` keeps the full column set for the hidden
# search table while the visible table may show only a subset of columns.
df = pd.read_json(str(abs_path / "assets/leaderboard_data.json"))
invisible_df = df.copy()

# Every column the leaderboard knows about, in display order.
COLS = [
    "T",
    "Model",
    "Average ⬆️",
    "ARC",
    "HellaSwag",
    "MMLU",
    "TruthfulQA",
    "Winogrande",
    "GSM8K",
    "Type",
    "Architecture",
    "Precision",
    "Merged",
    "Hub License",
    "#Params (B)",
    "Hub ❤️",
    "Model sha",
    "model_name_for_query",
]
# Columns selected by default when the page first loads.
ON_LOAD_COLS = [
    "T",
    "Model",
    "Average ⬆️",
    "ARC",
    "HellaSwag",
    "MMLU",
    "TruthfulQA",
    "Winogrande",
    "GSM8K",
    "model_name_for_query",
]
# Gradio datatype for each column of the Dataframe component.
# NOTE(review): 22 entries here vs. 18 names in COLS — the trailing entries
# appear unused; confirm against gr.Dataframe's handling of `datatype`.
TYPES = [
    "str",
    "markdown",
    "number",
    "number",
    "number",
    "number",
    "number",
    "number",
    "number",
    "str",
    "str",
    "str",
    "str",
    "bool",
    "str",
    "number",
    "number",
    "bool",
    "str",
    "bool",
    "bool",
    "str",
]
# Human-readable size bucket (billions of parameters) -> numeric interval.
NUMERIC_INTERVALS = {
    "?": pd.Interval(-1, 0, closed="right"),
    "~1.5": pd.Interval(0, 2, closed="right"),
    "~3": pd.Interval(2, 4, closed="right"),
    "~7": pd.Interval(4, 9, closed="right"),
    "~13": pd.Interval(9, 20, closed="right"),
    "~35": pd.Interval(20, 45, closed="right"),
    "~60": pd.Interval(45, 70, closed="right"),
    "70+": pd.Interval(70, 10000, closed="right"),
}
# Distinct model-type markers and precisions present in the data.
# (`Precision` keeps its original capitalised name; the UI code references it.)
MODEL_TYPE = [str(s) for s in df["T"].unique()]
Precision = [str(s) for s in df["Precision"].unique()]
# Searching and filtering
def update_table(
    hidden_df: pd.DataFrame,
    columns: list,
    type_query: list,
    precision_query: str,
    size_query: list,
    query: str,
):
    """Recompute the visible leaderboard from the full hidden table.

    Pipeline: filter rows by model type / size / precision, then apply the
    free-text search, then project onto the user-selected columns.
    """
    result = filter_models(hidden_df, type_query, size_query, precision_query)
    result = filter_queries(query, result)
    return select_columns(result, columns)
def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """Keep rows whose model name contains *query* (case-insensitive substring)."""
    mask = df["model_name_for_query"].str.contains(query, case=False)
    return df[mask]
def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
    """Project *df* onto the requested columns.

    Iterates COLS rather than *columns* so the result always preserves the
    canonical column ordering regardless of checkbox click order.
    """
    wanted = set(columns)
    keep = [name for name in COLS if name in df.columns and name in wanted]
    return df[keep]
def filter_queries(query: str, filtered_df: pd.DataFrame) -> pd.DataFrame:
    """Apply a semicolon-separated search string to *filtered_df*.

    Each term is matched independently and the results are unioned, then
    de-duplicated on (Model, Precision, Model sha). An empty query — or a
    query with no matches at all — returns *filtered_df* unchanged.
    """
    matches = []
    if query != "":
        for term in (part.strip() for part in query.split(";")):
            if term == "":
                continue
            hit = search_table(filtered_df, term)
            if len(hit) > 0:
                matches.append(hit)
        if len(matches) > 0:
            filtered_df = pd.concat(matches)
            filtered_df = filtered_df.drop_duplicates(
                subset=["Model", "Precision", "Model sha"]
            )

    return filtered_df
def filter_models(
    df: pd.DataFrame,
    type_query: list,
    size_query: list,
    precision_query: list,
) -> pd.DataFrame:
    """Filter rows by model-type marker, precision, and parameter-count bucket.

    The boolean masks are built from the full *df*; pandas aligns them on the
    index when applied to the progressively filtered frame.
    """
    out = df

    # The "T" column stores the first character of each type label.
    wanted_markers = [label[0] for label in type_query]
    out = out.loc[df["T"].isin(wanted_markers)]
    # Rows with precision "None" are always kept.
    out = out.loc[df["Precision"].isin(precision_query + ["None"])]

    intervals = pd.IntervalIndex(
        sorted([NUMERIC_INTERVALS[key] for key in size_query])
    )
    # Non-numeric sizes become NaN and fall outside every interval.
    params = pd.to_numeric(df["#Params (B)"], errors="coerce")
    size_mask = params.apply(lambda value: any(intervals.contains(value)))
    out = out.loc[size_mask]

    return out
# Build the Blocks app.
# BUG FIX: the original passed the leaderboard *data* file
# ("assets/leaderboard_data.json") as the `css` argument, so the stylesheet
# shipped with this Space ("assets/custom_css.css") was never applied.
demo = gr.Blocks(css=str(abs_path / "assets/custom_css.css"))
with demo:
    gr.Markdown("""Test Space of the LLM Leaderboard""", elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        # Free-text search; multiple terms separated by ";".
                        search_bar = gr.Textbox(
                            placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
                            show_label=False,
                            elem_id="search-bar",
                        )
                    with gr.Row():
                        shown_columns = gr.CheckboxGroup(
                            choices=COLS,
                            value=ON_LOAD_COLS,
                            label="Select columns to show",
                            elem_id="column-select",
                            interactive=True,
                        )
                with gr.Column(min_width=320):
                    filter_columns_type = gr.CheckboxGroup(
                        label="Model types",
                        choices=MODEL_TYPE,
                        value=MODEL_TYPE,
                        interactive=True,
                        elem_id="filter-columns-type",
                    )
                    filter_columns_precision = gr.CheckboxGroup(
                        label="Precision",
                        choices=Precision,
                        value=Precision,
                        interactive=True,
                        elem_id="filter-columns-precision",
                    )
                    filter_columns_size = gr.CheckboxGroup(
                        label="Model sizes (in billions of parameters)",
                        choices=list(NUMERIC_INTERVALS.keys()),
                        value=list(NUMERIC_INTERVALS.keys()),
                        interactive=True,
                        elem_id="filter-columns-size",
                    )

            # The visible leaderboard, initialised with the default columns.
            leaderboard_table = gr.components.Dataframe(
                value=df[ON_LOAD_COLS],
                headers=ON_LOAD_COLS,
                datatype=TYPES,
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
                column_widths=["2%", "33%"],
            )

            # Dummy leaderboard for handling the case when the user uses backspace key
            hidden_leaderboard_table_for_search = gr.components.Dataframe(
                value=invisible_df[COLS],
                headers=COLS,
                datatype=TYPES,
                visible=False,
            )
            search_bar.submit(
                update_table,
                [
                    hidden_leaderboard_table_for_search,
                    shown_columns,
                    filter_columns_type,
                    filter_columns_precision,
                    filter_columns_size,
                    search_bar,
                ],
                leaderboard_table,
            )
            # Recompute the table whenever any selector changes.
            for selector in [
                shown_columns,
                filter_columns_type,
                filter_columns_precision,
                filter_columns_size,
            ]:
                selector.change(
                    update_table,
                    [
                        hidden_leaderboard_table_for_search,
                        shown_columns,
                        filter_columns_type,
                        filter_columns_precision,
                        filter_columns_size,
                        search_bar,
                    ],
                    leaderboard_table,
                    queue=True,
                )


if __name__ == "__main__":
    demo.queue(default_concurrency_limit=40).launch()