aliabd HF staff committed on
Commit
23cdb96
·
verified ·
1 Parent(s): d5a0079

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,12 +1,12 @@
 
1
  ---
2
- title: Mini Leaderboard Main
3
- emoji: 📉
4
- colorFrom: blue
5
- colorTo: pink
6
  sdk: gradio
7
  sdk_version: 4.25.0
8
- app_file: app.py
9
  pinned: false
 
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+
2
  ---
3
+ title: mini_leaderboard_main
4
+ emoji: 🔥
5
+ colorFrom: indigo
6
+ colorTo: indigo
7
  sdk: gradio
8
  sdk_version: 4.25.0
9
+ app_file: run.py
10
  pinned: false
11
+ hf_oauth: true
12
  ---
 
 
assets/__init__.py ADDED
File without changes
assets/custom_css.css ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Hides the final AutoEvalColumn */
2
+ #llm-benchmark-tab-table table td:last-child,
3
+ #llm-benchmark-tab-table table th:last-child {
4
+ display: none;
5
+ }
6
+
7
+ /* Limit the width of the first AutoEvalColumn so that names don't expand too much */
8
+ table td:first-child,
9
+ table th:first-child {
10
+ max-width: 400px;
11
+ overflow: auto;
12
+ white-space: nowrap;
13
+ }
14
+
15
+ /* Full width space */
16
+ .gradio-container {
17
+ max-width: 95%!important;
18
+ }
19
+
20
+ /* Text style and margins */
21
+ .markdown-text {
22
+ font-size: 16px !important;
23
+ }
24
+
25
+ #models-to-add-text {
26
+ font-size: 18px !important;
27
+ }
28
+
29
+ #citation-button span {
30
+ font-size: 16px !important;
31
+ }
32
+
33
+ #citation-button textarea {
34
+ font-size: 16px !important;
35
+ }
36
+
37
+ #citation-button > label > button {
38
+ margin: 6px;
39
+ transform: scale(1.3);
40
+ }
41
+
42
+ #search-bar-table-box > div:first-child {
43
+ background: none;
44
+ border: none;
45
+ }
46
+
47
+ #search-bar {
48
+ padding: 0px;
49
+ }
50
+
51
+ .tab-buttons button {
52
+ font-size: 20px;
53
+ }
54
+
55
+ /* Filters style */
56
+ #filter_type{
57
+ border: 0;
58
+ padding-left: 0;
59
+ padding-top: 0;
60
+ }
61
+ #filter_type label {
62
+ display: flex;
63
+ }
64
+ #filter_type label > span{
65
+ margin-top: var(--spacing-lg);
66
+ margin-right: 0.5em;
67
+ }
68
+ #filter_type label > .wrap{
69
+ width: 103px;
70
+ }
71
+ #filter_type label > .wrap .wrap-inner{
72
+ padding: 2px;
73
+ }
74
+ #filter_type label > .wrap .wrap-inner input{
75
+ width: 1px
76
+ }
77
+ #filter-columns-type{
78
+ border:0;
79
+ padding:0.5;
80
+ }
81
+ #filter-columns-size{
82
+ border:0;
83
+ padding:0.5;
84
+ }
85
+ #box-filter > .form{
86
+ border: 0
87
+ }
assets/leaderboard_data.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio-client @ git+https://github.com/gradio-app/gradio@efd95245081f5657b1d13f34038205fc8791c1f7#subdirectory=client/python
2
+ https://gradio-builds.s3.amazonaws.com/efd95245081f5657b1d13f34038205fc8791c1f7/gradio-4.25.0-py3-none-any.whl
run.ipynb ADDED
@@ -0,0 +1 @@
 
 
1
+ {"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: mini_leaderboard"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "os.mkdir('assets')\n", "!wget -q -O assets/__init__.py https://github.com/gradio-app/gradio/raw/main/demo/mini_leaderboard/assets/__init__.py\n", "!wget -q -O assets/custom_css.css https://github.com/gradio-app/gradio/raw/main/demo/mini_leaderboard/assets/custom_css.css\n", "!wget -q -O assets/leaderboard_data.json https://github.com/gradio-app/gradio/raw/main/demo/mini_leaderboard/assets/leaderboard_data.json"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import pandas as pd\n", "from pathlib import Path\n", "\n", "abs_path = Path(__file__).parent.absolute()\n", "\n", "df = pd.read_json(str(abs_path / \"assets/leaderboard_data.json\"))\n", "invisible_df = df.copy()\n", "\n", "\n", "COLS = [\n", " \"T\",\n", " \"Model\",\n", " \"Average \u2b06\ufe0f\",\n", " \"ARC\",\n", " \"HellaSwag\",\n", " \"MMLU\",\n", " \"TruthfulQA\",\n", " \"Winogrande\",\n", " \"GSM8K\",\n", " \"Type\",\n", " \"Architecture\",\n", " \"Precision\",\n", " \"Merged\",\n", " \"Hub License\",\n", " \"#Params (B)\",\n", " \"Hub \u2764\ufe0f\",\n", " \"Model sha\",\n", " \"model_name_for_query\",\n", "]\n", "ON_LOAD_COLS = [\n", " \"T\",\n", " \"Model\",\n", " \"Average \u2b06\ufe0f\",\n", " \"ARC\",\n", " \"HellaSwag\",\n", " \"MMLU\",\n", " \"TruthfulQA\",\n", " \"Winogrande\",\n", " \"GSM8K\",\n", " \"model_name_for_query\",\n", "]\n", "TYPES = [\n", " \"str\",\n", " 
\"markdown\",\n", " \"number\",\n", " \"number\",\n", " \"number\",\n", " \"number\",\n", " \"number\",\n", " \"number\",\n", " \"number\",\n", " \"str\",\n", " \"str\",\n", " \"str\",\n", " \"str\",\n", " \"bool\",\n", " \"str\",\n", " \"number\",\n", " \"number\",\n", " \"bool\",\n", " \"str\",\n", " \"bool\",\n", " \"bool\",\n", " \"str\",\n", "]\n", "NUMERIC_INTERVALS = {\n", " \"?\": pd.Interval(-1, 0, closed=\"right\"),\n", " \"~1.5\": pd.Interval(0, 2, closed=\"right\"),\n", " \"~3\": pd.Interval(2, 4, closed=\"right\"),\n", " \"~7\": pd.Interval(4, 9, closed=\"right\"),\n", " \"~13\": pd.Interval(9, 20, closed=\"right\"),\n", " \"~35\": pd.Interval(20, 45, closed=\"right\"),\n", " \"~60\": pd.Interval(45, 70, closed=\"right\"),\n", " \"70+\": pd.Interval(70, 10000, closed=\"right\"),\n", "}\n", "MODEL_TYPE = [str(s) for s in df[\"T\"].unique()]\n", "Precision = [str(s) for s in df[\"Precision\"].unique()]\n", "\n", "\n", "# Searching and filtering\n", "def update_table(\n", " hidden_df: pd.DataFrame,\n", " columns: list,\n", " type_query: list,\n", " precision_query: str,\n", " size_query: list,\n", " query: str,\n", "):\n", " filtered_df = filter_models(hidden_df, type_query, size_query, precision_query)\n", " filtered_df = filter_queries(query, filtered_df)\n", " df = select_columns(filtered_df, columns)\n", " return df\n", "\n", "\n", "def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:\n", " return df[(df[\"model_name_for_query\"].str.contains(query, case=False))]\n", "\n", "\n", "def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:\n", " # We use COLS to maintain sorting\n", " filtered_df = df[[c for c in COLS if c in df.columns and c in columns]]\n", " return filtered_df\n", "\n", "\n", "def filter_queries(query: str, filtered_df: pd.DataFrame) -> pd.DataFrame:\n", " final_df = []\n", " if query != \"\":\n", " queries = [q.strip() for q in query.split(\";\")]\n", " for _q in queries:\n", " _q = _q.strip()\n", " if _q != 
\"\":\n", " temp_filtered_df = search_table(filtered_df, _q)\n", " if len(temp_filtered_df) > 0:\n", " final_df.append(temp_filtered_df)\n", " if len(final_df) > 0:\n", " filtered_df = pd.concat(final_df)\n", " filtered_df = filtered_df.drop_duplicates(\n", " subset=[\"Model\", \"Precision\", \"Model sha\"]\n", " )\n", "\n", " return filtered_df\n", "\n", "\n", "def filter_models(\n", " df: pd.DataFrame,\n", " type_query: list,\n", " size_query: list,\n", " precision_query: list,\n", ") -> pd.DataFrame:\n", " # Show all models\n", " filtered_df = df\n", "\n", " type_emoji = [t[0] for t in type_query]\n", " filtered_df = filtered_df.loc[df[\"T\"].isin(type_emoji)]\n", " filtered_df = filtered_df.loc[df[\"Precision\"].isin(precision_query + [\"None\"])]\n", "\n", " numeric_interval = pd.IntervalIndex(\n", " sorted([NUMERIC_INTERVALS[s] for s in size_query])\n", " )\n", " params_column = pd.to_numeric(df[\"#Params (B)\"], errors=\"coerce\")\n", " mask = params_column.apply(lambda x: any(numeric_interval.contains(x)))\n", " filtered_df = filtered_df.loc[mask]\n", "\n", " return filtered_df\n", "\n", "\n", "demo = gr.Blocks(css=str(abs_path / \"assets/leaderboard_data.json\"))\n", "with demo:\n", " gr.Markdown(\"\"\"Test Space of the LLM Leaderboard\"\"\", elem_classes=\"markdown-text\")\n", "\n", " with gr.Tabs(elem_classes=\"tab-buttons\") as tabs:\n", " with gr.TabItem(\"\ud83c\udfc5 LLM Benchmark\", elem_id=\"llm-benchmark-tab-table\", id=0):\n", " with gr.Row():\n", " with gr.Column():\n", " with gr.Row():\n", " search_bar = gr.Textbox(\n", " placeholder=\" \ud83d\udd0d Search for your model (separate multiple queries with `;`) and press ENTER...\",\n", " show_label=False,\n", " elem_id=\"search-bar\",\n", " )\n", " with gr.Row():\n", " shown_columns = gr.CheckboxGroup(\n", " choices=COLS,\n", " value=ON_LOAD_COLS,\n", " label=\"Select columns to show\",\n", " elem_id=\"column-select\",\n", " interactive=True,\n", " )\n", " with gr.Column(min_width=320):\n", " 
filter_columns_type = gr.CheckboxGroup(\n", " label=\"Model types\",\n", " choices=MODEL_TYPE,\n", " value=MODEL_TYPE,\n", " interactive=True,\n", " elem_id=\"filter-columns-type\",\n", " )\n", " filter_columns_precision = gr.CheckboxGroup(\n", " label=\"Precision\",\n", " choices=Precision,\n", " value=Precision,\n", " interactive=True,\n", " elem_id=\"filter-columns-precision\",\n", " )\n", " filter_columns_size = gr.CheckboxGroup(\n", " label=\"Model sizes (in billions of parameters)\",\n", " choices=list(NUMERIC_INTERVALS.keys()),\n", " value=list(NUMERIC_INTERVALS.keys()),\n", " interactive=True,\n", " elem_id=\"filter-columns-size\",\n", " )\n", "\n", " leaderboard_table = gr.components.Dataframe(\n", " value=df[ON_LOAD_COLS],\n", " headers=ON_LOAD_COLS,\n", " datatype=TYPES,\n", " elem_id=\"leaderboard-table\",\n", " interactive=False,\n", " visible=True,\n", " column_widths=[\"2%\", \"33%\"],\n", " )\n", "\n", " # Dummy leaderboard for handling the case when the user uses backspace key\n", " hidden_leaderboard_table_for_search = gr.components.Dataframe(\n", " value=invisible_df[COLS],\n", " headers=COLS,\n", " datatype=TYPES,\n", " visible=False,\n", " )\n", " search_bar.submit(\n", " update_table,\n", " [\n", " hidden_leaderboard_table_for_search,\n", " shown_columns,\n", " filter_columns_type,\n", " filter_columns_precision,\n", " filter_columns_size,\n", " search_bar,\n", " ],\n", " leaderboard_table,\n", " )\n", " for selector in [\n", " shown_columns,\n", " filter_columns_type,\n", " filter_columns_precision,\n", " filter_columns_size,\n", " ]:\n", " selector.change(\n", " update_table,\n", " [\n", " hidden_leaderboard_table_for_search,\n", " shown_columns,\n", " filter_columns_type,\n", " filter_columns_precision,\n", " filter_columns_size,\n", " search_bar,\n", " ],\n", " leaderboard_table,\n", " queue=True,\n", " )\n", "\n", "\n", "if __name__ == \"__main__\":\n", " demo.queue(default_concurrency_limit=40).launch()\n"]}], "metadata": {}, "nbformat": 
4, "nbformat_minor": 5}
run.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
import pandas as pd
from pathlib import Path

# Directory of this script; used to locate the bundled assets/ folder.
abs_path = Path(__file__).parent.absolute()

# Leaderboard rows, loaded once at import time from the bundled JSON dump.
df = pd.read_json(str(abs_path / "assets/leaderboard_data.json"))
# Untouched copy that backs the hidden "search source" table in the UI.
invisible_df = df.copy()


# Every column present in the dataset, in the order it should be displayed.
COLS = [
    "T",
    "Model",
    "Average ⬆️",
    "ARC",
    "HellaSwag",
    "MMLU",
    "TruthfulQA",
    "Winogrande",
    "GSM8K",
    "Type",
    "Architecture",
    "Precision",
    "Merged",
    "Hub License",
    "#Params (B)",
    "Hub ❤️",
    "Model sha",
    "model_name_for_query",
]
# Subset of COLS shown when the page first loads.
ON_LOAD_COLS = [
    "T",
    "Model",
    "Average ⬆️",
    "ARC",
    "HellaSwag",
    "MMLU",
    "TruthfulQA",
    "Winogrande",
    "GSM8K",
    "model_name_for_query",
]
# Gradio datatype for each column position.
# NOTE(review): TYPES has 22 entries but COLS only 18 — the trailing entries
# look like leftovers from a larger upstream column set; confirm which
# datatype maps to which column before relying on the tail of this list.
TYPES = [
    "str",
    "markdown",
    "number",
    "number",
    "number",
    "number",
    "number",
    "number",
    "number",
    "str",
    "str",
    "str",
    "str",
    "bool",
    "str",
    "number",
    "number",
    "bool",
    "str",
    "bool",
    "bool",
    "str",
]
# Parameter-count buckets (in billions) selectable in the size filter;
# "?" catches rows whose parameter count could not be parsed (mapped to NaN).
NUMERIC_INTERVALS = {
    "?": pd.Interval(-1, 0, closed="right"),
    "~1.5": pd.Interval(0, 2, closed="right"),
    "~3": pd.Interval(2, 4, closed="right"),
    "~7": pd.Interval(4, 9, closed="right"),
    "~13": pd.Interval(9, 20, closed="right"),
    "~35": pd.Interval(20, 45, closed="right"),
    "~60": pd.Interval(45, 70, closed="right"),
    "70+": pd.Interval(70, 10000, closed="right"),
}
# Distinct values used to populate the type / precision filter checkboxes.
MODEL_TYPE = [str(s) for s in df["T"].unique()]
Precision = [str(s) for s in df["Precision"].unique()]
# Searching and filtering
def update_table(
    hidden_df: pd.DataFrame,
    columns: list,
    type_query: list,
    precision_query: str,
    size_query: list,
    query: str,
):
    """Recompute the visible leaderboard: filter rows, then project columns.

    *hidden_df* is the full, unfiltered dataset; the remaining arguments are
    the current values of the UI filter widgets and the search box.
    """
    narrowed = filter_models(hidden_df, type_query, size_query, precision_query)
    narrowed = filter_queries(query, narrowed)
    return select_columns(narrowed, columns)
94
+
95
+
96
def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """Keep rows whose ``model_name_for_query`` contains *query* (case-insensitive)."""
    name_matches = df["model_name_for_query"].str.contains(query, case=False)
    return df[name_matches]
98
+
99
+
100
def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
    """Project *df* onto the requested columns, preserving COLS display order."""
    # Iterate COLS (not *columns*) so the result keeps the canonical ordering.
    kept = [col for col in COLS if col in df.columns and col in columns]
    return df[kept]
104
+
105
+
106
def filter_queries(query: str, filtered_df: pd.DataFrame) -> pd.DataFrame:
    """Union the per-term search results for a ``;``-separated query string.

    An empty query — or one where no term matches anything — leaves
    *filtered_df* unchanged. Matching rows from multiple terms are
    concatenated and de-duplicated on (Model, Precision, Model sha).
    """
    matches = []
    if query != "":
        for term in (part.strip() for part in query.split(";")):
            if term == "":
                continue
            hits = search_table(filtered_df, term)
            if not hits.empty:
                matches.append(hits)
    if matches:
        filtered_df = pd.concat(matches).drop_duplicates(
            subset=["Model", "Precision", "Model sha"]
        )

    return filtered_df
123
+
124
+
125
def filter_models(
    df: pd.DataFrame,
    type_query: list,
    size_query: list,
    precision_query: list,
) -> pd.DataFrame:
    """Restrict rows by model type, precision, and parameter-count bucket.

    All boolean masks are computed against the full *df* index, so they align
    correctly even as the frame is progressively narrowed.
    """
    # Each type choice begins with its emoji; the "T" column stores only that emoji.
    wanted_emoji = [choice[0] for choice in type_query]
    narrowed = df.loc[df["T"].isin(wanted_emoji)]
    # Rows with precision "None" are always kept alongside the selected ones.
    narrowed = narrowed.loc[df["Precision"].isin(precision_query + ["None"])]

    # IntervalIndex requires sorted, non-overlapping intervals.
    intervals = pd.IntervalIndex(sorted(NUMERIC_INTERVALS[s] for s in size_query))
    # Unparseable parameter counts become NaN, which never falls in a bucket
    # (those rows are only visible via the "?" interval).
    params = pd.to_numeric(df["#Params (B)"], errors="coerce")
    in_bucket = params.apply(lambda v: any(intervals.contains(v)))
    return narrowed.loc[in_bucket]
146
+
147
+
148
# Build the Blocks app. The stylesheet shipped with this Space is
# assets/custom_css.css — its selectors (#llm-benchmark-tab-table,
# #search-bar, #filter-columns-type, #filter-columns-size, ...) target the
# elem_ids used below. Previously css= pointed at the JSON *data* file
# (assets/leaderboard_data.json), so none of the custom styling was applied.
demo = gr.Blocks(css=str(abs_path / "assets/custom_css.css"))
with demo:
    gr.Markdown("""Test Space of the LLM Leaderboard""", elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        # Free-text search; multiple terms separated by ";".
                        search_bar = gr.Textbox(
                            placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
                            show_label=False,
                            elem_id="search-bar",
                        )
                    with gr.Row():
                        # Column picker, pre-populated with the on-load subset.
                        shown_columns = gr.CheckboxGroup(
                            choices=COLS,
                            value=ON_LOAD_COLS,
                            label="Select columns to show",
                            elem_id="column-select",
                            interactive=True,
                        )
                with gr.Column(min_width=320):
                    filter_columns_type = gr.CheckboxGroup(
                        label="Model types",
                        choices=MODEL_TYPE,
                        value=MODEL_TYPE,
                        interactive=True,
                        elem_id="filter-columns-type",
                    )
                    filter_columns_precision = gr.CheckboxGroup(
                        label="Precision",
                        choices=Precision,
                        value=Precision,
                        interactive=True,
                        elem_id="filter-columns-precision",
                    )
                    filter_columns_size = gr.CheckboxGroup(
                        label="Model sizes (in billions of parameters)",
                        choices=list(NUMERIC_INTERVALS.keys()),
                        value=list(NUMERIC_INTERVALS.keys()),
                        interactive=True,
                        elem_id="filter-columns-size",
                    )

            # The visible table; starts with the default column subset.
            leaderboard_table = gr.components.Dataframe(
                value=df[ON_LOAD_COLS],
                headers=ON_LOAD_COLS,
                datatype=TYPES,
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
                column_widths=["2%", "33%"],
            )

            # Dummy leaderboard for handling the case when the user uses backspace key
            hidden_leaderboard_table_for_search = gr.components.Dataframe(
                value=invisible_df[COLS],
                headers=COLS,
                datatype=TYPES,
                visible=False,
            )
            search_bar.submit(
                update_table,
                [
                    hidden_leaderboard_table_for_search,
                    shown_columns,
                    filter_columns_type,
                    filter_columns_precision,
                    filter_columns_size,
                    search_bar,
                ],
                leaderboard_table,
            )
            # Any filter or column-selection change re-runs the same pipeline.
            for selector in [
                shown_columns,
                filter_columns_type,
                filter_columns_precision,
                filter_columns_size,
            ]:
                selector.change(
                    update_table,
                    [
                        hidden_leaderboard_table_for_search,
                        shown_columns,
                        filter_columns_type,
                        filter_columns_precision,
                        filter_columns_size,
                        search_bar,
                    ],
                    leaderboard_table,
                    queue=True,
                )


if __name__ == "__main__":
    demo.queue(default_concurrency_limit=40).launch()