eduardo-alvarez commited on
Commit
278fc7f
·
1 Parent(s): e56faab

correcting df load bug, adding chat functionality, and improved docs

Browse files
.gitignore CHANGED
@@ -1,3 +1,4 @@
1
  .ipynb_checkpoints
2
  *.pkl
3
- dask-worker-space/
 
 
1
  .ipynb_checkpoints
2
  *.pkl
3
+ dask-worker-space/
4
+ app_test.py
__pycache__/app.cpython-38.pyc ADDED
Binary file (7.81 kB). View file
 
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  import pandas as pd
3
- import random
4
- import time
5
 
6
  from info.train_a_model import (
7
  LLM_BENCHMARKS_TEXT)
@@ -13,46 +13,84 @@ from info.programs import (
13
  PROGRAMS_TEXT)
14
  from info.citation import(
15
  CITATION_TEXT)
16
- from src.processing import filter_benchmarks_table, make_clickable
 
 
 
 
 
17
 
18
  demo = gr.Blocks()
19
 
20
  with demo:
21
 
22
  gr.HTML("""<h1 align="center" id="space-title">🤗Powered-by-Intel LLM Leaderboard 💻</h1>""")
23
- gr.Markdown("This leaderboard is designed to evaluate, score, and rank open-source large language \
24
- models that have been pre-trained or fine-tuned on Intel Hardware 🦾")
25
- gr.Markdown("Models submitted to the leaderboard are evaluated \
26
- on the Intel Developer Cloud ☁️")
 
 
 
 
 
 
27
 
28
- # TODO: Coming soon comparison tool
29
- #with gr.Accordion("🥊Large Language Model Boxing Ring 🥊", open=False):
30
- # with gr.Row():
31
- # chat_a = gr.Chatbot()
32
- # chat_b = gr.Chatbot()
33
- # msg = gr.Textbox()
34
- # gr.ClearButton([msg, chat_a])
35
- #
36
- # def respond(message, chat_history):
37
- # bot_message = random.choice(["How are you?", "I love you", "I'm very hungry"])
38
- # chat_history.append((message, bot_message))
39
- # time.sleep(2)
40
- # return "", chat_history
41
- #
42
- # msg.submit(respond, inputs = [msg, chat_a],outputs = [msg, chat_a])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
 
44
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
45
- with gr.TabItem("🏆 LLM Benchmark", elem_id="llm-benchmark-table", id=0):
46
  with gr.Row():
47
  with gr.Column():
48
  filter_hw = gr.CheckboxGroup(choices=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"],
49
  label="Select Training Platform*",
50
  elem_id="compute_platforms",
51
  value=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"])
52
- filter_platform = gr.CheckboxGroup(choices=["Intel Developer Cloud","AWS","Azure","GCP","Local"],
53
  label="Training Infrastructure*",
54
  elem_id="training_infra",
55
- value=["Intel Developer Cloud","AWS","Azure","GCP","Local"])
56
  filter_affiliation = gr.CheckboxGroup(choices=["No Affiliation","Intel Innovator","Intel Student Ambassador", "Intel Software Liftoff", "Intel Labs", "Other"],
57
  label="Intel Program Affiliation",
58
  elem_id="program_affiliation",
@@ -63,10 +101,10 @@ with demo:
63
  label="Model Sizes (Billion of Parameters)",
64
  elem_id="parameter_size",
65
  value=[1,3,5,7,13,35,60,70,100])
66
- filter_precision = gr.CheckboxGroup(choices=["fp8","fp16","bf16","int8","4bit"],
67
  label="Model Precision",
68
  elem_id="precision",
69
- value=["fp8","fp16","bf16","int8","4bit"])
70
  filter_type = gr.CheckboxGroup(choices=["pretrained","fine-tuned","chat-models","merges/moerges"],
71
  label="Model Types",
72
  elem_id="model_types",
@@ -74,14 +112,21 @@ with demo:
74
 
75
  initial_df = pd.read_csv("./status/leaderboard_status_030424.csv")
76
 
77
- gradio_df_display = gr.Dataframe()
78
-
79
  def update_df(hw_selected, platform_selected, affiliation_selected, size_selected, precision_selected, type_selected):
80
  filtered_df = filter_benchmarks_table(df=initial_df, hw_selected=hw_selected, platform_selected=platform_selected,
81
  affiliation_selected=affiliation_selected, size_selected=size_selected,
82
  precision_selected=precision_selected, type_selected=type_selected)
83
  return filtered_df
84
 
 
 
 
 
 
 
 
 
 
85
  filter_hw.change(fn=update_df,
86
  inputs=[filter_hw, filter_platform, filter_affiliation, filter_size, filter_precision, filter_type],
87
  outputs=[gradio_df_display])
@@ -114,8 +159,9 @@ with demo:
114
  gr.Markdown("# Submit Model for Evaluation 🏎️", elem_classes="markdown-text")
115
  with gr.Row():
116
  with gr.Column():
117
- model_name_textbox = gr.Textbox(label="Model name")
118
- revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
 
119
  model_type = gr.Dropdown(
120
  choices=["pretrained","fine-tuned","chat models","merges/moerges"],
121
  label="Model type",
@@ -125,10 +171,10 @@ with demo:
125
  )
126
 
127
  hw_type = gr.Dropdown(
128
- choices=["Gaudi","Xeon","GPU Max","Arc GPU"],
129
  label="Training Hardware",
130
  multiselect=False,
131
- value="Gaudi2",
132
  interactive=True,
133
  )
134
  terms = gr.Checkbox(
@@ -137,9 +183,11 @@ with demo:
137
  value=False,
138
  interactive=True,
139
  )
 
 
140
  with gr.Column():
141
  precision = gr.Dropdown(
142
- choices=["fp8","fp16","bf16","int8","4bit"],
143
  label="Precision",
144
  multiselect=False,
145
  value="fp16",
@@ -151,31 +199,37 @@ with demo:
151
  multiselect=False,
152
  value="Original",
153
  interactive=True,
 
 
154
  )
155
  training_infra = gr.Dropdown(
156
- choices=["IDC","AWS","Azure","GCP","Local"],
157
  label="Training Infrastructure",
158
  multiselect=False,
159
- value="IDC",
160
  interactive=True,
 
 
161
  )
162
  affiliation = gr.Dropdown(
163
  choices=["No Affiliation","Innovator","Student Ambassador","Intel Liftoff", "Intel Labs", "Other"],
164
  label="Affiliation with Intel",
165
  multiselect=False,
166
- value="Independent",
167
  interactive=True,
 
168
  )
169
  base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
170
 
171
- #submit_button = gr.Button("Submit Eval")
172
- #submission_result = gr.Markdown()
173
- gr.Markdown("Community Submissions Coming soon!")
174
 
175
  with gr.Accordion("📙 Citation", open=False):
176
  citation =gr.Textbox(value = CITATION_TEXT,
177
  lines=6,
178
  label="Use the following to cite this content")
179
 
 
 
 
180
 
181
- demo.launch()
 
1
  import gradio as gr
2
  import pandas as pd
3
+ import requests
4
+
5
 
6
  from info.train_a_model import (
7
  LLM_BENCHMARKS_TEXT)
 
13
  PROGRAMS_TEXT)
14
  from info.citation import(
15
  CITATION_TEXT)
16
+ from info.validated_chat_models import(
17
+ VALIDATED_CHAT_MODELS)
18
+ from src.processing import filter_benchmarks_table
19
+
20
+ #inference_endpoint_url = os.environ['inference_endpoint_url']
21
+ #inference_concurrency_limit = os.environ['inference_concurrency_limit']
22
 
23
  demo = gr.Blocks()
24
 
25
  with demo:
26
 
27
  gr.HTML("""<h1 align="center" id="space-title">🤗Powered-by-Intel LLM Leaderboard 💻</h1>""")
28
+ gr.Markdown("""This leaderboard is designed to evaluate, score, and rank open-source LLMs
29
+ that have been pre-trained or fine-tuned on Intel Hardware 🦾 To submit your model for evaluation
30
+ follow the instructions and complete the form in the "🏎️ Submit" tab. Models submitted to the leaderboard are evaluated
31
+ on the Intel Developer Cloud ☁️ The evaluation platform consists of Gaudi Accelerators and Xeon CPUs running benchmarks from
32
+ the [Eleuther AI Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness).""")
33
+ gr.Markdown("""Join 5000+ developers on the [Intel DevHub Discord](https://discord.gg/yNYNxK2k) to get support with your submission and
34
+ talk about everything from GenAI and HPC to Quantum Computing.""")
35
+ gr.Markdown("""A special shout-out to the 🤗 [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
36
+ team for generously sharing their code and best
37
+ practices, ensuring that AI Developers have a valuable and enjoyable tool at their disposal.""")
38
 
39
+ with gr.Accordion("Chat with Top Models on the Leaderboard Here 💬 ", open=False):
40
+ # import pdb
41
+
42
+ chat_model_dropdown = gr.Dropdown(
43
+ choices=VALIDATED_CHAT_MODELS,
44
+ label="Select a leaderboard model to chat with. ",
45
+ multiselect=False,
46
+ value=VALIDATED_CHAT_MODELS[0],
47
+ interactive=True,
48
+ )
49
+
50
+ #chat_model_selection = chat_model_dropdown.value
51
+ chat_model_selection = 'Intel/neural-chat-7b-v1-1'
52
+
53
def call_api_and_stream_response(query, chat_model):
    """
    Call the inference endpoint and yield its reply one character at a time.

    The HTTP response is read byte by byte and decoded incrementally, so a
    character encoded as several UTF-8 bytes (accents, emoji, CJK, ...) is
    emitted once all of its bytes have arrived instead of raising
    ``UnicodeDecodeError`` on the first partial byte.

    Args:
        query: The user's message; sent as the ``query`` field of the request.
        chat_model: Model identifier; sent as the ``selected_model`` field.

    Yields:
        str: Exactly one decoded character per item, in arrival order.
        Invalid or truncated byte sequences are yielded as U+FFFD rather than
        aborting the stream.
    """
    import codecs  # stdlib; imported locally so the block is self-contained

    url = "http://localhost:5004/query-stream/"
    params = {"query": query, "selected_model": chat_model}

    # A stateful decoder buffers incomplete multi-byte sequences between
    # one-byte chunks; errors="replace" keeps a malformed byte from killing
    # the whole chat turn.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")

    with requests.get(url, json=params, stream=True) as r:
        # chunk_size=1 is kept on purpose: feeding one byte at a time means the
        # decoder releases at most one character per step, which is what the
        # caller's per-character checks rely on.
        for chunk in r.iter_content(chunk_size=1):
            text = decoder.decode(chunk)
            if text:
                yield text

    # Flush anything still buffered if the stream ended mid-character.
    tail = decoder.decode(b"", final=True)
    if tail:
        yield tail
+
65
def get_response(query, history):
    """
    Stream the growing reply text for one chat turn.

    Pulls characters from ``call_api_and_stream_response`` for the model named
    by the module-level ``chat_model_selection`` and yields the text
    accumulated so far after each one. Streaming stops, without yielding
    again, at the first '<' character.

    Args:
        query: The user's message for this turn.
        history: Earlier turns as supplied by the chat UI; not used here.

    Yields:
        str: The reply accumulated so far, one character longer each time.
    """
    partial_reply = ''

    # chat_model_selection is only read, so no `global` declaration is needed.
    stream = call_api_and_stream_response(query, chat_model=chat_model_selection)

    for piece in stream:
        # NOTE(review): '<' presumably marks the start of a special/end token
        # emitted by the backend -- confirm against the endpoint's output.
        if piece == '<':
            return
        partial_reply = partial_reply + piece
        yield partial_reply
78
+
79
# The chat UI is created inside the enclosing `demo` Blocks context, so it only
# needs to be constructed here. It must not be launched on its own: a nested
# .launch() would start a separate, blocking app before the rest of the
# leaderboard is built. The single demo.launch() at the end of the file serves
# the whole page, chat included.
gr.ChatInterface(get_response, retry_btn=None, undo_btn=None, theme=gr.themes.Soft(), concurrency_limit=5)
80
 
81
+
82
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
83
+ with gr.TabItem("🏆 LLM Leaderboard", elem_id="llm-benchmark-table", id=0):
84
  with gr.Row():
85
  with gr.Column():
86
  filter_hw = gr.CheckboxGroup(choices=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"],
87
  label="Select Training Platform*",
88
  elem_id="compute_platforms",
89
  value=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"])
90
+ filter_platform = gr.CheckboxGroup(choices=["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"],
91
  label="Training Infrastructure*",
92
  elem_id="training_infra",
93
+ value=["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"])
94
  filter_affiliation = gr.CheckboxGroup(choices=["No Affiliation","Intel Innovator","Intel Student Ambassador", "Intel Software Liftoff", "Intel Labs", "Other"],
95
  label="Intel Program Affiliation",
96
  elem_id="program_affiliation",
 
101
  label="Model Sizes (Billion of Parameters)",
102
  elem_id="parameter_size",
103
  value=[1,3,5,7,13,35,60,70,100])
104
+ filter_precision = gr.CheckboxGroup(choices=["fp32","fp16","bf16","int8","fp8", "int4"],
105
  label="Model Precision",
106
  elem_id="precision",
107
+ value=["fp32","fp16","bf16","int8","fp8", "int4"])
108
  filter_type = gr.CheckboxGroup(choices=["pretrained","fine-tuned","chat-models","merges/moerges"],
109
  label="Model Types",
110
  elem_id="model_types",
 
112
 
113
  initial_df = pd.read_csv("./status/leaderboard_status_030424.csv")
114
 
 
 
115
def update_df(hw_selected, platform_selected, affiliation_selected, size_selected, precision_selected, type_selected):
    """
    Return the leaderboard table filtered by the current selections.

    Each argument is the selection taken from the matching filter widget and
    is forwarded unchanged, together with the module-level ``initial_df``, to
    ``filter_benchmarks_table``.

    Returns:
        Whatever ``filter_benchmarks_table`` returns for these selections.
    """
    selections = dict(
        hw_selected=hw_selected,
        platform_selected=platform_selected,
        affiliation_selected=affiliation_selected,
        size_selected=size_selected,
        precision_selected=precision_selected,
        type_selected=type_selected,
    )
    return filter_benchmarks_table(df=initial_df, **selections)
120
 
121
# Seed the table with the same selections the filter widgets start with, so the
# first render matches what any later filter change would show. The platform
# and precision lists below mirror the defaults of `filter_platform` and
# `filter_precision` ("Google Cloud Platform", and fp32/int4 instead of 4bit);
# keep them in sync with those widgets.
initial_filtered_df = update_df(
    ["Gaudi", "Xeon", "GPU Max", "Arc GPU", "Core Ultra"],
    ["Intel Developer Cloud", "AWS", "Azure", "Google Cloud Platform", "Local"],
    ["No Affiliation", "Intel Innovator", "Intel Student Ambassador", "Intel Software Liftoff", "Intel Labs", "Other"],
    [1, 3, 5, 7, 13, 35, 60, 70, 100],
    ["fp32", "fp16", "bf16", "int8", "fp8", "int4"],
    ["pretrained", "fine-tuned", "chat-models", "merges/moerges"],
)

gradio_df_display = gr.Dataframe(value=initial_filtered_df)
129
+
130
  filter_hw.change(fn=update_df,
131
  inputs=[filter_hw, filter_platform, filter_affiliation, filter_size, filter_precision, filter_type],
132
  outputs=[gradio_df_display])
 
159
  gr.Markdown("# Submit Model for Evaluation 🏎️", elem_classes="markdown-text")
160
  with gr.Row():
161
  with gr.Column():
162
+ model_name_textbox = gr.Textbox(label="Model name",
163
+ info = """ Name of Model in the Hub. For example: 'Intel/neural-chat-7b-v1-1'""",)
164
+ revision_name_textbox = gr.Textbox(label="Revision commit (Branch)", placeholder="main")
165
  model_type = gr.Dropdown(
166
  choices=["pretrained","fine-tuned","chat models","merges/moerges"],
167
  label="Model type",
 
171
  )
172
 
173
  hw_type = gr.Dropdown(
174
+ choices=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"],
175
  label="Training Hardware",
176
  multiselect=False,
177
+ value="Gaudi",
178
  interactive=True,
179
  )
180
  terms = gr.Checkbox(
 
183
  value=False,
184
  interactive=True,
185
  )
186
+ submit_button = gr.Button("🤗 Submit Eval 💻")
187
+ submission_result = gr.Markdown()
188
  with gr.Column():
189
  precision = gr.Dropdown(
190
+ choices=["fp32","fp16","bf16","int8","fp8", "int4"],
191
  label="Precision",
192
  multiselect=False,
193
  value="fp16",
 
199
  multiselect=False,
200
  value="Original",
201
  interactive=True,
202
+ info = """ Select the appropriate weights. If you have fine-tuned or adapted a model with PEFT or Delta-Tuning you likely have
203
+ LoRA Adapters or Delta Weights.""",
204
  )
205
  training_infra = gr.Dropdown(
206
+ choices=["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"],
207
  label="Training Infrastructure",
208
  multiselect=False,
209
+ value="Intel Developer Cloud",
210
  interactive=True,
211
+ info = """ Select the infrastructure that the model was developed on.
212
+ Local is the ideal choice for Core Ultra, ARC GPUs, and local data center infrastructure.""",
213
  )
214
  affiliation = gr.Dropdown(
215
  choices=["No Affiliation","Innovator","Student Ambassador","Intel Liftoff", "Intel Labs", "Other"],
216
  label="Affiliation with Intel",
217
  multiselect=False,
218
+ value="No Affiliation",
219
  interactive=True,
220
+ info = """ Select "No Affiliation" if not part of any Intel programs.""",
221
  )
222
  base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
223
 
224
+ # gr.Markdown("Community Submissions Coming soon!")
 
 
225
 
226
  with gr.Accordion("📙 Citation", open=False):
227
  citation =gr.Textbox(value = CITATION_TEXT,
228
  lines=6,
229
  label="Use the following to cite this content")
230
 
231
+ gr.Markdown("""<div style="display: flex; justify-content: center;"> <p> Intel, the Intel logo and Gaudi are trademarks of Intel Corporation or its subsidiaries.
232
+ *Other names and brands may be claimed as the property of others.
233
+ </p> </div>""")
234
 
235
+ demo.launch(share=False)
info/__pycache__/citation.cpython-38.pyc ADDED
Binary file (513 Bytes). View file
 
info/__pycache__/deployment.cpython-38.pyc ADDED
Binary file (6.67 kB). View file
 
info/__pycache__/programs.cpython-38.pyc ADDED
Binary file (2.55 kB). View file
 
info/__pycache__/submit.cpython-38.pyc ADDED
Binary file (2.91 kB). View file
 
info/__pycache__/train_a_model.cpython-38.pyc ADDED
Binary file (3.57 kB). View file
 
info/__pycache__/validated_chat_models.cpython-38.pyc ADDED
Binary file (287 Bytes). View file
 
info/citation.py CHANGED
@@ -1,8 +1,8 @@
1
  CITATION_TEXT = r"""@misc{powered-by-intel-llm-leaderboard,
2
- author = {Eduardo Alvarez},
3
  title = {Powered By Intel LLM Leaderboard},
4
  year = {2024},
5
  publisher = {Intel},
6
- howpublished = "\url{https://huggingface.co/spaces/Intel/powered_by_intel_leaderboard}"
7
  }
8
  """
 
1
  CITATION_TEXT = r"""@misc{powered-by-intel-llm-leaderboard,
2
+ author = {Eduardo Alvarez and Jack Erickson and Benjamin Consolvo},
3
  title = {Powered By Intel LLM Leaderboard},
4
  year = {2024},
5
  publisher = {Intel},
6
+ howpublished = "\url{https://huggingface.co/spaces/Intel/powered_by_intel_llm_leaderboard}"
7
  }
8
  """
info/deployment.py CHANGED
@@ -1,10 +1,97 @@
1
  DEPLOY_TEXT = f"""
2
 
3
- Having table full of powerful models is nice and call but at the end of the day, you have to be able to use
4
- them for something. Below you will find sample code to help you load models and perform inference.
5
-
6
-
7
- ## Inference with Gaudi 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  Habana's SDK, Intel Gaudi Software, supports PyTorch and DeepSpeed for accelerating LLM training and inference.
9
  The Intel Gaudi Software graph compiler will optimize the execution of the operations accumulated in the graph
10
  (e.g. operator fusion, data layout management, parallelization, pipelining and memory management,
@@ -26,26 +113,10 @@ python run_generation.py \
26
  --prompt "Hello world" "How are you?"
27
 
28
  ```
 
29
 
30
- # Inference Intel Extension for Transformers
31
- Intel® Extension for Transformers is an innovative toolkit designed to accelerate GenAI/LLM
32
- everywhere with the optimal performance of Transformer-based models on various Intel platforms,
33
- including Intel Gaudi2, Intel CPU, and Intel GPU.
34
-
35
- ### INT4 Inference (CPU)
36
- ```python
37
- from transformers import AutoTokenizer
38
- from intel_extension_for_transformers.transformers import AutoModelForCausalLM
39
- model_name = "Intel/neural-chat-7b-v3-1"
40
- prompt = "When winter becomes spring, the flowers..."
41
-
42
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
43
- inputs = tokenizer(prompt, return_tensors="pt").input_ids
44
-
45
- model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=True)
46
- outputs = model.generate(inputs)
47
 
48
- ```
49
  ### INT4 Inference (GPU)
50
  ```python
51
  import intel_extension_for_pytorch as ipex
@@ -65,19 +136,11 @@ model = ipex.optimize_transformers(model, inplace=True, dtype=torch.float16, woq
65
 
66
  output = model.generate(inputs)
67
  ```
 
68
 
69
- # Intel Extension for PyTorch
70
- Intel® Extension for PyTorch extends PyTorch with up-to-date features optimizations for an
71
- extra performance boost on Intel hardware. Optimizations take advantage of Intel® Advanced
72
- Vector Extensions 512 (Intel® AVX-512) Vector Neural Network Instructions (VNNI) and Intel®
73
- Advanced Matrix Extensions (Intel® AMX) on Intel CPUs as well as Intel Xe Matrix Extensions
74
- (XMX) AI engines on Intel discrete GPUs. Moreover, Intel® Extension for PyTorch* provides easy
75
- GPU acceleration for Intel discrete GPUs through the PyTorch* xpu device.
76
 
77
- There are a few flavors of PyTorch that can be leveraged for inference. For detailed documentation,
78
- the visit https://intel.github.io/intel-extension-for-pytorch/#introduction
79
-
80
- ### IPEX with Optimum Intel (no quantization)
81
  Requires installing/updating optimum `pip install --upgrade-strategy eager optimum[ipex]
82
  `
83
  ```python
@@ -90,7 +153,7 @@ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
90
  results = pipe("A fisherman at sea...")
91
  ```
92
 
93
- ### IPEX with Stock PyTorch with Mixed Precision
94
  ```python
95
  import torch
96
  import intel_extension_for_pytorch as ipex
@@ -106,7 +169,60 @@ with torch.inference_mode():
106
  model.generate()
107
  ```
108
 
109
- # OpenVINO Toolkit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  ```python
112
  from optimum.intel import OVModelForCausalLM
@@ -122,5 +238,11 @@ pipe("In the spring, beautiful flowers bloom...")
122
 
123
  ```
124
 
 
 
 
 
 
 
125
 
126
  """
 
1
  DEPLOY_TEXT = f"""
2
 
3
+ # 🚀 Deployment Tips
4
+
5
+ A collection of powerful models is valuable, but ultimately, you need to be able to use them effectively.
6
+ This tab is dedicated to providing guidance and code snippets for performing inference with leaderboard models on Intel platforms.
7
+
8
+ Below, you'll find a table of open-source software options for inference, along with the supported Intel Hardware Platforms.
9
+ A 🚀 indicates that inference with the associated software package is supported on the hardware. We hope this information
10
+ helps you choose the best option for your specific use case. Happy building!
11
+
12
+ <div style="display: flex; justify-content: center;">
13
+ <table border="1">
14
+ <tr>
15
+ <th>Inference Software</th>
16
+ <th>Gaudi</th>
17
+ <th>Xeon</th>
18
+ <th>GPU Max</th>
19
+ <th>Arc GPU</th>
20
+ <th>Core Ultra</th>
21
+ </tr>
22
+ <tr>
23
+ <td>Optimum Habana</td>
24
+ <td>🚀</td>
25
+ <td></td>
26
+ <td></td>
27
+ <td></td>
28
+ <td></td>
29
+ </tr>
30
+ <tr>
31
+ <td>Intel Extension for PyTorch</td>
32
+ <td></td>
33
+ <td>🚀</td>
34
+ <td>🚀</td>
35
+ <td>🚀</td>
36
+ <td></td>
37
+ </tr>
38
+ <tr>
39
+ <td>Intel Extension for Transformers</td>
40
+ <td></td>
41
+ <td>🚀</td>
42
+ <td>🚀</td>
43
+ <td>🚀</td>
44
+ <td></td>
45
+ </tr>
46
+ <tr>
47
+ <td>OpenVINO</td>
48
+ <td></td>
49
+ <td>🚀</td>
50
+ <td>🚀</td>
51
+ <td>🚀</td>
52
+ <td>🚀</td>
53
+ </tr>
54
+ <tr>
55
+ <td>BigDL</td>
56
+ <td></td>
57
+ <td>🚀</td>
58
+ <td>🚀</td>
59
+ <td>🚀</td>
60
+ <td>🚀</td>
61
+ </tr>
62
+ <tr>
63
+ <td>NPU Acceleration Library</td>
64
+ <td></td>
65
+ <td></td>
66
+ <td></td>
67
+ <td></td>
68
+ <td>🚀</td>
69
+ </tr>
70
+
71
+ <tr>
72
+ <td>PyTorch</td>
73
+ <td>🚀</td>
74
+ <td>🚀</td>
75
+ <td>🚀</td>
76
+ <td>🚀</td>
77
+ <td>🚀</td>
78
+ </tr>
79
+
80
+ <tr>
81
+ <td>TensorFlow</td>
82
+ <td>🚀</td>
83
+ <td>🚀</td>
84
+ <td>🚀</td>
85
+ <td>🚀</td>
86
+ <td>🚀</td>
87
+ </tr>
88
+ </table>
89
+ </div>
90
+
91
+
92
+ <hr>
93
+
94
+ # Intel® Gaudi Accelerators
95
  Habana's SDK, Intel Gaudi Software, supports PyTorch and DeepSpeed for accelerating LLM training and inference.
96
  The Intel Gaudi Software graph compiler will optimize the execution of the operations accumulated in the graph
97
  (e.g. operator fusion, data layout management, parallelization, pipelining and memory management,
 
113
  --prompt "Hello world" "How are you?"
114
 
115
  ```
116
+ <hr>
117
 
118
+ # Intel® Max Series GPU
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
 
120
  ### INT4 Inference (GPU)
121
  ```python
122
  import intel_extension_for_pytorch as ipex
 
136
 
137
  output = model.generate(inputs)
138
  ```
139
+ <hr>
140
 
141
+ # Intel® Xeon CPUs
 
 
 
 
 
 
142
 
143
+ ### Intel Extension for PyTorch - Optimum Intel (no quantization)
 
 
 
144
  Requires installing/updating optimum `pip install --upgrade-strategy eager optimum[ipex]
145
  `
146
  ```python
 
153
  results = pipe("A fisherman at sea...")
154
  ```
155
 
156
+ ### Intel® Extension for PyTorch - Mixed Precision (fp32 and bf16)
157
  ```python
158
  import torch
159
  import intel_extension_for_pytorch as ipex
 
169
  model.generate()
170
  ```
171
 
172
+ ### Intel® Extension for Transformers - INT4 Inference (CPU)
173
+ ```python
174
+ from transformers import AutoTokenizer
175
+ from intel_extension_for_transformers.transformers import AutoModelForCausalLM
176
+ model_name = "Intel/neural-chat-7b-v3-1"
177
+ prompt = "When winter becomes spring, the flowers..."
178
+
179
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
180
+ inputs = tokenizer(prompt, return_tensors="pt").input_ids
181
+
182
+ model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=True)
183
+ outputs = model.generate(inputs)
184
+
185
+
186
+ ```
187
+
188
+ <hr>
189
+
190
+ # Intel® Core Ultra (NPUs and iGPUs)
191
+
192
+
193
+ ### Intel® NPU Acceleration Library
194
+ ```python
195
+ from transformers import AutoTokenizer, TextStreamer, AutoModelForCausalLM
196
+ import intel_npu_acceleration_library
197
+ import torch
198
+
199
+ model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
200
+
201
+ model = AutoModelForCausalLM.from_pretrained(model_id, use_cache=True).eval()
202
+ tokenizer = AutoTokenizer.from_pretrained(model_id, use_default_system_prompt=True)
203
+ tokenizer.pad_token_id = tokenizer.eos_token_id
204
+ streamer = TextStreamer(tokenizer, skip_special_tokens=True)
205
+
206
+ print("Compile model for the NPU")
207
+ model = intel_npu_acceleration_library.compile(model, dtype=torch.int8)
208
+
209
+ query = input("Ask something: ")
210
+ prefix = tokenizer(query, return_tensors="pt")["input_ids"]
211
+
212
+ generation_kwargs = dict(
213
+ input_ids=prefix,
214
+ streamer=streamer,
215
+ do_sample=True,
216
+ top_k=50,
217
+ top_p=0.9,
218
+ max_new_tokens=512,
219
+ )
220
+
221
+ print("Run inference")
222
+ _ = model.generate(**generation_kwargs)
223
+ ```
224
+
225
+ ### OpenVINO Toolkit with Optimum Intel
226
 
227
  ```python
228
  from optimum.intel import OVModelForCausalLM
 
238
 
239
  ```
240
 
241
+ <hr>
242
+
243
+ # Intel ARC GPUs
244
+
245
+ Coming Soon!
246
+
247
 
248
  """
info/programs.py CHANGED
@@ -1,7 +1,11 @@
1
  PROGRAMS_TEXT= """
 
 
2
  Intel offers a range of programs to grant early, short, and long-term access to developers. A great way to build
3
  and share models on the "Powered by Intel" LLM Leaderboard is to join one of these programs. Learn more about
4
- these opportunities below:
 
 
5
 
6
  ## Intel Liftoff Program
7
  Intel® Liftoff for startups is open to early-stage AI and machine learning startups. This free virtual program
@@ -12,6 +16,8 @@ like Gaudi, Max Series GPUs, and Xeon Processors.
12
 
13
  Learn more and apply through the program at https://www.intel.com/content/www/us/en/developer/tools/oneapi/liftoff.html
14
 
 
 
15
  ## Intel Student Ambassador Program
16
  This program is focused on undergraduate and graduate students who are passionate about technology and
17
  working with developer communities to promote learning, sharing, and collaboration. It provides opportunities
@@ -23,6 +29,7 @@ a nondisclosure agreement (NDA) and extended access to Intel® Developer Cloud.
23
 
24
  Learn more and apply through the program at https://www.intel.com/content/www/us/en/developer/tools/oneapi/training/academic-program/student-ambassador.html#gs.5f5oi3
25
 
 
26
 
27
  ## Intel Innovator Program
28
  This program is for developers using oneAPI on Intel® architecture who provide technical leadership and inspiration
 
1
  PROGRAMS_TEXT= """
2
+ # 👩‍💻 Developer Programs
3
+
4
  Intel offers a range of programs to grant early, short, and long-term access to developers. A great way to build
5
  and share models on the "Powered by Intel" LLM Leaderboard is to join one of these programs. Learn more about
6
+ these opportunities below:
7
+
8
+ <hr>
9
 
10
  ## Intel Liftoff Program
11
  Intel® Liftoff for startups is open to early-stage AI and machine learning startups. This free virtual program
 
16
 
17
  Learn more and apply through the program at https://www.intel.com/content/www/us/en/developer/tools/oneapi/liftoff.html
18
 
19
+ <hr>
20
+
21
  ## Intel Student Ambassador Program
22
  This program is focused on undergraduate and graduate students who are passionate about technology and
23
  working with developer communities to promote learning, sharing, and collaboration. It provides opportunities
 
29
 
30
  Learn more and apply through the program at https://www.intel.com/content/www/us/en/developer/tools/oneapi/training/academic-program/student-ambassador.html#gs.5f5oi3
31
 
32
+ <hr>
33
 
34
  ## Intel Innovator Program
35
  This program is for developers using oneAPI on Intel® architecture who provide technical leadership and inspiration
info/submit.py CHANGED
@@ -1,7 +1,9 @@
1
 
2
  SUBMIT_TEXT = f"""
3
- # Evaluation Queue for the 🤗"Powered by Intel" LLM Leaderboard 💻
4
- Models added here will be queued for evaluation on the Intel Developer Cloud ☁️
 
 
5
  ## First steps before submitting a model
6
 
7
  ### 1) Make sure you can load your model and tokenizer using AutoClasses:
@@ -19,19 +21,19 @@ Note: if your model needs `use_remote_code=True`, we do not support this option
19
  It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer`!
20
 
21
  ### 3) Make sure your model has an open license!
22
- This is a leaderboard for Open LLMs, and we'd love for as many people as possible to know they can use your model 🤗
 
23
 
24
  ### 4) Fill up your model card
25
  We use your model card to better understand the properties of your model and make them more easily discoverable for other users.
26
  Model cards are required to have mentions of the hardware, software, and infrastructure used for training - without this information
27
- we cannot accept your model as a valid submission.
 
28
 
29
  ### 5) Select the correct precision
30
  Not all models are converted properly from `float16` to `bfloat16`, and selecting the wrong precision can sometimes cause evaluation error (as loading a `bf16` model in `fp16` can sometimes generate NaNs, depending on the weight range).
31
 
32
  ## In case of model failure
33
- If your model is displayed in the `FAILED` category, its execution stopped.
34
- Make sure you have followed the above steps first.
35
- If everything is done, check you can launch the EleutherAIHarness on your model locally, using the command in the About tab under "Reproducibility" with all arguments specified (you can add `--limit` to limit the number of examples per task).
36
 
37
  """
 
1
 
2
  SUBMIT_TEXT = f"""
3
+ # 🏎️ Submit
4
+ Models added here will be queued for evaluation on the Intel Developer Cloud ☁️ Depending on the queue, your model may take up to 10 days to show up on the leaderboard.
5
+ We will work to create greater transparency as our leaderboard community grows!
6
+
7
  ## First steps before submitting a model
8
 
9
  ### 1) Make sure you can load your model and tokenizer using AutoClasses:
 
21
  It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer`!
22
 
23
  ### 3) Make sure your model has an open license!
24
+ This is a leaderboard for Open LLMs, and we'd love for as many people as possible to know they can use your model 🤗 A good example of an open source license is apache-2.0.
25
+ Typically, model licenses that allow for commercial and research use tend to be the most attractive to other developers in the ecosystem!
26
 
27
  ### 4) Fill up your model card
28
  We use your model card to better understand the properties of your model and make them more easily discoverable for other users.
29
  Model cards are required to have mentions of the hardware, software, and infrastructure used for training - without this information
30
+ we cannot accept your model as a valid submission. Remember, only models trained on these processors are eligible to participate in evaluation:
31
+ Intel® Gaudi Accelerators, Intel® Xeon® Processors, Intel® Data Center GPU Max Series, Intel® ARC GPUs, and Intel® Core Ultra
32
 
33
  ### 5) Select the correct precision
34
  Not all models are converted properly from `float16` to `bfloat16`, and selecting the wrong precision can sometimes cause evaluation error (as loading a `bf16` model in `fp16` can sometimes generate NaNs, depending on the weight range).
35
 
36
  ## In case of model failure
37
+ If your model fails evaluation 😔, we will contact you by opening a new discussion in your model repository. Let's work together to get your model the love it deserves ❤️!
 
 
38
 
39
  """
info/train_a_model.py CHANGED
@@ -1,9 +1,9 @@
1
 
2
  LLM_BENCHMARKS_TEXT = f"""
3
- # Use the Resources Below to Start Training a Model Today
4
 
5
- Intel offers a variety of platforms that can be used to train LLMs including datacenter and consumer grade cpus, gpus, and ASICs.
6
- Below, you'll find documentation on how to access free and paid resources to train a model and submit it to the "Intel Inside Leaderboard".
7
 
8
  ## Intel Developer Cloud - Quick Start
9
  The Intel Developer Cloud is one of the best places to access free and paid compute instances for model training. Intel offers Jupyter Notebook instances supported by
@@ -11,19 +11,19 @@ The Intel Developer Cloud is one of the best places to access free and paid comp
11
  1. Visit [cloud.intel.com](https://cloud.intel.com) and create a free account.
12
  2. Navigate to the "Training" module under the "Software" section in the left panel
13
  3. Under the GenAI Essentials section, select the LLM Fine-Tuning with QLoRA notebook and click "Launch"
14
- 4. Follow the instructions in the notebook to train your model using Intel Max Series 1100 GPUs
15
  5. Upload your model to the Hugging Face Model Hub
16
  6. Go to the "Submit" tab and follow the instructions to create a leaderboard evaluation request
17
 
18
  ## Additional Training Code Samples
19
 
20
  Below you will find a list of additional resources for training models on different Intel hardware platforms:
21
- - Gaudi Processors
22
  - [Parameter Efficient Fine-Tuning of Llama-2 70B](https://github.com/HabanaAI/Gaudi-tutorials/blob/main/PyTorch/llama2_fine_tuning_inference/llama2_fine_tuning_inference.ipynb)
23
- - Xeon Processors
24
  - [Distributed Training of GPT2 LLMs on AWS](https://github.com/intel/intel-cloud-optimizations-aws/tree/main/distributed-training)
25
  - [Fine-tuning Falcon 7B on Xeon Processors](https://medium.com/@eduand-alvarez/fine-tune-falcon-7-billion-on-xeon-cpus-with-hugging-face-and-oneapi-a25e10803a53)
26
- - Max Series GPUs
27
  - [LLM Fine-tuning with QLoRA on Max Series GPUs](https://console.idcservice.net/training/detail/159c24e4-5598-3155-a790-2qv973tlm172)
28
  ## Submitting your Model to the Hub
29
  Once you have trained your model, it is a straightforward process to upload and open source it on the Hugging Face Hub.
 
1
 
2
  LLM_BENCHMARKS_TEXT = f"""
3
+ # 🧰 Train a Model
4
 
5
+ Intel offers a variety of platforms that can be used to train LLMs including datacenter and consumer grade CPUs, GPUs, and ASICs.
6
+ Below, you'll find documentation on how to access free and paid resources to train a model and submit it to the Powered-by-Intel LLM Leaderboard.
7
 
8
  ## Intel Developer Cloud - Quick Start
9
  The Intel Developer Cloud is one of the best places to access free and paid compute instances for model training. Intel offers Jupyter Notebook instances supported by
 
11
  1. Visit [cloud.intel.com](https://cloud.intel.com) and create a free account.
12
  2. Navigate to the "Training" module under the "Software" section in the left panel
13
  3. Under the GenAI Essentials section, select the LLM Fine-Tuning with QLoRA notebook and click "Launch"
14
+ 4. Follow the instructions in the notebook to train your model using Intel® Data Center GPU Max 1100
15
  5. Upload your model to the Hugging Face Model Hub
16
  6. Go to the "Submit" tab and follow the instructions to create a leaderboard evaluation request
17
 
18
  ## Additional Training Code Samples
19
 
20
  Below you will find a list of additional resources for training models on different Intel hardware platforms:
21
+ - Intel® Gaudi® Accelerators
22
  - [Parameter Efficient Fine-Tuning of Llama-2 70B](https://github.com/HabanaAI/Gaudi-tutorials/blob/main/PyTorch/llama2_fine_tuning_inference/llama2_fine_tuning_inference.ipynb)
23
+ - Intel® Xeon® Processors
24
  - [Distributed Training of GPT2 LLMs on AWS](https://github.com/intel/intel-cloud-optimizations-aws/tree/main/distributed-training)
25
  - [Fine-tuning Falcon 7B on Xeon Processors](https://medium.com/@eduand-alvarez/fine-tune-falcon-7-billion-on-xeon-cpus-with-hugging-face-and-oneapi-a25e10803a53)
26
+ - Intel® Data Center GPU Max Series
27
  - [LLM Fine-tuning with QLoRA on Max Series GPUs](https://console.idcservice.net/training/detail/159c24e4-5598-3155-a790-2qv973tlm172)
28
  ## Submitting your Model to the Hub
29
  Once you have trained your model, it is a straightforward process to upload and open source it on the Hugging Face Hub.
info/validated_chat_models.py ADDED
@@ -0,0 +1 @@
 
 
1
+ VALIDATED_CHAT_MODELS = ['Intel/neural-chat-7b-v1-1','More Coming Soon!']
src/__pycache__/processing.cpython-38.pyc ADDED
Binary file (779 Bytes). View file
 
src/leaderboard_filtered.csv DELETED
@@ -1,7 +0,0 @@
1
- ,Model,Average,Hardware,Model Type,Precision,Size,Infrastructure,ARC,MMLU,TruthfulQA,Winogrande,GSM8K,Affiliation
2
- 1,BetaWave,83.21,Arc GPU,fine-tuned,fp16,7,Local,70.44,92.32,78.67,85.55,90.0,Innovator
3
- 4,EpsilonWave,58.44,Xeon,fine-tuned,int8,3,AWS,91.22,82.1,60.55,80.11,77.89,Partner
4
- 6,EtaMatrix,69.78,Xeon,fine-tuned,int8,3,GCP,85.55,79.33,70.89,72.18,79.44,Liftoff
5
- 7,ThetaCore,88.12,Arc GPU,fine-tuned,int8,3,Local,67.33,85.78,88.55,86.9,83.11,Liftoff
6
- 14,BetaNeural,79.67,Gaudi 1,fine-tuned,4bit,7,AWS,85.44,77.22,83.1,75.45,71.33,Partner
7
- 15,TrackSpeed,88.12,Arc GPU,fine-tuned,4bit,7,Local,67.33,85.78,88.55,86.9,83.11,Student Ambassador
 
 
 
 
 
 
 
 
src/submit.py CHANGED
@@ -0,0 +1 @@
 
 
1
+ # eval submission logic