Spaces:
Running
Running
Arnav Chavan
committed on
Commit
·
aa8b4d6
1
Parent(s):
2fcb72a
remove control panel
Browse files- app.py +34 -31
- src/leaderboard.py +2 -2
- src/panel.py +4 -4
app.py
CHANGED
|
@@ -33,19 +33,22 @@ with demo:
|
|
| 33 |
gr.Markdown(config.detail, elem_classes="descriptive-text")
|
| 34 |
|
| 35 |
######################## CONTROL PANEL #######################
|
| 36 |
-
(
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
) = create_control_panel(
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
)
|
|
|
|
|
|
|
|
|
|
| 49 |
####################### HARDWARE SUBTABS #######################
|
| 50 |
with gr.Tabs(elem_classes="subtabs"):
|
| 51 |
open_llm_perf_df = get_llm_perf_df(
|
|
@@ -69,24 +72,24 @@ with demo:
|
|
| 69 |
# create_quant_krnl_plots(llm_perf_df)
|
| 70 |
# )
|
| 71 |
####################### CONTROL CALLBACK #######################
|
| 72 |
-
create_control_callback(
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
)
|
| 90 |
|
| 91 |
create_select_callback(
|
| 92 |
# inputs
|
|
|
|
| 33 |
gr.Markdown(config.detail, elem_classes="descriptive-text")
|
| 34 |
|
| 35 |
######################## CONTROL PANEL #######################
|
| 36 |
+
# (
|
| 37 |
+
# filter_button,
|
| 38 |
+
# machine_value,
|
| 39 |
+
# backends_value,
|
| 40 |
+
# hardware_type_value,
|
| 41 |
+
# memory_slider,
|
| 42 |
+
# quantization_checkboxes,
|
| 43 |
+
# ) = create_control_panel(
|
| 44 |
+
# machine=config.machine,
|
| 45 |
+
# backends=config.backends,
|
| 46 |
+
# hardware_provider=config.hardware_provider,
|
| 47 |
+
# hardware_type=config.hardware_type,
|
| 48 |
+
# )
|
| 49 |
+
machine_value = gr.State(value=config.machine)
|
| 50 |
+
backends_value = gr.State(value=config.backends)
|
| 51 |
+
hardware_type_value = gr.State(value=config.hardware_type)
|
| 52 |
####################### HARDWARE SUBTABS #######################
|
| 53 |
with gr.Tabs(elem_classes="subtabs"):
|
| 54 |
open_llm_perf_df = get_llm_perf_df(
|
|
|
|
| 72 |
# create_quant_krnl_plots(llm_perf_df)
|
| 73 |
# )
|
| 74 |
####################### CONTROL CALLBACK #######################
|
| 75 |
+
# create_control_callback(
|
| 76 |
+
# filter_button,
|
| 77 |
+
# # inputs
|
| 78 |
+
# machine_value,
|
| 79 |
+
# backends_value,
|
| 80 |
+
# hardware_type_value,
|
| 81 |
+
# memory_slider,
|
| 82 |
+
# quantization_checkboxes,
|
| 83 |
+
# # interactive
|
| 84 |
+
# columns_checkboxes,
|
| 85 |
+
# search_bar,
|
| 86 |
+
# # outputs
|
| 87 |
+
# leaderboard_table,
|
| 88 |
+
# # attn_prefill_plot,
|
| 89 |
+
# # attn_decode_plot,
|
| 90 |
+
# # quant_krnl_prefill_plot,
|
| 91 |
+
# # quant_krnl_decode_plot,
|
| 92 |
+
# )
|
| 93 |
|
| 94 |
create_select_callback(
|
| 95 |
# inputs
|
src/leaderboard.py
CHANGED
|
@@ -4,7 +4,7 @@ from src.utils import model_hyperlink
|
|
| 4 |
|
| 5 |
LEADERBOARD_COLUMN_TO_DATATYPE = {
|
| 6 |
# open llm
|
| 7 |
-
"Model": "
|
| 8 |
"Quantization": "str",
|
| 9 |
# primary measurements
|
| 10 |
"Prefill (tokens/s)": "number",
|
|
@@ -35,7 +35,7 @@ def process_model(model_name):
|
|
| 35 |
def get_leaderboard_df(llm_perf_df):
|
| 36 |
df = llm_perf_df.copy()
|
| 37 |
# transform for leaderboard
|
| 38 |
-
df["Model"] = df["Model"].apply(process_model)
|
| 39 |
return df
|
| 40 |
|
| 41 |
|
|
|
|
| 4 |
|
| 5 |
LEADERBOARD_COLUMN_TO_DATATYPE = {
|
| 6 |
# open llm
|
| 7 |
+
"Model": "str",
|
| 8 |
"Quantization": "str",
|
| 9 |
# primary measurements
|
| 10 |
"Prefill (tokens/s)": "number",
|
|
|
|
| 35 |
def get_leaderboard_df(llm_perf_df):
|
| 36 |
df = llm_perf_df.copy()
|
| 37 |
# transform for leaderboard
|
| 38 |
+
# df["Model"] = df["Model"].apply(process_model)
|
| 39 |
return df
|
| 40 |
|
| 41 |
|
src/panel.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
from typing import List
|
| 2 |
|
| 3 |
import gradio as gr
|
|
|
|
| 4 |
|
| 5 |
from src.leaderboard import get_leaderboard_df
|
| 6 |
from src.llm_perf import get_llm_perf_df
|
| 7 |
-
|
| 8 |
# from attention_implementations import get_attn_decode_fig, get_attn_prefill_fig
|
| 9 |
# from custom_kernels import get_kernel_decode_fig, get_kernel_prefill_fig
|
| 10 |
|
|
@@ -21,7 +21,7 @@ def create_control_panel(
|
|
| 21 |
hardware_type_value = gr.State(value=hardware_type)
|
| 22 |
|
| 23 |
if hardware_provider == "ARM":
|
| 24 |
-
backends = ["
|
| 25 |
quantizations = ["Q8_0", "Q4_K_M", "Q4_0_4_4"]
|
| 26 |
else:
|
| 27 |
raise ValueError(f"Unknown hardware provider: {hardware_provider}")
|
|
@@ -30,7 +30,7 @@ def create_control_panel(
|
|
| 30 |
with gr.Row():
|
| 31 |
with gr.Column(scale=2, variant="panel"):
|
| 32 |
memory_slider = gr.Slider(
|
| 33 |
-
label="Model Size (GB)
|
| 34 |
info="🎚️ Slide to maximum Model Size",
|
| 35 |
minimum=0,
|
| 36 |
maximum=16,
|
|
@@ -81,7 +81,7 @@ def filter_rows_fn(
|
|
| 81 |
filtered_llm_perf_df = llm_perf_df[
|
| 82 |
llm_perf_df["Model"].str.contains(search, case=False)
|
| 83 |
& llm_perf_df["Quantization"].isin(quantizations)
|
| 84 |
-
&
|
| 85 |
]
|
| 86 |
selected_filtered_llm_perf_df = select_columns_fn(
|
| 87 |
machine, backends, hardware_type, columns, search, filtered_llm_perf_df
|
|
|
|
| 1 |
from typing import List
|
| 2 |
|
| 3 |
import gradio as gr
|
| 4 |
+
import pandas as pd
|
| 5 |
|
| 6 |
from src.leaderboard import get_leaderboard_df
|
| 7 |
from src.llm_perf import get_llm_perf_df
|
|
|
|
| 8 |
# from attention_implementations import get_attn_decode_fig, get_attn_prefill_fig
|
| 9 |
# from custom_kernels import get_kernel_decode_fig, get_kernel_prefill_fig
|
| 10 |
|
|
|
|
| 21 |
hardware_type_value = gr.State(value=hardware_type)
|
| 22 |
|
| 23 |
if hardware_provider == "ARM":
|
| 24 |
+
backends = ["llama_cpp"]
|
| 25 |
quantizations = ["Q8_0", "Q4_K_M", "Q4_0_4_4"]
|
| 26 |
else:
|
| 27 |
raise ValueError(f"Unknown hardware provider: {hardware_provider}")
|
|
|
|
| 30 |
with gr.Row():
|
| 31 |
with gr.Column(scale=2, variant="panel"):
|
| 32 |
memory_slider = gr.Slider(
|
| 33 |
+
label="Model Size (GB)",
|
| 34 |
info="🎚️ Slide to maximum Model Size",
|
| 35 |
minimum=0,
|
| 36 |
maximum=16,
|
|
|
|
| 81 |
filtered_llm_perf_df = llm_perf_df[
|
| 82 |
llm_perf_df["Model"].str.contains(search, case=False)
|
| 83 |
& llm_perf_df["Quantization"].isin(quantizations)
|
| 84 |
+
& llm_perf_df["Model Size (GB)"] <= memory
|
| 85 |
]
|
| 86 |
selected_filtered_llm_perf_df = select_columns_fn(
|
| 87 |
machine, backends, hardware_type, columns, search, filtered_llm_perf_df
|