Skip to content

Commit

Permalink
Leaderboard update: mistral-large-2402, gemini-1.0-pro and gemma-7b;…
Browse files Browse the repository at this point in the history
… Update REST eval. (#237)

Update the leaderboard data and wagon wheel data to reflect the new models and
the updated REST API eval results.

Change the leaderboard to include `mistral-large-2402`, `gemini-1.0-pro`,
and `gemma-7b`.
Change the leaderboard evals to be consistent with the new REST eval from #234.
  • Loading branch information
HuanzhiMao authored Mar 5, 2024
1 parent 4f96cb8 commit 0b872bc
Show file tree
Hide file tree
Showing 2 changed files with 164 additions and 120 deletions.
69 changes: 57 additions & 12 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ const data = {
datasets: [
{
label: 'GPT-4-0125',
data: [87.50, 82.18, 90.00, 90.00, 91.00, 54.12, 76.00, 70.00, 55.00],
data: [87.50, 82.18, 90.00, 90.00, 91.00, 67.06, 76.00, 70.00, 55.00],
fill: true,
backgroundColor: 'rgba(255, 206, 86, 0.1)',
borderColor: 'rgb(255, 206, 86)',
Expand All @@ -152,7 +152,7 @@ const data = {
pointHoverBorderColor: 'rgb(255, 206, 86)'
}, {
label: 'GPT-4-1106',
data: [88.75, 81.64, 92.00, 89.50, 92.00, 53.53, 72.00, 62.00, 50.00],
data: [88.75, 81.64, 92.00, 89.50, 92.00, 70.00, 72.00, 62.00, 50.00],
fill: true,
backgroundColor: 'rgba(75, 192, 192, 0.1)',
borderColor: 'rgb(75, 192, 192)',
Expand All @@ -163,7 +163,7 @@ const data = {
hidden: true
}, {
label: 'OpenFunctions-v2',
data: [71.67, 88.73, 79.50, 89.50, 78.00, 78.82, 76.00, 74.00, 60.00],
data: [71.67, 88.73, 79.50, 89.50, 78.00, 80.00, 76.00, 74.00, 60.00],
fill: true,
backgroundColor: 'rgba(153, 102, 255, 0.1)',
borderColor: 'rgb(153, 102, 255)',
Expand All @@ -173,7 +173,7 @@ const data = {
pointHoverBorderColor: 'rgb(153, 102, 255)'
}, {
label: 'GPT-3.5-Turbo',
data: [68.33, 81.27, 87.50, 88.00, 88.00, 74.12, 70.00, 74.00, 47.50],
data: [68.33, 81.27, 87.50, 88.00, 88.00, 80.00, 70.00, 74.00, 47.50],
fill: true,
backgroundColor: 'rgba(255, 159, 64, 0.1)',
borderColor: 'rgb(255, 159, 64)',
Expand All @@ -184,7 +184,7 @@ const data = {
hidden: true
}, {
label: 'Mistral-medium',
data: [90.00, 80.18, 71.00, 84.50, 68.00, 75.88, 62.00, 72.00, 47.50],
data: [90.00, 80.18, 71.00, 84.50, 68.00, 78.24, 62.00, 72.00, 47.50],
fill: true,
backgroundColor: 'rgba(54, 162, 235, 0.1)',
borderColor: 'rgb(54, 162, 235)',
Expand All @@ -195,7 +195,7 @@ const data = {
hidden: true
}, {
label: 'Claude-2.1',
data: [78.33, 85.64, 72.00, 83.00, 56.50, 61.18, 60.00, 48.00, 45.00],
data: [78.33, 85.64, 72.00, 83.00, 56.50, 63.53, 60.00, 48.00, 45.00],
fill: true,
backgroundColor: 'rgba(163, 73, 164, 0.1)',
borderColor: 'rgb(163, 73, 164)',
Expand All @@ -206,7 +206,7 @@ const data = {
hidden: true
}, {
label: 'Mistral-tiny',
data: [77.08, 59.27, 53.50, 59.50, 41.50, 58.24, 42.00, 64.00, 40.00],
data: [77.08, 59.27, 53.50, 59.50, 41.50, 63.53, 42.00, 64.00, 40.00],
fill: true,
backgroundColor: 'rgba(255, 105, 180, 0.1)',
borderColor: 'rgb(255, 105, 180)',
Expand All @@ -217,7 +217,7 @@ const data = {
hidden: true
}, {
label: 'Claude-instant',
data: [61.67, 68.73, 53.00, 59.00, 39.50, 51.76, 50.00, 52.00, 37.50],
data: [61.67, 68.73, 53.00, 59.00, 39.50, 56.47, 50.00, 52.00, 37.50],
fill: true,
backgroundColor: 'rgba(255, 165, 0, 0.1)',
borderColor: 'rgb(255, 165, 0)',
Expand All @@ -228,7 +228,7 @@ const data = {
hidden: true
}, {
label: 'Nexusflow-Raven-v2',
data: [0.00, 76.55, 39.50, 83.50, 34.00, 45.88, 68.00, 78.00, 45.00],
data: [0.00, 76.55, 39.50, 83.50, 34.00, 58.24, 68.00, 78.00, 45.00],
fill: true,
backgroundColor: 'rgba(60, 179, 113, 0.1)',
borderColor: 'rgb(60, 179, 113)',
Expand All @@ -239,7 +239,7 @@ const data = {
hidden: true
}, {
label: 'Mistral-small',
data: [89.58, 46.55, 48.50, 68.00, 58.00, 14.12, 40.00, 30.00, 37.50],
data: [89.58, 46.55, 48.50, 68.00, 58.00, 32.35, 40.00, 30.00, 37.50],
fill: true,
backgroundColor: 'rgba(0, 0, 255, 0.1)',
borderColor: 'rgb(0, 0, 255)',
Expand All @@ -250,7 +250,7 @@ const data = {
hidden: true
}, {
label: 'GPT-4-0613',
data: [87.08, 74.55, 4.00, 86.00, 0.00, 37.65, 0.00, 50.00, 0.00],
data: [87.08, 74.55, 4.00, 86.00, 0.00, 44.12, 0.00, 50.00, 0.00],
fill: true,
backgroundColor: 'rgba(128, 0, 0, 0.1)',
borderColor: 'rgb(128, 0, 0)',
Expand All @@ -261,7 +261,7 @@ const data = {
hidden: true
}, {
label: 'Deepseek-v1.5',
data: [66.25, 48.36, 35.00, 61.00, 43.50, 5.29, 0.00, 2.00, 7.50],
data: [66.25, 48.36, 35.00, 61.00, 43.50, 24.70, 0.00, 2.00, 7.50],
fill: true,
backgroundColor: 'rgba(255, 215, 0, 0.1)',
borderColor: 'rgb(255, 215, 0)',
Expand Down Expand Up @@ -292,8 +292,53 @@ const data = {
pointHoverBackgroundColor: '#fff',
pointHoverBorderColor: 'rgb(255, 99, 132)',
hidden: true
}, {
label: 'Gemini-1.0-Pro',
data: [77.50, 78.43, 89, 4.00, 0.00, 63.77, 62.00, 0.00, 0.00],
fill: true,
backgroundColor: 'rgba(218, 112, 214, 0.1)',
borderColor: 'rgb(218, 112, 214)',
pointBackgroundColor: 'rgb(218, 112, 214)',
pointBorderColor: '#fff',
pointHoverBackgroundColor: '#fff',
pointHoverBorderColor: 'rgb(218, 112, 214)',
hidden: true
}, {
label: 'Mistral-large-2402',
data: [84.58, 71.82, 90.50, 4.00, 0.00, 67.06, 66.00, 0.00, 5.00],
fill: true,
backgroundColor: 'rgba(65, 105, 225, 0.1)',
borderColor: 'rgb(65, 105, 225)',
pointBackgroundColor: 'rgb(65, 105, 225)',
pointBorderColor: '#fff',
pointHoverBackgroundColor: '#fff',
pointHoverBorderColor: 'rgb(65, 105, 225)',
hidden: true
}, {
label: 'Firefunction-v1',
data: [81.25, 73.19, 87.00, 4.00, 0.00, 61.76, 64.00, 0.00, 5.00],
fill: true,
backgroundColor: 'rgba(0, 255, 255, 0.1)',
borderColor: 'rgb(0, 255, 255)',
pointBackgroundColor: 'rgb(0, 255, 255)',
pointBorderColor: '#fff',
pointHoverBackgroundColor: '#fff',
pointHoverBorderColor: 'rgb(0, 255, 255)',
hidden: true
}, {
label: 'Gemma',
data: [0.42, 61.45, 60.00, 41.00, 32.50, 44.71, 46.00, 44.00, 25.50],
fill: true,
backgroundColor: 'rgba(85, 107, 47, 0.1)',
borderColor: 'rgb(85, 107, 47)',
pointBackgroundColor: 'rgb(85, 107, 47)',
pointBorderColor: '#fff',
pointHoverBackgroundColor: '#fff',
pointHoverBorderColor: 'rgb(85, 107, 47)',
hidden: true
},


]
};

Expand Down
Loading

0 comments on commit 0b872bc

Please sign in to comment.