From aee94a05841c439562bd6f50eee1e30ac1fb31fe Mon Sep 17 00:00:00 2001
From: Paul Gauthier <aider@paulg.org>
Date: Sun, 24 Nov 2024 07:14:09 -0800
Subject: [PATCH] copy

---
 aider/website/_data/quant.yml                 | 23 +++++++++++++++++++
 .../website/_posts/2024-11-21-quantization.md | 19 ++++++++++-----
 2 files changed, 36 insertions(+), 6 deletions(-)
diff --git a/aider/website/_data/quant.yml b/aider/website/_data/quant.yml
index 03ec2254d6d..6c1687c5403 100644
--- a/aider/website/_data/quant.yml
+++ b/aider/website/_data/quant.yml
@@ -274,3 +274,26 @@
   versions: 0.64.2.dev
   seconds_per_case: 110.0
   total_cost: 0.1763
+
+- dirname: 2024-11-24-15-00-50--qwen25-32b-or-deepinfra
+  test_cases: 133
+  model: "Deepinfra via OpenRouter: BF16"
+  edit_format: diff
+  commit_hash: c2f184f
+  pass_rate_1: 57.1
+  pass_rate_2: 69.9
+  percent_cases_well_formed: 89.5
+  error_outputs: 35
+  num_malformed_responses: 31
+  num_with_malformed_responses: 14
+  user_asks: 11
+  lazy_comments: 0
+  syntax_errors: 1
+  indentation_errors: 1
+  exhausted_context_windows: 4
+  test_timeouts: 1
+  command: aider --model openrouter/qwen/qwen-2.5-coder-32b-instruct
+  date: 2024-11-24
+  versions: 0.64.2.dev
+  seconds_per_case: 28.5
+  total_cost: 0.1390
\ No newline at end of file
diff --git a/aider/website/_posts/2024-11-21-quantization.md b/aider/website/_posts/2024-11-21-quantization.md
index abbf2b7d43e..15056fe1c83 100644
--- a/aider/website/_posts/2024-11-21-quantization.md
+++ b/aider/website/_posts/2024-11-21-quantization.md
@@ -18,11 +18,6 @@ can impact code editing skill.
 Heavily quantized models are often used by cloud API providers
 and local model servers like Ollama or MLX.
 
-<canvas id="quantChart" width="800" height="500" style="margin: 20px 0"></canvas>
-<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
-<script>
-{% include quant-chart.js %}
-</script>
 
-The graph above compares different versions of the Qwen 2.5 Coder 32B Instruct model,
+The graph below compares different versions of the Qwen 2.5 Coder 32B Instruct model,
 served both locally and from cloud providers.
@@ -34,11 +29,23 @@ served both locally and from cloud providers.
 - Other API providers.
 
 The best version of the model rivals GPT-4o, while the worst performer
-is more like GPT-4 Turbo level.
+is worse than GPT-3.5 Turbo.
+
+The results for Hyperbolic via OpenRouter are particularly puzzling.
+Their direct API produces excellent results, but performance
+through OpenRouter is very poor.
+It's unclear why this affects only this one provider.
+The other providers available through OpenRouter perform about the same
+whether they are accessed through OpenRouter or directly via their own API.
 
 {: .note }
 This article is being updated as additional benchmark runs complete.
 
+<canvas id="quantChart" width="800" height="600" style="margin: 20px 0"></canvas>
+<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
+<script>
+{% include quant-chart.js %}
+</script>
 
 <input type="text" id="quantSearchInput" placeholder="Search..." style="width: 100%; max-width: 800px; margin: 10px auto; padding: 8px; display: block; border: 1px solid #ddd; border-radius: 4px;">