From aee94a05841c439562bd6f50eee1e30ac1fb31fe Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 24 Nov 2024 07:14:09 -0800 Subject: [PATCH] copy --- aider/website/_data/quant.yml | 23 +++++++++++++++++++ .../website/_posts/2024-11-21-quantization.md | 19 ++++++++++----- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/aider/website/_data/quant.yml b/aider/website/_data/quant.yml index 03ec2254d6d..6c1687c5403 100644 --- a/aider/website/_data/quant.yml +++ b/aider/website/_data/quant.yml @@ -274,3 +274,26 @@ versions: 0.64.2.dev seconds_per_case: 110.0 total_cost: 0.1763 + +- dirname: 2024-11-24-15-00-50--qwen25-32b-or-deepinfra + test_cases: 133 + model: "Deepinfra via OpenRouter: BF16" + edit_format: diff + commit_hash: c2f184f + pass_rate_1: 57.1 + pass_rate_2: 69.9 + percent_cases_well_formed: 89.5 + error_outputs: 35 + num_malformed_responses: 31 + num_with_malformed_responses: 14 + user_asks: 11 + lazy_comments: 0 + syntax_errors: 1 + indentation_errors: 1 + exhausted_context_windows: 4 + test_timeouts: 1 + command: aider --model openrouter/qwen/qwen-2.5-coder-32b-instruct + date: 2024-11-24 + versions: 0.64.2.dev + seconds_per_case: 28.5 + total_cost: 0.1390 \ No newline at end of file diff --git a/aider/website/_posts/2024-11-21-quantization.md b/aider/website/_posts/2024-11-21-quantization.md index abbf2b7d43e..15056fe1c83 100644 --- a/aider/website/_posts/2024-11-21-quantization.md +++ b/aider/website/_posts/2024-11-21-quantization.md @@ -18,11 +18,6 @@ can impact code editing skill. Heavily quantized models are often used by cloud API providers and local model servers like Ollama or MLX. - - - The graph above compares different versions of the Qwen 2.5 Coder 32B Instruct model, served both locally and from cloud providers. @@ -34,11 +29,23 @@ served both locally and from cloud providers. - Other API providers. The best version of the model rivals GPT-4o, while the worst performer -is more like GPT-4 Turbo level. 
+is worse than GPT-3.5 Turbo. + +Hyperbolic via OpenRouter in particular is confusing. +Their direct API produces excellent results, but the performance +through OpenRouter is very poor. +It's unclear why this is happening to just this provider. +The other providers available through OpenRouter perform about the same +as they do when their APIs are accessed directly. {: .note } This article is being updated as additional benchmark runs complete. + + +