From 307c23631ab3d2d5c8c38f8bd8fb277ea9a562c6 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Fri, 22 Nov 2024 17:20:38 -0800 Subject: [PATCH] copy --- aider/website/_data/quant.yml | 23 +++++++++++++++++++ .../website/_posts/2024-11-21-quantization.md | 7 ++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/aider/website/_data/quant.yml b/aider/website/_data/quant.yml index c84755dca30..baaefa6585b 100644 --- a/aider/website/_data/quant.yml +++ b/aider/website/_data/quant.yml @@ -22,6 +22,29 @@ seconds_per_case: 22.5 total_cost: 0.0000 +- dirname: 2024-11-22-18-56-13--ollama-qwen2.5-coder:32b-instruct-fp16 + test_cases: 132 + model: ollama/qwen2.5-coder:32b-instruct-fp16 (64k context) + edit_format: diff + commit_hash: f06452c-dirty, 6a0a97c-dirty, 4e9ae16-dirty, 5506d0f-dirty + pass_rate_1: 58.3 + pass_rate_2: 71.4 + percent_cases_well_formed: 90.2 + error_outputs: 27 + num_malformed_responses: 26 + num_with_malformed_responses: 13 + user_asks: 2 + lazy_comments: 0 + syntax_errors: 0 + indentation_errors: 0 + exhausted_context_windows: 0 + test_timeouts: 0 + command: aider --model ollama/qwen2.5-coder:32b-instruct-fp16 + date: 2024-11-22 + versions: 0.64.2.dev + seconds_per_case: 119.6 + total_cost: 0.0000 + - dirname: 2024-11-22-14-53-26--hyperbolic-qwen25coder32binstruct test_cases: 133 model: Hyperbolic BF16 diff --git a/aider/website/_posts/2024-11-21-quantization.md b/aider/website/_posts/2024-11-21-quantization.md index 3e4ba910c29..2d8391ac56c 100644 --- a/aider/website/_posts/2024-11-21-quantization.md +++ b/aider/website/_posts/2024-11-21-quantization.md @@ -18,17 +18,20 @@ can strongly impact code editing skill. Heavily quantized models are often used by cloud API providers and local model servers like Ollama. - + -The graph above compares 4 different versions of the Qwen 2.5 Coder 32B Instruct model, +The graph above compares different versions of the Qwen 2.5 Coder 32B Instruct model, served both locally and from cloud providers. 
- The [HuggingFace BF16 weights](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) served via [glhf.chat](https://glhf.chat). - Hyperbolic labs API for [qwen2-5-coder-32b-instruct](https://app.hyperbolic.xyz/models/qwen2-5-coder-32b-instruct), which is using BF16. This result is probably within the expected variance of the HF result. +- A [4bit quant for mlx](https://t.co/cwX3DYX35D). +This is the only model which was benchmarked using the "whole" [edit format](https://aider.chat/docs/more/edit-formats.html). +The rest were benchmarked with the much more practical and challenging "diff" edit format. - The results from [OpenRouter's mix of providers](https://openrouter.ai/qwen/qwen-2.5-coder-32b-instruct/providers) which serve the model with different levels of quantization. - Ollama locally serving [qwen2.5-coder:32b-instruct-q4_K_M)](https://ollama.com/library/qwen2.5-coder:32b-instruct-q4_K_M), which has `Q4_K_M` quantization, with Ollama's default 2k context window.