diff --git a/docusaurus.config.ts b/docusaurus.config.ts
index 709646b..43050b9 100644
--- a/docusaurus.config.ts
+++ b/docusaurus.config.ts
@@ -98,6 +98,10 @@ const config: Config = {
label: 'Craft vs Cruft',
href: 'https://www.youtube.com/channel/UC4nEbAo5xFsOZDk2v0RIGHA',
},
+ {
+ label: 'More',
+ href: '/resources',
+ },
],
},
{
@@ -108,13 +112,14 @@ const config: Config = {
// href: 'https://stackoverflow.com/questions/tagged/docusaurus',
// },
{
- label: 'nopilot.dev Discord',
+ label: 'Nopilot Discord',
href: 'https://discord.gg/k3hzFm5ykA',
},
{
- label: 'Resources',
- href: '/resources',
+ label: 'Nopilot YouTube',
+ href: 'https://www.youtube.com/@nopilot-dev',
},
+
// {
// label: 'Twitter',
// href: 'https://twitter.com/docusaurus',
diff --git a/src/components/HomepageFeatures/index.tsx b/src/components/HomepageFeatures/index.tsx
index 5459085..6f9c8b8 100644
--- a/src/components/HomepageFeatures/index.tsx
+++ b/src/components/HomepageFeatures/index.tsx
@@ -128,11 +128,14 @@ export default function HomepageFeatures(): JSX.Element {
Updates
+
+
-
-
-
diff --git a/src/pages/leaderboards.mdx b/src/pages/leaderboards.mdx
index 2a4655e..6b4f6ad 100644
--- a/src/pages/leaderboards.mdx
+++ b/src/pages/leaderboards.mdx
@@ -20,6 +20,30 @@ ML researcher [theblackcat102](https://github.com/theblackcat102) [reports](http
Paul Gauthier [points out](https://github.com/princeton-nlp/SWE-bench/issues/72) that some SWE-bench cases appear to be underspecified and effectively impossible to solve because the tests rely on implementation detail. It's unclear what the maximum possible score is.
+## Aider Leaderboards
+
+The coding agent Aider maintains a [leaderboard](https://aider.chat/docs/leaderboards) of model performance within its key subtasks.
+
+### Code Editing
+
+- openai/gpt-4o
+- claude-3-opus
+- gpt-4 (0613)
+- gpt-4-turbo (2024-04-09)
+- deepseek-chat v2 (Open Weight)
+- gpt-3.5-turbo
+- gemini-1.5-pro
+- claude-3-sonnet
+- deepseek-coder (Open Weight)
+
+### Code Refactoring
+
+- claude-3-opus
+- openai/gpt-4o
+- gpt-4 (1106-preview)
+- gemini-1.5-pro
+- gpt-4-turbo (2024-04-09)
+
## LiveCodeBench
[LiveCodeBench](https://livecodebench.github.io/leaderboard.html): "Holistic and Contamination Free Evaluation of Large Language Models for Code"
@@ -31,10 +55,15 @@ Tests the strength of models across different coding sub-tasks.
* Test Output Prediction
* Code Execution
-*Last checked: 2024-04-10*
-* Proprietary Leaders: GPT-4-Turbo-2024-04-09, Claude-3-Opus
-* Open Weight Leaders: [WizardCoder-33B-V1.1](https://huggingface.co/WizardLM/WizardCoder-33B-V1.1), [deepseek-coder-33b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct), [CodeLlama-34b-Instruct-hf](https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf)
+The following list of standout models across subtasks is subjective.
+*Last checked: 2024-05-14*
+* Proprietary Leaders: GPT-4o, GPT-4-Turbo, Claude-3-Opus
+* Open Weight Leaders:
+ * [LLama3-70b-Ins](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct)
+ * [WizardCoder-33B-V1.1](https://huggingface.co/WizardLM/WizardCoder-33B-V1.1)
+ * [deepseek-coder-33b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct)
+ * [Phind-34B-V2](https://huggingface.co/Phind/Phind-CodeLlama-34B-v2)
## Other notable benchmarks
diff --git a/src/pages/resources.md b/src/pages/resources.md
index 8a42a1c..d3bc459 100644
--- a/src/pages/resources.md
+++ b/src/pages/resources.md
@@ -6,25 +6,31 @@ title: Resources
## Community
-* [Can we beat Devin? Discord](https://discord.gg/canwebeatdevin): shared space with several teams
+* [nopilot.dev Discord](https://discord.gg/k3hzFm5ykA): discussion about the ecosystem
* [OpenDevin Discord](https://discord.gg/mBuDGRzzES)
-* [nopilot.dev Discord](https://discord.gg/k3hzFm5ykA): discussion about this hub
+* [SWE-agent Discord](https://discord.gg/AVEFbBn2rH)
+* OpenDevAI Discord
-## Autonomous Coders (WebUX)
+## Videos
+* [nopilot.dev YouTube Channel](https://www.youtube.com/@nopilot-dev)
+* [Playlist on Autonomous DevTools](https://www.youtube.com/playlist?list=PLUBjHzmgsFNf_9LrJlk2t0n7pGiOLVqoX)
+
+## Coding Agents (WebUX)
-* Devin by Cognition
* [OpenDevin](https://github.com/OpenDevin/OpenDevin)
+* Devin by Cognition
* [Devika](https://github.com/stitionai/devika)
* [Anterion](https://github.com/MiscellaneousStuff/anterion): UX wrapping SWE-agent
-## Autonomous Coders (Command-line)
+## Coding Agents (Backend)
* [AutoCodeRover](https://github.com/nus-apr/auto-code-rover): from NUS-apr, highest score on SWE-bench lite
* [SWE-agent](https://swe-agent.com) from Princeton NLP, first Open Source agent to break 10% SWE-bench
+* [Sweep](https://sweep.dev): turns bugs into pull requests
[Longer list](https://github.com/e2b-dev/awesome-ai-agents) by E2B.
## Eval Tools
* [SWE-bench](https://www.swebench.com/)
* [moatless-tools](https://github.com/aorwall/moatless-tools)
-* [SWE-bench-util](https://github.com/raymyers/swe-bench-util)
+