Skip to content

Commit

Permalink
Update leaderboard with AutoCodeRover v2, Lingma Agent, Moatless + Claude 3.5 Sonnet
Browse files Browse the repository at this point in the history
  • Loading branch information
john-b-yang committed Jun 24, 2024
1 parent f1bae1e commit da321b7
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 4 deletions.
2 changes: 0 additions & 2 deletions css/styles.css
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,6 @@ tr {
text-align: left;
position: sticky;
top: 0;
z-index: 3;
background-color: var(--even_line_color);
}

Expand Down Expand Up @@ -343,7 +342,6 @@ tr {
.sticky-header-content {
position: sticky;
top: 0;
z-index: 3;
padding: 6px 0px; /* Adjusted padding */
/* box-sizing: border-box; */
background-color: var(--even_line_color);
Expand Down
102 changes: 100 additions & 2 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,39 @@ <h2 class="text-title">Leaderboard (Lite)</h2>
<p class="model-type">
🥇

Alibaba Lingma Agent
</p>
</td>
<td><p class="number">33.00</p></td>
<td><p><span class="label-date">2024-06-22</span></p></td>
<td>
<p style="text-align: center;">

<a href="https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240622_Lingma_Agent/logs">🔗</a>

</p>
</td>
<td>
<p style="text-align: center;">
-
</p>
</td>
<td>
<p style="text-align: center;">

<a href="https://arxiv.org/abs/2406.01422">🔗</a>

</p>
</td>
<td><p style="text-align: center;"></p></td>
<td><p style="text-align: center;"></p></td>
</tr>

<tr>
<td>
<p class="model-type">
🥈

Factory Code Droid
</p>
</td>
Expand Down Expand Up @@ -548,7 +581,39 @@ <h2 class="text-title">Leaderboard (Lite)</h2>
<tr>
<td>
<p class="model-type">
🥈
🥉

AutoCodeRover (v20240620) + GPT 4o (2024-05-13)
</p>
</td>
<td><p class="number">30.67</p></td>
<td><p><span class="label-date">2024-06-21</span></p></td>
<td>
<p style="text-align: center;">

<a href="https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240621_autocoderover-v20240620/logs">🔗</a>

</p>
</td>
<td>
<p style="text-align: center;">
-
</p>
</td>
<td>
<p style="text-align: center;">

<a href="https://autocoderover.dev/">🔗</a>

</p>
</td>
<td><p style="text-align: center;"></p></td>
<td><p style="text-align: center;"></p></td>
</tr>

<tr>
<td>
<p class="model-type">

CodeR + GPT 4 (1106)
</p>
Expand Down Expand Up @@ -581,7 +646,6 @@ <h2 class="text-title">Leaderboard (Lite)</h2>
<tr>
<td>
<p class="model-type">
🥉

MASAI + GPT 4o (2024-05-13)
</p>
Expand Down Expand Up @@ -643,6 +707,40 @@ <h2 class="text-title">Leaderboard (Lite)</h2>
<td><p style="text-align: center;"></p></td>
</tr>

<tr>
<td>
<p class="model-type">

Moatless Tools + Claude 3.5 Sonnet
</p>
</td>
<td><p class="number">26.67</p></td>
<td><p><span class="label-date">2024-06-23</span></p></td>
<td>
<p style="text-align: center;">

<a href="https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240623_moatless_claude35sonnet/logs">🔗</a>

</p>
</td>
<td>
<p style="text-align: center;">

<a href="https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240623_moatless_claude35sonnet/trajs">🔗</a>

</p>
</td>
<td>
<p style="text-align: center;">

<a href="https://github.com/aorwall/moatless-tools">🔗</a>

</p>
</td>
<td><p style="text-align: center;"></p></td>
<td><p style="text-align: center;"></p></td>
</tr>

<tr>
<td>
<p class="model-type">
Expand Down
24 changes: 24 additions & 0 deletions template/data.json
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,27 @@
}
],
"lite_leaderboard": [
{
"name": "Alibaba Lingma Agent",
"resolved": "33.00",
"date": "2024-06-22",
"logs": "lite/20240622_Lingma_Agent/logs",
"site": "https://arxiv.org/abs/2406.01422"
},
{
"name": "Factory Code Droid",
"resolved": "31.33",
"date": "2024-06-17",
"logs": "lite/20240617_factory_code_droid/logs",
"site": "https://www.factory.ai/"
},
{
"name": "AutoCodeRover (v20240620) + GPT 4o (2024-05-13)",
"resolved": "30.67",
"date": "2024-06-21",
"logs": "lite/20240621_autocoderover-v20240620/logs",
"site": "https://autocoderover.dev/"
},
{
"name": "CodeR + GPT 4 (1106)",
"resolved": "28.33",
Expand All @@ -119,6 +133,16 @@
"logs": "lite/20240612_IBM_Research_Agent101/logs",
"site": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240612_IBM_Research_Agent101"
},
{
"name": "Moatless Tools + Claude 3.5 Sonnet",
"resolved": "26.67",
"date": "2024-06-23",
"logs": "lite/20240623_moatless_claude35sonnet/logs",
"trajs": "lite/20240623_moatless_claude35sonnet/trajs",
"site": "https://github.com/aorwall/moatless-tools",
"verified": true,
"oss": true
},
{
"name": "Aider + GPT 4o & Claude 3 Opus",
"resolved": "26.33",
Expand Down
3 changes: 3 additions & 0 deletions viewer.html
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ <h1 style="font-size: 60px; padding-top: 0.4em">SWE-bench Analysis</h1>
<option value="lite,20240615_appmap-navie_gpt4o">Lite / (2024/06/15) AppMap Navie + GPT 4o</option>
<option value="lite,20240617_factory_code_droid">Lite / (2024/06/17) Factory Code Droid</option>
<option value="lite,20240617_moatless_gpt4o">Lite / (2024/06/17) Moatless + GPT 4o</option>
<option value="lite,20240621_autocoderover-v20240620">Lite / (2024/06/21) AutoCodeRover (v20240620)</option>
<option value="lite,20240622_Lingma_Agent">Lite / (2024/06/22) Alibaba Lingma Agent</option>
<option value="lite,20240623_moatless_claude35sonnet">Lite / (2024/06/23) Moatless + Claude 3.5 Sonnet</option>
<option value="test,20240402_sweagent_gpt4">Test / (2024/04/02) SWE-agent + GPT 4 (1106)</option>
<option value="test,20240509_amazon-q-developer-agent-20240430-dev">Test / (2024/05/09) Amazon Q Developer Agent</option>
<option value="test,20240615_appmap-navie_gpt4o">Test / (2024/06/15) AppMap Navie + GPT 4o</option>
Expand Down

0 comments on commit da321b7

Please sign in to comment.