or-bench-leaderboard / data /or-bench.html
Justin Cui
initial commit.
f98e8dd
<table id="or-bench-table" class="datatable" style="width: 100%">
  <!--
    OR-Bench leaderboard table: one row per model, listing its over refusal
    rate, its toxic acceptance rate, and an average column (consistent with the
    mean of the two rates, within rounding). All values are percentages.
    Each logo's alt text names the vendor shown in the image, because the
    model name next to it does not state the vendor.
    NOTE(review): the align attributes and the inline width are kept as-is
    because the stylesheet that would replace them is not part of this file.
  -->
  <thead>
    <tr>
      <th scope="col" align="right" class="model-header">model</th>
      <th scope="col" align="right" class="number-header">over refusal(%)</th>
      <th scope="col" align="right" class="number-header">toxic acceptance(%)</th>
      <th scope="col" align="right" class="number-header">average</th>
    </tr>
  </thead>
  <tbody>
    <!-- Anthropic -->
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/anthropic_small.svg" alt="Anthropic logo" class="logo">
        <span class="logo-text">Claude-2.1</span>
      </div></td>
      <td align="right">99.8</td>
      <td align="right">0</td>
      <td align="right">49.9</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/anthropic_small.svg" alt="Anthropic logo" class="logo">
        <span class="logo-text">Claude-3-haiku</span>
      </div></td>
      <td align="right">96.3</td>
      <td align="right">0.3</td>
      <td align="right">48.3</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/anthropic_small.svg" alt="Anthropic logo" class="logo">
        <span class="logo-text">Claude-3-sonnet</span>
      </div></td>
      <td align="right">94.5</td>
      <td align="right">0.3</td>
      <td align="right">47.4</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/anthropic_small.svg" alt="Anthropic logo" class="logo">
        <span class="logo-text">Claude-3-opus</span>
      </div></td>
      <td align="right">91.0</td>
      <td align="right">1.9</td>
      <td align="right">46.5</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/anthropic_small.svg" alt="Anthropic logo" class="logo">
        <span class="logo-text">Claude-3.5-sonnet</span>
      </div></td>
      <td align="right">43.8</td>
      <td align="right">3.4</td>
      <td align="right">23.6</td>
    </tr>
    <!-- Google -->
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/google_small.svg" alt="Google logo" class="logo">
        <span class="logo-text">Gemma-7b</span>
      </div></td>
      <td align="right">26.3</td>
      <td align="right">14.5</td>
      <td align="right">20.4</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/google_small.svg" alt="Google logo" class="logo">
        <span class="logo-text">Gemma-2-9b</span>
      </div></td>
      <td align="right">79.9</td>
      <td align="right">1.5</td>
      <td align="right">40.7</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/google_small.svg" alt="Google logo" class="logo">
        <span class="logo-text">Gemma-2-27b</span>
      </div></td>
      <td align="right">62.0</td>
      <td align="right">3.2</td>
      <td align="right">32.6</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/google_small.svg" alt="Google logo" class="logo">
        <span class="logo-text">Gemini-1.0-pro</span>
      </div></td>
      <td align="right">9.7</td>
      <td align="right">21.3</td>
      <td align="right">15.5</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/google_small.svg" alt="Google logo" class="logo">
        <span class="logo-text">Gemini-1.5-flash-latest</span>
      </div></td>
      <td align="right">84.3</td>
      <td align="right">1.2</td>
      <td align="right">42.7</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/google_small.svg" alt="Google logo" class="logo">
        <span class="logo-text">Gemini-1.5-pro-latest</span>
      </div></td>
      <td align="right">88.0</td>
      <td align="right">0.6</td>
      <td align="right">44.3</td>
    </tr>
    <!-- OpenAI -->
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
        <span class="logo-text">GPT-3.5-turbo-0301</span>
      </div></td>
      <td align="right">57.4</td>
      <td align="right">5.3</td>
      <td align="right">31.4</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
        <span class="logo-text">GPT-3.5-turbo-0613</span>
      </div></td>
      <td align="right">38.4</td>
      <td align="right">7.9</td>
      <td align="right">23.2</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
        <span class="logo-text">GPT-3.5-turbo-0125</span>
      </div></td>
      <td align="right">12.7</td>
      <td align="right">37.9</td>
      <td align="right">25.3</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
        <span class="logo-text">GPT-4-0125-preview</span>
      </div></td>
      <td align="right">12.2</td>
      <td align="right">7</td>
      <td align="right">9.6</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
        <span class="logo-text">GPT-4-turbo-2024-04-09</span>
      </div></td>
      <td align="right">12.8</td>
      <td align="right">3.5</td>
      <td align="right">8.1</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
        <span class="logo-text">GPT-4o</span>
      </div></td>
      <td align="right">6.8</td>
      <td align="right">15.1</td>
      <td align="right">10.9</td>
    </tr>
    <!-- Meta -->
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
        <span class="logo-text">Llama-2-7b</span>
      </div></td>
      <td align="right">87.5</td>
      <td align="right">0.4</td>
      <td align="right">43.9</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
        <span class="logo-text">Llama-2-13b</span>
      </div></td>
      <td align="right">91.0</td>
      <td align="right">0.3</td>
      <td align="right">45.7</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
        <span class="logo-text">Llama-2-70b</span>
      </div></td>
      <td align="right">96.1</td>
      <td align="right">0.3</td>
      <td align="right">48.2</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
        <span class="logo-text">Llama-3-8b</span>
      </div></td>
      <td align="right">69.4</td>
      <td align="right">5</td>
      <td align="right">37.2</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
        <span class="logo-text">Llama-3-70b</span>
      </div></td>
      <td align="right">37.7</td>
      <td align="right">21.3</td>
      <td align="right">29.5</td>
    </tr>
    <!-- Mistral AI -->
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/mistral_small.png" alt="Mistral AI logo" class="logo">
        <span class="logo-text">Mistral-small-latest</span>
      </div></td>
      <td align="right">13.3</td>
      <td align="right">20.3</td>
      <td align="right">16.8</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/mistral_small.png" alt="Mistral AI logo" class="logo">
        <span class="logo-text">Mistral-medium-latest</span>
      </div></td>
      <td align="right">14.0</td>
      <td align="right">22.5</td>
      <td align="right">18.2</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/mistral_small.png" alt="Mistral AI logo" class="logo">
        <span class="logo-text">Mistral-large-latest</span>
      </div></td>
      <td align="right">9.8</td>
      <td align="right">27.2</td>
      <td align="right">18.5</td>
    </tr>
    <!-- Alibaba (Qwen); NOTE(review): vendor inferred from the BABA_SMALL.svg filename, confirm -->
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/BABA_SMALL.svg" alt="Alibaba logo" class="logo">
        <span class="logo-text">Qwen-1.5-7B</span>
      </div></td>
      <td align="right">39.2</td>
      <td align="right">15</td>
      <td align="right">27.1</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/BABA_SMALL.svg" alt="Alibaba logo" class="logo">
        <span class="logo-text">Qwen-1.5-32B</span>
      </div></td>
      <td align="right">50.8</td>
      <td align="right">4.4</td>
      <td align="right">27.6</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/BABA_SMALL.svg" alt="Alibaba logo" class="logo">
        <span class="logo-text">Qwen-1.5-72B</span>
      </div></td>
      <td align="right">46.9</td>
      <td align="right">5.6</td>
      <td align="right">26.3</td>
    </tr>
  </tbody>
</table>