Spaces:
Running
Running
<!--
  OR-Bench results table (id "or-bench-table").
  One row per model. Columns: model name with vendor logo, over-refusal
  rate (%), toxic-prompt acceptance rate (%), and their average. The
  average column matches the mean of the two rate columns to rounding.

  Each logo's alt text names the vendor shown in the image, so the vendor
  is available to assistive technology and when the image fails to load.

  NOTE(review): the align="right" attributes and the inline width are kept
  as-is because the stylesheet and any script bound to ".datatable" are not
  visible from this fragment; move them to CSS once that can be verified.
-->
<table id="or-bench-table" class="datatable" style="width: 100%">
  <thead>
    <tr>
      <th align="right" class="model-header" scope="col">model</th>
      <th align="right" class="number-header" scope="col">over refusal(%)</th>
      <th align="right" class="number-header" scope="col">toxic acceptance(%)</th>
      <th align="right" class="number-header" scope="col">average</th>
    </tr>
  </thead>
  <tbody>
    <!-- Anthropic -->
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/anthropic_small.svg" alt="Anthropic logo" class="logo">
        <span class="logo-text">Claude-2.1</span>
      </div></td>
      <td align="right">99.8</td>
      <td align="right">0</td>
      <td align="right">49.9</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/anthropic_small.svg" alt="Anthropic logo" class="logo">
        <span class="logo-text">Claude-3-haiku</span>
      </div></td>
      <td align="right">96.3</td>
      <td align="right">0.3</td>
      <td align="right">48.3</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/anthropic_small.svg" alt="Anthropic logo" class="logo">
        <span class="logo-text">Claude-3-sonnet</span>
      </div></td>
      <td align="right">94.5</td>
      <td align="right">0.3</td>
      <td align="right">47.4</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/anthropic_small.svg" alt="Anthropic logo" class="logo">
        <span class="logo-text">Claude-3-opus</span>
      </div></td>
      <td align="right">91.0</td>
      <td align="right">1.9</td>
      <td align="right">46.5</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/anthropic_small.svg" alt="Anthropic logo" class="logo">
        <span class="logo-text">Claude-3.5-sonnet</span>
      </div></td>
      <td align="right">43.8</td>
      <td align="right">3.4</td>
      <td align="right">23.6</td>
    </tr>

    <!-- Google -->
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/google_small.svg" alt="Google logo" class="logo">
        <span class="logo-text">Gemma-7b</span>
      </div></td>
      <td align="right">26.3</td>
      <td align="right">14.5</td>
      <td align="right">20.4</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/google_small.svg" alt="Google logo" class="logo">
        <span class="logo-text">Gemma-2-9b</span>
      </div></td>
      <td align="right">79.9</td>
      <td align="right">1.5</td>
      <td align="right">40.7</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/google_small.svg" alt="Google logo" class="logo">
        <span class="logo-text">Gemma-2-27b</span>
      </div></td>
      <td align="right">62.0</td>
      <td align="right">3.2</td>
      <td align="right">32.6</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/google_small.svg" alt="Google logo" class="logo">
        <span class="logo-text">Gemini-1.0-pro</span>
      </div></td>
      <td align="right">9.7</td>
      <td align="right">21.3</td>
      <td align="right">15.5</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/google_small.svg" alt="Google logo" class="logo">
        <span class="logo-text">Gemini-1.5-flash-latest</span>
      </div></td>
      <td align="right">84.3</td>
      <td align="right">1.2</td>
      <td align="right">42.7</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/google_small.svg" alt="Google logo" class="logo">
        <span class="logo-text">Gemini-1.5-pro-latest</span>
      </div></td>
      <td align="right">88.0</td>
      <td align="right">0.6</td>
      <td align="right">44.3</td>
    </tr>

    <!-- OpenAI -->
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
        <span class="logo-text">GPT-3.5-turbo-0301</span>
      </div></td>
      <td align="right">57.4</td>
      <td align="right">5.3</td>
      <td align="right">31.4</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
        <span class="logo-text">GPT-3.5-turbo-0613</span>
      </div></td>
      <td align="right">38.4</td>
      <td align="right">7.9</td>
      <td align="right">23.2</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
        <span class="logo-text">GPT-3.5-turbo-0125</span>
      </div></td>
      <td align="right">12.7</td>
      <td align="right">37.9</td>
      <td align="right">25.3</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
        <span class="logo-text">GPT-4-0125-preview</span>
      </div></td>
      <td align="right">12.2</td>
      <td align="right">7</td>
      <td align="right">9.6</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
        <span class="logo-text">GPT-4-turbo-2024-04-09</span>
      </div></td>
      <td align="right">12.8</td>
      <td align="right">3.5</td>
      <td align="right">8.1</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
        <span class="logo-text">GPT-4o</span>
      </div></td>
      <td align="right">6.8</td>
      <td align="right">15.1</td>
      <td align="right">10.9</td>
    </tr>

    <!-- Meta -->
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
        <span class="logo-text">Llama-2-7b</span>
      </div></td>
      <td align="right">87.5</td>
      <td align="right">0.4</td>
      <td align="right">43.9</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
        <span class="logo-text">Llama-2-13b</span>
      </div></td>
      <td align="right">91.0</td>
      <td align="right">0.3</td>
      <td align="right">45.7</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
        <span class="logo-text">Llama-2-70b</span>
      </div></td>
      <td align="right">96.1</td>
      <td align="right">0.3</td>
      <td align="right">48.2</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
        <span class="logo-text">Llama-3-8b</span>
      </div></td>
      <td align="right">69.4</td>
      <td align="right">5</td>
      <td align="right">37.2</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
        <span class="logo-text">Llama-3-70b</span>
      </div></td>
      <td align="right">37.7</td>
      <td align="right">21.3</td>
      <td align="right">29.5</td>
    </tr>

    <!-- Mistral AI -->
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/mistral_small.png" alt="Mistral AI logo" class="logo">
        <span class="logo-text">Mistral-small-latest</span>
      </div></td>
      <td align="right">13.3</td>
      <td align="right">20.3</td>
      <td align="right">16.8</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/mistral_small.png" alt="Mistral AI logo" class="logo">
        <span class="logo-text">Mistral-medium-latest</span>
      </div></td>
      <td align="right">14.0</td>
      <td align="right">22.5</td>
      <td align="right">18.2</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/mistral_small.png" alt="Mistral AI logo" class="logo">
        <span class="logo-text">Mistral-large-latest</span>
      </div></td>
      <td align="right">9.8</td>
      <td align="right">27.2</td>
      <td align="right">18.5</td>
    </tr>

    <!-- Alibaba (Qwen) -->
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/BABA_SMALL.svg" alt="Alibaba logo" class="logo">
        <span class="logo-text">Qwen-1.5-7B</span>
      </div></td>
      <td align="right">39.2</td>
      <td align="right">15</td>
      <td align="right">27.1</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/BABA_SMALL.svg" alt="Alibaba logo" class="logo">
        <span class="logo-text">Qwen-1.5-32B</span>
      </div></td>
      <td align="right">50.8</td>
      <td align="right">4.4</td>
      <td align="right">27.6</td>
    </tr>
    <tr>
      <td align="right"><div class="logo-container">
        <img src="../images/BABA_SMALL.svg" alt="Alibaba logo" class="logo">
        <span class="logo-text">Qwen-1.5-72B</span>
      </div></td>
      <td align="right">46.9</td>
      <td align="right">5.6</td>
      <td align="right">26.3</td>
    </tr>
  </tbody>
</table>