Spaces:

rai-institute-testbed
/

Datasets

Sleeping

App Files Files Community

rhea2809 commited on Oct 11, 2023

Commit

c1b1ef1

1 Parent(s): 36b272f

Upload 8 files

Browse files

Files changed (7) hide show

Building_a_Safety_Agent.ipynb +316 -0
README.md +3 -3
app.py +80 -4
data_list.py +1 -1
model_list.py +79 -0
nist.png +0 -0
requirements.txt +4 -0

Building_a_Safety_Agent.ipynb ADDED Viewed

	@@ -0,0 +1,316 @@

+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "bdp9fSdWKBhp",
+        "collapsed": true
+      },
+      "outputs": [],
+      "source": [
+        "# Install the Libraries used in this notebook.\n",
+        "\n",
+        "!pip install -qU langchain openai transformers selfcheckgpt profanityfilter\n",
+        "! python -m spacy download en_core_web_sm"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "##Safety Agent"
+      ],
+      "metadata": {
+        "id": "6wj7wxo9aTe5"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from langchain.chat_models import ChatOpenAI\n",
+        "from langchain.chains.conversation.memory import ConversationBufferWindowMemory\n",
+        "import openai\n",
+        "import os\n",
+        "os.environ['OPENAI_API_KEY'] = openai.api_key = 'YOUR_OPENAI_API_KEY'  # placeholder: supply your own platform.openai.com API key; never commit a real key\n",
+        "\n",
+        "# initialize LLM\n",
+        "llm = ChatOpenAI(\n",
+        "    temperature=0,\n",
+        "    model_name='gpt-3.5-turbo'\n",
+        ")\n",
+        "\n",
+        "# initialize conversational memory\n",
+        "conversational_memory = ConversationBufferWindowMemory(\n",
+        "    memory_key='chat_history',\n",
+        "    k=5,\n",
+        "    return_messages=True\n",
+        ")"
+      ],
+      "metadata": {
+        "id": "OIKitpN-fPSF"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Profanity Detection Tool"
+      ],
+      "metadata": {
+        "id": "O6BLtKefgSxZ"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from profanityfilter import ProfanityFilter\n",
+        "import spacy\n",
+        "from langchain.tools import BaseTool\n",
+        "from typing import Optional\n",
+        "\n",
+        "class Profanity_Check(BaseTool):\n",
+        "    name = \"Profanity_Checker\"\n",
+        "    description = (\n",
+        "        \"use this tool when you need to check for profanity in given text\"\n",
+        "    )\n",
+        "    def _run(\n",
+        "      self,\n",
+        "      sentence1: Optional[str] = None\n",
+        "      ):\n",
+        "      pf = ProfanityFilter()\n",
+        "      flag = pf.is_profane(sentence1)\n",
+        "      if flag:  return 'Profanity Detected'\n",
+        "      else: return 'No Profanity found'\n",
+        "\n",
+        "\n",
+        "    def _arun(self, sentence1, sentence2):\n",
+        "      raise NotImplementedError(\"This tool does not support async runs.\")\n"
+      ],
+      "metadata": {
+        "id": "_SJa13N5i1kY"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Hallucination Detection Tool"
+      ],
+      "metadata": {
+        "id": "YicTP78lgXlT"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from selfcheckgpt.modeling_selfcheck import SelfCheckBERTScore\n",
+        "\n",
+        "class Hallucination_Scorer(BaseTool):\n",
+        "    name = \"Hallucination_Scorer\"\n",
+        "    description = (\n",
+        "        \"use this tool when you need to give hallucination scores\"\n",
+        "    )\n",
+        "    def _run(\n",
+        "      self,\n",
+        "      sentence1: Optional[str] = None\n",
+        "      ):\n",
+        "      selfcheck_bertscore = SelfCheckBERTScore()\n",
+        "      nlp = spacy.load('en_core_web_sm')\n",
+        "      passage = sentence1\n",
+        "      sentences = [sent.text.strip() for sent in nlp(passage).sents]\n",
+        "\n",
+        "      chat_completion = openai.ChatCompletion.create(model=\"gpt-3.5-turbo\", messages=[{\"role\": \"user\", \"content\": sentence1}])\n",
+        "      sample1 = chat_completion.choices[0].message.content\n",
+        "      chat_completion = openai.ChatCompletion.create(model=\"gpt-3.5-turbo\", messages=[{\"role\": \"user\", \"content\": sentence1}])\n",
+        "      sample2 = chat_completion.choices[0].message.content\n",
+        "      chat_completion = openai.ChatCompletion.create(model=\"gpt-3.5-turbo\", messages=[{\"role\": \"user\", \"content\": sentence1}])\n",
+        "      sample3 = chat_completion.choices[0].message.content\n",
+        "# SelfCheck-BERTScore: Score for each sentence where value is in [0.0, 1.0] and high value means non-factual\n",
+        "      sent_scores_bertscore = selfcheck_bertscore.predict(\n",
+        "          sentences = sentences,                          # list of sentences\n",
+        "          sampled_passages = [sample1, sample2, sample3], # list of sampled passages\n",
+        "          )\n",
+        "      return sent_scores_bertscore\n",
+        "\n",
+        "\n",
+        "    def _arun(self, sentence1, sentence2):\n",
+        "      raise NotImplementedError(\"This tool does not support async runs.\")\n"
+      ],
+      "metadata": {
+        "id": "1CA0tBsWYC6K"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Initializing Safety Agent\n"
+      ],
+      "metadata": {
+        "id": "6wqgeEvKgex7"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from langchain.agents import initialize_agent\n",
+        "\n",
+        "# Pass the tools\n",
+        "tools = [Hallucination_Scorer(),Profanity_Check()]\n",
+        "\n",
+        "# initialize agent with tools\n",
+        "agent = initialize_agent(\n",
+        "    agent='chat-conversational-react-description',\n",
+        "    tools=tools, # Point each smaller sized agent towards the test we use\n",
+        "    llm=llm, # Can be built over any LLM\n",
+        "    verbose=True, ## Temperature for responses is set to zero for deterministic test scores; change to 1 when generating reports.\n",
+        "    max_iterations=3,  # Avoid Looping\n",
+        "    early_stopping_method='generate', # Stop and generate a score\n",
+        "    memory=conversational_memory # Chat Memory\n",
+        "\n",
+        ")"
+      ],
+      "metadata": {
+        "id": "4hI5kL3I10sM"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "##### Loading generated policy"
+      ],
+      "metadata": {
+        "id": "RNxBUVOsjXpr"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "with open(\"/content/Generated_Policy.txt\", \"r\") as file1:\n",
+        "    text = file1.read()\n",
+        "    generated_policy = ' '.join(text.split('\\n'))\n",
+        "file1.close()"
+      ],
+      "metadata": {
+        "id": "exp-mK78gbUG",
+        "collapsed": true
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "generated_policy"
+      ],
+      "metadata": {
+        "id": "KsGtYtrkgpey"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Hallucination Scoring"
+      ],
+      "metadata": {
+        "id": "2_32mHKGjeqt"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "prompt1 = 'Hallucination score for :'+generated_policy"
+      ],
+      "metadata": {
+        "id": "VWH3VDo03oHr"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "var1 = agent(prompt1)"
+      ],
+      "metadata": {
+        "id": "8aCm3m79hppI"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "var1['output']"
+      ],
+      "metadata": {
+        "id": "gum7V3J9QFBv"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Profanity Detection"
+      ],
+      "metadata": {
+        "id": "yjyYQJj0jlxm"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "prompt2 = 'Check for profanity in '+generated_policy\n"
+      ],
+      "metadata": {
+        "id": "xoc4nCbr4Im5"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "var2 = agent(prompt2)"
+      ],
+      "metadata": {
+        "id": "fBsWajM4lOyo"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "var2['output']"
+      ],
+      "metadata": {
+        "id": "Nc6aYJ1eYnpf"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}

README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 ---
-title: Datasets
 emoji: 🚀
 colorFrom: gray
-colorTo: purple
 sdk: gradio
-sdk_version: 3.47.1
 app_file: app.py
 pinned: false
 ---

 ---
+title: Explore
 emoji: 🚀
 colorFrom: gray
+colorTo: blue
 sdk: gradio
+sdk_version: 3.43.2
 app_file: app.py
 pinned: false
 ---

app.py CHANGED Viewed

@@ -1,8 +1,43 @@
 import gradio as gr
 from data_list import DataList
-with gr.Blocks() as demo:
-            data_list = DataList()
             with gr.Row():
                 gr.Image(value="RAII.svg",scale=1,show_download_button=False,show_share_button=False,show_label=False,height=100,container=False)
                 gr.Markdown("# Datasets for Healthcare Teams")
@@ -16,5 +51,46 @@ with gr.Blocks() as demo:
             demo.load(fn=data_list.render, inputs=[search_box, case_sensitive, filter_names, data_types,],outputs=[table,])
             search_box.submit(fn=data_list.render, inputs=[search_box, case_sensitive, filter_names, data_types,], outputs=[table,])
             search_button.click(fn=data_list.render, inputs=[search_box, case_sensitive, filter_names, data_types,], outputs=[table,])
-demo.queue()
-demo.launch(share=False)

 import gradio as gr
+import IPython
+import nbformat
+from nbconvert import HTMLExporter
+from IPython.display import HTML
+import requests
+from model_list import ModelList
 from data_list import DataList
+def show_notebook(notebook_file):
+    with open(notebook_file, 'r', encoding='utf-8') as notebook_file:
+        notebook_content = nbformat.read(notebook_file, as_version=4)
+    html_expor = HTMLExporter()
+    html_output, resources = html_expor.from_notebook_node(notebook_content)
+    return html_output
+def main():
+    data_list = DataList()
+    model_list = ModelList()
+    css = """
+button.svelte-kqij2n{font-weight: bold !important;
+background-color: #ebecf0;
+color: black;
+margin-left: 5px;}
+#tlsnlbs{}
+#mtcs{}
+#mdls{}
+#dts{}
+.svelte-kqij2n .selected {
+    background-color: black;
+    color: white;
+}
+span.svelte-s1r2yt{font-weight: bold !important;
+}
+"""
+    with gr.Blocks(css=css) as demo:
+        with gr.Tab(label="DATASETS",elem_id="dts"):
             with gr.Row():
                 gr.Image(value="RAII.svg",scale=1,show_download_button=False,show_share_button=False,show_label=False,height=100,container=False)
                 gr.Markdown("# Datasets for Healthcare Teams")
             demo.load(fn=data_list.render, inputs=[search_box, case_sensitive, filter_names, data_types,],outputs=[table,])
             search_box.submit(fn=data_list.render, inputs=[search_box, case_sensitive, filter_names, data_types,], outputs=[table,])
             search_button.click(fn=data_list.render, inputs=[search_box, case_sensitive, filter_names, data_types,], outputs=[table,])
+        with gr.Tab(label="MODELS",elem_id="mdls"):
+            with gr.Row():
+                gr.Image(value="RAII.svg",scale=1,show_download_button=False,show_share_button=False,show_label=False,height=100,container=False)
+                gr.Markdown("# Models for Healthcare Teams")
+            search_box = gr.Textbox(label='Search Name',placeholder='You can search for titles with regular expressions. e.g. (?<!sur)face',max_lines=1)
+            case_sensitive = gr.Checkbox(label='Case Sensitive')
+            filter_names1 = gr.CheckboxGroup(choices=['NLP','Computer Vision', 'Multi-Model'], value=['NLP','Computer Vision', 'Multi-Model'], label='Task')
+            data_type_names1 = ['Biomedical Corpus','Scientific Corpus','Clinical Corpus','Image','Mixed']
+            data_types1 = gr.CheckboxGroup(choices=data_type_names1, value=data_type_names1, label='Training Data Type')
+            search_button = gr.Button('Search')
+            table = gr.HTML(show_label=False)
+            demo.load(fn=model_list.render, inputs=[search_box, case_sensitive, filter_names1, data_types1,],outputs=[table,])
+            search_box.submit(fn=model_list.render, inputs=[search_box, case_sensitive, filter_names1, data_types1,], outputs=[table,])
+            search_button.click(fn=model_list.render, inputs=[search_box, case_sensitive, filter_names1, data_types1,], outputs=[table,])
+        with gr.Tab(label="NOTEBOOKS",elem_id="nbs"):
+            with gr.Accordion("Building a Safety Agent using Langchain",open=False):
+                 notebook='Building_a_Safety_Agent.ipynb'
+                 colab_link="<a href='https://colab.research.google.com/drive/1BoxUprJQ7skeyA88gfGRVVgzsvFUwnqd?usp=sharing'><button style='box-sizing: border-box; border: 1px solid #000; padding: 5px;'>Open in Colab</button></a>"
+                 gr.HTML(colab_link)
+                 gr.HTML(show_notebook(notebook))
+            with gr.Accordion("LLM Hallucination Detection",open=False):
+                 gr.HTML("Coming Soon")
+        with gr.Tab(label="METRICS",elem_id="mtcs"):
+            gr.HTML(value='<iframe src="https://v2-embednotion.com/Metrics-dbe5d86e2181438fb4eb1e4f01fa3955?pvs=4"></iframe>  <style>  iframe { width: 100%; height: 10vh; border: 2px solid #ccc; border-radius: 10px; padding: none; text-align: right; }  </style>')
+        with gr.Tab(label="NIST-RAI INSTITUTE AI SAFETY RATINGS ",elem_id="nrasr"):
+            gr.Image(value="nist.png",scale=1,show_download_button=False,show_share_button=False,show_label=False,container=False)
+        with gr.Tab(label="TOOLKITS & LIBRARIES",elem_id="tlsnlbs"):
+            gr.HTML(value='<iframe src="https://v2-embednotion.com/Toolkits-Libraries-d5865c7ae5b0499988f5cc5fce711888?pvs=4"></iframe>  <style>  iframe { width: 100%; height: 10vh; border: 2px solid #ccc; border-radius: 10px; padding: none; }  </style>')
+    demo.queue()
+    demo.launch(share=False)
+if __name__ == '__main__':
+    main()

data_list.py CHANGED Viewed

@@ -74,4 +74,4 @@ class DataList:
             {table_header}
             {table_data}
         </table>'''
-        return html

             {table_header}
             {table_data}
         </table>'''
+        return html

model_list.py ADDED Viewed

	@@ -0,0 +1,79 @@

+from __future__ import annotations
+import numpy as np
+import pandas as pd
+import requests
+from huggingface_hub.hf_api import SpaceInfo
+SHEET_ID = '1L7AHpWMVU_kZVLcsk8H2FTizgzeVxWPDoBxw7K8KHXw'
+SHEET_NAME = 'model'
+csv_url = f'https://docs.google.com/spreadsheets/d/{SHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'
+class ModelList:
+    def __init__(self):
+        self.table = pd.read_csv(csv_url)
+        self.table = self.table.astype({'Year':'string'})
+        self._preprocess_table()
+        self.table_header = '''
+            <tr>
+                <td width="15%">Name</td>
+                <td width="10%">Year Published</td>
+                <td width="10%">Source</td>
+                <td width="30%">About</td>
+                <td width="10%">Task</td>
+                <td width="15%">Training Data Type</td>
+                <td width="10%">Publication</td>
+            </tr>'''
+    def _preprocess_table(self) -> None:
+        self.table['name_lowercase'] = self.table['Name'].str.lower()
+        rows = []
+        for row in self.table.itertuples():
+            source = f'<a href="{row.Source}" target="_blank">Link</a>' if isinstance(
+                row.Source, str) else ''
+            paper = f'<a href="{row.Paper}" target="_blank">Link</a>' if isinstance(
+                row.Source, str) else ''
+            row = f'''
+                <tr>
+                    <td>{row.Name}</td>
+                    <td>{row.Year}</td>
+                    <td>{source}</td>
+                    <td>{row.About}</td>
+                    <td>{row.task}</td>
+                    <td>{row.data}</td>
+                    <td>{paper}</td>
+                </tr>'''
+            rows.append(row)
+        self.table['html_table_content'] = rows
+    def render(self, search_query: str,
+            case_sensitive: bool,
+            filter_names: list[str],
+            data_types: list[str]) -> tuple[int, str]:
+        df = self.table
+        if search_query:
+            if case_sensitive:
+                df = df[df.name.str.contains(search_query)]
+            else:
+                df = df[df.name_lowercase.str.contains(search_query.lower())]
+        df = self.filter_table(df, filter_names, data_types)
+        result = self.to_html(df, self.table_header)
+        return result
+    @staticmethod
+    def filter_table(df: pd.DataFrame, filter_names: list[str], data_types: list[str]) -> pd.DataFrame:
+        df = df.loc[df.task.isin(set(filter_names))]
+        df = df.loc[df.data.isin(set(data_types))]
+        return df
+    @staticmethod
+    def to_html(df: pd.DataFrame, table_header: str) -> str:
+        table_data = ''.join(df.html_table_content)
+        html = f'''
+        <table>
+            {table_header}
+            {table_data}
+        </table>'''
+        return html

nist.png ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+nbconvert==6.3.0
+ipython==7.28.0
+ipython_genutils==0.1.0
+jinja2==3.1.2