brichett committed
Commit 2e46e9b · verified · 1 Parent(s): 40c3a36

Fix bug in handling violation_context from gradio_server

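For context on the fix: the `violation_context` coming from gradio_server arrives wrapped under a single `detect_glorification` key (presumably the name of the upstream detection node), so the flat `.get()` lookups in `enforcement_prompt` all fell through to their defaults. A minimal sketch of the two payload shapes, with illustrative field values (only the keys are taken from the diff below):

# Shape enforcement_prompt previously assumed (flat):
flat_context = {
    "entity_name": "Example Group",  # illustrative value
    "entity_info": {"Summary": "An example entry", "Policy Category": "Terrorism"},
    "extremism_detected": True,
    "aspect_sentiment": "Positive",
}

# Shape gradio_server actually sends (wrapped); the new code unwraps it:
wrapped_context = {"detect_glorification": flat_context}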
src/enforcement_module/policy_enforcement_decider.py CHANGED
@@ -1,59 +1,64 @@
- from types import ModuleType
- import math
- from langchain.llms import OpenAI
- import requests
- import requests.models
-
-
- import requests
- from hamilton.function_modifiers import config
-
-
-
-
- def enforcement_prompt(user_input: str, violation_context: dict) -> str:
-     """
-     Generates the prompt to be sent to the LLM for determining the appropriate enforcement action.
-     """
-     dio_name = violation_context.get("entity_name", "None")
-     dio_details = violation_context.get("entity_info", {}).get("Summary", "Unknown")
-     dio_category = violation_context.get("entity_info", {}).get("Policy Category", "Unknown")
-     radicalization_flag = violation_context.get("extremism_detected", False)
-     dio_sentiment = violation_context.get("aspect_sentiment", "None")
-
-     prompt = (
-
-         f"Based on the following content and context, determine the appropriate enforcement action:\n\n"
-         f"Content: '{user_input}'\n"
-         f"Radicalization detected: {radicalization_flag}\n"
-         f"DIO Mentioned: {dio_name} (Category: {dio_category})\n"
-         f"System's Classification of Content Sentiment Towards DIO: {dio_sentiment}"
-         # f"{user_info}\n\n"
-         "Policy Summary: Any entity mentioned in the 'DIO Mentioned' field is from the Dangerous Individuals & Organizations (DIO) blacklist. "
-         "If this entity is mentioned in the user content in a supportive, glorifying, or representative manner, it constitutes a violation of our platform's community standards. "
-         "Our platform does not allow content that glorifies, supports, or represents individuals or organizations involved in violence, hate, or terrorism. "
-         "These entities include terrorist groups, hate groups, violent non-state actors, and others who advocate or engage in violent activities. "
-         "Any content that endorses or supports these entities or their actions is a policy violation. "
-         "Users may discuss these topics neutrally or in a condemnatory manner, but any positive portrayal is prohibited. "
-         "Based on this policy, choose one of the following enforcement actions: Warning, Limited Visibility, Temporary Suspension, Permanent Ban. "
-         "Provide a brief explanation for your choice."
-     )
-     return prompt
-
- def get_enforcement_decision(enforcement_prompt: str, mistral_public_url: str) -> dict:
-     """
-     Sends the enforcement prompt to the Mistral model server and retrieves the enforcement decision.
-     """
-     input_text = {
-         "context": enforcement_prompt,
-         "question": "What is the appropriate enforcement action?"
-     }
-
-     response = requests.post(f'{mistral_public_url}/mistral-inference', json=input_text, stream=False)
-
-     return {
-         "enforcement_action": response.text.strip(),
-         "prompt": enforcement_prompt
-     }
-
-
+ from types import ModuleType
+ import math
+ from langchain.llms import OpenAI
+ import requests
+ import requests.models
+
+
+ import requests
+ from hamilton.function_modifiers import config
+
+
+
+
+ def enforcement_prompt(user_input: str, violation_context: dict) -> str:
+     """
+     Generates the prompt to be sent to the LLM for determining the appropriate enforcement action.
+     """
+
+     print(f"Received enforcement user_input: {user_input}")
+     print(f"Received enforcement violation_context: {violation_context}")
+     if len(violation_context.keys()) == 1 and "detect_glorification" in violation_context.keys():
+         violation_context = violation_context['detect_glorification']
+     dio_name = violation_context.get("entity_name", "None")
+     dio_details = violation_context.get("entity_info", {}).get("Summary", "Unknown")
+     dio_category = violation_context.get("entity_info", {}).get("Policy Category", "Unknown")
+     radicalization_flag = violation_context.get("extremism_detected", False)
+     dio_sentiment = violation_context.get("aspect_sentiment", "None")
+
+     prompt = (
+
+         f"Based on the following content and context, determine the appropriate enforcement action:\n\n"
+         f"Content: '{user_input}'\n"
+         f"Radicalization detected: {radicalization_flag}\n"
+         f"DIO Mentioned: {dio_name} (Category: {dio_category})\n"
+         f"System's Classification of Content Sentiment Towards DIO: {dio_sentiment}"
+         # f"{user_info}\n\n"
+         "Policy Summary: Any entity mentioned in the 'DIO Mentioned' field is from the Dangerous Individuals & Organizations (DIO) blacklist. "
+         "If this entity is mentioned in the user content in a supportive, glorifying, or representative manner, it constitutes a violation of our platform's community standards. "
+         "Our platform does not allow content that glorifies, supports, or represents individuals or organizations involved in violence, hate, or terrorism. "
+         "These entities include terrorist groups, hate groups, violent non-state actors, and others who advocate or engage in violent activities. "
+         "Any content that endorses or supports these entities or their actions is a policy violation. "
+         "Users may discuss these topics neutrally or in a condemnatory manner, but any positive portrayal is prohibited. "
+         "Based on this policy, choose one of the following enforcement actions: Warning, Limited Visibility, Temporary Suspension, Permanent Ban. "
+         "Provide a brief explanation for your choice."
+     )
+     return prompt
+
+ def get_enforcement_decision(enforcement_prompt: str, mistral_public_url: str) -> dict:
+     """
+     Sends the enforcement prompt to the Mistral model server and retrieves the enforcement decision.
+     """
+     input_text = {
+         "context": enforcement_prompt,
+         "question": "What is the appropriate enforcement action?"
+     }
+
+     response = requests.post(f'{mistral_public_url}/mistral-inference', json=input_text, stream=False)
+
+     return {
+         "enforcement_action": response.text.strip(),
+         "prompt": enforcement_prompt
+     }
+
+
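A hypothetical end-to-end sketch of the two functions after this change; the server URL and all payload values are placeholders, not taken from this repo:

if __name__ == "__main__":
    sample_context = {
        "detect_glorification": {  # wrapped shape, as sent by gradio_server
            "entity_name": "Example Group",
            "entity_info": {"Summary": "An illustrative entry", "Policy Category": "Hate"},
            "extremism_detected": True,
            "aspect_sentiment": "Supportive",
        }
    }
    prompt = enforcement_prompt("sample user post", sample_context)  # builds the LLM prompt
    decision = get_enforcement_decision(prompt, "https://mistral.example.com")  # placeholder URL
    print(decision["enforcement_action"])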