|
import gradio as gr |
|
from transformers import AutoTokenizer, AutoModel, PreTrainedTokenizerFast |
|
import torch |
|
import numpy as np |
|
from typing import List, Dict |
|
|
|
class SentenceEncoder:
    """Turn sentences into L2-normalised, mean-pooled transformer embeddings.

    The tokenizer and model are loaded once in the constructor and moved to
    the GPU when one is available, otherwise they stay on the CPU.
    """

    def __init__(self, model_name="aubmindlab/bert-large-arabertv2", max_length=512):
        """Load the tokenizer and model named by *model_name*.

        Args:
            model_name: Identifier passed to ``from_pretrained``.
            max_length: Token limit used when truncating sentences in
                :meth:`encode`.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        self.max_length = max_length
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model.to(self.device)

    def mean_pooling(self, model_output, attention_mask):
        """Average the token embeddings of each sentence, ignoring padding.

        Args:
            model_output: Model output whose first element holds the
                per-token embeddings.
            attention_mask: Mask with 1 for real tokens and 0 for padding.

        Returns:
            One pooled vector per sentence. A sentence whose mask is all
            zeros yields a zero vector, because the divisor is clamped to a
            tiny positive value instead of being allowed to reach zero.
        """
        token_embeddings = model_output[0]
        # Broadcast the mask over the embedding dimension so padded positions
        # contribute nothing to the sum.
        mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        summed = torch.sum(token_embeddings * mask, 1)
        token_counts = torch.clamp(mask.sum(1), min=1e-9)
        return summed / token_counts

    def encode(self, sentences: List[str], batch_size: int = 32) -> np.ndarray:
        """Convert sentences into unit-length embedding vectors.

        Args:
            sentences: Sentences to encode.
            batch_size: Number of sentences sent through the model per
                forward pass; bounds peak memory for long inputs.

        Returns:
            Array with one row per sentence. An empty input yields an array
            of shape ``(0, hidden_size)`` without invoking the model.

        Raises:
            ValueError: If *batch_size* is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        if not sentences:
            # Nothing to tokenize; keep the column count consistent with
            # non-empty results so callers can still stack/dot the output.
            return np.empty((0, self.model.config.hidden_size), dtype=np.float32)

        batches = []
        for start in range(0, len(sentences), batch_size):
            batch = sentences[start:start + batch_size]

            encoded_input = self.tokenizer(
                batch,
                padding=True,
                truncation=True,
                max_length=self.max_length,
                return_tensors='pt',
            ).to(self.device)

            # Inference only: no gradients are needed.
            with torch.no_grad():
                model_output = self.model(**encoded_input)

            pooled = self.mean_pooling(model_output, encoded_input['attention_mask'])
            # L2-normalise so that dot products between rows are cosine
            # similarities.
            pooled = torch.nn.functional.normalize(pooled, p=2, dim=1)
            batches.append(pooled.cpu().numpy())

        return np.concatenate(batches, axis=0)
|
|
|
class ContractAnalyzer:
    """Heuristic analyzer for Arabic legal contracts.

    Combines substring heuristics (date, parties, subject, risk terms,
    required clauses) with an embedding-based similarity score and renders
    the findings into an Arabic text report.
    """

    def __init__(self):
        """Load the sentence encoder and set up the keyword list and template."""
        # Progress messages are printed around the encoder construction.
        print("جاري تحميل النموذج...")
        self.encoder = SentenceEncoder()
        print("تم تحميل النموذج بنجاح!")

        # Legal vocabulary kept on the instance; none of the methods of this
        # class read it.
        self.legal_keywords = [
            "يلتزم", "الزام", "يتعهد", "يحق", "لا يحق", "شرط جزائي",
            "فسخ العقد", "إنهاء", "تعويض", "غرامة", "مدة العقد",
            "طرف أول", "طرف ثاني", "قيمة العقد", "التزامات", "سداد",
            "دفعات", "ينكل", "ضمان", "مخالفة", "إخلال", "قوة قاهرة",
        ]

        # Report template filled in by analyze_contract via str.format.
        self.analysis_prompt = (
            "\n"
            "تحليل العقد القانوني:\n"
            "\n"
            "1. معلومات أساسية:\n"
            "- تاريخ العقد: {date}\n"
            "- الأطراف المتعاقدة: {parties}\n"
            "- موضوع العقد: {subject}\n"
            "\n"
            "2. تحليل المحتوى (درجة التشابه): {similarity_score}\n"
            "\n"
            "3. المخاطر المحتملة:\n"
            "{risks}\n"
            "\n"
            "4. العناصر المفقودة أو غير الواضحة:\n"
            "{missing_elements}\n"
            "\n"
            "5. توصيات قانونية:\n"
            "{recommendations}\n"
        )

    @staticmethod
    def _clause_from(text, start_idx):
        """Return the stripped text running from *start_idx* to the clause end.

        The clause ends at the next newline; failing that, at the next full
        stop; failing both, at the end of the text, so a clause on the last
        unterminated line is not lost.
        """
        for terminator in ("\n", "."):
            end_idx = text.find(terminator, start_idx)
            if end_idx != -1:
                return text[start_idx:end_idx].strip()
        return text[start_idx:].strip()

    def extract_contract_info(self, text):
        """Extract the basic facts of the contract.

        Args:
            text: Full contract text.

        Returns:
            Dict with keys ``"date"`` (str), ``"parties"`` (list of str) and
            ``"subject"`` (str). Fields that cannot be found keep the
            placeholder ``"غير محدد"`` (or an empty list for parties).
        """
        info = {
            "date": "غير محدد",
            "parties": [],
            "subject": "غير محدد",
        }

        # Date: indicators are listed in priority order and the first one
        # present in the text wins; later indicators must not overwrite it.
        date_indicators = ["بتاريخ", "في يوم", "الموافق"]
        for indicator in date_indicators:
            start_idx = text.find(indicator)
            if start_idx != -1:
                info["date"] = self._clause_from(text, start_idx)
                break

        # Parties: one entry per indicator, taken at its first occurrence.
        party_indicators = [
            "طرف أول", "طرف ثاني", "الطرف الأول", "الطرف الثاني",
            "الفريق الأول", "الفريق الثاني",
        ]
        for indicator in party_indicators:
            start_idx = text.find(indicator)
            if start_idx != -1:
                info["parties"].append(self._clause_from(text, start_idx))

        # Subject: the first of the opening three lines that mentions a
        # contract/agreement word. Arabic has no letter case, so no case
        # folding is needed before matching.
        subject_words = ("اتفاق", "عقد", "تعاقد")
        for line in text.split('\n')[:3]:
            if any(word in line for word in subject_words):
                info["subject"] = line.strip()
                break

        return info

    def compute_similarity(self, sentences: List[str]) -> float:
        """Compute the average pairwise similarity between sentences.

        Similarity is the dot product of the encoder's embeddings, i.e. the
        cosine similarity when the encoder returns unit-length vectors.

        Args:
            sentences: Sentences to compare.

        Returns:
            ``0.0`` for no sentences, ``1.0`` for a single sentence (decided
            without running the encoder), otherwise the mean of all
            off-diagonal entries of the similarity matrix.
        """
        if not sentences:
            return 0.0
        if len(sentences) < 2:
            return 1.0

        embeddings = self.encoder.encode(sentences)
        n = len(embeddings)
        if n < 2:
            return 1.0

        similarity_matrix = np.dot(embeddings, embeddings.T)

        # Remove the self-similarities by subtracting the actual diagonal
        # rather than assuming every diagonal entry is exactly 1.
        off_diagonal_sum = similarity_matrix.sum() - np.trace(similarity_matrix)
        return float(off_diagonal_sum / (n * (n - 1)))

    def analyze_contract(self, contract_text: str) -> str:
        """Analyze a contract and return the formatted report.

        Args:
            contract_text: Full contract text.

        Returns:
            The filled-in report template, or an error message string if any
            step raises.
        """
        # This is the boundary to the UI: any failure is reported as text
        # instead of propagating.
        try:
            # Sentences are split on full stops; fragments of five characters
            # or fewer are discarded.
            sentences = [
                s.strip() for s in contract_text.split('.') if len(s.strip()) > 5
            ]

            similarity_score = self.compute_similarity(sentences)
            contract_info = self.extract_contract_info(contract_text)
            results = self.analyze_content(sentences)

            # Each section falls back to a fixed message when it is empty.
            return self.analysis_prompt.format(
                date=contract_info["date"],
                parties="\n".join(contract_info["parties"]) or "غير محدد",
                subject=contract_info["subject"],
                similarity_score=f"{similarity_score:.2%}",
                risks="\n".join(f"• {risk}" for risk in results["risks"])
                or "لا توجد مخاطر واضحة",
                missing_elements="\n".join(
                    f"• {element}" for element in results["missing_elements"]
                )
                or "لا توجد عناصر مفقودة",
                recommendations="\n".join(
                    f"• {rec}" for rec in results["recommendations"]
                )
                or "لا توجد توصيات إضافية",
            )
        except Exception as e:
            return f"حدث خطأ أثناء التحليل: {e}"

    def analyze_content(self, sentences: List[str]) -> Dict:
        """Analyze the contract sentences for risks and missing clauses.

        Args:
            sentences: Contract sentences.

        Returns:
            Dict with ``"risks"`` (sentences containing a risk term),
            ``"missing_elements"`` (required clause names found in no
            sentence) and ``"recommendations"`` (one per missing element).
        """
        results = {
            "risks": [],
            "missing_elements": [],
            "recommendations": [],
        }

        # A sentence is a risk if it contains any of these terms.
        risk_words = ("مخالفة", "خرق", "نزاع", "خلاف", "إخلال", "فسخ")
        results["risks"] = [
            sentence.strip()
            for sentence in sentences
            if any(word in sentence for word in risk_words)
        ]

        # A required element is missing when its exact phrase appears in no
        # sentence.
        required_elements = [
            "مدة العقد", "قيمة العقد", "التزامات الطرفين",
            "طريقة السداد", "الضمانات", "شروط الإنهاء",
        ]
        for element in required_elements:
            if not any(element in s for s in sentences):
                results["missing_elements"].append(element)
                results["recommendations"].append(
                    f"يجب إضافة {element} بشكل واضح في العقد"
                )

        return results
|
|
|
|
|
|
|
# Single analyzer instance created at import time and shared by every call
# to the callback below.
analyzer = ContractAnalyzer()


def analyze_text(text):
    """Run the shared analyzer on *text* and return its result string.

    This is the function handed to the Gradio interface; whatever
    ``analyze_contract`` returns is passed back unchanged.
    """
    report = analyzer.analyze_contract(text)
    return report
|
|
|
|
|
# Right-to-left text areas for the contract text and the analysis report.
_contract_input = gr.Textbox(
    placeholder="أدخل نص العقد هنا...",
    label="نص العقد",
    lines=30,
    rtl=True,
)
_analysis_output = gr.Textbox(
    label="نتائج التحليل",
    lines=30,
    rtl=True,
)

# Text shown under the title, listing what the report contains.
_description = """
قم بإدخال نص العقد القانوني للحصول على تحليل شامل يتضمن:
• المعلومات الأساسية للعقد
• المخاطر المحتملة
• العناصر المفقودة
• التوصيات القانونية
"""

# Custom CSS forcing right-to-left direction and right alignment on the
# selectors listed below.
_rtl_css = """
.gradio-container {
direction: rtl !important;
text-align: right !important;
}
.output-markdown {
direction: rtl !important;
text-align: right !important;
}
.input-markdown {
direction: rtl !important;
text-align: right !important;
}
label {
text-align: right !important;
}
.prose {
direction: rtl !important;
text-align: right !important;
}
"""

iface = gr.Interface(
    fn=analyze_text,
    inputs=_contract_input,
    outputs=_analysis_output,
    title="محلل العقود القانونية ",
    description=_description,
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="blue",
        neutral_hue="blue",
    ),
    css=_rtl_css,
)

# Start the web UI as soon as the module is executed, with share and debug
# both switched on.
iface.launch(share=True, debug=True)
|
|
|
|
|
|