Spaces:
Sleeping
Sleeping
DrishtiSharma
committed on
Update mylab/attempt1/app.py
Browse files- mylab/attempt1/app.py +163 -0
mylab/attempt1/app.py
CHANGED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import chromadb
|
3 |
+
from datetime import datetime
|
4 |
+
import streamlit as st
|
5 |
+
from patentwiz import preprocess_data, qa_agent
|
6 |
+
|
7 |
+
# Abort early when no OpenAI credential is configured: show the problem in
# the UI and stop the script run.
api_key = os.environ.get("OPENAI_API_KEY")
if api_key is None or api_key == "":
    st.error("OPENAI_API_KEY not found! Please set it in the environment variables or Hugging Face Secrets.")
    st.stop()

# Reset ChromaDB's shared system cache (workaround for the tenant issue).
chromadb.api.client.SharedSystemClient.clear_system_cache()
|
15 |
+
|
16 |
+
PROMPT = """
|
17 |
+
Task: Carefully review the provided patent text and extract all relevant technical information, specifically for RF devices, antennas, and related hardware. Focus on the following parameters:
|
18 |
+
1. **Physical Measurements**:
|
19 |
+
- Frequency, impedance, voltage, current, power, gain, bandwidth, radiation efficiency, and other measurable RF parameters.
|
20 |
+
- For each measurement, provide the following details:
|
21 |
+
- Substance or component being measured.
|
22 |
+
- Specific value or range of the measurement.
|
23 |
+
- Unit of measurement (if provided).
|
24 |
+
- Measurement type or context (e.g., frequency, impedance, gain, etc.).
|
25 |
+
2. **Patent Metadata**:
|
26 |
+
- Title of the patent.
|
27 |
+
- Abstract summarizing the technical focus.
|
28 |
+
- Metadata, including:
|
29 |
+
- Patent number.
|
30 |
+
- Filing date.
|
31 |
+
- Inventors.
|
32 |
+
- Assignee (if applicable).
|
33 |
+
### Output Format:
|
34 |
+
The response should be formatted as a structured JSON object, as shown below:
|
35 |
+
{
|
36 |
+
"Patent_Title": "Title",
|
37 |
+
"Patent_Abstract": "Abstract",
|
38 |
+
"Patent_Metadata": {
|
39 |
+
"Patent_Number": "Number",
|
40 |
+
"Filing_Date": "Date",
|
41 |
+
"Inventors": ["Name1", "Name2"],
|
42 |
+
"Assignee": "Assignee Name"
|
43 |
+
},
|
44 |
+
"Content": [
|
45 |
+
{
|
46 |
+
"Measurement_substance": "substance",
|
47 |
+
"Measured_value": "value",
|
48 |
+
"Measured_unit": "unit",
|
49 |
+
"measurement_type": "type"
|
50 |
+
}
|
51 |
+
// Additional measurements
|
52 |
+
]
|
53 |
+
}
|
54 |
+
"""
|
55 |
+
|
56 |
+
# Page heading followed by a short explanation of what the app does.
st.title("Blah")
app_description = (
    "Analyze patents to extract physical measurements such as frequency, bandwidth, and more. "
    "Provide a date range to download patents and analyze them using GPT models."
)
st.write(app_description)
|
62 |
+
|
63 |
+
# Analysis settings: date window, number of patents, model and logging toggle.
st.header("Enter Date Range for Patent Analysis")
start_date_input = st.text_input(
    "Enter the start date (YYYY-MM-DD):",
    value="2024-06-20",
)
end_date_input = st.text_input(
    "Enter the end date (YYYY-MM-DD):",
    value="2024-06-27",
)

# Upper bound on how many of the matching patents are sent to the model.
num_patents_to_analyze = st.number_input(
    "Number of patents to analyze:",
    min_value=1,
    value=3,
    step=1,
    help="Specify how many patents you want to analyze.",
)

# Models offered in the dropdown; the first entry is the initial selection.
available_models = ["gpt-3.5-turbo", "gpt-4"]
model_choice = st.selectbox(
    "Select a model for analysis:",
    available_models,
    help="Choose the OpenAI GPT model for the analysis.",
)

logging_enabled = st.checkbox(
    "Enable logging?",
    value=False,
    help="Toggle logging for debugging purposes.",
)
|
77 |
+
|
78 |
+
# Keyword filter: seed the text area with RF-related defaults and let the
# user edit the comma-separated list.
st.header("Manage Keywords")
st.write("Add or delete keywords for filtering patents.")
default_keywords = [
    "RF",
    "Radio Frequency",
    "Wireless Communication",
    "Antenna",
    "Microwave",
    "Electromagnetic Waves",
    "Beamforming",
    "5G",
    "6G",
    "Patch Antenna",
    "Dipole Antenna",
    "Phased Array",
    "Radiation Pattern",
    "IoT",
    "Wireless Charging",
]
keywords_input = st.text_area(
    "Enter keywords for filtering (comma-separated):",
    value=", ".join(default_keywords),
)
# Split on commas, trim surrounding whitespace and drop entries left empty.
user_keywords = [kw for kw in map(str.strip, keywords_input.split(",")) if kw]
|
90 |
+
|
91 |
+
# Patent sections in which the keywords are searched; Title and Abstract are
# preselected.
st.header("Choose Fields for Filtering")
searchable_fields = [
    "Title",
    "Abstract",
    "Claims",
    "Summary",
    "Detailed Description",
]
fields = st.multiselect(
    "Select fields to search for keywords:",
    searchable_fields,
    default=[
        "Title",
        "Abstract",
    ],
)
|
98 |
+
|
99 |
+
# Run Analysis Button
def _run_patent_analysis(
    start_date: datetime,
    end_date: datetime,
    *,
    keywords: list,
    search_fields: list,
    max_patents: int,
    model: str,
    enable_logging: bool,
) -> None:
    """Download, filter and analyze patents, then render the results.

    Args:
        start_date: First day of the download window.
        end_date: Last day of the download window.
        keywords: Keywords forwarded to ``preprocess_data.filter_rf_patents``.
        search_fields: Patent sections forwarded to the same filter.
        max_patents: Upper bound on how many filtered patents are analyzed.
        model: Model name forwarded to ``qa_agent.call_QA_to_json``.
        enable_logging: Logging flag forwarded to the ``patentwiz`` helpers.

    Raises:
        Exception: Anything raised by the ``patentwiz`` helpers propagates to
            the caller, which reports it in the UI.
    """
    # Step 1: Download and preprocess patents
    with st.spinner("Downloading and extracting patents..."):
        saved_patent_names = preprocess_data.parse_and_save_patents(
            start_date, end_date, enable_logging
        )
        if not saved_patent_names:
            st.error("No patents found for the given date range.")
            st.stop()
        st.success(f"{len(saved_patent_names)} patents found and processed!")

    # Step 2: Filter patents based on user input
    with st.spinner("Filtering patents..."):
        filtered_patents = preprocess_data.filter_rf_patents(
            saved_patent_names, keywords=keywords, fields=search_fields
        )
        if not filtered_patents:
            st.error("No patents matched the filtering criteria.")
            st.stop()
        st.success(f"{len(filtered_patents)} relevant patents found and processed!")

    # Step 3: Analyze patents using GPT
    # Only the first `max_patents` filtered patents are analyzed.
    selected_patents = filtered_patents[:max_patents]
    total_cost = 0
    results = []

    st.write("Starting patent analysis...")
    for index in range(len(selected_patents)):
        # Pass the *filtered* selection together with the index. Indexing the
        # unfiltered download list here would analyze patents that never
        # matched the keyword filter.
        # NOTE(review): assumes filter_rf_patents returns entries in the same
        # form as parse_and_save_patents -- confirm against patentwiz.
        cost, output = qa_agent.call_QA_to_json(
            PROMPT,
            start_date.year, start_date.month, start_date.day,
            selected_patents, index, enable_logging, model,
        )
        total_cost += cost
        results.append(output)

    # Step 4: Display results
    st.write(f"**Total Cost:** ${total_cost:.4f}")
    st.write("### Analysis Results:")
    for position, result in enumerate(results, start=1):
        st.subheader(f"Patent {position}")
        st.json(result)


if st.button("Analyze Patents"):
    # Validate the raw widget values before doing any expensive work.
    if not start_date_input or not end_date_input:
        st.error("Please enter both start and end dates!")
    elif not user_keywords:
        st.error("Please provide at least one keyword for filtering.")
    elif not fields:
        st.error("Please select at least one field for filtering.")
    else:
        # Parse the dates in their own try block so that only real parsing
        # failures are reported as "Invalid date format"; a ValueError raised
        # later by the pipeline is reported as an unexpected error instead.
        try:
            start_date = datetime.strptime(start_date_input, "%Y-%m-%d")
            end_date = datetime.strptime(end_date_input, "%Y-%m-%d")
        except ValueError as ve:
            st.error(f"Invalid date format: {ve}")
        else:
            # Validate date range
            if start_date > end_date:
                st.error("End date must be after start date!")
                st.stop()

            # NOTE(review): relies on st.stop() not being intercepted by the
            # `except Exception` below -- confirm for the installed Streamlit.
            try:
                _run_patent_analysis(
                    start_date,
                    end_date,
                    keywords=user_keywords,
                    search_fields=fields,
                    # int(): slice bounds must be integers; guards against the
                    # number widget handing back a float.
                    max_patents=int(num_patents_to_analyze),
                    model=model_choice,
                    enable_logging=logging_enabled,
                )
            except Exception as e:
                st.error(f"An unexpected error occurred: {e}")
|