Hellisotherpeople committed on
Commit
4133681
·
1 Parent(s): 741ff55

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -2
app.py CHANGED
@@ -11,9 +11,22 @@ import streamlit.components.v1 as components
11
 
12
  st.set_page_config(page_title="DebateKG")
13
  st.title("DebateKG - Automatic Policy Debate Case Creation")
 
14
  st.caption("github: https://github.com/Hellisotherpeople/DebateKG")
15
 
16
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
 
19
 
@@ -22,7 +35,7 @@ seg = pysbd.Segmenter(language="en", clean=False)
22
 
23
 
24
  embeddings = Embeddings({
25
- "path": "entence-transformers/all-mpnet-base-v2",
26
  "content": True,
27
  "functions": [
28
  {"name": "graph", "function": "graph.attribute"},
@@ -44,4 +57,73 @@ embeddings = Embeddings({
44
  embeddings.load("DebateSum_SemanticGraph_mpnet_extract.tar.gz")
45
  graph = embeddings.graph
46
 
47
- print(graph.backend.number_of_nodes(), graph.backend.number_of_edges())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  st.set_page_config(page_title="DebateKG")
13
  st.title("DebateKG - Automatic Policy Debate Case Creation")
14
+ st.write("WIP, give me a few more days before reviewing!")
15
  st.caption("github: https://github.com/Hellisotherpeople/DebateKG")
16
 
17
 
18
# Sidebar form that groups every search/display setting behind one
# "Submit" button.
form = st.sidebar.form("Main Settings")
form.header("Main Settings")

# Numeric settings.
number_of_paths = form.number_input(
    "Enter the cutoff number of paths for all shortest path search",
    value=4,
)
highlight_threshold = form.number_input(
    "Enter the minimum similarity value needed to highlight",
    value=4,
)

# Toggles selecting which parts of each piece of evidence are displayed.
show_extract = form.checkbox("Show extracts", value=False)
show_abstract = form.checkbox("Show abstract", value=False)
show_full_doc = form.checkbox("Show full doc", value=False)
show_citation = form.checkbox("Show citation", value=False)

# Free-text filters; both currently default to "Full-Document".
rerank_word = form.text_area("Enter the word", value="Full-Document")
rerank_topic = form.text_area("Enter the topic", value="Full-Document")

form.form_submit_button("Submit")
30
 
31
 
32
 
 
35
 
36
 
37
  embeddings = Embeddings({
38
+ "path": "sentence-transformers/all-mpnet-base-v2",
39
  "content": True,
40
  "functions": [
41
  {"name": "graph", "function": "graph.attribute"},
 
57
  embeddings.load("DebateSum_SemanticGraph_mpnet_extract.tar.gz")
58
  graph = embeddings.graph
59
 
60
def david_distance(source, target, attrs, *, min_distance=0.15):
    """Convert an edge's similarity weight into a traversal cost.

    The three positional parameters follow the edge-weight callback shape
    used by ``nx.shortest_path`` (see ``david_showpath``).

    Args:
        source: Node the edge starts at (unused).
        target: Node the edge ends at (unused).
        attrs: Edge attribute mapping; must contain a numeric ``"weight"``.
        min_distance: Smallest distance that is returned unchanged.
            Defaults to the original hard-coded cutoff of 0.15.

    Returns:
        float: ``1.0 - attrs["weight"]`` clamped so it is never negative,
        or ``1.0`` when that value is below ``min_distance``.

    Raises:
        KeyError: If ``attrs`` has no ``"weight"`` entry.
    """
    distance = max(1.0 - attrs["weight"], 0.0)
    # Edges that are "too close" are charged the full cost of 1.0.
    # NOTE(review): presumably this steers paths away from near-duplicate
    # nodes -- confirm the intent with the author.
    return distance if distance >= min_distance else 1.00
63
+
64
def david_showpath(source, target, the_graph):
    """Return the shortest path from ``source`` to ``target``.

    Edge costs come from ``david_distance`` rather than from the raw edge
    weights.

    Args:
        source: Node the path starts at.
        target: Node the path ends at.
        the_graph: Graph handed to ``nx.shortest_path``.

    Returns:
        Whatever ``nx.shortest_path`` returns for the two nodes.
    """
    return nx.shortest_path(
        the_graph,
        source=source,
        target=target,
        weight=david_distance,
    )
66
+
67
+
68
+
69
+ import string
70
+
71
def highlight(index, result, threshold=0.01):
    """Render one explained result as a numbered line of HTML.

    Args:
        index: Number printed in front of the line.
        result: Mapping whose ``"tokens"`` entry is an iterable of
            ``(token, score)`` pairs.
        threshold: Tokens scoring strictly above this value are
            highlighted. Defaults to the original hard-coded 0.01.

    Returns:
        str: ``"<index>. "`` followed by every token and a trailing space;
        highlighted tokens are wrapped in a ``<span>`` with a yellow
        background.
    """
    import html  # Local import so the function does not rely on file-level imports.

    pieces = [f"{index}. "]
    for token, score in result["tokens"]:
        # Tokens come from document text and end up inside an HTML
        # component, so markup characters must be shown, not interpreted.
        text = html.escape(str(token))
        if score > threshold:
            pieces.append(f"<span style='background-color: #fff59d'>{text}</span> ")
        else:
            pieces.append(f"{text} ")

    return "".join(pieces)
79
+
80
+
81
+
82
def showpath_any(list_of_arguments, strip_punctuation=True, the_graph=graph.backend):
    """Render the chain of shortest paths through the given nodes as HTML.

    For every consecutive pair in ``list_of_arguments`` the shortest path
    (see ``david_showpath``) is computed and the segments are joined into
    one path. The first node's text is shown as-is; every later node is
    shown via ``highlight`` using the explanation of the previous node's
    text against it.

    Args:
        list_of_arguments: Sequence of graph node ids to connect, in order.
        strip_punctuation: When true, ``string.punctuation`` characters are
            removed from each text before it is used in the id lookup
            query. The displayed text is never altered by this flag.
        the_graph: Graph searched for paths. The default is bound to
            ``graph.backend`` when the function is defined.

    Returns:
        The result of ``components.html`` for the rendered sections.

    Raises:
        IndexError: If an id lookup or an explanation returns no rows.
    """
    import html  # Local import so the function does not rely on file-level imports.

    # Chain the per-pair shortest paths into one node sequence.
    node_ids = []
    for start, end in zip(list_of_arguments, list_of_arguments[1:]):
        segment = david_showpath(start, end, the_graph)
        # Each segment starts on the node the previous segment ended on;
        # keep that junction node once so it is not listed twice (and later
        # explained against itself).
        if node_ids and segment and segment[0] == node_ids[-1]:
            segment = segment[1:]
        node_ids.extend(segment)

    path = [graph.attribute(node, "text") for node in node_ids]

    # Look up the closest indexed row id for every text on the path.
    # NOTE(review): the ids are only printed for now; presumably they are
    # meant to select the abstract / citation / full document shown for
    # each node once a dataset is wired in -- confirm with the author.
    # NOTE(review): the text is interpolated into the query, so with
    # strip_punctuation=False a single quote in the text alters the
    # statement.
    punctuation_table = str.maketrans("", "", string.punctuation)
    list_of_evidence_ids = []
    for text in path:
        if strip_punctuation:
            text = text.translate(punctuation_table)
        rows = embeddings.search(f"select id from txtai where similar('{text}') limit 1")
        list_of_evidence_ids.append(int(rows[0]['id']))
    print(list_of_evidence_ids)

    sections = []
    if path:
        # Start node: plain text, escaped because it is embedded in HTML.
        sections.append(f"1. {html.escape(str(path[0]))}")
    for position, (current, following) in enumerate(zip(path, path[1:])):
        # Explain the next path element against the current one and
        # highlight the tokens that contribute to the match.
        results = embeddings.explain(current, [following], limit=1)[0]
        sections.append(highlight(position + 2, results))

    return components.html("<br/><br/>".join(sections), scrolling=True, width=800, height=1000)
115
+
116
def question(text, rerank_word="", rerank_topic="", limit=100):
    """Run a similarity query filtered by a word and a topic substring.

    Args:
        text: Text passed to the ``similar(...)`` clause.
        rerank_word: Substring the ``text`` column must contain
            (``like '%...%'``); the empty default matches everything.
        rerank_topic: Substring the ``topic`` column must contain.
        limit: Maximum number of rows requested.

    Returns:
        The result of ``embeddings.search`` for the query, selecting
        ``id``, ``text``, ``topic``, ``evidence_id`` and ``score``.
    """
    # NOTE(review): all values are interpolated straight into the
    # statement, so a single quote in any of them changes the query.
    sql = (
        "select id, text, topic, evidence_id, score from txtai "
        f"where similar('{text}') "
        f"and text like '%{rerank_word}%' "
        f"and topic like '%{rerank_topic}%' "
        f"limit {limit}"
    )
    return embeddings.search(sql)
118
+
119
+
120
+
121
# Main-page form; so far it only shows a prompt and a submit button.
query_form = st.form("Query the Index:")
query_form.write("Write a SQL query")
query_form_submitted = query_form.form_submit_button("Click me to get ")

# Example path rendering, currently disabled:
# showpath_any([3, 12, 15])

# Collapsed expander that displays the raw rows of a fixed sample query.
with st.expander("mine", expanded=False):
    st.write(
        embeddings.search(
            "select * from txtai where similar('you') and text like '%the%' limit 10"
        )
    )