MatthiasC committed on
Commit
1fafa62
·
1 Parent(s): 9a6d6d1

Change text here and there

Browse files
Files changed (1) hide show
  1. app.py +29 -8
app.py CHANGED
@@ -223,6 +223,24 @@ def highlight_entities():
223
  return HTML_WRAPPER.format(soup)
224
 
225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  def render_dependency_parsing(text: Dict):
227
  html = render_sentence_custom(text, nlp)
228
  html = html.replace("\n\n", "\n")
@@ -433,7 +451,7 @@ if summarize_button:
433
  # DEPENDENCY PARSING PART
434
  st.header("2️⃣ Dependency comparison")
435
  st.markdown(
436
- "The second method we use for post-processing is called **Dependency parsing**: the process in which the "
437
  "grammatical structure in a sentence is analysed, to find out related words as well as the type of the "
438
  "relationship between them. For the sentence “Jan’s wife is called Sarah” you would get the following "
439
  "dependency graph:")
@@ -455,7 +473,7 @@ if summarize_button:
455
  "dependencies between article and summary (as we did with entity matching) would not be a robust method."
456
  " More on the different sorts of dependencies and their description can be found [here](https://universaldependencies.org/docs/en/dep/).")
457
  st.markdown("However, we have found that **there are specific dependencies that are often an "
458
- "indication of a wrongly constructed sentence** -when there is no article match. We (currently) use 2 "
459
  "common dependencies which - when present in the summary but not in the article - are highly "
460
  "indicative of factualness errors. "
461
  "Furthermore, we only check dependencies between an existing **entity** and its direct connections. "
@@ -489,16 +507,18 @@ if summarize_button:
489
  "empirically tested they are definitely not sufficiently robust for general use-cases.")
490
  st.markdown("####")
491
  st.markdown(
492
- "Below we generate 3 different kind of summaries, and based on the two discussed methods, their errors are "
493
- "detected to estimate a factualness score. Based on this basic approach, "
494
  "the best summary (read: the one that a human would prefer or indicate as the best one) "
495
- "will hopefully be at the top. Summaries with the same scores will get the same rank displayed. We currently "
496
  "only do this for the example articles (for which the different summmaries are already generated). The reason "
497
- "for this is that HuggingFace spaces are limited in their CPU memory.")
 
 
498
  st.markdown("####")
499
 
500
  if selected_article != "Provide your own input" and article_text == fetch_article_contents(selected_article):
501
- with st.spinner("Calculating more summaries and scoring them, this might take a minute or two..."):
502
  summaries_list = []
503
  deduction_points = []
504
 
@@ -524,7 +544,8 @@ if summarize_button:
524
  cur_rank = 1
525
  rank_downgrade = 0
526
  for i in range(len(deduction_points)):
527
- st.write(f'🏆 Rank {cur_rank} summary: 🏆', display_summary(summaries_list[i]), unsafe_allow_html=True)
 
528
  if i < len(deduction_points) - 1:
529
  rank_downgrade += 1
530
  if not deduction_points[i + 1] == deduction_points[i]:
 
223
  return HTML_WRAPPER.format(soup)
224
 
225
 
226
def highlight_entities_new(summary_str: str):
    """Return the summary as wrapped HTML with its entities colour-highlighted.

    The summary is first stored in ``st.session_state.summary_output`` and
    ``get_and_compare_entities(False)`` is then called to obtain the matched
    and unmatched entities (presumably it reads the summary from the session
    state -- confirm against its definition). Matched entities are wrapped in
    a green ``<mark>``, unmatched entities in a red ``<mark>``.

    All entities are substituted in ONE pass over the original text. Chained
    ``str.replace`` calls would re-scan markup inserted by earlier
    replacements, so an entity such as "135" or "mark" would corrupt the
    generated tags, and an entity contained in a longer one would produce
    nested marks.

    Args:
        summary_str: The plain summary text to highlight.

    Returns:
        ``HTML_WRAPPER`` formatted with the parsed, highlighted summary.
    """
    import re  # local import so this block does not depend on file-level imports

    st.session_state.summary_output = summary_str
    markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
    markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
    markdown_end = "</mark>"

    matched_entities, unmatched_entities = get_and_compare_entities(False)

    # Map each entity to its opening tag. Unmatched entities are registered
    # last so that red wins if an entity appears in both collections.
    # Empty entities are skipped: they would match between every character.
    opening_tags = {}
    for entity in matched_entities:
        if entity:
            opening_tags[entity] = markdown_start_green
    for entity in unmatched_entities:
        if entity:
            opening_tags[entity] = markdown_start_red

    summary_content = summary_str
    if opening_tags:
        # Longest alternatives first, so "Jan Smith" is preferred over "Jan".
        alternatives = sorted(opening_tags, key=len, reverse=True)
        pattern = re.compile("|".join(re.escape(entity) for entity in alternatives))
        summary_content = pattern.sub(
            lambda match: opening_tags[match.group(0)] + match.group(0) + markdown_end,
            summary_str,
        )

    soup = BeautifulSoup(summary_content, features="html.parser")
    return HTML_WRAPPER.format(soup)
242
+
243
+
244
  def render_dependency_parsing(text: Dict):
245
  html = render_sentence_custom(text, nlp)
246
  html = html.replace("\n\n", "\n")
 
451
  # DEPENDENCY PARSING PART
452
  st.header("2️⃣ Dependency comparison")
453
  st.markdown(
454
+ "The second method we use for post-processing is called **Dependency Parsing**: the process in which the "
455
  "grammatical structure in a sentence is analysed, to find out related words as well as the type of the "
456
  "relationship between them. For the sentence “Jan’s wife is called Sarah” you would get the following "
457
  "dependency graph:")
 
473
  "dependencies between article and summary (as we did with entity matching) would not be a robust method."
474
  " More on the different sorts of dependencies and their description can be found [here](https://universaldependencies.org/docs/en/dep/).")
475
  st.markdown("However, we have found that **there are specific dependencies that are often an "
476
+ "indication of a wrongly constructed sentence** when there is no article match. We (currently) use 2 "
477
  "common dependencies which - when present in the summary but not in the article - are highly "
478
  "indicative of factualness errors. "
479
  "Furthermore, we only check dependencies between an existing **entity** and its direct connections. "
 
507
  "empirically tested they are definitely not sufficiently robust for general use-cases.")
508
  st.markdown("####")
509
  st.markdown(
510
+ "*Below we generate 3 different kind of summaries, and based on the two discussed methods, their errors are "
511
+ "detected to estimate a summary score. Based on this basic approach, "
512
  "the best summary (read: the one that a human would prefer or indicate as the best one) "
513
+ "will hopefully be at the top. We currently "
514
  "only do this for the example articles (for which the different summmaries are already generated). The reason "
515
+ "for this is that HuggingFace spaces are limited in their CPU memory. We also highlight the entities as done "
516
+ "before, but note that the rankings are done on a combination of unmatched entities and "
517
+ "dependencies (with the latter not shown here).*")
518
  st.markdown("####")
519
 
520
  if selected_article != "Provide your own input" and article_text == fetch_article_contents(selected_article):
521
+ with st.spinner("Fetching summaries, ranking them and highlighting entities, this might take a minute or two..."):
522
  summaries_list = []
523
  deduction_points = []
524
 
 
544
  cur_rank = 1
545
  rank_downgrade = 0
546
  for i in range(len(deduction_points)):
547
+ #st.write(f'🏆 Rank {cur_rank} summary: 🏆', display_summary(summaries_list[i]), unsafe_allow_html=True)
548
+ st.write(f'🏆 Rank {cur_rank} summary: 🏆', highlight_entities_new(summaries_list[i]), unsafe_allow_html=True)
549
  if i < len(deduction_points) - 1:
550
  rank_downgrade += 1
551
  if not deduction_points[i + 1] == deduction_points[i]: