jeremyarancio committed on
Commit
161967b
·
1 Parent(s): 61d05d6

fix: :zap: Handle Server timeout errors + Add whitespace additions and deletions in text diff

Browse files
Files changed (5) hide show
  1. app.py +7 -6
  2. back_end.py +4 -0
  3. tests/__init__.py +0 -0
  4. tests/test_compute_diff.py +43 -0
  5. utils.py +18 -3
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
 
3
  from back_end import next_annotation, submit_correction, enable_buttons
4
- from utils import diff_texts, LANGS
5
 
6
 
7
  ## COMPONENTS
@@ -11,10 +11,11 @@ with gr.Blocks() as demo:
11
  ### Review the Spellcheck corrections. Your precious feedback will be integrated to the Open Food Facts database!
12
 
13
  ### Instructions:
14
- * *You are provided the original list of ingredients text as stored in the Open Food Facts (OFF) database, the Spellcheck prediction, and optionally a picture of the product.*
15
- * *Your task, if you accept 💣, is to review the Spellcheck prediction by either validating or correcting it.*
16
- * *The picture is only here to help you during the annotation as a reference. It can happen that the language of the text and the picture are different. **Keep calm and focus on the text.***
17
- * *It can happen that the Producer has made a mistake on the product packaging. Since we parse the list of ingredients to extract its information, it would be preferable if you fix the typo.*
 
18
 
19
  ### Important:
20
  * Authenticate yourself using your Open Food Facts username and password to add modifications to a product. If you're not registered yet, you can do so [here](https://world.openfoodfacts.org/cgi/user.pl)!
@@ -99,7 +100,7 @@ with gr.Blocks() as demo:
99
  )
100
 
101
  annotator_correction.change(
102
- diff_texts, # Call diff function
103
  inputs=[original, annotator_correction],
104
  outputs=diff_display,
105
  )
 
1
  import gradio as gr
2
 
3
  from back_end import next_annotation, submit_correction, enable_buttons
4
+ from utils import compute_diff, LANGS
5
 
6
 
7
  ## COMPONENTS
 
11
  ### Review the Spellcheck corrections. Your precious feedback will be integrated to the Open Food Facts database!
12
 
13
  ### Instructions:
14
+ * You are provided the original list of ingredients text as stored in the Open Food Facts (OFF) database, the Spellcheck prediction, and optionally a picture of the product.
15
+ * Your task, if you accept 💣, is to review the Spellcheck prediction by either validating or correcting it.
16
+ * The picture is only here to help you during the annotation as a reference. It can happen that the language of the text and the picture are different. **Keep calm and focus on the text.**
17
+ * It can happen that the Producer has made a mistake on the product packaging. Since we parse the list of ingredients to extract its information, it would be preferable if you fix the typo.
18
+ * Deleted whitespaces are indicated as `#` and additional whitespaces are indicated as `^`.
19
 
20
  ### Important:
21
  * Authenticate yourself using your Open Food Facts username and password to add modifications to a product. If you're not registered yet, you can do so [here](https://world.openfoodfacts.org/cgi/user.pl)!
 
100
  )
101
 
102
  annotator_correction.change(
103
+ compute_diff, # Call diff function
104
  inputs=[original, annotator_correction],
105
  outputs=diff_display,
106
  )
back_end.py CHANGED
@@ -98,6 +98,8 @@ def import_random_insight(
98
  else:
99
  gr.Warning("No insights found; fetching default insight instead.")
100
  return import_random_insight(insight_type, predictor)
 
 
101
  except requests.RequestException as e:
102
  gr.Error(f"Import product from Product Opener failed: {e}")
103
 
@@ -136,6 +138,8 @@ def submit_to_product_opener(
136
  try:
137
  response = requests.post(url, data=payload, headers=headers)
138
  response.raise_for_status()
 
 
139
  except requests.RequestException as e:
140
  logger.error(e)
141
  logger.error(response.content)
 
98
  else:
99
  gr.Warning("No insights found; fetching default insight instead.")
100
  return import_random_insight(insight_type, predictor)
101
+ except requests.ReadTimeout:
102
+ gr.Error("There's an issue with the server... Wait a couple of minutes and try again.")
103
  except requests.RequestException as e:
104
  gr.Error(f"Import product from Product Opener failed: {e}")
105
 
 
138
  try:
139
  response = requests.post(url, data=payload, headers=headers)
140
  response.raise_for_status()
141
+ except requests.ReadTimeout:
142
+ gr.Error("There's an issue with the server... Wait a couple of minutes and try again.")
143
  except requests.RequestException as e:
144
  logger.error(e)
145
  logger.error(response.content)
tests/__init__.py ADDED
File without changes
tests/test_compute_diff.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ from utils import compute_diff
4
+
5
+
6
@pytest.mark.parametrize(
    "text1, text2, expected",
    [
        # A single inserted character, no whitespace involved.
        (
            "helo",
            "hello",
            [("h", None), ("e", None), ("l", None), ("l", "+"), ("o", None)],
        ),
        # A newline replaced by a space: the added space has no neighbour
        # carrying the same flag, so it is expected to come back as "^".
        (
            "helo\nworld",
            "hello world",
            [
                ("h", None),
                ("e", None),
                ("l", None),
                ("l", "+"),
                ("o", None),
                ("\n", "-"),
                ("^", "+"),
                ("w", None),
                ("o", None),
                ("r", None),
                ("l", None),
                ("d", None),
            ],
        ),
    ],
)
def test_compute_diff(text1, text2, expected):
    """Check that compute_diff yields the expected (character, flag) pairs."""
    assert list(compute_diff(text1, text2)) == expected
43
+
utils.py CHANGED
@@ -1,13 +1,28 @@
1
  from difflib import Differ
 
2
  import logging
3
 
4
 
5
def diff_texts(text1, text2):
    """Return the element-wise diff of two texts as (content, flag) pairs.

    Each entry produced by ``Differ.compare`` starts with a one-character
    code followed by a separator; the content begins at index 2. The flag
    is that leading code, or ``None`` when the code is a space.
    """
    differ = Differ()
    result = []
    for entry in differ.compare(text1, text2):
        marker = entry[0]
        result.append((entry[2:], None if marker == " " else marker))
    return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
 
13
  def get_logger():
 
1
  from difflib import Differ
2
+ from typing import Tuple, Iterable
3
  import logging
4
 
5
 
6
def compute_diff(text1, text2) -> Iterable[Tuple[str, str | None]]:
    """Diff two texts and return post-processed (content, flag) pairs.

    ``text1`` and ``text2`` are handed to ``Differ.compare``. For every
    entry it produces, the content is everything from index 2 onwards and
    the flag is the leading one-character code, replaced by ``None`` when
    that code is a space. The resulting list is then passed through
    ``_postprocess_compute_diff`` and its result is returned.
    """
    raw_pairs = []
    for entry in Differ().compare(text1, text2):
        marker = entry[0]
        raw_pairs.append((entry[2:], None if marker == " " else marker))
    return _postprocess_compute_diff(raw_pairs)
13
+
14
+
15
+ def _postprocess_compute_diff(pairs: Iterable[Tuple[str, str | None]]) -> Iterable[Tuple[str, str | None]]:
16
+ """Whitespace deletions add additions are missed by the diff component."""
17
+ for idx, (char, flag) in enumerate(pairs):
18
+ if char == " " and flag in ["+", "-"]:
19
+ if idx > 0 and idx < len(pairs):
20
+ if pairs[idx - 1][1] == flag or pairs[idx + 1][1] == flag:
21
+ yield (" ", flag)
22
+ else:
23
+ yield ("^", "+") if flag == "+" else ("#", "-")
24
+ else:
25
+ yield (char, flag)
26
 
27
 
28
  def get_logger():