Spaces:

openfoodfacts
/

ingredients-spellcheck-annotate

Running

App Files Files Community

jeremyarancio committed on Nov 5, 2024

Commit

161967b

1 Parent(s): 61d05d6

fix: :zap: Handle Server timeout errors + Add whitespace additions and deletions in text diff

Browse files

Files changed (5) hide show

app.py +7 -6
back_end.py +4 -0
tests/__init__.py +0 -0
tests/test_compute_diff.py +43 -0
utils.py +18 -3

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import gradio as gr
 from back_end import next_annotation, submit_correction, enable_buttons
-from utils import diff_texts, LANGS
 ## COMPONENTS
@@ -11,10 +11,11 @@ with gr.Blocks() as demo:
         ### Review the Spellcheck corrections. Your precious feedback will be integrated to the Open Food Facts database!
         ### Instructions:
-        * *You are provided the original list of ingredients text as stored in the Open Food Facts (OFF) database, the Spellcheck prediction, and optionally a picture of the product.*
-        * *Your task, if you accept 💣, is to review the Spellcheck prediction by either validating or correcting it.*
-        * *The picture is only here to help you during the annotation as a reference. It can happen that the language of the text and the picture are different. **Keep calm and focus on the text.***
-        * *It can happen that the Producer has made a mistake on the product packaging. Since we parse the list of ingredients to extract its information, it would be preferable if you fix the typo.*
         ### Important:
         * Authenticate yourself using your Open Food Facts username and password to add modifications to a product. If you're not registered yet, you can do so [here](https://world.openfoodfacts.org/cgi/user.pl)!
@@ -99,7 +100,7 @@ with gr.Blocks() as demo:
     )
     annotator_correction.change(
-        diff_texts,  # Call diff function
         inputs=[original, annotator_correction],
         outputs=diff_display,
     )

 import gradio as gr
 from back_end import next_annotation, submit_correction, enable_buttons
+from utils import compute_diff, LANGS
 ## COMPONENTS
         ### Review the Spellcheck corrections. Your precious feedback will be integrated to the Open Food Facts database!
         ### Instructions:
+        * You are provided the original list of ingredients text as stored in the Open Food Facts (OFF) database, the Spellcheck prediction, and optionally a picture of the product.
+        * Your task, if you accept 💣, is to review the Spellcheck prediction by either validating or correcting it.
+        * The picture is only here to help you during the annotation as a reference. It can happen that the language of the text and the picture are different. **Keep calm and focus on the text.**
+        * It can happen that the Producer has made a mistake on the product packaging. Since we parse the list of ingredients to extract its information, it would be preferable if you fix the typo.
+        * Deleted whitespaces are indicated as `#` and additional whitespaces are indicated as `^`.
         ### Important:
         * Authenticate yourself using your Open Food Facts username and password to add modifications to a product. If you're not registered yet, you can do so [here](https://world.openfoodfacts.org/cgi/user.pl)!
     )
     annotator_correction.change(
+        compute_diff,  # Call diff function
         inputs=[original, annotator_correction],
         outputs=diff_display,
     )

back_end.py CHANGED Viewed

@@ -98,6 +98,8 @@ def import_random_insight(
         else:
             gr.Warning("No insights found; fetching default insight instead.")
             return import_random_insight(insight_type, predictor)
     except requests.RequestException as e:
         gr.Error(f"Import product from Product Opener failed: {e}")
@@ -136,6 +138,8 @@ def submit_to_product_opener(
     try:
         response = requests.post(url, data=payload, headers=headers)
         response.raise_for_status()
     except requests.RequestException as e:
         logger.error(e)
         logger.error(response.content)

         else:
             gr.Warning("No insights found; fetching default insight instead.")
             return import_random_insight(insight_type, predictor)
+    except requests.ReadTimeout:
+        gr.Error("There's an issue with the server... Wait a couple of minutes and try again.")
     except requests.RequestException as e:
         gr.Error(f"Import product from Product Opener failed: {e}")
     try:
         response = requests.post(url, data=payload, headers=headers)
         response.raise_for_status()
+    except requests.ReadTimeout:
+        gr.Error("There's an issue with the server... Wait a couple of minutes and try again.")
     except requests.RequestException as e:
         logger.error(e)
         logger.error(response.content)

tests/__init__.py ADDED Viewed

File without changes

tests/test_compute_diff.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import pytest
+from utils import compute_diff
+@pytest.mark.parametrize(
+    "text1, text2, expected",
+    [
+        (
+            "helo",
+            "hello",
+            [
+                ("h", None),
+                ("e", None),
+                ("l", None),
+                ("l", "+"),
+                ("o", None),
+            ]
+        ),
+        (
+            "helo\nworld",
+            "hello world",
+            [
+                ("h", None),
+                ("e", None),
+                ("l", None),
+                ("l", "+"),
+                ("o", None),
+                ("\n", "-"),
+                ("^", "+"),
+                ("w", None),
+                ("o", None),
+                ("r", None),
+                ("l", None),
+                ("d", None),
+            ]
+        ),
+    ]
+)
+def test_compute_diff(text1, text2, expected):
+    pairs = compute_diff(text1, text2)
+    assert list(pairs) == expected

utils.py CHANGED Viewed

@@ -1,13 +1,28 @@
 from difflib import Differ
 import logging
-def diff_texts(text1, text2):
     d = Differ()
-    return [
         (token[2:], token[0] if token[0] != " " else None)
         for token in d.compare(text1, text2)
-    ]
 def get_logger():

 from difflib import Differ
+from typing import Tuple, Iterable
 import logging
+def compute_diff(text1, text2) -> Iterable[Tuple[str, str | None]]:
     d = Differ()
+    pairs = [
         (token[2:], token[0] if token[0] != " " else None)
         for token in d.compare(text1, text2)
+    ]
+    return _postprocess_compute_diff(pairs)
+def _postprocess_compute_diff(pairs: Iterable[Tuple[str, str | None]]) -> Iterable[Tuple[str, str | None]]:
+    """Whitespace deletions add additions are missed by the diff component."""
+    for idx, (char, flag) in enumerate(pairs):
+        if char == " " and flag in ["+", "-"]:
+            if idx > 0 and idx < len(pairs):
+                if pairs[idx - 1][1] == flag or pairs[idx + 1][1] == flag:
+                    yield (" ", flag)
+                else:
+                    yield ("^", "+") if flag == "+" else ("#", "-")
+        else:
+            yield (char, flag)
 def get_logger():