Spaces:

rxn4chemistry
/

synthesis-protocol-extraction

Sleeping

Alain Vaucher committed on Apr 19, 2023

Commit

80ffb8e

1 Parent(s): 4d89100

Explicitly download the CDE data; add logs

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import functools
 import html
 import logging
 import traceback
 from pathlib import Path
 from typing import List
@@ -10,7 +11,7 @@ import pandas as pd
 from rxn.utilities.logging import setup_console_logger
 from rxn.utilities.strings import remove_postfix
-from utils import TranslatorWithSentencePiece, split_into_sentences
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())
@@ -64,6 +65,10 @@ def sentence_and_actions_to_html(
 def try_action_extraction(model_type: str, text: str, show_sentences: bool) -> str:
     model = load_model(model_type)
     sentences = split_into_sentences(text)
     action_strings = model.translate(sentences)

 import functools
 import html
 import logging
+import textwrap
 import traceback
 from pathlib import Path
 from typing import List
 from rxn.utilities.logging import setup_console_logger
 from rxn.utilities.strings import remove_postfix
+from utils import TranslatorWithSentencePiece, download_cde_data, split_into_sentences
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())
 def try_action_extraction(model_type: str, text: str, show_sentences: bool) -> str:
+    logger.info(f'Extracting actions from paragraph "{textwrap.shorten(text, 60)}".')
+    download_cde_data()
     model = load_model(model_type)
     sentences = split_into_sentences(text)
     action_strings = model.translate(sentences)

utils.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import logging
 from typing import Iterable, Iterator, List, Union
-import sentencepiece as spm
 import chemdataextractor
 from rxn.onmt_utils.internal_translation_utils import TranslationResult
 from rxn.onmt_utils.translator import Translator
@@ -10,6 +11,16 @@ logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())
 def split_into_sentences(text: str) -> List[str]:
     paragraph = chemdataextractor.doc.Paragraph(text)
     return [sentence.text for sentence in paragraph.sentences]

 import logging
 from typing import Iterable, Iterator, List, Union
 import chemdataextractor
+import sentencepiece as spm
+from chemdataextractor.data import Package
 from rxn.onmt_utils.internal_translation_utils import TranslationResult
 from rxn.onmt_utils.translator import Translator
 logger.addHandler(logging.NullHandler())
+def download_cde_data() -> None:
+    package = Package("models/punkt_chem-1.0.pickle")
+    if package.local_exists():
+        return
+    logger.info("Downloading the necessary ChemDataExtractor data...")
+    package.download()
+    logger.info("Downloading the necessary ChemDataExtractor data... Done.")
 def split_into_sentences(text: str) -> List[str]:
     paragraph = chemdataextractor.doc.Paragraph(text)
     return [sentence.text for sentence in paragraph.sentences]