Alain Vaucher committed on
Commit
80ffb8e
·
1 Parent(s): 4d89100

Explicitly download the CDE data; add logs

Browse files
Files changed (2) hide show
  1. app.py +6 -1
  2. utils.py +12 -1
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import functools
2
  import html
3
  import logging
 
4
  import traceback
5
  from pathlib import Path
6
  from typing import List
@@ -10,7 +11,7 @@ import pandas as pd
10
  from rxn.utilities.logging import setup_console_logger
11
  from rxn.utilities.strings import remove_postfix
12
 
13
- from utils import TranslatorWithSentencePiece, split_into_sentences
14
 
15
  logger = logging.getLogger(__name__)
16
  logger.addHandler(logging.NullHandler())
@@ -64,6 +65,10 @@ def sentence_and_actions_to_html(
64
 
65
 
66
  def try_action_extraction(model_type: str, text: str, show_sentences: bool) -> str:
 
 
 
 
67
  model = load_model(model_type)
68
  sentences = split_into_sentences(text)
69
  action_strings = model.translate(sentences)
 
1
  import functools
2
  import html
3
  import logging
4
+ import textwrap
5
  import traceback
6
  from pathlib import Path
7
  from typing import List
 
11
  from rxn.utilities.logging import setup_console_logger
12
  from rxn.utilities.strings import remove_postfix
13
 
14
+ from utils import TranslatorWithSentencePiece, download_cde_data, split_into_sentences
15
 
16
  logger = logging.getLogger(__name__)
17
  logger.addHandler(logging.NullHandler())
 
65
 
66
 
67
  def try_action_extraction(model_type: str, text: str, show_sentences: bool) -> str:
68
+ logger.info(f'Extracting actions from paragraph "{textwrap.shorten(text, 60)}".')
69
+
70
+ download_cde_data()
71
+
72
  model = load_model(model_type)
73
  sentences = split_into_sentences(text)
74
  action_strings = model.translate(sentences)
utils.py CHANGED
@@ -1,8 +1,9 @@
1
  import logging
2
  from typing import Iterable, Iterator, List, Union
3
 
4
- import sentencepiece as spm
5
  import chemdataextractor
 
 
6
  from rxn.onmt_utils.internal_translation_utils import TranslationResult
7
  from rxn.onmt_utils.translator import Translator
8
 
@@ -10,6 +11,16 @@ logger = logging.getLogger(__name__)
10
  logger.addHandler(logging.NullHandler())
11
 
12
 
 
 
 
 
 
 
 
 
 
 
13
  def split_into_sentences(text: str) -> List[str]:
14
  paragraph = chemdataextractor.doc.Paragraph(text)
15
  return [sentence.text for sentence in paragraph.sentences]
 
1
  import logging
2
  from typing import Iterable, Iterator, List, Union
3
 
 
4
  import chemdataextractor
5
+ import sentencepiece as spm
6
+ from chemdataextractor.data import Package
7
  from rxn.onmt_utils.internal_translation_utils import TranslationResult
8
  from rxn.onmt_utils.translator import Translator
9
 
 
11
  logger.addHandler(logging.NullHandler())
12
 
13
 
14
+ def download_cde_data() -> None:
15
+ package = Package("models/punkt_chem-1.0.pickle")
16
+ if package.local_exists():
17
+ return
18
+
19
+ logger.info("Downloading the necessary ChemDataExtractor data...")
20
+ package.download()
21
+ logger.info("Downloading the necessary ChemDataExtractor data... Done.")
22
+
23
+
24
  def split_into_sentences(text: str) -> List[str]:
25
  paragraph = chemdataextractor.doc.Paragraph(text)
26
  return [sentence.text for sentence in paragraph.sentences]