Alain Vaucher
committed on
Commit
·
80ffb8e
1
Parent(s):
4d89100
Explicitly download the CDE data; add logs
Browse files
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import functools
|
2 |
import html
|
3 |
import logging
|
|
|
4 |
import traceback
|
5 |
from pathlib import Path
|
6 |
from typing import List
|
@@ -10,7 +11,7 @@ import pandas as pd
|
|
10 |
from rxn.utilities.logging import setup_console_logger
|
11 |
from rxn.utilities.strings import remove_postfix
|
12 |
|
13 |
-
from utils import TranslatorWithSentencePiece, split_into_sentences
|
14 |
|
15 |
logger = logging.getLogger(__name__)
|
16 |
logger.addHandler(logging.NullHandler())
|
@@ -64,6 +65,10 @@ def sentence_and_actions_to_html(
|
|
64 |
|
65 |
|
66 |
def try_action_extraction(model_type: str, text: str, show_sentences: bool) -> str:
|
|
|
|
|
|
|
|
|
67 |
model = load_model(model_type)
|
68 |
sentences = split_into_sentences(text)
|
69 |
action_strings = model.translate(sentences)
|
|
|
1 |
import functools
|
2 |
import html
|
3 |
import logging
|
4 |
+
import textwrap
|
5 |
import traceback
|
6 |
from pathlib import Path
|
7 |
from typing import List
|
|
|
11 |
from rxn.utilities.logging import setup_console_logger
|
12 |
from rxn.utilities.strings import remove_postfix
|
13 |
|
14 |
+
from utils import TranslatorWithSentencePiece, download_cde_data, split_into_sentences
|
15 |
|
16 |
logger = logging.getLogger(__name__)
|
17 |
logger.addHandler(logging.NullHandler())
|
|
|
65 |
|
66 |
|
67 |
def try_action_extraction(model_type: str, text: str, show_sentences: bool) -> str:
|
68 |
+
logger.info(f'Extracting actions from paragraph "{textwrap.shorten(text, 60)}".')
|
69 |
+
|
70 |
+
download_cde_data()
|
71 |
+
|
72 |
model = load_model(model_type)
|
73 |
sentences = split_into_sentences(text)
|
74 |
action_strings = model.translate(sentences)
|
utils.py
CHANGED
@@ -1,8 +1,9 @@
|
|
1 |
import logging
|
2 |
from typing import Iterable, Iterator, List, Union
|
3 |
|
4 |
-
import sentencepiece as spm
|
5 |
import chemdataextractor
|
|
|
|
|
6 |
from rxn.onmt_utils.internal_translation_utils import TranslationResult
|
7 |
from rxn.onmt_utils.translator import Translator
|
8 |
|
@@ -10,6 +11,16 @@ logger = logging.getLogger(__name__)
|
|
10 |
logger.addHandler(logging.NullHandler())
|
11 |
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
def split_into_sentences(text: str) -> List[str]:
|
14 |
paragraph = chemdataextractor.doc.Paragraph(text)
|
15 |
return [sentence.text for sentence in paragraph.sentences]
|
|
|
1 |
import logging
|
2 |
from typing import Iterable, Iterator, List, Union
|
3 |
|
|
|
4 |
import chemdataextractor
|
5 |
+
import sentencepiece as spm
|
6 |
+
from chemdataextractor.data import Package
|
7 |
from rxn.onmt_utils.internal_translation_utils import TranslationResult
|
8 |
from rxn.onmt_utils.translator import Translator
|
9 |
|
|
|
11 |
logger.addHandler(logging.NullHandler())
|
12 |
|
13 |
|
14 |
+
def download_cde_data() -> None:
|
15 |
+
package = Package("models/punkt_chem-1.0.pickle")
|
16 |
+
if package.local_exists():
|
17 |
+
return
|
18 |
+
|
19 |
+
logger.info("Downloading the necessary ChemDataExtractor data...")
|
20 |
+
package.download()
|
21 |
+
logger.info("Downloading the necessary ChemDataExtractor data... Done.")
|
22 |
+
|
23 |
+
|
24 |
def split_into_sentences(text: str) -> List[str]:
|
25 |
paragraph = chemdataextractor.doc.Paragraph(text)
|
26 |
return [sentence.text for sentence in paragraph.sentences]
|