jannisborn
committed on
wip: initial version
Browse files- .gitignore +1 -0
- LICENSE +21 -0
- README.md +8 -7
- app.py +79 -0
- model_cards/article.md +23 -0
- model_cards/description.md +7 -0
- model_cards/examples.csv +5 -0
- requirements.txt +28 -0
- utils.py +49 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
__pycache__/
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2022 Generative Toolkit 4 Scientific Discovery
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
CHANGED
@@ -1,13 +1,14 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 3.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
|
|
|
11 |
---
|
12 |
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: MoLeR
|
3 |
+
emoji: 💡
|
4 |
+
colorFrom: green
|
5 |
+
colorTo: blue
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 3.9.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
+
python_version: 3.8.13
|
11 |
+
pypi_version: 20.2.4
|
12 |
---
|
13 |
|
14 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import pathlib
|
3 |
+
|
4 |
+
import gradio as gr
|
5 |
+
import pandas as pd
|
6 |
+
from gt4sd.algorithms.generation.moler import MoLeR, MoLeRDefaultGenerator
|
7 |
+
|
8 |
+
from gt4sd.algorithms.registry import ApplicationsRegistry
|
9 |
+
from utils import draw_grid_generate
|
10 |
+
|
11 |
+
logger = logging.getLogger(__name__)
|
12 |
+
logger.addHandler(logging.NullHandler())
|
13 |
+
|
14 |
+
TITLE = "MoLeR"
|
15 |
+
|
16 |
+
|
17 |
+
def run_inference(
    algorithm_version: str,
    scaffolds: str,
    beam_size: int,
    number_of_samples: int,
    seed: int,
) -> str:
    """
    Sample from MoLeR and render the samples with `draw_grid_generate`.

    Args:
        algorithm_version: Algorithm version forwarded to the generator
            configuration.
        scaffolds: Scaffolds forwarded to the generator configuration.
        beam_size: Beam size forwarded to the generator configuration.
        number_of_samples: Number of items requested from `model.sample`.
        seed: Seed forwarded to the generator configuration.

    Returns:
        The value produced by `draw_grid_generate` for the drawn samples
        (HTML to display).
    """
    config = MoLeRDefaultGenerator(
        algorithm_version=algorithm_version,
        scaffolds=scaffolds,
        beam_size=beam_size,
        # NOTE(review): fixed at 4 while `number_of_samples` is what is
        # passed to `sample()` below; presumably a per-call batch size --
        # confirm against the gt4sd MoLeR configuration.
        num_samples=4,
        seed=seed,
    )
    model = MoLeR(configuration=config)
    samples = list(model.sample(number_of_samples))

    # Hand the rendered grid back to the caller; without this `return` the
    # function always yields None and the output component has nothing to show.
    return draw_grid_generate(samples)
|
35 |
+
|
36 |
+
|
37 |
+
if __name__ == "__main__":

    # Versions of every registered algorithm whose name contains TITLE.
    algos = [
        entry["algorithm_version"]
        for entry in ApplicationsRegistry.list_available()
        if TITLE in entry["algorithm_name"]
    ]

    # Model-card assets are read from the folder next to this script.
    metadata_root = pathlib.Path(__file__).parent / "model_cards"

    raw_examples = pd.read_csv(metadata_root / "examples.csv", header=None)
    examples = raw_examples.fillna("")

    with open(metadata_root / "article.md", "r") as article_file:
        article = article_file.read()
    with open(metadata_root / "description.md", "r") as description_file:
        description = description_file.read()

    # Input widgets, created in the same order as run_inference's parameters.
    version_input = gr.Dropdown(algos, label="Algorithm version", value="v0")
    scaffolds_input = gr.Textbox(
        label="Scaffolds",
        placeholder="CC(C#C)N(C)C(=O)NC1=CC=C(Cl)C=C1",
        lines=1,
    )
    beam_size_input = gr.Slider(
        minimum=1, maximum=5, value=1, step=1, label="Beam_size"
    )
    sample_count_input = gr.Slider(
        minimum=1, maximum=50, value=10, label="Number of samples", step=1
    )
    seed_input = gr.Number(value=42, label="Seed", precision=0)

    demo = gr.Interface(
        fn=run_inference,
        title=TITLE,
        inputs=[
            version_input,
            scaffolds_input,
            beam_size_input,
            sample_count_input,
            seed_input,
        ],
        outputs=gr.HTML(label="Output"),
        article=article,
        description=description,
        examples=examples.values.tolist(),
    )
    demo.launch(debug=True, show_error=True)
|
model_cards/article.md
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Model card -- MoLeR
|
2 |
+
|
3 |
+
## Parameters
|
4 |
+
|
5 |
+
### Algorithm version
|
6 |
+
Which model checkpoint to use (trained on different datasets).
|
7 |
+
|
8 |
+
### Number of samples
|
9 |
+
How many samples should be generated (between 1 and 50).
|
10 |
+
|
11 |
+
|
12 |
+
## Citation
|
13 |
+
|
14 |
+
```bib
|
15 |
+
@inproceedings{maziarz2021learning,
|
16 |
+
author={Krzysztof Maziarz and Henry Richard Jackson{-}Flux and Pashmina Cameron and
|
17 |
+
Finton Sirockin and Nadine Schneider and Nikolaus Stiefl and Marwin H. S. Segler and Marc Brockschmidt},
|
18 |
+
title = {Learning to Extend Molecular Scaffolds with Structural Motifs},
|
19 |
+
booktitle = {The Tenth International Conference on Learning Representations, {ICLR}},
|
20 |
+
year = {2022}
|
21 |
+
}
|
22 |
+
```
|
23 |
+
|
model_cards/description.md
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
### Concurrent sequence regression and generation for molecular language modeling
|
3 |
+
|
4 |
+
The Regression Transformer (RT) is a multitask Transformer that reformulates regression as a conditional sequence modeling task.
|
5 |
+
This yields a dichotomous language model that seamlessly integrates property prediction with property-driven conditional generation. For details see the [arXiv preprint](https://arxiv.org/abs/2202.01338), the [development code](https://github.com/IBM/regression-transformer) and the [GT4SD endpoint](https://github.com/GT4SD/gt4sd-core) for inference.
|
6 |
+
|
7 |
+
Each `algorithm_version` refers to one trained model. Each model can be used for **two tasks**, either to *predict* one (or multiple) properties of a molecule or to *generate* a molecule (given a seed molecule and a property constraint).
|
model_cards/examples.csv
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
v0,,1,4,0
|
2 |
+
v0,CC(=O)NC1=NC2=CC(OCC3=CC=CN(CC4=CC=C(Cl)C=C4)C3=O)=CC=C2N1,1,10,0
|
3 |
+
v0,C12C=CC=NN1C(C#CC1=C(C)C=CC3C(NC4=CC(C(F)(F)F)=CC=C4)=NOC1=3)=CN=2,3,5,5
|
4 |
+
|
5 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
-f https://download.pytorch.org/whl/cpu/torch_stable.html
|
2 |
+
-f https://data.pyg.org/whl/torch-1.12.1+cpu.html
|
3 |
+
# pip==20.2.4
|
4 |
+
torch==1.12.1
|
5 |
+
torch-scatter
|
6 |
+
torch-spline-conv
|
7 |
+
torch-sparse
|
8 |
+
torch-geometric
|
9 |
+
torchvision==0.13.1
|
10 |
+
torchaudio==0.12.1
|
11 |
+
gt4sd>=1.0.0
|
12 |
+
molgx>=0.22.0a1
|
13 |
+
molecule_generation
|
14 |
+
nglview
|
15 |
+
PyTDC==0.3.7
|
16 |
+
gradio==3.12.0
|
17 |
+
markdown-it-py>=2.1.0
|
18 |
+
mols2grid>=0.2.0
|
19 |
+
pandas>=1.0.0
|
20 |
+
terminator @ git+https://github.com/IBM/regression-transformer@gt4sd
|
21 |
+
guacamol_baselines @ git+https://github.com/GT4SD/[email protected]
|
22 |
+
moses @ git+https://github.com/GT4SD/[email protected]
|
23 |
+
paccmann_chemistry @ git+https://github.com/PaccMann/[email protected]
|
24 |
+
paccmann_generator @ git+https://github.com/PaccMann/[email protected]
|
25 |
+
paccmann_gp @ git+https://github.com/PaccMann/[email protected]
|
26 |
+
paccmann_omics @ git+https://github.com/PaccMann/[email protected]
|
27 |
+
paccmann_predictor @ git+https://github.com/PaccMann/paccmann_predictor@sarscov2
|
28 |
+
reinvent_models @ git+https://github.com/GT4SD/[email protected]
|
utils.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import logging
|
3 |
+
import os
|
4 |
+
from collections import defaultdict
|
5 |
+
from typing import Dict, List, Tuple
|
6 |
+
|
7 |
+
import mols2grid
|
8 |
+
import pandas as pd
|
9 |
+
from rdkit import Chem
|
10 |
+
from terminator.selfies import decoder
|
11 |
+
|
12 |
+
logger = logging.getLogger(__name__)
|
13 |
+
logger.addHandler(logging.NullHandler())
|
14 |
+
|
15 |
+
|
16 |
+
def draw_grid_generate(
    samples: List[Tuple[str]],
    domain: str = "molecules",
    n_cols: int = 5,
    size=(140, 200),
) -> str:
    """
    Uses mols2grid to draw a HTML grid for the generated molecules

    Args:
        samples: The generated samples.
        domain: Not used by this function; kept so existing callers that
            pass it keep working. Defaults to "molecules".
        n_cols: Number of columns in grid. Defaults to 5.
        size: Size of molecule in grid. Defaults to (140, 200).

    Returns:
        HTML to display
    """

    # One row per sample: the sample itself plus a running "sample_<i>" name.
    result = {
        "SMILES": samples,
        "Name": [f"sample_{i}" for i in range(len(samples))],
    }

    result_df = pd.DataFrame(result)
    # Routed through the module logger instead of an unconditional print so
    # the table only shows up when debug logging is enabled.
    logger.debug("Generated samples:\n%s", result_df)
    obj = mols2grid.display(
        result_df,
        tooltip=list(result.keys()),
        height=1100,
        n_cols=n_cols,
        name="Results",
        size=size,
    )
    return obj.data
|