TianlaiChen committed on
Commit
9b1cba9
·
1 Parent(s): 9fec676
Files changed (2) hide show
  1. app.py +16 -3
  2. requirements.txt +2 -1
app.py CHANGED
@@ -3,6 +3,7 @@ from transformers import AutoTokenizer, AutoModelForMaskedLM
3
  import torch
4
  from torch.distributions.categorical import Categorical
5
  import numpy as np
 
6
 
7
  # Load the model and tokenizer
8
  tokenizer = AutoTokenizer.from_pretrained("TianlaiChen/PepMLM-650M")
@@ -57,8 +58,17 @@ def generate_peptide(protein_seq, peptide_length, top_k, num_binders):
57
 
58
  # Add the generated binder and its PPL to the results list
59
  binders_with_ppl.append([generated_binder, ppl_value])
 
 
 
 
 
 
 
 
 
 
60
 
61
- return binders_with_ppl
62
 
63
  # Define the Gradio interface
64
  interface = gr.Interface(
@@ -67,13 +77,16 @@ interface = gr.Interface(
67
  gr.Textbox(label="Protein Sequence", info="Enter protein sequence here", type="text"),
68
  gr.Slider(3, 50, value=15, label="Peptide Length", step=1, info='Default value is 15'),
69
  gr.Slider(1, 10, value=3, label="Top K Value", step=1, info='Default value is 3'),
70
- gr.Dropdown(choices=[1, 2, 4, 8, 16, 32], label="Number of Binders", default=1)
71
  ],
72
- outputs=gr.Dataframe(
 
73
  headers=["Binder", "Perplexity"],
74
  datatype=["str", "number"],
75
  col_count=(2, "fixed")
76
  ),
 
 
77
  title="PepMLM: Target Sequence-Conditioned Generation of Peptide Binders via Masked Language Modeling"
78
  )
79
 
 
3
  import torch
4
  from torch.distributions.categorical import Categorical
5
  import numpy as np
6
+ import pandas as pd
7
 
8
  # Load the model and tokenizer
9
  tokenizer = AutoTokenizer.from_pretrained("TianlaiChen/PepMLM-650M")
 
58
 
59
  # Add the generated binder and its PPL to the results list
60
  binders_with_ppl.append([generated_binder, ppl_value])
61
+
62
+ # Convert the list of lists to a pandas dataframe
63
+ df = pd.DataFrame(binders_with_ppl, columns=["Binder", "Perplexity"])
64
+
65
+ # Save the dataframe to a CSV file
66
+ output_filename = "output.csv"
67
+ df.to_csv(output_filename, index=False)
68
+
69
+
70
+ return binders_with_ppl, output_filename
71
 
 
72
 
73
  # Define the Gradio interface
74
  interface = gr.Interface(
 
77
  gr.Textbox(label="Protein Sequence", info="Enter protein sequence here", type="text"),
78
  gr.Slider(3, 50, value=15, label="Peptide Length", step=1, info='Default value is 15'),
79
  gr.Slider(1, 10, value=3, label="Top K Value", step=1, info='Default value is 3'),
80
+ gr.Dropdown(choices=[1, 2, 4, 8, 16, 32], label="Number of Binders", value=1)
81
  ],
82
+ outputs=[
83
+ gr.Dataframe(
84
  headers=["Binder", "Perplexity"],
85
  datatype=["str", "number"],
86
  col_count=(2, "fixed")
87
  ),
88
+ gr.outputs.File(label="Download CSV")
89
+ ],
90
  title="PepMLM: Target Sequence-Conditioned Generation of Peptide Binders via Masked Language Modeling"
91
  )
92
 
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  transformers
2
  gradio
3
  torch
4
- numpy
 
 
1
  transformers
2
  gradio
3
  torch
4
+ numpy
5
+ pandas