# Spaces: Runtime error
import pandas as pd | |
from biopandas.pdb import PandasPdb | |
from prody import parsePDBHeader | |
def read_pdb_to_dataframe(
    pdb_path,
    model_index: int = 1,
    parse_header: bool = True,
) -> "tuple[pd.DataFrame, dict | None]":
    """
    Read a PDB file and return its atom records together with the parsed header.

    Args:
        pdb_path (str): Path to a local PDB file to read. Required.
        model_index (int, optional): Index of the model to extract from the PDB file, in case
            it contains multiple models. Defaults to 1.
        parse_header (bool, optional): Whether to parse the PDB header with ProDy's
            ``parsePDBHeader``. Defaults to True.

    Returns:
        tuple: ``(atoms, header)``. ``atoms`` is a DataFrame with one row per atom: the
        ATOM records of the selected model followed by its HETATM records. ``header`` is
        whatever ``parsePDBHeader(pdb_path)`` returned (a dict of header fields according
        to the ProDy documentation), or ``None`` when ``parse_header`` is False.

    Raises:
        ValueError: If the selected model contains no ATOM records.
    """
    atomic_df = PandasPdb().read_pdb(pdb_path)
    header = parsePDBHeader(pdb_path) if parse_header else None

    atomic_df = atomic_df.get_model(model_index)
    # Only the ATOM table is checked: a model holding nothing but HETATM
    # records is still treated as "not found".
    if len(atomic_df.df["ATOM"]) == 0:
        raise ValueError(f"No model found for index: {model_index}")

    atoms = pd.concat([atomic_df.df["ATOM"], atomic_df.df["HETATM"]])
    return atoms, header
from graphein.protein.graphs import label_node_id | |
def process_dataframe(df: pd.DataFrame, granularity='CA') -> pd.DataFrame:
    """
    Process a DataFrame of protein structure data to reduce ambiguity and simplify analysis.

    This function performs the following steps:
    1. Handles alternate locations for an atom: blank ``alt_loc`` values are treated as 'A'
       and only rows with ``alt_loc == 'A'`` are kept. Skipped when the column is absent.
    2. Passes the frame through graphein's ``label_node_id`` helper, which is expected to
       attach a node_id to each row.
    3. Keeps only the rows whose ``atom_name`` equals ``granularity``.

    The input frame is never modified; all work happens on copies.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame containing protein structure data to process. It must contain an
        'atom_name' column; an 'alt_loc' column is used when present.
    granularity : str, optional
        Atom name used both for node labelling and for the final row filter.
        Defaults to 'CA' (alpha carbon).

    Returns
    -------
    pd.DataFrame
        The filtered, labelled rows.
    """
    # Copy first: assigning to df['alt_loc'] on the caller's object would
    # silently alter a frame the caller may still use as its raw record.
    df = df.copy()

    # handle the case of alternative locations,
    # if so default to the 1st one = A
    if 'alt_loc' in df.columns:
        df['alt_loc'] = df['alt_loc'].replace('', 'A')
        # .copy() so that column assignment inside label_node_id operates on
        # an independent frame rather than on a slice.
        df = df.loc[df['alt_loc'] == 'A'].copy()

    df = label_node_id(df, granularity)
    return df.loc[df['atom_name'] == granularity]
from graphein.protein.graphs import initialise_graph_with_metadata | |
from graphein.protein.graphs import add_nodes_to_graph | |
from graphein.protein.visualisation import plotly_protein_structure_graph | |
from PIL import Image | |
import networkx as nx | |
def _add_backbone_edges(G: "nx.Graph") -> "nx.Graph":
    """Link residues with adjacent residue numbers in each chain by a 'backbone_bond' edge."""
    # Iterate over every chain
    for chain_id in G.graph["chain_ids"]:
        # Residues of this chain, in node insertion order
        chain_residues = [
            (n, v) for n, v in G.nodes(data=True) if v["chain_id"] == chain_id
        ]
        # Walk consecutive pairs; the last residue has no successor, so no
        # terminus check or IndexError handling is needed.
        for (node, data), (next_node, next_data) in zip(chain_residues, chain_residues[1:]):
            # Only residues whose numbers differ by exactly 1 are bonded
            if abs(data["residue_number"] - next_data["residue_number"]) != 1:
                continue
            # Look the edge up by node id (not by list position) so that an
            # existing edge gets the extra kind instead of being overwritten.
            if G.has_edge(node, next_node):
                G.edges[node, next_node]["kind"].add('backbone_bond')
            else:
                G.add_edge(node, next_node, kind={'backbone_bond'})
    return G


def take_care(pdb_path, *, pdb_code='3nir', granularity='CA', image_file="protein_graph.png"):
    """
    Build a backbone graph from a PDB file, render it with plotly and return it as an image.

    Steps: read the PDB file, reduce it to one row per residue at the chosen
    granularity, create a graph with one node per row, connect residues with
    adjacent residue numbers in each chain, plot the graph and export the plot
    to a PNG file.

    Args:
        pdb_path: Path to the local PDB file to visualise.
        pdb_code (str, optional): Identifier stored in the graph metadata.
            Defaults to '3nir' (the value that used to be hard-coded).
        granularity (str, optional): Atom name used to pick one atom per residue
            and recorded on the graph. Defaults to 'CA'.
        image_file (str, optional): Path the PNG is written to before being
            opened. Defaults to "protein_graph.png" in the working directory.

    Returns:
        PIL.Image.Image: The rendered graph, opened from ``image_file``.
    """
    # The parsed header is not needed for plotting.
    df, _header = read_pdb_to_dataframe(pdb_path)
    process_df = process_dataframe(df, granularity)

    g = initialise_graph_with_metadata(
        protein_df=process_df,
        raw_pdb_df=df,  # Store this for traceability
        pdb_code=pdb_code,
        granularity=granularity,  # Store this so we know what kind of graph we have
    )
    g = add_nodes_to_graph(g)
    g = _add_backbone_edges(g)

    p = plotly_protein_structure_graph(
        g,
        colour_edges_by="kind",
        colour_nodes_by="seq_position",
        label_node_ids=False,
        plot_title="Backbone Protein Graph",
        node_size_multiplier=1,
    )

    # NOTE(review): plotly static export needs an image backend (e.g. kaleido)
    # installed in the runtime -- confirm it is listed in the requirements.
    p.write_image(image_file, format='png')
    # Load the PNG image into a PIL image
    return Image.open(image_file)