## Installing GenNet

All code cells that begin with **%%bash** are executed as shell commands.

The requirements for GenNet can be found in the README of the GitHub repository: https://github.com/ArnovanHilten/GenNet

**Note:** GenNet can run on CPU and GPU. To use GenNet with your GPU, make sure the installed CUDA version is compatible with your TensorFlow version.

In [None]:
%%bash
# Original repository, kept for reference (not cloned by this cell):
# git clone https://github.com/ArnovanHilten/GenNet.git  # Cloning the repository
# Clone GenNet from the lnalinaf repository into ./GenNet.
git clone https://github.com/lnalinaf/GenNet.git

Cloning into 'GenNet'...


Move into the GenNet directory

Install the requirements

In [None]:
#@title Default title text
%%bash
pip3 install --upgrade pip  # updating pip is recommended but not a necessity
# navigate to the cloned folder
cd GenNet
pip install -r requirements_GenNet.txt # install all the requirements for GenNet.

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting jupyter>=1.0.0 (from -r requirements_GenNet.txt (line 5))
  Downloading jupyter-1.0.0-py2.py3-none-any.whl (2.7 kB)
Collecting protobuf<=3.20.2,>=3.11 (from -r requirements_GenNet.txt (line 18))
  Downloading protobuf-3.20.2-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 56.2 MB/s eta 0:00:00
Collecting kaleido (from -r requirements_GenNet.txt (line 20))
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 79.9/79.9 MB 11.4 MB/s eta 0:00:00
Collecting tensorflow==2.8.0 (from -r requirements_GenNet.txt (line 21))
  Downloading tensorflow-2.8.0-cp310-cp310-manylinux2010_x86_64.whl (497.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 497.6

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-metadata 1.13.1 requires protobuf<5,>=3.20.3, but you have protobuf 3.20.2 which is incompatible.


In [None]:
import os
# Work from inside the cloned repository; presumably this is what lets the
# GenNet_utils imports later in this cell resolve — TODO confirm.
os.chdir("/content/GenNet")
import sys
#sys.path.append("/content/root_build/")
#sys.path.append('/usr/local/lib/python3.7/dist-packages')
import glob
import numpy as np
import pandas as pd
#sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import matplotlib

# Select matplotlib's non-interactive 'agg' backend.
matplotlib.use('agg')
import tensorflow as tf
import tensorflow.keras as K
import scipy
import tables
# Set the Keras backend epsilon (numeric fuzz factor) to 1e-7.
tf.keras.backend.set_epsilon(0.0000001)
tf_version = tf.__version__


def _version_tuple(version_string):
    """Return the leading (major, minor, patch) numbers of a version string as ints.

    Only the leading digits of each dot-separated component are used, so
    suffixes such as 'rc0' are ignored; missing or non-numeric components
    count as 0. Example: '2.9.0rc1' -> (2, 9, 0), '2' -> (2, 0, 0).
    """
    numbers = []
    for piece in version_string.split('.')[:3]:
        digits = ''
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        numbers.append(int(digits) if digits else 0)
    while len(numbers) < 3:  # pad so tuples of equal length are compared
        numbers.append(0)
    return tuple(numbers)


# Compare versions numerically. Comparing the raw strings is lexicographic, so
# e.g. '1.9.0' <= '1.13.1' is False and '10.0' >= '2.0' is False, which would
# select the wrong LocallyDirected1D implementation.
tf_version_numbers = _version_tuple(tf_version)
if tf_version_numbers <= (1, 13, 1):
    from GenNet_utils.LocallyDirectedConnected import LocallyDirected1D
    print('= or less then 1.13.1: tensorflow version is', tf_version)
elif tf_version_numbers >= (2, 0, 0):
    from GenNet_utils.LocallyDirectedConnected_tf2 import LocallyDirected1D
    print('= or more then 2.0: tensorflow version is', tf_version)
else:
    # Versions between 1.13.1 and 2.0: fall back to the TF2 implementation.
    print("unexpected tensorflow version")
    from GenNet_utils.LocallyDirectedConnected_tf2 import LocallyDirected1D

= or more then 2.0: tensorflow version is 2.8.0


In [None]:
def layer_block(model, mask, i, regression):
    """Append a LocallyDirected1D -> Activation -> BatchNormalization stage.

    Args:
        model: tensor the new layers are applied to.
        mask: connectivity mask handed to LocallyDirected1D; its first
            dimension is used for the layer's ``input_shape``.
        i: index used to name the layer ``"LocallyDirected_<i>"``.
        regression: when truthy the stage uses a "relu" activation,
            otherwise "tanh".

    Returns:
        The output tensor of the batch-normalization layer.
    """
    activation_type = "relu" if regression else "tanh"

    locally_directed = LocallyDirected1D(
        mask=mask,
        filters=1,
        input_shape=(mask.shape[0], 1),
        name="LocallyDirected_" + str(i),
    )
    hidden = locally_directed(model)
    hidden = K.layers.Activation(activation_type)(hidden)
    # Batch normalization without learnable offset (center) or scale.
    return K.layers.BatchNormalization(center=False, scale=False)(hidden)


def add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain, mean_ytrain):
    """Merge the covariate input into the network when covariates are present.

    When ``num_covariates`` is not greater than 0 the tensor is returned
    untouched. Otherwise the tensor is passed through ``activation_layer``,
    concatenated with ``input_cov`` along axis 1, batch-normalized (no center,
    no scale) and reduced to one unit by a Dense layer whose bias is
    initialised to ``mean_ytrain``.

    Args:
        model: tensor produced by the network so far.
        input_cov: covariate input tensor.
        num_covariates: number of covariates; the merge happens only if > 0.
        regression: forwarded to ``activation_layer``.
        negative_values_ytrain: forwarded to ``activation_layer``.
        mean_ytrain: constant used as initial bias of the Dense layer.

    Returns:
        The (possibly extended) tensor.
    """
    if not num_covariates > 0:
        return model

    activated = activation_layer(model, regression, negative_values_ytrain)
    merged = K.layers.concatenate([activated, input_cov], axis=1)
    normalised = K.layers.BatchNormalization(center=False, scale=False)(merged)
    bias_start = tf.keras.initializers.Constant(mean_ytrain)
    return K.layers.Dense(units=1, bias_initializer=bias_start)(normalised)


def activation_layer(model, regression, negative_values_ytrain):
    """Apply the output activation that matches the task.

    Args:
        model: tensor to activate.
        regression: when falsy a "sigmoid" activation is applied.
        negative_values_ytrain: only consulted for regression; truthy selects
            a "linear" activation, falsy selects "relu".

    Returns:
        The activated tensor. For regression the chosen activation is also
        reported on stdout.
    """
    if not regression:
        return K.layers.Activation("sigmoid")(model)

    if negative_values_ytrain:
        activated = K.layers.Activation("linear")(model)
        print('using a linear activation function')
        return activated

    activated = K.layers.Activation("relu")(model)
    print('using a relu activation function')
    return activated

def create_network_from_npz(datapath,
                            inputsize,
                            genotype_path,
                            l1_value=0.01,
                            regression=False,
                            num_covariates=0,
                            mask_order = []):
    """Build a Keras model whose locally-directed layers are defined by .npz masks.

    Args:
        datapath: directory containing the SciPy sparse ``.npz`` mask files.
        inputsize: number of input features; must equal the first dimension
            of the first mask.
        genotype_path: not used by this function (kept for the signature).
        l1_value: L1 regularisation strength of the dense output layer, which
            is used when the last mask does not end in a single node.
        regression: when truthy, ``regression_properties(datapath)`` supplies
            the training-label mean and whether negative labels occur, and
            relu/linear activations are used instead of tanh/sigmoid.
        num_covariates: width of the covariate input; covariates are merged
            into the network only when this is > 0.
        mask_order: mask file names (without ``.npz``) in network order. When
            empty, every ``*.npz`` in ``datapath`` is loaded and the masks are
            ordered by descending size. The default list is never mutated.

    Returns:
        Tuple ``(model, masks)``: the Keras model (inputs ``input_layer`` and
        ``inputs_cov``) and the list of masks in the order they were applied.

    Raises:
        IndexError: no ``.npz`` masks were found in ``datapath``.
        AssertionError: the masks do not chain (outputs of one != inputs of
            the next), the two sort orders disagree, or the first mask does
            not match ``inputsize``.
    """
    # Older SciPy releases do not load the `sparse` subpackage on a bare
    # `import scipy`, so import it explicitly before calling load_npz.
    import scipy.sparse

    print("Creating networks from npz masks")
    print("regression", regression)
    if regression:
        # NOTE(review): regression_properties is not defined in the visible
        # cells — confirm it is in scope before calling with regression=True.
        mean_ytrain, negative_values_ytrain = regression_properties(datapath)
    else:
        mean_ytrain = 0
        negative_values_ytrain = False

    print(mask_order)

    if len(mask_order) > 0:  # an explicit order was given: load in that order
        masks = [scipy.sparse.load_npz(datapath + '/' + str(mask_name) + '.npz')
                 for mask_name in mask_order]
        mask_shapes_x = [mask.shape[0] for mask in masks]
        mask_shapes_y = [mask.shape[1] for mask in masks]
    else:
        # No order given: load every mask and sort from largest to smallest.
        masks = [scipy.sparse.load_npz(npz_path)
                 for npz_path in glob.glob(datapath + '/*.npz')]
        if not masks:
            # Same exception type the unguarded indexing used to raise, but
            # with a message that names the actual problem.
            raise IndexError("no .npz mask files found in " + str(datapath))

        mask_shapes_x = np.array([mask.shape[0] for mask in masks])
        mask_shapes_y = np.array([mask.shape[1] for mask in masks])

        # One sort is sufficient; both dimensions must give the same order.
        argsort_x = np.argsort(mask_shapes_x)[::-1]
        argsort_y = np.argsort(mask_shapes_y)[::-1]
        assert all(argsort_x == argsort_y), \
            "sorting the masks by input size and by output size gives different orders"

        masks = [masks[idx] for idx in argsort_y]
        mask_shapes_x = mask_shapes_x[argsort_x]
        mask_shapes_y = mask_shapes_y[argsort_y]

    for idx in range(len(masks) - 1):  # check that consecutive masks fit each other
        assert mask_shapes_y[idx] == mask_shapes_x[idx + 1], (
            "mask %d has %d outputs but mask %d expects %d inputs"
            % (idx, mask_shapes_y[idx], idx + 1, mask_shapes_x[idx + 1]))

    print('mask_shapes_x[0]', mask_shapes_x[0])
    assert mask_shapes_x[0] == inputsize, \
        "first mask expects %d inputs but inputsize is %d" % (mask_shapes_x[0], inputsize)
    print('mask_shapes_y[-1]', mask_shapes_y[-1])
    # If the last mask already ends in one node no dense output layer is needed.
    all_masks_available = bool(mask_shapes_y[-1] == 1)

    input_layer = K.Input((inputsize,), name='input_layer')
    input_cov = K.Input((num_covariates,), name='inputs_cov')

    model = K.layers.Reshape(input_shape=(inputsize,), target_shape=(inputsize, 1))(input_layer)

    for i, mask in enumerate(masks):
        model = layer_block(model, mask, i, regression)

    model = K.layers.Flatten()(model)

    if all_masks_available:
        # NOTE(review): masks[-1] was already applied by the loop above and is
        # applied a second time here, on the flattened tensor — confirm this
        # is intended before relying on the single-node-mask path.
        model = LocallyDirected1D(mask=masks[-1], filters=1, input_shape=(masks[-1].shape[0], 1),
                                  name="output_layer")(model)
    else:
        model = K.layers.Dense(units=1, name="output_layer",
                               kernel_regularizer=tf.keras.regularizers.l1(l=l1_value)
                               )(model)

    model = add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain, mean_ytrain)

    output_layer = activation_layer(model, regression, negative_values_ytrain)
    model = K.Model(inputs=[input_layer, input_cov], outputs=output_layer)

    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()

    return model, masks

In [None]:
# Open Colab's file-upload dialog; the recorded run uploaded the .npz mask files.
from google.colab import files
uploaded = files.upload()

Saving gene_ensmbl_GTEx_brain_mask_tstat.npz to gene_ensmbl_GTEx_brain_mask_tstat.npz
Saving gene_ensmbl_GTEx_mask_tstat.npz to gene_ensmbl_GTEx_mask_tstat.npz
Saving gene_ensmbl_ImmGen_mask_tstat.npz to gene_ensmbl_ImmGen_mask_tstat.npz
Saving SNP_exon_mask.npz to SNP_exon_mask.npz
Saving UKBB_sparse_connection_mask_ensmb_alligned.npz to UKBB_sparse_connection_mask_ensmb_alligned.npz
Saving UKBB_sparse_connection_mask_refseq_alligned.npz to UKBB_sparse_connection_mask_refseq_alligned.npz


In [None]:
# Open Colab's file-upload dialog; the recorded run uploaded bestweight_job_diabetes.h5.
from google.colab import files
uploaded = files.upload()

Saving bestweight_job_diabetes.h5 to bestweight_job_diabetes.h5


In [None]:
# Turn off TensorFlow eager execution before the model is built in the next cell.
# NOTE(review): the reason eager execution must be off is not stated in this
# notebook — confirm it is still required.
from tensorflow.python.framework.ops import disable_eager_execution

disable_eager_execution()

In [None]:
"experiment"
# Directory the masks are loaded from (and, further down, the weights).
datapath = '/content/GenNet/'
# Number of input features; create_network_from_npz asserts that it equals
# the first dimension of the first mask.
inputsize = 6986636
# 0 means add_covariates leaves the network unchanged.
num_covariates = 0
genotype_path = datapath
l1_value = 0.001
# Build a classification network (regression=False) from a single, explicitly named mask.
model, masks = create_network_from_npz(datapath=datapath, inputsize=inputsize, genotype_path=genotype_path,mask_order=['UKBB_sparse_connection_mask_refseq_alligned'],
                                               l1_value=l1_value, regression=False, num_covariates=num_covariates, )

Creating networks from npz masks
regression False
['UKBB_sparse_connection_mask_refseq_alligned']
mask_shapes_x[0] 6986636
mask_shapes_y[-1] 15827


Instructions for updating:
Colocations handled automatically by placer.


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_layer (InputLayer)       [(None, 6986636)]    0           []                               
                                                                                                  
 reshape_1 (Reshape)            (None, 6986636, 1)   0           ['input_layer[0][0]']            
                                                                                                  
 activation (Activation)        (None, 15827, 1)     0           ['LocallyDirected_0[0][0]']      
                                                                                                  
 batch_normalization (BatchNorm  (None, 15827, 1)    2           ['activation[0][0]']             
 alization)                                                                                   

In [None]:
# Open Colab's file-upload dialog; the recorded run uploaded bestweight_job.h5.
from google.colab import files
uploaded = files.upload()

Saving bestweight_job.h5 to bestweight_job.h5


In [None]:
# Open Colab's file-upload dialog.
# NOTE(review): the recorded run of this cell was interrupted (KeyboardInterrupt),
# so it is unclear which file was meant to be uploaded here — confirm or remove.
from google.colab import files
uploaded = files.upload()

KeyboardInterrupt: ignored

In [None]:
# Load trained weights into the network built above.
# NOTE(review): the recorded run of this cell ended in a ValueError; presumably
# the weight file does not match the architecture of `model` — confirm which
# mask the weights in bestweight_job.h5 were trained with.
model.load_weights(datapath + 'bestweight_job.h5')

ValueError: ignored

In [None]:
# Print the layer-by-layer summary of the current `model`.
# NOTE(review): the recorded output ("model_7", 21476 nodes) differs from the
# build output recorded earlier (15827 nodes), so the cells were presumably run
# out of order or with a different mask — re-run from a fresh kernel to confirm.
model.summary()

Model: "model_7"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_layer (InputLayer)       [(None, 6986636)]    0           []                               
                                                                                                  
 reshape_7 (Reshape)            (None, 6986636, 1)   0           ['input_layer[0][0]']            
                                                                                                  
 activation_15 (Activation)     (None, 21476, 1)     0           ['LocallyDirected_0[0][0]']      
                                                                                                  
 batch_normalization_8 (BatchNo  (None, 21476, 1)    2           ['activation_15[0][0]']          
 rmalization)                                                                               

In [None]:
# Upload genotype.h5 through Colab's file-upload dialog.
from google.colab import files
uploaded = files.upload()

Saving genotype.h5 to genotype.h5


In [None]:
def get_testdata(datapath, genotype_filename=None):
    """Read the whole ``data`` node of a genotype HDF5 file into memory.

    Args:
        datapath: prefix prepended to the file name as-is (no separator is
            inserted, so a directory must end with one).
        genotype_filename: name of the HDF5 file to open. When None (the
            default) the name is built as
            ``studyname + '_genotype_processed.h5'`` from the module-level
            ``studyname`` variable, which was the only behaviour before this
            parameter existed.

    Returns:
        The result of ``h5file.root.data[:]``, i.e. everything stored in the
        file's ``data`` node.

    Raises:
        NameError: ``genotype_filename`` is None and no global ``studyname``
            is defined.
    """
    if genotype_filename is None:
        genotype_filename = studyname + '_genotype_processed.h5'

    h5file = tables.open_file(datapath + genotype_filename, "r")
    try:
        xbatch = h5file.root.data[:]
    finally:
        # Close the handle even when reading fails, so the file is not leaked.
        h5file.close()
    return xbatch

Saving genotype.h5 to genotype.h5


In [None]:
# Load the genotype data and run it through the model.
# NOTE(review): get_testdata builds its file name from a global `studyname`,
# which no visible cell defines — confirm it is set before running this cell.
xtest = get_testdata(datapath)
pred = model.predict(xtest)
print('model prediction: ', pred)

In [None]:
# Show the command-line options of GenNet's `plot` subcommand.
!python GenNet.py plot --help

In [None]:
# Run GenNet's `plot` subcommand with -type sunburst and -ID 100001.
# NOTE(review): the recorded run ended in a traceback inside
# GenNet_utils/Create_plots.py — confirm the results for this ID exist first.
!python GenNet.py plot -type sunburst -ID 100001

2023-03-10 11:03:58.890805: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2023-03-10 11:03:58.891032: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
bitarray failed to import this might give some trouble converting binary files
Resultspath did not exist but is made now
Traceback (most recent call last):
  File "/content/GenNet/GenNet.py", line 284, in <module>
    main()
  File "/content/GenNet/GenNet.py", line 26, in main
    plot(args)
  File "/content/GenNet/GenNet_utils/Create_plots.py", line 233, in plot
    importance