Commit 3b0f4af3 authored by Federico Julian Camerota Verdu
Browse files

Added cudf and dask notebooks

parent 5bd303ee
This diff is collapsed.
%% Cell type:code id:355ef985 tags:
``` python
# Extend the module search path with the site-packages directory of the
# `mdtraj37` conda environment so its packages can be imported in this kernel.
import sys
sys.path  # bare expression; not the last line of the cell, so nothing is displayed
sys.path.append('/dgx/home/userinternal/fcamerot/.conda/envs/mdtraj37/lib/python3.7/site-packages')
print(sys.path)  # show the resulting search path for verification
```
%% Cell type:markdown id:42ef43af tags:
# IMPORTS
%% Cell type:code id:worthy-electric tags:
``` python
from cuml.dask.decomposition import PCA as dask_pca
import cupy as cu
import cudf
import dask
import dask.array as da
from dask_cuda import LocalCUDACluster
import dask_cudf
from dask.dataframe import from_array, from_pandas
from dask.distributed import Client
import mdtraj as md
import numpy as np
np.set_printoptions(suppress=True)
import pandas as pd
```
%% Cell type:code id:aafcc21e tags:
``` python
# Echo the installed dask version (the notebook displays the last expression).
dask.__version__
```
%% Cell type:markdown id:fe49c4d6 tags:
# INITIAL PREPARATIONS
%% Cell type:code id:narrow-retention tags:
``` python
# Directory that holds the benchmark datasets.
dataDir = '/dgx/home/userinternal/fcamerot/ligatetrajectorybenchmark/data/'
# Select exactly one dataset by leaving a single `fileName` line uncommented;
# the others are kept for quick switching between trajectory sizes.
#fileName = dataDir + 'orf7a' #~1GB trajectory
#fileName = dataDir + 'nsp16_nsp10_6wkq' #~5GB trajectory
fileName = dataDir + 'NSP12-7-8_6M71' #~10GB trajectory
# Both files share the dataset's base name and differ only in extension.
topol = fileName + '.pdb'  # PDB file, used as reference structure and topology
trajs = fileName + '.xtc'  # XTC trajectory file
```
%% Cell type:code id:2b526c33 tags:
``` python
# Load the PDB structure; it serves as the reference for the RMSD below.
Ref = md.load(topol)
```
%% Cell type:code id:a3a47162 tags:
``` python
# Centre the reference coordinates so they are comparable with the trajectory,
# which is centred the same way before the RMSD is computed.
Ref.center_coordinates()
# Keep a handle on the reference topology.
top = Ref.topology
```
%% Cell type:markdown id:d528a651 tags:
### Create dask cluster and client
%% Cell type:code id:710a340a tags:
``` python
# Start a local dask-cuda cluster with 4 workers
# (presumably one GPU per worker -- confirm against the host's GPU count).
cluster = LocalCUDACluster(n_workers=4)
# Connect a client to the cluster so later dask computations are scheduled on it.
client = Client(cluster)
```
%% Cell type:markdown id:2ed57674 tags:
# RMSD ANALYSIS
%% Cell type:code id:8935173a tags:
``` python
def rmsd(a, b):
    """Return the per-frame RMSD between trajectory `a` and reference `b`.

    Both inputs are wrapped in dask arrays whose blocks are converted to
    CuPy arrays, so the arithmetic is built lazily; call ``.compute()`` on
    the result to evaluate it.

    No superposition (rotational fit) is performed here: the coordinates
    are compared as given.

    Parameters
    ----------
    a : array-like
        Trajectory coordinates, expected as (n_frames, n_atoms, 3); the
        reduction below runs over axes 1 and 2.
    b : array-like
        Reference coordinates, broadcastable against `a`, e.g.
        (1, n_atoms, 3) or (n_atoms, 3).

    Returns
    -------
    dask array with one RMSD value per frame of `a`.
    """
    a = da.from_array(a).map_blocks(cu.array)
    b = da.from_array(b).map_blocks(cu.array)
    # Normalise by the number of atoms, taken from axis 1 of the trajectory.
    # Using b.shape[0] is only correct for a 2-D reference; for a 3-D
    # reference (frames, atoms, 3) it is the frame count (typically 1),
    # which leaves the sum of squares unnormalised.
    n_atoms = a.shape[1]
    diff = a - b
    return da.sqrt(da.sum(da.power(diff, 2), axis=(1, 2)) / n_atoms)
```
%% Cell type:markdown id:9c5ba5e6 tags:
### Load trajectory
%% Cell type:code id:3abf6f02 tags:
``` python
# Load the XTC trajectory, taking the topology from the PDB file.
trj = md.load(trajs,top=topol)
```
%% Cell type:markdown id:daadb61b tags:
### Compute RMSD
%% Cell type:code id:qualified-house tags:
``` python
%%time
# Centre every trajectory frame, matching the centring applied to `Ref`.
# NOTE(review): frames are centred but not superposed (no rotational fit)
# before the comparison -- confirm this is the intended RMSD definition.
trj.center_coordinates()
# `rmsd` returns a lazy dask array; `.compute()` triggers the evaluation.
rmsd(trj.xyz, Ref.xyz).compute()
```
%% Cell type:markdown id:78c9f4d4 tags:
# PCA ANALYSIS
%% Cell type:code id:bd26ba99 tags:
``` python
%%time
# Project the trajectory onto its first two principal components using
# cuML's dask PCA estimator.
model = dask_pca(n_components=2)
# Take all frames, atoms and coordinates (full slice of the xyz array).
model_trj = trj.xyz[:,:,:]
# Flatten each frame into one row so the data is 2-D: (n_frames, features).
model_trj = model_trj.reshape(model_trj.shape[0] , -1 )
# Build a dask DataFrame with 4 partitions from a host-side pandas frame
# (4 matches the n_workers used for the cluster -- presumably intentional).
dask_df = from_pandas(pd.DataFrame(model_trj), npartitions=4)
# Convert each pandas partition to a cuDF DataFrame.
dask_df = dask_df.map_partitions(cudf.from_pandas)
# Fit the model and transform the data in one step; the result is lazy.
proj = model.fit_transform(dask_df)
# Materialise the projected coordinates.
proj = proj.compute()
```
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment