Commit 697818c7 authored by Federico Julian Camerota Verdu
Browse files

Added mdtraj and mdtraj+cudf notebooks

parent 50ede53e
data/
notebooks/.ipynb_checkpoints
%% Cell type:code id:355ef985 tags:
``` python
import sys

# site-packages directory of the 'mdtraj37' conda environment; it is added to
# the module search path so packages installed there can be imported below.
MDTRAJ_SITE_PACKAGES = '/dgx/home/userinternal/fcamerot/.conda/envs/mdtraj37/lib/python3.7/site-packages'

# Append only when missing, so re-running this cell does not accumulate
# duplicate entries in sys.path.
if MDTRAJ_SITE_PACKAGES not in sys.path:
    sys.path.append(MDTRAJ_SITE_PACKAGES)
print(sys.path)
```
%% Cell type:markdown id:42ef43af tags:
## IMPORTS
%% Cell type:code id:worthy-electric tags:
``` python
import mdtraj as md
import pandas as pd
import numpy as np
import cupy as cu
np.set_printoptions(suppress=True)
import time
from sklearn.decomposition import PCA
import os
import cudf
import cuml
```
%% Cell type:markdown id:fe49c4d6 tags:
## INITIAL PREPARATIONS
%% Cell type:code id:narrow-retention tags:
``` python
# Directory that holds the benchmark input files.
dataDir = '/dgx/home/userinternal/fcamerot/ligatetrajectorybenchmark/data'
# dataDir has no trailing slash, so the path separator must be inserted
# explicitly; os.path.join does that.
fileName = os.path.join(dataDir, 'nsp16_nsp10_6wkq')  # ~5GB trajectory
#fileName = os.path.join(dataDir, 'NSP12-7-8_6M71')  # ~10GB trajectory
# The topology (.pdb) and the trajectory (.xtc) share the same base name.
topol = fileName + '.pdb'
trajs = fileName + '.xtc'
```
%% Cell type:code id:2b526c33 tags:
``` python
%%time
# Load the structure stored in the .pdb file; it is used below as the
# reference for the RMSD computations.
Ref = md.load(topol)
```
%% Cell type:code id:a3a47162 tags:
``` python
%%time
# Centre the reference coordinates (called for its effect on Ref; the return
# value is not used).
Ref.center_coordinates()
# Keep the topology object so it can be passed to md.iterload later on.
top = Ref.topology
```
%% Cell type:markdown id:2ed57674 tags:
## RMSD ANALYSIS
%% Cell type:code id:8935173a tags:
``` python
def rmsd(a, b):
    """Return a scalar deviation between two coordinate arrays using CuPy.

    Both inputs are converted to float64 CuPy arrays. The element-wise squared
    differences ``(a - b) ** 2`` are summed over every element, the total is
    divided by the length of ``b``'s first axis, and the square root of that
    quotient is returned.

    NOTE(review): the divisor is ``b.shape[0]``, not an atom count; for a
    single-frame reference it equals 1. Confirm this normalisation is the
    intended one.

    Raises:
        ValueError: if the shapes of ``a`` and ``b`` differ beyond the first
            axis.
    """
    coords = cu.asarray(a, dtype=cu.float64)
    reference = cu.asarray(b, dtype=cu.float64)
    divisor = reference.shape[0]
    # Only the trailing dimensions have to agree; the first axis may differ.
    if coords.shape[1:] != reference.shape[1:]:
        raise ValueError('a and b must have same shape')
    squared_diff = (coords - reference) ** 2
    return cu.sqrt(cu.sum(squared_diff) / divisor)
```
%% Cell type:markdown id:d75ca8e1 tags:
### First load, then compute
%% Cell type:code id:3abf6f02 tags:
``` python
%%time
# Load the whole .xtc trajectory in a single call, using the .pdb file as its
# topology.
trj = md.load(trajs,top=topol)
```
%% Cell type:code id:qualified-house tags:
``` python
%%time
# Centre the loaded trajectory, then evaluate rmsd() against the reference
# coordinates. As the last expression of the cell, the result is what the
# notebook displays.
trj.center_coordinates()
rmsd(trj.xyz, Ref.xyz)
```
%% Cell type:markdown id:5147257b tags:
### Iterate and compute
%% Cell type:code id:d5b26b6f tags:
``` python
%%time
# Stream the trajectory piece by piece instead of loading it all at once:
# each piece is centred and passed to rmsd() together with the reference.
# The rmsd() return value is not stored; this cell only exercises/timing-tests
# the computation.
for chunk in md.iterload(trajs, top=top):
    chunk.center_coordinates()
    rmsd(chunk.xyz, Ref.xyz)
```
%% Cell type:markdown id:78c9f4d4 tags:
## PCA ANALYSIS
%% Cell type:code id:a606f5c0 tags:
``` python
%%time
# Create the cuML PCA estimator, configured to keep 20 components.
md_pca = cuml.PCA(n_components=20)
```
%% Cell type:code id:ed7f2f68 tags:
``` python
%%time
# Original author's note: snapshots are already loaded at 500ps intervals.
# Flatten every frame into a single row (first axis kept, remaining axes
# merged), then hand the 2-D array to cuDF.
pca_trj = trj.xyz.reshape(len(trj.xyz), -1)
df = cudf.DataFrame(pca_trj)
```
%% Cell type:code id:anticipated-edition tags:
``` python
%%time
# fit_transform fits the PCA model on df and gives us the projections of the
# rows of df in PCA space.
proj = md_pca.fit_transform(df)
```
%% Cell type:code id:ef498b33 tags:
``` python
```
%% Cell type:code id:355ef985 tags:
``` python
import sys

# site-packages directory of the 'mdtraj37' conda environment; it is added to
# the module search path so packages installed there can be imported below.
MDTRAJ_SITE_PACKAGES = '/dgx/home/userinternal/fcamerot/.conda/envs/mdtraj37/lib/python3.7/site-packages'

# Append only when missing, so re-running this cell does not accumulate
# duplicate entries in sys.path.
if MDTRAJ_SITE_PACKAGES not in sys.path:
    sys.path.append(MDTRAJ_SITE_PACKAGES)
print(sys.path)
```
%% Cell type:markdown id:42ef43af tags:
## IMPORTS
%% Cell type:code id:worthy-electric tags:
``` python
import mdtraj as md
import pandas as pd
import numpy as np
np.set_printoptions(suppress=True)
import time
from sklearn.decomposition import PCA
import os
```
%% Cell type:markdown id:fe49c4d6 tags:
## INITIAL PREPARATIONS
%% Cell type:code id:narrow-retention tags:
``` python
# Directory that holds the benchmark input files.
dataDir = '/dgx/home/userinternal/fcamerot/ligatetrajectorybenchmark/data'
# dataDir has no trailing slash, so the path separator must be inserted
# explicitly; os.path.join does that.
fileName = os.path.join(dataDir, 'nsp16_nsp10_6wkq')  # ~5GB trajectory
#fileName = os.path.join(dataDir, 'NSP12-7-8_6M71')  # ~10GB trajectory
# The topology (.pdb) and the trajectory (.xtc) share the same base name.
topol = fileName + '.pdb'
trajs = fileName + '.xtc'
```
%% Cell type:code id:diagnostic-dimension tags:
``` python
%%time
# Load the structure stored in the .pdb file into Ref.
Ref = md.load(topol)
```
%% Cell type:code id:ba5e890d tags:
``` python
%%time
# Centre the reference coordinates (called for its effect on Ref; the return
# value is not used).
Ref.center_coordinates()
# Keep a handle on the topology object of the reference structure.
top = Ref.topology
```
%% Cell type:markdown id:2ed57674 tags:
## RMSD ANALYSIS
%% Cell type:code id:qualified-house tags: