提交 a9e031a9 作者: 朱学凯

add deepdta

上级 33c9c6b2
AttentionDTA_BIBM @ d8c8a667
Subproject commit d8c8a6673c75c5e457ccf5bc0c187b131a06b7a1
DeepDTA @ 2c9cbafd
Subproject commit 2c9cbafdfb383f2f03bcea4b231b90a072e65b15
# About DeepDTA: deep drug-target binding affinity prediction
The approach used in this work is the modeling of protein sequences and compound 1D representations (SMILES) with convolutional neural networks (CNNs) to predict the binding affinity value of drug-target pairs.
![Figure](https://github.com/hkmztrk/DeepDTA/blob/master/docs/figures/deepdta.PNG)
# Installation
## Data
Please see the [readme](https://github.com/hkmztrk/DeepDTA/blob/master/data/README.md) for detailed explanation.
## Requirements
You'll need to install the following in order to run the code.
* [Python 3.4 <=](https://www.python.org/downloads/)
* [Keras 2.x](https://pypi.org/project/Keras/)
* [Tensorflow 1.x](https://www.tensorflow.org/install/)
* numpy
* matplotlib
You have to place "data" folder under "source" directory.
# Usage
```
python run_experiments.py --num_windows 32 \
--seq_window_lengths 8 12 \
--smi_window_lengths 4 8 \
--batch_size 256 \
--num_epoch 100 \
--max_seq_len 1000 \
--max_smi_len 100 \
--dataset_path 'data/kiba/' \
--problem_type 1 \
--log_dir 'logs/'
```
**For citation:**
```
@article{ozturk2018deepdta,
title={DeepDTA: deep drug--target binding affinity prediction},
author={{\"O}zt{\"u}rk, Hakime and {\"O}zg{\"u}r, Arzucan and Ozkirimli, Elif},
journal={Bioinformatics},
volume={34},
number={17},
pages={i821--i829},
year={2018},
publisher={Oxford University Press}
}
```
import argparse
import os
def argparser():
    """Parse the command-line flags for a DeepDTA experiment.

    Returns the parsed ``argparse`` namespace. Unknown arguments are
    tolerated (``parse_known_args``), so extra flags do not abort the run.
    """
    parser = argparse.ArgumentParser()
    # --- model hyper-parameters ---
    parser.add_argument(
        '--seq_window_lengths',
        type=int,
        nargs='+',
        help='Space separated list of motif filter lengths. (ex, --seq_window_lengths 4 8 12)'
    )
    parser.add_argument(
        '--smi_window_lengths',
        type=int,
        nargs='+',
        help='Space separated list of motif filter lengths. (ex, --smi_window_lengths 4 8 12)'
    )
    parser.add_argument(
        '--num_windows',
        type=int,
        nargs='+',
        help='Space separated list of the number of motif filters corresponding to length list. (ex, --num_windows 100 200 100)'
    )
    parser.add_argument(
        '--num_hidden',
        type=int,
        default=0,
        help='Number of neurons in hidden layer.'
    )
    parser.add_argument(
        '--num_classes',
        type=int,
        default=0,
        help='Number of classes (families).'
    )
    parser.add_argument(
        '--max_seq_len',
        type=int,
        default=0,
        help='Maximum length of protein input sequences.'
    )
    parser.add_argument(
        '--max_smi_len',
        type=int,
        default=0,
        help='Maximum length of SMILES input strings.'
    )
    # --- learning / training ---
    parser.add_argument(
        '--learning_rate',
        type=float,
        default=0.001,
        help='Initial learning rate.'
    )
    parser.add_argument(
        '--num_epoch',
        type=int,
        default=100,
        help='Number of epochs to train.'
    )
    parser.add_argument(
        '--batch_size',
        type=int,
        default=256,
        help='Batch size. Must divide evenly into the dataset sizes.'
    )
    parser.add_argument(
        '--dataset_path',
        type=str,
        default='/data/kiba/',
        help='Directory for input data.'
    )
    parser.add_argument(
        '--problem_type',
        type=int,
        default=1,
        help='Type of the prediction problem (1-4)'
    )
    parser.add_argument(
        '--binary_th',
        type=float,
        default=0.0,
        help='Threshold to split data into binary classes'
    )
    parser.add_argument(
        '--is_log',
        type=int,
        default=0,
        help='use log transformation for Y'
    )
    parser.add_argument(
        '--checkpoint_path',
        type=str,
        default='',
        help='Path to write checkpoint file.'
    )
    parser.add_argument(
        '--log_dir',
        type=str,
        default='/tmp',
        help='Directory for log data.'
    )
    # Help texts below were copy-pasted as "Directory for log data." in the
    # original; corrected to describe the actual flags.
    parser.add_argument(
        '--out',
        type=str,
        default='/pred',
        help='Directory for prediction output.'
    )
    parser.add_argument(
        '--model',
        type=str,
        default='/model',
        help='Directory for saved model files.'
    )
    FLAGS, unparsed = parser.parse_known_args()
    # check validity
    # assert( len(FLAGS.window_lengths) == len(FLAGS.num_windows) )
    return FLAGS
def logging(msg, FLAGS):
    """Append *msg* as a single line to ``log.txt`` under ``FLAGS.log_dir``.

    NOTE: the name shadows the stdlib ``logging`` module; kept for
    backward compatibility with existing callers.
    """
    log_path = os.path.join(FLAGS.log_dir, "log.txt")
    with open(log_path, "a") as log_file:
        log_file.write("%s\n" % msg)
import sys, re, math, time
import numpy as np
import matplotlib.pyplot as plt
import json
import pickle
import collections
from collections import OrderedDict
from matplotlib.pyplot import cm
from spacy import load
#from keras.preprocessing.sequence import pad_sequences
## ######################## ##
#
# Define CHARSET, CHARLEN
#
## ######################## ##
# CHARPROTSET = { 'A': 0, 'C': 1, 'D': 2, 'E': 3, 'F': 4, 'G': 5, 'H': 6, \
# 'I': 7, 'K': 8, 'L': 9, 'M': 10, 'N': 11, 'P': 12, 'Q': 13, \
# 'R': 14, 'S': 15, 'T': 16, 'V': 17, 'W': 18, 'Y': 19, 'X': 20, \
# 'O': 20, 'U': 20,
# 'B': (2, 11),
# 'Z': (3, 13),
# 'J': (7, 9) }
# CHARPROTLEN = 21
# Protein (amino-acid) alphabet: every uppercase letter except 'J',
# mapped to 1-based integer codes (0 is implicitly reserved for padding,
# since the label encoders fill unused positions with zeros).
CHARPROTSET = { "A": 1, "C": 2, "B": 3, "E": 4, "D": 5, "G": 6,
"F": 7, "I": 8, "H": 9, "K": 10, "M": 11, "L": 12,
"O": 13, "N": 14, "Q": 15, "P": 16, "S": 17, "R": 18,
"U": 19, "T": 20, "W": 21,
"V": 22, "Y": 23, "X": 24,
"Z": 25 }
# Size of the protein alphabet above.
CHARPROTLEN = 25
# Canonical-SMILES alphabet (no stereochemistry characters), 1-based codes.
CHARCANSMISET = { "#": 1, "%": 2, ")": 3, "(": 4, "+": 5, "-": 6,
".": 7, "1": 8, "0": 9, "3": 10, "2": 11, "5": 12,
"4": 13, "7": 14, "6": 15, "9": 16, "8": 17, "=": 18,
"A": 19, "C": 20, "B": 21, "E": 22, "D": 23, "G": 24,
"F": 25, "I": 26, "H": 27, "K": 28, "M": 29, "L": 30,
"O": 31, "N": 32, "P": 33, "S": 34, "R": 35, "U": 36,
"T": 37, "W": 38, "V": 39, "Y": 40, "[": 41, "Z": 42,
"]": 43, "_": 44, "a": 45, "c": 46, "b": 47, "e": 48,
"d": 49, "g": 50, "f": 51, "i": 52, "h": 53, "m": 54,
"l": 55, "o": 56, "n": 57, "s": 58, "r": 59, "u": 60,
"t": 61, "y": 62}
# Size of the canonical-SMILES alphabet above.
CHARCANSMILEN = 62
# Isomeric-SMILES alphabet: adds the stereochemistry/charge characters
# '/', '\' and '@' on top of the canonical set (codes are NOT a superset
# mapping of CHARCANSMISET — the two tables assign different integers).
CHARISOSMISET = {"#": 29, "%": 30, ")": 31, "(": 1, "+": 32, "-": 33, "/": 34, ".": 2,
"1": 35, "0": 3, "3": 36, "2": 4, "5": 37, "4": 5, "7": 38, "6": 6,
"9": 39, "8": 7, "=": 40, "A": 41, "@": 8, "C": 42, "B": 9, "E": 43,
"D": 10, "G": 44, "F": 11, "I": 45, "H": 12, "K": 46, "M": 47, "L": 13,
"O": 48, "N": 14, "P": 15, "S": 49, "R": 16, "U": 50, "T": 17, "W": 51,
"V": 18, "Y": 52, "[": 53, "Z": 19, "]": 54, "\\": 20, "a": 55, "c": 56,
"b": 21, "e": 57, "d": 22, "g": 58, "f": 23, "i": 59, "h": 24, "m": 60,
"l": 25, "o": 61, "n": 26, "s": 62, "r": 27, "u": 63, "t": 28, "y": 64}
# Size of the isomeric-SMILES alphabet above.
CHARISOSMILEN = 64
## ######################## ##
#
# Encoding Helpers
#
## ######################## ##
# Y = -(np.log10(Y/(math.pow(math.e,9))))
def one_hot_smiles(line, MAX_SMI_LEN, smi_ch_ind):
    """One-hot encode a SMILES string into a (MAX_SMI_LEN, |alphabet|) matrix.

    Characters beyond MAX_SMI_LEN are dropped; trailing rows stay all-zero.
    The 1-based codes in smi_ch_ind are shifted to 0-based column indices.
    """
    encoded = np.zeros((MAX_SMI_LEN, len(smi_ch_ind)))
    for pos, symbol in enumerate(line[:MAX_SMI_LEN]):
        encoded[pos, smi_ch_ind[symbol] - 1] = 1
    return encoded
def one_hot_sequence(line, MAX_SEQ_LEN, smi_ch_ind):
    """One-hot encode a protein sequence into a (MAX_SEQ_LEN, |alphabet|) matrix.

    Same scheme as one_hot_smiles: truncate to MAX_SEQ_LEN, shift the
    1-based codes to 0-based columns, leave unused rows zero.
    """
    encoded = np.zeros((MAX_SEQ_LEN, len(smi_ch_ind)))
    for pos, residue in enumerate(line[:MAX_SEQ_LEN]):
        encoded[pos, smi_ch_ind[residue] - 1] = 1
    return encoded
def label_smiles(line, MAX_SMI_LEN, smi_ch_ind):
    """Integer-encode a SMILES string, zero-padded/truncated to MAX_SMI_LEN."""
    codes = np.zeros(MAX_SMI_LEN)
    for pos, symbol in enumerate(line[:MAX_SMI_LEN]):
        codes[pos] = smi_ch_ind[symbol]
    return codes
def label_sequence(line, MAX_SEQ_LEN, smi_ch_ind):
    """Integer-encode a protein sequence, zero-padded/truncated to MAX_SEQ_LEN."""
    codes = np.zeros(MAX_SEQ_LEN)
    for pos, residue in enumerate(line[:MAX_SEQ_LEN]):
        codes[pos] = smi_ch_ind[residue]
    return codes
## ######################## ##
#
# DATASET Class
#
## ######################## ##
# works for large dataset
class DataSet(object):
    """Loader for a DeepDTA benchmark folder (e.g. ``data/kiba/`` or
    ``data/davis/``): ligand SMILES and protein sequences are JSON files,
    affinities ``Y`` is a pickle, CV folds are JSON index lists.
    """

    def __init__(self, fpath, setting_no, seqlen, smilen, need_shuffle=False):
        # Maximum lengths used for truncation/zero-padding during encoding.
        self.SEQLEN = seqlen
        self.SMILEN = smilen
        # self.NCLASSES = n_classes
        self.charseqset = CHARPROTSET
        self.charseqset_size = CHARPROTLEN
        self.charsmiset = CHARISOSMISET  # isomeric SMILES alphabet; edit here to switch alphabets
        self.charsmiset_size = CHARISOSMILEN
        self.PROBLEMSET = setting_no

    def read_sets(self, FLAGS):
        """Return (test_fold, train_folds) index lists for the CV setting.

        FLAGS.dataset_path must be the dataset folder (e.g. /kiba/ or /davis/).
        """
        fpath = FLAGS.dataset_path
        setting_no = FLAGS.problem_type
        print("Reading %s start" % fpath)
        # 'with' guarantees the fold files are closed (the original leaked
        # the handles by passing open() results straight to json.load).
        with open(fpath + "folds/test_fold_setting" + str(setting_no) + ".txt") as f:
            test_fold = json.load(f)
        with open(fpath + "folds/train_fold_setting" + str(setting_no) + ".txt") as f:
            train_folds = json.load(f)
        return test_fold, train_folds

    def parse_data(self, FLAGS, with_label=True):
        """Load and encode the dataset.

        Returns (XD, XT, Y): encoded ligands, encoded proteins, and the
        affinity matrix. with_label=True produces integer label encodings;
        otherwise one-hot matrices.
        """
        fpath = FLAGS.dataset_path
        print("Read %s start" % fpath)
        # OrderedDict preserves the on-disk ordering so XD/XT rows line up
        # with the rows/columns of Y.
        with open(fpath + "ligands_can.txt") as f:
            ligands = json.load(f, object_pairs_hook=OrderedDict)
        with open(fpath + "proteins.txt") as f:
            proteins = json.load(f, object_pairs_hook=OrderedDict)
        with open(fpath + "Y", "rb") as f:
            Y = pickle.load(f, encoding='latin1')  # TODO: read from raw
        if FLAGS.is_log:
            # Convert nM affinities to pKd-style values.
            Y = -(np.log10(Y / (math.pow(10, 9))))
        XD = []
        XT = []
        if with_label:
            for d in ligands.keys():
                XD.append(label_smiles(ligands[d], self.SMILEN, self.charsmiset))
            for t in proteins.keys():
                XT.append(label_sequence(proteins[t], self.SEQLEN, self.charseqset))
        else:
            for d in ligands.keys():
                XD.append(one_hot_smiles(ligands[d], self.SMILEN, self.charsmiset))
            for t in proteins.keys():
                XT.append(one_hot_sequence(proteins[t], self.SEQLEN, self.charseqset))
        return XD, XT, Y
class DataSet_for_new(object):
    """Variant of DataSet that reads plain-text inputs (one record per line)
    instead of the JSON/pickle benchmark layout.

    NOTE(review): parse_data indexes ``FLAGS.dataset_path`` with the keys
    "ligand", "protein" and "y", so for this class dataset_path is expected
    to be a dict of file paths — confirm against the caller.
    """

    def __init__(self, fpath, setting_no, seqlen, smilen, need_shuffle=False):
        # Maximum lengths used for truncation/zero-padding during encoding.
        self.SEQLEN = seqlen
        self.SMILEN = smilen
        self.charseqset = CHARPROTSET
        self.charseqset_size = CHARPROTLEN
        self.charsmiset = CHARISOSMISET  # isomeric SMILES alphabet; edit here to switch alphabets
        self.charsmiset_size = CHARISOSMILEN
        self.PROBLEMSET = setting_no

    def read_sets(self, FLAGS):
        """Return (test_fold, train_folds) index lists for the CV setting.

        FLAGS.dataset_path must be the dataset folder (e.g. /kiba/ or /davis/).
        """
        fpath = FLAGS.dataset_path
        setting_no = FLAGS.problem_type
        print("Reading %s start" % fpath)
        # 'with' guarantees the fold files are closed (the original leaked
        # the handles by passing open() results straight to json.load).
        with open(fpath + "folds/test_fold_setting" + str(setting_no) + ".txt") as f:
            test_fold = json.load(f)
        with open(fpath + "folds/train_fold_setting" + str(setting_no) + ".txt") as f:
            train_folds = json.load(f)
        return test_fold, train_folds

    def parse_data(self, FLAGS, with_label=True):
        """Encode ligands/proteins read from line-oriented text files.

        Returns (XD, XT, Y): integer-encoded SMILES, integer-encoded
        protein sequences, and the raw affinity lines (still strings).
        with_label=False is a no-op here: the one-hot branch was removed
        in this variant, so XD/XT come back empty in that case.
        """
        fpath = FLAGS.dataset_path
        print("Read %s start" % fpath)

        def load_file(file):
            # One stripped record per line.
            with open(file, 'r') as f:
                return [line.strip() for line in f.readlines()]

        ligands = load_file(fpath["ligand"])
        proteins = load_file(fpath["protein"])
        # Modified data input: labels come from a plain text file instead of
        # the pickled Y matrix used by DataSet.
        Y = load_file(fpath["y"])
        XD = []
        XT = []
        if with_label:
            for d in ligands:
                XD.append(label_smiles(d, self.SMILEN, self.charsmiset))
            for t in proteins:
                XT.append(label_sequence(t, self.SEQLEN, self.charseqset))
        return XD, XT, Y
import numpy as np
def get_aupr(Y, P):
    """Area under the precision-recall curve, computed by the external
    ``auc.jar`` tool (Java must be on PATH, auc.jar in the working dir).

    Y is binarized (y > 0 -> 1). Inputs exposing an ``.A`` attribute
    (scipy sparse matrices) are densified first. Writes the scratch files
    ``temp.txt`` and ``foo.txt`` into the current directory.
    """
    # Bug fix: the original called subprocess without ever importing it,
    # raising NameError at runtime. Imported locally to leave the
    # module-level import block untouched.
    import subprocess

    if hasattr(Y, 'A'):
        Y = Y.A
    if hasattr(P, 'A'):
        P = P.A
    Y = np.where(Y > 0, 1, 0).ravel()
    P = np.asarray(P).ravel()
    # 'with' blocks guarantee the scratch files are flushed and closed
    # before the java process reads them.
    with open("temp.txt", 'w') as f:
        for score, label in zip(P, Y):
            f.write("%f %d\n" % (score, label))
    with open("foo.txt", 'w') as f:
        subprocess.call(["java", "-jar", "auc.jar", "temp.txt", "list"], stdout=f)
    with open("foo.txt") as f:
        lines = f.readlines()
    # The tool prints the AUPR as the last token of the second-to-last line.
    aucpr = float(lines[-2].split()[-1])
    return aucpr
def get_cindex(Y, P):
    """Concordance index (C-index) between true affinities Y and predictions P.

    Every pair (i, j) with Y[i] > Y[j] is comparable; a comparable pair
    contributes 1 when P[i] > P[j] and 0.5 when the predictions tie.
    Returns 0 when there is no comparable pair. O(n^2) pairwise scan.
    """
    summ = 0
    pair = 0
    for i in range(1, len(Y)):
        for j in range(0, i):
            # The original also tested `i is not j`, which is always true
            # here (j < i) and compared ints by identity; removed.
            if Y[i] > Y[j]:
                pair += 1
                summ += 1 * (P[i] > P[j]) + 0.5 * (P[i] == P[j])
    # `!=` instead of the original `pair is not 0`: identity comparison on
    # ints is a CPython caching artifact and warns on 3.8+.
    if pair != 0:
        return summ / pair
    else:
        return 0
def r_squared_error(y_obs, y_pred):
    """Squared Pearson correlation coefficient (r^2) of y_obs vs. y_pred."""
    obs = np.array(y_obs)
    pred = np.array(y_pred)
    # Deviations from the respective means.
    obs_dev = obs - np.mean(obs)
    pred_dev = pred - np.mean(pred)
    # r^2 = cov^2 / (var_obs * var_pred), up to the common 1/n factors
    # which cancel out.
    cov = np.sum(pred_dev * obs_dev)
    return (cov * cov) / float(np.sum(obs_dev * obs_dev) * np.sum(pred_dev * pred_dev))
def get_k(y_obs, y_pred):
    """Least-squares slope k of the through-origin fit y_obs ~ k * y_pred."""
    obs = np.array(y_obs)
    pred = np.array(y_pred)
    return np.sum(obs * pred) / float(np.sum(pred * pred))
def squared_error_zero(y_obs, y_pred):
    """r_0^2: coefficient of determination for the regression through the
    origin y_obs ~ k * y_pred (slope k from get_k)."""
    slope = get_k(y_obs, y_pred)
    obs = np.array(y_obs)
    pred = np.array(y_pred)
    residual = obs - slope * pred
    centered = obs - np.mean(obs)
    return 1 - (np.sum(residual * residual) / float(np.sum(centered * centered)))
def get_rm2(ys_orig, ys_line):
    """rm^2 metric: r^2 * (1 - sqrt(|r^2^2 - r_0^2^2|)), combining the
    ordinary r^2 with the through-origin r_0^2."""
    r2 = r_squared_error(ys_orig, ys_line)
    r02 = squared_error_zero(ys_orig, ys_line)
    gap = np.absolute((r2 * r2) - (r02 * r02))
    return r2 * (1 - np.sqrt(gap))
\ No newline at end of file
python run_experiments.py --num_windows 32 \
--seq_window_lengths 8 12 \
--smi_window_lengths 4 8 \
--batch_size 256 \
--num_epoch 100 \
--max_seq_len 1000 \
--max_smi_len 100 \
--dataset_path 'data/kiba/' \
--problem_type 1 \
--is_log 0 \
--log_dir 'logs/'
RMSE : 1.485262829572667 ; Pearson Correlation Coefficient : 0.17810684496134926
\ No newline at end of file
6.467522
6.175456
5.5424833
5.7581935
5.435541
5.429863
5.536461
5.5640984
5.882646
5.4769373
5.488556
5.625363
5.313736
5.808272
5.6363673
5.990212
5.051353
6.1895947
6.319407
5.8177853
5.7808843
5.7299786
5.7299786
6.176168
5.6491227
6.741143
4.996721
5.6749225
4.9942102
5.852823
5.673327
5.219913
5.455124
5.543934
5.543934
6.3291707
5.3239236
6.5188212
5.399699
6.225211
7.1990366
6.022619
6.317709
7.2006974
6.2298374
7.3442764
5.3265986
6.6267376
7.236839
6.809347
5.998489
5.5369587
5.222605
5.4759645
4.7184076
4.7446933
4.7446933
4.9453917
4.8579555
5.688682
5.3638024
6.6970015
6.922022
5.2373786
6.1030083
5.9858603
6.2201657
6.695669
6.3168545
5.911902
7.0040517
5.64722
6.207646
6.664865
5.4331574
5.383293
5.3065886
5.8029103
4.62703
5.880394
6.6966767
4.939882
5.5465236
5.925244
6.534122
6.702632
6.4006934
5.208909
4.9819055
5.177346
5.282094
4.9699063
4.968585
5.7913265
4.2424197
4.4141154
4.4141154
4.504127
3.9476442
5.917233
4.9871855
5.5201497
5.50072
6.2361526
5.6124153
6.410565
5.0378847
6.1442685
5.4821434
5.0435605
5.075498
4.989577
5.8059764
6.0818005
4.9550357
5.2234464
7.094842
5.3365226
6.8286414
4.292056
4.778266
6.6305947
5.652393
7.5110865
5.3670945
5.2828608
5.45501
6.0797043
6.1816125
6.1614866
6.103062
6.0777783
6.0552096
6.1117387
6.0335836
7.0962687
6.6434574
6.930296
7.367653
5.395125
5.035877
5.3762827
6.1513405
6.1658254
5.8107777
5.7238894
5.7267556
5.7915087
5.854144
5.7156963
6.226131
5.684789
5.570505
6.7114034
5.4907975
6.784401
6.245473
5.050832
5.5548
6.0875235
6.6564093
4.6011233
5.4594274
6.0246763
6.407019
5.9934154
5.359483
5.359483
5.6769543
5.6910224
6.1209445
5.5472794
6.472957
6.8773623
6.288406
6.348208
7.244477
7.1262827
6.449307
6.6500163
6.635145
5.7422414
5.5459576
6.482847
6.79241
7.0690594
5.228507
5.5705495
7.0427613
5.228507
5.767734
6.946606
5.1587777
6.2449603
5.3617554
7.282804
5.799242
6.132366
6.6706414
7.5444317
5.9720287
6.4760647
6.1630926
7.4778113
7.1809187
6.490697
7.2526608
6.5888877
6.2422147
7.1239395
6.377097
5.5686545
6.1431546
5.850873
6.8660583
6.557059
6.366646
7.0568414
6.088338
7.244094
5.8533297
6.2203307
7.5047336
5.7495475
6.666377
6.3046403
4.3621182
5.652291
8.111586
6.066658
6.2466297
7.560285
6.1744814
6.593776
6.09479
5.6612544
5.939238
7.0114264
6.1705194
5.53363
7.4440317
6.313761
5.314995
6.119054
6.1605816
5.9663806
7.089019
6.0204525
6.3210897
7.2732425
6.473473
6.3467903
6.798541
6.2466297
5.4906287
5.604602
6.0458856
6.3662863
5.373706
5.722967
5.6015425
5.9969525
6.857416
5.22418
5.475809
6.41353
6.5152855
5.2457695
6.7597604
5.3962045
6.0832744
5.25008
6.612254
5.2253714
6.063141
5.4104376
5.572791
6.1929307
6.3741198
6.7629385
7.553162
7.4631696
6.5738697
6.4958653
5.3231397
5.4144382
6.32794
7.419338
7.4119635
6.5656724
6.2679086
6.078931
6.869893
6.956627
6.1463575
6.0671277
6.9165044
5.706223
6.5210114
6.0629053
5.8247347
6.540231
7.2961597
6.316755
6.149106
5.897488
7.3261623
6.429786
5.8642225
7.4559665
6.094894
6.3332133
6.0173855
6.1059537
6.1190944
5.7668066
5.9509315
6.3232346
7.4643135
7.394734
7.3374534
5.5829897
5.4383006
6.1083565
6.768558
6.7527885
4.825678
6.5371857
5.546511
5.6412683
6.221419
5.4170027
6.107479
6.7604346
5.6207657
5.266999
6.107479
5.537837
6.997796
5.4641423
6.148657
4.6983767
5.8901777
5.946473
6.5990458
7.178522
6.350192
6.383547
6.664338
6.483536
6.238896
6.2137227
6.8680735
6.239347
6.8917856
7.1500893
5.336475
6.185486
5.8683696
7.3107433
6.3226795
5.5120544
5.8082886
6.6652327
6.453342
7.2482004
5.4125047
7.7132316
5.2179446
6.0671873
7.1874647
6.0671873
6.366949
5.462262
7.044528
5.051142
5.9070535
6.2963076
6.3234015
7.3026643
6.5437765
5.8900414
7.804816
7.0186996
5.9860735
6.373691
6.1429577
6.501974
6.8024426
7.838379
7.1103787
6.674358
6.6560836
5.846259
6.8628078
7.0601616
6.767179
5.414812
6.167569
5.811791
6.2401786
6.136362
5.629062
5.39774
5.7583795
5.921384
5.883592
6.8214016
6.309103
7.3759375
7.180337
5.689269
5.732831
6.8184648
6.536885
7.0938787
5.6343565
5.703103
7.208612
6.1457124
7.0331993
5.978574
6.5694127
5.490088
6.3470507
7.191054
4.9275794
6.189489
7.3133535
5.484551
6.9921017
6.465222
7.1819453
7.2947345
6.1080327
6.088584
7.2646194
7.166979
6.284159
5.817567
5.276581
5.486908
5.693727
6.1640515
6.6251516
5.727744
5.416168
6.245607
6.894817
7.4190426
7.395757
5.549125
6.1100225
7.7290735
6.2098575
5.459329
5.776312
5.5599875
6.7275357
5.513232
6.311738
6.1481915
5.357658
5.6171327
5.504918
6.066611
6.1421914
6.2803297
5.6643953
6.0860615
6.061245
6.2403326
5.354926
6.075662
5.520243
6.2785616
4.694001
6.130923
5.403153
4.9646115
5.2796474
5.330424
5.3950186
5.4156322
5.635037
7.7476335
4.9557385
6.0133843
4.9308424
5.2614126
5.4945397
6.340603
5.0584426
5.7315745
5.065018
4.2212844
6.0483475
5.234022
4.76302
5.5551796
5.23341
5.210362
5.110138
5.5016603
6.439883
6.080308
5.6813083
6.3253994
5.819166
6.5111785
5.585166
5.1373324
5.597088
6.3559093
5.7910986
5.461396
7.5587125
5.7162747
5.302396
5.75258
7.333736
6.084009
5.0692205
5.241418
6.0477796
5.613452
6.829801
5.3483257
5.9041533
6.10507
6.387156
5.2621484
6.360398
5.093281
6.4135156
5.3720284
5.6075544
5.870746
6.3124127
5.5370436
5.6837006
5.7438555
5.536025
5.910895
5.8842626
6.0206275
4.872495
6.0826097
5.8955636
5.772381
5.00487
5.4394894
5.624068
7.2013736
6.2933707
6.0808144
5.536216
5.9835877
5.1118097
5.260807
5.570847
5.774814
5.0330424
5.633846
5.506184
4.79763
5.709769
5.7201366
6.673873
5.5720124
5.3263655
5.802265
5.8956504
5.971075
6.1701164
6.546115
5.7940593
5.654686
5.650679
6.114338
6.9369965
5.005792
5.2866864
5.535933
5.6892242
6.296921
5.7082005
6.0808687
5.5533624
5.8792586
5.7111464
5.7488804
5.1158767
5.456522
5.2944965
4.844138
5.165123
6.272952
6.227293
5.6877546
6.0242944
7.09873
6.280388
5.0570455
6.835377
5.0329375
6.271915
6.456241
6.5826225
5.275297
4.9394565
5.844745
4.992376
4.359143
5.8388567
6.202779
5.31049
5.7588096
6.6370883
5.344851
5.0862412
4.9653134
5.4524508
4.584369
5.9576993
5.0613866
5.6761823
5.4489317
5.2452593
5.9499335
6.341624
5.222492
5.3182554
6.1915507
6.6565595
6.2678857
6.167898
5.216836
4.9330845
6.163673
5.761212
5.777196
5.4803343
5.0002365
5.363882
4.7241426
6.001968
5.011861
5.2435713
5.385721
4.9913692
5.4485836
6.195839
5.2782316
5.15397
5.2147355
3.947084
5.866563
5.2386394
5.3488293
5.537148
6.279925
6.5004563
6.711674
5.123827
4.6733575
6.192616
6.230602
4.963586
5.543849
5.2388268
5.873175
5.298437
5.753235
5.72702
6.265403
6.061049
6.0983605
5.6412663
4.365433
5.0105605
5.1227226
5.488341
5.3545985
5.026865
5.5988894
5.5406065
4.925748
5.3008337
4.4458466
6.202154
5.4998484
5.4405184
5.9828353
6.2485967
5.361987
6.252941
5.6222343
5.592175
5.91426
5.3488817
5.0730023
6.5664697
5.6709094
5.4205346
5.4755836
5.9263487
6.1135435
5.2365828
6.389742
5.8542604
5.555178
5.5159826
6.2687693
5.486962
6.400485
4.9783463
5.6938205
6.247785
5.1759753
4.9569697
6.900387
5.211111
5.1996584
5.251429
5.4088726
5.475929
4.3215995
4.6796637
5.9881783
6.951918
5.158981
4.9406147
6.8510613
4.1983995
5.2218494
5.8812246
5.5123997
5.194318
4.9121003
5.144572
5.638291
5.4011235
5.3436866
5.499963
5.5898023
5.722905
5.687527
5.5680947
5.8099523
5.5097027
6.48778
5.736049
5.2097087
5.1820335
6.6507125
6.616532
6.221067
6.3871517
6.3871517
6.286934
6.9081984
6.286934
6.9081984
6.6990314
6.320643
6.2955394
6.5417485
6.419246
6.7017126
5.972306
5.593537
5.43418
7.1164546
5.905944
6.4156313
5.312497
5.945516
6.455089
6.5600963
5.989281
6.6487565
6.845637
7.55256
6.2125487
6.355933
6.262196
6.369975
6.230882
6.320643
6.436056
6.3782277
6.570935
6.489056
6.5716195
6.3782277
6.3010693
6.5750556
7.3701096
7.0298605
7.1175966
6.292645
6.0925097
6.094389
6.2471895
6.1777945
6.2675815
6.659286
5.6245112
7.3324537
5.670894
4.9676332
5.06271
4.7458615
6.5478196
5.2051744
5.213387
5.0545807
6.2032223
4.933809
6.7999773
6.37943
6.436173
5.7264333
6.7405252
5.573129
5.869438
5.4759645
6.378834
6.333488
6.100217
5.507454
5.4479175
4.9235926
5.3989277
5.085065
5.908596
5.7883053
7.2705007
6.94589
6.8676815
5.3989096
5.220262
5.0723066
4.967504
5.1772046
4.947744
5.631586
5.166439
5.656475
5.447201
5.5681186
6.291062
5.803462
6.7605786
5.9466567
5.932242
6.043377
6.3112845
5.3567986
6.4313383
6.067965
4.9574113
6.5868063
6.824912
7.011283
5.621954
6.5735836
6.233931
5.930412
6.700928
5.7168355
5.627964
5.7621074
5.531625
6.2479553
5.6877246
5.961213
5.1570897
6.146668
5.6236563
6.710904
6.6558375
5.3288183
5.313829
6.6436534
5.250808
5.056483
6.291795
5.5476236
5.373606
5.647381
6.8223333
5.8831334
5.464456
5.223276
5.7664294
5.005566
6.543031
5.7644773
6.0466204
5.197456
5.066023
6.1397996
5.0928364
5.449444
5.5406613
5.302817
5.7155867
5.690205
6.085121
5.6479006
5.968774
5.277886
4.9721637
5.141983
5.868268
5.0960402
5.4958825
6.888697
5.689254
7.246741
6.4569216
7.246741
6.7176867
5.96806
7.246741
5.127513
6.4972453
6.092105
5.9237747
6.476934
7.319737
6.2266574
6.9943686
6.503329
5.993003
6.2854657
5.5388966
7.033675
5.250325
7.0303593
5.784215
5.7090406
5.5398903
5.4250836
5.4678726
5.880919
4.5848045
4.5973973
5.8356805
4.8457956
6.2677946
5.804833
6.1843605
5.511601
5.745906
5.9973736
5.3361974
6.899275
5.052643
5.3279963
6.2756667
6.748947
6.4433417
5.4147453
5.7602544
5.995493
6.517574
5.7964764
5.756449
5.7015796
5.3037014
6.013344
5.8427305
6.1242275
5.328395
5.36347
4.0370135
5.7501745
6.083869
6.586791
6.8433857
6.5928197
7.375711
4.184068
4.9256296
5.274472
6.4419727
5.377065
5.3647532
6.4676127
5.707027
6.492631
6.507264
6.786269
5.4085755
5.961538
6.421152
6.009842
6.184998
6.194706
5.8600044
6.194706
5.766577
5.506866
7.148358
7.73028
5.235998
6.7807255
7.6936836
7.677403
6.0699534
5.7997165
7.2714953
5.933518
5.5884705
5.967518
4.85046
6.3284354
5.889121
6.0699534
6.2526226
6.8087096
5.1664944
6.893789
7.710523
6.2668576
5.4497004
4.7879705
6.4280524
6.633815
6.5708776
6.8266087
5.4514575
5.875547
7.6783714
6.2668576
5.0025067
6.1850924
5.308676
7.1538615
6.960084
7.5788445
5.6230187
7.233242
5.308676
7.1039195
5.5000815
5.891763
5.494062
5.9090996
6.8754225
7.266729
5.9655166
7.469999
5.796821
7.469951
5.4577727
7.266729
7.1538615
5.500397
5.809769
5.6330376
5.90072
5.5874615
6.1716623
6.022232
5.890116
5.891763
5.8908978
5.830562
6.005416
6.7033906
6.6815243
5.962167
6.2592874
6.335843
7.4758735
6.6545043
5.699082
5.7970614
5.948544
6.4484124
5.9513273
6.690463
5.757504
6.0261083
5.9205647
7.078332
5.9386554
5.5954394
5.913443
5.7262807
6.020115
6.7479596
7.0793085
6.143913
7.337945
6.498925
6.609576
5.9278646
6.4115324
6.9145627
5.4193535
5.529038
5.658442
7.3179717
6.8085446
7.2937202
6.300633
6.0707784
6.425224
5.8391795
7.0364804
6.802596
5.657187
6.4053726
5.361449
5.6154866
6.710414
5.7986093
6.180119
5.641009
6.608839
5.8286757
6.857249
5.192629
6.9102287
5.601957
7.016434
7.333718
5.3080096
6.767535
5.146874
7.1538615
7.0937257
5.817904
7.1551766
5.262034
6.5052304
5.308676
6.5945153
7.3038073
5.4937625
5.4190507
5.19725
5.4331875
5.551677
5.8899326
5.1104307
6.786726
5.603208
6.4982767
7.0305605
6.9125113
6.8070955
6.617151
6.819119
7.043898
6.9137826
5.2608347
5.3874426
5.43459
6.499948
5.4580803
5.991781
5.9642334
7.216374
5.8875966
7.288489
6.671123
5.9083867
7.723083
7.323798
5.9325633
5.7545996
7.660396
4.497191
6.6828775
6.8184133
6.565157
8.335036
7.280432
6.1306925
5.6986794
6.42279
7.158138
5.1008873
5.947463
6.8608756
6.757793
5.6224194
7.133646
7.478069
7.3342133
5.6451106
6.8659325
7.2923803
6.1306925
6.813074
5.6451693
6.6828775
6.11627
5.6391244
6.8865876
5.285064
7.5761304
7.8262296
7.413522
7.5259423
6.931205
5.7975683
6.931205
6.427707
5.7830963
7.891785
5.355285
6.575018
6.205735
5.4644556
7.4474983
5.81593
7.4846444
7.1167746
5.355285
5.4839234
6.5376797
6.507361
5.8048496
7.158224
7.40506
7.189181
6.0103397
5.3693852
6.033007
7.411433
6.7043843
5.350382
6.0198426
7.313643
5.242876
6.7147474
6.1955814
6.201557
6.033007
6.895689
6.94712
5.5774407
6.552561
6.1911273
5.3538375
6.4487123
6.7837596
6.6780787
6.46029
5.9350615
6.3753114
6.220034
5.4951243
6.2052674
6.199719
5.350382
7.470936
7.1710763
5.355625
7.1031995
7.4039483
5.5698967
5.886789
5.8479943
6.0936513
6.4315224
6.5594063
6.4986777
6.689513
5.0134554
6.1502748
5.747095
6.163845
5.749958
6.376411
6.679356
7.57837
6.3566575
5.8474493
6.0372753
6.6605186
6.0885553
6.027188
6.6392574
6.665908
6.939761
5.742042
7.1039853
6.943179
6.93992
6.183739
6.2344203
5.494151
6.3465204
5.583018
6.922671
6.0705886
5.8718204
6.9564886
6.3693786
7.1693697
6.714978
6.911027
6.6053348
6.863506
5.808535
5.363819
6.82491
7.2623053
5.7913437
6.70864
6.207999
6.9364448
6.6619096
6.8284225
6.6435046
6.0850997
6.819591
6.1630807
6.553617
6.414538
6.4792995
6.830395
6.1988306
6.70383
5.893571
7.480984
6.038288
6.088005
6.225726
6.32311
6.6287565
7.309803
7.3003874
6.6930227
6.7055793
6.5289474
6.9721994
5.653107
7.168499
5.2066326
7.397833
7.5011764
7.180694
6.367112
6.9869504
7.1919727
6.5436597
6.739212
7.4145775
6.581346
6.890866
5.590059
6.701334
6.2036176
7.018711
7.4155045
6.621374
5.4439697
6.0163956
6.1863523
6.7419724
6.14213
5.600849
6.889785
6.900848
6.8721566
6.1751094
6.1567
7.4597073
6.334549
7.5797625
6.615716
7.4721603
6.3857384
5.6308236
7.3923736
6.861946
6.1686893
6.5457377
6.863506
6.708418
7.3462076
6.4951878
6.6720157
6.621374
7.3204875
5.881873
7.1606784
6.4986777
6.5406203
6.802854
6.980467
6.2821507
6.9008846
6.849551
6.9547825
6.101282
6.113383
6.761018
5.404313
7.764529
5.399896
5.807212
5.5574293
6.936211
5.5323634
7.32933
6.9930487
6.8210154
6.7600226
6.761018
5.5657625
5.404313
5.5657625
7.4274545
5.880738
5.8185554
6.936211
6.7175026
5.6989126
6.97765
5.5470405
6.8210154
5.8185554
6.97765
5.5574293
7.764529
5.6989126
5.78929
5.5657625
7.2127204
5.880738
6.9452853
7.734715
5.516134
6.7001934
6.5111785
6.9696507
7.279464
6.8160696
7.0648828
6.47734
4.892709
6.838542
5.8903117
5.8013043
6.8162866
6.095815
5.2835183
6.1388106
6.109304
5.336598
5.2365756
5.575399
5.9057035
5.8066893
5.9752703
5.636675
6.0353594
5.3802185
5.6251574
5.6704345
6.209346
5.9591203
5.8158846
5.7781544
6.062674
5.98491
5.931804
5.9149375
5.553958
6.720748
5.8682327
5.7852535
6.119146
5.6444488
5.579783
5.8886986
5.8848295
5.778422
6.050983
5.541333
6.1166854
5.883996
5.420611
6.6455793
6.6455793
5.8289485
6.1575556
5.9673476
6.44633
5.8289485
5.470754
5.822579
6.4728546
6.834802
6.7331834
6.486193
7.068154
6.87085
5.0722394
6.44633
6.897445
6.238226
7.011872
5.0722394
6.452281
5.420611
5.7927575
6.795473
5.210459
6.2383
6.795473
5.210459
5.0722394
6.897445
6.3392963
5.9988317
6.9227586
6.238226
5.8289485
6.9227586
6.491039
6.413065
6.5267844
5.6489882
6.5463734
6.221266
6.4356327
6.492518
5.191523
5.5414495
5.0106807
5.671316
6.2691092
5.3167467
4.599587
6.686434
6.904624
5.8677015
5.136459
5.28131
5.5673103
6.093651
5.586382
5.614611
6.299219
6.267537
6.4146814
5.166318
5.6953497
5.1839194
6.616153
5.7625847
6.449054
6.4484005
5.2185993
5.0258117
6.510044
6.2456594
6.3107443
5.253698
4.874043
7.0243454
6.813769
5.0963154
6.1877913
6.256463
5.983534
5.1863794
5.6602716
5.964029
5.453381
6.30822
5.841526
6.6644497
4.994491
5.104412
5.8758774
6.279933
5.284153
4.6679087
6.3260183
6.086885
5.315095
5.96516
6.289207
5.3958697
6.084689
8.099635
4.9490347
6.0606585
6.989985
6.996923
6.609996
6.6182337
6.9544344
6.7357354
5.8761187
6.306437
5.1342115
7.0245657
7.105329
5.714878
7.656184
6.3806853
5.37554
5.5982084
6.7982135
5.150727
6.161255
6.629156
5.7284007
6.578259
6.8417006
6.838354
6.675882
4.937349
5.436128
7.474793
7.087658
6.6610703
5.464772
6.625414
6.1665034
6.930605
5.976609
6.8952274
5.7105894
6.0348577
5.8940177
5.6628723
6.7803946
5.2164364
6.085289
5.243691
7.301918
5.984678
6.246605
6.6402817
6.6718707
5.058132
6.271823
6.5274215
7.1961713
6.4966507
6.600976
6.814276
5.9806075
5.921116
7.263263
5.3209357
5.8854475
6.9353766
5.8740463
6.7519217
6.0534625
7.3050323
4.8728805
6.2462926
6.193253
6.9410944
7.206945
6.8756843
4.8776455
7.161649
6.684436
6.1178584
4.964346
6.058733
4.9377127
7.071442
6.4998837
5.9685774
6.2168183
6.122586
6.344562
6.93882
6.207654
6.8316703
6.2324185
7.4262877
6.852638
6.9488473
6.967378
5.7041483
5.752214
6.9887104
5.8168845
5.366215
6.4164557
6.99331
6.3117337
5.912122
6.034814
5.4360113
6.3156056
5.768529
5.2735353
5.8346186
6.3373537
6.710958
6.3504143
6.7891693
5.733319
7.1769357
6.5388384
5.989903
5.125636
6.233138
6.3925943
5.968524
6.3226624
6.5813217
6.171594
6.1498837
6.390574
5.9392643
5.3613753
5.3613753
6.6410275
5.3675675
5.3613753
6.634193
6.1924634
6.391904
6.871654
6.219294
6.629328
6.896392
7.01068
6.6230016
6.4824905
6.411081
6.737173
7.0029035
6.5056505
7.084986
6.6073723
6.4475627
6.8394356
6.7053137
6.9089494
6.9577923
8.258459
6.417294
6.888794
7.0931168
5.9251447
6.390502
5.8794794
7.0603194
7.023944
6.0059996
6.430966
6.293146
7.1824613
6.755937
5.7323427
6.4565535
6.799849
6.4493523
7.002536
6.745455
6.3426175
6.112106
5.8313427
6.957606
6.9475193
6.5767937
6.4443398
6.692452
6.1842217
5.4548492
6.7909026
7.249493
6.330179
6.2062163
6.2923346
6.204242
5.9851522
5.2534046
6.065061
5.967025
4.7989936
5.1041102
5.1041102
5.703567
4.978741
6.6864715
5.451647
6.2062163
5.1269336
5.5204434
5.869374
5.483179
6.054952
6.0446043
5.8325305
6.161826
5.9581776
6.032217
5.656331
5.1506643
5.3290267
6.3243265
5.403124
6.7040186
5.7676363
5.347377
5.5251346
6.179441
6.282246
5.9100685
5.9557357
6.6209874
5.7568254
5.226681
5.086042
5.084005
6.8140683
5.8810186
6.013246
5.8042626
6.469317
6.7973375
4.9946556
5.5528173
5.587806
4.7347746
5.4313626
5.963972
5.55007
4.9913373
6.499063
6.198709
5.7344494
7.164914
4.91922
5.962316
5.594864
4.726662
5.3363442
5.303285
5.0192604
4.9645977
5.559672
5.0866556
5.702638
5.6967287
4.817513
5.1762395
6.8561244
6.0643682
5.52713
5.0605087
4.628874
6.078061
5.3982654
5.8325305
6.725618
5.285354
6.594067
5.810192
6.011201
6.21154
5.524951
4.939838
4.860134
5.4144764
5.152231
5.2471013
5.5232177
6.279133
5.612306
5.8928256
4.715712
6.3039327
5.1076765
6.1576266
6.299791
4.497044
6.1354585
5.3971553
4.7871237
5.837885
5.6793804
6.369365
5.8135138
6.672243
5.2631044
5.333174
5.70223
5.0657125
5.8304696
6.381367
5.7556
5.70386
5.784319
5.225506
5.737361
5.878706
5.2299895
5.5232844
4.896603
5.8859997
6.8943076
6.884344
7.0199056
6.8824687
6.168615
5.6734533
5.938505
5.8402905
7.5200944
6.2119412
5.9067955
5.6050105
5.2653317
5.2059035
5.3764734
7.3070383
5.121628
5.302547
5.1488953
5.073028
6.330474
5.5348835
6.254263
7.032158
6.266541
4.8056045
6.185329
4.8056045
6.210166
7.032158
6.5320005
6.1325426
5.3772693
7.032158
6.045276
5.4125586
6.1912265
6.5712376
6.696406
6.5712376
5.881195
6.696406
7.1256957
6.266541
5.2304263
5.968301
5.5359416
5.946769
6.4270353
5.306702
6.241101
6.5984855
6.4078135
6.0889125
6.4195113
6.5750647
6.564784
5.58414
5.5847282
5.7705774
5.9008756
6.248256
6.449347
6.696406
6.302776
6.5434055
4.715931
5.7887897
5.3597546
5.9008756
5.3460603
6.629929
7.207822
5.8611436
6.5493627
5.1872463
7.207822
6.5493627
6.206546
6.6267595
6.3123145
7.5563545
6.745877
6.730434
6.305086
6.2199597
5.887241
6.093286
6.54374
6.803407
6.1036186
6.372928
6.99878
6.2782784
5.7770004
6.8059063
6.104658
5.867501
6.714566
6.619787
5.639694
6.827391
5.0718384
6.7611136
6.4251614
6.0556
6.5472074
6.7874436
6.392543
6.572409
6.093286
6.730434
6.7283907
6.7611136
7.020267
6.421667
6.305086
6.8642035
6.709178
6.4529314
5.9857283
5.9857283
5.9857283
5.9857283
5.9857283
5.9857283
7.633851
7.3244433
6.7036314
6.310724
6.5329785
6.5929947
5.8618703
5.706366
6.3075547
7.196339
5.8618703
7.4691744
6.266541
7.196339
7.1543555
4.715931
7.2166243
6.449347
4.8056045
5.705721
7.1543555
5.9802284
7.4237223
5.705721
5.909997
5.8047705
6.408692
6.675869
6.8563094
6.2897015
6.208653
4.9841747
7.753767
7.2476044
6.3989143
6.8494897
5.382583
5.2271442
5.6690764
5.850965
6.150583
5.4191294
5.4166822
5.9547234
5.5940204
5.12253
5.8302155
6.1483936
6.6816854
5.729907
6.02498
6.190885
5.7683287
5.8202615
4.7404923
5.6471686
4.830724
5.735604
6.503758
5.480142
5.4643526
5.913085
6.2220254
6.46555
5.8918805
5.812311
5.621966
5.440773
5.9304795
5.9624453
5.778366
5.5331364
6.6829753
5.2990212
6.7413177
5.9778285
6.110689
6.6678534
6.0362663
6.3602934
7.2238183
6.1834545
6.09217
6.6472893
7.2145658
6.609166
6.4584284
5.7264094
6.004381
6.3551064
6.86889
7.275844
6.0574303
6.273188
7.361916
6.518585
5.7264094
6.089704
6.945603
6.7924833
5.9155984
6.8057795
6.273188
6.996869
6.3595424
5.7264094
6.937701
7.157386
7.0525227
7.176013
6.3581676
6.7461414
6.030363
6.273188
7.4644194
6.1666574
6.4856677
6.824707
6.0735745
4.8937964
7.4275713
5.842707
5.590454
6.665908
6.1751094
5.4439697
5.8474493
5.2686744
5.3236814
6.2928267
5.6969624
7.7352457
6.8126435
6.5901375
5.1797843
6.3946877
6.064659
5.639618
5.5225515
6.3346405
6.4140244
5.7628813
5.7628813
7.073425
7.1590343
6.4880567
6.5973787
6.4605
6.5495114
6.4723654
7.4309497
6.4920187
5.363725
5.363725
6.545209
6.783951
5.9570446
6.2748013
6.2770853
6.3901615
6.2841473
6.705341
6.2602153
5.9027004
5.599551
5.9539185
6.516948
5.836895
5.720774
6.421318
6.634815
7.008399
7.3101745
7.436628
5.791473
6.443822
7.540551
7.6198635
6.3792706
6.577402
6.0336556
6.6158924
6.7791862
5.207016
6.7588897
7.844326
6.2823796
6.703623
6.3063498
7.3694186
6.7811313
6.801373
6.9923296
6.7802753
6.415253
6.406323
7.19898
8.035133
7.239584
7.2319098
6.855592
7.358061
7.7228737
7.3471255
6.6850677
5.441019
6.7229156
6.3937416
7.397027
7.01828
5.386232
7.442019
6.870635
5.9424877
5.959657
6.917614
6.3962884
5.608596
7.933974
7.8642387
7.593652
6.7859898
7.6548233
7.0926714
7.4650073
7.274835
6.8358603
7.576113
7.26126
5.768214
6.202369
6.0876827
6.538478
5.649271
5.921262
6.3254766
6.147927
6.515933
5.9064384
6.538478
6.846441
5.921262
7.2793765
7.322479
6.0876827
6.846441
6.16077
7.322479
6.829197
6.874249
5.9064384
6.2150803
7.0031123
7.142586
6.4135585
5.916401
5.3300714
6.16077
7.0092688
6.3254766
6.041011
6.147927
5.160581
5.649271
7.0092688
6.9832053
7.0031123
7.142586
6.2150803
5.916401
6.515933
6.874249
5.776429
5.179457
5.160581
5.3300714
6.147927
5.179457
6.4135585
6.9832053
6.5818276
5.776429
7.65558
5.1435676
6.1266737
4.9849977
6.0107045
6.124716
4.896845
7.310674
6.701244
5.5520854
5.8173804
6.836
6.508478
5.8555493
6.7545896
6.061616
5.5693445
6.366635
5.9226646
6.3179035
5.6914854
7.0459146
5.9947195
5.5198374
6.7556376
5.488785
6.6878595
7.0701385
5.632714
5.488785
6.6878595
5.479324
4.8022003
6.549372
5.4726763
4.796402
5.2994943
5.4624367
5.1461525
5.3280177
7.1630106
7.189575
7.323072
5.6898046
5.764552
6.2346563
5.0439134
5.566765
5.8321614
7.482371
5.654193
5.0227966
5.4175177
5.793097
5.1891603
7.249434
6.9494905
6.692452
6.7909026
5.8313427
6.4443398
5.117519
4.969426
6.3044095
7.1654277
6.978273
5.7630506
6.1041512
5.816536
7.1312404
6.3851147
7.3398256
5.9146333
5.793097
5.5414186
5.5925574
7.1681724
5.5963306
6.2440205
5.9414506
6.2716227
5.342479
6.002595
4.8309937
6.3972225
6.441071
6.5236244
6.4115953
6.481668
5.4318457
6.335093
5.051272
6.789253
4.9637017
5.997874
5.3621125
6.7521963
5.549838
5.2304025
6.458685
5.4439697
4.5430627
5.1819153
4.936701
6.6406226
6.013207
7.1670628
5.791503
5.8600044
6.5929947
5.0783725
5.7297564
7.151771
5.792663
6.3942857
5.7756777
5.3302484
5.3856387
4.998325
6.3902006
6.4683266
5.9130745
5.697284
6.3861585
5.1710477
5.84126
5.054979
5.161748
6.8855643
6.509545
4.9780416
4.6288743
5.893382
5.458822
6.5929947
5.396231
6.0478344
7.151388
5.472148
5.3613753
6.0795956
5.954827
5.6403384
6.8229175
5.96385
6.2803507
5.4260345
6.003474
5.90873
5.068148
6.7081003
5.6237574
5.9156036
6.20552
6.1944494
5.103165
5.448076
6.229413
5.321262
5.42142
5.191296
5.5573125
6.183935
6.9862604
5.9582157
6.7052107
5.578901
5.5467825
5.4386425
4.657817
5.4645243
5.394047
5.680804
5.23331
5.632147
5.3293037
5.6983004
5.2864676
5.458678
6.553249
5.8577437
5.387469
5.1316023
5.258217
6.5670776
5.4777985
6.364756
5.8544817
5.3393674
6.5614405
5.806353
6.805071
6.8549705
6.6222363
5.329012
5.3986382
5.580193
5.5056796
5.126788
6.4482384
5.718361
5.067653
5.5428834
5.4606895
4.830859
5.467882
6.005772
5.458219
6.384924
4.9810133
5.417175
5.5332274
5.594346
5.6218033
4.785696
5.573224
5.3005395
6.0044074
5.3885126
5.9977345
4.6011233
5.969474
6.3812084
6.3211975
6.5309973
6.7326913
4.94913
5.924118
5.5111303
5.6410227
5.7037992
5.687299
5.4345837
5.507501
5.803969
5.4489136
5.8247304
5.4088726
6.0947556
5.258217
5.3665876
6.5929947
6.489723
5.112482
6.3868294
6.331831
6.356875
5.7077713
6.6891823
5.7072167
4.5430627
5.393888
5.1954904
6.254587
6.9102287
5.5712314
5.3885126
5.7756777
6.545688
5.4136405
4.9245124
5.3926406
6.9398413
5.4600725
5.2927065
5.8954806
5.758978
5.1536565
5.906547
5.1954904
6.1635704
5.613791
6.3214912
6.2841077
5.404313
5.649161
6.6616483
5.716136
5.8577437
7.386524
5.3898168
5.8805575
5.4844666
6.696406
4.296452
7.231281
6.3793616
5.738969
6.78125
5.643633
6.3860025
5.9562473
4.8628693
5.2466655
5.6291747
6.32467
5.0683675
6.517736
5.955325
6.654591
6.2360516
4.8251915
4.9508615
5.8747706
5.575154
6.179828
5.515387
5.732545
4.9928393
5.7124543
4.9841185
5.6373434
5.210205
6.7779403
4.785696
5.2912946
6.337707
5.6388702
4.8755174
5.697323
4.913454
5.872951
5.143903
6.3294034
4.2287254
5.9581776
5.500564
5.4695253
6.1849694
6.145796
5.5224404
4.9601436
6.7825117
6.2582684
5.171891
5.4694295
5.7206063
5.7361574
5.7455497
7.101007
5.4775743
5.8247304
5.967657
7.275006
6.2976384
5.5106735
6.616783
5.792663
5.2035565
5.3227415
5.8785
5.7449207
5.520708
5.810928
4.972129
4.9813147
6.3966904
5.482785
5.63081
5.8641477
6.6450086
5.352088
5.3322296
5.165727
4.5430627
5.579981
6.1755667
4.89529
6.4848285
5.1645045
6.1189384
6.1077356
6.1308722
5.7454777
5.270401
6.111031
5.404313
7.406449
5.5731516
5.0669465
6.111031
5.2109985
5.662803
6.9715033
5.970828
5.539181
5.7307296
5.6358924
6.537821
5.365324
5.883437
5.8760376
5.3372855
5.9737754
7.143226
5.515236
6.6628046
6.660112
5.5342803
4.8639097
5.8131795
5.2035565
5.7094584
5.962167
5.0812287
5.0690403
5.991651
6.050492
5.357571
5.1913004
5.3590655
5.901271
6.1262636
5.563857
6.5929947
6.949256
6.1830287
5.3565626
6.7407084
6.5436597
4.9169283
6.649489
4.8056045
6.7903647
5.291184
6.771679
5.190703
7.2657433
5.2911468
5.315289
5.027199
6.209844
5.120556
5.4544845
5.2825212
5.467882
4.9815717
6.0703673
5.2565007
5.616389
4.7857103
4.9852576
6.829197
5.352088
6.1400914
5.5977283
5.857426
5.5487347
5.187174
5.2864676
5.3120327
5.1604357
5.987527
5.55243
6.7486444
5.6913486
6.660112
6.034846
5.304135
5.4954214
5.7780843
5.4354787
5.7176723
5.578759
5.351402
5.594662
5.468805
4.7305474
5.497133
6.227457
4.7853174
5.4288287
6.1657615
5.8360243
5.198638
6.1765084
6.189051
5.415945
6.155344
4.9300656
5.583409
5.9787865
5.614335
5.258217
6.449
5.7538176
5.401879
4.8056045
5.208134
4.861804
5.158764
5.520353
5.6117415
5.728972
5.7927938
5.739141
7.857526
5.454875
5.8247304
5.4733872
5.2669387
5.1728573
5.7257504
4.7857103
5.017933
5.176077
5.4600725
4.5430627
5.509913
7.0282617
5.624545
6.549943
5.0664043
5.0360723
4.927418
6.964592
6.0670524
5.357271
4.973726
5.8764157
5.8323
5.191296
5.455521
5.6581197
5.5000715
5.6792316
5.233205
5.817236
5.6474333
5.405328
6.1796565
5.467526
5.3885126
4.900526
6.522156
5.208627
5.1417003
5.11362
6.704824
5.90873
5.886378
5.112907
5.90873
5.305006
5.221336
5.6202893
4.8586655
5.525492
5.952324
5.7454777
5.411204
7.1086564
5.3362865
6.007184
5.7276807
5.317792
5.200025
7.087147
6.5142217
5.382159
5.4394507
5.4737973
6.3902006
5.1400824
5.3565626
6.0822306
5.924946
5.1305737
5.7658415
5.2723975
6.265974
6.456534
6.6053085
6.718813
5.3699093
5.69949
7.090281
6.0329123
6.279035
5.1232476
5.9395356
5.258217
5.2768917
7.0589504
6.0967617
5.940691
5.9540744
5.1727777
6.2118473
5.1320553
5.7810836
5.179801
5.485778
6.308994
5.655888
6.1280494
5.6262813
5.6282444
5.3190145
5.8162494
5.593325
5.198212
5.791503
5.8796315
5.282864
5.2723975
5.5311418
5.501936
5.526906
5.7457466
6.0266857
5.613171
5.305006
6.3531313
5.054123
4.923471
5.4268613
5.906971
5.2669387
6.4360414
5.80696
6.133394
5.2304025
6.7693343
6.303871
7.190364
7.551679
6.099068
6.5598974
6.200362
7.1434317
6.893181
7.6898084
6.8266087
6.166194
6.8279295
6.0852942
5.987242
7.0758677
6.3492208
6.117755
6.176795
7.226082
6.406439
7.6452026
5.6524563
6.0194936
7.056963
6.997655
7.356374
5.8845797
6.166194
5.5555096
6.606661
6.8870244
6.941169
5.3704224
6.986261
6.3947344
6.9378343
6.7914295
7.489514
6.70887
7.3117642
7.8877735
6.663147
6.04455
6.4844275
5.6994276
6.239663
6.3301196
6.73893
5.7858152
7.7444754
6.958664
6.5268927
6.113218
7.143577
6.8727345
6.1731687
6.7365127
6.9751334
6.9013796
6.777693
6.491389
7.211866
6.776532
5.576763
5.7659464
6.543843
5.57827
6.709599
6.8726244
6.59173
6.631732
6.9985876
6.87736
7.489514
5.7071085
7.478836
5.8673086
6.6094766
6.31264
5.267159
5.078062
7.6211348
6.527839
7.064787
6.698535
6.037001
5.423989
6.3911605
6.444887
6.695321
6.7394996
5.736779
5.216387
7.045471
7.211866
5.9609942
5.67116
7.0122066
6.5268397
5.684079
6.491335
7.529561
6.29732
6.1150737
7.3355327
7.3476768
6.4410257
5.725683
6.767765
6.610578
6.195107
7.193926
6.7086015
6.503502
6.126737
6.401618
6.2685027
7.2798486
7.1237426
6.576234
5.5889177
5.5564
6.631732
6.4358587
6.504532
5.6332417
6.166194
7.407914
6.977326
6.3540125
6.530521
6.855708
6.266541
6.200362
6.580424
5.5941973
7.3038073
6.5268397
7.407914
7.648735
6.644659
7.3630214
7.3830495
6.7175026
7.9444075
5.551831
6.2259016
6.815142
5.5941973
7.5179734
6.8726244
5.418245
7.0771327
7.356374
7.985815
7.1549835
6.7114034
6.5329785
7.9444075
5.702716
7.032158
6.6356225
6.4751844
6.2157063
7.002886
6.3380733
4.9186764
7.059797
6.512761
5.5139256
6.4034224
6.8924704
6.171549
6.8826456
7.283107
6.5136123
6.070827
7.2054534
6.7876177
7.114401
6.3176
6.5958576
6.9308805
5.94277
7.2578697
6.7709775
6.6093802
7.0226016
7.6956105
6.57564
5.24795
6.5658736
6.09532
5.8043475
5.1326013
5.5831256
5.791923
7.2986455
6.9308805
6.8938518
6.401618
6.6153946
7.100722
6.6537275
6.0852942
6.6262474
7.2718153
6.8286877
7.033388
7.1461835
5.4987116
6.2271605
6.113218
6.5715804
6.4013047
5.693973
6.3374543
7.207704
7.200252
7.022265
7.061931
7.356374
7.785687
7.7303634
6.6093802
5.414932
6.418995
7.121869
6.9924088
6.873275
7.2392983
6.5481467
6.92484
6.8052654
5.17515
6.609482
6.4053726
7.087753
6.554045
5.2214603
7.283107
5.535821
5.67116
7.253158
6.612894
6.166194
7.3042836
6.184141
7.0771327
6.7876177
7.287871
7.4218545
6.1883736
7.0770483
6.535317
6.832174
6.600105
5.954684
7.4687033
6.279503
6.3911605
6.3022556
5.92696
5.5817537
7.177708
6.8230877
6.130222
6.73893
6.597915
6.3860326
5.8244486
7.4675035
6.73782
7.7303634
6.576234
5.9591675
7.528009
7.463671
7.776771
4.9186764
7.2441607
7.4598103
5.555604
5.925364
6.055718
6.818296
6.9219446
7.1237426
6.7072544
8.132364
7.765772
6.306419
6.5526924
6.629962
7.407914
6.88002
7.4218545
6.2322044
5.7431617
6.608367
7.1270494
5.7034698
6.631732
5.895033
5.6011834
5.2891855
7.322918
6.670511
6.1788745
5.8673086
7.0759993
5.709116
6.330988
5.2891855
7.190937
6.5914927
7.9216895
6.3681445
7.449414
7.2305517
6.6684856
6.875267
6.856292
6.200362
6.194875
7.7046857
6.291841
6.8455205
7.1434317
7.045471
7.417041
6.519386
6.5465546
5.832465
4.9847794
5.7858152
6.1484227
4.9847794
7.002103
7.3764777
6.9037566
5.7687716
6.5935707
5.7544155
7.657319
7.1032205
6.597572
6.5268397
5.524064
6.4302344
6.9143195
7.4307804
6.4751844
7.2464986
6.4751844
6.969932
6.877607
7.257072
7.658885
7.101384
7.1734056
7.180482
7.417041
6.795477
6.286934
5.5353627
6.3492208
7.200252
7.1749763
7.356374
6.184141
6.2948523
7.246741
7.418542
7.101068
5.7008233
7.287871
7.478836
6.384309
6.539104
6.0423512
6.3184676
6.3126426
6.515685
7.152094
7.6351986
7.0226016
6.932092
6.0898604
6.631732
6.1096177
6.200362
6.028014
7.0226016
6.3851595
5.4892125
6.57564
7.3830495
6.595982
5.2004204
5.913258
6.279041
6.4515824
7.018715
6.737904
5.695808
5.3844495
6.21219
6.7588897
5.564103
7.1074014
7.101068
6.4452515
6.082627
6.360474
5.6854997
7.7070384
7.3236628
6.4016185
6.117755
5.3257074
6.5914974
5.545625
6.1585646
5.8972793
5.69482
6.581993
6.7855964
RMSE : 1.4034081434737957 ; Pearson Correlation Coefficient : 0.2416971016625298
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
RMSE : 1.4512754880382956 ; Pearson Correlation Coefficient : 0.11439684637165645
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
RMSE : 1.2566115226505494 ; Pearson Correlation Coefficient : 0.3483553292295794
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
RMSE : 0.9898476924424743 ; Pearson Correlation Coefficient : 0.7531926430165059
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
from __future__ import print_function
#import matplotlib
#matplotlib.use('Agg')
import numpy as np
import tensorflow as tf
import random as rn
### We modified Pahikkala et al. (2014) source code for cross-val process ###
import os
# NOTE(review): `save` from torch appears unused below (models are saved via
# Keras' Model.save); confirm before removing.
from torch import save
# Pin the visible GPU and fix every seed source (hash, numpy, random, TF)
# so runs are reproducible; TF is also forced single-threaded for determinism.
os.environ["CUDA_VISIBLE_DEVICES"] = "6"
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(1)
rn.seed(1)
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
import keras
from keras import backend as K
tf.set_random_seed(0)
# Install the deterministic session as Keras' backend session (TF1-style API).
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
from datahelper import *
#import logging
from itertools import product
from arguments import argparser, logging
import keras
from keras.models import Model
from keras.preprocessing import sequence
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import Conv1D, GlobalMaxPooling1D, MaxPooling1D
from keras.layers.normalization import BatchNormalization
from keras.layers import Conv2D, GRU
from keras.layers import Input, Embedding, LSTM, Dense, TimeDistributed, Masking, RepeatVector, merge, Flatten
from keras.models import Model
from keras.utils import plot_model
from keras.layers import Bidirectional
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import optimizers, layers
import sys, pickle, os
import math, json, time
import decimal
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
from random import shuffle
from copy import deepcopy
from sklearn import preprocessing
from emetrics import get_aupr, get_cindex, get_rm2
# Field separator used in log lines; output directory for loss/cindex plots.
TABSY = "\t"
figdir = "figures/"
def build_combined_onehot(FLAGS, NUM_FILTERS, FILTER_LENGTH1, FILTER_LENGTH2):
    """Build the two-branch DeepDTA model over one-hot encoded inputs.

    Each branch (SMILES, protein sequence) is a 3-layer 1D CNN with filter
    counts NUM_FILTERS, 2x, 3x, followed by global max pooling; the pooled
    features are concatenated and regressed to a single affinity value.
    Returns the compiled Keras Model (adam / MSE / cindex_score metric).
    """
    smiles_in = Input(shape=(FLAGS.max_smi_len, FLAGS.charsmiset_size))
    protein_in = Input(shape=(FLAGS.max_seq_len, FLAGS.charseqset_size))

    # SMILES branch: widening conv stack, then a single pooled feature vector.
    smi = smiles_in
    for depth in (1, 2, 3):
        smi = Conv1D(filters=NUM_FILTERS * depth, kernel_size=FILTER_LENGTH1,
                     activation='relu', padding='valid', strides=1)(smi)
    smi = GlobalMaxPooling1D()(smi)

    # Protein branch: identical topology with its own filter length.
    prot = protein_in
    for depth in (1, 2, 3):
        prot = Conv1D(filters=NUM_FILTERS * depth, kernel_size=FILTER_LENGTH2,
                      activation='relu', padding='valid', strides=1)(prot)
    prot = GlobalMaxPooling1D()(prot)

    joined = keras.layers.concatenate([smi, prot])

    # Fully connected regression head.
    hidden = Dense(1024, activation='relu')(joined)
    hidden = Dropout(0.1)(hidden)
    hidden = Dense(1024, activation='relu')(hidden)
    hidden = Dropout(0.1)(hidden)
    hidden = Dense(512, activation='relu')(hidden)
    predictions = Dense(1, kernel_initializer='normal')(hidden)

    interactionModel = Model(inputs=[smiles_in, protein_in], outputs=[predictions])
    interactionModel.compile(optimizer='adam', loss='mean_squared_error', metrics=[cindex_score])

    print(interactionModel.summary())
    plot_model(interactionModel, to_file='figures/build_combined_onehot.png')
    return interactionModel
def build_combined_categorical(FLAGS, NUM_FILTERS, FILTER_LENGTH1, FILTER_LENGTH2):
    """Build the two-branch DeepDTA model over integer-encoded inputs.

    Unlike the one-hot variant, each branch first embeds its integer tokens
    into 128-d vectors, then applies the 3-layer widening CNN + global max
    pooling; pooled features are concatenated and fed to a dense regression
    head producing one affinity value. Returns the compiled Model.
    """
    smiles_in = Input(shape=(FLAGS.max_smi_len,), dtype='int32')
    protein_in = Input(shape=(FLAGS.max_seq_len,), dtype='int32')

    # SMILES branch: +1 in input_dim reserves index 0 for padding.
    smi = Embedding(input_dim=FLAGS.charsmiset_size + 1, output_dim=128,
                    input_length=FLAGS.max_smi_len)(smiles_in)
    for depth in (1, 2, 3):
        smi = Conv1D(filters=NUM_FILTERS * depth, kernel_size=FILTER_LENGTH1,
                     activation='relu', padding='valid', strides=1)(smi)
    smi = GlobalMaxPooling1D()(smi)

    # Protein branch: same topology with its own filter length.
    prot = Embedding(input_dim=FLAGS.charseqset_size + 1, output_dim=128,
                     input_length=FLAGS.max_seq_len)(protein_in)
    for depth in (1, 2, 3):
        prot = Conv1D(filters=NUM_FILTERS * depth, kernel_size=FILTER_LENGTH2,
                      activation='relu', padding='valid', strides=1)(prot)
    prot = GlobalMaxPooling1D()(prot)

    joined = keras.layers.concatenate([smi, prot], axis=-1)

    # Fully connected regression head; final layer is linear (no activation).
    hidden = Dense(1024, activation='relu')(joined)
    hidden = Dropout(0.1)(hidden)
    hidden = Dense(1024, activation='relu')(hidden)
    hidden = Dropout(0.1)(hidden)
    hidden = Dense(512, activation='relu')(hidden)
    predictions = Dense(1, kernel_initializer='normal')(hidden)

    interactionModel = Model(inputs=[smiles_in, protein_in], outputs=[predictions])
    interactionModel.compile(optimizer='adam', loss='mean_squared_error', metrics=[cindex_score])

    print(interactionModel.summary())
    plot_model(interactionModel, to_file='figures/build_combined_categorical.png')
    return interactionModel
def build_single_drug(FLAGS, NUM_FILTERS, FILTER_LENGTH1, FILTER_LENGTH2):
    """Build the drug-only ablation model: a CNN over SMILES concatenated with
    raw target features, followed by the standard dense regression head.

    Fix: the original used the Keras 1 `Merge` layer, which is not imported
    anywhere in this file and was removed in Keras 2 (the README requires
    Keras 2.x), so calling this function raised NameError. Rebuilt with the
    functional API; the architecture is otherwise unchanged.
    """
    XDinput = Input(shape=(FLAGS.max_smi_len,), dtype='int32')
    XTinput = Input(shape=(FLAGS.target_count,))

    # SMILES branch: embedding (+1 reserves padding index 0) + 3-layer CNN.
    encode_smiles = Embedding(input_dim=FLAGS.charsmiset_size+1, output_dim=128, input_length=FLAGS.max_smi_len)(XDinput)
    encode_smiles = Conv1D(filters=NUM_FILTERS, kernel_size=FILTER_LENGTH1, activation='relu', padding='valid', strides=1)(encode_smiles)
    encode_smiles = Conv1D(filters=NUM_FILTERS*2, kernel_size=FILTER_LENGTH1, activation='relu', padding='valid', strides=1)(encode_smiles)
    encode_smiles = Conv1D(filters=NUM_FILTERS*3, kernel_size=FILTER_LENGTH1, activation='relu', padding='valid', strides=1)(encode_smiles)
    encode_smiles = GlobalMaxPooling1D()(encode_smiles)

    # Target branch: identity pass-through, as in the original linear Activation.
    encode_target = Activation('linear')(XTinput)

    encode_interaction = keras.layers.concatenate([encode_smiles, encode_target], axis=1)

    # Fully connected regression head.
    FC = Dense(1024, activation='relu')(encode_interaction)
    FC = Dropout(0.1)(FC)
    FC = Dense(1024, activation='relu')(FC)
    FC = Dropout(0.1)(FC)
    FC = Dense(512, activation='relu')(FC)
    predictions = Dense(1, kernel_initializer='normal')(FC)

    interactionModel = Model(inputs=[XDinput, XTinput], outputs=[predictions])
    interactionModel.compile(optimizer='adam', loss='mean_squared_error', metrics=[cindex_score])

    print(interactionModel.summary())
    plot_model(interactionModel, to_file='figures/build_single_drug.png')
    return interactionModel
def build_single_prot(FLAGS, NUM_FILTERS, FILTER_LENGTH1, FILTER_LENGTH2):
    """Build the protein-only ablation model: raw drug features concatenated
    with a CNN over the protein sequence, then the dense regression head.

    Fixes: (1) the original used the Keras 1 `Merge` layer, which is not
    imported and was removed in Keras 2, so this function raised NameError;
    rebuilt with the functional API. (2) the original read `FLAGS.drugcount`,
    but the rest of this file (experiment/test_experiment, build_baseline)
    sets/uses `FLAGS.drug_count` — corrected for consistency.
    """
    XDinput = Input(shape=(FLAGS.drug_count,))
    XTinput = Input(shape=(FLAGS.max_seq_len,), dtype='int32')

    # Drug branch: identity pass-through, as in the original linear Activation.
    encode_drug = Activation('linear')(XDinput)

    # Protein branch: embedding (+1 reserves padding index 0) + 3-layer CNN.
    encode_protein = Embedding(input_dim=FLAGS.charseqset_size+1, output_dim=128, input_length=FLAGS.max_seq_len)(XTinput)
    encode_protein = Conv1D(filters=NUM_FILTERS, kernel_size=FILTER_LENGTH2, activation='relu', padding='valid', strides=1)(encode_protein)
    encode_protein = Conv1D(filters=NUM_FILTERS*2, kernel_size=FILTER_LENGTH2, activation='relu', padding='valid', strides=1)(encode_protein)
    encode_protein = Conv1D(filters=NUM_FILTERS*3, kernel_size=FILTER_LENGTH2, activation='relu', padding='valid', strides=1)(encode_protein)
    encode_protein = GlobalMaxPooling1D()(encode_protein)

    encode_interaction = keras.layers.concatenate([encode_drug, encode_protein], axis=1)

    # Fully connected regression head.
    FC = Dense(1024, activation='relu')(encode_interaction)
    FC = Dropout(0.1)(FC)
    FC = Dense(1024, activation='relu')(FC)
    FC = Dropout(0.1)(FC)
    FC = Dense(512, activation='relu')(FC)
    predictions = Dense(1, kernel_initializer='normal')(FC)

    interactionModel = Model(inputs=[XDinput, XTinput], outputs=[predictions])
    interactionModel.compile(optimizer='adam', loss='mean_squared_error', metrics=[cindex_score])

    print(interactionModel.summary())
    plot_model(interactionModel, to_file='figures/build_single_protein.png')
    return interactionModel
def build_baseline(FLAGS, NUM_FILTERS, FILTER_LENGTH1, FILTER_LENGTH2):
    """Build the linear baseline: one linear unit per input branch, the two
    scalars concatenated and passed through the dense regression head.

    Fix: the original used the Keras 1 `Merge` layer, which is not imported
    anywhere in this file and was removed in Keras 2, so calling this function
    raised NameError. Rebuilt with the functional API; architecture unchanged.
    NUM_FILTERS / FILTER_LENGTH* are unused here but kept so all builders
    share one call signature.
    """
    XDinput = Input(shape=(FLAGS.drug_count,))
    XTinput = Input(shape=(FLAGS.target_count,))

    # One linear unit per branch, as in the original Sequential sub-models.
    encode_drug = Dense(1, activation='linear')(XDinput)
    encode_target = Dense(1, activation='linear')(XTinput)

    encode_interaction = keras.layers.concatenate([encode_drug, encode_target], axis=1)

    # Fully connected regression head.
    FC = Dense(1024, activation='relu')(encode_interaction)
    FC = Dropout(0.1)(FC)
    FC = Dense(1024, activation='relu')(FC)
    FC = Dropout(0.1)(FC)
    FC = Dense(512, activation='relu')(FC)
    predictions = Dense(1, kernel_initializer='normal')(FC)

    interactionModel = Model(inputs=[XDinput, XTinput], outputs=[predictions])
    interactionModel.compile(optimizer='adam', loss='mean_squared_error', metrics=[cindex_score])

    print(interactionModel.summary())
    plot_model(interactionModel, to_file='figures/build_baseline.png')
    return interactionModel
def nfold_1_2_3_setting_sample(XD, XT, Y, measure, runmethod, FLAGS, dataset):
    """Entry point for training under problem settings 1-3.

    NOTE: the original nested cross-validation bookkeeping (building
    train/val/test fold index sets from dataset.read_sets and selecting the
    best hyper-parameters across folds) is disabled in this version; the
    function simply delegates to general_nfold_cv on the full data and
    returns nothing. `measure` and `dataset` are accepted only to keep the
    historical call signature intact.
    """
    bestparamlist = []
    general_nfold_cv(XD, XT, Y, measure, runmethod, FLAGS)
def general_nfold_cv(XD, XT, Y, prfmeasure, runmethod, FLAGS):
    """Grid-search over (num_windows x smi_window_lengths x seq_window_lengths),
    training one model per combination on the full (XD, XT, Y) data.

    Each trained model is saved to model/my_model_<pointer> and its training
    curves are plotted via plotLoss. `prfmeasure` is accepted for signature
    compatibility but unused, since the validation-fold evaluation from the
    original implementation is disabled here. Returns nothing.

    Fix: Keras' Model.save fails if the target directory does not exist, so
    the 'model' directory is now created up front (same guard style as the
    figures directory elsewhere in this file).
    """
    paramset1 = FLAGS.num_windows        # filter counts
    paramset2 = FLAGS.smi_window_lengths # SMILES conv filter lengths
    paramset3 = FLAGS.seq_window_lengths # protein conv filter lengths
    epoch = FLAGS.num_epoch
    batchsz = FLAGS.batch_size

    logging("---Parameter Search-----", FLAGS)

    # Fix: ensure the checkpoint directory exists before any save() call.
    if not os.path.exists('model'):
        os.makedirs('model')

    pointer = 0
    for param1ind, param1value in enumerate(paramset1):
        for param2ind, param2value in enumerate(paramset2):
            for param3ind, param3value in enumerate(paramset3):
                gridmodel = runmethod(FLAGS, param1value, param2value, param3value)
                # Stop when training loss plateaus; no validation split is used.
                es = EarlyStopping(monitor='loss', mode='min', verbose=1, patience=15)
                gridres = gridmodel.fit([np.array(XD), np.array(XT)], np.array(Y),
                                        batch_size=batchsz, epochs=epoch,
                                        shuffle=False, callbacks=[es])
                gridmodel.save('model/my_model_{}'.format(pointer))
                plotLoss(gridres, param1ind, param2ind, param3ind)
                pointer += 1
def cindex_score(y_true, y_pred):
    """Concordance index as a differentiable-ish TF1 metric.

    For every ordered pair (i, j) with y_true[i] > y_true[j] (selected by the
    lower-triangular mask `f`), a pair scores 1 if y_pred agrees on the
    ordering and 0.5 on a prediction tie; the CI is the mean pair score.
    Returns 0 when there are no comparable pairs (g == 0 guard).

    Fix: the original wrote `g == 0.0` on a TF1 tensor; Tensor.__eq__ is not
    overloaded in TF1, so that expression is Python identity comparison and is
    always False — prediction ties never received their 0.5 credit. Replaced
    with tf.equal, matching the tf.equal already used in the final guard.
    """
    # Pairwise prediction differences: g[i, j] = y_pred[i] - y_pred[j].
    g = tf.subtract(tf.expand_dims(y_pred, -1), y_pred)
    # Pair score: 1.0 for a correctly ordered pair, 0.5 for a tie.
    g = tf.cast(tf.equal(g, 0.0), tf.float32) * 0.5 + tf.cast(g > 0.0, tf.float32)
    # Mask of comparable pairs (true ordering), kept lower-triangular so each
    # unordered pair is counted once.
    f = tf.subtract(tf.expand_dims(y_true, -1), y_true) > 0.0
    f = tf.matrix_band_part(tf.cast(f, tf.float32), -1, 0)
    g = tf.reduce_sum(tf.multiply(g, f))
    f = tf.reduce_sum(f)
    return tf.where(tf.equal(g, 0), 0.0, g / f)
def plotLoss(history, batchind, epochind, param3ind):
    """Write the training loss and concordance-index curves for one grid
    point to figures/<stamp>.png and figures/<stamp>_acc.png.

    `history` is the Keras History returned by fit(); the three indices
    identify the hyper-parameter combination and a timestamp keeps repeated
    runs from overwriting each other. Validation curves are disabled since
    training runs without a validation split.
    """
    stamp = "b{}_e{}_{}__{}".format(batchind, epochind, param3ind, time.time())

    # Training-loss curve.
    plt.figure()
    plt.plot(history.history['loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['trainloss'], loc='upper left')
    plt.savefig("figures/" + stamp + ".png", dpi=None, facecolor='w', edgecolor='w',
                orientation='portrait', papertype=None, format=None, transparent=False,
                bbox_inches=None, pad_inches=0.1, frameon=None)
    plt.close()

    # Concordance-index curve.
    plt.figure()
    plt.title('model concordance index')
    plt.ylabel('cindex')
    plt.xlabel('epoch')
    plt.plot(history.history['cindex_score'])
    plt.legend(['traincindex'], loc='upper left')
    plt.savefig("figures/" + stamp + "_acc.png", dpi=None, facecolor='w', edgecolor='w',
                orientation='portrait', papertype=None, format=None, transparent=False,
                bbox_inches=None, pad_inches=0.1, frameon=None)
    plt.close()
def prepare_interaction_pairs(XD, XT, Y, rows, cols):
    """Gather per-pair training arrays from the drug/target matrices.

    For each index pair (rows[k], cols[k]) this selects the corresponding
    drug row from XD, target row from XT, and affinity Y[rows[k], cols[k]].

    Returns (drug_data, target_data, affinity): two stacked numpy arrays of
    shape (len(rows), ...) and a plain list of affinity values.

    Improvements over the original: the unused `targetscls` local is removed
    and the manual index loop is replaced with comprehensions.
    """
    drugs = [XD[r] for r in rows]
    targets = [XT[c] for c in cols]
    affinity = [Y[r, c] for r, c in zip(rows, cols)]

    drug_data = np.stack(drugs)
    target_data = np.stack(targets)
    return drug_data, target_data, affinity
def experiment(FLAGS, perfmeasure, deepmethod, foldcount=6):  # 5-fold cross validation + test
    """Load the dataset, record its dimensions on FLAGS, and start training.

    perfmeasure: callable scoring (correct, predicted) outputs, higher better.
    deepmethod:  model builder such as build_combined_categorical.
    foldcount:   kept for interface compatibility; fold handling lives in
                 nfold_1_2_3_setting_sample / general_nfold_cv.
    """
    dataset = DataSet_for_new(fpath=FLAGS.dataset_path,
                              setting_no=FLAGS.problem_type,
                              seqlen=FLAGS.max_seq_len,
                              smilen=FLAGS.max_smi_len,
                              need_shuffle=False)

    # Character-vocabulary sizes come from the parsed dataset.
    FLAGS.charseqset_size = dataset.charseqset_size
    FLAGS.charsmiset_size = dataset.charsmiset_size

    XD, XT, Y = dataset.parse_data(FLAGS)
    XD = np.asarray(XD)
    XT = np.asarray(XT)
    Y = np.asarray(Y)

    n_drugs = XD.shape[0]
    print(n_drugs)
    n_targets = XT.shape[0]
    print(n_targets)
    FLAGS.drug_count = n_drugs
    FLAGS.target_count = n_targets

    # Make sure the plot output directory exists before training starts.
    if not os.path.exists(figdir):
        os.makedirs(figdir)
    print(FLAGS.log_dir)

    nfold_1_2_3_setting_sample(XD, XT, Y, perfmeasure, deepmethod, FLAGS, dataset)
    logging("Setting " + str(FLAGS.problem_type), FLAGS)
def test_experiment(FLAGS, perfmeasure, deepmethod, foldcount=6):
    """Load a saved Keras model and write one prediction per drug-target pair.

    Reads the dataset pointed to by FLAGS.dataset_path, restores the model at
    FLAGS.model, and writes the raw predictions to <FLAGS.out>/results.txt,
    one value per line.

    perfmeasure, deepmethod and foldcount are accepted only for interface
    parity with experiment() and are unused here.
    """
    dataset = DataSet_for_new( fpath = FLAGS.dataset_path,
                               setting_no = FLAGS.problem_type,
                               seqlen = FLAGS.max_seq_len,
                               smilen = FLAGS.max_smi_len,
                               need_shuffle = False )
    # recorded for consistency with experiment(); the restored model already
    # has its embedding sizes baked in
    FLAGS.charseqset_size = dataset.charseqset_size
    FLAGS.charsmiset_size = dataset.charsmiset_size

    XD, XT, Y = dataset.parse_data(FLAGS)
    XD = np.asarray(XD)
    XT = np.asarray(XT)
    Y = np.asarray(Y)  # ground-truth affinities; not needed for prediction

    drugcount = XD.shape[0]
    print(drugcount)
    targetcount = XT.shape[0]
    print(targetcount)

    FLAGS.drug_count = drugcount
    FLAGS.target_count = targetcount

    if not os.path.exists(FLAGS.out):
        os.makedirs(FLAGS.out)
    print(FLAGS.out)

    # compile=False: inference only, so custom metrics (cindex_score) do not
    # need to be resolvable at load time
    gridmodel = keras.models.load_model(FLAGS.model, compile=False)
    predicted_labels = gridmodel.predict([np.array(XD), np.array(XT)])

    # FIX: os.path.join instead of raw string concatenation, so the output
    # path is correct whether or not FLAGS.out ends with a separator
    save_path = os.path.join(FLAGS.out, "results.txt")
    with open(save_path, "w") as f:
        for pred in predicted_labels:
            f.write(str(pred[0]) + '\n')
    print("---------------------predict over-------------------------")
def run_regression( FLAGS ):
    """Train the combined-categorical DeepDTA model, scored by concordance index."""
    experiment(FLAGS, get_cindex, build_combined_categorical)
def run_predict(FLAGS):
    """Run inference with a previously trained model via test_experiment()."""
    test_experiment(FLAGS, get_cindex, build_combined_categorical)
if __name__=="__main__":
    # Script entry point: parse CLI flags, then run prediction with a saved
    # model.  The commented-out section below is the training configuration
    # used to produce that model (kept for reference).
    FLAGS = argparser()
    # train
    # FLAGS.num_windows = [32]
    # FLAGS.seq_window_lengths = [8, 12]
    # FLAGS.smi_window_lengths = [4, 8]
    # FLAGS.batch_size = 256
    # FLAGS.num_epoch = 2
    # FLAGS.max_seq_len = 1000
    # FLAGS.max_smi_len = 100
    # # FLAGS.dataset_path = 'data/kiba/'
    # FLAGS.dataset_path = {"ligand":'../../../data/train/train_smile',
    #                       "protein":'../../../data/train/train_protein_seq',
    #                       "y":'../../../data/train/train_ic50',}
    # FLAGS.problem_type = 1
    # FLAGS.log_dir = "logs/"
    # FLAGS.log_dir = FLAGS.log_dir + str(time.time()) + "/"
    # if not os.path.exists(FLAGS.log_dir):
    #     os.makedirs(FLAGS.log_dir)
    # logging(str(FLAGS), FLAGS)
    # run_regression( FLAGS )

    # test
    FLAGS.max_seq_len = 1000  # protein sequences padded/truncated to this length
    FLAGS.max_smi_len = 100   # SMILES strings padded/truncated to this length
    # alternative evaluation datasets, kept for convenience:
    # FLAGS.dataset_path = {"ligand":'../../../data/test/test_smile',
    #                       "protein":'../../../data/test/test_protein_seq',
    #                       "y":'../../../data/test/test_ic50'}
    # FLAGS.dataset_path = {"ligand":'../../../data/ER/ER_smile',
    #                       "protein":'../../../data/ER/ER_protein_seq',
    #                       "y":'../../../data/ER/ER_ic50'}
    # FLAGS.dataset_path = {"ligand":'../../../data/GPCR/GPCR_smile',
    #                       "protein":'../../../data/GPCR/GPCR_protein_seq',
    #                       "y":'../../../data/GPCR/GPCR_ic50'}
    # FLAGS.dataset_path = {"ligand":'../../../data/Ion_channel/channel_smile',
    #                       "protein":'../../../data/Ion_channel/channel_protein_seq',
    #                       "y":'../../../data/Ion_channel/channel_ic50'}
    FLAGS.dataset_path = {"ligand":'../../../data/Tyrosine_kinase/kinase_smile',
                          "protein":'../../../data/Tyrosine_kinase/kinase_protein_seq',
                          "y":'../../../data/Tyrosine_kinase/kinase_ic50'}
    FLAGS.out = "./output/Tyrosine_kinase/"  # predictions written here
    FLAGS.model = "model/my_model_0.h5"      # trained Keras model to restore
    run_predict(FLAGS)
\ No newline at end of file
from __future__ import print_function
#import matplotlib
#matplotlib.use('Agg')
import numpy as np
import tensorflow as tf
import random as rn
### We modified Pahikkala et al. (2014) source code for cross-val process ###
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(1)
rn.seed(1)
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
import keras
from keras import backend as K
tf.set_random_seed(0)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
from datahelper import *
#import logging
from itertools import product
from arguments import argparser, logging
import keras
from keras.models import Model
from keras.preprocessing import sequence
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import Conv1D, GlobalMaxPooling1D, MaxPooling1D
from keras.layers.normalization import BatchNormalization
from keras.layers import Conv2D, GRU
from keras.layers import Input, Embedding, LSTM, Dense, TimeDistributed, Masking, RepeatVector, merge, Flatten
from keras.models import Model
from keras.utils import plot_model
from keras.layers import Bidirectional
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import optimizers, layers
import sys, pickle, os
import math, json, time
import decimal
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
from random import shuffle
from copy import deepcopy
from sklearn import preprocessing
from emetrics import get_aupr, get_cindex, get_rm2
TABSY = "\t"
figdir = "figures/"
def build_combined_onehot(FLAGS, NUM_FILTERS, FILTER_LENGTH1, FILTER_LENGTH2):
    """Build the one-hot-input DeepDTA model.

    Each branch (one-hot SMILES, one-hot protein sequence) runs through a
    3-layer 1D CNN with widening filter counts and global max pooling; the two
    pooled embeddings are concatenated and fed to a dense regression head that
    outputs a single affinity value.
    """
    XDinput = Input(shape=(FLAGS.max_smi_len, FLAGS.charsmiset_size))
    XTinput = Input(shape=(FLAGS.max_seq_len, FLAGS.charseqset_size))

    # SMILES branch: stacked convolutions with 1x, 2x, 3x the base filter count.
    smiles_branch = XDinput
    for width in (NUM_FILTERS, NUM_FILTERS * 2, NUM_FILTERS * 3):
        smiles_branch = Conv1D(filters=width, kernel_size=FILTER_LENGTH1,
                               activation='relu', padding='valid', strides=1)(smiles_branch)
    smiles_branch = GlobalMaxPooling1D()(smiles_branch)

    # Protein branch: same structure with its own filter length.
    protein_branch = XTinput
    for width in (NUM_FILTERS, NUM_FILTERS * 2, NUM_FILTERS * 3):
        protein_branch = Conv1D(filters=width, kernel_size=FILTER_LENGTH2,
                                activation='relu', padding='valid', strides=1)(protein_branch)
    protein_branch = GlobalMaxPooling1D()(protein_branch)

    joined = keras.layers.concatenate([smiles_branch, protein_branch])

    # Fully connected regression head (no output activation: raw affinity).
    hidden = Dense(1024, activation='relu')(joined)
    hidden = Dropout(0.1)(hidden)
    hidden = Dense(1024, activation='relu')(hidden)
    hidden = Dropout(0.1)(hidden)
    hidden = Dense(512, activation='relu')(hidden)
    predictions = Dense(1, kernel_initializer='normal')(hidden)

    interactionModel = Model(inputs=[XDinput, XTinput], outputs=[predictions])
    interactionModel.compile(optimizer='adam', loss='mean_squared_error',
                             metrics=[cindex_score])

    print(interactionModel.summary())
    plot_model(interactionModel, to_file='figures/build_combined_onehot.png')
    return interactionModel
def build_combined_categorical(FLAGS, NUM_FILTERS, FILTER_LENGTH1, FILTER_LENGTH2):
    """Build the token-index ("categorical") DeepDTA model.

    SMILES and protein sequences arrive as integer character indices; each
    branch learns a 128-dim character embedding, applies a 3-layer CNN with
    global max pooling, and the concatenated embeddings feed a dense head
    predicting one affinity value.
    """
    XDinput = Input(shape=(FLAGS.max_smi_len,), dtype='int32')
    XTinput = Input(shape=(FLAGS.max_seq_len,), dtype='int32')

    # SMILES branch: char-id embedding, then widening conv stack.
    smiles_branch = Embedding(input_dim=FLAGS.charsmiset_size+1, output_dim=128,
                              input_length=FLAGS.max_smi_len)(XDinput)
    for width in (NUM_FILTERS, NUM_FILTERS * 2, NUM_FILTERS * 3):
        smiles_branch = Conv1D(filters=width, kernel_size=FILTER_LENGTH1,
                               activation='relu', padding='valid', strides=1)(smiles_branch)
    smiles_branch = GlobalMaxPooling1D()(smiles_branch)

    # Protein branch: identical structure with its own filter length.
    protein_branch = Embedding(input_dim=FLAGS.charseqset_size+1, output_dim=128,
                               input_length=FLAGS.max_seq_len)(XTinput)
    for width in (NUM_FILTERS, NUM_FILTERS * 2, NUM_FILTERS * 3):
        protein_branch = Conv1D(filters=width, kernel_size=FILTER_LENGTH2,
                                activation='relu', padding='valid', strides=1)(protein_branch)
    protein_branch = GlobalMaxPooling1D()(protein_branch)

    joined = keras.layers.concatenate([smiles_branch, protein_branch], axis=-1)

    # Fully connected regression head (no output activation: raw affinity).
    hidden = Dense(1024, activation='relu')(joined)
    hidden = Dropout(0.1)(hidden)
    hidden = Dense(1024, activation='relu')(hidden)
    hidden = Dropout(0.1)(hidden)
    hidden = Dense(512, activation='relu')(hidden)
    predictions = Dense(1, kernel_initializer='normal')(hidden)

    interactionModel = Model(inputs=[XDinput, XTinput], outputs=[predictions])
    interactionModel.compile(optimizer='adam', loss='mean_squared_error',
                             metrics=[cindex_score])

    print(interactionModel.summary())
    plot_model(interactionModel, to_file='figures/build_combined_categorical.png')
    return interactionModel
def build_single_drug(FLAGS, NUM_FILTERS, FILTER_LENGTH1, FILTER_LENGTH2):
    """Drug-only CNN baseline: SMILES go through embedding + CNN; the target
    is a plain feature vector passed through unchanged before concatenation.

    FIX: the original built this with the Keras 1.x ``Merge`` layer, which is
    not imported and was removed in Keras 2.x (NameError when called).
    Rebuilt with the functional API; same layer stack, same
    (FLAGS, filters, lengths) -> compiled-model interface.
    """
    XDinput = Input(shape=(FLAGS.max_smi_len,), dtype='int32')
    XTinput = Input(shape=(FLAGS.target_count,))

    encode_smiles = Embedding(input_dim=FLAGS.charsmiset_size+1, output_dim=128,
                              input_length=FLAGS.max_smi_len)(XDinput)
    encode_smiles = Conv1D(filters=NUM_FILTERS, kernel_size=FILTER_LENGTH1,
                           activation='relu', padding='valid', strides=1)(encode_smiles)
    encode_smiles = Conv1D(filters=NUM_FILTERS*2, kernel_size=FILTER_LENGTH1,
                           activation='relu', padding='valid', strides=1)(encode_smiles)
    encode_smiles = Conv1D(filters=NUM_FILTERS*3, kernel_size=FILTER_LENGTH1,
                           activation='relu', padding='valid', strides=1)(encode_smiles)
    encode_smiles = GlobalMaxPooling1D()(encode_smiles)

    # The original passed the target features through a linear Activation,
    # i.e. identity -- so they are concatenated as-is.
    encode_interaction = keras.layers.concatenate([encode_smiles, XTinput], axis=1)

    # Fully connected regression head
    FC = Dense(1024, activation='relu')(encode_interaction)
    FC = Dropout(0.1)(FC)
    FC = Dense(1024, activation='relu')(FC)
    FC = Dropout(0.1)(FC)
    FC = Dense(512, activation='relu')(FC)
    predictions = Dense(1, kernel_initializer='normal')(FC)

    interactionModel = Model(inputs=[XDinput, XTinput], outputs=[predictions])
    interactionModel.compile(optimizer='adam', loss='mean_squared_error',
                             metrics=[cindex_score])
    print(interactionModel.summary())
    plot_model(interactionModel, to_file='figures/build_single_drug.png')
    return interactionModel
def build_single_prot(FLAGS, NUM_FILTERS, FILTER_LENGTH1, FILTER_LENGTH2):
    """Protein-only CNN baseline: the protein sequence goes through embedding
    + CNN; the drug is a plain feature vector passed through unchanged.

    FIXES: (1) the Keras 1.x ``Merge`` layer no longer exists in Keras 2.x
    (NameError when called) -- rebuilt with the functional API;
    (2) ``FLAGS.drugcount`` -> ``FLAGS.drug_count``, the attribute actually
    set by experiment().
    """
    XDinput = Input(shape=(FLAGS.drug_count,))
    XTinput = Input(shape=(FLAGS.max_seq_len,), dtype='int32')

    encode_protein = Embedding(input_dim=FLAGS.charseqset_size+1, output_dim=128,
                               input_length=FLAGS.max_seq_len)(XTinput)
    encode_protein = Conv1D(filters=NUM_FILTERS, kernel_size=FILTER_LENGTH2,
                            activation='relu', padding='valid', strides=1)(encode_protein)
    encode_protein = Conv1D(filters=NUM_FILTERS*2, kernel_size=FILTER_LENGTH2,
                            activation='relu', padding='valid', strides=1)(encode_protein)
    encode_protein = Conv1D(filters=NUM_FILTERS*3, kernel_size=FILTER_LENGTH2,
                            activation='relu', padding='valid', strides=1)(encode_protein)
    encode_protein = GlobalMaxPooling1D()(encode_protein)

    # Drug features were passed through a linear Activation (identity) in the
    # original, so they are concatenated as-is.
    encode_interaction = keras.layers.concatenate([XDinput, encode_protein], axis=1)

    # Fully connected regression head
    FC = Dense(1024, activation='relu')(encode_interaction)
    FC = Dropout(0.1)(FC)
    FC = Dense(1024, activation='relu')(FC)
    FC = Dropout(0.1)(FC)
    FC = Dense(512, activation='relu')(FC)
    predictions = Dense(1, kernel_initializer='normal')(FC)

    interactionModel = Model(inputs=[XDinput, XTinput], outputs=[predictions])
    interactionModel.compile(optimizer='adam', loss='mean_squared_error',
                             metrics=[cindex_score])
    print(interactionModel.summary())
    plot_model(interactionModel, to_file='figures/build_single_protein.png')
    return interactionModel
def build_baseline(FLAGS, NUM_FILTERS, FILTER_LENGTH1, FILTER_LENGTH2):
    """Simplest baseline: a single linear unit per input side, concatenated,
    then the shared dense regression head. NUM_FILTERS / FILTER_LENGTH*
    are accepted only for interface parity with the other builders.

    FIX: the original used the Keras 1.x ``Merge`` layer, which is not
    imported and was removed in Keras 2.x (NameError when called).
    Rebuilt with the functional API; same layers, same interface.
    """
    XDinput = Input(shape=(FLAGS.drug_count,))
    XTinput = Input(shape=(FLAGS.target_count,))

    encode_drug = Dense(1, activation='linear')(XDinput)
    encode_target = Dense(1, activation='linear')(XTinput)

    encode_interaction = keras.layers.concatenate([encode_drug, encode_target], axis=1)

    # Fully connected regression head
    FC = Dense(1024, activation='relu')(encode_interaction)
    FC = Dropout(0.1)(FC)
    FC = Dense(1024, activation='relu')(FC)
    FC = Dropout(0.1)(FC)
    FC = Dense(512, activation='relu')(FC)
    predictions = Dense(1, kernel_initializer='normal')(FC)

    interactionModel = Model(inputs=[XDinput, XTinput], outputs=[predictions])
    interactionModel.compile(optimizer='adam', loss='mean_squared_error',
                             metrics=[cindex_score])
    print(interactionModel.summary())
    plot_model(interactionModel, to_file='figures/build_baseline.png')
    return interactionModel
def nfold_1_2_3_setting_sample(XD, XT, Y, label_row_inds, label_col_inds, measure, runmethod, FLAGS, dataset):
    """Outer CV driver: build train/val splits, grid-search parameters on
    them, then score the best parameter combination against the test set.

    Returns (avg test CI, avg test MSE, std of test CI over folds).
    """
    bestparamlist = []
    test_set, outer_train_sets = dataset.read_sets(FLAGS)

    foldinds = len(outer_train_sets)
    test_sets = []
    ## TRAIN AND VAL
    val_sets = []
    train_sets = []

    # leave-one-fold-out: each fold in turn is validation, the rest are train;
    # the same held-out test set is paired with every split
    for val_foldind in range(foldinds):
        val_fold = outer_train_sets[val_foldind]
        val_sets.append(val_fold)
        otherfolds = deepcopy(outer_train_sets)
        otherfolds.pop(val_foldind)
        otherfoldsinds = [item for sublist in otherfolds for item in sublist]
        train_sets.append(otherfoldsinds)
        test_sets.append(test_set)
        print("val set", str(len(val_fold)))
        print("train set", str(len(otherfoldsinds)))

    # pass 1: hyper-parameter search on the train/val splits
    bestparamind, best_param_list, bestperf, all_predictions_not_need, losses_not_need = general_nfold_cv(XD, XT, Y, label_row_inds, label_col_inds,
                                                                                                          measure, runmethod, FLAGS, train_sets, val_sets)

    # pass 2: retrain on the train splits, evaluate against the test set
    bestparam, best_param_list, bestperf, all_predictions, all_losses = general_nfold_cv(XD, XT, Y, label_row_inds, label_col_inds,
                                                                                         measure, runmethod, FLAGS, train_sets, test_sets)

    testperf = all_predictions[bestparamind]  ## row of per-fold scores for the winning params
    logging("---FINAL RESULTS-----", FLAGS)
    logging("best param index = %s, best param = %.5f" %
            (bestparamind, bestparam), FLAGS)

    testperfs = []
    testloss = []
    avgperf = 0.
    # aggregate the winning row over folds
    for test_foldind in range(len(test_sets)):
        foldperf = all_predictions[bestparamind][test_foldind]
        foldloss = all_losses[bestparamind][test_foldind]
        testperfs.append(foldperf)
        testloss.append(foldloss)
        avgperf += foldperf

    avgperf = avgperf / len(test_sets)
    avgloss = np.mean(testloss)
    teststd = np.std(testperfs)

    logging("Test Performance CI", FLAGS)
    logging(testperfs, FLAGS)
    logging("Test Performance MSE", FLAGS)
    logging(testloss, FLAGS)

    return avgperf, avgloss, teststd
def general_nfold_cv(XD, XT, Y, label_row_inds, label_col_inds, prfmeasure, runmethod, FLAGS, labeled_sets, val_sets):
    """Grid-search over (num_windows x smi_window_lengths x seq_window_lengths),
    training one model per parameter combination per fold.

    Returns (bestpointer, best_param_list, bestperf, all_predictions, all_losses)
    where all_predictions[param_combo][fold] is the CI on that fold's val set
    and all_losses holds the matching MSE values.
    """
    paramset1 = FLAGS.num_windows         # e.g. [32]      -- conv filter counts
    paramset2 = FLAGS.smi_window_lengths  # e.g. [4, 8]    -- SMILES filter lengths
    paramset3 = FLAGS.seq_window_lengths  # e.g. [8, 12]   -- protein filter lengths
    epoch = FLAGS.num_epoch               # e.g. 100
    batchsz = FLAGS.batch_size            # e.g. 256

    logging("---Parameter Search-----", FLAGS)

    # results matrices: one row per parameter combination, one column per fold
    w = len(val_sets)
    h = len(paramset1) * len(paramset2) * len(paramset3)
    all_predictions = [[0 for x in range(w)] for y in range(h)]
    all_losses = [[0 for x in range(w)] for y in range(h)]
    print(all_predictions)

    for foldind in range(len(val_sets)):
        valinds = val_sets[foldind]
        labeledinds = labeled_sets[foldind]

        Y_train = np.mat(np.copy(Y))  # NOTE(review): appears unused below -- verify
        params = {}                   # NOTE(review): appears unused below

        XD_train = XD
        XT_train = XT
        # map flat label indices to (drug, target) coordinate pairs
        trrows = label_row_inds[labeledinds]
        trcols = label_col_inds[labeledinds]
        XD_train = XD[trrows]
        XT_train = XT[trcols]
        train_drugs, train_prots, train_Y = prepare_interaction_pairs(XD, XT, Y, trrows, trcols)

        terows = label_row_inds[valinds]
        tecols = label_col_inds[valinds]
        #print("terows", str(terows), str(len(terows)))
        #print("tecols", str(tecols), str(len(tecols)))
        val_drugs, val_prots, val_Y = prepare_interaction_pairs(XD, XT, Y, terows, tecols)

        pointer = 0  # flat index over the parameter grid, reset each fold

        for param1ind in range(len(paramset1)):      # conv filter count
            param1value = paramset1[param1ind]
            for param2ind in range(len(paramset2)):  # SMILES filter length
                param2value = paramset2[param2ind]
                for param3ind in range(len(paramset3)):  # protein filter length
                    param3value = paramset3[param3ind]

                    gridmodel = runmethod(FLAGS, param1value, param2value, param3value)
                    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=15)
                    gridres = gridmodel.fit(([np.array(train_drugs), np.array(train_prots)]), np.array(train_Y), batch_size=batchsz, epochs=epoch,
                                            validation_data=(([np.array(val_drugs), np.array(val_prots)]), np.array(val_Y)), shuffle=False, callbacks=[es])

                    predicted_labels = gridmodel.predict([np.array(val_drugs), np.array(val_prots)])
                    loss, rperf2 = gridmodel.evaluate(([np.array(val_drugs), np.array(val_prots)]), np.array(val_Y), verbose=0)
                    rperf = prfmeasure(val_Y, predicted_labels)
                    rperf = rperf[0]

                    logging("P1 = %d, P2 = %d, P3 = %d, Fold = %d, CI-i = %f, CI-ii = %f, MSE = %f" %
                            (param1ind, param2ind, param3ind, foldind, rperf, rperf2, loss), FLAGS)

                    plotLoss(gridres, param1ind, param2ind, param3ind, foldind)

                    all_predictions[pointer][foldind] = rperf  # one cell per (param combo, fold)
                    all_losses[pointer][foldind] = loss

                    pointer += 1

    # average each parameter combination over folds, then pick the best
    bestperf = -float('Inf')
    bestpointer = None
    best_param_list = []
    pointer = 0
    for param1ind in range(len(paramset1)):
        for param2ind in range(len(paramset2)):
            for param3ind in range(len(paramset3)):
                avgperf = 0.
                for foldind in range(len(val_sets)):
                    foldperf = all_predictions[pointer][foldind]
                    avgperf += foldperf
                avgperf /= len(val_sets)
                #print(epoch, batchsz, avgperf)
                if avgperf > bestperf:
                    bestperf = avgperf
                    bestpointer = pointer
                    best_param_list = [param1ind, param2ind, param3ind]
                pointer += 1

    return bestpointer, best_param_list, bestperf, all_predictions, all_losses
def cindex_score(y_true, y_pred):
    """Concordance index as a Keras/TF1 metric.

    Over all ordered pairs with y_true[i] > y_true[j], counts the fraction of
    pairs whose predictions are ordered the same way; prediction ties get 0.5
    credit. Returns 0 when there are no comparable pairs.

    FIX: the original wrote ``tf.cast(g == 0.0, ...)``; TF 1.x does not
    overload ``Tensor.__eq__``, so ``g == 0.0`` evaluates to the Python
    constant False and the 0.5 tie term silently vanished. Use tf.equal.
    """
    g = tf.subtract(tf.expand_dims(y_pred, -1), y_pred)
    # 1 for concordant prediction pairs, 0.5 for prediction ties, 0 otherwise
    g = tf.cast(tf.equal(g, 0.0), tf.float32) * 0.5 + tf.cast(g > 0.0, tf.float32)

    f = tf.subtract(tf.expand_dims(y_true, -1), y_true) > 0.0
    # keep only the lower triangle so each pair is counted once
    f = tf.matrix_band_part(tf.cast(f, tf.float32), -1, 0)

    g = tf.reduce_sum(tf.multiply(g, f))
    f = tf.reduce_sum(f)

    return tf.where(tf.equal(g, 0), 0.0, g/f)  # avoid 0/0 when no valid pairs
def plotLoss(history, batchind, epochind, param3ind, foldind):
    """Save loss and concordance-index training curves for one grid-search run
    under figures/, tagged with the parameter indices, fold and a timestamp."""
    tag = "b" + str(batchind) + "_e" + str(epochind) + "_" + str(param3ind) + "_" + str(foldind) + "_" + str(time.time())
    save_kwargs = dict(dpi=None, facecolor='w', edgecolor='w', orientation='portrait',
                       papertype=None, format=None, transparent=False,
                       bbox_inches=None, pad_inches=0.1, frameon=None)

    # Loss curves (train vs validation).
    plt.figure()
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['trainloss', 'valloss'], loc='upper left')
    plt.savefig("figures/" + tag + ".png", **save_kwargs)
    plt.close()

    # Concordance-index curves (train vs validation).
    plt.figure()
    plt.title('model concordance index')
    plt.ylabel('cindex')
    plt.xlabel('epoch')
    plt.plot(history.history['cindex_score'])
    plt.plot(history.history['val_cindex_score'])
    plt.legend(['traincindex', 'valcindex'], loc='upper left')
    plt.savefig("figures/" + tag + "_acc.png", **save_kwargs)
    plt.close()
def prepare_interaction_pairs(XD, XT, Y, rows, cols):
    """Gather paired training samples from the affinity matrix.

    For each i, pairs drug XD[rows[i]] with target XT[cols[i]] and the label
    Y[rows[i], cols[i]]. Returns (stacked drug array, stacked target array,
    list of affinity values).
    """
    drug_data = np.stack([XD[r] for r in rows])
    target_data = np.stack([XT[c] for c in cols])
    affinity = [Y[r, c] for r, c in zip(rows, cols)]
    return drug_data, target_data, affinity
def experiment(FLAGS, perfmeasure, deepmethod, foldcount=6):  # 5-fold cross validation + test
    """Run the full DeepDTA training/evaluation pipeline and log results.

    FLAGS       -- parsed argparse namespace; mutated here (charseqset_size,
                   charsmiset_size, drug_count, target_count are filled in).
    perfmeasure -- function(correct, predicted) -> performance; higher is
                   better, so pass error measures inverted (e.g. -error(Y, P)).
    deepmethod  -- model-builder callable (e.g. build_combined_categorical).
    foldcount   -- number of CV folds for settings 1-3 (setting 4 always runs
                   3x3 cross-validation; unused in this body).
    """
    # XD: [drugs, features] sized array (features may also be similarities with other drugs)
    # XT: [targets, features] sized array (features may also be similarities with other targets)
    # Y: interaction values, real or binary (+1, -1); float("nan") marks unknown entries
    dataset = DataSet( fpath = FLAGS.dataset_path,
                       setting_no = FLAGS.problem_type,
                       seqlen = FLAGS.max_seq_len,
                       smilen = FLAGS.max_smi_len,
                       need_shuffle = False )
    # record character-set sizes so the model builder can size its embeddings
    FLAGS.charseqset_size = dataset.charseqset_size
    FLAGS.charsmiset_size = dataset.charsmiset_size

    XD, XT, Y = dataset.parse_data(FLAGS)
    XD = np.asarray(XD)
    XT = np.asarray(XT)
    Y = np.asarray(Y)

    drugcount = XD.shape[0]
    print(drugcount)
    targetcount = XT.shape[0]
    print(targetcount)

    FLAGS.drug_count = drugcount
    FLAGS.target_count = targetcount

    # coordinates of every known affinity in Y (NaN marks unknown entries)
    label_row_inds, label_col_inds = np.where(np.isnan(Y)==False)

    if not os.path.exists(figdir):  # module-level "figures/" output directory
        os.makedirs(figdir)
    print(FLAGS.log_dir)

    S1_avgperf, S1_avgloss, S1_teststd = nfold_1_2_3_setting_sample(XD, XT, Y, label_row_inds, label_col_inds,
                                                                    perfmeasure, deepmethod, FLAGS, dataset)

    logging("Setting " + str(FLAGS.problem_type), FLAGS)
    logging("avg_perf = %.5f, avg_mse = %.5f, std = %.5f" %
            (S1_avgperf, S1_avgloss, S1_teststd), FLAGS)
def run_regression( FLAGS ):
    """Train the combined-categorical DeepDTA model, scored by concordance index."""
    experiment(FLAGS, get_cindex, build_combined_categorical)
if __name__=="__main__":
    # Script entry point: configure hyper-parameters and train on KIBA.
    FLAGS = argparser()

    FLAGS.num_windows = [32]            # conv filter counts to grid-search
    FLAGS.seq_window_lengths = [8, 12]  # protein conv filter lengths
    FLAGS.smi_window_lengths = [4, 8]   # SMILES conv filter lengths
    FLAGS.batch_size = 256
    FLAGS.num_epoch = 100
    FLAGS.max_seq_len = 1000            # protein padding/truncation length
    FLAGS.max_smi_len = 100             # SMILES padding/truncation length
    FLAGS.dataset_path = 'data/kiba/'
    FLAGS.problem_type = 1

    # FIX: was "logs" with no separator, which produced sibling directories
    # named e.g. "logs1650000000.0/" instead of nesting each run under logs/
    # (the sibling prediction script and the README both use "logs/").
    FLAGS.log_dir = "logs/"
    FLAGS.log_dir = FLAGS.log_dir + str(time.time()) + "/"  # one dir per run

    if not os.path.exists(FLAGS.log_dir):
        os.makedirs(FLAGS.log_dir)

    logging(str(FLAGS), FLAGS)
    run_regression( FLAGS )
python run_baseline.py
\ No newline at end of file
MolTrans @ 47ac16b8
Subproject commit 47ac16b8c158b080ba6cdaec74cd7aa9c1332b73
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论