提交 2dfa9ebc 作者: 朱学凯

update .gitignore

上级 b51ff548
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.6 (py3.6)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="renderExternalDocumentation" value="true" />
</component>
</module>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="1">
<item index="0" class="java.lang.String" itemvalue="subword-nmt" />
</list>
</value>
</option>
</inspection_tool>
</profile>
</component>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (py3.6)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/CPI.iml" filepath="$PROJECT_DIR$/.idea/CPI.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PySciProjectComponent">
<option name="PY_SCI_VIEW" value="true" />
<option name="PY_SCI_VIEW_SUGGESTED" value="true" />
</component>
</project>
\ No newline at end of file
<changelist name="Uncommitted_changes_before_Update_at_2021_4_29,_1_41_下午_[Default_Changelist]" date="1619674906341" recycled="true" deleted="true">
<option name="PATH" value="$PROJECT_DIR$/.idea/shelf/Uncommitted_changes_before_Update_at_2021_4_29,_1_41_下午_[Default_Changelist]/shelved.patch" />
<option name="DESCRIPTION" value="Uncommitted changes before Update at 2021/4/29, 1:41 下午 [Default Changelist]" />
</changelist>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
\ No newline at end of file
import numpy as np
import re
def eval_result(pred, label):
    """Compute RMSE and the Pearson correlation matrix for predictions vs. labels.

    Args:
        pred: sequence of predicted affinity values.
        label: sequence of ground-truth affinity values (same length).

    Returns:
        (rmse, corr) where corr is the 2x2 matrix from np.corrcoef;
        the Pearson coefficient itself is corr[0, 1].
    """
    predictions = np.asarray(pred)
    targets = np.asarray(label)
    residual = predictions - targets
    mean_sq_err = np.sum(residual ** 2) / len(predictions)
    root_mse = np.sqrt(mean_sq_err)
    corr_matrix = np.corrcoef(predictions, targets)
    return root_mse, corr_matrix
def eval(pred_path, label_path):
    """Evaluate a prediction file against a label file and persist the summary.

    Both files hold one float per line. The RMSE / Pearson summary is written
    next to the prediction file ('test.txt' in the path is replaced by
    'eval_results') and also printed to stdout.
    """
    with open(pred_path, 'r') as fin:
        predictions = [float(line.strip()) for line in fin.readlines()]
    with open(label_path, 'r') as fin:
        labels = [float(line.strip()) for line in fin.readlines()]
    rmse, corr = eval_result(predictions, labels)
    pearson = corr[0, 1]
    summary = 'RMSE : {} ; Pearson Correlation Coefficient : {}'.format(rmse, pearson)
    out_path = pred_path.replace('test.txt', 'eval_results')
    with open(out_path, 'w') as fout:
        fout.write(summary)
    print(summary)
# Script entry point: recover the latest prediction directory from the
# command recorded in pre_test.sh, then evaluate it against the test labels.
if __name__ == '__main__':
    with open('pre_test.sh', 'r') as f:
        pred_dir = f.readline()
    # Token 5 of the recorded command line is the '--output=...' argument;
    # its last path component names the run directory.
    # NOTE(review): this positional parse assumes pre_test.sh's first line has
    # exactly the layout 'CUDA_VISIBLE_DEVICES=.. python run_interaction.py
    # --task=.. --b=.. --output=..' -- confirm if that script changes.
    pred_dir = pred_dir.split()[5].split('/')[-1]
    pred_result = './predict/{}/test.txt'.format(pred_dir)
    test_label_path = './data/test_ic50'
    eval(pred_result, test_label_path)
CUDA_VISIBLE_DEVICES=7 python run_interaction.py --task=test --b=64 --output=./predict/lr-1e-5-batch-32-e-10-layer6-0428-step-57659 --config=./config/config_layer_6.json --init=./model/lr-1e-5-batch-32-e-10-layer6-0428/epoch-6-step-57659-loss-0.6694820924709807.pth --do_eval=True
# NOTE: keep the command above on the FIRST line of this file -- eval.py reads
# it with readline() and extracts the run directory from the --output argument.
\ No newline at end of file
from argparse import ArgumentParser
from dataset import Data_Encoder
import torch
from torch.utils.data import DataLoader
from configuration_bert import BertConfig
from modeling_bert import BertAffinityModel
from torch.utils.tensorboard import SummaryWriter
import os
from tqdm import tqdm
# Make float64 the default for all newly created tensors and model parameters,
# so predictions, labels and the MSE loss all run in double precision.
torch.set_default_tensor_type(torch.DoubleTensor)
def get_task(task_name):
    """Return the data-file paths and tokenizer configuration for a task.

    Parameters
    ----------
    task_name : str
        Task identifier, case-insensitive; 'train' and 'test' are supported.

    Returns
    -------
    tuple[dict, dict]
        (data_files, tokenizer_config): data_files maps 'sps' / 'smile' /
        'affinity' to data paths; tokenizer_config holds vocab paths,
        special-token ids and max sequence length.

    Raises
    ------
    ValueError
        For any other task name. Previously the function fell off the end
        and returned None, which surfaced as an opaque TypeError at the
        caller's tuple-unpacking.
    """
    # Both splits share the same tokenizer settings; only the data files differ.
    tokenizer_config = {"vocab_file": './config/vocab.txt',
                        "vocab_pair": './config/drug_codes_chembl.txt',
                        "begin_id": '[CLS]',
                        "separate_id": "[SEP]",
                        "max_len": 256
                        }
    task = task_name.lower()
    if task == 'train':
        data_files = {"sps": './data/train_sps',
                      "smile": './data/train_smile',
                      "affinity": './data/train_ic50',
                      }
    elif task == 'test':
        data_files = {"sps": './data/test_sps',
                      "smile": './data/test_smile',
                      "affinity": './data/test_ic50',
                      }
    else:
        raise ValueError("Unsupported task name: {!r} (expected 'train' or 'test')".format(task_name))
    return data_files, tokenizer_config
def train(args, model, dataset):
    """Train `model` on `dataset` with Adam and MSE loss.

    Per-step loss is logged to TensorBoard under ./log/<args.savedir>; a
    checkpoint is written to ./model/<args.savedir>/ every num_step // 10
    steps, skipping the first epoch.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide batch_size, workers, lr, epochs and savedir.
    model : torch.nn.Module
        Invoked as model(input_ids=..., token_type_ids=..., attention_mask=...)
        and expected to return the predicted affinity.
    dataset : torch.utils.data.Dataset
        Yields (input_ids, token_type_ids, attention_mask, affinity) tuples.
    """
    loader_kwargs = {'batch_size': args.batch_size,
                     'shuffle': True,
                     'num_workers': args.workers,
                     }
    data_generator = DataLoader(dataset, **loader_kwargs)
    model.train()
    # Move the model to the GPU *before* handing its parameters to the
    # optimizer, so both always refer to the same device.
    if torch.cuda.is_available():
        model.cuda()
    opt = torch.optim.Adam(model.parameters(), lr=args.lr)
    loss_fct = torch.nn.MSELoss()
    writer = SummaryWriter('./log/' + args.savedir)
    num_step = args.epochs * len(data_generator)
    step = 0
    save_step = num_step // 10  # checkpoint roughly 10 times over the whole run
    print('epoch num : {}'.format(args.epochs))
    print('step num : {}'.format(num_step))
    print('batch size : {}'.format(args.batch_size))
    print('learning rate : {}'.format(args.lr))
    print('begin training')
    for epoch in range(args.epochs):
        # 'input_ids' instead of 'input': avoid shadowing the builtin.
        for input_ids, token_type_ids, input_mask, affinity in data_generator:
            if torch.cuda.is_available():
                pred_affinity = model(input_ids=input_ids.cuda(),
                                      token_type_ids=token_type_ids.cuda(),
                                      attention_mask=input_mask.cuda())
                loss = loss_fct(pred_affinity, affinity.cuda().unsqueeze(-1))
            else:
                pred_affinity = model(input_ids=input_ids,
                                      token_type_ids=token_type_ids,
                                      attention_mask=input_mask)
                loss = loss_fct(pred_affinity, affinity.unsqueeze(-1))
            step += 1
            writer.add_scalar('loss', loss, global_step=step)
            opt.zero_grad()
            loss.backward()
            opt.step()
            loss_value = loss.detach().cpu().item()
            print('Training at Epoch ' + str(epoch + 1) + ' step ' + str(step)
                  + ' with loss ' + str(loss_value))
            # Save a checkpoint (the first epoch is skipped, as before).
            if epoch >= 1 and step % save_step == 0:
                save_path = './model/' + args.savedir + '/'
                # makedirs(exist_ok=True) instead of mkdir: mkdir raised
                # FileNotFoundError when ./model/ itself did not exist and
                # FileExistsError on a create race.
                os.makedirs(save_path, exist_ok=True)
                # Embed the scalar loss value in the filename, not the tensor.
                torch.save(model.state_dict(),
                           save_path + 'epoch-{}-step-{}-loss-{}.pth'.format(epoch, step, loss_value))
    print('training over')
    writer.close()
def test(args, model, dataset):
    """Run inference over `dataset` and write one predicted affinity per line.

    Loads weights from args.init, writes predictions to
    <args.output>/<args.task>.txt and, when args.do_eval is set, shells out
    to eval.py afterwards.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide batch_size, workers, init, output, task and do_eval.
    model : torch.nn.Module
        Invoked as model(input_ids=..., token_type_ids=..., attention_mask=...).
    dataset : torch.utils.data.Dataset
        Yields (input_ids, token_type_ids, attention_mask, affinity) tuples.
    """
    loader_kwargs = {'batch_size': args.batch_size,
                     'shuffle': False,
                     'num_workers': args.workers,
                     }
    data_generator = DataLoader(dataset, **loader_kwargs)
    with torch.no_grad():
        if torch.cuda.is_available():
            model.load_state_dict(torch.load(args.init), strict=True)
            model.cuda()  # move once, up front (was re-issued every batch)
        else:
            model.load_state_dict(torch.load(args.init, map_location=torch.device('cpu')), strict=True)
        model.eval()
        if not os.path.exists(args.output):
            # makedirs instead of mkdir so nested output paths also work.
            os.makedirs(args.output)
        result = args.output + '/' + '{}.txt'.format(args.task)
        print('begin predicting')
        with open(result, 'w') as f:
            for input_ids, token_type_ids, input_mask, affinity in tqdm(data_generator):
                if torch.cuda.is_available():
                    pred_affinity = model(input_ids=input_ids.cuda(),
                                          token_type_ids=token_type_ids.cuda(),
                                          attention_mask=input_mask.cuda())
                else:
                    pred_affinity = model(input_ids=input_ids,
                                          token_type_ids=token_type_ids,
                                          attention_mask=input_mask)
                pred_affinity = pred_affinity.cpu().numpy()
                # BUG FIX: iterate the rows actually present in this batch.
                # The original looped range(args.batch_size), which raised
                # IndexError on the final batch whenever the dataset size is
                # not a multiple of the batch size.
                for pred in pred_affinity[:, 0]:
                    f.write(str(pred) + '\n')
    if args.do_eval:
        os.system('python eval.py')
def main(args):
    """Build the dataset and BERT affinity model for args.task, then run it."""
    # Resolve the data files and tokenizer settings for this task.
    data_file, tokenizer_config = get_task(args.task)
    dataset = Data_Encoder(data_file, tokenizer_config)

    # Instantiate the model from its JSON configuration.
    print('------------------creat model---------------------------')
    config = BertConfig.from_pretrained(args.config)
    model = BertAffinityModel(config)
    print('model name : BertAffinity')
    print('task name : {}'.format(args.task))

    # Dispatch by task name; any other task falls through silently, as before.
    runners = {'train': train, 'test': test}
    runner = runners.get(args.task)
    if runner is not None:
        runner(args, model, dataset)
if __name__ == '__main__':
    # Command-line entry point: parse hyper-parameters and run the task.

    def _str2bool(value):
        """Parse a boolean CLI value ('True'/'1'/'yes' -> True, else False)."""
        return str(value).strip().lower() in ('true', '1', 'yes', 'y')

    parser = ArgumentParser(description='BertAffinity')
    parser.add_argument('-b', '--batch-size', default=8, type=int,
                        metavar='N',
                        help='mini-batch size (default: 8), this is the total '
                             'batch size of all GPUs on the current node when '
                             'using Data Parallel or Distributed Data Parallel')
    parser.add_argument('-j', '--workers', default=0, type=int, metavar='N',
                        help='number of data loading workers (default: 0)')
    parser.add_argument('--epochs', default=50, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('--task', choices=['train', 'test', 'channel', 'ER', 'GPCR', 'kinase'],
                        default='train', type=str, metavar='TASK',
                        help='Task name. Could be train, test, channel, ER, GPCR, kinase.')
    parser.add_argument('--lr', '--learning-rate', default=1e-5, type=float,
                        metavar='LR', help='initial learning rate', dest='lr')
    parser.add_argument('--config', default='./config/config.json', type=str, help='model config file path')
    parser.add_argument('--savedir', default='train', type=str, help='log and model save path')
    parser.add_argument('--init', default='model', type=str, help='init checkpoint')
    parser.add_argument('--output', default='predict', type=str, help='result save path')
    # BUG FIX: the original used type=bool, so '--do_eval=False' was parsed as
    # True (bool() of any non-empty string is True). _str2bool interprets the
    # string properly while keeping '--do_eval=True' working unchanged.
    parser.add_argument('--do_eval', default=False, type=_str2bool, help='do eval')
    args = parser.parse_args()
    main(args)
# Training command for this run: fine-tune the 6-layer config for 10 epochs
# at batch size 32, lr 1e-5, saving logs/checkpoints under the given savedir.
CUDA_VISIBLE_DEVICES=1 python run_interaction.py --b=32 --task=train --epochs=10 --lr=1e-5 --savedir=lr-1e-5-batch-32-e-10-layer6-0428 --config=./config/config_layer_6.json
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
This source diff could not be displayed because it is too large. You can view the blob instead.
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
------------------creat model---------------------------
model name : BertAffinity
task name : train
epoch num : 10
step num : 41190
batch size : 64
learning rate : 1e-05
begin training
Traceback (most recent call last):
File "run_interaction.py", line 186, in <module>
main(args)
File "run_interaction.py", line 131, in main
train(args, model, dataset)
File "run_interaction.py", line 67, in train
pred_affinity = model(input.cuda().long())
File "/root/.pyenv/versions/3.7.4/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/root/work/CPI/BertAffinity/CPI-main/modeling_bert.py", line 2001, in forward
return_dict=return_dict,
File "/root/.pyenv/versions/3.7.4/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/root/work/CPI/BertAffinity/CPI-main/modeling_bert.py", line 577, in forward
output_attentions,
File "/root/.pyenv/versions/3.7.4/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/root/work/CPI/BertAffinity/CPI-main/modeling_bert.py", line 463, in forward
past_key_value=self_attn_past_key_value,
File "/root/.pyenv/versions/3.7.4/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/root/work/CPI/BertAffinity/CPI-main/modeling_bert.py", line 396, in forward
output_attentions,
File "/root/.pyenv/versions/3.7.4/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/root/work/CPI/BertAffinity/CPI-main/modeling_bert.py", line 321, in forward
attention_probs = self.dropout(attention_probs)
File "/root/.pyenv/versions/3.7.4/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/root/.pyenv/versions/3.7.4/lib/python3.7/site-packages/torch/nn/modules/dropout.py", line 58, in forward
return F.dropout(input, self.p, self.training, self.inplace)
File "/root/.pyenv/versions/3.7.4/lib/python3.7/site-packages/torch/nn/functional.py", line 973, in dropout
else _VF.dropout(input, p, training))
RuntimeError: CUDA out of memory. Tried to allocate 192.00 MiB (GPU 0; 10.76 GiB total capacity; 9.63 GiB already allocated; 173.12 MiB free; 9.64 GiB reserved in total by PyTorch)
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
File mode changed from 100755 to 100644
RMSE : 1.1800662398679211 ; Pearson Correlation Coefficient : 0.6371230205539953
\ No newline at end of file
RMSE : 1.0795645303119095 ; Pearson Correlation Coefficient : 0.7137683236148267
\ No newline at end of file
RMSE : 1.0240501538228575 ; Pearson Correlation Coefficient : 0.7363279193240799
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
RMSE : 1.091491741778561 ; Pearson Correlation Coefficient : 0.711841625345012
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
RMSE : 1.0806709885951111 ; Pearson Correlation Coefficient : 0.6831883388665838
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
RMSE : 1.0635720063061838 ; Pearson Correlation Coefficient : 0.698596143686921
\ No newline at end of file
RMSE : 1.2460797962867016 ; Pearson Correlation Coefficient : 0.5748486828479313
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
File mode changed from 100755 to 100644
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论