提交 44a442cd 作者: 朱学凯

add z-score data

上级 256bf696
......@@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.6 (code)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.6 (py3.6)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
......
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (code)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (py3.6)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
......@@ -2,7 +2,12 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="f877ac68-9cea-46d8-9125-207eebe5b5d6" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/.idea/CPI.iml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/CPI.iml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/misc.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/misc.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/run_interaction.py" beforeDir="false" afterPath="$PROJECT_DIR$/run_interaction.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/train.sh" beforeDir="false" afterPath="$PROJECT_DIR$/train.sh" afterDir="false" />
<change beforePath="$PROJECT_DIR$/utils/normalize_data.py" beforeDir="false" afterPath="$PROJECT_DIR$/utils/normalize_data.py" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
......@@ -49,7 +54,7 @@
<recent name="$PROJECT_DIR$/experment_result/learning_rate" />
</key>
</component>
<component name="RunManager" selected="Python.normalize_data">
<component name="RunManager" selected="Python.run_interaction">
<configuration default="true" type="PythonConfigurationType" factoryName="Python">
<module name="CPI" />
<option name="INTERPRETER_OPTIONS" value="" />
......@@ -120,9 +125,6 @@
<module name="CPI" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/utils" />
<option name="IS_MODULE_SDK" value="true" />
......@@ -184,8 +186,8 @@
</configuration>
<recent_temporary>
<list>
<item itemvalue="Python.normalize_data" />
<item itemvalue="Python.run_interaction" />
<item itemvalue="Python.normalize_data" />
<item itemvalue="Python.test" />
<item itemvalue="Python.dataset" />
<item itemvalue="Python.eval" />
......@@ -232,10 +234,10 @@
</breakpoint-manager>
</component>
<component name="com.intellij.coverage.CoverageDataManagerImpl">
<SUITE FILE_PATH="coverage/CPI$dataset.coverage" NAME="dataset Coverage Results" MODIFIED="1618641059668" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
<SUITE FILE_PATH="coverage/CPI$test.coverage" NAME="test Coverage Results" MODIFIED="1618643206375" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
<SUITE FILE_PATH="coverage/CPI$normalize_data.coverage" NAME="normalize_data Coverage Results" MODIFIED="1619675786375" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/utils" />
<SUITE FILE_PATH="coverage/CPI$normalize_data.coverage" NAME="normalize_data Coverage Results" MODIFIED="1619667621484" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/utils" />
<SUITE FILE_PATH="coverage/CPI$run_interaction.coverage" NAME="run_interaction Coverage Results" MODIFIED="1619685403006" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
<SUITE FILE_PATH="coverage/CPI$eval.coverage" NAME="eval Coverage Results" MODIFIED="1618396849549" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
<SUITE FILE_PATH="coverage/CPI$run_interaction.coverage" NAME="run_interaction Coverage Results" MODIFIED="1619081693342" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
<SUITE FILE_PATH="coverage/CPI$dataset.coverage" NAME="dataset Coverage Results" MODIFIED="1618641059668" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
</component>
</project>
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -10,33 +10,49 @@ from tqdm import tqdm
torch.set_default_tensor_type(torch.DoubleTensor)
def get_task(task_name):
    """Return the dataset file paths and tokenizer settings for a task.

    Args:
        task_name: Case-insensitive task identifier. Supported values are
            'train', 'test', 'train_z_10' and 'train_z_100'.

    Returns:
        A ``(dataset_files, tokenizer_config)`` tuple. ``dataset_files`` maps
        'sps', 'smile' and 'affinity' to file paths; ``tokenizer_config``
        holds the vocabulary paths, special tokens and maximum length.

    Raises:
        ValueError: If ``task_name`` is not one of the supported tasks.
    """
    # The tokenizer settings are identical for every task, so build them once.
    tokenizer_config = {
        "vocab_file": './config/vocab.txt',
        "vocab_pair": './config/drug_codes_chembl.txt',
        "begin_id": '[CLS]',
        "separate_id": "[SEP]",
        "max_len": 256,
    }

    # task name -> (prefix of the sps/smile files, affinity label file).
    # The z-score tasks reuse the training inputs and only swap the labels.
    task_files = {
        'train': ('train', './data/train_ic50'),
        'test': ('test', './data/test_ic50'),
        'train_z_10': ('train', './data/train_z_10_ic50'),
        'train_z_100': ('train', './data/train_z_100_ic50'),
    }

    key = task_name.lower()
    if key not in task_files:
        # Fail loudly instead of implicitly returning None, which would only
        # surface later as an opaque unpacking error in the caller.
        raise ValueError(
            'unknown task {!r}; expected one of {}'.format(
                task_name, sorted(task_files)))

    split, affinity_path = task_files[key]
    dataset_files = {
        "sps": './data/{}_sps'.format(split),
        "smile": './data/{}_smile'.format(split),
        "affinity": affinity_path,
    }
    return dataset_files, tokenizer_config
def train(args, model, dataset):
data_loder_para = {'batch_size': args.batch_size,
'shuffle': True,
......@@ -136,7 +152,7 @@ def main(args):
print('model name : BertAffinity')
print('task name : {}'.format(args.task))
if args.task == 'train':
if args.task in ['train', 'train_z_10', 'train_z_100']:
train(args, model, dataset)
elif args.task in ['test']:
......@@ -178,11 +194,11 @@ if __name__ == '__main__':
# local test
# args.task = 'train'
# args.savedir = 'local_test_train'
# args.epochs = 10
# args.lr = 1e-5
# args.config = './config/config_layer_3.json'
args.task = 'train_z_10'
args.savedir = 'local_test_train'
args.epochs = 10
args.lr = 1e-5
args.config = './config/config_layer_3.json'
......
CUDA_VISIBLE_DEVICES=4 python run_interaction.py --b=32 --task=train --epochs=10 --lr=1e-5 --savedir=lr-1e-5-batch-32-e-10-layer9-0420 --config=./config/config_layer_9.json
\ No newline at end of file
# --task must be a name run_interaction.py recognises: train, test, train_z_10 or train_z_100.
CUDA_VISIBLE_DEVICES=4 python run_interaction.py --b=32 --task=train_z_10 --epochs=10 --lr=1e-5 --savedir=lr-1e-5-batch-32-e-10-layer9-0420 --config=./config/config_layer_9.json
\ No newline at end of file
import numpy as np
from tqdm import tqdm
def z_score(data, save, enlarge):
with open(data, 'r') as f:
......@@ -14,7 +15,7 @@ def z_score(data, save, enlarge):
new_affinity *= enlarge
new_affinity = list(new_affinity)
with open(save, 'w') as f:
for aff in new_affinity:
for aff in tqdm(new_affinity):
f.write(str(aff) + '\n')
......@@ -22,7 +23,7 @@ def z_score(data, save, enlarge):
if __name__ == '__main__':
data = '../data/test_ic50'
save = '../data/test_z_ic50'
data = '../data/train_ic50'
save = '../data/train_z_100_ic50'
enlarge = 100
z_score(data, save, enlarge)
\ No newline at end of file
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论