提交 44a442cd 作者: 朱学凯

add z-score data

上级 256bf696
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4"> <module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager"> <component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" /> <content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.6 (code)" jdkType="Python SDK" /> <orderEntry type="jdk" jdkName="Python 3.6 (py3.6)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />
</component> </component>
<component name="PyDocumentationSettings"> <component name="PyDocumentationSettings">
......
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project version="4"> <project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (code)" project-jdk-type="Python SDK" /> <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (py3.6)" project-jdk-type="Python SDK" />
</project> </project>
\ No newline at end of file
...@@ -2,7 +2,12 @@ ...@@ -2,7 +2,12 @@
<project version="4"> <project version="4">
<component name="ChangeListManager"> <component name="ChangeListManager">
<list default="true" id="f877ac68-9cea-46d8-9125-207eebe5b5d6" name="Default Changelist" comment=""> <list default="true" id="f877ac68-9cea-46d8-9125-207eebe5b5d6" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/.idea/CPI.iml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/CPI.iml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/misc.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/misc.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" /> <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/run_interaction.py" beforeDir="false" afterPath="$PROJECT_DIR$/run_interaction.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/train.sh" beforeDir="false" afterPath="$PROJECT_DIR$/train.sh" afterDir="false" />
<change beforePath="$PROJECT_DIR$/utils/normalize_data.py" beforeDir="false" afterPath="$PROJECT_DIR$/utils/normalize_data.py" afterDir="false" />
</list> </list>
<option name="SHOW_DIALOG" value="false" /> <option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" /> <option name="HIGHLIGHT_CONFLICTS" value="true" />
...@@ -49,7 +54,7 @@ ...@@ -49,7 +54,7 @@
<recent name="$PROJECT_DIR$/experment_result/learning_rate" /> <recent name="$PROJECT_DIR$/experment_result/learning_rate" />
</key> </key>
</component> </component>
<component name="RunManager" selected="Python.normalize_data"> <component name="RunManager" selected="Python.run_interaction">
<configuration default="true" type="PythonConfigurationType" factoryName="Python"> <configuration default="true" type="PythonConfigurationType" factoryName="Python">
<module name="CPI" /> <module name="CPI" />
<option name="INTERPRETER_OPTIONS" value="" /> <option name="INTERPRETER_OPTIONS" value="" />
...@@ -120,9 +125,6 @@ ...@@ -120,9 +125,6 @@
<module name="CPI" /> <module name="CPI" />
<option name="INTERPRETER_OPTIONS" value="" /> <option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" /> <option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" /> <option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/utils" /> <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/utils" />
<option name="IS_MODULE_SDK" value="true" /> <option name="IS_MODULE_SDK" value="true" />
...@@ -184,8 +186,8 @@ ...@@ -184,8 +186,8 @@
</configuration> </configuration>
<recent_temporary> <recent_temporary>
<list> <list>
<item itemvalue="Python.normalize_data" />
<item itemvalue="Python.run_interaction" /> <item itemvalue="Python.run_interaction" />
<item itemvalue="Python.normalize_data" />
<item itemvalue="Python.test" /> <item itemvalue="Python.test" />
<item itemvalue="Python.dataset" /> <item itemvalue="Python.dataset" />
<item itemvalue="Python.eval" /> <item itemvalue="Python.eval" />
...@@ -232,10 +234,10 @@ ...@@ -232,10 +234,10 @@
</breakpoint-manager> </breakpoint-manager>
</component> </component>
<component name="com.intellij.coverage.CoverageDataManagerImpl"> <component name="com.intellij.coverage.CoverageDataManagerImpl">
<SUITE FILE_PATH="coverage/CPI$dataset.coverage" NAME="dataset Coverage Results" MODIFIED="1618641059668" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
<SUITE FILE_PATH="coverage/CPI$test.coverage" NAME="test Coverage Results" MODIFIED="1618643206375" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" /> <SUITE FILE_PATH="coverage/CPI$test.coverage" NAME="test Coverage Results" MODIFIED="1618643206375" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
<SUITE FILE_PATH="coverage/CPI$normalize_data.coverage" NAME="normalize_data Coverage Results" MODIFIED="1619675786375" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/utils" /> <SUITE FILE_PATH="coverage/CPI$normalize_data.coverage" NAME="normalize_data Coverage Results" MODIFIED="1619667621484" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/utils" />
<SUITE FILE_PATH="coverage/CPI$run_interaction.coverage" NAME="run_interaction Coverage Results" MODIFIED="1619685403006" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
<SUITE FILE_PATH="coverage/CPI$eval.coverage" NAME="eval Coverage Results" MODIFIED="1618396849549" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" /> <SUITE FILE_PATH="coverage/CPI$eval.coverage" NAME="eval Coverage Results" MODIFIED="1618396849549" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
<SUITE FILE_PATH="coverage/CPI$run_interaction.coverage" NAME="run_interaction Coverage Results" MODIFIED="1619081693342" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" /> <SUITE FILE_PATH="coverage/CPI$dataset.coverage" NAME="dataset Coverage Results" MODIFIED="1618641059668" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
</component> </component>
</project> </project>
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -10,11 +10,6 @@ from tqdm import tqdm ...@@ -10,11 +10,6 @@ from tqdm import tqdm
torch.set_default_tensor_type(torch.DoubleTensor) torch.set_default_tensor_type(torch.DoubleTensor)
def get_task(task_name): def get_task(task_name):
if task_name.lower() == 'train':
df_train = {"sps": './data/train_sps',
"smile": './data/train_smile',
"affinity": './data/train_ic50',
}
tokenizer_config = {"vocab_file": './config/vocab.txt', tokenizer_config = {"vocab_file": './config/vocab.txt',
"vocab_pair": './config/drug_codes_chembl.txt', "vocab_pair": './config/drug_codes_chembl.txt',
...@@ -22,21 +17,42 @@ def get_task(task_name): ...@@ -22,21 +17,42 @@ def get_task(task_name):
"separate_id": "[SEP]", "separate_id": "[SEP]",
"max_len": 256 "max_len": 256
} }
if task_name.lower() == 'train':
df_train = {"sps": './data/train_sps',
"smile": './data/train_smile',
"affinity": './data/train_ic50',
}
return df_train, tokenizer_config return df_train, tokenizer_config
elif task_name.lower() == 'test': elif task_name.lower() == 'test':
df_test = {"sps": './data/test_sps', df_test = {"sps": './data/test_sps',
"smile": './data/test_smile', "smile": './data/test_smile',
"affinity": './data/test_ic50', "affinity": './data/test_ic50',
} }
tokenizer_config = {"vocab_file": './config/vocab.txt',
"vocab_pair": './config/drug_codes_chembl.txt',
"begin_id": '[CLS]',
"separate_id": "[SEP]",
"max_len": 256
}
return df_test, tokenizer_config return df_test, tokenizer_config
elif task_name.lower() == 'train_z_10':
df = {"sps": './data/train_sps',
"smile": './data/train_smile',
"affinity": './data/train_z_10_ic50',
}
return df, tokenizer_config
elif task_name.lower() == 'train_z_100':
df = {"sps": './data/train_sps',
"smile": './data/train_smile',
"affinity": './data/train_z_100_ic50',
}
return df, tokenizer_config
def train(args, model, dataset): def train(args, model, dataset):
data_loder_para = {'batch_size': args.batch_size, data_loder_para = {'batch_size': args.batch_size,
'shuffle': True, 'shuffle': True,
...@@ -136,7 +152,7 @@ def main(args): ...@@ -136,7 +152,7 @@ def main(args):
print('model name : BertAffinity') print('model name : BertAffinity')
print('task name : {}'.format(args.task)) print('task name : {}'.format(args.task))
if args.task == 'train': if args.task in ['train', 'train_z_10', 'train_z_100']:
train(args, model, dataset) train(args, model, dataset)
elif args.task in ['test']: elif args.task in ['test']:
...@@ -178,11 +194,11 @@ if __name__ == '__main__': ...@@ -178,11 +194,11 @@ if __name__ == '__main__':
# local test # local test
# args.task = 'train' args.task = 'train_z_10'
# args.savedir = 'local_test_train' args.savedir = 'local_test_train'
# args.epochs = 10 args.epochs = 10
# args.lr = 1e-5 args.lr = 1e-5
# args.config = './config/config_layer_3.json' args.config = './config/config_layer_3.json'
......
CUDA_VISIBLE_DEVICES=4 python run_interaction.py --b=32 --task=train --epochs=10 --lr=1e-5 --savedir=lr-1e-5-batch-32-e-10-layer9-0420 --config=./config/config_layer_9.json CUDA_VISIBLE_DEVICES=4 python run_interaction.py --b=32 --task=train_z --epochs=10 --lr=1e-5 --savedir=lr-1e-5-batch-32-e-10-layer9-0420 --config=./config/config_layer_9.json
\ No newline at end of file \ No newline at end of file
import numpy as np import numpy as np
from tqdm import tqdm
def z_score(data, save, enlarge): def z_score(data, save, enlarge):
with open(data, 'r') as f: with open(data, 'r') as f:
...@@ -14,7 +15,7 @@ def z_score(data, save, enlarge): ...@@ -14,7 +15,7 @@ def z_score(data, save, enlarge):
new_affinity *= enlarge new_affinity *= enlarge
new_affinity = list(new_affinity) new_affinity = list(new_affinity)
with open(save, 'w') as f: with open(save, 'w') as f:
for aff in new_affinity: for aff in tqdm(new_affinity):
f.write(str(aff) + '\n') f.write(str(aff) + '\n')
...@@ -22,7 +23,7 @@ def z_score(data, save, enlarge): ...@@ -22,7 +23,7 @@ def z_score(data, save, enlarge):
if __name__ == '__main__': if __name__ == '__main__':
data = '../data/test_ic50' data = '../data/train_ic50'
save = '../data/test_z_ic50' save = '../data/train_z_100_ic50'
enlarge = 100 enlarge = 100
z_score(data, save, enlarge) z_score(data, save, enlarge)
\ No newline at end of file
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论