提交 d5064e86 作者: 朱学凯

add token type ids

上级 e92d1555
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.6 (py3.6)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.6 (py3.6)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="1">
<item index="0" class="java.lang.String" itemvalue="subword-nmt" />
</list>
</value>
</option>
</inspection_tool>
</profile>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (py3.6)" project-jdk-type="Python SDK" />
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (py3.6)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/CPI.iml" filepath="$PROJECT_DIR$/.idea/CPI.iml" />
</modules>
</component>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/CPI.iml" filepath="$PROJECT_DIR$/.idea/CPI.iml" />
</modules>
</component>
</project>
\ No newline at end of file
Index: .idea/workspace.xml
++ /dev/null
Index: .idea/workspace.xml
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.BaseRevisionTextPatchEP
<+><?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"ChangeListManager\">\n <list default=\"true\" id=\"f877ac68-9cea-46d8-9125-207eebe5b5d6\" name=\"Default Changelist\" comment=\"\">\n <change beforePath=\"$PROJECT_DIR$/.idea/workspace.xml\" beforeDir=\"false\" afterPath=\"$PROJECT_DIR$/.idea/workspace.xml\" afterDir=\"false\" />\n <change beforePath=\"$PROJECT_DIR$/run_interaction.py\" beforeDir=\"false\" afterPath=\"$PROJECT_DIR$/run_interaction.py\" afterDir=\"false\" />\n </list>\n <option name=\"SHOW_DIALOG\" value=\"false\" />\n <option name=\"HIGHLIGHT_CONFLICTS\" value=\"true\" />\n <option name=\"HIGHLIGHT_NON_ACTIVE_CHANGELIST\" value=\"false\" />\n <option name=\"LAST_RESOLUTION\" value=\"IGNORE\" />\n </component>\n <component name=\"Git.Settings\">\n <option name=\"RECENT_GIT_ROOT_PATH\" value=\"$PROJECT_DIR$\" />\n </component>\n <component name=\"ProjectId\" id=\"1qpu2Wq6VU5TQVQOm73pQEwAahA\" />\n <component name=\"ProjectLevelVcsManager\">\n <ConfirmationsSetting value=\"1\" id=\"Add\" />\n </component>\n <component name=\"ProjectViewState\">\n <option name=\"hideEmptyMiddlePackages\" value=\"true\" />\n <option name=\"showLibraryContents\" value=\"true\" />\n </component>\n <component name=\"PropertiesComponent\">\n <property name=\"ASKED_ADD_EXTERNAL_FILES\" value=\"true\" />\n <property name=\"RunOnceActivity.OpenProjectViewOnStart\" value=\"true\" />\n <property name=\"RunOnceActivity.ShowReadmeOnStart\" value=\"true\" />\n <property name=\"WebServerToolWindowFactoryState\" value=\"false\" />\n <property name=\"restartRequiresConfirmation\" value=\"false\" />\n </component>\n <component name=\"RunManager\">\n <configuration name=\"run_interaction\" type=\"PythonConfigurationType\" factoryName=\"Python\" temporary=\"true\" nameIsGenerated=\"true\">\n <module name=\"CPI\" />\n <option name=\"INTERPRETER_OPTIONS\" value=\"\" />\n <option name=\"PARENT_ENVS\" value=\"true\" />\n <envs>\n <env 
name=\"PYTHONUNBUFFERED\" value=\"1\" />\n </envs>\n <option name=\"SDK_HOME\" value=\"\" />\n <option name=\"WORKING_DIRECTORY\" value=\"$PROJECT_DIR$\" />\n <option name=\"IS_MODULE_SDK\" value=\"true\" />\n <option name=\"ADD_CONTENT_ROOTS\" value=\"true\" />\n <option name=\"ADD_SOURCE_ROOTS\" value=\"true\" />\n <EXTENSION ID=\"PythonCoverageRunConfigurationExtension\" runner=\"coverage.py\" />\n <option name=\"SCRIPT_NAME\" value=\"$PROJECT_DIR$/run_interaction.py\" />\n <option name=\"PARAMETERS\" value=\"\" />\n <option name=\"SHOW_COMMAND_LINE\" value=\"false\" />\n <option name=\"EMULATE_TERMINAL\" value=\"false\" />\n <option name=\"MODULE_MODE\" value=\"false\" />\n <option name=\"REDIRECT_INPUT\" value=\"false\" />\n <option name=\"INPUT_FILE\" value=\"\" />\n <method v=\"2\" />\n </configuration>\n <recent_temporary>\n <list>\n <item itemvalue=\"Python.run_interaction\" />\n </list>\n </recent_temporary>\n </component>\n <component name=\"SpellCheckerSettings\" RuntimeDictionaries=\"0\" Folders=\"0\" CustomDictionaries=\"0\" DefaultDictionary=\"application-level\" UseSingleDictionary=\"true\" transferred=\"true\" />\n <component name=\"TaskManager\">\n <task active=\"true\" id=\"Default\" summary=\"Default task\">\n <changelist id=\"f877ac68-9cea-46d8-9125-207eebe5b5d6\" name=\"Default Changelist\" comment=\"\" />\n <created>1617788646167</created>\n <option name=\"number\" value=\"Default\" />\n <option name=\"presentableId\" value=\"Default\" />\n <updated>1617788646167</updated>\n <workItem from=\"1617788647548\" duration=\"5550000\" />\n </task>\n <servers />\n </component>\n <component name=\"TypeScriptGeneratedFilesManager\">\n <option name=\"version\" value=\"3\" />\n </component>\n <component name=\"Vcs.Log.Tabs.Properties\">\n <option name=\"TAB_STATES\">\n <map>\n <entry key=\"MAIN\">\n <value>\n <State />\n </value>\n </entry>\n </map>\n </option>\n </component>\n <component name=\"com.intellij.coverage.CoverageDataManagerImpl\">\n <SUITE 
FILE_PATH=\"coverage/CPI$run_interaction.coverage\" NAME=\"run_interaction Coverage Results\" MODIFIED=\"1617888322915\" SOURCE_PROVIDER=\"com.intellij.coverage.DefaultCoverageFileProvider\" RUNNER=\"coverage.py\" COVERAGE_BY_TEST_ENABLED=\"true\" COVERAGE_TRACING_ENABLED=\"false\" WORKING_DIRECTORY=\"$PROJECT_DIR$\" />\n </component>\n</project>
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
--- a/.idea/workspace.xml (revision d2a304581cb05de7f85d60774094ec940d9ff199)
+++ b/.idea/workspace.xml (date 1618051977139)
@@ -3,7 +3,6 @@
<component name="ChangeListManager">
<list default="true" id="f877ac68-9cea-46d8-9125-207eebe5b5d6" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
- <change beforePath="$PROJECT_DIR$/run_interaction.py" beforeDir="false" afterPath="$PROJECT_DIR$/run_interaction.py" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -65,7 +64,7 @@
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1617788646167</updated>
- <workItem from="1617788647548" duration="5550000" />
+ <workItem from="1617788647548" duration="5869000" />
</task>
<servers />
</component>
<changelist name="Uncommitted_changes_before_Update_at_2021_4_11,_11_03_上午_[Default_Changelist]" date="1618110223042" recycled="true" deleted="true">
<option name="PATH" value="$PROJECT_DIR$/.idea/shelf/Uncommitted_changes_before_Update_at_2021_4_11,_11_03_上午_[Default_Changelist]/shelved.patch" />
<option name="DESCRIPTION" value="Uncommitted changes before Update at 2021/4/11, 11:03 上午 [Default Changelist]" />
</changelist>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
\ No newline at end of file
......@@ -136,12 +136,13 @@ class Data_Encoder(data.Dataset):
# tokenization
d = self.dbpe.process_line(self.smile[index].strip()).split()
p = self.sps[index].strip().split(',')
y = np.float32(self.affinity[index].strip())
y = np.float64(self.affinity[index].strip())
input_seq = [self.begin_id] + d + [self.sep_id] + p + [self.sep_id]
token_type_ids = np.concatenate((np.zeros((len(d) + 2), dtype=np.int), np.ones((len(p) + 1), dtype=np.int)))
token_type_ids = np.pad(token_type_ids, (0, self.max_len-len(input_seq)), 'constant', constant_values=0)
input, input_mask = seq2emb_encoder(input_seq, self.max_len, self.vocab)
return input, y
return torch.from_numpy(input).long(), torch.from_numpy(token_type_ids).long(), y
if __name__ == "__main__":
......
......@@ -202,7 +202,6 @@ class BertEmbeddings(nn.Module):
if inputs_embeds is None:
inputs_embeds = self.word_embeddings(input_ids)
token_type_embeddings = self.token_type_embeddings(token_type_ids)
embeddings = inputs_embeds + token_type_embeddings
if self.position_embedding_type == "absolute":
position_embeddings = self.position_embeddings(position_ids)
......
......@@ -7,7 +7,7 @@ from modeling_bert import BertAffinityModel
from torch.utils.tensorboard import SummaryWriter
import os
from tqdm import tqdm
torch.set_default_tensor_type(torch.DoubleTensor)
def get_task(task_name):
if task_name.lower() == 'train':
......@@ -63,14 +63,14 @@ def train(args, model, dataset):
print('begin training')
# training
for epoch in range(args.epochs):
for i, (input, affinity) in enumerate(data_generator):
for i, (input, token_type_ids, affinity) in enumerate(data_generator):
# use cuda
# input model
if torch.cuda.is_available():
pred_affinity = model(input.cuda().long())
pred_affinity = model(input_ids=input.cuda(), token_type_ids=token_type_ids.cuda())
loss = loss_fct(pred_affinity, affinity.cuda().unsqueeze(-1))
else:
pred_affinity = model(input.long())
pred_affinity = model(input_ids=input, token_type_ids=token_type_ids)
loss = loss_fct(pred_affinity, affinity.unsqueeze(-1))
step += 1
writer.add_scalar('loss', loss, global_step=step)
......@@ -175,12 +175,11 @@ if __name__ == '__main__':
# local test
# args.task = 'train'
# args.savedir='local_test_train'
# args.epochs = 10
# args.lr = 1e-7
# args.config = './config/config_layer_3.json'
# args.shuffle = True
args.task = 'train'
args.savedir='local_test_train'
args.epochs = 10
args.lr = 1e-5
args.config = './config/config_layer_3.json'
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论