# 5. Train the model.
# Total number of SGD steps to run.
num_steps = 100001
with tf.compat.v1.Session(graph=graph) as session:
    init.run()
    average_loss = 0
    # BUG FIX: the original iterated range(num_skips) — the per-word sampling
    # parameter (typically 2) — instead of range(num_steps), so training
    # effectively never ran.
    for step in range(num_steps):
        batch_inputs, batch_labels = generate_batch(batch_size, num_skips, skip_window)
        feed_dict = {train_inputs: batch_inputs, train_labels: batch_labels}
        # One optimizer step; collect the loss for running-average reporting.
        _, loss_val = session.run([optimizer, loss], feed_dict=feed_dict)
        average_loss += loss_val
        if step % 2000 == 0:
            if step > 0:
                # Average over the last 2000 steps.
                average_loss /= 2000
            print("average loss at step:", step, ":", average_loss)
            average_loss = 0
        # Periodically print the nearest neighbours of each validation word.
        if step % 10000 == 0:
            sim = similary.eval()  # NOTE(review): name defined elsewhere (sic)
            # BUG FIX: the original indexed reverse_dictionary with the whole
            # valid_examples array and referenced an undefined `i`; the loop
            # over the validation examples was missing.
            for i in range(len(valid_examples)):
                valid_word = reverse_dictionary[valid_examples[i]]
                top_k = 8
                # NOTE(review): [:top_k] keeps index 0, so the query word
                # itself appears as its own nearest neighbour; the canonical
                # tutorial uses argsort()[1:top_k + 1]. Left as-is to preserve
                # the intended output — confirm with the author.
                nearest = (-sim[i, :]).argsort()[:top_k]
                log_str = "Nearest to %s" % valid_word
                for k in range(top_k):
                    close_word = reverse_dictionary[nearest[k]]
                    log_str = "%s %s," % (log_str, close_word)
                print(log_str)
    # Snapshot the final (normalized) embedding matrix as a numpy array.
    final_embeddings = normnalized_embeddings.eval()
# 6. Write the embeddings in word2vec text format:
# a header line "<vocab_size> <dim>", then one "<word> <v1> <v2> ..." per row.
with open('output/word2vect.text', "w", encoding="utf-8") as fw2v:
    fw2v.write(str(vocabulary_size) + " " + str(embedding_size) + "\n")
    for i in range(final_embeddings.shape[0]):
        sword = reverse_dictionary[i]
        # Build the space-separated vector once with join (the original
        # accumulated with string += in a loop).
        svector = " ".join(str(final_embeddings[i, j])
                           for j in range(final_embeddings.shape[1]))
        # BUG FIX: file.write() takes a single string; the original called
        # fw2v.write(sword, svector + "\n"), a TypeError at runtime.
        fw2v.write(sword + " " + svector + "\n")