Below is a simple example of implementing word2vec (skip-gram) with TensorFlow:
import tensorflow.compat.v1 as tf  # this example uses the TF1-style graph/session API
tf.disable_eager_execution()
# Define the training data (a toy corpus of tokenized sentences)
sentences = [["I", "love", "tensorflow"], ["tensorflow", "is", "awesome"]]
# Build the vocabulary
vocab = []
for sentence in sentences:
    vocab.extend(sentence)
vocab = list(set(vocab))
# Create the word-to-index and index-to-word mappings
word2idx = {w: i for i, w in enumerate(vocab)}
idx2word = {i: w for i, w in enumerate(vocab)}
# Convert the sentences into index sequences
indexed_sentences = [[word2idx[word] for word in sentence] for sentence in sentences]
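# For the toy corpus above, indexed_sentences now holds two lists of indices
# in [0, 5), e.g. [[0, 1, 2], [2, 3, 4]] (the exact numbers depend on the
# set ordering when vocab was built).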
# Define model hyperparameters
embedding_dim = 128  # dimensionality of the word embeddings
window_size = 2      # context window size (words on each side of the center word)
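# With window_size = 2, the sentence ["I", "love", "tensorflow"] yields the
# skip-gram pairs (I, love), (I, tensorflow), (love, I), (love, tensorflow),
# (tensorflow, I), (tensorflow, love): each word is paired with every
# neighbor at most two positions away.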
# Define placeholders and variables
center_words = tf.placeholder(tf.int32, shape=[None])
target_words = tf.placeholder(tf.int32, shape=[None, 1])  # nce_loss expects labels of shape [batch, 1]
embeddings = tf.Variable(tf.random_uniform([len(vocab), embedding_dim], -1.0, 1.0))
softmax_weights = tf.Variable(tf.truncated_normal([len(vocab), embedding_dim], stddev=0.1))
softmax_biases = tf.Variable(tf.zeros(len(vocab)))
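# Shape check: embeddings is [vocab_size, embedding_dim]; tf.nn.nce_loss
# expects its weights as [num_classes, dim] and biases as [num_classes],
# which is exactly how softmax_weights and softmax_biases are declared.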
# Look up the center-word embeddings and compute the loss for predicting
# context words, using noise-contrastive estimation (NCE) as a sampled
# approximation to the full softmax over the vocabulary
center_embeds = tf.nn.embedding_lookup(embeddings, center_words)
loss_per_example = tf.nn.nce_loss(weights=softmax_weights,
                                  biases=softmax_biases,
                                  labels=target_words,
                                  inputs=center_embeds,
                                  num_sampled=len(vocab) // 2,
                                  num_classes=len(vocab))
loss = tf.reduce_mean(loss_per_example)
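# A commonly used alternative sampled objective with the same call signature,
# should you prefer it over NCE, is tf.nn.sampled_softmax_loss:
# loss_per_example = tf.nn.sampled_softmax_loss(weights=softmax_weights,
#                                               biases=softmax_biases,
#                                               labels=target_words,
#                                               inputs=center_embeds,
#                                               num_sampled=len(vocab) // 2,
#                                               num_classes=len(vocab))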
# Define the optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(loss)
# Generate (center, context) skip-gram training pairs within the window
def skipgram_pairs(sentence):
    return [(sentence[i], sentence[j])
            for i in range(len(sentence))
            for j in range(max(0, i - window_size), min(len(sentence), i + window_size + 1))
            if j != i]

# Start training
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(10):
        total_loss = 0
        for sentence in indexed_sentences:
            pairs = skipgram_pairs(sentence)
            centers = [c for c, _ in pairs]
            targets = [[t] for _, t in pairs]  # shape [batch, 1] for nce_loss
            feed_dict = {center_words: centers, target_words: targets}
            _, batch_loss = sess.run([train_op, loss], feed_dict=feed_dict)
            total_loss += batch_loss
        print("Epoch:", epoch + 1, "Loss:", total_loss)
    # Retrieve the trained embedding matrix (inside the session)
    trained_embeddings = embeddings.eval()
# Print the resulting word vectors
for i, embedding in enumerate(trained_embeddings):
    word = idx2word[i]
    print("Word:", word, "Embedding:", embedding)
The code above walks through the basic steps of implementing word2vec in TensorFlow: data preparation, model definition, optimizer configuration, and the training loop. Note that this example is for illustration only; real applications typically need a much larger corpus, a more elaborate model, and tuned hyperparameters to obtain useful embeddings.
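As a quick sanity check on the result, one common follow-up is to compare words by cosine similarity of their trained vectors. The sketch below assumes only numpy plus the trained_embeddings, word2idx, and idx2word objects from the code above; the helper name most_similar is illustrative, not part of any library:
import numpy as np

def most_similar(word, k=2):
    # Illustrative helper: rank all other words by cosine similarity
    vecs = trained_embeddings / np.linalg.norm(trained_embeddings, axis=1, keepdims=True)
    sims = vecs @ vecs[word2idx[word]]
    order = [i for i in np.argsort(-sims) if i != word2idx[word]]
    return [(idx2word[i], float(sims[i])) for i in order[:k]]

print(most_similar("tensorflow"))
On a two-sentence toy corpus the similarities are essentially noise, but on a real corpus this kind of nearest-neighbor check is a standard way to see whether the embeddings have picked up meaningful structure.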