


具体怎么做呢?还是用LSTM。每个单词由不同的字母组成,比如 apple 由 a p p l e 构成,我们同样给每个字符一个向量表示(字符嵌入),这样就形成了一个长度为5的序列,然后传入另外一个LSTM网络,只取最后时刻的隐藏状态作为这个单词的字符级表达。我们并不需要关心提取出来的字符表达具体是什么样的,因为在learning的过程中这些都是会被更新的参数,最终使得我们能够正确预测。

1 import torch
  2 import torch.nn.functional as F
  3 from torch import nn, optim
  4 from torch.autograd import Variable
  6 training_data = [("The dog ate the apple".split(),
  7                   ["DET", "NN", "V", "DET", "NN"]),
  8                  ("Everybody read that book".split(), ["NN", "V", "DET",
  9                                                        "NN"])]
 10 # 每个单词就用一个数字表示,每种词性也用一个数字表示
 11 word_to_idx = {}
 12 tag_to_idx = {}
 13 for context, tag in training_data:
 14     for word in context:
 15         if word not in word_to_idx:
 16             # 对词进行编码
 17             word_to_idx[word] = len(word_to_idx)
 18     for label in tag:
 19         if label not in tag_to_idx:
 20             # 对词性编码
 21             tag_to_idx[label] = len(tag_to_idx)
 22 alphabet = 'abcdefghijklmnopqrstuvwxyz'
 23 character_to_idx = {}
 24 for i in range(len(alphabet)):
 25     # 对字母编码
 26     character_to_idx[alphabet[i]] = i
 28 # 字符LSTM
 29 class CharLSTM(nn.Module):
 30     def __init__(self, n_char, char_dim, char_hidden):
 31         super(CharLSTM, self).__init__()
 32         self.char_embedding = nn.Embedding(n_char, char_dim)
 33         self.char_lstm = nn.LSTM(char_dim, char_hidden, batch_first=True)
 35     def forward(self, x):
 36         x = self.char_embedding(x)
 37         _, h = self.char_lstm(x)
 38         # 取隐层
 39         return h[0]
 42 class LSTMTagger(nn.Module):
 43     def __init__(self, n_word, n_char, char_dim, n_dim, char_hidden, n_hidden,
 44                  n_tag):
 45         super(LSTMTagger, self).__init__()
 46         self.word_embedding = nn.Embedding(n_word, n_dim)
 47         self.char_lstm = CharLSTM(n_char, char_dim, char_hidden)
 48         self.lstm = nn.LSTM(n_dim + char_hidden, n_hidden, batch_first=True)
 49         self.linear1 = nn.Linear(n_hidden, n_tag)
 51     def forward(self, x, word):
 52         char = torch.FloatTensor()
 53         for each in word:
 54             char_list = []
 55             for letter in each:
 56                 # 对词进行字母编码
 57                 char_list.append(character_to_idx[letter.lower()])
 58             char_list = torch.LongTensor(char_list)
 59             char_list = char_list.unsqueeze(0)
 60             if torch.cuda.is_available():
 61                 tempchar = self.char_lstm(Variable(char_list).cuda())
 62             else:
 63                 tempchar = self.char_lstm(Variable(char_list))
 64             tempchar = tempchar.squeeze(0)
 65             char = torch.cat((char, tempchar.cpu().data), 0)
 66         if torch.cuda.is_available():
 67             char = char.cuda()
 68         char = Variable(char)
 69         x = self.word_embedding(x)
 70         x = torch.cat((x, char), 1) # char编码与word编码cat
 71         x = x.unsqueeze(0)
 72         # 取输出层 句长*n_hidden
 73         x, _ = self.lstm(x)
 74         x = x.squeeze(0)
 75         x = self.linear1(x)
 76         y = F.log_softmax(x)
 77         return y
 80 model = LSTMTagger(
 81     len(word_to_idx), len(character_to_idx), 10, 100, 50, 128, len(tag_to_idx))
 82 if torch.cuda.is_available():
 83     model = model.cuda()
 84 criterion = nn.CrossEntropyLoss()
 85 optimizer = optim.SGD(model.parameters(), lr=1e-2)
 88 def make_sequence(x, dic):
 89     idx = [dic[i] for i in x]
 90     idx = Variable(torch.LongTensor(idx))
 91     return idx
 94 for epoch in range(300):
 95     print('*' * 10)
 96     print('epoch {}'.format(epoch + 1))
 97     running_loss = 0
 98     for data in training_data:
 99         word, tag = data
100         word_list = make_sequence(word, word_to_idx)
101         tag = make_sequence(tag, tag_to_idx)
102         if torch.cuda.is_available():
103             word_list = word_list.cuda()
104             tag = tag.cuda()
105         # forward
106         out = model(word_list, word)
107         loss = criterion(out, tag)
108         running_loss += loss.data[0]
109         # backward 三步常规操作
110         optimizer.zero_grad()
111         loss.backward()
112         optimizer.step()
113     print('Loss: {}'.format(running_loss / len(data)))
print()
# Tag an unseen combination of known words. The sentence is split once and
# reused for both the word ids and the character-level encoding.
test_words = "Everybody ate the apple".split()
test_input = make_sequence(test_words, word_to_idx)
if torch.cuda.is_available():
    test_input = test_input.cuda()
# eval() switches layers such as dropout and batch normalization, which
# behave differently during training and testing, to inference behaviour.
model.eval()
# No gradients are needed for prediction.
with torch.no_grad():
    out = model(test_input, test_words)
print(out)

首先,n_word 和 n_dim 分别定义词表大小和单词的词向量维度,n_char 和 char_dim 分别定义字符表大小和字符的向量维度,char_hidden 表示 CharLSTM 输出的维度,n_hidden 表示以单词为序列输入的 LSTM 的输出维度,最后 n_tag 表示输出的词性的种类数。




预测一下 Everybody ate the apple 这句话每个词的词性,一共有3种词性,DET,NN,V。最后得到的结果为:

