[PyTorch Study Notes] 6. Recurrent Neural Networks


Table of Contents

  • 40. Time Series Representation
  • 41. Recurrent Neural Networks
  • 42. Using the RNN Layer
    • 42.1 nn.RNN
    • 42.2 nn.RNNCell
  • 43. Time Series Prediction
  • 44. RNN Training Difficulties
  • 45. Using the LSTM Layer
    • 45.1 nn.LSTM
    • 45.2 nn.LSTMCell
  • 46. Sentiment Classification in Practice

Compiled from Long Liangqu's PyTorch course videos; video links:
【计算机-AI】PyTorch学这个就够了!
(好课推荐)深度学习与PyTorch入门实战——主讲人龙良曲

40. Time Series Representation

Sequence representation

  • [seq_len, feature_len]
  • [word, word_vec] with one-hot encoding: sparse, high-dimensional, and with no notion of semantic similarity
  • [words, word_vec] with dense embeddings (word2vec / GloVe): compact vectors in which similar words lie close together (see the sketch below)
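
A minimal sketch of the contrast (the tiny three-word vocabulary here is made up for illustration): a one-hot vector has as many dimensions as the vocabulary with a single 1, while an embedding lookup maps the same index to a short dense vector.

    import torch
    import torch.nn.functional as F

    vocab = {'hello': 0, 'world': 1, 'pytorch': 2}    # hypothetical 3-word vocabulary
    idx = torch.tensor([vocab['hello']])

    one_hot = F.one_hot(idx, num_classes=len(vocab))  # sparse: dim == vocab size
    print(one_hot)                                    # tensor([[1, 0, 0]])

    embed = torch.nn.Embedding(len(vocab), 5)         # dense: 5-dim, learnable
    print(embed(idx).shape)                           # torch.Size([1, 5])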

word2vec vs GloVe

    import torch
    import torch.nn as nn
    from torchnlp.word_to_vector import GloVe

    word_to_idx = {'hello': 0, 'world': 1}
    lookup_tensor = torch.tensor([word_to_idx['hello']], dtype=torch.long)

    embeds = nn.Embedding(2, 5)  # 2 words in vocab, 5-dimensional embeddings
    hello_embed = embeds(lookup_tensor)
    print(hello_embed)
    # tensor([[ 0.2565, -0.2827, -0.0259, -1.9533, 0.8330]], grad_fn=<EmbeddingBackward>)

    vectors = GloVe()            # downloads the pretrained vectors (about 2 GB) on first use
    print(vectors['hello'])

Install the torchnlp package with: pip install pytorch-nlp

41. Recurrent Neural Networks

Weight sharing
Consistent memory

42. Using the RNN Layer

input dim, hidden dim

    rnn = nn.RNN(100, 10)  # input_size (word_dim), hidden_size (memory/h)
    print(rnn._parameters.keys())
    # odict_keys(['weight_ih_l0', 'weight_hh_l0', 'bias_ih_l0', 'bias_hh_l0'])
    print(rnn.weight_hh_l0.shape, rnn.weight_ih_l0.shape)  # torch.Size([10, 10]) torch.Size([10, 100])
    print(rnn.bias_hh_l0.shape, rnn.bias_ih_l0.shape)      # torch.Size([10]) torch.Size([10])
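
These shapes follow from the RNN update h_t = tanh(x_t @ W_ih^T + b_ih + h_(t-1) @ W_hh^T + b_hh). A small sanity check of my own (not from the video) that recomputes one step by hand and compares it with the layer's output:

    x = torch.randn(1, 3, 100)   # seq_len=1, batch=3, feature=100
    h0 = torch.zeros(1, 3, 10)
    out, h = rnn(x, h0)

    # one step of the recurrence, computed manually from the layer's parameters
    manual = torch.tanh(x[0] @ rnn.weight_ih_l0.T + rnn.bias_ih_l0
                        + h0[0] @ rnn.weight_hh_l0.T + rnn.bias_hh_l0)
    print(torch.allclose(out[0], manual, atol=1e-6))  # True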

42.1 nn.RNN

  • __init__
    (input_size, hidden_size, num_layers)
  • out, ht = forward(x, h0)
    x: [seq_len, b, word_vec]
    h0/ht: [num_layers, b, h_dim]
    out: [seq_len, b, h_dim]

Single layer RNN

    rnn = nn.RNN(input_size=100, hidden_size=20, num_layers=1)
    print(rnn)  # RNN(100, 20)

    x = torch.randn(10, 3, 100)
    out, h = rnn(x, torch.zeros(1, 3, 20))
    print(out.shape, h.shape)  # torch.Size([10, 3, 20]) torch.Size([1, 3, 20])

h is the hidden (memory) state of every layer at the final time step; out is the hidden state of the last layer at every time step.
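
For this single-layer RNN the two views meet at the final step: the last row of out equals the only layer's final hidden state. A quick check (my own addition):

    print(torch.allclose(out[-1], h[0]))  # True: last time step of out == final hidden state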

2 layer RNN

    rnn = nn.RNN(100, 10, num_layers=2)  # input_size (word_dim), hidden_size (memory/h)
    print(rnn._parameters.keys())
    # odict_keys(['weight_ih_l0', 'weight_hh_l0', 'bias_ih_l0', 'bias_hh_l0',
    #             'weight_ih_l1', 'weight_hh_l1', 'bias_ih_l1', 'bias_hh_l1'])
    print(rnn.weight_hh_l0.shape, rnn.weight_ih_l0.shape)  # torch.Size([10, 10]) torch.Size([10, 100])
    print(rnn.bias_hh_l0.shape, rnn.bias_ih_l0.shape)      # torch.Size([10]) torch.Size([10])
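
One detail worth spelling out (my own check): for every layer above the first, the "input" is the hidden state of the layer below, so weight_ih_l1 is [hidden_size, hidden_size] rather than [hidden_size, input_size]:

    print(rnn.weight_ih_l1.shape, rnn.weight_hh_l1.shape)  # torch.Size([10, 10]) torch.Size([10, 10])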

out: [T, b, h_dim], h: [layers, b, h_dim]

    rnn = nn.RNN(input_size=100, hidden_size=20, num_layers=4)
    print(rnn)  # RNN(100, 20, num_layers=4)

    x = torch.randn(10, 3, 100)
    out, h = rnn(x)
    print(out.shape, h.shape)  # torch.Size([10, 3, 20]) torch.Size([4, 3, 20])
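
By default nn.RNN expects x as [seq_len, b, feature]. The time-series example in section 43 passes batch_first=True, which swaps the first two input/output dimensions while h keeps its layout; a minimal sketch of my own showing the difference:

    rnn_bf = nn.RNN(input_size=100, hidden_size=20, num_layers=4, batch_first=True)
    x_bf = torch.randn(3, 10, 100)   # [b, seq_len, feature]
    out, h = rnn_bf(x_bf)
    print(out.shape, h.shape)        # torch.Size([3, 10, 20]) torch.Size([4, 3, 20])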

42.2 nn.RNNCell

  • __init__
    (input_size, hidden_size) — nn.RNNCell takes no num_layers argument; stack cells manually for multiple layers
  • ht = rnncell(xt, ht_1)
    xt: [b, word_vec]
    ht_1/ht: [b, h_dim]
    out = torch.stack([h1, h2, …, ht]) (see the sketch after the Functional example below)

Functional

    x = torch.randn(10, 3, 100)  # [seq_len, b, word_vec]
    cell1 = nn.RNNCell(100, 30)
    cell2 = nn.RNNCell(30, 20)
    h1 = torch.zeros(3, 30)
    h2 = torch.zeros(3, 20)

    for xt in x:                 # iterate over the time dimension
        h1 = cell1(xt, h1)
        h2 = cell2(h1, h2)

    print(h1.shape)  # torch.Size([3, 30])
    print(h2.shape)  # torch.Size([3, 20])
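
To recover the out tensor that nn.RNN would return, collect the top cell's hidden state at every step and stack them, as the out = torch.stack(...) line above suggests. A sketch of my own:

    outs = []
    h1 = torch.zeros(3, 30)
    h2 = torch.zeros(3, 20)
    for xt in x:
        h1 = cell1(xt, h1)
        h2 = cell2(h1, h2)
        outs.append(h2)
    out = torch.stack(outs)  # [seq_len, b, h_dim]
    print(out.shape)         # torch.Size([10, 3, 20])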

43. Time Series Prediction

    import numpy as np
    import matplotlib.pyplot as plt
    import torch
    import torch.nn as nn
    import torch.optim as optim

    num_time_steps = 50
    input_size = 1
    hidden_size = 16
    output_size = 1
    lr = 0.01

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.rnn = nn.RNN(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=1,
                batch_first=True
            )
            for p in self.rnn.parameters():
                nn.init.normal_(p, mean=0.0, std=0.001)
            self.linear = nn.Linear(hidden_size, output_size)

        def forward(self, x, hidden_prev):
            out, hidden_prev = self.rnn(x, hidden_prev)
            # [b, seq, h] => [seq, h]  (b = 1)
            out = out.view(-1, hidden_size)
            # [seq, h] => [seq, 1]
            out = self.linear(out)
            # [seq, 1] => [1, seq, 1]
            out = out.unsqueeze(dim=0)
            return out, hidden_prev

    model = Net()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr)
    hidden_prev = torch.zeros(1, 1, hidden_size)

    for iter in range(6000):
        start = np.random.randint(3, size=1)[0]
        time_steps = np.linspace(start, start + 10, num_time_steps)
        data = np.sin(time_steps)
        data = data.reshape(num_time_steps, 1)
        x = torch.tensor(data[:-1]).float().view(1, num_time_steps - 1, 1)
        y = torch.tensor(data[1:]).float().view(1, num_time_steps - 1, 1)

        output, hidden_prev = model(x, hidden_prev)
        hidden_prev = hidden_prev.detach()  # detach from the graph (requires_grad=False) so gradients don't flow back across iterations

        loss = criterion(output, y)
        model.zero_grad()
        loss.backward()
        # inspect gradient norms / clip if needed
        # for p in model.parameters():
        #     print(p.grad.norm())
        #     torch.nn.utils.clip_grad_norm_(p, 10)
        optimizer.step()

        if iter % 100 == 0:
            print('Iteration: {} loss {}'.format(iter, loss.item()))

    # predict one curve, feeding the model its own output step by step
    start = np.random.randint(3, size=1)[0]
    time_steps = np.linspace(start, start + 10, num_time_steps)
    data = np.sin(time_steps)
    data = data.reshape(num_time_steps, 1)
    x = torch.tensor(data[:-1]).float().view(1, num_time_steps - 1, 1)
    y = torch.tensor(data[1:]).float().view(1, num_time_steps - 1, 1)

    predictions = []
    input = x[:, 0, :]  # [b, seq, feature] => [b, feature]
    for _ in range(x.shape[1]):
        input = input.view(1, 1, 1)
        (pred, hidden_prev) = model(input, hidden_prev)
        input = pred  # e.g. tensor([[[0.8720]]], grad_fn=<UnsqueezeBackward0>)
        predictions.append(pred.detach().numpy().ravel()[0])

    x = x.data.numpy().ravel()
    plt.scatter(time_steps[:-1], x.ravel(), s=90)
    plt.plot(time_steps[:-1], x.ravel())
    plt.scatter(time_steps[1:], predictions)
    plt.show()

(Figure: ground-truth sine points and the model's step-by-step predictions plotted with matplotlib.)

44. RNN Training Difficulties

  • Gradient Exploding
    Solution: gradient clipping (see the sketch below)
  • Gradient Vanishing
    Solution: LSTM
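
A minimal sketch of gradient clipping with PyTorch's built-in utility, as it would sit inside the section-43 training loop between loss.backward() and optimizer.step() (the max norm of 10 is just an example value):

    loss.backward()
    # rescale all gradients so that their combined norm does not exceed 10
    torch.nn.utils.clip_grad_norm_(model.parameters(), 10)
    optimizer.step()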

45. Using the LSTM Layer

45.1 nn.LSTM

  • __init__
    (input_size, hidden_size, num_layers)

LSTM.forward()

  • out, (ht, ct) = lstm(x, [ht_0, ct_0])
    x: [seq_len, b, word_vec]
    h/c: [num_layers, b, h_dim]
    out: [seq_len, b, h_dim]

    lstm = nn.LSTM(input_size=100, hidden_size=20, num_layers=4)
    print(lstm) # LSTM(100, 20, num_layers=4)
    x = torch.rand(10, 3, 100)
    out, (h, c) = lstm(x)
    print(out.shape, h.shape, c.shape) # torch.Size([10, 3, 20]) torch.Size([4, 3, 20]) torch.Size([4, 3, 20])

45.2 nn.LSTMCell

  • __init__
    (input_size, hidden_size) — nn.LSTMCell takes no num_layers argument; stack cells manually for multiple layers

LSTMCell.forward()

  • ht, ct = lstmcell(xt, [ht_0, ct_0])
    xt: [b, word_vec]
    h/c: [b, h_dim]

Single layer

    x = torch.randn(10, 3, 100)  # [seq_len, b, word_vec]
    cell = nn.LSTMCell(input_size=100, hidden_size=20)
    print(cell)  # LSTMCell(100, 20)

    h = torch.zeros(3, 20)
    c = torch.zeros(3, 20)
    for xt in x:
        h, c = cell(xt, (h, c))

    print(h.shape, c.shape)  # torch.Size([3, 20]) torch.Size([3, 20])

Two layers

    cell1 = nn.LSTMCell(input_size=100, hidden_size=30)
    cell2 = nn.LSTMCell(input_size=30, hidden_size=20)
    h1 = torch.zeros(3, 30)
    c1 = torch.zeros(3, 30)
    h2 = torch.zeros(3, 20)
    c2 = torch.zeros(3, 20)

    for xt in x:
        h1, c1 = cell1(xt, (h1, c1))
        h2, c2 = cell2(h1, (h2, c2))

    print(h2.shape, c2.shape)  # torch.Size([3, 20]) torch.Size([3, 20])

46. Sentiment Classification in Practice

Google CoLab

  • Continuous 12 hours
  • free K80 for GPU
  • no need to cross GFW

You can't use Colaboratory without a Google account; noting this down for later.

    import torch
    import torch.nn as nn
    import numpy as np
    from torchtext import data, datasets

    # load dataset
    TEXT = data.Field(tokenize='spacy')
    LABEL = data.LabelField(dtype=torch.float)
    train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)

    print('len of train data:', len(train_data))
    print('len of test data:', len(test_data))
    print(train_data.examples[15].text)
    print(train_data.examples[15].label)

    class RNN(nn.Module):
        def __init__(self, vocab_size, embedding_dim, hidden_dim):
            super(RNN, self).__init__()
            # [0-10001] => [100]
            self.embedding = nn.Embedding(vocab_size, embedding_dim)
            # [100] => [256]
            self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=2,
                               bidirectional=True, dropout=0.5)
            # [256*2] => [1]
            self.fc = nn.Linear(hidden_dim * 2, 1)
            self.dropout = nn.Dropout(0.5)

        def forward(self, x):
            # [seq, b, 1] => [seq, b, 100]
            embedding = self.dropout(self.embedding(x))
            # output: [seq, b, hid_dim*2]
            # hidden/h: [num_layer*2, b, hid_dim]
            # cell/c:   [num_layer*2, b, hid_dim]
            output, (hidden, cell) = self.rnn(embedding)
            # [num_layer*2, b, hid_dim] => 2 of [b, hid_dim] => [b, hid_dim*2]
            hidden = torch.cat([hidden[-2], hidden[-1]], dim=1)
            # [b, hid_dim*2] => [b, 1]
            hidden = self.dropout(hidden)
            out = self.fc(hidden)
            return out

    # load word embedding
    # NOTE: TEXT.build_vocab(...) must be called before this point (see the sketch after this listing)
    rnn = RNN(len(TEXT.vocab), 100, 256)
    pretrained_embedding = TEXT.vocab.vectors
    print('pretrained_embedding:', pretrained_embedding.shape)
    rnn.embedding.weight.data.copy_(pretrained_embedding)
    print('embedding layer inited.')

    def binary_acc(preds, y):
        preds = torch.round(torch.sigmoid(preds))
        correct = torch.eq(preds, y).float()
        acc = correct.sum() / len(correct)
        return acc

    def eval(rnn, iterator, criteon):
        avg_acc = []
        rnn.eval()
        with torch.no_grad():
            for batch in iterator:
                # [b, 1] => [b]
                pred = rnn(batch.text).squeeze(1)
                loss = criteon(pred, batch.label)
                acc = binary_acc(pred, batch.label).item()
                avg_acc.append(acc)
        avg_acc = np.array(avg_acc).mean()
        print('>>>test:', avg_acc)

    def train(rnn, iterator, optimizer, criteon):
        avg_acc = []
        rnn.train()
        for i, batch in enumerate(iterator):
            # [seq, b] => [b, 1] => [b]
            pred = rnn(batch.text).squeeze(1)
            loss = criteon(pred, batch.label)
            acc = binary_acc(pred, batch.label).item()
            avg_acc.append(acc)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    def main():
        pass

    if __name__ == '__main__':
        main()
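
As copied, the script is incomplete: TEXT.vocab does not exist until build_vocab is called, no iterators, loss, or optimizer are created, and main() does nothing. Below is a hedged sketch of the usual legacy-torchtext glue (the batch size, GloVe variant, learning rate, and epoch count are my own choices, not from the original video) that would have to run before the RNN is constructed:

    # build the vocabularies; 'glove.6B.100d' supplies the 100-dim pretrained vectors
    TEXT.build_vocab(train_data, max_size=10000, vectors='glove.6B.100d')
    LABEL.build_vocab(train_data)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    train_iterator, test_iterator = data.BucketIterator.splits(
        (train_data, test_data), batch_size=30, device=device)

    rnn = RNN(len(TEXT.vocab), 100, 256).to(device)
    rnn.embedding.weight.data.copy_(TEXT.vocab.vectors)

    optimizer = torch.optim.Adam(rnn.parameters(), lr=1e-3)
    criteon = nn.BCEWithLogitsLoss().to(device)  # matches the sigmoid used in binary_acc

    for epoch in range(10):
        train(rnn, train_iterator, optimizer, criteon)
        eval(rnn, test_iterator, criteon)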

I only copied the code here; I haven't really worked through it yet.
