At the moment the output starts at 1 but quickly falls back to 0 before the pulse pushes it up again. An Elman network can account for some of the time dependence, but it is not good at remembering long-term dependencies and converges to the "most common output" for this input.
To address this, I suggest a well-known network that handles long-term dependencies well: the Long Short-Term Memory (LSTM) RNN; see torch.nn.LSTM for more information. Keep "h_state = None" and change torch.nn.RNN to torch.nn.LSTM.
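In isolation, the change amounts to swapping the recurrent module in the model definition; the rest of the code can stay the same, because passing h_state = None lets PyTorch create the initial state for either module (for the LSTM this is the (h_0, c_0) tuple of hidden and cell state):

self.rnn = torch.nn.LSTM(    # was: torch.nn.RNN(...) with the same arguments
    input_size=INPUT_SIZE,
    hidden_size=16,
    num_layers=1,
    batch_first=True,
)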
The full code and plot are shown below.
import torch
import numpy, math
import matplotlib.pyplot as plt
nofSequences = 5
maxLength = 130
# Generate training data
x_np = numpy.zeros((nofSequences,maxLength,1))
y_np = numpy.zeros((nofSequences,maxLength))
numpy.random.seed(1)
for i in range(0,nofSequences):
    # each sequence contains a single sine pulse of length 10 starting at a random position
    startPos = numpy.random.random()*50
    for j in range(0,maxLength):
        if j>=startPos and j<startPos+10:
            x_np[i,j,0] = math.sin((j-startPos)*math.pi/10)
        else:
            x_np[i,j,0] = 0.0
        # the target is 1 from the start of the sequence until the pulse has ended, 0 afterwards
        if j<startPos+10:
            y_np[i,j] = 1
        else:
            y_np[i,j] = 0
# Define the neural network
INPUT_SIZE = 1
class RNN(torch.nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.rnn = torch.nn.LSTM(
            input_size=INPUT_SIZE,
            hidden_size=16,     # rnn hidden unit
            num_layers=1,       # number of rnn layers
            batch_first=True,
        )
        self.out = torch.nn.Linear(16, 2)

    def forward(self, x, h_state):
        r_out, h_state = self.rnn(x, h_state)
        outs = []    # save all predictions
        for time_step in range(r_out.size(1)):    # calculate output for each time step
            outs.append(self.out(r_out[:, time_step, :]))
        return torch.stack(outs, dim=1), h_state
# Learn the network
rnn = RNN()
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.01)
# assumption: cross-entropy loss, matching the 2-class output and integer targets
loss_func = torch.nn.CrossEntropyLoss()
h_state = None      # for initial hidden state

x = torch.Tensor(x_np)         # shape (batch, time_step, input_size)
y = torch.Tensor(y_np).long()

torch.manual_seed(2)
numpy.random.seed(2)
for step in range(100):
    prediction, h_state = rnn(x, h_state)   # rnn output, shape (batch, time_step, 2)
    # !! next step is important !!
    h_state = None   # reset the hidden state so every pass over the full sequences starts fresh
    loss = loss_func(prediction.reshape(-1, 2), y.reshape(-1))   # loss over every time step
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
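To get the plot mentioned above, one option (a minimal sketch, not necessarily the original plotting code) is to compare the predicted class (argmax over the two outputs) with the target for the first training sequence:

with torch.no_grad():
    pred, _ = rnn(x, None)               # run the trained network once more
    pred_class = pred.argmax(dim=2)      # predicted label per time step

plt.plot(x_np[0, :, 0], label="input")
plt.plot(y_np[0, :], label="target")
plt.plot(pred_class[0].numpy(), "--", label="prediction")
plt.legend()
plt.show()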