PyTorch LSTM gives nan for MSELoss

My model is:


import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable


class BaselineModel(nn.Module):

    def __init__(self, feature_dim=5, hidden_size=5, num_layers=2, batch_size=32):
        super(BaselineModel, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=feature_dim,
                            hidden_size=hidden_size, num_layers=num_layers)

    def forward(self, x, hidden):
        lstm_out, hidden = self.lstm(x, hidden)
        return lstm_out, hidden

    def init_hidden(self, batch_size):
        # .data.new(sizes) allocates tensors with the same dtype/device as the
        # parameters, but leaves their memory uninitialized.
        hidden = Variable(next(self.parameters()).data.new(
            self.num_layers, batch_size, self.hidden_size))
        cell = Variable(next(self.parameters()).data.new(
            self.num_layers, batch_size, self.hidden_size))
        return (hidden, cell)
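
For comparison, a zero-initialized version of init_hidden (mirroring the commented-out torch.zeros lines in the training loop below) might look like the sketch here; this is only the more common pattern, not the code I am actually running:

    def init_hidden_zeros(self, batch_size):
        # Sketch: all-zero hidden/cell states on the same device/dtype as the parameters.
        weight = next(self.parameters())
        hidden = weight.new_zeros(self.num_layers, batch_size, self.hidden_size)
        cell = weight.new_zeros(self.num_layers, batch_size, self.hidden_size)
        return (hidden, cell)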

The training loop looks like this:


train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=BATCH_SIZE, shuffle=True, **params)

model = BaselineModel(batch_size=BATCH_SIZE)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0001)
loss_fn = torch.nn.MSELoss(reduction='sum')

for epoch in range(250):

    # hidden = (torch.zeros(2, 13, 5),
    #           torch.zeros(2, 13, 5))
    # model.hidden = hidden

    for i, data in enumerate(train_loader):
        hidden = model.init_hidden(13)
        inputs = data[0]
        outputs = data[1]

        print('inputs',  inputs.size())
        # print('outputs', outputs.size())

        # optimizer.zero_grad()
        model.zero_grad()

        # print('inputs', inputs)
        pred, hidden = model(inputs, hidden)

        loss = loss_fn(pred, outputs)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        print('Epoch: ', epoch, '\ti: ', i, '\tLoss: ', loss)

I have already added gradient clipping, which seems to be the commonly recommended fix. But even after the very first step I get:


Epoch:  0 	i:  0 	Loss:  tensor(nan, grad_fn=<MseLossBackward>)
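
Since the loss is nan already on the first batch, one thing worth checking before training is whether the data, or the freshly allocated hidden state, already contains nan/inf before the loss is even computed. The check below is a sketch added for illustration (it reuses train_loader and model from the code above and assumes the default seq-first LSTM layout, so the batch dimension is dim 1):

# Pre-training sanity check (sketch, not part of the original post).
inputs, outputs = next(iter(train_loader))
print('any nan in inputs :', torch.isnan(inputs).any().item())
print('any nan in outputs:', torch.isnan(outputs).any().item())

hidden = model.init_hidden(inputs.size(1))
print('any nan in hidden :', torch.isnan(hidden[0]).any().item(),
      torch.isnan(hidden[1]).any().item())

# Optionally, let autograd report the first op that produces nan/inf during backward:
torch.autograd.set_detect_anomaly(True)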


梵蒂冈之花
1 Answer

慕神8447489

I suspect your problem has to do with your outputs / data[1] (it would help if you showed an example of your train_set). Running the snippet below does not give nan, but note that I forced the shape of outputs by hand before calling loss_fn(pred, outputs):

class BaselineModel(nn.Module):

    def __init__(self, feature_dim=5, hidden_size=5, num_layers=2, batch_size=32):
        super(BaselineModel, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=feature_dim,
                            hidden_size=hidden_size, num_layers=num_layers)

    def forward(self, x, hidden):
        lstm_out, hidden = self.lstm(x, hidden)
        return lstm_out, hidden

    def init_hidden(self, batch_size):
        hidden = Variable(next(self.parameters()).data.new(
            self.num_layers, batch_size, self.hidden_size))
        cell = Variable(next(self.parameters()).data.new(
            self.num_layers, batch_size, self.hidden_size))
        return (hidden, cell)


model = BaselineModel(batch_size=32)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0001)
loss_fn = torch.nn.MSELoss(reduction='sum')

hidden = model.init_hidden(10)
model.zero_grad()
pred, hidden = model(torch.randn(2, 10, 5), hidden)
pred.size()  # torch.Size([2, 10, 5])
outputs = torch.zeros(2, 10, 5)

loss = loss_fn(pred, outputs)
loss
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
optimizer.step()
print(loss)

Note that a common cause of nan values is the numerical stability of the learning phase, but in that case you usually see finite values for the first steps before the divergence happens, which is apparently not what is going on here.
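
If the suspicion about data[1] is correct, a cheap way to confirm it inside the original training loop is to check the target tensor right before computing the loss. The assertions below are an illustrative addition, not part of the answer:

# Hypothetical checks inside the questioner's training loop:
pred, hidden = model(inputs, hidden)

assert pred.shape == outputs.shape, \
    f'shape mismatch: pred {tuple(pred.shape)} vs outputs {tuple(outputs.shape)}'
assert not torch.isnan(outputs).any(), 'targets contain nan'

loss = loss_fn(pred, outputs)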