python - Keras vs PyTorch LSTM different results


Trying to get similar results on the same dataset with Keras and PyTorch.
Data

from numpy import array
from numpy import hstack

from sklearn.model_selection import train_test_split


# split a multivariate sequence into samples
def split_sequences(sequences, n_steps):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the dataset
        if end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1, -1]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)

def get_data():
    # define input sequence
    in_seq1 = array([x for x in range(0, 500, 10)]) / 1
    in_seq2 = array([x for x in range(5, 505, 10)]) / 1
    out_seq = array([in_seq1[i] + in_seq2[i] for i in range(len(in_seq1))])
    # convert to [rows, columns] structure
    in_seq1 = in_seq1.reshape((len(in_seq1), 1))
    in_seq2 = in_seq2.reshape((len(in_seq2), 1))
    out_seq = out_seq.reshape((len(out_seq), 1))
    # horizontally stack columns
    dataset = hstack((in_seq1, in_seq2, out_seq))

    n_features = 2   # this is number of parallel inputs
    n_timesteps = 3  # this is number of timesteps

    # convert into input/output
    X, y = split_sequences(dataset, n_timesteps)
    print(X.shape, y.shape)
    X_train, x_test, Y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    return X_train, x_test, Y_train, y_test
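
For reference, a quick sanity check of what get_data() returns with these parameters (the exact shapes below are inferred from the code above, not from the original post):

# sanity check of the generated shapes (inferred, not part of the original post)
X_train, x_test, Y_train, y_test = get_data()
print(X_train.shape, Y_train.shape)  # expected: (38, 3, 2) (38,)
print(x_test.shape, y_test.shape)    # expected: (10, 3, 2) (10,)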
Keras
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
import keras  # needed below for keras.optimizers.Adam and the backend epsilon

from sklearn.metrics import mean_squared_error

import testing.TimeSeries.datacreator as dc # !!!!change this!!!!
X_train, x_test, Y_train, y_test = dc.get_data()

n_features = 2   # this is number of parallel inputs
n_timesteps = 3  # this is number of timesteps

# define model
model = Sequential()
model.add(LSTM(1024, activation='relu',
               input_shape=(n_timesteps, n_features),
               kernel_initializer='uniform',
               recurrent_initializer='uniform'))
model.add(Dense(512, activation='relu'))
model.add(Dense(1))
opt = keras.optimizers.Adam(lr=0.001,
                            beta_1=0.9,
                            beta_2=0.999,
                            epsilon=keras.optimizers.K.epsilon(),
                            decay=0.0,
                            amsgrad=False)
model.compile(optimizer=opt, loss='mse')
# fit model
model.fit(X_train, Y_train, epochs=200, verbose=1, validation_data=(x_test, y_test))

yhat = model.predict(x_test, verbose=0)

mean_squared_error(y_test, yhat)
PyTorch - module class
import numpy as np
import torch
import torch.nn.functional as F

from sklearn.metrics import mean_squared_error

import testing.TimeSeries.datacreator as dc # !!!! change this !!!!
X_train, x_test, Y_train, y_test = dc.get_data()
n_features = 2   # this is number of parallel inputs
n_timesteps = 3  # this is number of timesteps

class MV_LSTM(torch.nn.Module):
    def __init__(self, n_features, seq_length):
        super(MV_LSTM, self).__init__()
        self.n_features = n_features  # number of parallel inputs
        self.seq_len = seq_length     # number of timesteps
        self.n_hidden = 1024          # number of hidden states
        self.n_layers = 1             # number of LSTM layers (stacked)

        self.l_lstm = torch.nn.LSTM(input_size=n_features,
                                    hidden_size=self.n_hidden,
                                    num_layers=self.n_layers,
                                    batch_first=True)
        # according to the pytorch docs the LSTM output is
        # (batch_size, seq_len, num_directions * hidden_size)
        # when batch_first = True
        self.l_linear = torch.nn.Linear(self.n_hidden * self.seq_len, 512)
        # self.l_linear1 = torch.nn.Linear(512, 512)
        self.l_linear2 = torch.nn.Linear(512, 1)

    def init_hidden(self, batch_size):
        # even with batch_first = True this remains the same as in the docs
        hidden_state = torch.zeros(self.n_layers, batch_size, self.n_hidden).to(next(self.parameters()).device)
        cell_state = torch.zeros(self.n_layers, batch_size, self.n_hidden).to(next(self.parameters()).device)
        self.hidden = (hidden_state, cell_state)

    def forward(self, x):
        batch_size, seq_len, _ = x.size()

        lstm_out, self.hidden = self.l_lstm(x, self.hidden)
        # lstm_out (with batch_first = True) is
        # (batch_size, seq_len, num_directions * hidden_size)
        # for the following linear layer we want to keep the batch_size dimension and merge the rest
        # .contiguous() -> solves tensor compatibility error
        x = lstm_out.contiguous().view(batch_size, -1)
        x = F.relu(x)
        x = F.relu(self.l_linear(x))
        # x = F.relu(self.l_linear1(x))
        x = self.l_linear2(x)
        return x
PyTorch - initialisation and training
# create NN
mv_net = MV_LSTM(n_features, n_timesteps)
criterion = torch.nn.MSELoss()
import keras  # for the epsilon constant
optimizer = torch.optim.Adam(mv_net.parameters(),
                             lr=1e-3,
                             betas=[0.9, 0.999],
                             eps=keras.optimizers.K.epsilon(),
                             weight_decay=0,
                             amsgrad=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mv_net.to(device)


train_episodes = 200
batch_size = 32
eval_batch_size = 32

for t in range(train_episodes):
    # TRAIN
    mv_net.train()
    for b in range(0, len(X_train), batch_size):
        inpt = X_train[b:b+batch_size, :, :]
        target = Y_train[b:b+batch_size]

        x_batch = torch.tensor(inpt, dtype=torch.float32).to(device)
        y_batch = torch.tensor(target, dtype=torch.float32).to(device)

        mv_net.init_hidden(x_batch.size(0))

        output = mv_net(x_batch)
        loss = criterion(output.view(-1), y_batch)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    # EVAL
    mv_net.eval()
    mv_net.init_hidden(eval_batch_size)
    acc = 0
    for b in range(0, len(x_test), eval_batch_size):
        inpt = x_test[b:b+eval_batch_size, :, :]
        target = y_test[b:b+eval_batch_size]

        x_batch = torch.tensor(inpt, dtype=torch.float32).to(device)
        y_batch = torch.tensor(target, dtype=torch.float32).to(device)
        mv_net.init_hidden(x_batch.size(0))

        output = mv_net(x_batch)
        acc += mean_squared_error(y_batch.cpu().detach().numpy(), output.view(-1).cpu().detach().numpy())
    print('step:', t, 'train loss:', round(loss.item(), 3), 'eval acc:', round(acc/len(x_test), 3))


mv_net.init_hidden(len(x_test))
val = torch.tensor(x_test, dtype=torch.float32).to(device)
otp = mv_net(val)
print(mean_squared_error(y_test, otp.view(-1).cpu().detach().numpy()))
Results
Keras produces a test MSE of almost 0, but PyTorch's is around 6000, which is far too different.
I tried some tweaks in the PyTorch code, but none got me anywhere near comparable Keras results, even with identical optimisation parameters.
I cannot see what is wrong with the (somewhat tutorial-style) PyTorch code.

Best answer

I know it is almost a year late, but I ran into the same problem, and I think the issue is the following. The Keras documentation says:

return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence.



This basically means that the input of your self.l_linear needs to be torch.nn.Linear(1024, 512) instead of self.n_hidden*self.seq_len, 512, as sketched below.
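
A minimal sketch of that change in the question's __init__ (only the first linear layer changes; everything else stays as posted):

# inside MV_LSTM.__init__ - the dense layer now only sees the last hidden state
self.l_linear = torch.nn.Linear(self.n_hidden, 512)  # was: torch.nn.Linear(self.n_hidden * self.seq_len, 512)
self.l_linear2 = torch.nn.Linear(512, 1)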

Now you also need to do the same as Keras and use only the last output in the forward pass:
    def forward(self, x):
        batch_size, seq_len, _ = x.size()

        lstm_out, self.hidden = self.l_lstm(x, self.hidden)

        x = lstm_out[:, -1]
        x = torch.nn.functional.relu(x)
        x = torch.nn.functional.relu(self.l_linear(x))
        x = self.l_linear2(x)
        return x

When I run your example (which I had to adjust a bit to get it running), I get very similar training losses.

Keras:

38/38 [==============================] - 0s 6ms/step - loss: 67.6081 - val_loss: 325.9259



PyTorch:

step: 199 train loss: 41.043 eval acc: 1142.688



I hope this helps other people with a similar problem.

PS: Also note that Keras resets the hidden state by default (stateful=False).
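
In PyTorch you get the same stateless behaviour by simply not carrying the hidden state between batches, e.g. by passing None to the LSTM so that fresh zero states are created on every call (a small sketch based on that assumption, not part of the original answer):

# sketch: stateless use of torch.nn.LSTM - passing None as the hidden state
# makes PyTorch build zero-initialised (h_0, c_0) for every batch,
# which matches Keras' default stateful=False behaviour
lstm_out, _ = self.l_lstm(x, None)
x = lstm_out[:, -1]  # last timestep only, as in the fixed forward() above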

Regarding "python - Keras vs PyTorch LSTM different results", a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/56915567/
