I am currently building an LSTM network to forecast time-series data with PyTorch. Following Roman's blog post, I implemented a simple LSTM for univariate time-series data; please see the class definitions below. However, I've been stuck for a few days now trying to add more features to the input data, such as hour of the day, day of the week, week of the year, and so on.
import random
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn


class Model(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.lstm = nn.LSTMCell(self.input_size, self.hidden_size)
        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, future=0, y=None):
        outputs = []

        # reset the state of LSTM
        # the state is kept till the end of the sequence
        h_t = torch.zeros(input.size(0), self.hidden_size, dtype=torch.float32)
        c_t = torch.zeros(input.size(0), self.hidden_size, dtype=torch.float32)

        for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
            h_t, c_t = self.lstm(input_t, (h_t, c_t))
            output = self.linear(h_t)
            outputs += [output]

        for i in range(future):
            if y is not None and random.random() > 0.5:
                output = y[:, [i]]  # teacher forcing
            h_t, c_t = self.lstm(output, (h_t, c_t))
            output = self.linear(h_t)
            outputs += [output]

        outputs = torch.stack(outputs, 1).squeeze(2)
        return outputs
class Optimization:
    """A helper class to train, test and diagnose the LSTM"""

    def __init__(self, model, loss_fn, optimizer, scheduler):
        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.train_losses = []
        self.val_losses = []
        self.futures = []

    @staticmethod
    def generate_batch_data(x, y, batch_size):
        for batch, i in enumerate(range(0, len(x) - batch_size, batch_size)):
            x_batch = x[i : i + batch_size]
            y_batch = y[i : i + batch_size]
            yield x_batch, y_batch, batch

    def train(
        self,
        x_train,
        y_train,
        x_val=None,
        y_val=None,
        batch_size=100,
        n_epochs=20,
        dropout=0.2,
        do_teacher_forcing=None,
    ):
        seq_len = x_train.shape[1]
        for epoch in range(n_epochs):
            start_time = time.time()
            self.futures = []

            train_loss = 0
            for x_batch, y_batch, batch in self.generate_batch_data(x_train, y_train, batch_size):
                y_pred = self._predict(x_batch, y_batch, seq_len, do_teacher_forcing)
                self.optimizer.zero_grad()
                loss = self.loss_fn(y_pred, y_batch)
                loss.backward()
                self.optimizer.step()
                train_loss += loss.item()
            self.scheduler.step()
            train_loss /= batch
            self.train_losses.append(train_loss)

            self._validation(x_val, y_val, batch_size)

            elapsed = time.time() - start_time
            print(
                "Epoch %d Train loss: %.2f. Validation loss: %.2f. Avg future: %.2f. Elapsed time: %.2fs."
                % (epoch + 1, train_loss, self.val_losses[-1], np.average(self.futures), elapsed)
            )

    def _predict(self, x_batch, y_batch, seq_len, do_teacher_forcing):
        if do_teacher_forcing:
            future = random.randint(1, seq_len // 2)
            limit = x_batch.size(1) - future
            y_pred = self.model(x_batch[:, :limit], future=future, y=y_batch[:, limit:])
        else:
            future = 0
            y_pred = self.model(x_batch)
        self.futures.append(future)
        return y_pred

    def _validation(self, x_val, y_val, batch_size):
        if x_val is None or y_val is None:
            return
        with torch.no_grad():
            val_loss = 0
            batch = 1
            for x_batch, y_batch, batch in self.generate_batch_data(x_val, y_val, batch_size):
                y_pred = self.model(x_batch)
                loss = self.loss_fn(y_pred, y_batch)
                val_loss += loss.item()
            val_loss /= batch
            self.val_losses.append(val_loss)

    def evaluate(self, x_test, y_test, batch_size, future=1):
        with torch.no_grad():
            test_loss = 0
            actual, predicted = [], []
            for x_batch, y_batch, batch in self.generate_batch_data(x_test, y_test, batch_size):
                y_pred = self.model(x_batch, future=future)
                y_pred = (
                    y_pred[:, -len(y_batch) :] if y_pred.shape[1] > y_batch.shape[1] else y_pred
                )
                loss = self.loss_fn(y_pred, y_batch)
                test_loss += loss.item()
                actual += torch.squeeze(y_batch[:, -1]).data.cpu().numpy().tolist()
                predicted += torch.squeeze(y_pred[:, -1]).data.cpu().numpy().tolist()
            test_loss /= batch
            return actual, predicted, test_loss

    def plot_losses(self):
        plt.plot(self.train_losses, label="Training loss")
        plt.plot(self.val_losses, label="Validation loss")
        plt.legend()
        plt.title("Losses")
Below are some helper functions that I use to split and format the data before feeding it into my LSTM network.
import pandas as pd
from numpy import array
from sklearn.model_selection import train_test_split


def to_dataframe(actual, predicted):
    return pd.DataFrame({"value": actual, "prediction": predicted})


def inverse_transform(scaler, df, columns):
    for col in columns:
        df[col] = scaler.inverse_transform(df[col])
    return df


def split_sequences(sequences, n_steps):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the dataset
        if end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix - 1, -1]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)


def train_val_test_split_new(df, test_ratio=0.2, seq_len=100):
    y = df['value']
    X = df.drop(columns=['value'])
    train_ratio = 1 - test_ratio
    val_ratio = 1 - ((train_ratio - test_ratio) / train_ratio)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_ratio)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=val_ratio)
    return X_train, y_train, X_val, y_val, X_test, y_test
I use the following dataframe to train my model.
# df_train
value weekday monthday hour
timestamp
2014-07-01 00:00:00 10844 1 1 0
2014-07-01 00:30:00 8127 1 1 0
2014-07-01 01:00:00 6210 1 1 1
2014-07-01 01:30:00 4656 1 1 1
2014-07-01 02:00:00 3820 1 1 2
... ... ... ... ...
2015-01-31 21:30:00 24670 5 31 21
2015-01-31 22:00:00 25721 5 31 22
2015-01-31 22:30:00 27309 5 31 22
2015-01-31 23:00:00 26591 5 31 23
2015-01-31 23:30:00 26288 5 31 23
10320 rows × 4 columns
# x_train
weekday monthday hour
timestamp
2014-08-26 16:30:00 1 26 16
2014-08-18 16:30:00 0 18 16
2014-10-22 20:00:00 2 22 20
2014-12-10 08:00:00 2 10 8
2014-07-27 22:00:00 6 27 22
... ... ... ...
2014-08-24 05:30:00 6 24 5
2014-11-24 12:00:00 0 24 12
2014-12-18 06:00:00 3 18 6
2014-07-27 17:00:00 6 27 17
2014-12-05 21:00:00 4 5 21
6192 rows × 3 columns
# y_train
timestamp
2014-08-26 16:30:00 14083
2014-08-18 16:30:00 14465
2014-10-22 20:00:00 25195
2014-12-10 08:00:00 21348
2014-07-27 22:00:00 16356
...
2014-08-24 05:30:00 2948
2014-11-24 12:00:00 16292
2014-12-18 06:00:00 7029
2014-07-27 17:00:00 18883
2014-12-05 21:00:00 26284
Name: value, Length: 6192, dtype: int64
After transforming the time-series data and splitting it into smaller batches, the training sets for X and y look as follows:
X_data shape is (6093, 100, 3)
y_data shape is (6093,)
tensor([[[-1.0097, 1.1510, 0.6508],
[-1.5126, 0.2492, 0.6508],
[-0.5069, 0.7001, 1.2238],
...,
[ 1.5044, -1.4417, -1.6413],
[ 1.0016, -0.0890, 0.7941],
[ 1.5044, -0.9908, -0.2087]],
[[-1.5126, 0.2492, 0.6508],
[-0.5069, 0.7001, 1.2238],
[-0.5069, -0.6526, -0.4952],
...,
[ 1.0016, -0.0890, 0.7941],
[ 1.5044, -0.9908, -0.2087],
[ 0.4988, 0.5874, 0.5076]],
[[-0.5069, 0.7001, 1.2238],
[-0.5069, -0.6526, -0.4952],
[ 1.5044, 1.2637, 1.5104],
...,
[ 1.5044, -0.9908, -0.2087],
[ 0.4988, 0.5874, 0.5076],
[ 0.4988, 0.5874, -0.6385]],
...,
[[ 1.0016, 0.9255, -1.2115],
[-1.0097, -0.9908, 1.0806],
[-0.0041, 0.8128, 0.3643],
...,
[ 1.5044, 0.9255, -0.9250],
[-1.5126, 0.9255, 0.0778],
[-0.0041, 0.2492, -0.7818]],
[[-1.0097, -0.9908, 1.0806],
[-0.0041, 0.8128, 0.3643],
[-0.5069, 1.3765, -0.0655],
...,
[-1.5126, 0.9255, 0.0778],
[-0.0041, 0.2492, -0.7818],
[ 1.5044, 1.2637, 0.7941]],
[[-0.0041, 0.8128, 0.3643],
[-0.5069, 1.3765, -0.0655],
[-0.0041, -1.6672, -0.4952],
...,
[-0.0041, 0.2492, -0.7818],
[ 1.5044, 1.2637, 0.7941],
[ 0.4988, -1.2163, 1.3671]]])
tensor([ 0.4424, 0.1169, 0.0148, ..., -1.1653, 0.5394, 1.6037])
Finally, to check that the dimensions of all the training, validation and test sets are correct, I print out their shapes.
train shape is: torch.Size([6093, 100, 3])
train label shape is: torch.Size([6093])
val shape is: torch.Size([1965, 100, 3])
val label shape is: torch.Size([1965])
test shape is: torch.Size([1965, 100, 3])
test label shape is: torch.Size([1965])
When I try to build and train the model as follows, I end up with a RuntimeError pointing to an inconsistent input size.
model_params = {'train_ratio': 0.8,
                'validation_ratio': 0.2,
                'sequence_length': 100,
                'teacher_forcing': False,
                'dropout_rate': 0.2,
                'batch_size': 100,
                'num_of_epochs': 5,
                'hidden_size': 24,
                'n_features': 3,
                'learning_rate': 1e-3
                }

train_ratio = model_params['train_ratio']
val_ratio = model_params['validation_ratio']
seq_len = model_params['sequence_length']
teacher_forcing = model_params['teacher_forcing']
dropout_rate = model_params['dropout_rate']
batch_size = model_params['batch_size']
n_epochs = model_params['num_of_epochs']
hidden_size = model_params['hidden_size']
n_features = model_params['n_features']
lr = model_params['learning_rate']

model = Model(input_size=n_features, hidden_size=hidden_size, output_size=1)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
optimization = Optimization(model, loss_fn, optimizer, scheduler)

start_time = datetime.now()
optimization.train(x_train, y_train, x_val, y_val,
                   batch_size=batch_size,
                   n_epochs=n_epochs,
                   dropout=dropout_rate,
                   do_teacher_forcing=teacher_forcing)
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-192-6fc406c0113d> in <module>
6
7 start_time = datetime.now()
----> 8 optimization.train(x_train, y_train, x_val, y_val,
9 batch_size=batch_size,
10 n_epochs=n_epochs,
<ipython-input-189-c18d20430910> in train(self, x_train, y_train, x_val, y_val, batch_size, n_epochs, dropout, do_teacher_forcing)
68 train_loss = 0
69 for x_batch, y_batch, batch in self.generate_batch_data(x_train, y_train, batch_size):
---> 70 y_pred = self._predict(x_batch, y_batch, seq_len, do_teacher_forcing)
71 self.optimizer.zero_grad()
72 loss = self.loss_fn(y_pred, y_batch)
<ipython-input-189-c18d20430910> in _predict(self, x_batch, y_batch, seq_len, do_teacher_forcing)
93 else:
94 future = 0
---> 95 y_pred = self.model(x_batch)
96 self.futures.append(future)
97 return y_pred
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-189-c18d20430910> in forward(self, input, future, y)
17
18 for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
---> 19 h_t, c_t = self.lstm(input_t, (h_t, c_t))
20 output = self.linear(h_t)
21 outputs += [output]
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~\Anaconda3\lib\site-packages\torch\nn\modules\rnn.py in forward(self, input, hx)
963
964 def forward(self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]] = None) -> Tuple[Tensor, Tensor]:
--> 965 self.check_forward_input(input)
966 if hx is None:
967 zeros = torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device)
~\Anaconda3\lib\site-packages\torch\nn\modules\rnn.py in check_forward_input(self, input)
789 def check_forward_input(self, input: Tensor) -> None:
790 if input.size(1) != self.input_size:
--> 791 raise RuntimeError(
792 "input has inconsistent input_size: got {}, expected {}".format(
793 input.size(1), self.input_size))
RuntimeError: input has inconsistent input_size: got 1, expected 3
I suspect that my current LSTM model class does not support data with multiple features, and I have been trying different approaches lately, so far with no luck. Feel free to share your thoughts or point me in the right direction that could help me solve this problem.
For reference, here are the shapes of the tensors inside forward():

input_t: torch.Size([100, 1, 3])
h_t:     torch.Size([100, 24])
c_t:     torch.Size([100, 24])
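To make the mismatch concrete, here is a minimal illustration using the shapes above (this is just an illustration, not part of my training code): nn.LSTMCell expects a 2-D input of shape (batch, input_size), but each chunk taken along dim=1 keeps a singleton time dimension, so the size check sees 1 instead of 3.

import torch
import torch.nn as nn

cell = nn.LSTMCell(input_size=3, hidden_size=24)
input_t = torch.randn(100, 1, 3)   # one chunk from input.chunk(input.size(1), dim=1)
h_t = torch.zeros(100, 24)
c_t = torch.zeros(100, 24)

# cell(input_t, (h_t, c_t))        # fails: 3-D input, size(1) == 1 instead of input_size == 3
h_t, c_t = cell(input_t.squeeze(1), (h_t, c_t))  # (100, 3) matches input_size=3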
Best Answer
After a couple of weeks of head-scratching, I solved the problem. It was a fruitful journey for me, so I'd like to share what I found. If you'd like a complete walkthrough of the code, please check out my Medium post on the matter.
Just as with Pandas, I found that things tend to run faster and more smoothly when I stick to the PyTorch way of doing things. Both libraries rely on NumPy, and I'm sure nearly all table and matrix operations could be performed explicitly with NumPy arrays and functions. However, doing so would throw away all the nice abstractions and performance improvements these libraries offer and turn every step into a CS exercise. It's fun until it isn't.
Rather than manually shaping all the training and validation sets to pass them to the model, PyTorch's TensorDataset and DataLoader classes helped me enormously. After scaling the feature and target sets used for training and validation, we have NumPy arrays. We can convert those arrays to tensors and use them to create our TensorDataset, or a custom Dataset depending on your requirements. Finally, DataLoaders let us iterate over these datasets with much less hassle than otherwise, since they already provide built-in batching, shuffling, and drop-last-batch options.
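The snippet below assumes that the scaled arrays X_train_arr, y_train_arr, X_val_arr and y_val_arr have already been produced from the splits. That scaling step is not spelled out in this answer; as a rough sketch, assuming scikit-learn's MinMaxScaler, it could look like this:

from sklearn.preprocessing import MinMaxScaler

# Fit the scalers on the training split only, then reuse them on the validation split
feature_scaler = MinMaxScaler()
target_scaler = MinMaxScaler()

X_train_arr = feature_scaler.fit_transform(X_train)
X_val_arr = feature_scaler.transform(X_val)
y_train_arr = target_scaler.fit_transform(y_train.values.reshape(-1, 1))
y_val_arr = target_scaler.transform(y_val.values.reshape(-1, 1))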
from torch.utils.data import TensorDataset, DataLoader

train_features = torch.Tensor(X_train_arr)
train_targets = torch.Tensor(y_train_arr)
val_features = torch.Tensor(X_val_arr)
val_targets = torch.Tensor(y_val_arr)

train = TensorDataset(train_features, train_targets)
train_loader = DataLoader(train, batch_size=64, shuffle=False, drop_last=True)
val = TensorDataset(val_features, val_targets)
val_loader = DataLoader(val, batch_size=64, shuffle=False, drop_last=True)
After turning our data into iterable datasets, we can use them for mini-batch training. Instead of explicitly defining batches or wrestling with matrix operations, we simply iterate over them through the DataLoaders, as shown below.
model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)
criterion = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=1e-2)

train_losses = []
val_losses = []
train_step = make_train_step(model, criterion, optimizer)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for epoch in range(n_epochs):
    batch_losses = []
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.view([batch_size, -1, n_features]).to(device)
        y_batch = y_batch.to(device)
        loss = train_step(x_batch, y_batch)
        batch_losses.append(loss)
    training_loss = np.mean(batch_losses)
    train_losses.append(training_loss)

    with torch.no_grad():
        batch_val_losses = []
        for x_val, y_val in val_loader:
            x_val = x_val.view([batch_size, -1, n_features]).to(device)
            y_val = y_val.to(device)
            model.eval()
            yhat = model(x_val)
            val_loss = criterion(y_val, yhat).item()
            batch_val_losses.append(val_loss)
        validation_loss = np.mean(batch_val_losses)
        val_losses.append(validation_loss)

    print(f"[{epoch+1}] Training loss: {training_loss:.4f}\t Validation loss: {validation_loss:.4f}")
Another cool feature PyTorch provides is the view() function, which allows faster and memory-efficient reshaping of tensors. Since I defined my LSTM model with batch_first=True, the batch tensor for the feature set must have the shape (batch size, time steps, number of features). The line x_batch = x_batch.view([batch_size, -1, n_features]).to(device) in the code above does exactly that.
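The LSTMModel class instantiated above is not shown in this answer either; its exact definition lives in the Medium post. A minimal sketch consistent with the batch_first=True convention described here could be:

import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        # batch_first=True -> input shape is (batch size, time steps, number of features)
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)
        out, _ = self.lstm(x, (h0, c0))
        # use the hidden state of the last time step for the prediction
        return self.fc(out[:, -1, :])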
About machine-learning - Feeding multiple inputs to an LSTM for time-series forecasting using PyTorch: a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/65144346/