python - LSTM implementation / overfitting

Reposted. Author: 行者123. Updated: 2023-12-01 04:25:38

I am running into problems with my LSTM implementation. I am not sure whether my implementation is correct or whether this is just an overfitting problem. I am using an LSTM for automated essay scoring, grading texts on a scale from 0 to 10 (or some other score range). I am using the ASAP Kaggle competition data as one of the training sets.

However, the main goal is to achieve good performance on a private dataset of about 500 samples. Those 500 samples include both the training and validation sets. I had previously run some experiments and gotten the model to work, but after fiddling with a few things the model no longer fits; it does not improve at all. I have also re-implemented the code in a cleaner, more object-oriented style, but I still cannot reproduce my earlier results.

I can get the model to fit my data, but only with massive overfitting. I am not sure whether this is some kind of implementation problem or just overfitting, but I cannot get the model to work properly. With the LSTM on essay set 1 of the ASAP data, the best I can get is a kappa of 0.35. For some strange reason, a single-layer fully connected model gets a kappa of 0.75. I suspect an implementation problem, but I am not sure.
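The kappa figures quoted above are agreement scores between predicted and true grades; the ASAP competition uses the quadratic weighted variant, while the training loop below prints plain Cohen's kappa per batch. A minimal sketch of both, assuming scikit-learn's standard cohen_kappa_score API and made-up example scores:

from sklearn.metrics import cohen_kappa_score

# Hypothetical true and predicted essay scores (illustration only)
y_true = [2, 6, 10, 4, 7, 5]
y_pred = [3, 6, 9, 4, 5, 5]

# Unweighted Cohen's kappa -- what the per-batch prints in train.py below report
plain_kappa = cohen_kappa_score(y_true, y_pred)

# Quadratic weighted kappa -- the metric used for the ASAP competition
qwk = cohen_kappa_score(y_true, y_pred, weights="quadratic")

print(f"kappa={plain_kappa:.3f}  qwk={qwk:.3f}")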

Here is my old code:

train.py

import gensim
import numpy as np
import pandas as pd
import torch
from sklearn.metrics import cohen_kappa_score
from torch import nn
import torch.utils.data as data_utils
from torch.optim import Adam

from dataset import AESDataset
from network import Network

from optimizer import Ranger
from qwk import quadratic_weighted_kappa, kappa

batch_size = 32

device = "cuda:0"
torch.manual_seed(1000)
# Load data from csv
file_name = "data/data_new.csv"
data = pd.read_csv(file_name)
arr = data.to_numpy()
text = arr[:, :2]
text = [str(line[0]) + str(line[1]) for line in text]
text = [gensim.utils.simple_preprocess(line) for line in text]

score = arr[:,2]

score = [sco*6 for sco in score]
score = np.asarray(score, dtype=int)


train_dataset = AESDataset(text_arr=text[:400], scores=score[:400])
test_dataset = AESDataset(text_arr=text[400:], scores=score[400:])

score = torch.tensor(score).view(-1,1).long().to(device)


train_loader = data_utils.DataLoader(train_dataset,shuffle=True, batch_size=batch_size, drop_last=True)
test_loader = data_utils.DataLoader(test_dataset,shuffle=True,batch_size=batch_size, drop_last=True)

out_class = 61

epochs = 1000

model = Network(out_class).to(device)
model.load_state_dict(torch.load("model/best_model"))
y_onehot = torch.FloatTensor(batch_size, out_class).to(device)
optimizer = Adam(model.parameters())
criti = torch.nn.CrossEntropyLoss()
# model, optimizer = amp.initialize(model, optimizer, opt_level="O2")


step = 0

for i in range(epochs):
    # Testing
    if i % 1 == 0:
        total_loss = 0
        total_kappa = 0
        total_batches = 0
        model.eval()
        for (text, score) in test_loader:
            out = model(text)
            out_score = torch.argmax(out, 1)
            y_onehot.zero_()
            y_onehot.scatter_(1, score, 1)
            kappa_l = cohen_kappa_score(score.view(batch_size).tolist(), out_score.view(batch_size).tolist())
            score = score.view(-1)
            loss = criti(out, score.view(-1))
            total_loss += loss
            total_kappa += kappa_l
            total_batches += 1
        print(f"Epoch {i} Testing kappa {total_kappa/total_batches} loss {total_loss/total_batches}")
        with open(f"model/epoch_{i}", "wb") as f:
            torch.save(model.state_dict(), f)
        model.train()

    # Training
    for (text, score) in train_loader:
        optimizer.zero_grad()
        step += 1
        out = model(text)
        out_score = torch.argmax(out, 1)
        y_onehot.zero_()
        y_onehot.scatter_(1, score, 1)
        kappa_l = cohen_kappa_score(score.view(batch_size).tolist(), out_score.view(batch_size).tolist())
        loss = criti(out, score.view(-1))
        print(f"Epoch {i} step {step} kappa {kappa_l} loss {loss}")
        loss.backward()
        optimizer.step()

dataset.py

import gensim
import torch
import numpy as np

class AESDataset(torch.utils.data.Dataset):
    def __init__(self, text_arr, scores):
        self.data = text_arr
        self.scores = scores
        self.w2v_model = ("w2vec_model_all")
        self.max_len = 500

    def __getitem__(self, item):
        vector = []
        essay = self.data[item]

        pad_vec = [1 for i in range(300)]
        for i in range(self.max_len - len(essay)):
            vector.append(pad_vec)
        for word in essay:
            word_vec = pad_vec
            try:
                word_vec = self.w2v_model[word]
            except:
                # print(f"Skipping word as word {word} not in dictionary")
                word_vec = pad_vec
            vector.append(word_vec)
        # print(len(vector))
        vector = np.stack(vector)
        tensor = torch.tensor(vector[:self.max_len]).float().to("cuda")
        score = self.scores[item]
        score = torch.tensor(score).long().to("cuda").view(1)

        return tensor, score

    def __len__(self):
        return len(self.scores)


network.py

import torch.nn as nn
import torch

import torch.nn.functional as F

class Network(nn.Module):
    def __init__(self, output_size):
        super(Network, self).__init__()
        self.lstm = nn.LSTM(300, 500, 1, batch_first=True)
        self.dropout = nn.Dropout(p=0.5)
        # self.l2 = nn.L2
        self.linear = nn.Linear(500, output_size)

    def forward(self, x):
        x, _ = self.lstm(x)
        x = x[:, -1, :]
        x = self.dropout(x)
        x = self.linear(x)

        return x

My new code: https://github.com/Clement-Hui/EssayGrading

Best Answer

I think the problem is in the training code. Since you are using an LSTM, you should re-initialize the hidden and cell states at the start of every epoch and detach them from the computation graph after every batch.

network.py

import torch.nn as nn
import torch

import torch.nn.functional as F

class Network(nn.Module):
    def __init__(self, output_size):
        super(Network, self).__init__()
        self.lstm = nn.LSTM(300, 500, 1, batch_first=True)
        self.dropout = nn.Dropout(p=0.5)
        # self.l2 = nn.L2
        self.linear = nn.Linear(500, output_size)

    def forward(self, x, hidden):
        x, hidden = self.lstm(x, hidden)
        x = x.contiguous().view(-1, 500)
        x = self.dropout(x)
        x = self.linear(x)
        return x, hidden

    def init_hidden(self, batch_size):
        weights = next(self.parameters()).data
        hidden = (weights.new(1, batch_size, 500).zero_().cuda(),
                  weights.new(1, batch_size, 500).zero_().cuda())
        return hidden

train.py
# your code for initializing the model and data and all other stuff
for i in range(epochs):

    # Testing
    if i % 1 == 0:
        total_loss = 0
        total_kappa = 0
        total_batches = 0
        model.eval()
        val_h = model.init_hidden(batch_size)  # initialize the hidden state
        for (text, score) in test_loader:
            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            val_h = tuple([each.data for each in val_h])
            out, val_h = model(text, val_h)
            out_score = torch.argmax(out, 1)
            y_onehot.zero_()
            y_onehot.scatter_(1, score, 1)
            kappa_l = cohen_kappa_score(score.view(batch_size).tolist(), out_score.view(batch_size).tolist())
            score = score.view(-1)
            loss = criti(out, score.view(-1))
            total_loss += loss
            total_kappa += kappa_l
            total_batches += 1
        print(f"Epoch {i} Testing kappa {total_kappa/total_batches} loss {total_loss/total_batches}")
        with open(f"model/epoch_{i}", "wb") as f:
            torch.save(model.state_dict(), f)
        model.train()

    # Training
    h = model.init_hidden(batch_size)  # initialize the hidden state
    for (text, score) in train_loader:
        optimizer.zero_grad()
        step += 1
        # Creating new variables for the hidden state, otherwise
        # we'd backprop through the entire training history
        h = tuple([each.data for each in h])
        out, h = model(text, h)
        out_score = torch.argmax(out, 1)
        y_onehot.zero_()
        y_onehot.scatter_(1, score, 1)
        kappa_l = cohen_kappa_score(score.view(batch_size).tolist(), out_score.view(batch_size).tolist())
        loss = criti(out, score.view(-1))
        print(f"Epoch {i} step {step} kappa {kappa_l} loss {loss}")
        loss.backward()
        optimizer.step()
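A small follow-up on the `tuple([each.data for each in h])` lines above: pulling out `.data` does cut the state off from the autograd graph, but the more explicit, currently recommended idiom is `detach()`. A minimal sketch of the same truncation step written that way (the helper name `detach_hidden` is my own, not part of the answer's code):

import torch

def detach_hidden(hidden):
    # Detach an LSTM (h, c) state tuple from the autograd graph so that
    # backpropagation stops at the current batch instead of running
    # through the entire training history.
    return tuple(h.detach() for h in hidden)

# Usage sketch inside the training loop from the answer above:
# h = model.init_hidden(batch_size)
# for (text, score) in train_loader:
#     h = detach_hidden(h)
#     out, h = model(text, h)
#     ...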

Please let me know whether the changes mentioned above work.

Regarding python - LSTM implementation / overfitting, we found a similar question on Stack Overflow: https://stackoverflow.com/questions/58781515/
