gpt4 book ai didi

python - 无法弄清楚如何定义我的 y_test

转载 作者:行者123 更新时间:2023-11-30 09:14:22 26 4
gpt4 key购买 nike

我对 Python 和 sklearn 非常陌生,任何帮助将不胜感激。我之前唯一的经验是使用 mnist,我不确定如何在使用 csv 时定义 y_test。

我已经尝试过其他几种写法,但到目前为止都没有成功。我没有贴出 import 语句和工具函数部分。

# Input locations.
# NOTE(review): the role of each CSV (test labels vs. unlabeled data) is not
# established by this script -- confirm against the actual files.
dataDir = '/content/drive/My Drive/Colab Notebooks/Final/dataQ2/'  # directory with input files
trainFile = 'q2train.csv'  # training examples
labelFile = 'q2label.csv'  # label file
validFile = 'q2valid.csv'  # validation examples

# Load the three CSV files into pandas data frames.
train = pd.read_csv(os.path.join(dataDir, trainFile))
valid = pd.read_csv(os.path.join(dataDir, validFile))
label = pd.read_csv(os.path.join(dataDir, labelFile))

# Feature matrices: every column of `train` except the first one, and the
# same-named columns of `valid`.
x_train = train[list(train.columns[1:])].values
x_test = valid[list(train.columns[1:])].values

# Output locations.
modelDir = 'model/'  # saved models go here
outputDir = 'output/'  # output files go here

# Make sure both directories exist; exist_ok avoids an error on re-runs.
for _path in (modelDir, outputDir):
    os.makedirs(os.path.dirname(_path), exist_ok=True)

# Report where results will be written.
print('Models saved in %s' % modelDir)
print('Outputs saved in %s' % outputDir)

# Scikit-Learn classifiers keyed by a short name. Some are built with
# non-default parameters; the others use the constructor defaults.
models = {
    'NB': MultinomialNB(),
    'DT': DecisionTreeClassifier(),
    'RF': RandomForestClassifier(n_estimators=100),
    'KNN': KNeighborsClassifier(n_neighbors=10, algorithm='brute'),
    'SVM': SVC(kernel='poly', gamma='auto'),
    'LRM': LogisticRegression(),
}

#Define function to evaluate classification accuracy
def evaluatePredictions(modelName, actual, predicted):
    """Print evaluation metrics for one model and return its accuracy.

    Prints the accuracy, the confusion matrix and the classification
    report, and writes the confusion matrix to
    ``outputDir + modelName + 'confusionMatrix.csv'``.

    Args:
        modelName: Short model name used as the prefix of the CSV file name.
        actual: True class labels.
        predicted: Predicted class labels, compared against ``actual``.

    Returns:
        The value returned by ``accuracy_score(actual, predicted)``.
    """
    acc = accuracy_score(actual, predicted)
    print("Accuracy with test data: %4.2f%%\n" % (100 * acc))

    print("CONFUSION MATRIX (Rows correspond to True Values):\n")
    # Wrap the matrix in a data frame so it can be printed and saved as CSV.
    cm = pd.DataFrame(confusion_matrix(actual, predicted))
    print(cm)
    # outputDir is a module-level name; the file name is built by plain
    # concatenation, so outputDir must end with a path separator.
    cm.to_csv(outputDir + modelName + 'confusionMatrix.csv')

    print("\nCLASSIFICATION REPORT:\n")
    print(classification_report(actual, predicted))
    return acc

def displayDigits(images, labels, nCols=10):
    """Display images in a grid with their labels (nCols per row).

    Args:
        images: Indexable collection of vectors with 784 values; each one
            is reshaped to 28x28 before it is drawn.
        labels: Indexable collection of labels, one per image. Its length
            decides how many images are drawn.
        nCols: Number of images per row.

    Returns:
        None. Nothing is drawn when ``labels`` is empty.
    """
    nImages = len(labels)
    if nImages == 0:
        # Zero rows would request a zero-height figure; skip plotting.
        return

    nRows = int(np.ceil(nImages / nCols))  # rows needed to fit every image
    plt.figure(figsize=(2 * nCols, 2 * nRows))
    for i in range(nImages):
        plt.subplot(nRows, nCols, i + 1)  # subplot positions are 1-based
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(images[i].reshape(28, 28), interpolation='nearest')
        plt.xlabel(str(labels[i]), fontsize=14)
    plt.show()


def get_data(trainFile, test_prop=0.2, seed=2019):
    """Split a data set into training and testing parts and describe it.

    NOTE(review): ``trainFile`` is accepted but never read. The data comes
    from ``data_sets[data_set_name]``, and neither of those names is defined
    in the visible part of this file -- confirm where they should come from.

    Args:
        trainFile: Currently unused.
        test_prop: Passed to ``train_test_split`` as ``test_size``.
        seed: Passed to ``train_test_split`` as ``random_state``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, nF, nC, nTrain, nTest)``:
        the four outputs of ``train_test_split``, the number of features,
        the number of distinct target values, and the number of training
        and testing examples.
    """
    data = data_sets[data_set_name]
    X, y = data.data, data.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_prop, random_state=seed)

    nF = X.shape[1]  # number of features
    nC = len(np.unique(y))  # number of classes
    nTrain, nTest = len(y_train), len(y_test)

    print("\nData set: %s" % data_set_name)
    print("\tNumber of features %d" % nF)
    print("\tNumber of output classes = %d" % (nC))
    print("\tNumber of training examples = %d" % (nTrain))
    print("\tNumber of testing examples = %d" % (nTest))
    return X_train, X_test, y_train, y_test, nF, nC, nTrain, nTest

# Train and test the Scikit-Learn models.
# NOTE(review): models_used, y_train and y_test are not assigned anywhere in
# the visible part of this file; they must be defined before this runs.
result = []  # one [name, accuracy, train time, test time] row per model
predictedTest = pd.DataFrame()
predictedTest['label'] = y_test

for m in models_used:
    model = models[m]
    print("Training classifier:\n%s\n" % model)

    # Train the model and time it.
    st = time.time()
    model.fit(x_train, y_train)
    tTrain = time.time() - st
    print("Time to train classifier: %4.2f seconds\n" % (tTrain))

    # Predict the test examples with the trained model and time it.
    st = time.time()
    predicted = model.predict(x_test)
    tTest = time.time() - st
    print("Time to test classifier: %4.2f seconds\n" % (tTest))

    # Save the trained model; the with-block closes the file even if
    # pickling fails.
    modelFile = modelDir + m + '.sav'
    with open(modelFile, 'wb') as fh:
        pickle.dump(model, fh)
    print('Trained model saved as %s\n' % modelFile)

    # Evaluate prediction accuracy on the test examples.
    acc = evaluatePredictions(m, y_test, predicted)

    result.append([m, acc, tTrain, tTest])  # record results
    predictedTest[m] = predicted  # keep this model's predicted classes
    print(60 * '=' + '\n')  # separator after each model

提前谢谢您。

最佳答案

我不确定我是否理解正确:当你从预测中得到有效的结果后,可以将其放入 pandas 的 DataFrame,然后保存为 csv 文件,如下所示:

# Put the predictions in a one-column data frame and write it to a CSV file
# without the index column.
# NOTE(review): `predict` is not defined in this snippet; presumably it is
# the array returned by model.predict -- confirm before use.
y_csv = pd.DataFrame(data={'answer': predict})
y_csv.to_csv('Filename', index=False)

关于python - 无法弄清楚如何定义我的 y_test,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/59132167/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com