
java - Using Weka to classify and predict multiple attributes


I need to programmatically feed in 6 attributes using Java/Weka and classify/predict 3 attributes from that input. I have figured out how to predict one (the last) attribute, but how do I change this so it trains on and predicts the last 3 attributes at once?

The numbers in the .arff files correspond to movie objects in a database.

Here is my Java code:

import java.io.BufferedReader;
import java.io.FileReader;

import weka.classifiers.meta.FilteredClassifier;
import weka.classifiers.trees.DecisionStump;
import weka.classifiers.trees.J48;
import weka.classifiers.trees.RandomForest;
import weka.classifiers.trees.RandomTree;
import weka.core.Instances;
import weka.filters.unsupervised.attribute.Remove;

public class WekaTrial {

    /**
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {

        // Create training data instances
        Instances training_data = new Instances(
                new BufferedReader(
                        new FileReader(
                                "C:/Users/Me/Desktop/File_Project/src/movie_training.arff")));
        training_data.setClassIndex(training_data.numAttributes() - 1);

        // Create testing data instances
        Instances testing_data = new Instances(
                new BufferedReader(
                        new FileReader(
                                "C:/Users/Me/Desktop/FileProject/src/movie_testing.arff")));
        testing_data.setClassIndex(testing_data.numAttributes() - 1);

        // Print initial data summary
        String summary = training_data.toSummaryString();
        int number_samples = training_data.numInstances();
        int number_attributes_per_sample = training_data.numAttributes();
        System.out.println("Number of attributes in model = "
                + number_attributes_per_sample);
        System.out.println("Number of samples = " + number_samples);
        System.out.println("Summary: " + summary);
        System.out.println();

        // A classifier for decision trees:
        J48 j48 = new J48();

        // Filter for removing attributes:
        Remove rm = new Remove();
        rm.setAttributeIndices("1"); // remove 1st attribute

        // Filtered classifier
        FilteredClassifier fc = new FilteredClassifier();
        fc.setFilter(rm);
        fc.setClassifier(j48);

        // Counters for the evaluation below
        float correct = 0;
        float incorrect = 0;

        // Train using movie_training.arff:
        fc.buildClassifier(training_data);

        // Test using movie_testing.arff:
        for (int i = 0; i < testing_data.numInstances(); i++) {
            double pred = fc.classifyInstance(testing_data.instance(i));
            System.out.print("Expected value: "
                    + testing_data.classAttribute().value(
                            (int) testing_data.instance(i).classValue()));
            System.out.println(", Predicted value: "
                    + testing_data.classAttribute().value((int) pred));
            // Increment correct/incorrect counts (compare label indices, not String references)
            if ((int) testing_data.instance(i).classValue() == (int) pred) {
                correct += 1;
            } else {
                incorrect += 1;
            }
        }

        // Print correct/incorrect
        float percent_correct = correct / (correct + incorrect) * 100;
        System.out.println("Number correct: " + correct + "\nNumber incorrect: " + incorrect
                + "\nPercent correct: " + percent_correct + "%");

    }

}

Here is my .arff training file (with redundant rows removed):

@relation movie_data

@attribute movie1_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie1_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie1_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}

@data
18,18,18,18,18,18,18,18,18
28,18,36,18,53,10769,18,53,10769
37,37,37,28,12,14,28,12,14
27,53,27,18,10749,10769,27,53,27
12,12,12,35,10751,35,12,12,12
35,18,10749,18,18,18,35,18,10749
28,12,878,53,53,53,53,53,53
18,18,18,28,37,10769,18,18,18
18,53,18,28,12,35,18,53,18
28,80,53,80,18,10749,28,80,53
18,10749,18,18,10756,18,18,10756,18
18,10749,10769,28,12,878,18,10749,10769
18,10756,18,16,35,10751,16,35,10751
35,18,10751,35,18,10752,35,18,10751

And the .arff test file:

@relation movie_data

@attribute movie1_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie1_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie1_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}

@data
18,27,53,18,53,10756,18,27,53
35,18,10749,18,10769,18,18,10769,18
16,878,53,16,18,16,16,18,16
35,10749,10757,18,18,18,18,18,18
80,18,10748,18,10749,18,18,10749,18
28,18,36,35,18,10751,28,18,36
18,10749,10769,35,18,10402,35,18,10402
28,12,878,18,10749,10769,18,10749,10769
35,10749,35,14,10402,10751,14,10402,10751

Best Answer

If I understand you correctly, you have a "multi-class", or rather "multi-target", problem. You have a couple of simple options for solving it:

  1. Create a single new target class combining all three (the concatenation of decision_one, decision_two and decision_three).

  2. Train a separate classifier for each target (see the sketch below).
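
For option 2, here is a minimal sketch, assuming the nine-attribute ARFF layout shown in the question (decision_one, decision_two and decision_three as the last three attributes) and reusing J48 inside a FilteredClassifier as in the original code; the file paths and the class name MultiTargetTrial are placeholders. It loops over the three targets, makes each one the class in turn, and hides the other two decision attributes from the classifier with a Remove filter:

import java.io.BufferedReader;
import java.io.FileReader;

import weka.classifiers.meta.FilteredClassifier;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.filters.unsupervised.attribute.Remove;

public class MultiTargetTrial {

    public static void main(String[] args) throws Exception {

        // Placeholder paths; point these at the same files as the original code
        Instances training_data = new Instances(new BufferedReader(
                new FileReader("movie_training.arff")));
        Instances testing_data = new Instances(new BufferedReader(
                new FileReader("movie_testing.arff")));

        // 0-based indices of decision_one, decision_two and decision_three
        int[] targets = {6, 7, 8};

        for (int target : targets) {
            // Make the current decision attribute the class in both sets
            training_data.setClassIndex(target);
            testing_data.setClassIndex(target);

            // Remove the other two decision attributes so only the six movie
            // attributes are used as input (Remove expects 1-based indices)
            StringBuilder others = new StringBuilder();
            for (int other : targets) {
                if (other != target) {
                    if (others.length() > 0) {
                        others.append(",");
                    }
                    others.append(other + 1);
                }
            }
            Remove rm = new Remove();
            rm.setAttributeIndices(others.toString());

            FilteredClassifier fc = new FilteredClassifier();
            fc.setFilter(rm);
            fc.setClassifier(new J48());
            fc.buildClassifier(training_data);

            System.out.println("Predictions for "
                    + training_data.attribute(target).name() + ":");
            for (int i = 0; i < testing_data.numInstances(); i++) {
                double pred = fc.classifyInstance(testing_data.instance(i));
                System.out.println("  expected "
                        + testing_data.instance(i).stringValue(target)
                        + ", predicted "
                        + testing_data.classAttribute().value((int) pred));
            }
        }
    }
}

Option 1 would instead mean preprocessing the data, for example editing the ARFF so that the three decision columns become a single nominal column (such as 18_53_18), and then training one classifier exactly as in the original code.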

Regarding "java - Using Weka to classify and predict multiple attributes", a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/20447933/
