gpt4 book ai didi

r - R 中的朴素贝叶斯分类器

转载 作者:行者123 更新时间:2023-12-02 04:44:24 24 4
gpt4 key购买 nike

我正在尝试用下面的代码运行 NaiveBayes 分类器:

# First attempt from the question: fit a naive Bayes model on the first 1000
# rows of the CSV and predict on the following 500 rows.
# Load e1071, which provides naiveBayes().
library(e1071)
filename <- "file.csv"
data <- read.csv(filename, sep=",")
# Rows 1-1000 are used for training and rows 1001-1500 for testing;
# drop = FALSE keeps each subset a data frame.
trainData <- data[1:1000,,drop = FALSE]
testData <- data[1001:1500,,drop = FALSE]
# Columns 1-49 are passed as predictors and column 50 as the class labels.
# NOTE(review): the sample data posted with this question has 51 columns
# (X, X1..X49, target), which would make column 50 X49 rather than target --
# confirm against the real file.
# NOTE(review): column 50 is passed as read by read.csv(), without an explicit
# conversion to factor -- presumably related to the reported error; confirm.
model <- naiveBayes(trainData[,1:49], trainData[,50])
# Predict on the test rows with column 50 removed.
predicted <- predict(model, testData[,-50])

我收到这个错误:

Error in apply(log(sapply(seq_along(attribs), function(v) { : 
dim(X) must have a positive length

我检查了一下,没有发现任何缺失的属性。可能是什么问题?

应 Mike 博士的要求,下面提供一些示例数据。请注意,数据包含 50 列(在我原来的帖子中,我使用了 30 列,因为我想把问题简化)

# Sample of the input data in dput() form: a data.frame with 10 rows
# (row.names = c(NA, 10L)) and 51 named columns: X, X1..X49 and target.
# X2, X3, X7 and X9 are factors; X4, X5, X6 and target are logical; all other
# columns are integer. Every target value in this sample is FALSE.
structure(list(X = 1:10, X1 = c(12L, 6L, 6L, 11L, 17L, 7L, 5L, 
3L, 4L, 7L), X2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = "A", class = "factor"), X3 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "us", class = "factor"),
X4 = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE,
TRUE), X5 = c(TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE,
FALSE, FALSE, TRUE), X6 = c(FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE), X7 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "DAY", class = "factor"),
X8 = c(159L, 47L, 163L, 51L, 171L, 145L, 31L, 49L, 49L, 154L
), X9 = structure(c(2L, 5L, 2L, 4L, 3L, 2L, 1L, 5L, 1L, 2L
), .Label = c("100-150", "150-200", "50-100", "650-700",
"unknown"), class = "factor"), X10 = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), X11 = c(0L, 1L, 0L, 2L, 0L, 0L, 0L,
1L, 0L, 0L), X12 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), X13 = c(0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L), X14 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X15 = c(1L, 1L, 2L,
2L, 0L, 1L, 0L, 2L, 2L, 2L), X16 = c(2L, 1L, 2L, 1L, 0L,
2L, 1L, 2L, 1L, 2L), X17 = c(6L, 4L, 6L, 7L, 0L, 4L, 3L,
4L, 4L, 8L), X18 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), X19 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X20 = c(1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X21 = c(7L, 6L, 7L,
3L, 0L, 6L, 6L, 2L, 7L, 7L), X22 = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), X23 = c(2L, 2L, 2L, 2L, 0L, 2L, 2L,
2L, 2L, 2L), X24 = c(0L, 1L, 2L, 1L, 0L, 0L, 0L, 1L, 1L,
1L), X25 = c(2L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L), X26 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X27 = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), X28 = c(1L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 1L, 0L), X29 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), X30 = c(1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 2L,
0L), X31 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X32 = c(2L,
0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L), X33 = c(0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), X34 = c(1L, 1L, 2L, 3L, 0L,
2L, 0L, 2L, 2L, 1L), X35 = c(0L, 1L, 0L, 0L, 0L, 0L, 0L,
1L, 2L, 0L), X36 = c(3L, 3L, 5L, 3L, 0L, 4L, 4L, 7L, 4L,
2L), X37 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X38 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X39 = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), X40 = c(3L, 1L, 4L, 3L, 0L,
2L, 0L, 3L, 1L, 0L), X41 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), X42 = c(3L, 6L, 3L, 2L, 0L, 2L, 0L, 3L, 2L,
3L), X43 = c(1L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L), X44 = c(1L,
2L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L), X45 = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), X46 = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), X47 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), X48 = c(0L, 1L, 0L, 1L, 0L, 0L, 1L, 2L, 1L,
0L), X49 = c(2L, 0L, 1L, 1L, 0L, 0L, 2L, 3L, 0L, 4L), target = c(FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
)), .Names = c("X", "X1", "X2", "X3", "X4", "X5", "X6", "X7",
"X8", "X9", "X10", "X11", "X12", "X13", "X14", "X15", "X16",
"X17", "X18", "X19", "X20", "X21", "X22", "X23", "X24", "X25",
"X26", "X27", "X28", "X29", "X30", "X31", "X32", "X33", "X34",
"X35", "X36", "X37", "X38", "X39", "X40", "X41", "X42", "X43",
"X44", "X45", "X46", "X47", "X48", "X49", "target"), row.names = c(NA,
10L), class = "data.frame")

根据 Mike 博士的建议,我把数据转换成了因子(factor)。我仍然得到完全相同的错误。
所以现在的代码是:

# Second attempt from the question: same pipeline as before, but every
# non-numeric column is converted to a factor before the train/test split.
data <- read.csv(filename, sep=",")
# sapply(data, is.numeric) flags the numeric columns; the negative which()
# index selects the remaining columns, and each of them is replaced by
# as.factor() of itself.
# NOTE(review): if no column is numeric, -which(...) is an empty index and
# selects nothing, so no column would be converted -- confirm this cannot
# happen with the real file.
data[, -which(sapply(data, is.numeric))]<-
as.data.frame(lapply((data[,-which(sapply(data, is.numeric))]), as.factor))
# Rows 1-1000 are used for training and rows 1001-1500 for testing.
trainData <- data[1:1000,,drop = FALSE]
testData <- data[1001:1500,,drop = FALSE]
# Columns 1-49 are passed as predictors and column 50 as the class labels.
# NOTE(review): with the 51-column sample data posted with this question,
# column 50 is X49 (an integer column, which the conversion above leaves
# untouched), not target -- confirm against the real file.
model <- naiveBayes(trainData[,1:49], trainData[,50])
# Predict on the test rows with column 50 removed.
predicted <- predict(model, testData[,-50])

最佳答案

您应该确保对分类变量使用因子(factor)。下面的小示例使用了您的数据。

# Worked example on the 10-row sample data (`data` is the sample data frame
# and e1071 is already loaded).
# Relabel the response so that both classes are present: rows 1-5 become TRUE
# and rows 6-10 become FALSE.
data$target[1:5]<-TRUE
data$target[6:10]<-FALSE
# Shuffle the rows so the train/test split below is random.
data<-data[sample(1:10),]
# Make sure the target is a factor
data$target<-as.factor(data$target)
# Work with a subset since we only have 10 data points.
# In the sample data, columns 2, 49, 50 and 51 are X1, X48, X49 and target.
data2<-data[,c(2,49:50,51)]
# Train on the first five shuffled rows, using every other column of data2
# as a predictor of target.
model<-naiveBayes(target~., data=data2[1:5,])
# Predict once on the five held-out rows and reuse the result.
pred<-predict(model, data2[6:10,])
pred
# Confusion table: predicted class (rows) against the actual target (columns).
# The comparison must use the target column; data[6:10, 5] is column X4.
# The counts vary from run to run because the rows are shuffled at random.
table(predicted=pred, actual=data2$target[6:10])

因此,请务必将分类变量和响应变量转换为因子,并使用变量的子集。

关于r - R 中的朴素贝叶斯分类器,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/20091614/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com