gpt4 book ai didi

mysql - 检查插入数据的完整性 - sqlite3

转载 作者:行者123 更新时间:2023-11-29 23:46:42 24 4
gpt4 key购买 nike

我有一个 csv 格式的“大”文件(159 G),我已使用 Sqlite3 和 C 将其插入到数据库表中。现在,实际的 csv 文件有 1,232,799,308 行(10 亿+)。但仅插入了 (123,254,689)。我有三个问题:

1) 如何将插入的数据与 csv 文件中的实际数据进行比较?我的意思是我想知道感兴趣的数据是否来自一个 block (即从第 1 行到第 100,000,000 行)还是随机的?

2) 有没有办法根据采样率(平均值、标准差和误差幅度)知道插入的行是否代表整个总体?

3) 代码是否有问题,只插入了 10% 的行!?

提前感谢大家的帮助:)

代码在C:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <sqlite3.h>

#define INPUTDATA "/nobackup/DSS/clusterdata-2011-1/working_data/task_usage.csv"
#define DATABASE "/nobackup/DSS/clusterdata-2011-1/working_data/google.db"
#define TABLE "CREATE TABLE IF NOT EXISTS task_usageBS(startendtime TEXT, jobID TEXT, taskindex TEXT, machineID TEXT, CPUusage TEXT, MEMusage TEXT, assignedMEM TEXT, unmappedpagecacheMEMusage TEXT, pagecacheMEMusage TEXT, maxMEMusage TEXT, diskIOtimeMEAN TEXT, localdiskspaceusedMEAN TEXT, CPUusagemax TEXT, diskIOtimemax TEXT, cyclesperinstructionCPI TEXT, MEMaccessesperinstructionMAI TEXT, samplingrate TEXT, aggregationtype TEXT, PRIMARY KEY (startendtime, jobID, taskindex), FOREIGN KEY(jobID) REFERENCES job_events(jobID), FOREIGN KEY(taskindex) REFERENCES task_events(taskindex), FOREIGN KEY(machineID) REFERENCES machine_events(machineID))"
#define BUFFER_SIZE 1024

int main(int argc, char **argv) {

sqlite3 * db;
sqlite3_stmt * stmt;
char * sErrMsg = 0;
const char * tail = 0;
int nRetCode;
int n = 0;

clock_t cStartClock;

FILE * pFile;
char sInputBuf [BUFFER_SIZE] = "\0";

char * sTS = 0; /* startendtime */
char * sJI = 0; /* jobID */
char * sTI = 0; /* taskindex */
char * sMI = 0; /* machineid */
char * sCUM = 0; /* CPUusageMEAN */
char * sMU = 0; /* MEMusage */
char * sAM = 0; /* assignedMEM */
char * sUPCMU = 0; /* unmappedpagecacheMEMusage */
char * sPCMU = 0; /* pagecacheMEMusage */
char * sMMU = 0; /* maxMEMusage */
char * sDIOTM = 0; /* diskIOtimeMEAN */
char * sLDSUM = 0; /* localdiskspaceusedMEAN */
char * sCUMAX = 0; /* CPUusagemax */
char * sDIOTMAX = 0; /* diskIOtimemax */
char * sCPI = 0; /* cyclesperinstructionCPI */
char * sMAPI = 0; /* MEMaccessesperinstructionMAI */
char * sSR = 0; /* samplingrate */
char * sAT = 0; /* aggregationtype */



char sSQL [BUFFER_SIZE] = "\0";

/*********************************************/
/* Open the Database and create the Schema */
sqlite3_open(DATABASE, &db);
sqlite3_exec(db, TABLE, NULL, NULL, &sErrMsg);

/*********************************************/
/* Open input file and import into Database*/
cStartClock = clock();

sprintf(sSQL, "INSERT INTO task_usageBS VALUES (@TS, @JI, @TI, @MI, @CUM, @MU, @AM, @UPCMU, @PCMU, @MMU, @DIOTM, @LDSUM, @CUMAX, @DIOTMAX, @CPI, @MAPI, @SR, @AT)");
sqlite3_prepare_v2(db, sSQL, BUFFER_SIZE, &stmt, &tail);
//sqlite3_exec(db, sSQL, NULL, NULL, &sErrMsg);

sqlite3_exec(db, "BEGIN TRANSACTION", NULL, NULL, &sErrMsg);

pFile = fopen (INPUTDATA,"r");
while (!feof(pFile)) {

fgets (sInputBuf, BUFFER_SIZE, pFile);

sTS = strtok (sInputBuf, ",");
sJI = strtok (NULL, ",");
sTI = strtok (NULL, ",");
sMI = strtok (NULL, ",");
sCUM = strtok (NULL, ",");
sMU = strtok (NULL, ",");
sAM = strtok (NULL, ",");
sUPCMU = strtok (NULL, ",");
sPCMU = strtok (NULL, ",");
sMMU = strtok (NULL, ",");
sDIOTM = strtok (NULL, ",");
sLDSUM = strtok (NULL, ",");
sCUMAX = strtok (NULL, ",");
sDIOTMAX = strtok (NULL, ",");
sCPI = strtok (NULL, ",");
sMAPI = strtok (NULL, ",");
sSR = strtok (NULL, ",");
sAT = strtok (NULL, ",");

sqlite3_bind_text(stmt, 1, sTS, -1, SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 2, sJI, -1, SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 3, sTI, -1, SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 4, sMI, -1, SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 5, sCUM, -1, SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 6, sMU, -1, SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 7, sAM, -1, SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 8, sUPCMU, -1, SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 9, sPCMU, -1, SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 10, sMMU, -1, SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 11, sDIOTM, -1, SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 12, sLDSUM, -1, SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 13, sCUMAX, -1, SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 14, sDIOTMAX, -1, SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 15, sCPI, -1, SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 16, sMAPI, -1, SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 17, sSR, -1, SQLITE_TRANSIENT);
sqlite3_bind_text(stmt, 18, sAT, -1, SQLITE_TRANSIENT);


sqlite3_step(stmt);

sqlite3_clear_bindings(stmt);
sqlite3_reset(stmt);

n++;

}
fclose (pFile);

sqlite3_exec(db, "END TRANSACTION", NULL, NULL, &sErrMsg);

printf("Imported %d records in %4.2f seconds\n", n, (clock() - cStartClock) / (double)CLOCKS_PER_SEC);

sqlite3_finalize(stmt);
sqlite3_close(db);

return 0;
}

最佳答案

如何在sqlite3中使用.import导入数据。

sqlite3 可以从 CSV 文件导入数据,假设满足很少的条件,例如。数字字段中的小数点分隔符与您的区域设置匹配,文本编码也与您的区域设置匹配。当然,目标表中的列数应与 CVS 文件中的字段数匹配。

如果满足这些条件,您可以简单地调用以下代码:

sqlite3 db.sqlite
.import data.csv my_table

其中 db.sqlite 是您的数据库文件,其中包含所有必需的架构,data.csv 是包含源数据的 CSV 文件,my_table 是目标表。

关于mysql - 检查插入数据的完整性 - sqlite3,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/25894700/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com