gpt4 book ai didi

c - 如何在 ANSI C 中对字符串结构数组使用快速排序

转载 作者:行者123 更新时间:2023-11-30 20:40:00 25 4
gpt4 key购买 nike

我有一个结构体数组,其中保存了从文件读入的约 300 万行字符串。我想对这些行进行排序,文件内容类似于:

aaaaa

aaaab

aaacc

等等。

我最初使用的是冒泡排序。用 10 行数据测试时可以正常工作,但对整个 300 万行的文件运行时,过了 30 多分钟仍未完成。于是我决定改用快速排序,但编译时遇到了如下提示:

需要“const char **”,但参数类型为“struct lines *”

我该如何解决这个问题?这是我正在做的事情:

#include<stdio.h>
#include<string.h>
#include <stdlib.h>
#include <math.h>
#include <stdbool.h>
#include <ctype.h>

/*
 * Exchange the two string pointers that arg1 and arg2 point at.
 * Only the pointers are swapped; the character data is never touched.
 */
void swap_str_ptrs(char const **arg1, char const **arg2)
{
    char const *const first = *arg1;
    char const *const second = *arg2;

    *arg1 = second;
    *arg2 = first;
}

/*
 * Sort an array of C-string pointers into ascending strcmp() order, in place.
 *
 * args  array of `len` pointers to NUL-terminated strings; only the pointers
 *       are rearranged, the strings themselves are not modified.
 * len   number of elements in args; 0 and 1 are no-ops.
 *
 * The pivot is picked with rand(), as before. The partition is three-way
 * (less / equal / greater), so runs of identical strings are placed in a
 * single pass instead of being re-partitioned one element at a time. Only the
 * smaller side is handled recursively and the larger side is handled by the
 * enclosing loop, which keeps the recursion depth logarithmic in len even for
 * unlucky pivots.
 */
void quicksort_strs(char const *args[], unsigned int len)
{
    while (len > 1) {
        /* The pivot is kept as a pointer value, so later swaps cannot lose it. */
        char const *pivot = args[(unsigned int)rand() % len];
        unsigned int lt = 0;   /* args[0 .. lt)    <  pivot */
        unsigned int i = 0;    /* args[lt .. i)    == pivot */
        unsigned int gt = len; /* args[gt .. len)  >  pivot */
        unsigned int right_len;

        while (i < gt) {
            int cmp = strcmp(args[i], pivot);

            if (cmp < 0) {
                char const *tmp = args[lt];
                args[lt] = args[i];
                args[i] = tmp;
                ++lt;
                ++i;
            } else if (cmp > 0) {
                char const *tmp;
                --gt;
                tmp = args[gt];
                args[gt] = args[i];
                args[i] = tmp;
                /* i is not advanced: the element swapped in is still unclassified. */
            } else {
                ++i;
            }
        }

        /*
         * At least one element equals the pivot, so both remaining ranges are
         * strictly shorter than len and the loop always makes progress.
         */
        right_len = len - gt;
        if (lt < right_len) {
            quicksort_strs(args, lt);
            args += gt;
            len = right_len;
        } else {
            quicksort_strs(args + gt, right_len);
            len = lt;
        }
    }
}

/*
 * Question's first attempt: read the input file line by line into a static
 * array of fixed-size records, then hand that array to quicksort_strs().
 *
 * NOTE(review): this block is kept as posted because it is the failing code
 * the question is about. Known problems visible here:
 *   - quicksort_strs() takes `char const *args[]` (an array of string
 *     pointers), but myDNA is an array of `lines` structs, so the final call
 *     passes an incompatible pointer type.
 *   - buf holds up to 256 bytes while each record holds 100, so strcpy() can
 *     write past the end of a record for a long input line.
 *   - the fopen() result is used without a NULL check, and `i` is never
 *     checked against the array size.
 *   - standard C expects main to return int.
 */
void main()
{
FILE *dnaFile=fopen("hs_alt_HuRef_chr2.fa", "r"); // input file being read
typedef struct lines
{
char lines[100]; // storage for one line of text
} lines;
int i = 0;

char buf[256];
static lines myDNA[3354419]; // one record per line, roughly 3 million of them
while (fgets (buf, sizeof(buf), dnaFile))
{
// The first line read (i == 0) is not stored; myDNA[0] stays zero-filled.
if (i > 0)
strcpy(myDNA[i].lines, buf); // copy the line (including its newline) into record i

i++;
}

// this is the bubblesort approach, works, but it takes too long
/**int a;
int total;
char temp[150];
char report[100][150];

for(a=0; a<3354419; a++)
{
for(total=a+1; total<=3354419; total++)
{
if(strcmp(myDNA[a].lines,myDNA[total].lines)>0)
{
strcpy(temp,myDNA[a].lines);
strcpy(myDNA[a].lines,myDNA[total].lines);
strcpy(myDNA[total].lines,temp);
}
}
}*/

// Always sorts the full declared capacity, not the number of lines read (i).
quicksort_strs(myDNA, 3354419); // attempt at quicksort, which crashes

}

使用 QSORT

#include<stdio.h>
#include<string.h>
#include <stdlib.h>
#include <math.h>
#include <stdbool.h>
#include <ctype.h>

/*
 * qsort() comparator for elements that begin with a NUL-terminated string:
 * a and b are treated as the addresses of the strings themselves and ordered
 * with strcmp().
 */
int compare_function(const void *a,const void *b) {
    const char *lhs = a;
    const char *rhs = b;

    return strcmp(lhs, rhs);
}

/*
 * Question's second attempt: same file-reading loop as before, but the
 * records are sorted with the library qsort() and then printed.
 *
 * NOTE(review): kept as posted because it is the code the question asks about.
 * Visible problems:
 *   - qsort() is told to sort only 1000 elements, and the print loop also
 *     stops at 1000, so the rest of the array is never sorted or shown.
 *   - the element size is the literal 100; presumably this equals
 *     sizeof(lines), but sizeof would be the robust spelling.
 *   - buf holds up to 256 bytes while each record holds 100, so strcpy() can
 *     overflow a record on a long line.
 *   - the fopen() result is not checked, `i` is not bounded by the array
 *     size, and standard C expects main to return int.
 */
void main()
{
FILE *dnaFile=fopen("hs_alt_HuRef_chr2.fa", "r"); // file with 3 million lines
typedef struct lines
{
char lines[100];
} lines;
int i = 0;

char buf[256];
static lines myDNA[3354419]; // array holding the 3 million lines
while (fgets (buf, sizeof(buf), dnaFile))
{
// The first line read (i == 0) is not stored; myDNA[0] stays zero-filled.
if (i > 0)
strcpy(myDNA[i].lines, buf); // copy the line (including its newline) into record i

i++;
}

// Sorts records 0..999 only; each element is treated as 100 bytes wide.
qsort(myDNA, 1000, 100, compare_function); // qsort works for the first 1k lines; beyond that the output is wrong

int a;
for (a = 0; a < 1000; a++){
printf("%s", myDNA[a].lines); // print each of the first 1000 records
}

}

最佳答案

我对问题中的代码稍作了修改。经过测试,下面的代码可以按问题所述的要求正常运行。

#include <stdio.h>  // printf(), fprintf(), fclose(), feof(), fgets(), fopen()
#include <string.h> // memset(), strcmp(), strdup()
#include <stdlib.h> // malloc(), qsort(), free()
#include <errno.h> // errno, ENOMEM, EIO

#define MAX_FILE_LINES 3354419
#define MAX_LINE_SIZE (255+1)

/*
 * qsort() comparator for an array of `char *` elements.
 * a and b are the addresses of two array slots; each slot is dereferenced to
 * reach the string, and the strings are ordered with strcmp().
 */
int compare_function(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}

/*
 * Read a text file into a heap-allocated array of strings (one per line, with
 * the trailing newline removed), sort the array with qsort(), and print each
 * line prefixed by its index.
 *
 * argV[1], when present, overrides the default input path.
 * Returns 0 on success, or an errno-style code (ENOMEM, EIO, or the errno left
 * by the failing call) after writing a message to stderr.
 */
int main(int argC, char *argV[])
{
    int rCode = 0;
    char *path = "hs_alt_HuRef_chr2.fa";
    FILE *fp = NULL;
    char **rows = NULL;
    int rowCount = 0;
    int i;
    char buf[MAX_LINE_SIZE];

    /* An optional first argument names the file to sort. */
    if (argC > 1) {
        path = argV[1];
    }

    /* Table of line pointers, zeroed so unused slots are NULL. */
    errno = 0;
    rows = malloc(MAX_FILE_LINES * sizeof(*rows));
    if (rows == NULL) {
        rCode = errno ? errno : ENOMEM;
        fprintf(stderr, "malloc() failed. errno:%d\n", errno);
        goto CLEANUP;
    }
    memset(rows, 0, MAX_FILE_LINES * sizeof(*rows));

    errno = 0;
    fp = fopen(path, "r");
    if (fp == NULL) {
        rCode = errno;
        fprintf(stderr, "fopen() failed to open \"%s\". errno:%d\n", path, errno);
        goto CLEANUP;
    }

    /* Store a private copy of every line until EOF or the table is full. */
    while (rowCount < MAX_FILE_LINES) {
        char *newline;

        if (fgets(buf, sizeof(buf), fp) == NULL) {
            if (feof(fp)) {
                break;
            }

            rCode = EIO;
            fprintf(stderr, "fgets() failed.\n");
            goto CLEANUP;
        }

        /* Cut the line at its newline, if it has one. */
        newline = strchr(buf, '\n');
        if (newline != NULL) {
            *newline = '\0';
        }

        errno = 0;
        rows[rowCount] = strdup(buf);
        if (rows[rowCount] == NULL) {
            rCode = errno;
            fprintf(stderr, "strdup() failed. errno:%d\n", errno);
            goto CLEANUP;
        }

        /* Count the row only once its copy exists, so cleanup frees exactly these. */
        ++rowCount;
    }

    /* Order the pointers by the strings they reference. */
    qsort(rows, rowCount, sizeof(*rows), compare_function);

    for (i = 0; i < rowCount; ++i) {
        printf("%8d: %s\n", i, rows[i]);
    }

CLEANUP:

    if (fp != NULL) {
        fclose(fp);
    }

    /* Release each stored line, then the table itself. */
    if (rows != NULL) {
        for (i = 0; i < rowCount; ++i) {
            free(rows[i]);
        }

        free(rows);
    }

    return rCode;
}

关于c - 如何在 ANSI C 中对字符串结构数组使用快速排序,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/24847177/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com