gpt4 book ai didi

c - 简单C程序中有关字符指针的内存问题

转载 作者:行者123 更新时间:2023-11-30 16:42:28 26 4
gpt4 key购买 nike

问题

当我使用大型文本文件(源自项目古腾堡:爱丽丝梦游仙境)时,会出现内存问题,但在一些较小的文本文件(两行测试文本文件和 Maya Angelou 诗)中不会发生内存问题)。

在大型文本文件中,我收到段错误,并且在使用 Valgrind 时报告“大小 1 的无效写入”和“大小 1 的无效读取”。经过检查,它似乎在我编写的一个函数中,该函数从传入的行中获取每个单词。它似乎提示在分配大小为 50 的 block 后单个地址位置为 0 字节。

我查看了我的代码,其中我将 50 个字符分配给 char 指针,但我不确定大文本文件中出了什么问题,而较小的文本文件中没有出问题。另外,看起来奇怪的是,当我在 Debug模式下运行时,我写的它会走到最后并到达我用 feof(fp) 验证的 EOF。

我希望有人能够发现发生了什么以及我错过了什么,因为我不经常用 C 编程。预先感谢您为了解正在发生的事情提供的任何帮助。

计划概述

我取出了我正在编写的程序部分,并将其放入一个简单的 main 中,以使事情变得更清晰,并希望更容易发现问题。该程序大致分为:

  • 传入文本文件
  • 使用“r+”打开文本文件指针
  • 使用 fgets 逐行循环
  • 将每行中的“\n”或“\r”替换为“\0”
  • 循环遍历行并提取每个单词(由 isspace() 标记)直到到达“\0”
  • 哈希单词
  • 使用了自由指针
  • 关闭文件指针

Valgrind 输出显示 getWord() 函数中发生的问题。我已经查看过它并尝试逐个字符地输出它并检查它,但我不明白为什么会发生段错误,而且只在大文本文件中发生。

代码

ma​​in.c

/*
* License: GPLv3
*
* File: main.c
*
* Description: A program.
*
* Author: Brandon Authier (hblkr)
* Date: 6 Aug 2017
* Version: 1.0
*
*/

// Includes for program to run
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <sys/types.h>
#include <ctype.h>
#include <string.h>
#include <unistd.h>

// Global debug
bool DEBUG = false;

/* A djb2 hash function sourced online.
*/
unsigned long hash(unsigned char *str)
{
unsigned long hash = 5381;
int c;

while (c = *str++)
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */

return hash;
}

/*
* Using a file pointer, get the first word and then pass a copy of the word
* back
*
* @returns: a boolean of true when word is built
*
* TODO: Find issue in here causing memory error
*
*/
bool getWord(char* line, int* idx, char* word)
{
int wordIdx = 0;

// Build word character by character
for ( ; line[*idx] != '\0'; *idx = (*idx + 1))
{
if (isalpha(line[*idx]) || (line[*idx] == '-'))
{
word[wordIdx++] = tolower(line[*idx]);
}
else if (isspace(line[*idx]))
{
*idx += 1;
return true;
}
}

return true;
}

/*
* Process file. Tokenize each line and process each word.
*
* TODO: Process file.
*/
void processFile(FILE* textFp)
{
// Variables to hold:
// a line for text
// a word once it is parsed
// an index to keep track of the line
char line[1024] = "";
unsigned char* word = malloc(sizeof(unsigned char) * 50);
int* lineIdx = malloc(sizeof(int));
int lineCount = 1;

// Set the line index to keep track of the line
*lineIdx = 0;

while (fgets(line, sizeof(line), textFp) != NULL)
{
// Get line character Count
int charcount = 0;
int wordCount = 1;

for(int m = 0; line[m] != '\0'; m++)
{
// By counting spaces, you can get a rough estimate of how many words
// are in each line. (totalSpaces + 1)
if ((line[m] == ' ') && (line[m - 1] != ' '))
{
wordCount++;
}

if(line[m] != '\n' && line[m] != '\r')
{
charcount++;
}
else
{
line[m] = '\0';
}
}

if (DEBUG == true)
{
fprintf(stdout, "line %d:\n", lineCount);
fprintf(stdout, " words in line: %d\n", wordCount);
fprintf(stdout, " charcount: %d\n", charcount);
fprintf(stdout, " lineIdx: %d\n", *lineIdx);
fprintf(stdout, " value: \"%s\"\n\n", line);
}

// Get word
while (*lineIdx < (charcount - 1))
{
// Sanitize word
for (int i = 0; i < 50; i++)
{
word[i] = '\0';
}

getWord(line, lineIdx, word);
unsigned long hash_output = hash(word);

if (DEBUG == true)
{
fprintf(stdout, "key: %10d,\t", hash_output);
fprintf(stdout, "value: %8s,\t", word);
fprintf(stdout, "lineIdx: %2d\n", *lineIdx);
}
} // End while for word

if (DEBUG == true) { fprintf(stdout, "\n========\n\n"); }

// Reset line index to 0 for new line
*lineIdx = 0;
lineCount++;
} // End while for line

if (DEBUG == true) { if (feof(textFp)) { fprintf(stderr, "Reached FEOF.\n"); } }

// Free pointers
free(lineIdx);
free(word);
}


// Main
int main (int argc, char* argv[])
{
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// VERIFY COMMAND LINE ARGUMENTS NECESSARY FOR PROGRAM
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

// User did not pass in any argument
if (argc == 1)
{
fprintf(stderr, "usage: main afile.txt\n");
exit(-1);
}

// Grab text file, possibly turn on debug, and ignore other arguments
if (argc >= 3)
{
// For debug purposes
if (strcmp("-d", argv[2]) == 0)
{
DEBUG = true;
fprintf(stdout, "+++++++++++++++++++++++++++++++++++++++\n");
fprintf(stdout, "+ [DEBUGGING ON] +\n");
fprintf(stdout, "+++++++++++++++++++++++++++++++++++++++\n\n");
}
}


//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// PROCESS PASSED IN TEXT FILE
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

// Open file for reading
FILE* fp = fopen(argv[1], "r+");

// If fp is NULL, file does not exist
if (fp == 0)
{
fprintf(stderr, "File does not exist.\n");
exit(1);
}
if (DEBUG == true) { fprintf(stdout, "File exists.\n"); }
if (DEBUG == true)
{
fprintf(stdout, "\n");
fprintf(stdout, "================================================================================\n");
}

// Process file
processFile(fp);

// Close file pointer
if (fclose(fp) != 0)
{
fprintf(stderr, "File did not close.\n");
}
if (DEBUG == true) { fprintf(stdout, "File closed.\n"); }
if (DEBUG == true)
{
fprintf(stdout, "================================================================================\n");
fprintf(stdout, "\n");
}


//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// FREE ALL MEMORY THAT HASN'T BEEN FREED YET
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


exit(0);
}

以下函数似乎是问题出现的地方。

getWord()

/*
* Using a file pointer, get the first word and then pass a copy of the word
* back
*
* @returns: a boolean of true when word is built
*
* TODO: Find issue in here causing memory error
*
*/
bool getWord(char* line, int* idx, char* word)
{
int wordIdx = 0;

// Build word character by character
for ( ; line[*idx] != '\0'; *idx = (*idx + 1))
{
if (isalpha(line[*idx]) || (line[*idx] == '-'))
{
word[wordIdx++] = tolower(line[*idx]);
}
else if (isspace(line[*idx]))
{
*idx += 1;
return true;
}
}

return true;
}

错误输出

编译然后运行后,这是我不在 Debug模式下运行时得到的输出(调试对我来说实际上只是详细模式):

./main alice.txt

Segmentation fault (core dumped)

valgrind -q --leak-check=full ./main alice.txt

==7320== Invalid write of size 1
==7320== at 0x400A24: getWord (in /tmp/main)
==7320== by 0x400C7B: processFile (in /tmp/main)
==7320== by 0x400F32: main (in /tmp/main)
==7320== Address 0x51f62e2 is 0 bytes after a block of size 50 alloc'd
==7320== at 0x4C28BF6: malloc (vg_replace_malloc.c:299)
==7320== by 0x400AE5: processFile (in /tmp/main)
==7320== by 0x400F32: main (in /tmp/main)
==7320==
==7320== Invalid read of size 1
==7320== at 0x400972: hash (in /tmp/main)
==7320== by 0x400C87: processFile (in /tmp/main)
==7320== by 0x400F32: main (in /tmp/main)
==7320== Address 0x51f62e2 is 0 bytes after a block of size 50 alloc'd
==7320== at 0x4C28BF6: malloc (vg_replace_malloc.c:299)
==7320== by 0x400AE5: processFile (in /tmp/main)
==7320== by 0x400F32: main (in /tmp/main)
==7320==

文本文件

以下是我测试过该程序的 3 个:

test.txt

This isn't only a test, it's a lot of fun!How did I get-here?... Well, I'm not sure either.

ma​​ya.txt

Pretty women wonder where my secret lies.I'm not cute or built to suit a fashion model's sizeBut when I start to tell them,They think I'm telling lies.I say,It's in the reach of my armsThe span of my hips,The stride of my step,The curl of my lips.I'm a womanPhenomenally.Phenomenal woman,That's me.I walk into a roomJust as cool as you please,And to a man,The fellows stand orFall down on their knees.Then they swarm around me,A hive of honey bees.I say,It's the fire in my eyes,And the flash of my teeth,The swing in my waist,And the joy in my feet.I'm a womanPhenomenally.Phenomenal woman,That's me.Men themselves have wonderedWhat they see in me.They try so muchBut they can't touchMy inner mystery.When I try to show themThey say they still can't see.I say,It's in the arch of my back,The sun of my smile,The ride of my breasts,The grace of my style.I'm a womanPhenomenally.Phenomenal woman,That's me.Now you understandJust why my head's not bowed.I don't shout or jump aboutOr have to talk real loud.When you see me passingIt ought to make you proud.I say,It's in the click of my heels,The bend of my hair,the palm of my hand,The need of my care,'Cause I'm a womanPhenomenally.Phenomenal woman,That's me.

爱丽丝.txt

这是text

最佳答案

comment , Brandon Authier声称发布的代码接近 MCVE ( Minimal, Complete, Verifiable Example ) — 它只有 227 行。

我认为 227 行的文件是必要的两倍多;它不是 MCVE。

下面的代码保存在文件 so-4578-8729-mcve.c 中。它有 96 行,使用以下命令编译时,可以在运行 macOS Sierra 10.12.6 的 Mac 上使用 GCC 7.2.0 和 Valgrind 3.13.0.SVN 进行干净的编译:

$ gcc -O3 -g -std=c11 -Wall -Wextra -Werror -Wmissing-prototypes \
> -Wstrict-prototypes so-4578-8729-mcve.c -o so-4578-8729-mcve
$

它在“爱丽丝梦游仙境”中的 Valgrind 下运行干净:

$ valgrind --suppressions=etc/suppressions-macos-10.12.5 -- \
> so-4578-8729-mcve src/data-files/alice-in-wonderland-pg19033.txt
==12363== Memcheck, a memory error detector
==12363== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==12363== Using Valgrind-3.13.0.SVN and LibVEX; rerun with -h for copyright info
==12363== Command: so-4578-8729-mcve src/data-files/alice-in-wonderland-pg19033.txt
==12363==
==12363==
==12363== HEAP SUMMARY:
==12363== in use at exit: 18,188 bytes in 161 blocks
==12363== total heap usage: 180 allocs, 19 frees, 28,482 bytes allocated
==12363==
==12363== LEAK SUMMARY:
==12363== definitely lost: 0 bytes in 0 blocks
==12363== indirectly lost: 0 bytes in 0 blocks
==12363== possibly lost: 0 bytes in 0 blocks
==12363== still reachable: 0 bytes in 0 blocks
==12363== suppressed: 18,188 bytes in 161 blocks
==12363==
==12363== For counts of detected and suppressed errors, rerun with: -v
==12363== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 2 from 2)
$

已修复的代码包括 BLUEPIXY 标识的错误修复在 comment 。相对于 unsigned char 与(plain)char 而言,它也更干净。它没有调试代码或注释。

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static bool getWord(char *line, int *idx, char *word)
{
int wordIdx = 0;

for ( ; line[*idx] != '\0'; *idx = (*idx + 1))
{
if (isalpha((unsigned char)line[*idx]) || (line[*idx] == '-'))
{
word[wordIdx++] = tolower((unsigned char)line[*idx]);
}
else if (isspace((unsigned char)line[*idx]))
{
*idx += 1;
return true;
}
}

return true;
}

static void processFile(FILE *textFp)
{
char line[1024] = "";
char *word = malloc(sizeof(unsigned char) * 50);
int *lineIdx = malloc(sizeof(int));
int lineCount = 1;

*lineIdx = 0;

while (fgets(line, sizeof(line), textFp) != NULL)
{
int charcount = 0;
int wordCount = 1;

for (int m = 0; line[m] != '\0'; m++)
{
if ((line[m] == ' ') && (m == 0 || line[m - 1] != ' '))
{
wordCount++;
}
if (line[m] != '\n' && line[m] != '\r')
{
charcount++;
}
else
{
line[m] = '\0';
}
}

while (*lineIdx < (charcount - 1))
{
for (int i = 0; i < 50; i++)
{
word[i] = '\0';
}
getWord(line, lineIdx, word);
}

*lineIdx = 0;
lineCount++;
}

free(lineIdx);
free(word);
}

int main(int argc, char *argv[])
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s afile.txt\n", argv[0]);
exit(-1);
}

FILE *fp = fopen(argv[1], "r+");

if (fp == 0)
{
fprintf(stderr, "Failed to open file '%s' for reading\n", argv[1]);
exit(1);
}

processFile(fp);

if (fclose(fp) != 0)
fprintf(stderr, "Failed to close file '%s'.\n", argv[1]);

return(0);
}

这几乎是最小的;仍可进一步减少。 -Wmissing-prototypes -Wstrict-prototypes 选项要求将函数声明为static — 或在定义之前声明。由于它们不需要在此源文件之外可见,因此它们被设为静态。这样做的一个优点是,编译器可以告诉我没有使用 hash() 的结果,因此可以删除该调用,当删除该调用时,hash() 函数未使用,因此可以将其删除。并不是每个人都使用如此严格的编译选项,但我更喜欢他们给我的保证。

关于c - 简单C程序中有关字符指针的内存问题,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/45788729/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com