gpt4 book ai didi

c++ - 从文件中读取时遇到问题。似乎过早地达到了 EOF

转载 作者:行者123 更新时间:2023-11-28 08:25:25 28 4
gpt4 key购买 nike

我确信这个问题可以相对容易地解决,但我正在努力寻找问题所在。我的代码只是从文件中读取所有单词,然后将每个单词、单词位置、句子的开头和结尾存储在一个数组中。数组输出到另一个文本文件。

我可以读入直到最后一句话的所有信息,然后我有一个错误。有什么想法吗?

/**
* Programmer: fryeguy
* Course:
* Program: TxtCrawl for MicroSearch
*
* Algorithm:
* TxtCrawl is the component of MicroSearch that reads text
* documents for search terms and stores them for
* indexing
*
* 1. Count words in doc, then initialize
* wordsFromDoc array to wordCount
* 2. Initiate output file for writing.
* 3. Open input file for reading words.
* 4. Until reaching EOF:
* 4.a. Set value for start "get pointer" in startSentence (.tellg()).
* 4.b. Store value for end "get pointer" in endSentence (.tellg()).
* 4.c. Reset "get pointer" to startSentence location.
* 4.d. Until reaching endSentence, Read into the
* array theWord, wordPos, startSent, and endSent
* 5. Write wordsFromDoc array to file
* 6. When EOF is reached close the files.
*/

#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

using namespace std;

/**
 * Record kept for each word placed in the word array: the word itself
 * plus where it and its sentence are located.
 */
struct wordProps
{
    std::string theWord; ///< the word
    int wordPos;         ///< position of the word
    int startSent;       ///< start point of the sentence
    int endSent;         ///< end point of the sentence
};

// Forward declaration; the definition follows main().
// Arguments, in order: file name, character counter, word counter.
void countWords(string, int&, int&);

int main()
{

ifstream iFile; // file stream for reading in data
ofstream oFile; // file stream for writing data

string iFileName = "TextFile2.txt"; // name of test file to read from
string oFileName = "OutputFile.txt"; // name of test file to write to
string aLine = ""; // stores a line preceeding a newline character (\n)
string aWord = ""; // stores words from doc for indexing
int charCount = 0; // count of characters in doc
int wordCount = 0; // count of words in doc
int aLineWordCount = 0; // count of words in a single line being processed
int wordBegin = 0; // stores location of word in doc
int startSentence = 0; // stores pointer value for start of sentence
int endSentence = 0; // stores pointer value for end of sentence

/**
* 1. Count words in doc, then initialize
* wordsFromDoc array to wordCount
*/
countWords(iFileName, charCount, wordCount);
cout << "charCount: " << charCount << endl; // DEBUG CODE
cout << "wordCount: " << wordCount << endl; // DEBUG CODE
wordProps wordsFromDoc[wordCount];
cout<< "length of array: " << (sizeof(wordsFromDoc) / sizeof(*wordsFromDoc)) << endl; // DEBUG CODE

/**
* 2. Initiate output file for writing
*/
oFile.open (oFileName.c_str()); // setup output file and write header
oFile << setw(20) << left << "File Name: " << iFileName << endl;
oFile << setw(20) << "---------------------------------------" << endl << endl;

/**
* 3. Open input file for reading words
*/
iFile.open (iFileName.c_str());
if (!iFile.is_open())
cout << "No such file exists!" << endl;
else
{
/**
* 4. Until reaching EOF:
*/
// I have been attempting different counting methods assuming the eof was being reached prematurely
// The results really have not varied with this code
// while (iFile.tellg() != charCount)
while (!iFile.eof())
{
//cout << "count: " << count << endl;
/**
* 4.a. Set value for start "get pointer" in startSentence (.tellg()).
*/
startSentence = iFile.tellg();
cout << "startSentence: " << startSentence << endl; // DEBUG CODE

/**
* 4.b. Store value for end "get pointer" in endSentence (.tellg()).
*/
getline(iFile, aLine, '.');
cout << aLine << endl; // DEBUG CODE
endSentence = iFile.tellg();
aLine.clear();
cout << "endSentence: " << endSentence << endl; // DEBUG CODE

if (!iFile.is_open())
{
cout << "The if, iFile.tellg(): " << iFile.tellg() << endl; // DEBUG CODE
iFile.close();
iFile.open (iFileName.c_str());
}

/**
* 4.c. Reset "get pointer" to startSentence location.
*/
iFile.seekg(startSentence);
cout << "iFile.tellg(): " << iFile.tellg() << endl; // DEBUG CODE

/**
* 4.d. Until reaching endSentence, Read into the
* array theWord, wordPos, startSent, and endSent
*/

// As the last line is about to be read there is an error of some sort.
// My guess is that somehow I exceed the end of the file but my startSentence
// and endSentence variables are pointing where I think they should.

for ( ; iFile.tellg() < endSentence; aLineWordCount++)
{
wordsFromDoc[aLineWordCount].wordPos = iFile.tellg();
cout << "wordPos: " << wordsFromDoc[aLineWordCount].wordPos << endl; // DEBUG CODE
iFile >> wordsFromDoc[aLineWordCount].theWord;
cout << "theWord: " << wordsFromDoc[aLineWordCount].theWord << endl; // DEBUG CODE
wordsFromDoc[aLineWordCount].startSent = startSentence;
cout << "startSent: " << wordsFromDoc[aLineWordCount].startSent << endl; // DEBUG CODE
wordsFromDoc[aLineWordCount].endSent = endSentence;
cout << "endSent: " << wordsFromDoc[aLineWordCount].endSent << endl << endl; // DEBUG CODE
cout << "aLineWordCount: " << aLineWordCount << endl;
} // end for

} // end while !=iFile.eof

// THIS section of code is never reached because of the hang up above.
/**
* 5. Write wordsFromDoc array to file
*/
for (int count = 0; count < aLineWordCount; count++)
{
oFile << setw(20) << left
<< wordsFromDoc[count].theWord << " "
<< wordsFromDoc[count].wordPos << " "
<< wordsFromDoc[count].startSent << " "
<< wordsFromDoc[count].endSent << endl;
}

} // end else

/**
* 6. When EOF is reached close the files.
*/
iFile.close();
oFile.close();

// DEBUG CDODE for verifying results
// for (int count = 0; count < wordCount; count++) {
// cout << "theWord: " << wordsFromDoc[count].theWord << endl;
// cout << "wordPos: " << wordsFromDoc[count].wordPos << endl;
// cout << "startSent: " << wordsFromDoc[count].startSent << endl;
// cout << "endSent: " << wordsFromDoc[count].endSent << endl << endl;
// }

}

/**
 * Count the characters and the whitespace-separated words in a file.
 *
 * Both totals are added to whatever the reference arguments already hold,
 * so callers that want plain counts must pass counters that start at 0.
 *
 * If the file cannot be opened, "No such file exists!" is printed once and
 * neither counter is changed.
 *
 * @param theFileName path of the file to scan
 * @param charCount   incremented once for every character read
 * @param wordCount   incremented once for every word extracted with >>
 */
void countWords(std::string theFileName, int &charCount, int &wordCount)
{
    // Read-only stream: a default-mode fstream asks for write access too
    // and therefore fails on files that may only be read.
    std::ifstream inFile(theFileName.c_str());
    if (!inFile.is_open())
    {
        std::cout << "No such file exists!" << std::endl;
        return;
    }

    // count the chars; looping on the result of get() stops on any read
    // failure, not only on end-of-file
    char theChar = ' ';
    while (inFile.get(theChar))
        charCount++;

    // Rewind for the second pass. clear() is required first because the
    // stream is in a failed state after running into end-of-file.
    inFile.clear();
    inFile.seekg(0, std::ios::beg);

    // count the words; a word is counted only when the extraction actually
    // succeeded, so trailing whitespace does not add a phantom word
    std::string theWord;
    while (inFile >> theWord)
        wordCount++;
}

最佳答案

Istream

我查过了。istream 的 get 和 getline 都没有能够一次处理多个分隔符的版本 [1]。

其他人也遇到过同样的问题 [2]。逐字符(char-by-char)读取是最实用的解决方案;其他方案则需要为现有的 istream 方法编写增强版本。

一个想法

  1. 一次将整个文件读入内存。
  2. 删除换行符(任何 CR 或 LF)。
  3. 以每一种句子结束符(例如句号)为界,将文档拆分成行;在将文档写回磁盘时,在每个结束符之后放置一个统一的标记(LF 或 ETX '\003')。
  4. 现在可以照常处理文档了,只是改用这个已知标记(而不是句号)作为分隔符。
  5. 删除包含重新定界文档的临时文件。

一次读入整个文档不成问题,因为它最终无论如何都会全部进入内存:把所有单词的字符串连接在一起就等于整个文件。一旦将重新分隔的文档写入磁盘,就可以释放内存。

注意事项

1 istream::get
2 getline 的多个定界符(在 Code Guru 上的讨论)

关于c++ - 从文件中读取时遇到问题。似乎过早地达到了 EOF,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/4234026/

28 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com