gpt4 book ai didi

c - 连续读取并删除文件中 CR 和 CRLF 之间的字符到缓冲区,然后写入新文件

转载 作者:行者123 更新时间:2023-11-30 17:35:26 26 4
gpt4 key购买 nike

我正在尝试把一个大小为 836 字节的文件分块连续读入大小为 200 的缓冲区,查找位于 CR 和 CRLF 之间的字符,跳过这些字符,并把其余内容写入新文件。

像这样的文件

CRCRLF1bb8CRCRLF!DOC...........text text etx>html text ...........text text etx...........text text etx....>CRCRLF1704CRCRLFtexte classes=====.......>.................>CRCRLF0CRCRLFCRCRLF/EOF
#include <stdlib.h>
#include <stdio.h>
/*
 * Code as posted in the question: counts the bytes of the input file, then
 * reads it in chunks and copies every character that is not part of a
 * recognised "CR LF + short length field" pattern into the CRLF array.
 * NOTE(review): the snippet is incomplete as posted -- the final '}' closes
 * the outer read loop, so the closing brace of main() is missing -- and
 * several statements below cannot work as written; see the inline notes.
 */
int main()
{
// Chunk size used to dimension both work arrays.
// NOTE(review): in C a `const int` is not a constant expression, so the two
// arrays below are variable-length arrays, which C11 does not allow to carry
// an initialiser -- presumably this was built as C++; confirm.
const int BUF_SIZE = 200;
FILE *fptr_in;
FILE *fptr_out;
// Raw chunk read from the input file.
char buffer[BUF_SIZE + 1]={0};
// Destination for the characters that are kept.
char CRLF[BUF_SIZE]={0};
// lastChar is never read or written after this point.
char lastChar = '\0';
// i: read index into buffer, j: write index into CRLF, n: byte counter.
// z stays 0 and sub is assigned but never read.
int i = 0, j = 0, z = 0, n = 0, sub;
size_t result = 0;
long lSize;
if((fptr_in = fopen("LogFile_ProxyBufferContents_FJ_small.html", "r")) == NULL){
printf ("\nError opening file");
return 0;
}
else{
// Count the bytes by consuming the stream one character at a time;
// this leaves the stream positioned at end-of-file.
while(fgetc(fptr_in) != EOF){
n++;
}
if(feof(fptr_in)){
printf_s("\nTotal number of bytes read: %d", n);
printf_s(" Bytes.\n");
}
}
if((fptr_out = fopen("LogFile_ProxyBufferContents_Out.html", "w")) == NULL){
fclose(fptr_in);
return 0;
}
// Obtain the file size (stored in lSize, which is not used afterwards) and
// move back to the start of the file so the chunked read begins at offset 0.
fseek(fptr_in, 0, SEEK_END);
lSize = ftell(fptr_in);
rewind(fptr_in);
// Buffer null check.
// NOTE(review): buffer is an array, not a pointer, so this comparison can
// never be true and the branch is dead code.
if(buffer == NULL){
fclose(fptr_in);
return 0;
}
// Read the file into buffer chunk by chunk; result is the count returned by
// each read.
// NOTE(review): `bufSize` is not declared anywhere in this function (the
// constant above is spelled BUF_SIZE), so this line does not compile as posted.
while((result = fread_s(buffer, bufSize, 1, bufSize, fptr_in)) != 0){
// NOTE(review): i is never reset for a new chunk, so after the first chunk
// this inner loop only runs if a later read returns more than i elements.
while(i < (long)result){
// On CR LF, peek six characters ahead to decide how far to skip.
// NOTE(review): buffer[i + 1] and buffer[i + 6] are read without checking
// them against result, so a pattern near the end of a chunk looks at bytes
// that were not read by this call (and possibly past the end of the array).
if(buffer[i] == '\r' && buffer[i + 1] == '\n'){
if(buffer[i + 6] == '\n'){
i += 6;
}
else if(buffer[i + 6] == '\r'){
i += 7;
}
}
else{
// Keep the character. sub is computed but never used.
// NOTE(review): j only ever grows and is not checked against the size of CRLF.
sub = z -i;
CRLF[j] = buffer[i];
j++;
}
i++;
}

// NOTE(review): these statements sit inside the outer read loop, so the
// files are closed and the function returns after the first chunk has been
// processed; nothing is ever written to fptr_out.
fclose(fptr_in);
fclose(fptr_out);
//printf("\nBuffer after removing CRLF %s\n", CRLF);
system("pause");
return 0;
}

如果 CR CRLF 块位于缓冲区开头,则没有问题;但当它位于缓冲区末尾时,不使用 buffer[i + 6] 我就不知道该如何处理。

我的想法是:如果 buffer[i] 和 buffer[i + 1] 的判断为真(即遇到 CRLF),就检查接下来的 6 个字符是否满足 buffer[i] != '\0'(之所以用 6,是因为 CR 和 CRLF 之间每次保证只有 3 或 4 个字符)。问题出在这样的情况:例如缓冲区在 CR 之后的第 5 个索引处结束,而下一个 CRLF 位于下一个缓冲区的索引 2 处,也就是说,缓冲区 1 是:texte...text>CR170\0,缓冲区 2 是:4CRCRLF,后面接着 HTML 的其余部分……我是编程新手:怎样才能转到下一次读取的缓冲区 2,跳过 CR 之前的字符,并把其余内容照常处理后写入文件?抱歉我的英语不好。有人能帮帮我吗?

编辑:也许我没能解释清楚……我想找到第一个 CRLF 和第二个 CRLF,然后跳过两者之间的十六进制数字。在我的场景中,数据来自经过代理的套接字缓冲区,其中总是带有被 CRLF 包围的 block 长度。你能建议我该怎么做吗?如果 block 长度位于缓冲区的开头、中间或末尾,都没有问题;但如果 block 长度被拆成两半(分落在两个缓冲区中),我就卡住了!

++++示例文件: https://drive.google.com/file/d/0Bw62NZwp1GSnaG1ydXVHREZibEE/edit?usp=sharing

最佳答案

下面是解决我这个问题的完整代码,分享出来希望能帮到别人:

#include <stdio.h>
#include <ctype.h>
#include <tchar.h>
#include <windows.h>

// Name of the file main() opens for reading, and of the file it opens for
// writing the filtered text.
const char* LOG_FILE = "ORIGIN.html";
const char* OUT_FILE = "Out.html";

// Number of elements in the input and output work buffers.
// (A value of 200 was used previously.)
#define BUF_SIZE 4096

// Locate the first CR LF pair in buffer[0 .. size-1].
//   buffer: characters to scan
//   size:   number of valid characters in buffer
// Returns the index of the '\r' that starts the pair, or -1 when the range
// holds no complete pair (which is always the case for fewer than two
// characters).
int find_CRLF(TCHAR* buffer, int size) {
    int idx = 0;

    // A pair needs two characters, so the last index worth testing is size - 2.
    while (idx + 1 < size) {
        if (buffer[idx] == '\r' && buffer[idx + 1] == '\n') {
            return idx;
        }
        ++idx;
    }

    // No pair in range.
    return -1;
}

// Report whether buffer[0 .. size-1] consists only of hexadecimal digits
// (0-9, a-f, A-F).
//   buffer: characters to test
//   size:   number of characters to test
// Returns 1 when every character is a hex digit, 0 otherwise. An empty or
// negative-length range, or a null buffer, is not a hex number and yields 0.
//
// The digits are matched by plain range comparison rather than iswxdigit():
// the data is read from the file byte by byte, and passing a negative char to
// a wide-character classifier is outside that function's defined domain. The
// comparison gives the same answer whether TCHAR is char or wchar_t.
int is_HEX(TCHAR* buffer, int size) {
    if (!buffer || size <= 0) {
        return 0;
    }
    for (int i = 0; i < size; ++i) {
        const TCHAR c = buffer[i];
        const int is_digit = (c >= '0' && c <= '9');
        const int is_lower = (c >= 'a' && c <= 'f');
        const int is_upper = (c >= 'A' && c <= 'F');
        if (!(is_digit || is_lower || is_upper)) {
            return 0;
        }
    }
    // Every character was a hex digit.
    return 1;
}

// Consume one CRLF-terminated line from the start of `buffer`.
//   buffer:     input data; expected to begin at the start of a line
//   size:       number of valid characters in buffer
//   out:        destination for the characters of a kept line
//   byteCopied: running counter, incremented once per character stored in out
// Returns the number of input characters consumed (line plus its two-character
// CRLF), or -1 when buffer holds no CRLF, in which case nothing is copied.
//
// A line made up solely of hex digits is dropped and "Hex skipped" is printed
// to stderr. Any other line is copied up to, but not including, its first
// '\r', so the line terminator itself never reaches `out`.
int get_Line(TCHAR* buffer, int size, TCHAR* out, int* byteCopied) {
    const int eol = find_CRLF(buffer, size);
    if (eol < 0) {
        // Incomplete line: let the caller fetch more data.
        return eol;
    }

    const int consumed = eol + 2;

    if (is_HEX(buffer, eol)) {
        fprintf(stderr, "Hex skipped\n");
        return consumed;
    }

    // buffer[eol] is '\r', so the scan always stops at or before eol.
    int n = 0;
    while (n < consumed && buffer[n] != '\r') {
        out[n] = buffer[n];
        ++n;
        ++(*byteCopied);
    }
    return consumed;
}

// Run get_Line() over every complete line in `buffer`.
//   buffer:     input data, starting at the beginning of a line
//   size:       number of valid characters in buffer
//   out:        receives the kept characters, packed back to back
//   byteCopied: reset to 0 here, then set to the number of characters in out
// Returns how many input characters were consumed; whatever follows that
// offset is an incomplete line (no CRLF yet) and is left untouched.
int filter_Buffer(TCHAR* buffer, int size, TCHAR* out, int* byteCopied) {
    int consumed = 0;
    int step;

    *byteCopied = 0;

    // Each call appends after what has been copied so far and reports how far
    // to advance; a negative result means no further complete line exists.
    while ((step = get_Line(buffer + consumed, size - consumed,
                            out + *byteCopied, byteCopied)) >= 0) {
        consumed += step;
    }
    return consumed;
}

// Write `count` characters from `data` to `output`.
// Returns nonzero only when all of them were written (count <= 0 is a no-op).
static int write_chars(const TCHAR* data, int count, FILE* output) {
    if (count <= 0) {
        return 1;
    }
    return fwrite(data, 1, (size_t)count, output) == (size_t)count;
}

// Refill in_buf from `input`, filter its complete lines into `output`, and
// keep the trailing incomplete line for the next call.
//   input:  stream to read from
//   output: stream that receives the filtered text
//   in_buf: work buffer of BUF_SIZE characters owned by the caller; its first
//           in_pos characters are the remainder left by the previous call
//   in_pos: number of carried-over characters (0 on the first call)
// Returns the number of unprocessed characters now at the start of in_buf, to
// be passed back as in_pos, or -1 when processing is finished: input
// exhausted, in_pos out of range, or a write to `output` failed.
//
// Reads are counted in single bytes, so this assumes TCHAR is a one-byte char.
int filter_BufferFile(FILE* input, FILE* output, TCHAR* in_buf, int in_pos) {
    TCHAR out_buf[BUF_SIZE];
    int byteCopied = 0;

    // A bad offset would make the read length wrap around to a huge value.
    if (in_pos < 0 || in_pos >= BUF_SIZE) {
        return -1;
    }

    const int got = (int)fread(in_buf + in_pos, 1, (size_t)(BUF_SIZE - in_pos), input);
    const int size = in_pos + got;
    if (!size) {
        return -1;
    }

    const int end_pos = filter_Buffer(in_buf, size, out_buf, &byteCopied);
    const int rest = size - end_pos;

    if (!write_chars(out_buf, byteCopied, output)) {
        return -1;
    }

    if (rest == BUF_SIZE) {
        // The buffer is full and holds no CRLF: the line is longer than the
        // buffer, so pass it through unfiltered. A CR in the last slot is held
        // back because its LF may be the first character of the next read.
        const int held = (in_buf[size - 1] == '\r') ? 1 : 0;
        if (!write_chars(in_buf, size - held, output)) {
            return -1;
        }
        if (held) {
            in_buf[0] = '\r';
        }
        return held;
    }

    // Move the incomplete tail to the front (forward copy, so overlap is safe).
    if (end_pos) {
        for (int i = 0; i < rest; ++i) {
            in_buf[i] = in_buf[i + end_pos];
        }
    }

    if (got == 0) {
        // Nothing more can be read, so the tail will never receive its CRLF.
        // Returning its length again would make the caller loop forever;
        // flush it as the final line and report the end of input instead.
        if (rest > 0) {
            if (rest + 2 <= BUF_SIZE) {
                // Terminate the tail so get_Line() applies its usual rules
                // (hex-only lines dropped, copy stops at the first CR).
                in_buf[rest] = '\r';
                in_buf[rest + 1] = '\n';
                byteCopied = 0;
                get_Line(in_buf, rest + 2, out_buf, &byteCopied);
                write_chars(out_buf, byteCopied, output);
            }
            else {
                // No room for a terminator: emit the tail as it is.
                write_chars(in_buf, rest, output);
            }
        }
        return -1;
    }

    return rest;
}

// Filter the whole of `input` into `output`.
//   input:  stream to read from
//   output: stream that receives the filtered text
// Calls filter_BufferFile() repeatedly, handing each call the count of
// carried-over characters returned by the previous one, until it reports a
// negative value.
void filter_File(FILE* input, FILE* output) {
    // Declared with the element type filter_BufferFile() expects, so the
    // pointer passed below has a matching type whatever TCHAR maps to.
    TCHAR in_buf[BUF_SIZE];
    int in_pos = 0;

    while (in_pos >= 0) {
        in_pos = filter_BufferFile(input, output, in_buf, in_pos);
    }
}

// Program entry point: filters LOG_FILE into OUT_FILE.
// Returns 0 on success, 1 when either file cannot be opened or the output
// cannot be closed (flushed) cleanly.
int main(void) {
    // NOTE(review): both files are opened in text mode, as before. On Windows
    // the runtime translates CR LF to LF while reading in this mode, which
    // changes what the filter sees -- confirm this is intended.
    FILE* input = fopen(LOG_FILE, "r");
    if (!input) {
        perror(LOG_FILE);
        return 1;
    }

    FILE* output = fopen(OUT_FILE, "w");
    if (!output) {
        perror(OUT_FILE);
        fclose(input); // do not leak the already opened input stream
        return 1;
    }

    filter_File(input, output);

    // Closing flushes buffered output, so a failure here means data was lost.
    int status = 0;
    if (fclose(output) != 0) {
        perror(OUT_FILE);
        status = 1;
    }
    fclose(input);

    // Keeps the console window open; "pause" is a Windows shell command.
    system("pause");
    return status;
}

演示文件:

Download the file

关于c - 连续读取并删除文件中 CR 和 CRLF 之间的字符到缓冲区,然后写入新文件,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/23004161/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com