gpt4 book ai didi

c - 电子邮件内容分类

转载 作者:行者123 更新时间:2023-11-30 17:28:09 24 4
gpt4 key购买 nike

我想将电子邮件的主题(Subject)内容提取到一个文本文件中,将其他标题字段提取到第二个文本文件中,最后将邮件正文提取到第三个文本文件中。我的代码可以提取内容只有一行的电子邮件字段,但如果某个字段的内容超过一行,它就无法完整提取。(这是必需的,因为诸如“Subject”、“To”等字段可能跨越多行。)请帮助我...我的代码如下:

程序名称:f2all.c

# include <stdio.h>
# include <string.h>

/*
 * f2all: scan the e-mail file argv[1] line by line and copy parts of it to
 * the output files argv[2] (subject text) and argv[3] (text following
 * "Date:", "From:", "X-cc:" or "X-To:"). argv[4] is opened for the message
 * body but nothing is ever written to it in this version.
 *
 * Returns 1 on too few arguments or when the input file cannot be opened,
 * 0 otherwise.
 *
 * NOTE(review): each line is tested on its own, so only the first line of a
 * multi-line (folded) header field is recognised as belonging to that field.
 */
int main (int argc, char **argv) {

/* Four file names are required, although the usage text only names two. */
if (argc < 5) {
fprintf (stderr, "Error: insufficient input. Usage: %s input_file output_file\n",
argv[0]);
return 1;
}

FILE *ifp = fopen(argv[1],"r");
FILE *ofp1 = fopen(argv[2],"w"); /* e.g. f.txt: should contain the contents of the subject field only */
FILE *ofp2= fopen(argv[3],"w"); /* e.g. g.txt: should contain the contents of all other header fields only */
FILE *ofp3= fopen(argv[4],"w"); /* e.g. h.txt: should contain the message body only (never written below) */
/* NOTE(review): the three output streams are not checked for NULL. */

char *buf = NULL; /* NULL together with n == 0 makes getline allocate the line buffer */
char *buf1 = NULL; /* NOTE(review): never pointed at any storage; see strcpy below */
ssize_t read = 0;
size_t n = 0;
char *ptr = NULL;

if (ifp==NULL)
{
printf("\nFile cannot be opened\n");
return 1;
}
else
{
while ((read = getline (&buf, &n, ifp)) != -1)
{
/* strstr finds the keyword anywhere in the line, not only at its start. */
if (((ptr=strstr(buf,"Subject:")) != 0))
{
fprintf(ofp1,"%s",(ptr+8)); /* ptr + 8 skips the 8 characters of "Subject:" */
}
if ((ptr=strstr(buf,"subject :")) != 0)
{
fprintf(ofp1,"%s",(ptr+9)); /* ptr + 9 skips the 9 characters of "subject :" */
}

/* || short-circuits, so ptr is left at the first keyword that matched;
   all three keywords are 5 characters long, hence ptr + 5. */
if (((ptr=strstr(buf,"Date:")) != 0)||((ptr=strstr(buf,"From:")) != 0)||((ptr=strstr(buf,"X-cc:")) != 0))
{
fprintf(ofp2,"%s",(ptr+5));
}
if ((ptr=strstr(buf,"X-To:")) != 0)
{
fprintf(ofp2,"%s",(ptr+5));
}
else
{
/* NOTE(review): this else pairs only with the "X-To:" test, so it runs for
   every line that lacks "X-To:", including lines already handled above.
   buf1 is still NULL here, so strcpy writes through a null pointer
   (undefined behaviour); this is most likely the reported segfault. */
strcpy(buf1,buf);
fprintf(ofp1,"%s",buf1);

}
}
}
/* NOTE(review): free is declared in <stdlib.h>; only <stdio.h> and
   <string.h> are included with this program, which would explain the
   implicit-declaration warning from the compiler. */
if (buf) /* free memory allocated by getline for buf */
free (buf);
fclose(ofp1);
fclose(ofp2);
fclose(ofp3);
fclose(ifp);

return 0;
}

我编译并运行程序如下:

princy@PRINCY:~/minipjt/SUBJECT$ cc f2all.c
f2all.c:在函数“main”中:
f2all.c:85:9:警告:内置函数“free”的隐式声明不兼容[默认启用]
princy@PRINCY:~/minipjt/SUBJECT$ ./a.out 8.txt f.txt g.txt h.txt
段错误(核心转储)

最佳答案

您可以在扫描文件时建立上下文,然后根据该上下文打印到输出文件。否则,您的条件将仅适用于每个 header 条目的第一行。

从这个角度来看,保留输出文件指针数组是有意义的。

您的代码从 header 条目中删除关键字。这意味着读取输出文件时上下文将丢失:xy@example.com 是发件人、收件人还是首选回复地址?

下面基于您的示例实现。

#define _GNU_SOURCE 1

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/*
 * Parsing context of the line currently being processed.
 *
 * The non-negative values double as indices into the array of output
 * streams used by main(); NONE means no context has been established yet,
 * so the line is not written anywhere.
 */
enum {
    NONE    = -1,
    SUBJECT = 0,
    HEADER  = 1,
    BODY    = 2
};

/*
* Check whether the line starts with any of the given keywords in
* kw. If so, return a pointer to the char after the colon. If not,
* return NULL. The array kw must be terminated with NULL.
*/
const char *is_header(const char *line, const char **kw)
{
    /*
     * Matching is case-sensitive and anchored at the first character of
     * line. strncasecmp (POSIX, <strings.h>) could be used instead for
     * case-insensitive matching.
     */
    for (; *kw != NULL; kw++) {
        /* size_t, not int: strlen returns size_t and len is used as an index. */
        size_t len = strlen(*kw);

        /*
         * When strncmp reports equality over len characters, line holds at
         * least len non-NUL characters, so reading line[len] is in bounds.
         */
        if (strncmp(line, *kw, len) == 0 && line[len] == ':') {
            return line + len + 1;
        }
    }

    return NULL;
}

/*
 * Field names whose text is routed to the subject output.
 * NULL-terminated, as is_header() requires.
 */
const char *header_subject[] = {
    "Subject",
    NULL
};

/*
 * Field names whose text is routed to the "other headers" output.
 * Extend the list as needed; it must stay NULL-terminated, as is_header()
 * requires.
 */
const char *header_other[] = {
    "From",
    "To",
    "Date",
    /* ... */
    NULL
};

/*
 * Split an e-mail message into three files.
 *
 * Usage: prog input_file subject_file header_file body_file
 *
 *   subject_file  text of the Subject field, including folded continuation
 *                 lines
 *   header_file   all other header fields; the field name is stripped for
 *                 the keywords in header_other and kept for any other field
 *   body_file     everything after the first empty line
 *
 * Returns 0 on success, 1 on a usage error or on any I/O failure.
 */
int main(int argc, char **argv)
{
    if (argc < 5) {
        fprintf(stderr,
            "Error: insufficient input. "
            "Usage: %s input_file subject_file header_file body_file\n",
            argv[0]);
        return 1;
    }

    FILE *ifp = NULL;
    FILE *ofp[3] = { NULL, NULL, NULL };   /* indexed by SUBJECT, HEADER, BODY */
    const size_t n_out = sizeof ofp / sizeof ofp[0];
    char *buf = NULL;   /* NULL with n == 0 lets getline allocate the buffer */
    size_t n = 0;
    int context = NONE;
    int rc = 1;

    ifp = fopen(argv[1], "r");
    if (ifp == NULL) {
        perror(argv[1]);
        goto cleanup;
    }

    for (size_t i = 0; i < n_out; i++) {
        ofp[i] = fopen(argv[i + 2], "w");
        if (ofp[i] == NULL) {
            perror(argv[i + 2]);
            goto cleanup;
        }
    }

    while (getline(&buf, &n, ifp) != -1) {
        const char *line = buf;

        if (context != BODY) {
            /* Still in the header section: work out which field this is. */

            /* Skip leading white space. */
            while (*line == ' ' || *line == '\t') {
                line++;
            }

            if (*line == '\n' || *line == '\r') {
                context = BODY;     /* An empty line starts the body ... */
                continue;           /* ... but we don't print it. */
            }

            if (line == buf) {
                /*
                 * No leading white space, so this line starts a new header
                 * field. Lines that do start with white space are folded
                 * continuations and keep the current context.
                 */
                const char *p;

                if ((p = is_header(buf, header_subject)) != NULL) {
                    context = SUBJECT;
                } else if ((p = is_header(buf, header_other)) != NULL) {
                    context = HEADER;
                } else {
                    /*
                     * A field not listed in the keyword tables. Send it to
                     * the header file with its name intact, so that it is
                     * not appended to whatever field came before it.
                     */
                    context = HEADER;
                }

                if (p != NULL) {
                    /* Known keyword: drop "Name:" and the blanks after it. */
                    line = p;
                    while (*line == ' ' || *line == '\t') {
                        line++;
                    }
                }
            }
        }

        if (context != NONE && fputs(line, ofp[context]) == EOF) {
            perror(argv[context + 2]);
            goto cleanup;
        }
    }

    /* getline returns -1 for both end of file and a read error. */
    if (ferror(ifp)) {
        perror(argv[1]);
    } else {
        rc = 0;
    }

cleanup:
    free(buf);
    for (size_t i = 0; i < n_out; i++) {
        /* fclose flushes buffered output, so a failure means lost data. */
        if (ofp[i] != NULL && fclose(ofp[i]) != 0) {
            perror(argv[i + 2]);
            rc = 1;
        }
    }
    if (ifp != NULL) {
        fclose(ifp);
    }

    return rc;
}

关于c - 电子邮件内容分类,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/26117243/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com