gpt4 book ai didi

c - 检测文件 A-Z 和 AA-ZZ 中的字母

转载 作者:塔克拉玛干 更新时间:2023-11-03 04:44:38 26 4
gpt4 key购买 nike

我的问题是一个相当复杂的问题。我正在尝试创建一个程序,该程序将从标准输入(控制台)或文件中读取输入,然后检测每个字母和每个字母对的出现次数,输出将转到逗号分隔值 (output.csv) 文件或格式化文本文件(使用选项 -c)

我想让它检测 AA-ZZ,其中 ab ≠ ba,这意味着输出 .csv 将有 704 行。第一个值将是字母或字母对。第二个值是找到的该字母或字母对的总数,第三个是这个特定字母或字母对占字母(或字母对)总数的百分比。

示例 .csv 文件

301108,"letters"
"a",23858,7.92340
"b",5017,1.66618
"c",6697,2.22412
"d",15027,4.99057
"e",36243,12.03654
"f",6113,2.03017
"g",6699,2.22478
"h",19838,6.58833
"i",19165,6.36483
"j",662,0.21985
"k",3070,1.01957
"l",12294,4.08292
"m",7309,2.42737
"n",20475,6.79989
"o",23600,7.83772
"p",4825,1.60241
"q",180,0.05978
"r",15584,5.17555
"s",18060,5.99785
"t",29362,9.75132
"u",9107,3.02450
"v",2433,0.80802
"w",8111,2.69372
"x",412,0.13683
"y",6809,2.26131
"z",158,0.05247
228914,"letter-pairs"
"aa",0,0.00000
"ab",391,0.17081
"ac",806,0.35210
"ad",1261,0.55086
"ae",1,0.00044
"af",200,0.08737
"ag",483,0.21100
"ah",19,0.00830
"ai",1152,0.50325
"aj",6,0.00262
"ak",346,0.15115
"al",1438,0.62818
"am",528,0.23065
"an",5387,2.35329
"ao",8,0.00349
"ap",369,0.16120
"aq",0,0.00000
"ar",1920,0.83874
"as",2409,1.05236
"at",3019,1.31884
"au",313,0.13673
"av",464,0.20270
"aw",392,0.17124
"ax",7,0.00306
"ay",974,0.42549
"az",30,0.01311
"ba",324,0.14154
"bb",73,0.03189
"bc",0,0.00000
"bd",3,0.00131
"be",1537,0.67143
"bf",0,0.00000
"bg",0,0.00000
"bh",0,0.00000
"bi",153,0.06684
"bj",11,0.00481
"bk",0,0.00000
"bl",480,0.20969
"bm",2,0.00087
"bn",1,0.00044
"bo",948,0.41413
"bp",0,0.00000
"bq",0,0.00000
"br",294,0.12843
"bs",45,0.01966
"bt",17,0.00743
"bu",780,0.34074
"bv",0,0.00000
"bw",2,0.00087
"bx",0,0.00000
"by",292,0.12756
"bz",0,0.00000
"ca",808,0.35297
"cb",0,0.00000
"cc",95,0.04150
"cd",4,0.00175
"ce",921,0.40233
"cf",0,0.00000
"cg",0,0.00000
"ch",1183,0.51679
"ci",257,0.11227
"cj",0,0.00000
"ck",1087,0.47485
"cl",264,0.11533
"cm",0,0.00000
"cn",10,0.00437
"co",1120,0.48927
"cp",0,0.00000
"cq",5,0.00218
"cr",287,0.12537
"cs",10,0.00437
"ct",325,0.14197
"cu",229,0.10004
"cv",0,0.00000
"cw",0,0.00000
"cx",0,0.00000
"cy",16,0.00699
"cz",0,0.00000
"da",367,0.16032
"db",10,0.00437
"dc",3,0.00131
"dd",122,0.05330
"de",1298,0.56703
"df",16,0.00699
"dg",66,0.02883
"dh",2,0.00087
"di",779,0.34030
"dj",0,0.00000
"dk",3,0.00131
"dl",148,0.06465
"dm",23,0.01005
"dn",189,0.08256
"do",1011,0.44165
"dp",4,0.00175
"dq",2,0.00087
"dr",353,0.15421
"ds",249,0.10877
"dt",1,0.00044
"du",104,0.04543
"dv",33,0.01442
"dw",2,0.00087
"dx",0,0.00000
"dy",256,0.11183
"dz",0,0.00000
"ea",1852,0.80904
"eb",70,0.03058
"ec",660,0.28832
"ed",3495,1.52677
"ee",1093,0.47747
"ef",301,0.13149
"eg",204,0.08912
"eh",56,0.02446
"ei",346,0.15115
"ej",3,0.00131
"ek",34,0.01485
"el",1346,0.58799
"em",639,0.27914
"en",2680,1.17075
"eo",57,0.02490
"ep",358,0.15639
"eq",18,0.00786
"er",4790,2.09249
"es",2117,0.92480
"et",976,0.42636
"eu",7,0.00306
"ev",651,0.28439
"ew",297,0.12974
"ex",240,0.10484
"ey",836,0.36520
"ez",12,0.00524
"fa",377,0.16469
"fb",3,0.00131
"fc",0,0.00000
"fd",0,0.00000
"fe",536,0.23415
"ff",352,0.15377
"fg",0,0.00000
"fh",1,0.00044
"fi",444,0.19396
"fj",0,0.00000
"fk",0,0.00000
"fl",200,0.08737
"fm",0,0.00000
"fn",6,0.00262
"fo",1112,0.48577
"fp",0,0.00000
"fq",0,0.00000
"fr",351,0.15333
"fs",15,0.00655
"ft",251,0.10965
"fu",287,0.12537
"fv",0,0.00000
"fw",2,0.00087
"fx",0,0.00000
"fy",8,0.00349
"fz",0,0.00000
"ga",456,0.19920
"gb",0,0.00000
"gc",0,0.00000
"gd",2,0.00087
"ge",752,0.32851
"gf",0,0.00000
"gg",111,0.04849
"gh",989,0.43204
"gi",280,0.12232
"gj",0,0.00000
"gk",0,0.00000
"gl",214,0.09348
"gm",9,0.00393
"gn",59,0.02577
"go",642,0.28045
"gp",0,0.00000
"gq",0,0.00000
"gr",450,0.19658
"gs",189,0.08256
"gt",14,0.00612
"gu",100,0.04368
"gv",0,0.00000
"gw",1,0.00044
"gx",0,0.00000
"gy",7,0.00306
"gz",0,0.00000
"ha",2977,1.30049
"hb",16,0.00699
"hc",0,0.00000
"hd",1,0.00044
"he",9214,4.02509
"hf",4,0.00175
"hg",0,0.00000
"hh",2,0.00087
"hi",2805,1.22535
"hj",0,0.00000
"hk",0,0.00000
"hl",13,0.00568
"hm",34,0.01485
"hn",13,0.00568
"ho",1320,0.57664
"hp",0,0.00000
"hq",0,0.00000
"hr",236,0.10310
"hs",21,0.00917
"ht",697,0.30448
"hu",533,0.23284
"hv",0,0.00000
"hw",3,0.00131
"hx",0,0.00000
"hy",137,0.05985
"hz",0,0.00000
"ia",119,0.05198
"ib",93,0.04063
"ic",606,0.26473
"id",1157,0.50543
"ie",570,0.24900
"if",496,0.21668
"ig",722,0.31540
"ih",0,0.00000
"ii",42,0.01835
"ij",0,0.00000
"ik",151,0.06596
"il",1063,0.46437
"im",1087,0.47485
"in",5058,2.20956
"io",569,0.24856
"ip",169,0.07383
"iq",6,0.00262
"ir",759,0.33157
"is",2164,0.94533
"it",3017,1.31796
"iu",9,0.00393
"iv",370,0.16163
"iw",0,0.00000
"ix",35,0.01529
"iy",0,0.00000
"iz",66,0.02883
"ja",41,0.01791
"jb",0,0.00000
"jc",0,0.00000
"jd",0,0.00000
"je",40,0.01747
"jf",0,0.00000
"jg",0,0.00000
"jh",0,0.00000
"ji",32,0.01398
"jj",0,0.00000
"jk",0,0.00000
"jl",0,0.00000
"jm",0,0.00000
"jn",0,0.00000
"jo",248,0.10834
"jp",0,0.00000
"jq",0,0.00000
"jr",0,0.00000
"js",0,0.00000
"jt",0,0.00000
"ju",301,0.13149
"jv",0,0.00000
"jw",0,0.00000
"jx",0,0.00000
"jy",0,0.00000
"jz",0,0.00000
"ka",6,0.00262
"kb",3,0.00131
"kc",2,0.00087
"kd",1,0.00044
"ke",844,0.36870
"kf",26,0.01136
"kg",0,0.00000
"kh",0,0.00000
"ki",286,0.12494
"kj",0,0.00000
"kk",0,0.00000
"kl",68,0.02971
"km",0,0.00000
"kn",287,0.12537
"ko",81,0.03538
"kp",0,0.00000
"kq",0,0.00000
"kr",2,0.00087
"ks",141,0.06160
"kt",2,0.00087
"ku",4,0.00175
"kv",0,0.00000
"kw",6,0.00262
"kx",0,0.00000
"ky",152,0.06640
"kz",0,0.00000
"la",1087,0.47485
"lb",9,0.00393
"lc",10,0.00437
"ld",896,0.39141
"le",1955,0.85403
"lf",251,0.10965
"lg",4,0.00175
"lh",8,0.00349
"li",1160,0.50674
"lj",0,0.00000
"lk",113,0.04936
"ll",1987,0.86801
"lm",64,0.02796
"ln",19,0.00830
"lo",1030,0.44995
"lp",40,0.01747
"lq",0,0.00000
"lr",4,0.00175
"ls",230,0.10047
"lt",172,0.07514
"lu",157,0.06858
"lv",55,0.02403
"lw",72,0.03145
"lx",0,0.00000
"ly",981,0.42855
"lz",0,0.00000
"ma",838,0.36608
"mb",148,0.06465
"mc",4,0.00175
"md",0,0.00000
"me",1834,0.80117
"mf",41,0.01791
"mg",0,0.00000
"mh",0,0.00000
"mi",597,0.26080
"mj",0,0.00000
"mk",0,0.00000
"ml",8,0.00349
"mm",89,0.03888
"mn",29,0.01267
"mo",714,0.31191
"mp",293,0.12800
"mq",0,0.00000
"mr",65,0.02839
"ms",318,0.13892
"mt",3,0.00131
"mu",243,0.10615
"mv",0,0.00000
"mw",0,0.00000
"mx",0,0.00000
"my",167,0.07295
"mz",0,0.00000
"na",223,0.09742
"nb",4,0.00175
"nc",553,0.24158
"nd",4516,1.97279
"ne",1596,0.69721
"nf",61,0.02665
"ng",2782,1.21530
"nh",9,0.00393
"ni",554,0.24201
"nj",90,0.03932
"nk",173,0.07557
"nl",149,0.06509
"nm",6,0.00262
"nn",103,0.04500
"no",1466,0.64042
"np",12,0.00524
"nq",23,0.01005
"nr",6,0.00262
"ns",509,0.22235
"nt",2310,1.00911
"nu",112,0.04893
"nv",40,0.01747
"nw",20,0.00874
"nx",10,0.00437
"ny",356,0.15552
"nz",3,0.00131
"oa",175,0.07645
"ob",147,0.06422
"oc",178,0.07776
"od",507,0.22148
"oe",252,0.11009
"of",1653,0.72211
"og",120,0.05242
"oh",128,0.05592
"oi",230,0.10047
"oj",1,0.00044
"ok",423,0.18479
"ol",725,0.31671
"om",1924,0.84049
"on",2891,1.26292
"oo",1207,0.52727
"op",345,0.15071
"oq",9,0.00393
"or",2181,0.95276
"os",486,0.21231
"ot",1103,0.48184
"ou",3592,1.56915
"ov",339,0.14809
"ow",1425,0.62250
"ox",23,0.01005
"oy",333,0.14547
"oz",9,0.00393
"pa",413,0.18042
"pb",3,0.00131
"pc",0,0.00000
"pd",0,0.00000
"pe",918,0.40102
"pf",4,0.00175
"pg",0,0.00000
"ph",39,0.01704
"pi",388,0.16950
"pj",0,0.00000
"pk",3,0.00131
"pl",439,0.19178
"pm",3,0.00131
"pn",0,0.00000
"po",579,0.25293
"pp",338,0.14765
"pq",0,0.00000
"pr",536,0.23415
"ps",99,0.04325
"pt",219,0.09567
"pu",164,0.07164
"pv",0,0.00000
"pw",3,0.00131
"px",0,0.00000
"py",28,0.01223
"pz",0,0.00000
"qa",0,0.00000
"qb",0,0.00000
"qc",0,0.00000
"qd",0,0.00000
"qe",0,0.00000
"qf",0,0.00000
"qg",0,0.00000
"qh",0,0.00000
"qi",0,0.00000
"qj",0,0.00000
"qk",0,0.00000
"ql",0,0.00000
"qm",0,0.00000
"qn",0,0.00000
"qo",0,0.00000
"qp",0,0.00000
"qq",0,0.00000
"qr",0,0.00000
"qs",0,0.00000
"qt",0,0.00000
"qu",180,0.07863
"qv",0,0.00000
"qw",0,0.00000
"qx",0,0.00000
"qy",0,0.00000
"qz",0,0.00000
"ra",896,0.39141
"rb",53,0.02315
"rc",159,0.06946
"rd",469,0.20488
"re",3957,1.72860
"rf",53,0.02315
"rg",96,0.04194
"rh",19,0.00830
"ri",1096,0.47878
"rj",0,0.00000
"rk",158,0.06902
"rl",173,0.07557
"rm",179,0.07820
"rn",393,0.17168
"ro",1391,0.60765
"rp",96,0.04194
"rq",0,0.00000
"rr",277,0.12101
"rs",671,0.29312
"rt",614,0.26822
"ru",226,0.09873
"rv",57,0.02490
"rw",37,0.01616
"rx",0,0.00000
"ry",620,0.27084
"rz",0,0.00000
"sa",899,0.39272
"sb",14,0.00612
"sc",330,0.14416
"sd",15,0.00655
"se",1837,0.80248
"sf",35,0.01529
"sg",9,0.00393
"sh",1172,0.51198
"si",779,0.34030
"sj",2,0.00087
"sk",99,0.04325
"sl",248,0.10834
"sm",146,0.06378
"sn",82,0.03582
"so",1022,0.44646
"sp",451,0.19702
"sq",15,0.00655
"sr",1,0.00044
"ss",651,0.28439
"st",2202,0.96193
"su",504,0.22017
"sv",0,0.00000
"sw",125,0.05461
"sx",0,0.00000
"sy",55,0.02403
"sz",0,0.00000
"ta",774,0.33812
"tb",5,0.00218
"tc",208,0.09086
"td",1,0.00044
"te",2166,0.94621
"tf",25,0.01092
"tg",0,0.00000
"th",9263,4.04650
"ti",1533,0.66968
"tj",0,0.00000
"tk",0,0.00000
"tl",475,0.20750
"tm",25,0.01092
"tn",37,0.01616
"to",3522,1.53857
"tp",1,0.00044
"tq",0,0.00000
"tr",677,0.29574
"ts",733,0.32021
"tt",555,0.24245
"tu",392,0.17124
"tv",0,0.00000
"tw",200,0.08737
"tx",0,0.00000
"ty",267,0.11664
"tz",1,0.00044
"ua",101,0.04412
"ub",121,0.05286
"uc",567,0.24769
"ud",204,0.08912
"ue",166,0.07252
"uf",103,0.04500
"ug",472,0.20619
"uh",0,0.00000
"ui",144,0.06291
"uj",0,0.00000
"uk",2,0.00087
"ul",941,0.41107
"um",205,0.08955
"un",1200,0.52421
"uo",23,0.01005
"up",577,0.25206
"uq",0,0.00000
"ur",1040,0.45432
"us",969,0.42330
"ut",1429,0.62425
"uu",0,0.00000
"uv",23,0.01005
"uw",1,0.00044
"ux",2,0.00087
"uy",7,0.00306
"uz",20,0.00874
"va",129,0.05635
"vb",0,0.00000
"vc",0,0.00000
"vd",0,0.00000
"ve",1899,0.82957
"vf",0,0.00000
"vg",0,0.00000
"vh",0,0.00000
"vi",304,0.13280
"vj",0,0.00000
"vk",0,0.00000
"vl",0,0.00000
"vm",0,0.00000
"vn",0,0.00000
"vo",66,0.02883
"vp",0,0.00000
"vq",0,0.00000
"vr",0,0.00000
"vs",0,0.00000
"vt",0,0.00000
"vu",3,0.00131
"vv",0,0.00000
"vw",0,0.00000
"vx",0,0.00000
"vy",15,0.00655
"vz",0,0.00000
"wa",2115,0.92393
"wb",2,0.00087
"wc",1,0.00044
"wd",18,0.00786
"we",1168,0.51024
"wf",46,0.02009
"wg",0,0.00000
"wh",1155,0.50456
"wi",1177,0.51417
"wj",0,0.00000
"wk",4,0.00175
"wl",36,0.01573
"wm",0,0.00000
"wn",337,0.14722
"wo",813,0.35516
"wp",0,0.00000
"wq",0,0.00000
"wr",46,0.02009
"ws",100,0.04368
"wt",4,0.00175
"wu",1,0.00044
"wv",0,0.00000
"ww",1,0.00044
"wx",0,0.00000
"wy",52,0.02272
"wz",0,0.00000
"xa",33,0.01442
"xb",0,0.00000
"xc",43,0.01878
"xd",0,0.00000
"xe",34,0.01485
"xf",0,0.00000
"xg",0,0.00000
"xh",8,0.00349
"xi",45,0.01966
"xj",0,0.00000
"xk",0,0.00000
"xl",0,0.00000
"xm",0,0.00000
"xn",0,0.00000
"xo",0,0.00000
"xp",65,0.02839
"xq",0,0.00000
"xr",0,0.00000
"xs",0,0.00000
"xt",63,0.02752
"xu",5,0.00218
"xv",18,0.00786
"xw",0,0.00000
"xx",43,0.01878
"xy",0,0.00000
"xz",0,0.00000
"ya",35,0.01529
"yb",127,0.05548
"yc",5,0.00218
"yd",10,0.00437
"ye",353,0.15421
"yf",1,0.00044
"yg",1,0.00044
"yh",8,0.00349
"yi",115,0.05024
"yj",0,0.00000
"yk",0,0.00000
"yl",43,0.01878
"ym",21,0.00917
"yn",3,0.00131
"yo",1063,0.46437
"yp",4,0.00175
"yq",0,0.00000
"yr",43,0.01878
"ys",409,0.17867
"yt",97,0.04237
"yu",0,0.00000
"yv",7,0.00306
"yw",44,0.01922
"yx",0,0.00000
"yy",0,0.00000
"yz",3,0.00131
"za",8,0.00349
"zb",0,0.00000
"zc",0,0.00000
"zd",0,0.00000
"ze",96,0.04194
"zf",0,0.00000
"zg",0,0.00000
"zh",0,0.00000
"zi",7,0.00306
"zj",0,0.00000
"zk",0,0.00000
"zl",4,0.00175
"zm",0,0.00000
"zn",0,0.00000
"zo",1,0.00044
"zp",0,0.00000
"zq",0,0.00000
"zr",0,0.00000
"zs",0,0.00000
"zt",0,0.00000
"zu",0,0.00000
"zv",2,0.00087
"zw",0,0.00000
"zx",0,0.00000
"zy",10,0.00437
"zz",10,0.00437

选项:

-c//以CSV格式输出

-f//接受输入文件,如果没有指定输入是stdin

-o//带有结果的输出文件,如果使用-c,则为CSV格式

这是我用来获取参数的 main.c

//main.c

/*
 * Program entry point (excerpt): parses the command-line options
 *   -c          select CSV output
 *   -f <file>   input file
 *   -o <file>   output file
 * and records which ones were seen in flagC / flagF / flagO.
 * The remainder of the function is elided in the original post.
 */
int main(int argc, char * argv[]){

    /* getopt() returns int; storing it in a char breaks the "!= -1"
       end-of-options test on platforms where plain char is unsigned. */
    int opt;
    /* NULL until the matching option supplies a name */
    char *filename_in = NULL, *filename_out = NULL;
    int i, flagC=0,flagF=0,flagO=0;

    while((opt = getopt(argc,argv,"cf:o:")) != -1){
        switch(opt){
        case 'c':
            flagC=1;
            break;
        case 'o':
            flagO=1;
            filename_out = optarg; /* points into argv; not copied */
            break; //etc with all options...

要打开文件并读取它,我有这个函数可以将读取的每个字符转换为小写,但我还需要实现一些函数,在读取字符时记录每个字母和字母对出现的次数。

/*
 * Reads the file named by filename_in, echoes its contents to stdout in
 * lower case, then prints the occurrence count of every ordered letter
 * pair "aa".."zz" as one "<pair>,<count>" line each (676 lines).
 *
 * Pairs are case-insensitive and must be adjacent in the input: any
 * non-letter between two letters breaks the pair.
 *
 * Returns 0 on success. If the file cannot be opened, the error is
 * reported with perror() and the process exits with EXIT_FAILURE.
 */
int openFile(char *filename_in)
{
    /* int, not char: fgetc() returns an int so that EOF stays
       distinguishable from every valid byte value. */
    int ch;
    FILE *fp;
    int pairCount[26][26] = {{0}};  /* [first letter][second letter] */
    int newch;
    int oldch = '\0';               /* '\0' is never a letter, so the
                                       first character cannot form a pair */
    int i, j;

    fp = fopen(filename_in, "r");
    if (fp == NULL) {
        /* perror() appends ": <reason>\n" itself, so no newline here */
        perror("Error while opening the file");
        exit(EXIT_FAILURE);
    }

    /* Echo and count in a single pass. Two separate read loops do not
       work: the first one consumes the stream up to EOF and leaves
       nothing for the second one to count. */
    while ((ch = fgetc(fp)) != EOF) {
        newch = tolower(ch);
        putchar(newch);

        /* if both old and new chars are letters, update the pair count */
        if (oldch >= 'a' && oldch <= 'z' && newch >= 'a' && newch <= 'z')
            pairCount[oldch - 'a'][newch - 'a']++;

        /* keep a copy of the char for the next pair */
        oldch = newch;
    }

    /* print the 2d table aa-zz */
    for (i = 0; i < 26; i++) {
        for (j = 0; j < 26; j++) {
            printf("%c%c,%d\n", i + 'a', j + 'a', pairCount[i][j]);
        }
    }

    fclose(fp);
    return 0;
}

这是我在网上找到的一种统计字符出现次数的方法,可能有用,但我不知道如何用字母对实现它...

/*
 * Tallies the lower-case letters of the NUL-terminated string s.
 * For every character in 'a'..'z' the matching slot of count is
 * incremented (slot 0 for 'a' up to slot 25 for 'z'); all other
 * characters are ignored. count is only incremented, never cleared,
 * so the caller decides its starting values.
 */
void find_frequency(char s[], int count[]) {
    const char *p;

    for (p = s; *p != '\0'; p++) {
        /* skip anything that is not a lower-case letter */
        if (*p < 'a' || *p > 'z')
            continue;
        count[*p - 'a']++;
    }
}

我想使我的问题在范围上更简洁。我将如何检测字母对并存储它们?我可以使用 702 个 if 语句来做到这一点,但这显然不是解决它的好方法。我一直在绞尽脑汁想弄清楚如何使用通配符来检测它。

TLDR; 我将如何检测字母对并存储它们?

我认为最好的方法是逐个读取一个字符并将信息保存在一个数组中。我已经研究了 3 天的好方法,但我感到很沮丧。也许我应该使用结构。

最佳答案

使用二维数组,其中行由一对中的第一个字符索引,列由一对中的第二个字符索引。所以数组将被声明为

// 26x26 table of pair counters, indexed [first letter][second letter];
// the {{0}} initializer sets every counter to zero.
int pairCount[26][26] = {{0}};

并且您需要跟踪前一个字符。所以

int oldch, newch;

oldch = '\0'; // init old char to invalid value
while( ( ch = fgetc(fp) ) != EOF )
{
// get the uppercase version of the character
newch = toupper(ch);

// if both old and new chars are valid, update the pair count
if ( oldch >= 'A' && oldch <= 'Z' && newch >= 'A' && newch <= 'Z' )
pairCount[oldch - 'A'][newch - 'A']++;

// keep a copy of the char for the next pair
oldch = newch;
}

关于c - 检测文件 A-Z 和 AA-ZZ 中的字母,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/28667269/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com