gpt4 book ai didi

c# - 如何根据每列的最大长度对齐 txt 文件中不同长度的列?

转载 作者:行者123 更新时间:2023-11-30 16:41:17 27 4
gpt4 key购买 nike

有一个 18 列的 txt 文件,其中字符串值用单引号 '' 括起来,列与列之间用逗号 , 分隔,每一行代表 sqlite 查询的一条插入语句:

    (1999,1999,1999,1999,1999,0,0,'flaggr.png',261,     'Βάκχειος',             'Spl-up','B ',  'Pagrati/Athens,Attica,Greece',     'N/A',   'Hellenic Mythology',      '','', ''),
(2000,2000,2000,2000,2000,0,2010,'flagru.png',3340, 'Анклав Снов', 'Act', 'G/D ', 'Bryansk,Russia', '2008-2009(as Vampire''s Crypt),2010-present', 'N/A', '','', ''),
(2001,2001,2001,2001,2001,0,2002,'flagru.png',271, 'Аркона', 'Act','P/FO ', 'Moscow,Russia', '2002(as Гиперборея),2002-present', 'Slavic Pism and FOtales, Legends, Mythology', '', '', ''),
(2002,2002,2002,2002,2002,0,1988,'flagru.png',470, 'Аспид', 'Spl-up','PROG ', 'Volgodonsk,Rostovregion,Russia', '1988-1997,2010-?', 'Politics, Horror, Death', '', '', ''),
(2003,2003,2003,2003,2003,0,2000,'flagua.png',359, 'Ірій', 'Unknown','FO D /G ', 'Lviv,Ukraine', '2000-?', 'Slavic mythology, Ukrainian FOlore', '', '', ''),
(2004,2004,2004,2004,2004,0,2011,'flagru.png',3036579, 'Лесьяр', 'Act','P FO ', 'Moscow,Russia', '2011-present', 'Pism, FOlore, Social matters, Feelings', '', '', ''),
(2005,2005,2005,2005,2005,0,2003,'flagru.png',218, 'М8Л8ТХ', 'Act','B with RAC', 'Tver,Ukraine(posterior),Russia', '2003-present', 'National Pride, National Socialism, Hatred, War, Intolerance, Pism', '', '', ''),
(2006,2006,2006,2006,2006,0,0,'flagru.png',354037, 'Рельос', 'Act','PR/POST-/ (early), G/POST-, Ambient (later)', 'Baltiisk,Kaliningradregion,Russia', 'N/A', 'N/A', '', '',''),
(2007,2007,2007,2007,2007,0,2006,'flagru.png',32937, 'Сивый Яр', 'Act','P/POST-B ', 'Vyritsa,Leningradregion,Russia', '2006-present', 'Pism, Pride, Heritage, Poetry, Slavonic Mythology', '', '', ''),
(2008,2008,2008,2008,2008,0,2001,'flagru.png',44, 'Темнозорь', 'Act','FO/B ', 'Moscow,Russia', '2001-present', 'Nature, Slavonic Pism, War, Right-wing nationalism', '4394', '', ''),
(2009,2009,2009,2009,2009,0,1993,'flagru.png',80, 'Эпидемия', 'Act','Pow ', 'Moscow,Russia', '1993-present', 'Fantasy, Tolkien, Elves', '', '', ''),
(2010,2010,2010,2010,2010,0,0,'flagjp.png',354039, 'こくまろみるく', 'Act','G/Pow ', 'N/A,Japan', 'N/A', 'Bizarre, Macabre', '', '', ''),
(2011,2011,2011,2011,2011,0,2012,'flagus.png',38723, 'מזמור', 'Act','B/Drone/D ', 'Portland,Oregon,United States', '2012-present', 'N/A', '', '', ''),
(2012,2012,2012,2012,2012,0,2004,'flaglb.png',67, 'دمار', 'Spl-up','B/Death ', 'Hamra,Beirut,Lebanon', '2004-2006', 'War, Pride, Blasphemy, Supremacy', '', '', ''),
(2013,2013,2013,2013,2013,0,2006,'flagcn.png',760, '原罪', 'Act','B (early), G/B (later)', 'Chengdu,SichuanProvince,China', '2006-present', 'Misanthropy, Hatred, Depression, War, Revelation', '', '', ''),
(2014,2014,2014,2014,2014,0,1995,'flagtw.png',443, '閃靈', 'Act','Melodic B/Death/FO ', 'Taipei,Taiwan', '1995-present', 'Taiwanese Myths and Legends, Anti-Fascism, History', '4443', '', ''),
(2015,2015,2015,2015,2015,0,2001,'flagjp.png',31450, '電気式華憐音楽集団', 'Act','Pow/G', 'N/A,Japan', '2001-present', 'Anime, Fantasy, Liberty', '', '', '');

对齐所有列的最佳方式是什么,例如前两行变为:

(1999,1999,1999,1999,1999,0,0,   'flaggr.png',261,  'Βάκχειος',     'Spl-up',   'B ',   'Pagrati/Athens,Attica,Greece', 'N/A',                                          'Hellenic Mythology',   '','', ''),
(2000,2000,2000,2000,2000,0,2010,'flagru.png',3340, 'Анклав Снов', 'Act', 'G/D ', 'Bryansk,Russia', '2008-2009(as Vampire''s Crypt),2010-present', 'N/A', '','', ''),

我在想:

  1. 使用逗号作为分隔符拆分文件中的所有行字符串
  2. 计算每列的最大长度并将其存储在内存中
  3. 再次循环文件,但这次使用计算出的最大长度并写入输出

我得到的代码类似于以下内容,但是我意识到一个问题:有些列在单引号内有逗号,例如 'bla1,bla2,bla3'(第 12 到 18 列可以有内部逗号...),所以如果我使用逗号拆分字符串,就不会得到 18 列。

在那个问题之后我不知道如何继续......用逗号分隔的方式是什么,但考虑到一些字符串的单引号?

    /// <summary>
    /// First attempt at aligning the columns of a text file: reads
    /// <paramref name="filePath"/> line by line, records the longest value seen in
    /// each of 18 comma-separated columns, and finally writes lines to
    /// <paramref name="outputFile"/>.
    /// </summary>
    /// <param name="filePath">Path of the input text file to scan.</param>
    /// <param name="outputFile">Path of the file the resulting lines are written to.</param>
    private static void AdjustColumnsInFile(string filePath, string outputFile)
{
//array to store max size of each column
int[] sizes = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
foreach (var line in File.ReadLines(filePath))
{
// NOTE: a plain comma split also cuts at commas that sit inside single-quoted
// values, so such lines produce more than 18 parts.
var words = line.Split(',');
// Lines that do not split into exactly 18 parts are ignored (there is no else branch).
if (words.Length == 18)
{
var i = 0;
//get max value of each column
foreach (var word in words)
{
// keep the larger of the stored maximum and this value's length (untrimmed)
sizes[i] = sizes[i] < word.Length ? word.Length : sizes[i];
i++;
}
}
}

// The part that builds the output is elided in this snippet; `newLines`, used
// below, is not defined in the visible code.
...

// Write every element of newLines to the output file, one per line.
using (var sw = new StreamWriter(outputFile))
{
foreach (var l in newLines)
{
sw.WriteLine($"{l}");
}
}
}

最佳答案

据我所知,您唯一的问题是如何在逗号上拆分字符串,因为某些逗号可能出现在 '' 引号内。您可以使用正则表达式来做到这一点:

,(?=(?:[^\']*\'[^\']*\')*[^\']*$)

它基本上匹配后面跟有偶数个(包括零个)引号 (') 的逗号。如果逗号出现在 '' 引号内,那么在有效字符串中,它后面将跟有奇数个引号,因此不会匹配。

剩下的应该很简单,首先计算尺寸:

// Pass 1: measure the widest value of every column.
// sizes[i] holds the largest trimmed length seen so far in column i (18 columns per row).
int[] sizes = new int[18];
foreach (var line in File.ReadLines(filePath)) {
    var tmp = line.Trim(); // remove leading and trailing whitespace
    if (tmp.Length == 0) {
        // A blank line carries no columns; without this guard the Remove() calls
        // below would throw ArgumentOutOfRangeException.
        continue;
    }
    if (tmp.Length < 3) {
        // Too short to contain "(" plus ")" and the trailing "," or ";".
        throw new FormatException("Invalid row format");
    }
    tmp = tmp.Remove(tmp.Length - 2, 2); // remove closing ) and , or ;
    tmp = tmp.Remove(0, 1); // remove opening (
    // Split only on commas followed by an even number of single quotes up to the
    // end of the row, i.e. commas that are not inside a '...' value.
    var words = Regex.Split(tmp, @",(?=(?:[^\']*\'[^\']*\')*[^\']*$)");
    if (words.Length != sizes.Length) {
        throw new FormatException("Invalid number of columns");
    }
    for (int i = 0; i < words.Length; i++) {
        // Measure the value without the whitespace that surrounds it.
        sizes[i] = Math.Max(sizes[i], words[i].Trim().Length);
    }
}

然后重复并将空格附加到与预期大小不匹配的列:

// Pass 2: re-read the input and write each row with every column right-padded
// with spaces to the width stored in sizes[] for that column.
using (var writer = new StreamWriter(outputFile)) {
    foreach (var line in File.ReadLines(filePath)) {
        var tmp = line.Trim(); // remove leading and trailing whitespace
        if (tmp.Length == 0) {
            // Keep blank lines as blank lines; without this guard the Remove()
            // calls below would throw ArgumentOutOfRangeException.
            writer.WriteLine();
            continue;
        }
        if (tmp.Length < 3) {
            // Too short to contain "(" plus ")" and the trailing "," or ";".
            throw new FormatException("Invalid row format");
        }
        // Remember the row terminator so it can be restored after re-joining.
        bool hadTrailingComma = tmp.EndsWith(",", StringComparison.Ordinal);
        tmp = tmp.Remove(tmp.Length - 2, 2); // remove closing ) and , or ;
        tmp = tmp.Remove(0, 1); // remove opening (
        // Split only on commas that are not inside a '...' value (see the
        // even-number-of-quotes lookahead).
        var words = Regex.Split(tmp, @",(?=(?:[^\']*\'[^\']*\')*[^\']*$)");
        // PadRight appends spaces up to the target width and returns the value
        // unchanged when it is already that wide or wider.
        var newLine = String.Join(",", words.Select((w, i) => w.Trim().PadRight(sizes[i])));

        writer.WriteLine($"({newLine}){(hadTrailingComma ? "," : ";")}");
    }
}

请注意,由于诸如 こくまろみるく 之类的 unicode 字符,您的输出文件可能看起来没有正确对齐,而实际上它是对齐的(也就是说,每列的字符数相同)。

关于c# - 如何根据每列的最大长度对齐 txt 文件中不同长度的列?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/49623603/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com