gpt4 book ai didi

c# - SevenZipSharp 无法解压某些 tar 文件

转载 作者:行者123 更新时间:2023-11-30 14:45:31 27 4
gpt4 key购买 nike

我使用 SevenZipSharp 打包到 7z 文件并从各种文件中解包。它多年来一直运作良好。

今天我有一个 .tgz 存档在第二阶段解包失败:
从 .tgz 中提取 .tar 有效,但解压 .tar 失败。受影响的只是这个单一文件。所有其他 .tgz 都可以正常工作。 .tar 本身没有问题,因为使用 7-zip 软件解压缩也可以。

最佳答案

经过大量测试我和同事找到了原因:
我们必须调试 SevenZipSharp DLL 才能找到其中的故障。DLL 通过读取前 16 个字节并将其与签名列表进行比较来检测存档的类型。这对于大多数类型的存档都是正确的,但对于 .tar 存档是错误的,因为 .tar 文件头以存档的文件名开头(参见 Wikipedia 的 TAR 条目)。签名“ustar”(如果存在)位于地址 257 (0x0101)。

SevenZipSharp 知道这一点并在该地址检查“ustar”,但前提是之前的检测失败。不幸的是,我们的 TAR 文件的名称是“x42202.tar”。而 .dmg 文件的 header ( Apple Disk Image ) 由一个“x”组成(这有多愚蠢,只使用一个字节作为签名??)。所以其实是成功检测到了文件类型,只是检测结果有误。
(我知道,链接的维基百科说,.dmg header 签名是“koly”,但我用我在互联网上找到的下载的 .dmg 文件进行了确认。)

编辑 07.12.2021:签名实际上是“koly”,但所谓的 header 长 512 字节,位于文件的末尾。然而,SevenZipSharp 是在文件开头检查签名的。我测试过的大多数文件(但不是全部!)确实在开头有一个“x”,但我不知道为什么。也许它是一种非官方的 header (“x”似乎来自 MIME 类型“x-apple-diskimage”)。 - 编辑结束。

因此我们修改了 FileSignatureChecker.cs 中的代码,以避免对 .tar 文件进行错误的文件类型检测。
您可以在下面找到原始代码和修改后的代码。
代码库是最新的 SevenZipSharp 版本,可以在 CodePlex archive 中找到。显然它已不再处于活跃开发状态,因为版本号多年来没有改变;如果它仍然活跃,应该会在 CodePlex 停用后迁移到别处。

2018-11-16 更新
修正了修改后代码中的一个错误:之前即使找到了 enSpecialFormat,也没有将其返回。

2021-12-16 更新
该错误仍然存在于 github 存储库 https://github.com/squid-box/SevenZipSharp 中,这是 SevenZipSharp 项目的当前位置。已提交一个对错误代码进行重大返工的拉取请求,正在等待合并。

原始代码

/// <summary>
/// Determines the archive format of <paramref name="stream"/> from its signature bytes.
/// </summary>
/// <remarks>
/// Detection order:
/// (1) the first SIGNATURE_SIZE bytes are compared against the keys of Formats.InSignatureFormats,
///     and any match other than PE is returned immediately;
/// (2) fixed-offset probes through SpecialDetect for ISO and HFS;
/// (3) a stream whose final 1024 bytes are all zero is reported as Tar;
/// (4) if a PE or compound-document prefix was seen, the leading bytes are scanned for an
///     embedded archive signature.
/// Because step (1) returns before any tar-specific check runs, a tar file whose leading bytes
/// happen to equal another format's signature is reported as that other format.
/// </remarks>
/// <param name="stream">Stream to inspect. The method calls Seek and reads Length on it.</param>
/// <param name="offset">Set to 0, or to the byte position of an embedded signature found in step (4).</param>
/// <param name="isExecutable">Set to true when the leading bytes match the PE signature.</param>
/// <returns>The detected format.</returns>
/// <exception cref="ArgumentException">
/// The stream is not readable, is shorter than SIGNATURE_SIZE, or no signature was recognised.
/// </exception>
public static InArchiveFormat CheckSignature (Stream stream, out int offset, out bool isExecutable)
{
offset = 0;
if (!stream.CanRead)
{
throw new ArgumentException ("The stream must be readable.");
}
if (stream.Length < SIGNATURE_SIZE)
{
throw new ArgumentException ("The stream is invalid.");
}

#region Get file signature

// Read exactly SIGNATURE_SIZE bytes from the start of the stream.
// NOTE(review): the loop does not handle Read returning 0, so it would not terminate
// if the stream ends before SIGNATURE_SIZE bytes are delivered.
var signature = new byte[SIGNATURE_SIZE];
int bytesRequired = SIGNATURE_SIZE;
int index = 0;
stream.Seek (0, SeekOrigin.Begin);
while (bytesRequired > 0)
{
int bytesRead = stream.Read (signature, index, bytesRequired);
bytesRequired -= bytesRead;
index += bytesRead;
}
// Dash-separated upper-case hex ("XX-XX-..."); every byte occupies three characters.
string actualSignature = BitConverter.ToString (signature);

#endregion

// XZ acts as the "nothing suspected" marker here; only PE and Cab trigger the embedded-archive scan below.
InArchiveFormat suspectedFormat = InArchiveFormat.XZ; // any except PE and Cab
isExecutable = false;

foreach (string expectedSignature in Formats.InSignatureFormats.Keys)
{
// Operator precedence makes this: startsWith || (startsWithAfterTwoBytes && formatIsLzh).
// Substring (6) skips the first two bytes of the hex string.
if (actualSignature.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) ||
actualSignature.Substring (6).StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) &&
Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.Lzh)
{
if (Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.PE)
{
// A PE prefix is only remembered; the loop keeps looking for a more specific match.
suspectedFormat = InArchiveFormat.PE;
isExecutable = true;
}
else
{
// Any other prefix match ends detection immediately, before the tar checks below.
return Formats.InSignatureFormats[expectedSignature];
}
}
}

// Many Microsoft formats
if (actualSignature.StartsWith ("D0-CF-11-E0-A1-B1-1A-E1", StringComparison.OrdinalIgnoreCase))
{
suspectedFormat = InArchiveFormat.Cab; // != InArchiveFormat.XZ
}

#region SpecialDetect
// The boolean result of this tar probe at offset 257 is discarded, so a hit never causes
// Tar to be returned from here; an ArgumentException raised by the probe is silently ignored.
try
{
SpecialDetect (stream, 257, InArchiveFormat.Tar);
}
catch (ArgumentException) { }
// ISO probes at three fixed offsets.
if (SpecialDetect (stream, 0x8001, InArchiveFormat.Iso))
{
return InArchiveFormat.Iso;
}
if (SpecialDetect (stream, 0x8801, InArchiveFormat.Iso))
{
return InArchiveFormat.Iso;
}
if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
{
return InArchiveFormat.Iso;
}
// NOTE(review): this repeats the 0x9001 probe directly above with identical arguments.
if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
{
return InArchiveFormat.Iso;
}
if (SpecialDetect (stream, 0x400, InArchiveFormat.Hfs))
{
return InArchiveFormat.Hfs;
}
#region Last resort for tar - can mistake
// Heuristic: report Tar when the last 1024 bytes of the stream are all zero.
if (stream.Length >= 1024)
{
stream.Seek (-1024, SeekOrigin.End);
byte[] buf = new byte[1024];
// NOTE(review): the number of bytes actually read is not checked; bytes that were not
// read stay at their initial value of zero and therefore count as zero below.
stream.Read (buf, 0, 1024);
bool istar = true;
for (int i = 0; i < 1024; i++)
{
istar = istar && buf[i] == 0;
}
if (istar)
{
return InArchiveFormat.Tar;
}
}
#endregion
#endregion

#region Check if it is an SFX archive or a file with an embedded archive.
// Only reached with a suspicion when a PE or compound-document prefix was seen above.
if (suspectedFormat != InArchiveFormat.XZ)
{
#region Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes
var scanLength = Math.Min (stream.Length, SFX_SCAN_LENGTH);
signature = new byte[scanLength];
bytesRequired = (int)scanLength;
index = 0;
stream.Seek (0, SeekOrigin.Begin);
// Same read loop as above, including its lack of handling for Read returning 0.
while (bytesRequired > 0)
{
int bytesRead = stream.Read (signature, index, bytesRequired);
bytesRequired -= bytesRead;
index += bytesRead;
}
actualSignature = BitConverter.ToString (signature);
#endregion

// Formats are tried in this fixed order; the first one whose signature occurs anywhere
// in the scanned bytes wins.
foreach (var format in new InArchiveFormat[]
{
InArchiveFormat.Zip,
InArchiveFormat.SevenZip,
InArchiveFormat.Rar,
InArchiveFormat.Cab,
InArchiveFormat.Arj
})
{
int pos = actualSignature.IndexOf (Formats.InSignatureFormatsReversed[format]);
if (pos > -1)
{
// Three hex-string characters per byte, so this converts a string index to a byte offset.
offset = pos / 3;
return format;
}
}
// Nothing
// No embedded archive found: a PE prefix is still reported as PE, while a Cab suspicion
// falls through to the exception below.
if (suspectedFormat == InArchiveFormat.PE)
{
return InArchiveFormat.PE;
}
}
#endregion

throw new ArgumentException ("The stream is invalid or no corresponding signature was found.");
}

修改代码

/// <summary>
/// Determines the archive format of <paramref name="stream"/> from its signature bytes,
/// without letting a matching leading signature hide a tar archive stored in a ".tar" file.
/// </summary>
/// <remarks>
/// A tar header starts with a file name, so its first bytes can coincide with the signature
/// of another format. The prefix match is therefore only recorded, not returned at once.
/// If the tar probe at offset 257 hits, or the stream ends in 1024 zero bytes, and the stream
/// is a <see cref="FileStream"/> whose name ends in ".tar", Tar overrides the prefix match.
/// NOTE(review): for streams that are not a FileStream no file name is available, so the
/// override cannot apply and a prefix match still wins over the tar probe.
/// </remarks>
/// <param name="stream">Stream to inspect. The method calls Seek and reads Length on it.</param>
/// <param name="offset">Set to 0, or to the byte position of an embedded archive signature.</param>
/// <param name="isExecutable">Set to true when the leading bytes match the PE signature.</param>
/// <returns>The detected format.</returns>
/// <exception cref="ArgumentException">
/// The stream is not readable, is shorter than SIGNATURE_SIZE, ends before the expected
/// number of bytes could be read, or no signature was recognised.
/// </exception>
public static InArchiveFormat CheckSignature (Stream stream, out int offset, out bool isExecutable)
{
    offset = 0;
    if (!stream.CanRead)
    {
        throw new ArgumentException ("The stream must be readable.");
    }
    if (stream.Length < SIGNATURE_SIZE)
    {
        throw new ArgumentException ("The stream is invalid.");
    }

    #region Get file signature

    var signature = new byte[SIGNATURE_SIZE];
    stream.Seek (0, SeekOrigin.Begin);
    // Stream.Read may return 0 before the buffer is full; fail instead of looping forever.
    if (ReadFully (stream, signature, SIGNATURE_SIZE) < SIGNATURE_SIZE)
    {
        throw new ArgumentException ("The stream is invalid.");
    }
    // Dash-separated hex ("XX-XX-..."); every byte occupies three characters.
    string actualSignature = BitConverter.ToString (signature);

    #endregion Get file signature

    // XZ acts as the "nothing suspected" marker; only PE and Cab trigger the embedded-archive scan.
    InArchiveFormat suspectedFormat = InArchiveFormat.XZ;
    isExecutable = false;

    InArchiveFormat? detectedFormat = null; // format matched by the leading signature bytes
    InArchiveFormat? specialFormat = null;  // format matched by a fixed-offset probe

    foreach (string expectedSignature in Formats.InSignatureFormats.Keys)
    {
        InArchiveFormat candidate = Formats.InSignatureFormats[expectedSignature];

        bool matchesAtStart =
            actualSignature.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase);
        // Lzh is the only format also accepted two bytes in (six characters of the hex string).
        bool matchesLzhAtByteTwo =
            candidate == InArchiveFormat.Lzh &&
            actualSignature.Substring (6).StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase);

        if (!matchesAtStart && !matchesLzhAtByteTwo)
        {
            continue;
        }

        if (candidate == InArchiveFormat.PE)
        {
            // A PE prefix is only remembered; keep looking for a more specific match.
            suspectedFormat = InArchiveFormat.PE;
            isExecutable = true;
        }
        else
        {
            detectedFormat = candidate;
            break;
        }
    }

    // Many Microsoft formats
    if (actualSignature.StartsWith ("D0-CF-11-E0-A1-B1-1A-E1", StringComparison.OrdinalIgnoreCase))
    {
        suspectedFormat = InArchiveFormat.Cab;
    }

    #region SpecialDetect

    // Probe order is preserved: tar first, then the three ISO offsets, then HFS.
    // Each ISO offset is probed once; the second 0x9001 probe was a duplicate of the first.
    if (SpecialDetect (stream, 257, InArchiveFormat.Tar))
    {
        specialFormat = InArchiveFormat.Tar;
    }
    else if (SpecialDetect (stream, 0x8001, InArchiveFormat.Iso) ||
             SpecialDetect (stream, 0x8801, InArchiveFormat.Iso) ||
             SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
    {
        specialFormat = InArchiveFormat.Iso;
    }
    else if (SpecialDetect (stream, 0x400, InArchiveFormat.Hfs))
    {
        specialFormat = InArchiveFormat.Hfs;
    }

    #region Last resort for tar - can mistake

    // Heuristic: the stream ends in 1024 zero bytes. A short read is treated as
    // "not a tar" rather than counting the unread bytes as zeros.
    const int tarTrailerLength = 1024;
    bool possiblyTar = false;
    if (stream.Length >= tarTrailerLength)
    {
        stream.Seek (-tarTrailerLength, SeekOrigin.End);
        var trailer = new byte[tarTrailerLength];
        possiblyTar =
            ReadFully (stream, trailer, tarTrailerLength) == tarTrailerLength &&
            Array.TrueForAll (trailer, b => b == 0);
    }

    // TAR header starts with a file name, which can be anything, including the identifiers
    // of the various archive formats. A TAR can therefore be misinterpreted as any other
    // type of archive, so a ".tar" file name is allowed to override the prefix match.
    if (specialFormat == InArchiveFormat.Tar || possiblyTar)
    {
        var fileStream = stream as FileStream;
        // File names are not linguistic text, so compare the extension ordinally.
        if (fileStream != null && fileStream.Name.EndsWith (".tar", StringComparison.OrdinalIgnoreCase))
        {
            detectedFormat = InArchiveFormat.Tar;
        }
    }

    #endregion Last resort for tar - can mistake

    if (detectedFormat.HasValue)
    {
        return detectedFormat.Value;
    }
    if (specialFormat.HasValue)
    {
        return specialFormat.Value;
    }

    #endregion SpecialDetect

    #region Check if it is an SFX archive or a file with an embedded archive.

    if (suspectedFormat != InArchiveFormat.XZ)
    {
        #region Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes

        int scanLength = (int)Math.Min (stream.Length, SFX_SCAN_LENGTH);
        signature = new byte[scanLength];
        stream.Seek (0, SeekOrigin.Begin);
        if (ReadFully (stream, signature, scanLength) < scanLength)
        {
            throw new ArgumentException ("The stream is invalid.");
        }
        actualSignature = BitConverter.ToString (signature);

        #endregion Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes

        // Formats are tried in this fixed order; the first signature found anywhere wins.
        foreach (var format in new InArchiveFormat[]
        {
            InArchiveFormat.Zip,
            InArchiveFormat.SevenZip,
            InArchiveFormat.Rar,
            InArchiveFormat.Cab,
            InArchiveFormat.Arj
        })
        {
            // Hex text is not linguistic; use the same ordinal, case-insensitive rule as the prefix match.
            int pos = actualSignature.IndexOf (Formats.InSignatureFormatsReversed[format], StringComparison.OrdinalIgnoreCase);
            if (pos > -1)
            {
                // Three hex-string characters per byte: convert the string index to a byte offset.
                offset = pos / 3;
                return format;
            }
        }

        // No embedded archive found: a plain executable is still reported as PE.
        if (suspectedFormat == InArchiveFormat.PE)
        {
            return InArchiveFormat.PE;
        }
    }

    #endregion Check if it is an SFX archive or a file with an embedded archive.

    throw new ArgumentException ("The stream is invalid or no corresponding signature was found.");
}

/// <summary>
/// Reads from the current position of <paramref name="stream"/> into the start of
/// <paramref name="buffer"/> until <paramref name="count"/> bytes have been read or the
/// stream reports that no more data is available.
/// </summary>
/// <returns>The number of bytes read; smaller than <paramref name="count"/> only if the stream ended early.</returns>
private static int ReadFully (Stream stream, byte[] buffer, int count)
{
    int total = 0;
    while (total < count)
    {
        int read = stream.Read (buffer, total, count - total);
        if (read <= 0)
        {
            break;
        }
        total += read;
    }
    return total;
}

关于c# - SevenZipSharp 无法解压某些 tar 文件,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/53323255/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com