gpt4 book ai didi

c++ - 如何将 minizip 包装器转换为 unicode?

转载 作者:太空狗 更新时间:2023-10-29 20:05:15 30 4
gpt4 key购买 nike

我正在尝试使用来自 [http://www.zlib.net/] 的 minizip 包装器来压缩文件夹。只要文件名是英文,它就可以正常工作。有没有人试过修改 minizip 以支持 unicode?

修改后的代码贴在下面。问题出在这个函数上,第二个参数将 const char* 作为输入。当我进行转换时,它会丢失数据并且文件名不一样。

例如:Chinese-统一码.txt 变成 Chinese-t+ƒS+Çtáü.txt inside zip。

err = zipOpenNewFileInZip3_64(  zf,outstr.c_str(),&zi,
NULL,0,NULL,0,NULL /* comment*/,
(opt_compress_level != 0) ? Z_DEFLATED : 0,
opt_compress_level,0,
/* -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, */
-MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY,
password,crcFile, zip64);








/*
minizip.c
Version 1.1, February 14h, 2010
sample part of the MiniZip project - ( http://www.winimage.com/zLibDll/minizip.html )

Copyright (C) 1998-2010 Gilles Vollant (minizip) ( http://www.winimage.com/zLibDll/minizip.html )

Modifications of Unzip for Zip64
Copyright (C) 2007-2008 Even Rouault

Modifications for Zip64 support on both zip and unzip
Copyright (C) 2009-2010 Mathias Svensson ( http://result42.com )
*/


/* On targets that are neither Windows nor Apple, define the large-file
   feature-test macros below (each one only if it is not already defined).
   Presumably these must appear before the first system header is included
   in order to have any effect -- confirm for your libc. */
#if (!defined(_WIN32)) && (!defined(WIN32)) && (!defined(__APPLE__))
#ifndef __USE_FILE_OFFSET64
#define __USE_FILE_OFFSET64
#endif
#ifndef __USE_LARGEFILE64
#define __USE_LARGEFILE64
#endif
#ifndef _LARGEFILE64_SOURCE
#define _LARGEFILE64_SOURCE
#endif
/* NOTE(review): the macro glibc documents is spelled _FILE_OFFSET_BITS (with a
   trailing S); the spelling used here looks like a typo -- confirm before
   relying on it. */
#ifndef _FILE_OFFSET_BIT
#define _FILE_OFFSET_BIT 64
#endif
#endif

/* FOPEN_FUNC / FTELLO_FUNC / FSEEKO_FUNC: wrappers that select which
   open / tell / seek functions the rest of this file calls. */
#ifdef __APPLE__
// In darwin and perhaps other BSD variants off_t is a 64 bit value, hence no need for specific 64 bit functions
#define FOPEN_FUNC(filename, mode) fopen(filename, mode)
#define FTELLO_FUNC(stream) ftello(stream)
#define FSEEKO_FUNC(stream, offset, origin) fseeko(stream, offset, origin)
#else
/* NOTE(review): this branch is taken on every non-Apple target, Windows
   included; confirm that fopen64/ftello64/fseeko64 exist in your toolchain. */
#define FOPEN_FUNC(filename, mode) fopen64(filename, mode)
#define FTELLO_FUNC(stream) ftello64(stream)
#define FSEEKO_FUNC(stream, offset, origin) fseeko64(stream, offset, origin)
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <errno.h>
#include <fcntl.h>
#ifdef _WIN32
# include <direct.h>
# include <io.h>
#define GetCurrentDir _getcwd
#else
# include <unistd.h>
# include <utime.h>
# include <sys/types.h>
# include <sys/stat.h>
#endif

#include "zip.h"
#include "Shlwapi.h"

#ifdef _WIN32
#define USEWIN32IOAPI
#include "iowin32.h"
#endif

#include <windows.h>
#include <string>
#include <iostream>
#include <list>
#include <fstream>
#include <sstream>
#include <set>
using namespace std;

/* Size, in bytes, of the scratch buffer used to move file data into the archive. */
#define WRITEBUFFERSIZE (16384)
/* Upper bound, in characters, used when sizing local file-name buffers in this file. */
#define MAXFILENAME (256)

/*
 * filetime: look up the last-modification time of file `f` so it can be
 * stored in a zip entry.
 *
 *   f      name of the file to get info on
 *   tmzip  broken-down time, filled in by the POSIX variant only
 *   dt     packed DOS date/time, filled in by the Windows variant only
 *
 * Returns 1 when the time could be read, 0 otherwise. One of three variants
 * is compiled depending on the target platform.
 */
#ifdef _WIN32
uLong filetime(
    wchar_t *f,    /* name of file to get info on */
    tm_zip *tmzip, /* not written by this variant */
    uLong *dt)     /* dostime */
{
    uLong ret = 0;
    FILETIME ftLocal;
    _WIN32_FIND_DATAW ff32;

    (void)tmzip; /* the Windows variant reports the time through dt only */

    HANDLE hFind = FindFirstFileW(f, &ff32);
    if (hFind != INVALID_HANDLE_VALUE)
    {
        FileTimeToLocalFileTime(&(ff32.ftLastWriteTime), &ftLocal);
        /* dt is treated as two 16-bit words: word 1 receives the DOS date,
           word 0 the DOS time. */
        FileTimeToDosDateTime(&ftLocal, ((LPWORD)dt) + 1, ((LPWORD)dt) + 0);
        FindClose(hFind);
        ret = 1;
    }
    return ret;
}
#else
/* `#ifdef A || B` only tests A; use defined() so every macro is honoured. */
#if defined(unix) || defined(__unix__) || defined(__APPLE__)
uLong filetime(
    char *f,       /* name of file to get info on */
    tm_zip *tmzip, /* return value: modification time, broken down */
    uLong *dt)     /* not written by this variant */
{
    uLong ret = 0;
    struct stat s; /* results of stat() */
    time_t tm_t = 0;

    (void)dt;

    if (strcmp(f, "-") != 0)
    {
        char name[MAXFILENAME + 1];
        size_t len = strlen(f);
        if (len > MAXFILENAME)
            len = MAXFILENAME;

        /* Copy at most MAXFILENAME characters and always terminate, so no
           byte of `name` is read uninitialised. */
        memcpy(name, f, len);
        name[len] = '\0';

        /* not all systems allow stat'ing a file with / appended */
        if (len > 0 && name[len - 1] == '/')
            name[len - 1] = '\0';

        if (stat(name, &s) == 0)
        {
            tm_t = s.st_mtime;
            ret = 1;
        }
    }

    struct tm *filedate = localtime(&tm_t);
    if (filedate == NULL)
        return 0; /* time could not be converted; leave tmzip untouched */

    tmzip->tm_sec  = filedate->tm_sec;
    tmzip->tm_min  = filedate->tm_min;
    tmzip->tm_hour = filedate->tm_hour;
    tmzip->tm_mday = filedate->tm_mday;
    tmzip->tm_mon  = filedate->tm_mon;
    tmzip->tm_year = filedate->tm_year;

    return ret;
}
#else
/* Fallback for platforms with no supported way to query the time. */
uLong filetime(
    char *f,       /* name of file to get info on (unused) */
    tm_zip *tmzip, /* unused */
    uLong *dt)     /* unused */
{
    (void)f;
    (void)tmzip;
    (void)dt;
    return 0;
}
#endif
#endif

/*
 * Appends directory + fileName to fileList unless the file's extension is in
 * excludeFilterSet.
 *
 *   fileList          list that receives the full path
 *   directory         prefix prepended verbatim to fileName
 *   excludeFilterSet  extensions (without the dot) that must be skipped
 *   fileName          bare file name
 *
 * The extension is the text after the last '.'. A name without any dot has
 * no extension and is always added; it is never matched against the exclude
 * set by its whole name.
 */
void addFileToList(list<wstring>& fileList, const wstring& directory, const set<wstring>& excludeFilterSet, const wstring& fileName )
{
    const wstring::size_type dotPos = fileName.find_last_of(L'.');
    if (dotPos != wstring::npos)
    {
        const wstring fileExtension = fileName.substr(dotPos + 1);
        if (!fileExtension.empty() && excludeFilterSet.count(fileExtension) != 0)
            return;
    }

    fileList.push_back(directory + fileName);
}

void GetFileListing(list<wstring>& fileList, wstring directory,const set<wstring>& excludeFilterSet,bool recursively=true)
{
directory = directory + L"\\";
wstring filter = directory + L"*";

_WIN32_FIND_DATAW FindFileData;
HANDLE hFind = FindFirstFileW(filter.c_str(), &FindFileData);

if (hFind == INVALID_HANDLE_VALUE)
{
DWORD dwError = GetLastError();
if (dwError != ERROR_FILE_NOT_FOUND)
{
//cout << "Invalid file handle for filter " << filter << ". Error is " << GetLastError() << endl;
}
return;
}

do
{
if (FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
{
if ((recursively) && (wcscmp(FindFileData.cFileName, L".") != 0) && (wcscmp(FindFileData.cFileName, L"..") != 0))
{
GetFileListing(fileList, directory + FindFileData.cFileName, excludeFilterSet);
}
}
else
{
addFileToList(fileList, directory, excludeFilterSet,FindFileData.cFileName);
}
} while (FindNextFileW(hFind, &FindFileData) != 0);

DWORD dwError = GetLastError();
FindClose(hFind);

if (dwError != ERROR_NO_MORE_FILES)
{
//cout << "FindNextFile error. Error is "<< dwError << endl;
}
}


/*
 * Reports whether `filename` can be opened for binary reading.
 * Returns 1 when the open succeeds, 0 when it fails.
 */
int check_exist_file(wchar_t* filename)
{
    FILE* probe = _wfopen(filename, L"rb");
    if (probe == NULL)
        return 0;

    fclose(probe);
    return 1;
}

/*
 * Calculates the CRC32 of a file. The CRC has to be known before the file is
 * written when the entry is going to be encrypted.
 *
 *   filenameinzip  wide-character path of the file to read
 *   buf            caller-supplied scratch buffer
 *   size_buf       size of buf in bytes
 *   result_crc     receives the CRC computed over the bytes that were read
 *
 * Returns ZIP_OK on success, ZIP_ERRNO when the file cannot be opened or a
 * read error occurs. *result_crc is written in every case.
 */
int getFileCrc(const wchar_t * filenameinzip,void*buf,unsigned long size_buf,unsigned long* result_crc)
{
    unsigned long calculate_crc = 0;
    int err = ZIP_OK;
    FILE * fin = _wfopen(filenameinzip, L"rb");

    if (fin == NULL)
    {
        err = ZIP_ERRNO;
    }
    else
    {
        unsigned long size_read = 0;
        do
        {
            size_read = (unsigned long)fread(buf, 1, size_buf, fin);
            /* A short read that is not end-of-file is a read error. */
            if (size_read < size_buf && feof(fin) == 0)
            {
                /* The name is a wide string, so it needs %ls, not %s. */
                printf("error in reading %ls\n", filenameinzip);
                err = ZIP_ERRNO;
            }

            if (size_read > 0)
                calculate_crc = crc32(calculate_crc, (const Bytef *)buf, size_read);
        } while ((err == ZIP_OK) && (size_read > 0));

        fclose(fin);
    }

    *result_crc = calculate_crc;
    printf("file %ls crc %lx\n", filenameinzip, calculate_crc);
    return err;
}

/*
 * Reports whether `filename` is at least 0xffffffff bytes long, i.e. whether
 * its size no longer fits the 32-bit size fields and a Zip64 entry is needed.
 *
 * Returns 1 for such a file, 0 otherwise -- including when the file cannot
 * be opened or its end cannot be reached.
 */
int isLargeFile(const wchar_t * filename)
{
    int largeFile = 0;
    FILE* pFile = _wfopen(filename, L"rb");

    if (pFile != NULL)
    {
        /* Only trust the reported position when the seek itself succeeded. */
        if (FSEEKO_FUNC(pFile, 0, SEEK_END) == 0)
        {
            ZPOS64_T pos = FTELLO_FUNC(pFile);

            /* Wide name -> %ls; the position is printed as an explicit
               unsigned long long so the format matches the argument. */
            printf("File : %ls is %llu bytes\n", filename, (unsigned long long)pos);

            if (pos >= 0xffffffff)
                largeFile = 1;
        }

        fclose(pFile);
    }

    return largeFile;
}

/*
 * Splits `text` at every `delimiter` and inserts each piece into `result`
 * after dropping the piece's first two characters (a piece shorter than two
 * characters becomes empty).
 *
 * The text after the last delimiter -- or the whole text when there is no
 * delimiter -- is always processed too, so at least one (possibly empty)
 * element is inserted.
 */
void split( const wstring& text, wchar_t delimiter,set<wstring>& result )
{
    wstring::size_type pieceStart = 0;

    for (;;)
    {
        const wstring::size_type cut = text.find(delimiter, pieceStart);
        const bool lastPiece = (cut == wstring::npos);

        wstring piece = lastPiece ? text.substr(pieceStart)
                                  : text.substr(pieceStart, cut - pieceStart);
        piece.erase(0, 2);
        result.insert(piece);

        if (lastPiece)
            break;
        pieceStart = cut + 1;
    }
}

/*
 * Reads one Unicode code point at `cursor` and advances `cursor` past it.
 *
 * A high surrogate followed by a low surrogate is combined into a single
 * code point, so the function works both where wchar_t holds UTF-16 units
 * and where it holds whole code points. Unpaired surrogates and values above
 * U+10FFFF are replaced by U+FFFD. The caller must not call this at the
 * terminating zero.
 */
static unsigned long utf8NextCodePoint(const wchar_t *&cursor)
{
    const unsigned long unit = static_cast<unsigned long>(*cursor) & 0xFFFFFFFFUL;
    ++cursor;

    if (unit >= 0xD800UL && unit <= 0xDBFFUL)
    {
        const unsigned long low = static_cast<unsigned long>(*cursor) & 0xFFFFFFFFUL;
        if (low >= 0xDC00UL && low <= 0xDFFFUL)
        {
            ++cursor;
            return 0x10000UL + ((unit - 0xD800UL) << 10) + (low - 0xDC00UL);
        }
        return 0xFFFDUL; /* high surrogate without its partner */
    }
    if ((unit >= 0xDC00UL && unit <= 0xDFFFUL) || unit > 0x10FFFFUL)
        return 0xFFFDUL; /* stray low surrogate or out-of-range value */

    return unit;
}

/* Number of UTF-8 bytes needed to encode code point `cp`. */
static long utf8SequenceLength(unsigned long cp)
{
    if (cp < 0x80UL)
        return 1;
    if (cp < 0x800UL)
        return 2;
    if (cp < 0x10000UL)
        return 3;
    return 4;
}

/*
 * Returns the number of bytes (excluding the terminating zero) that
 * WChar_to_UTF8 produces for `string`; 0 for a null pointer.
 * Helper of WChar_to_UTF8: both must use the same decoding rules.
 */
long getUTF8size(const wchar_t *string){
    if (!string)
        return 0;

    long res = 0;
    while (*string)
        res += utf8SequenceLength(utf8NextCodePoint(string));
    return res;
}

/*
 * Converts the zero-terminated wide string `string` to UTF-8.
 *
 * Returns a zero-terminated buffer allocated with new[]; the caller owns it
 * and must release it with delete[]. A null `string` yields an empty string.
 */
char *WChar_to_UTF8(const wchar_t *string){
    const long fSize = getUTF8size(string);
    char *res = new char[fSize + 1];
    long b = 0;

    if (string)
    {
        while (*string)
        {
            const unsigned long cp = utf8NextCodePoint(string);
            if (cp < 0x80UL)
            {
                res[b++] = (char)cp;
            }
            else if (cp < 0x800UL)
            {
                res[b++] = (char)((cp >> 6) | 0xC0UL);
                res[b++] = (char)((cp & 0x3FUL) | 0x80UL);
            }
            else if (cp < 0x10000UL)
            {
                res[b++] = (char)((cp >> 12) | 0xE0UL);
                res[b++] = (char)(((cp >> 6) & 0x3FUL) | 0x80UL);
                res[b++] = (char)((cp & 0x3FUL) | 0x80UL);
            }
            else
            {
                res[b++] = (char)((cp >> 18) | 0xF0UL);
                res[b++] = (char)(((cp >> 12) & 0x3FUL) | 0x80UL);
                res[b++] = (char)(((cp >> 6) & 0x3FUL) | 0x80UL);
                res[b++] = (char)((cp & 0x3FUL) | 0x80UL);
            }
        }
    }

    res[b] = 0;
    return res;
}


/*
 * Converts a wide string to UTF-8 with WideCharToMultiByte(CP_UTF8).
 *
 * Returns the encoded bytes, or an empty string when the input is empty or
 * the conversion fails.
 */
std::string utf8_encode(const std::wstring &wstr)
{
    /* A zero-length input would make WideCharToMultiByte fail; there is
       nothing to convert anyway. */
    if (wstr.empty())
        return std::string();

    const int wideLen = (int)wstr.size();

    /* First call: ask how many bytes the UTF-8 form needs. */
    const int size_needed = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), wideLen, NULL, 0, NULL, NULL);
    if (size_needed <= 0)
        return std::string();

    /* Second call: convert into a buffer of exactly that size. */
    std::string strTo(size_needed, 0);
    const int written = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), wideLen, &strTo[0], size_needed, NULL, NULL);
    if (written <= 0)
        return std::string();

    strTo.resize(written);
    return strTo;
}


wstring zipper( const wstring& directoryPath, const wstring& strExcludeFilter, wstring & zipFileName )
{
int opt_overwrite=0,opt_compress_level=Z_BEST_COMPRESSION,opt_exclude_path=0,err=0,size_buf=0;
void* buf=NULL;
const char* password=NULL;
list<wstring> fileList;
DWORD dwRet;

wchar_t cCurrentPath[MAX_PATH];
dwRet = GetCurrentDirectoryW(MAX_PATH, cCurrentPath);
if( dwRet == 0 )
{
return wstring();
}

// Change the directory to the current folder
_wchdir(directoryPath.c_str());
set<wstring> excludeFilterSet;
split(strExcludeFilter,'|',excludeFilterSet);

GetFileListing(fileList, directoryPath,excludeFilterSet);
opt_overwrite = 1;

size_buf = WRITEBUFFERSIZE;
buf = (void*)malloc(size_buf);
if (buf==NULL) return wstring();

wchar_t tempDirPath[MAX_PATH];
dwRet = GetTempPathW (MAX_PATH, tempDirPath);
if( dwRet == 0 ) return wstring();

wstring directoryName,zipFilePath;
_WIN32_FIND_DATAW FindFileData;
HANDLE hFind = FindFirstFileW(directoryPath.c_str(), &FindFileData);
if (FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
{
directoryName = FindFileData.cFileName;
}

zipFilePath = wstring(tempDirPath)+L"\\"+directoryName+L".zip";
zipFile zf;
int errclose;

#ifdef USEWIN32IOAPI
zlib_filefunc64_def ffunc;
fill_win32_filefunc64W (&ffunc);
zf = zipOpen2_64(zipFilePath.c_str(),(opt_overwrite==2) ? 2 : 0,NULL,&ffunc);
# else
zf = zipOpen64(zipFilePath.c_str(),(opt_overwrite==2) ? 2 : 0);
# endif

if (zf == NULL)
{
//printf("error opening %s\n",filename_try);
err= ZIP_ERRNO;
}
else
{
//printf("creating %s\n",filename_try);
}

for(list<wstring>::iterator it = fileList.begin() ; it!=fileList.end();++it)
{
FILE * fin;
int size_read;
//const char* filenameinzip = (*it).c_str();
wstring filenameinzip = (*it).c_str();
wchar_t szOut[MAX_PATH];

PathRelativePathToW(szOut,
directoryPath.c_str(),
FILE_ATTRIBUTE_DIRECTORY,
filenameinzip.c_str(),
FILE_ATTRIBUTE_NORMAL);

wchar_t *savefilenameinzip;
zip_fileinfo zi;
unsigned long crcFile=0;
int zip64 = 0;

zi.tmz_date.tm_sec = zi.tmz_date.tm_min = zi.tmz_date.tm_hour =
zi.tmz_date.tm_mday = zi.tmz_date.tm_mon = zi.tmz_date.tm_year = 0;
zi.dosDate = 0;
zi.internal_fa = 0;
zi.external_fa = 0;
filetime(szOut,&zi.tmz_date,&zi.dosDate);

if ((password != NULL) && (err==ZIP_OK))
err = getFileCrc(szOut,buf,size_buf,&crcFile);

zip64 = isLargeFile(szOut);

/* The path name saved, should not include a leading slash. */
/*if it did, windows/xp and dynazip couldn't read the zip file. */
savefilenameinzip = szOut;
while( savefilenameinzip[0] == '\\' || savefilenameinzip[0] == '/' )
{
savefilenameinzip++;
}

string outstr = utf8_encode(savefilenameinzip);
//char * op = (char*)outstr.c_str();


err = zipOpenNewFileInZip3_64( zf,outstr.c_str(),&zi,
NULL,0,NULL,0,NULL /* comment*/,
(opt_compress_level != 0) ? Z_DEFLATED : 0,
opt_compress_level,0,
/* -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, */
-MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY,
password,crcFile, zip64);

if (err != ZIP_OK)
{
//printf("error in opening %s in zipfile\n",szOut);
}
else
{
//fin = FOPEN_FUNC(szOut,"rb");
fin = _wfopen(szOut,L"rb");

if (fin==NULL)
{
err=ZIP_ERRNO;
//printf("error in opening %s for reading\n",szOut);
}
}

if (err == ZIP_OK)
do
{
err = ZIP_OK;
size_read = (int)fread(buf,1,size_buf,fin);
if (size_read < size_buf)
if (feof(fin)==0)
{
//printf("error in reading %s\n",szOut);
err = ZIP_ERRNO;
}

if (size_read>0)
{
err = zipWriteInFileInZip (zf,buf,size_read);
if (err<0)
{
//printf("error in writing %s in the zipfile\n",szOut);
}

}
} while ((err == ZIP_OK) && (size_read>0));

if (fin)
fclose(fin);

if (err<0)
err=ZIP_ERRNO;
else
{
err = zipCloseFileInZip(zf);
if (err!=ZIP_OK)
{
//printf("error in closing %s in the zipfile\n",szOut);
}
}
}

errclose = zipClose(zf,NULL);
if (errclose != ZIP_OK)
{
//printf("error in closing %s\n",filename_try);
}

free(buf);
// Change back the executabe context
_wchdir(cCurrentPath);
return zipFilePath;
}

最佳答案

根据 ZIP 标准(the standard),在 ZIP 文件中存储 UTF-8 文件名的官方方法是设置“通用标志位 11”(general purpose bit 11)。查看 minizip 源代码,在我看来 minizip 在任何情况下都不会为您设置此位,并且 zipOpenNewFileInZip3_64 没有提供传递此位的方法。然而,有一个 zipOpenNewFileInZip4_64 接受另外两个参数:versionMadeBy 和 flagBase。因此,您可以通过如下更改调用来按照标准存储 UTF-8 文件名:

err = zipOpenNewFileInZip4_64(zf, outstr.c_str(), […], crcFile, 36, 1<<11, zip64);

这是假设 outstr 实际上确实包含文件名的有效 UTF-8 编码,源代码建议它应该如此,但我尚未验证。我建议您打印 outstr 字节的十六进制值来验证这一点。除非我在这个过程中把字符串乱码了,否则你的“统一码.txt”应该变成十六进制UTF-8的e7 b5 b1 e4 b8 80 e7 a2 bc 2e 74 78 74

有关此 versionMadeBy 字段(我在调用中将其设置为 36)的详细信息,请参阅标准的第 4.4.2 节。这取决于您使用的平台,zipfi 参数(在您的情况下为 &zi)的文件属性的格式,以及所有内容都遵循的标准版本。当您使用 unicode 文件名时,我会说您使用的是标准的 6.3.* 版,因此低字节应该是 36。由于 minizip.c 包装器根本不存储任何文件属性,因此您不必在那里指定平台。从这些行可以看出缺少属性:

    zi.internal_fa = 0;
zi.external_fa = 0;

请注意,尽管该标准确实提供了一种表示 unicode 文件名的方法,但该部分是在 2006 年才添加的,并且可能仍有许多 ZIP 应用程序不支持它。因此,即使您的存档是正确的,您的解压缩实用程序仍可能会错误地解压缩此文件,将 UTF-8 字节解释为代码页 437 或拉丁语 1 或类似内容。

关于c++ - 如何将 minizip 包装器转换为 unicode?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/14625784/

30 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com