gpt4 book ai didi

linux - 以纯文本形式存档/打包包含内容的目录?

转载 作者:太空宇宙 更新时间:2023-11-04 12:31:32 26 4
gpt4 key购买 nike

在 Linux/bash 下,如何获得目录内容的纯文本表示? (请注意,这里的“纯文本”是指“UTF-8”)。

换句话说,我如何将一个目录(包含内容——包括二进制文件)“打包”或“归档”为一个纯文本文件——这样我就可以稍后“解压”它,并获得与它相同的目录内容?

最佳答案

我对这个问题感兴趣已有一段时间,我想我终于写出了一个在 Python 2.7 和 3.4 下都能运行的脚本——不过,我仍然想知道是否还有其他工具可以做到同样的事情。下面是对应的 gist(其中附有更多注释):

https://gist.github.com/anonymous/1a68bf2c9134fd5312219c8f68713632

此外,我在此处(下方)贴出一个略有删节的版本以供参考。

用法是:归档/打包成.json文本文件:

python archdir2text-json.py -a /tmp > myarchdir.json

... 并将 .json 文本文件解压到当前(调用)目录中:

python archdir2text-json.py -u myarchdir.json

二进制文件作为 base64 处理。

这是脚本:

archdir2text-json.py

#!/usr/bin/env python

import pprint, inspect
import argparse
import os
import stat
import errno
import base64
import codecs

class SmartDescriptionFormatter(argparse.RawDescriptionHelpFormatter):
    """Help formatter that re-wraps descriptions marked with an ``R|`` prefix.

    A description starting with ``R|`` is split into lines, each line is
    wrapped independently to the available width, and blank lines are kept
    as paragraph separators.  Any other description is handled exactly as
    ``argparse.RawDescriptionHelpFormatter`` would handle it.
    """

    def _fill_text(self, text, width, indent):
        """Return *text* laid out for the help output.

        Args:
            text: The description/epilog text handed over by argparse.
            width: Maximum line width to wrap to.
            indent: Indentation string; only used for text without the
                ``R|`` marker (the ``R|`` branch does not indent).

        Returns:
            The formatted text as a single newline-joined string.
        """
        if not text.startswith('R|'):
            return argparse.RawDescriptionHelpFormatter._fill_text(self, text, width, indent)

        # Use the public module: the private alias ``argparse._textwrap`` is
        # an implementation detail that newer Python releases do not provide.
        import textwrap

        wrapped_lines = []
        for paragraph in text[2:].splitlines():
            # wrap() yields [] for an empty paragraph; keep it as a blank line.
            wrapped_lines.extend(textwrap.wrap(paragraph, width) or [""])
        return '\n'.join(wrapped_lines)

# Byte values treated as "text": a handful of control codes (BEL, BS, TAB,
# LF, FF, CR, ESC) plus everything from 0x20 to 0xff except DEL (0x7f).
textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | (set(range(0x20, 0x100)) - {0x7f}))


def is_binary_string(bytes):
    """Return True when *bytes* contains at least one non-text byte.

    Every byte listed in ``textchars`` is deleted from the input; whatever
    is left over is a byte that is not considered text.  An empty input is
    therefore reported as not binary.
    """
    # The parameter keeps its original name (it shadows the builtin) so the
    # callable's signature stays the same as before.
    leftover = bytes.translate(None, textchars)
    return len(leftover) > 0


# Working directory at the time this module is loaded.
cwd = os.getcwd()

# The win32 bindings are only needed (and only imported) on Windows.
if os.name == 'nt':
    import win32api
    import win32con


def folder_is_hidden(p):
    """Tell whether the entry at path *p* counts as hidden.

    On Windows the result is the file-attribute value masked with the
    HIDDEN and SYSTEM flags (non-zero means hidden).  On every other
    platform the result is a bool: whether the last path component starts
    with a dot.
    """
    if os.name != 'nt':
        # linux-osx
        leaf = os.path.basename(p)
        return leaf.startswith('.')

    hidden_mask = win32con.FILE_ATTRIBUTE_HIDDEN | win32con.FILE_ATTRIBUTE_SYSTEM
    return win32api.GetFileAttributes(p) & hidden_mask

def path_hierarchy(path):
    """Describe *path* (a file or a directory tree) as a JSON-serialisable dict.

    Every node carries:
      * ``type``: ``'folder'`` or ``'file'``; when listing the path failed
        for any reason other than "not a directory", the error text is
        appended to ``'folder'``.
      * ``name``: last component of the path.
      * ``path``: the path exactly as it was passed in.

    Folders additionally get ``children`` (a list of nodes; sub-directories
    reported as hidden by ``folder_is_hidden`` are left out).  Files get
    ``ftype`` -- ``'txt'``, ``'bin'``, ``'fifo'`` or, if the file could not
    be read, the error text -- and, for ``'txt'``/``'bin'``, ``content``
    (decoded UTF-8 text or base64 text respectively).

    Args:
        path: Path of the file or directory to describe.

    Returns:
        The node dict described above.
    """
    hierarchy = {
        'type': 'folder',
        # abspath() drops trailing separators and resolves '.'/'..', so that
        # inputs such as "dir/" or "." still yield a usable entry name
        # instead of '' or '.'.
        'name': os.path.basename(os.path.abspath(path)),
        'path': path,
    }

    # Only the listing itself is guarded: an error raised while describing a
    # child must not be mistaken for a problem with this directory.
    try:
        entries = os.listdir(path)
    except OSError as e:
        if e.errno == errno.ENOTDIR:
            hierarchy['type'] = 'file'
        else:
            hierarchy['type'] += " " + str(e)
    else:
        children = []
        for entry in entries:
            child_path = os.path.join(path, entry)
            if os.path.isdir(child_path) and folder_is_hidden(child_path):
                continue
            children.append(path_hierarchy(child_path))
        hierarchy['children'] = children

    if hierarchy['type'] == 'file':
        try:
            if stat.S_ISFIFO(os.stat(path).st_mode):
                # Opening a FIFO for reading could block, so only record it.
                ftype = "fifo"
            else:
                with open(path, 'rb') as source:
                    data = source.read()
                text = None
                if not is_binary_string(data):
                    try:
                        text = data.decode("utf-8")
                    except UnicodeDecodeError:
                        # Bytes >= 0x80 pass the text heuristic without being
                        # valid UTF-8; store such files as binary rather than
                        # losing their content.
                        text = None
                if text is None:
                    ftype = "bin"
                    hierarchy['content'] = base64.b64encode(data).decode("utf-8")
                else:
                    ftype = "txt"
                    hierarchy['content'] = text
        except (IOError, OSError) as e:
            # Best effort: an unreadable file is recorded with the error text
            # in place of its type and without content.
            ftype = str(e)
        hierarchy['ftype'] = ftype
    return hierarchy

def recurse_unpack(inobj, relpath=""):
    """Recreate on disk the folder/file tree described by *inobj*.

    Args:
        inobj: A node dict with ``type`` and ``name``; folders carry
            ``children`` (a list of nodes), files carry ``ftype`` and, for
            ``'txt'``/``'bin'``, ``content``.
        relpath: Prefix prepended verbatim to the entry name (it must end
            with a path separator when non-empty).  Defaults to the current
            directory.

    Raises:
        ValueError: If an entry name is empty, ``.``/``..`` or contains a
            path separator -- such a name could place files outside of the
            unpack location.
        OSError: If a folder cannot be created (e.g. it already exists) or
            a file cannot be written.

    Nodes whose ``type`` is neither ``'folder'`` nor ``'file'`` are skipped,
    as are files whose ``ftype`` is neither ``'txt'`` nor ``'bin'`` (only an
    informational line is written to stderr for the latter).
    """
    # ``sys`` is only imported under this file's ``__main__`` guard; import it
    # locally so the function also works when the module is imported.
    import sys

    kind = inobj['type']
    if kind not in ("folder", "file"):
        return

    name = inobj['name']
    separators = [sep for sep in (os.sep, os.altsep) if sep]
    if name in ("", ".", "..") or any(sep in name for sep in separators):
        # The archive is external input: never let a name escape relpath.
        raise ValueError("Refusing to unpack unsafe entry name: %r" % (name,))

    target = relpath + name
    if kind == "folder":
        sys.stderr.write("folder name: " + target + os.linesep)
        os.mkdir(target)
        for tchild in inobj['children']:
            recurse_unpack(tchild, relpath=target + os.sep)
    else:
        sys.stderr.write("file name: " + target + os.linesep)
        ftype = inobj['ftype']
        if ftype == "txt":
            with codecs.open(target, "w", "utf-8") as text_file:
                text_file.write(inobj['content'])
        elif ftype == "bin":
            with open(target, "wb") as bin_file:
                bin_file.write(base64.b64decode(inobj['content']))
if __name__ == '__main__':
    # Script entry point: either archive the given paths to JSON on stdout
    # (-a) or unpack the given JSON archive(s) into the current directory (-u).
    import json
    import sys

    cli = argparse.ArgumentParser(
        formatter_class=SmartDescriptionFormatter,
        description="""R|Command-line App that packs/archives (and vice-versa) a directory to a plain-text .json file; should work w/ both Python 2.7 and 3.4

see full help text in https://gist.github.com/anonymous/1a68bf2c9134fd5312219c8f68713632""")

    cli.add_argument(
        'input_paths', type=str, nargs='*', default=['.'],
        help='Paths to files/directories to include in the archive; or path to .json archive file')

    # Exactly one of the two modes has to be selected.
    mode = cli.add_mutually_exclusive_group(required=True)
    mode.add_argument(
        '-a', '--archive', action='store_true',
        help="Interpret input_paths as paths to files/directories, and archive them to a .json file (output to stdout)")
    mode.add_argument(
        '-u', '--unpack', action='store_true',
        help="Interpret input_paths as path to an archive .json file, and unpack it in the current directory")

    args = cli.parse_args()

    # Both modes begin by dropping paths that do not exist, reporting each
    # dropped path on stderr.
    valid_input_paths = []
    for candidate in args.input_paths:
        if os.path.exists(candidate):
            valid_input_paths.append(candidate)
        else:
            sys.stderr.write("Ignoring invalid input path: " + candidate + os.linesep)

    if args.archive:
        sys.stderr.write("Encoding input path(s): " + str(valid_input_paths) + os.linesep)
        trees = []
        for valid_path in valid_input_paths:
            trees.append(path_hierarchy(valid_path))
        # The archive is one JSON array holding one tree per input path.
        print(json.dumps(trees, indent=2, sort_keys=True, separators=(',', ': ')))
    elif args.unpack:
        for archive_path in valid_input_paths:
            with open(archive_path) as data_file:
                archived_trees = json.load(data_file)
                for tree in archived_trees:
                    recurse_unpack(tree)

关于linux - 以纯文本形式存档/打包包含内容的目录?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/43144458/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com