gpt4 book ai didi

python - 持久化 hashlib 状态

转载 作者:太空宇宙 更新时间:2023-11-03 11:11:24 30 4
gpt4 key购买 nike

我想创建一个 hashlib 实例,update() 它,然后以某种方式保留它的状态。稍后,我想使用此状态数据重新创建对象,并继续 update() 它。最后,我想获取总累积运行数据的 hexdigest()。状态持久性必须在多次运行中存活。

例子:

import hashlib
m = hashlib.sha1()
m.update('one')
m.update('two')
# somehow, persist the state of m here (this is the step being asked about)

# later, possibly in another process:
# recreate m from the persisted state
m.update('three')
m.update('four')
print m.hexdigest()
# at this point, m.hexdigest() should be equal to
# hashlib.sha1('onetwothreefour').hexdigest()

编辑:

我在 2010 年没有找到使用 Python 执行此操作的好方法,最终我用 C 语言编写了一个小的帮助应用程序来完成此操作。但是,下面有一些我当时无法获得或不知道的很好的答案。

最佳答案

您可以使用 ctypes 以这种方式完成,不需要 C 中的辅助应用程序:-

rehash.py

#! /usr/bin/env python

''' A resumable implementation of SHA-256 using ctypes with the OpenSSL crypto library

Written by PM 2Ring 2014.11.13
'''

import ctypes.util
import os
from ctypes import *

# Number of 32-bit words in the context's pending-input buffer
# (16 words = one 64-byte SHA-256 block).
SHA_LBLOCK = 16
# Length of a SHA-256 digest in bytes.
SHA256_DIGEST_LENGTH = 32


class SHA256_CTX(Structure):
    """ctypes mirror of OpenSSL's ``SHA256_CTX`` (``struct SHA256state_st``).

    OpenSSL declares every word of this struct as ``SHA_LONG``, which is
    ``unsigned int`` (32 bits).  The words are therefore mapped to
    ``c_uint32`` rather than ``c_long``: ``c_long`` is 8 bytes on LP64
    systems (64-bit Linux/macOS), which made the Python-side layout 216
    bytes instead of the 112 bytes OpenSSL actually reads and writes.

    NOTE: contexts pickled with the old ``c_long`` layout on an LP64 system
    have a different size and cannot be loaded into this structure.
    """
    _fields_ = [
        ("h", c_uint32 * 8),               # SHA_LONG h[8]
        ("Nl", c_uint32),                  # SHA_LONG Nl
        ("Nh", c_uint32),                  # SHA_LONG Nh
        ("data", c_uint32 * SHA_LBLOCK),   # SHA_LONG data[SHA_LBLOCK]
        ("num", c_uint),                   # unsigned int num
        ("md_len", c_uint),                # unsigned int md_len
    ]


# Output buffer type handed to SHA256_Final(): one unsigned byte per digest byte.
HashBuffType = c_ubyte * SHA256_DIGEST_LENGTH

# Handle to the OpenSSL library that exports SHA256_Init / SHA256_Update /
# SHA256_Final.  Those symbols live in libcrypto, so that is the library
# loaded here; find_library() resolves the installed (possibly versioned)
# file name so the unversioned development symlink is not required.  If it
# finds nothing, fall back to the plain "libcrypto.so" name.
if os.name == "nt":
    crypto = cdll.LoadLibrary("libeay32.dll")
else:
    crypto = cdll.LoadLibrary(ctypes.util.find_library("crypto") or "libcrypto.so")

class sha256(object):
    """Resumable SHA-256 hasher built on OpenSSL's SHA256_* functions.

    The whole hashing state lives in ``self.ctx`` (a ``SHA256_CTX`` ctypes
    structure), so an instance -- or just its ``ctx`` -- can be copied or
    pickled and hashing continued later.  The method names follow hashlib:
    ``update``, ``copy``, ``digest``, ``hexdigest`` and ``digest_size``.
    """
    digest_size = SHA256_DIGEST_LENGTH

    def __init__(self, datastr=None):
        """Start a new hash, optionally feeding ``datastr`` straight away."""
        self.ctx = SHA256_CTX()
        crypto.SHA256_Init(byref(self.ctx))
        if datastr:
            self.update(datastr)

    def update(self, datastr):
        """Feed the byte string ``datastr`` into the running hash."""
        # SHA256_Update() takes its length argument as size_t; passing a
        # c_int would truncate lengths that do not fit in a C int.
        crypto.SHA256_Update(byref(self.ctx), datastr, c_size_t(len(datastr)))

    def _copy_ctx(self):
        """Return an independent copy of the current context."""
        ctx = SHA256_CTX()
        # Assigning through a pointer copies the structure contents by value.
        pointer(ctx)[0] = self.ctx
        return ctx

    def copy(self):
        """Return a new hasher holding a copy of this one's state."""
        other = sha256()
        other.ctx = self._copy_ctx()
        return other

    def digest(self):
        """Return the digest of the data fed so far as a byte string.

        May be called before hashing is really finished: SHA256_Final()
        clears the context it is given, so it is run on a throw-away copy
        and ``self.ctx`` is left untouched for further ``update()`` calls.
        """
        ctx = self._copy_ctx()
        hashbuff = HashBuffType()
        crypto.SHA256_Final(hashbuff, byref(ctx))
        return str(bytearray(hashbuff))

    def hexdigest(self):
        """Return the digest as a lowercase hexadecimal string."""
        return self.digest().encode('hex')

# Tests
def main():
    """Print digests from rehash.sha256 followed by the same from hashlib.

    Nothing is asserted; the two sections of output are meant to be compared
    by eye.  The rehash section also round-trips a partially fed hasher
    through cPickle and takes a copy() of it.
    """
    import cPickle
    import hashlib

    data = ("Nobody expects ", "the spammish ", "imposition!")
    head, middle, tail = data
    whole = ''.join(data)

    print("rehash\n")

    one_shot = sha256(whole)
    print(one_shot.hexdigest())
    print(repr(one_shot.digest()))
    print("%s %s" % ("digest size =", one_shot.digest_size))
    print("")

    partial = sha256()
    partial.update(head)
    print(partial.hexdigest())

    # Test pickling
    blob = cPickle.dumps(partial, -1)
    print("%s %s" % ("Pickle length:", len(blob)))
    restored = cPickle.loads(blob)

    restored.update(middle)
    print(restored.hexdigest())

    # Test copying. Note that copy can be pickled
    # (the copy is taken before the last update and is not inspected further)
    snapshot = restored.copy()

    restored.update(tail)
    print(restored.hexdigest())

    # Verify against hashlib.sha256()
    print("\nhashlib\n")

    reference = hashlib.sha256(whole)
    print(reference.hexdigest())
    print(repr(reference.digest()))
    print("%s %s" % ("digest size =", reference.digest_size))
    print("")

    ref_partial = hashlib.sha256(head)
    print(ref_partial.hexdigest())

    ref_partial.update(middle)
    print(ref_partial.hexdigest())

    # Test copying. Note that hashlib copy can NOT be pickled
    ref_copy = ref_partial.copy()
    ref_copy.update(tail)
    print(ref_copy.hexdigest())


if __name__ == '__main__':
    main()

resumable_SHA-256.py

#! /usr/bin/env python

''' Resumable SHA-256 hash for large files using the OpenSSL crypto library

The hashing process may be interrupted by Control-C (SIGINT) or SIGTERM.
When a signal is received, hashing continues until the end of the
current chunk; then the current file position, the total file size, and
the sha object are saved to a file. The name of this file is formed by
appending '.hash' to the name of the file being hashed.

Just re-run the program to resume hashing. The '.hash' file will be deleted
once hashing is completed.

Written by PM 2Ring 2014.11.14
'''

import cPickle as pickle
import os
import signal
import sys

import rehash

# Stop flag: the signal handler sets this to True and the hashing loop
# checks it between chunks.
quit = False

# Bytes per read() call.
blocksize = 2 ** 16  # 64kB
# Number of blocks hashed between checks of the stop flag.
blocksperchunk = 2 ** 8

# Bytes per chunk.
chunksize = blocksperchunk * blocksize

def handler(signum, frame):
    """Signal handler: report the signal and raise the module's stop flag.

    ``signum`` is the received signal number; ``frame`` is unused.
    """
    global quit
    notice = "\nGot signal %d, cleaning up." % signum
    print(notice)
    quit = True


def do_hash(fname, filesize):
    """Hash ``fname`` with SHA-256, resuming from ``fname + '.hash'`` if present.

    The file is read in blocks of ``blocksize`` bytes; the module-level
    ``quit`` flag is checked once per chunk of ``blocksperchunk`` blocks.
    When hashing stops early, the tuple ``(pos, fsize, sha)`` is pickled to
    the '.hash' file so a later run can continue; once the end of the file
    has been reached, any '.hash' file is removed.

    Args:
        fname: path of the file to hash.
        filesize: current size of that file in bytes; must equal the size
            recorded in an existing '.hash' file.

    Returns:
        ``(finished, pos, hexdigest)``: whether the end of the file was
        reached, the number of bytes hashed so far, and the hex digest of
        those bytes.

    Raises:
        SystemExit: exit status 1 if the recorded size does not match
            ``filesize``.
    """
    hashname = fname + '.hash'
    if os.path.exists(hashname):
        # NOTE(security): unpickling can execute arbitrary code -- only
        # resume from '.hash' files this program wrote itself.
        with open(hashname, 'rb') as f:
            pos, fsize, sha = pickle.load(f)
        if fsize != filesize:
            print("Error: file size of '%s' doesn't match size recorded in '%s'" % (fname, hashname))
            print("%d != %d. Aborting" % (fsize, filesize))
            # sys.exit rather than the site-provided exit() helper.
            sys.exit(1)
    else:
        pos, fsize, sha = 0, filesize, rehash.sha256()

    finished = False
    with open(fname, 'rb') as f:
        f.seek(pos)
        while not (quit or finished):
            for _ in xrange(blocksperchunk):
                block = f.read(blocksize)
                if not block:
                    finished = True
                    break
                sha.update(block)
                # Count the bytes actually read so pos never runs past the
                # end of the file on the final, partial chunk.
                pos += len(block)

            # An empty file has fsize == 0; report it as complete instead
            # of dividing by zero.
            percent = 100.0 * pos / fsize if fsize else 100.0
            sys.stderr.write(" %6.2f%% of %d\r" % (percent, fsize))

    # Decide on `finished`, not `quit`: a signal that arrives while the last
    # chunk is being read must not make a complete hash look incomplete.
    if not finished:
        with open(hashname, 'wb') as f:
            pickle.dump((pos, fsize, sha), f, -1)
    elif os.path.exists(hashname):
        os.remove(hashname)

    return finished, pos, sha.hexdigest()


def main():
    """Command-line entry point: hash the file named by the sole argument.

    Prints usage and exits with status 1 unless exactly one argument is
    given.  Otherwise installs ``handler`` for SIGINT and SIGTERM, runs
    ``do_hash`` and prints either the finished digest followed by the file
    name, or the partial digest together with the progress so far.

    Raises:
        SystemExit: exit status 1 on wrong usage.
    """
    if len(sys.argv) != 2:
        print("Resumable SHA-256 hash of a file.")
        print("Usage:\npython %s filename\n" % sys.argv[0])
        # sys.exit instead of exit(): the latter is a helper injected by the
        # site module for interactive use and is missing under `python -S`.
        sys.exit(1)

    fname = sys.argv[1]
    filesize = os.path.getsize(fname)

    # Both signals only set a flag; do_hash() saves its state and returns.
    signal.signal(signal.SIGINT, handler)
    signal.signal(signal.SIGTERM, handler)

    finished, pos, hexdigest = do_hash(fname, filesize)
    if finished:
        print("%s %s" % (hexdigest, fname))
    else:
        print("sha-256 hash of '%s' incomplete" % fname)
        print("%s" % hexdigest)
        print("%d / %d bytes processed." % (pos, filesize))


if __name__ == '__main__':
    main()

演示

import rehash
import pickle

# Hash the first part of the message and serialise only the raw context.
first_part = rehash.sha256("Hello ")
saved_state = pickle.dumps(first_part.ctx)

# Later: start from a fresh hasher, swap in the saved context and carry on.
resumed = rehash.sha256()
resumed.ctx = pickle.loads(saved_state)
resumed.update("World")
print(resumed.hexdigest())

输出

a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e

注:感谢PM2Ring的精彩代码。

关于python - 持久化 hashlib 状态,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/2130892/

30 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com