作者热门文章
- html - 出于某种原因,IE8 对我的 Sass 文件中继承的 html5 CSS 不友好?
- JMeter 在响应断言中使用 span 标签的问题
- html - 在 :hover and :active? 上具有不同效果的 CSS 动画
- html - 相对于居中的 html 内容固定的 CSS 重复背景?
我想计算一组结果的哈希 (sha256),我知道该怎么做:
SELECT digest( string_agg(id_,':'), 'sha256') from mytable order by id_;
它有效,但它首先连接所有记录,最后计算哈希值。我希望它是增量的,以避免大型数据集(数百万行)可能出现的内存问题,例如:
SELECT digest_agg(id_, ':', 'sha256') from mytable order by id_;
确保哈希在途中逐行递增计算。
最佳答案
好的,我终于为此编写了一个 c 扩展。我将它张贴在这里以防它对某人有用。
/* ------- OS Includes ------------ */
#include <stdio.h>
#include <openssl/rsa.h>
#include <openssl/pem.h>
#include <openssl/err.h>
/* ----- PostgreSQL Includes -------*/
#include "postgres.h"
#include "fmgr.h"
#include "funcapi.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
PG_MODULE_MAGIC;
typedef struct sha256_state
{
SHA256_CTX* sha256;
bool has_data;
} SHA256_STATE;
/* --------- Prototypes ---------- */
void _PG_init(void);
void _PG_fini(void);
Datum dv_sha256_agg_sfunc(PG_FUNCTION_ARGS);
Datum dv_sha256_agg_final(PG_FUNCTION_ARGS);
SHA256_STATE* sha256_init(void);
void sha256_update(SHA256_STATE* state, char* string, int32 string_size);
void sha256_final(SHA256_STATE* state, char outputBuffer[65]);
/* ------ Version1 convention ---- */
PG_FUNCTION_INFO_V1(dv_sha256_agg_sfunc);
PG_FUNCTION_INFO_V1(dv_sha256_agg_final);
// -----------------------------------------------------------
// Implementations
// -----------------------------------------------------------
void _PG_init(void) {/* Do nothing */}
void _PG_fini(void) {/* Do nothing */}
Datum dv_sha256_agg_sfunc(PG_FUNCTION_ARGS) {
SHA256_STATE* state;
bool is_first = PG_ARGISNULL(0);
text* string;
int32 string_size;
char* data;
char SEP[] = {'\n'}; // Hardcoded separator (can be improved)
if (is_first) {
// First iteration: Create the state
state = sha256_init();
}
else {
// Next iterations: Restore the state
state = (SHA256_STATE*) PG_GETARG_POINTER(0);
}
if (PG_ARGISNULL(1)) {
PG_RETURN_POINTER(state);
}
// Get current chunk
string = PG_GETARG_TEXT_PP(1);
string_size = VARSIZE_ANY_EXHDR(string);
data = VARDATA_ANY(string);
// Add separator
if (state->has_data) {
sha256_update(state, SEP, 1);
}
// Update state
sha256_update(state, data, string_size);
state->has_data = true;
// Updated state
PG_RETURN_POINTER(state);
}
Datum dv_sha256_agg_final(PG_FUNCTION_ARGS) {
SHA256_STATE* state;
char out[65];
text* hash = (text*) palloc(65 + VARHDRSZ);
if (PG_ARGISNULL(0)) {
PG_RETURN_NULL();
}
else {
state = (SHA256_STATE*) PG_GETARG_POINTER(0);
sha256_final(state, out);
SET_VARSIZE(hash, 65 + VARHDRSZ);
memcpy(VARDATA(hash), out, 65);
PG_RETURN_TEXT_P(hash);
}
}
SHA256_STATE* sha256_init() {
SHA256_STATE* state = (SHA256_STATE*) palloc(sizeof(SHA256_STATE));
state->sha256 = (SHA256_CTX*) palloc(sizeof(SHA256_CTX));
SHA256_Init(state->sha256);
state->has_data = false;
return state;
}
void sha256_update(SHA256_STATE* state, char* string, int32 string_size) {
SHA256_Update(state->sha256, string, string_size);
}
void sha256_final(SHA256_STATE* state, char outputBuffer[65]) {
int i;
unsigned char hash[SHA256_DIGEST_LENGTH];
SHA256_Final(hash, state->sha256);
for(i = 0; i < SHA256_DIGEST_LENGTH; i++) {
sprintf(outputBuffer + (i * 2), "%02x", hash[i]);
}
outputBuffer[64] = 0;
}
聚合定义:
CREATE FUNCTION dv_sha256_agg_sfunc(state internal, input text)
RETURNS internal AS 'MODULE_PATHNAME',
'dv_sha256_agg_sfunc'
LANGUAGE C VOLATILE;
CREATE FUNCTION dv_sha256_agg_final(state internal)
RETURNS text AS 'MODULE_PATHNAME',
'dv_sha256_agg_final'
LANGUAGE C VOLATILE;
CREATE AGGREGATE dv_sha256_agg(input text) (
SFUNC = dv_sha256_agg_sfunc,
STYPE = internal,
FINALFUNC = dv_sha256_agg_final
);
测试:
select
dv_sha256_agg(id_::text),
encode(digest(string_agg(id_::text,E'\n'),'sha256'),'hex')
from
generate_series(1,100) id_(id_);
结果
dv_sha256_agg | encode
------------------------------------------------------------------+------------------------------------------------------------------
4187fe63fa78d8b4333e6ffc9122e0273ddf90251ced32e1e5b398639c193c87 | 4187fe63fa78d8b4333e6ffc9122e0273ddf90251ced32e1e5b398639c193c87
(1 row)
注意事项:
关于postgresql - 如何在不首先连接所有内容的情况下逐步散列 postgresql 中的聚合结果,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/59109684/
我是一名优秀的程序员,十分优秀!