gpt4 book ai didi

c - Hashtable的有效实现,具有可识别缓存的本地性(对本地性敏感的哈希表)

转载 作者:行者123 更新时间:2023-12-02 04:07:43 26 4
gpt4 key购买 nike

我正在尝试使用C数据结构(哈希表)。我没有使用任何预先构建的哈希表库(如STL),因为我想对它的工作方式有更好的了解。
因此,在这里我创建了一个哈希表,其中包含元素列表,其中每个元素都包含一个键和一个字符串元素数据(字符数组),以及字符串元素数据的长度。

我的实现有效,但是在与一位同事讨论之后,我被告知我的实现效率不高,尤其是在以下情况下:我的实现不支持缓存,导致哈希表查找效率不高。我不太明白他的解释。

所以我想知道,了解缓存的位置实现实际上意味着什么?

如何在查找时使用该缓存感知的位置属性来使哈希表实现变得更加高效?是否有更好的方法来为此构建结构,以及组织元素(进行查找)的更好方法?

这是我到目前为止所做的:

哈希表

struct hashelt {
struct hashelt* he_next; /* One way linked list pointer */
char* he_key; /* Pointer to element's key */
char* he_data; /* Pointer to element's data */
int he_data_len; /* Length of element's data */
};

typedef struct hashelt hashelt;

struct hashtbl {
hashelt** ht_links; /* Array of pointers to elemsnts */
int ht_prime; /* Length of hash table - it is a prime number */

};

typedef struct hashtbl hashtbl;

int prime_check( int num );

hashtbl* hashtbl_init( int tbl_size );

int hashtbl_key_convert( hashtbl* htable, char* hkey );

hashelt* hashtbl_lookup( hashtbl* htable, char* hkey, char* hdata );

int hashtbl_add( hashtbl* htable, char* hkey, char* hdata );

void hashtbl_print( hashtbl* htable );

HASHTBL.c
/*
* prime_check
* Check if num is a prime or not.
* num Number to use as an upper bound.
*
* The functions returns 0 for a prime and 1 otherwise.
*/

int prime_check( int num ) {

/* Local declarations */
int i; /* Loop counter */

/* Search through the first sqrt(num) integers */
for( i = 2; i*i < num; i++ )
if ( (num % i) == 0 )
return 1;

/* It is a prime */
return 0;
}

/*
* hashtbl_init
* Create and initialize a hash table.
* The input variable are
* tbl_size Suggested size of the table, which should be a prime or 0.
* The functions returns a pointer to a hashtbl or NULL for failure.
* The hash table is the size of the largest prime found for the suggested
* table size of the default size if 0 is given as a suggestion.
*/

hashtbl* hashtbl_init( int tbl_size ) {

/* Local declarations */
hashtbl* hp; /* Hash table pointer */
int i; /* Loop counter */

/* Initial values */
hp = NULL;

/* Determine the actual table size */
if ( tbl_size <= 0 )
tbl_size = DEFAULT_HASHTBL_LENGTH;
else if ( prime_check( tbl_size ) )
return NULL;
if ( tbl_size <= 0 )
return NULL;

/* Allocate the hash table */
if( ( hp = (hashtbl*)malloc( sizeof(hashtbl) ) ) == NULL )
return hp;

/* Allocate the linked list vector */
if( ( hp->ht_links = (hashelt**)malloc( tbl_size * sizeof(hashelt*) ) ) == NULL ) {
free( hp );
return NULL;
}

/* Fill in the hash table with no entries */
for( i = 0 ; i < tbl_size; i++ )
hp->ht_links[i] = NULL;

/* Fill in the hash table size */
hp->ht_prime = tbl_size;

/* All done */
return hp;
}


/*
* hashtbl_key_convert
* Make an index into the key chain in the hash table from a key:
* kindex = sum of the characters in hkey modulo ht_prime
* The input variable are
* htable A pointer to a hash table.
* hkey A pointer to a a key.
* The functions returns an index into the key chain.
*/

int hashtbl_key_convert( hashtbl* htable, char* hkey ) {

/* Local declarations */
int i; /* Loop counter */
int klen; /* Length of the key */
int kindex; /* Key index to return */

/* Compute the key index */
klen = strlen( hkey );
for( kindex = 0, i = 0 ; i < klen; i++ )
kindex += hkey[i];
kindex = kindex % htable->ht_prime;

/* All done */
return kindex;
}


/*
* hashtbl_lookup
* Lookup a (key,data) in a hash table.
* The input variable are
* htable A pointer to a hash table.
* hkey A key.
* hdata A pointer to the data.
* The functions returns 0 for found and 1 for not found.
*/

hashelt* hashtbl_lookup( hashtbl* htable, char* hkey, char* hdata ) {

/* Local declarations */
hashelt* hep; /* Hash element pointer */
int i; /* Loop counter */
int key; /* Hash table key index */

/* Get key index from hkey */
key = hashtbl_key_convert( htable, hkey );

/* Search through the hash table, including collicions */
for( hep = htable->ht_links[key] ; hep != NULL ; hep = hep->he_next )
if ( strcmp( hep->he_key, hkey ) == 0 )
return hep;

/* Not found */
return NULL;
}

/*
* hashtbl_add
* Add a (key,data) to a hash table.
* The input variable are
* htable A pointer to a hash table.
* hkey A key.
* hdata A pointer to the data.
* The functions returns 0 for success and 1-4 for failure.
*
*/

int hashtbl_add( hashtbl* htable, char* hkey, char* hdata ) {

/* Local declarations */
hashelt* hep; /* Element in linked list */
hashelt* newelt; /* New element in hash table */
int i; /* Loop counter */
int dlen; /* Length of data */
int key; /* Hash table key index */

/* Get key index from hkey */
key = hashtbl_key_convert( htable, hkey );

/* Check if the (key,data) is already in the hash table */
if ( ( hep = hashtbl_lookup( htable, hkey, hdata ) ) != NULL )
if ( strcmp( hep->he_data, hdata ) == 0 )
return 1;

/* Add the data */
dlen = strlen( hdata );
hep = htable->ht_links[key];
if ( ( newelt = (hashelt*)malloc( sizeof( hashelt ) ) ) == NULL ) {
fprintf( stderr, "hashtbl_add: Cannot allocate new hash element\n" );
return 3;
}
newelt->he_next = hep;
newelt->he_data_len = dlen;
newelt->he_key = (char*)malloc( (strlen(hkey)+1) * sizeof(char) );
newelt->he_data = (char*)malloc( (dlen+1) * sizeof(char) );
if ( newelt->he_key == NULL || newelt->he_data == NULL ) {
fprintf( stderr, "hashtbl_add: Cannot allocate new hash element contents\n" );
return 4;
}
strcpy( newelt->he_key, hkey );
strcpy( newelt->he_data, hdata );
htable->ht_links[key] = newelt;

/* All done */
return 0;
}

/*
* hashtbl_print
* Print an entire hash table.
* The input variable are
* htable A pointer to a hash table.
* The functions returns 0 for success and 1-4 for failure.
*/

void hashtbl_print( hashtbl* htable ) {

/* Local declarations */
hashelt* hep; /* Element in linked list */
int i; /* Loop counter */
int l; /* Link count */

/* Initial printing */
printf( "\nHash table contents\n" );

/* Print a has tbale out, one key bucket at a time */
for( i = 0 ; i < htable->ht_prime ; i++ ) {
printf( "\nkey index %i\n", i );
hep = htable->ht_links[i];
if ( hep == NULL )
printf( " No entries in the linked list\n" );
else {
l = 0;
while( hep != NULL ) {
printf( " Element %d\n", l );
printf( " key: %s\n", hep->he_key );
printf( " data: %s\n", hep->he_data );
printf( " data_len: %d\n", hep->he_data_len );
hep = hep->he_next;
}
}
}

/* All done */
}

主文件
void make_string( char* buffer, int blen ) {

/* Local declarations */
char viable_chars[] = { "abcdefghijklmnopqrstuvwxyz-0123456789_ABCDEFGHIJKLMNOPQRSTUVWXYZ" };
char* bp; /* Pointer into buffer */
int i; /* Loop counter */
int c; /* Index into viable_chars */
static int vc_len = 0; /* Length of viable_chars */

/* Do once only */
if ( vc_len == 0 )
vc_len = strlen( viable_chars );

/* Do the fill operation on a subset of buffer */
blen = rand() % blen;
if ( blen == 0 ) blen++;
for( bp = buffer, i = 0; i < blen ; i++ ) {
c = rand() % vc_len;
*bp++ = viable_chars[c];
}
*bp++ = '\0';

}

int main( int argc, char** argv ) {

/* Local declarations */
hashtbl* htable; /* Hash table pointer */
char* kstrings; /* Pointer to key vector */
char* dstrings; /* Pointer to data vector */

int tblsize = 0; /* Hash table size */
int nkeys = 0; /* Number of keys */
int dlen = 0; /* Maximum data length for (keys,data) */
int i; /* Loop counter */

double t1; /* Time keeper */
double t2; /* Time keeper */
double mrun(); /* Get timing information */

#ifdef GOOD_SEED
/* Get a good random number seed */
struct timeval t1; /* holder for time of day (seconds, microseconds) */
gettimeofday( &t1, NULL );
srand( t1.tv_usec * t1.tv_sec );
#endif

/* Get hash table size */
printf( "Hash table size (pick a prime or bound for one): " );
scanf( "%d", &tblsize );
fflush( stdin );

/* Initialize hash table */
if( ( htable = hashtbl_init( tblsize ) ) == NULL ) {
fprintf( stderr, "Oops... hashtbl_init returned NULL\n" );
return 1;
}
printf( "Actual size of hash table is %d\n", htable->ht_prime );

/* Now fill it up */
while ( nkeys <= 0 ) {
printf( "How many keys do you want? " );
scanf( "%d", &nkeys );
fflush( stdin );
}
while ( dlen <= 0 ) {
printf( "What is the maximum character string length? " );
scanf( "%d", &dlen );
fflush( stdin );
}

/* Allocate vectors for (key,data) */
kstrings = (char*)malloc( (dlen+1) * sizeof( char ) );
dstrings = (char*)malloc( (dlen+1) * sizeof( char ) );
if ( kstrings == NULL || dstrings == NULL ) {
fprintf( stderr, "main: Could not allocate string vectors for (key,data)\n" );
return 2;
}

/* Now fill it up */
for( i = 0 ; i < nkeys ; i++ ) {
make_string( kstrings, dlen );
make_string( dstrings, dlen );
hashtbl_add( htable, kstrings, dstrings );
}

/* Print it out */
hashtbl_print( htable );

/* All done */
return 0;
}

最佳答案

如果您更改,本地会更好

struct hashtbl {
hashelt** ht_links; /* Array of pointers to elemsnts
...

成为:
struct hashtbl {
...
hashelt* elements; /* Array of elements (as last entry of struct)

想一想哈希表的外观,在您的版本中,该表由指向元素的指针组成,在我的修改中,该表实际上包含那些一个接一个地打包的结构!当然有一个缺点:空的哈希箱不仅包含空指针(如您的版本),而且还包含整个 sizeof(hashelt)。另外,您还必须确保分配的hashtbl结构不具有 sizeof(hashtbl)的大小,因为您的hashtbl 包含hashelt的:您必须分配 ht_prime*sizeof(hashelt)+sizeof(int)(第一个被要求用于ht_prime hashelt的内容,第二个被要求用于存储ht_prime本身)。

关于c - Hashtable的有效实现,具有可识别缓存的本地性(对本地性敏感的哈希表),我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/6717545/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com