原子【基础版本】(已完成插入、插入整数、查找、判断、插入浮点数)
注意:这里说的原子并不是原子操作!!!!
此处的原子源于LISP。
原子是一个指针,指向唯一的一个字符序列。
通过比较两个指针的值,即可判断两者指向的字符串是否相等,原因在于,任一字符序列只会出现一次!!!!
已完成基础的插入,后续还剩下 查找,比较等。
如果代码有任何BUG、或你有任何疑问、或建议,请提出,不胜感激。
冲突一定是会发生的,为了解决冲突,因此结构上使用了链表。
22:43 06/14
修改了散列函数,冲突明显减少。
00:39 06/15
修改了数列,虽然并未减少冲突,但是让散列值分布较之前更加均匀。
经测试读取一个9800个无重复单词的文本文档,发生冲突的几率是22%,链表的节点数均在3左右,最多节点数达到5。
16:24 06/15
查找、判断函数已完成。
19:05 06/15
已完成将长整数插入原子表。
20:53 06/16
已完成将浮点数插入原子表。
程序代码:
/*测试*/ #include <stdio.h> #include <string.h> #include "Atom.h" int main( void ) { char src[ 100 ]; char *des; while( NULL != fgets( src, 100, stdin ) ) { src[ strlen( src ) - 1 ] = '\0'; des = NewAtom( src ); printf( "%s\n", des ); } return 0; }
程序代码:
/*接口*/ #ifndef _ATOM_H #define _ATOM_H char * NewAtom( char *src ); /*将一个字符串添加到原子表中,如果该字符串已经存在,则返回位于原子表中的该字符串,否则添加到原子表并且返回位于原子表中的该字符串。 任何试图修改原子的行为是无法检测的错误。*/ char * IntAtom( long value ); /*将一个长整数转换为字符串,添加到原子表中,如果该字符串已经存在,则返回位于原子表中的该字符串,否则将该字符串添加到原子表并且返回位于原子表中的该字符串。 任何试图修改原子的行为是无法检测的错误。*/ char * DoubleAtom( double value, int length ); /*将一个浮点数转换为字符串,添加到原子表中,如果已经存在则返回原子表中的该字符串,否则将该字符串添加到原子表并且返回该字符串。 DoubleAtom()函数的第一个参数为需要添加到原子表中的浮点数,第二个参数为小数点后数字的长度。 length不可以小于0,如果小于0则返回NULL。 */ char * FindAtom( char *src ); /*在原子表中查找一个字符串,如果在表中,返回原子表中的该字符串,否则返回NULL 任何试图修改原子的行为是无法见车的错误*/ int DetermineAtom( char *src, char *dst ); /*判断两个原子指向的字符串是否相等,向该函数传递非源自原子表中的指针是无法检测的错误 相等返回1,否则返回0*/ void chongtu1( void );/*测试用函数,可删除*/ #endif
程序代码:
/*实现*/ #include <stdio.h> #include <string.h> #include <stdlib.h> #include "Atom.h" #define EvenKey 101 #define OddKey 211 #define H Hash typedef struct H *H; #define K Key struct K { unsigned long int Even; unsigned long int Odd; int Len; }; struct H{ H Link; int Len; char *Src; }; static H HashTable[ EvenKey ][ OddKey ]; static struct K HashFun( char *src ); char * NewAtom( char *src ) { struct K key; H this; H new; if( NULL == src ) return NULL; key = HashFun( src ); for( this = HashTable[ key.Even ][ key.Odd ]; NULL != this; this = this->Link ) if( key.Len == this->Len ) if( 0 == strcmp( src, this->Src ) ) return this->Src; new = ( H )malloc( sizeof( struct H ) ); if( NULL == new ) exit( EXIT_FAILURE ); new->Len = key.Len; new->Src = strdup( src ); new->Link = HashTable[ key.Even ][ key.Odd ]; HashTable[ key.Even ][ key.Odd ] = new; return new->Src; } int DetermineAtom( char *src, char *dst ) { return src == dst; } char * FindAtom( char *src ) { struct K key; H this; key = HashFun( src ); for( this = HashTable[ key.Even ][ key.Odd]; NULL != this; this = this->Link ) if( key.Len == this->Len ) if( 0 == strcmp( src, this->Src) ) return this->Src; return NULL; } #include <limits.h> void ItoS( unsigned long value, char *buffer ); char * IntAtom( long value ) { char buffer[ 100 ]; unsigned long n; if( LONG_MIN == value ) { n = LONG_MAX + 1UL; buffer[ 0 ] = '-'; ItoS( n, buffer + 1 ); } else if( 0 > value ) { n = -value; buffer[ 0 ] = '-'; ItoS( n, buffer + 1 ); } else ItoS( value, buffer );; return NewAtom( buffer ); } char * DoubleAtom( double value , int length ) { char buffer[ 100 ]; char *src; unsigned long power; double integer; double devimal; src = buffer; power = pow( 10, length ); if( 0 > value ) { value *= -1; *src++ = '-'; } devimal = modf( value, &integer ); devimal *= power; ItoS( ( unsigned long )value, src ); if( 0 != ( unsigned long )devimal ) { int length = strlen(src); src[ length++ ] = '.'; src[ length ] = '\0'; ItoS( ( unsigned long )devimal, &src[ length ]); } return NewAtom( buffer ); } void ItoS( unsigned long value, char *buffer ) { static int ix = 0; if( 0 != value / 10 ) ItoS( value / 10, buffer ); else ix = 0; buffer[ ix++ ] = value % 10 + '0'; buffer[ ix ] = '\0'; } static unsigned long int scatter[] = {763886,98427677,92132,8985211,13039125,24243037,3301847,59153526,33682303,59871667,2427280,63468416,32036220,36579419,54565969,88329145,65162120,80548711,97819618,24188516,90161124,29694574,912717 33,83294586,8342758,6132648,38282737,12987094,97193732,4179587,12266336,3036295,2585741,83131394,21439985,8521426,24979665,81878169,4426519,56103885,8278484,82839570,76081111,59512197,2518751,46884 362,1881190,69327849,4350926,3881300,20162120,25741213,65855507,86483029,51586458,73342808,56754322,40931767,40079146,74331877,49054083,32127559,355490,98962454,3614643,36314919,93276160,10015213,2 808739,387580,7826681,62762801,71611367,27236721,5992993,7143959,43431560,6222210,5467604,35719894,15008883,52829025,73163041,72121897,22794936,3976693,39427257,11816449,93372508,84363894,98438213, 69143257,52249272,83662259,71171005,93724481,2223663,49325772,53039410,93291499,2668699,28877224,38669365,6013633,74264121,89221968,23854237,28506493,44054328,13154706,67653109,19177199,3206486,283 07599,31531019,4206922,67706420,85556941,1271355,7529831,18846373,14516070,37538210,53272913,1327634,39271927,4280435,73248024,38623417,60415090,85059333,2041599,1294746,80323900,48358219,425589,45 59456,84163312,16375,79208671,75848350,57761050,5372448,8754836,67779544,22839116,78482152,6115560,26503349,70711533,20895590,60789818,97641356,977253,721025,57114446,16306660,43275651,6949483,7031 6811,26901210,16285927,26767451,3307438,41737694,20803560,11028044,12054262,32029542,35907903,63191356,52829286,68406538,66281290,5478517,185494,4558551,37349290,61047847,63481161,88952733,18194614 ,77244724,8039728,2749281,21824485,3984467,91862302,9173219,3821671,41638702,43176491,20199960,44476653,62493364,85413794,84736745,18631309,9418487,47801626,60753091,4425439,42276238,4247976,705395 7,1113555,60525829,52625011,73234853,64327448,45458665,19279463,2562731,12143942,58894828,39004963,262952,18981190,15215655,325683,2973033,65251844,58104562,96943559,4733669,7473324,29673054,558693 62,94432250,39907562,56881884,66066043,5847501,68945840,81133107,5119999,39524518,16173428,58767743,4218541,2500641,4744839,23396012,90303049,98584351,3896895,51434895,28441306,13578262,53589942,78 687629,18106700,16908972,8034409,64662849,4523895 }; static int chongtu[ EvenKey ][ OddKey ];/*测试用数组,可删除*/ void chongtu1( void )/*测试用函数,可删除*/ { int i,j; int count; for( i = 0, count = 0;EvenKey > i;++i ) for( j = 0; OddKey > j; ++j ) { if( 1 < chongtu[i][j] ) printf( "冲突发生在 %d %d, 发生了 %d 次\n", i, j, chongtu[i][j] ); if( 0 == chongtu[i][j] ) count++; } printf( "%d个空间未被散列到\n",count ); } static struct K HashFun( char *src ) { struct K key; int ix; key.Len = strlen( src ); key.Even = 0; key.Odd = 0; for( ix = 0; key.Len > ix; ++ix ) { key.Even = ( key.Even << 1 ) + scatter[ ( unsigned )*( src + ix ) ]; ++ix; if( key.Len > ix ) key.Odd = ( key.Odd << 1 ) + scatter[ ( unsigned )*( src + ix ) ]; } key.Even %= EvenKey; key.Odd %= OddKey; ++chongtu[ key.Even ][ key.Odd ];/*该语句为测试用,可删除*/ return key; }
[此贴子已经被作者于2017-6-16 23:46编辑过]