回复 20楼 rjsp
1.zip
(51.58 KB)
/* Test-data generator fragment (the enclosing function is not shown): opens "result1.txt" for binary writing, returns 1 if the open fails, then writes 180000000 characters, each picked from "agct" with rand()%4, and closes the file. NOTE(review): 180000000 bytes is roughly 180 MB, while the sentence following this snippet speaks of a 1.8G file - confirm which figure is intended. */ FILE* file = fopen( "result1.txt", "wb" ); if( !file ) return 1; for( size_t i=0; i<180000000; ++i ) { fputc( "agct"[rand()%4], file ); } fclose( file );然后用12楼的代码,在15秒内就处理完这个1.8G的文件,没有出现任何错误
#include <stdio.h>
#include <string.h>
#include <assert.h>

/*
 * Decode a 16-bit value into its 8-letter word.
 *
 * val  value to decode; must be below 1<<16 (checked with assert).
 * str  receives exactly 8 characters and is NOT NUL-terminated.
 *
 * Each 2-bit digit maps 0,1,2,3 to 'a','g','c','t'; the most significant
 * digit ends up in str[0].
 */
void dna8_val2str( unsigned long val, char str[8] )
{
    assert( val < (1ul<<(8*2)) );
    for( size_t i=0; i<8; ++i )
    {
        str[7-i] = "agct"[val%4];
        val >>= 2;
    }
}

/*
 * Count every 8-letter window over the alphabet {a,g,c,t} found in "3.txt"
 * and write the table of all 65536 possible words to "result2.txt".
 *
 * Returns 0 on success, 1 if the input cannot be opened, 3 if the output
 * cannot be opened, 4 if closing the output reports a write error.
 */
int main()
{
    unsigned long words = 0;
    unsigned long numbers[65536] = { 0 };

    // Processing
    FILE* fin = fopen( "3.txt", "r" );
    if( !fin )
        return 1;

    int bav = 0;            // letters accumulated so far, saturates at 7
    unsigned long val = 0;  // last (up to) 8 letters, 2 bits per letter
    int stop = 0;           // set once '@' has been read

    for( int c; !stop && (c=fgetc(fin)) != EOF; )
    {
        switch( c )
        {
        case '@': // '@' ends processing; a bare break would only leave the switch
            stop = 1;
            break;

        case '\r':
        // case '\n' is left disabled as in the original, so a line feed
        // reaches the default branch and is reported as an illegal character.
            // The sliding window (bav/val) is not reset here.
            ++words;
            continue;

        case 'a': case 'g': case 'c': case 't':
            // c%36%5 maps 'a','g','c','t' to 0,1,2,3 - the order of "agct".
            val = ((val<<2)&0xFFFF) | (c%36%5);
            if(bav<7) // wait until 8 valid letters have been seen
                ++bav;
            else
                ++numbers[val];
            break;

        default: // a character outside agct
        {
            // Braces are required: before C23 a label cannot be followed
            // directly by a declaration.
            long pos = ftell(fin)-1;
            if( c>0x20 && c<0xFF ) // displayable: show the character itself
                printf( "--- 0x%08lX处出现非法字符\'%c\'\n", (unsigned long)pos, (char)c );
            else                   // not displayable: show its numeric value
                printf( "--- 0x%08lX处出现非法字符0x%02hhX\n", (unsigned long)pos, (unsigned char)c );
            break;
        }
        }
    }
    fclose( fin );

    // Output
    FILE* fout = fopen( "result2.txt", "w" );
    if( !fout )
        return 3;

    // The counters are unsigned long, so they are printed with %lu.
    fprintf( fout, "The Number of total words are %lu\n", words );
    fprintf( fout,"The Expect Number words are %f\n", words/65536.0 );
    for( int i=0; i<65536; ++i )
    {
        // if( numbers[i] !=0 )   (optional filter for unseen words, disabled)
        {
            char str[8];
            dna8_val2str( i, str );
            fprintf( fout, "%.8s\t%lu\t%f\n", str, numbers[i], numbers[i]/65536.0 );
        }
    }
    // fclose flushes buffered output, so a failure here means lost data.
    if( fclose( fout ) != 0 )
        return 4;

    printf( "处理完毕\n" );
    return 0;
}
/*
 * Author's note (translated from the post): the counters are unsigned long,
 * so a count cannot exceed ULONG_MAX - at least 4294967295, i.e. about
 * 4.29 billion; the post itself said "400 million". If the counts can grow
 * beyond that, change the counter type, and also replace the corresponding
 * ftell call, whose long return value limits the offset it can report.
 */