一个使用rle和huffman算法的压缩程序
某日看到一个jpeg图片的原理,突发兴趣写了个压缩程序,vc2005编译人比较懒,头文件啥的都堆一个文件里面了
huffman.exe [-r|-h] filename//用rle还是huffman压缩
huffman.exe srcfile dstfile//解压缩
程序代码:
#define _CRT_SECURE_NO_WARNINGS #include <windows.h> #include <Winbase.h> #include<stdio.h> #include<stdlib.h> #include<string.h> #include<conio.h> #ifndef UINT16 #define UINT16 unsigned short #endif #ifndef UINT8 #define UINT8 unsigned char #endif #ifndef UINT32 #define UINT32 unsigned long #endif #define CANONICAL_HUFFMAN #define ASC_NUM 256 #define ASC_HUFFMAN_NODE (2*256-1) #define BITS_PER_BYTE 8 #define HUFFMAN_CODE 0x01 #define RLE_CODE 0x10 #define BITMAP_SET(bitMap, gBitLoc, bitVal) \ (((bitVal) == 0) ? \ (((char*)bitMap)[((gBitLoc)-1)/8]=(((char*)bitMap)[((gBitLoc)-1)/8]&(~((unsigned char)(1<<(7-((gBitLoc)-1)%8)))))) : \ (((char*)bitMap)[((gBitLoc)-1)/8]=(((char*)bitMap)[((gBitLoc)-1)/8]|((unsigned char)(1<<(7-((gBitLoc)-1)%8))))) \ ) #define BITMAP_BITVAL(bitMap, gBitLoc) \ ((((char*)bitMap)[((gBitLoc)-1)/8]&(1<<(7-((gBitLoc)-1)%8))) == 0 ? 0 : 1) typedef UINT32 HUFFMAN_NODE_HEADER; typedef struct { UINT8 c; UINT32 weight; int p, l, r;/* parent, left child, right child */ int flag;/* 0x01:selected 0x02:NULL node 0x04:NULL node padding*/ UINT8 code[4]; UINT32 bits; }HUFFMAN_NODE, *PHUFFMAN_NODE; HUFFMAN_NODE huffmanNode[ASC_HUFFMAN_NODE] = {0}; HUFFMAN_NODE_HEADER nodeHeader; UINT32 fileSize = 0; UINT32 worked = 0; char* avg; #define FGET_C(fd) (++worked)?fgetc((fd)):0 /* by haoxu 2010-07-05 offset:dst offset, unit is bits len:src len, unit is bits */ #define MEMCPY_BITS(dst, src, offset, len) {\ int i;\ for(i=0;i<(len);i++)\ BITMAP_SET((dst), ((offset)+i+1), BITMAP_BITVAL((src), (i+1)));\ } /* by haoxu 2010-07-07 dst : UINT8 string src : int bits : bit length */ #define CONVERT_INT2BITS(dst, src, bits) {\ int x;\ for (x = 0; x < (bits); x++)\ (void)BITMAP_SET((dst), (bits-x), (src)&(1<<x));\ } #define BITS_PRINT(src, offset, bits) {\ int x;\ for (x = 0; x < (bits); x++)\ printf("%d", BITMAP_BITVAL(src, (x+offset+1)));\ } void disStat(int c) { int i; if(c == 1) { for(i = 0; i< ASC_HUFFMAN_NODE; i++) { printf("%04x : %d %s%s%s\n", i, huffmanNode[i].bits, (huffmanNode[i].flag & 0x01)?" s":"", (huffmanNode[i].flag & 0x02)?" n":"", (huffmanNode[i].flag & 0x04)?" p":""); } } if(c == 2) for(i = 0; i< ASC_HUFFMAN_NODE; i++) { printf("%04x (%02x): %02x %02x\n", i, huffmanNode[i].c, huffmanNode[i].l, huffmanNode[i].r); } } void printError(int no) { if(no == 1) printf("huffman tree is too deep\n"); else if(no == 2) printf("file is incomplete\n"); else if(no == 3) printf("buffer overflow\n"); else if(no == 4) printf("open file failed\n"); #if defined(_DEBUG) disStat(2); #endif exit(0); } void disHuffmanTree(PHUFFMAN_NODE node, int current, char* code, int path) { #define NODE node[current] int i; if(NODE.l == -1 && NODE.r == -1 && !(NODE.flag&0x02)) { printf("deep %d weight %d %02x : ",path, NODE.weight, NODE.c); for(i=0;i<path;i++) printf("%d", code[i]); printf("\n"); return; } if((i = NODE.l) != -1) { code[path] = 0; disHuffmanTree(node, i, code, path+1); } if((i = NODE.r) != -1) { code[path] = 1; disHuffmanTree(node, i, code, path+1); } } #ifdef CANONICAL_HUFFMAN #define LIST_DEEP 32 int node_isSingleChar(PHUFFMAN_NODE node) { int i; for(i=ASC_NUM;i<ASC_HUFFMAN_NODE;++i) { if(node[i].flag&0x04) return 0; } return 1; } void node_calcDeep(PHUFFMAN_NODE node, int current, int path, UINT8* buffer, int* list) { #undef NODE #define NODE node[current] int i = 0; if(path > LIST_DEEP) { printError(1); } if(NODE.l == -1 && NODE.r == -1 && !(NODE.flag&0x02)) { NODE.bits = path; buffer[path-1]++; if(!buffer[path-1]) buffer[-1] = (1<<7)|(path-1); NODE.p = list[path-1]; list[path-1] = current; return; } if((i = NODE.l) != -1) { node_calcDeep(node, i, path+1, buffer, list); } if((i = NODE.r) != -1) { node_calcDeep(node, i, path+1, buffer, list); } } int node_buildHuffmanTree(PHUFFMAN_NODE node, HUFFMAN_NODE_HEADER header, UINT8* buffer, int bufSize) { int list[LIST_DEEP]; int i, j, k; int last = -1; int lastBits = 0; int tmp; int lastNonNullList = 0; UINT8 buf[256]; #if defined(_DEBUG) int c; #endif if(bufSize < (512)) return 0; memset(buffer, 0, 512); memset(list, 0xff, LIST_DEEP*sizeof(int));/* code length list header */ if(node_isSingleChar(node)) { *buffer = 1; *(buffer+1) = 1; *(buffer+2) = (UINT8)header; node[header].bits = 1; memset(node[header].code, 0, 4); return 3; } node_calcDeep(node, header, 0, buffer+1, list); j = 0; if(*buffer) { k = *buffer&(~(1<<7)); while(k != -1) { tmp = (last + 1)<<(node[k].bits - lastBits); CONVERT_INT2BITS(node[k].code, tmp, node[k].bits); #if defined(_DEBUG) printf("deep %d weight %d %02x : ",node[k].bits, node[k].weight, node[k].c); for(c=1;c<=node[k].bits;c++) printf("%d", BITMAP_BITVAL(node[k].code, c)); printf("\n"); #endif last = tmp; lastBits = node[k].bits; buf[j++] = node[k].c; k = node[k].p; } memcpy(buffer+1, buf, j); return (1+j); } else { for(i=0;i<LIST_DEEP;i++) { #if defined(_DEBUG) //printf("code length: %d\n", i+1); #endif k = list[i]; while(k != -1) { lastNonNullList = i+1; tmp = (last + 1)<<(node[k].bits - lastBits); CONVERT_INT2BITS(node[k].code, tmp, node[k].bits); #if defined(_DEBUG) printf("deep %d weight %d %02x : ",node[k].bits, node[k].weight, node[k].c); for(c=1;c<=node[k].bits;c++) printf("%d", BITMAP_BITVAL(node[k].code, c)); printf("\n"); #endif last = tmp; lastBits = node[k].bits; buf[j++] = node[k].c; k = node[k].p; } } *buffer = lastNonNullList; memcpy(buffer+1+lastNonNullList, buf, j); return (1+lastNonNullList+j); } } #else void node_buildHuffmanTree(PHUFFMAN_NODE node, int current, char* code, int path) { #define NODE node[current] int i; if(NODE.l == -1 && NODE.r == -1 && !(NODE.flag&0x02)) { for(i=0;i<path;i++) BITMAP_SET(NODE.code, i+1, code[i]); NODE.bits = path; return; } if((i = NODE.l) != -1) { code[path] = 0; node_buildHuffmanTree(node, i, code, path+1); } if((i = NODE.r) != -1) { code[path] = 1; node_buildHuffmanTree(node, i, code, path+1); } } #endif int node_selectMinsWeight(PHUFFMAN_NODE node, UINT32* n1, UINT32* n2) { int i, ret; UINT32 w1, w2; ret = -1; w1 = 0xFFFFFFFF; w2 = 0xFFFFFFFF; for(i = 0; i < ASC_HUFFMAN_NODE; i++) { /* the node is selected or the node is NULL but not selected or weight is zero*/ if(node[i].flag&0x01 || (!(node[i].flag&0x04) && node[i].flag&0x02) || (!node[i].weight && !(node[i].flag&0x02))) continue; if(node[i].weight < w1 && node[i].weight < w2 )/* weight < w1, weight < w2 */ { if(w1 < w2) { w2 = w1; *n2 = *n1; } w1 = node[i].weight; *n1 = i; nodeHeader = i; } else if(node[i].weight >= w1 && node[i].weight < w2 )/* w1 <= weight < w2 */ { w2 = node[i].weight; *n2 = i; } ret = 0; } /* w1 is node header */ if(w2 == 0xFFFFFFFF) return 1; return ret; } int node_getMinWeight(PHUFFMAN_NODE node, UINT32* n) { int i, ret; UINT32 w; ret = -1; w = 0xFFFFFFFF; for(i = 0; i < ASC_NUM; i++) { if(node[i].weight < w) { w = node[i].weight; *n = i; } ret = 0; } return ret; } int node_getEmptyNode(PHUFFMAN_NODE node) { int i; for(i = ASC_NUM; i < ASC_HUFFMAN_NODE; i++) { if((!(node[i].flag&0x04))&&(node[i].flag&0x02)) return i; } return -1; } int node_makeParentNode(PHUFFMAN_NODE node) { int n; int ret; UINT32 n1, n2; ret = node_selectMinsWeight(node, &n1, &n2); if(ret) return ret; else { if((n = node_getEmptyNode(node)) != -1) { node[n].flag |= 0x04; node[n].l = n1; node[n].r = n2; node[n].weight = node[n1].weight + node[n2].weight; node[n1].p = n; node[n1].flag |= 0x01; node[n2].p = n; node[n2].flag |= 0x01; } else return -1; } return 0; } void node_init(PHUFFMAN_NODE node) { int i; memset(node, 0, sizeof(HUFFMAN_NODE)*ASC_HUFFMAN_NODE); /* init char node */ for(i=0;i<ASC_NUM;i++) { node[i].c = i; node[i].weight = 0; node[i].l = -1; node[i].r = -1; node[i].p = -1; } /* init NULL node */ for(i=ASC_NUM;i<ASC_HUFFMAN_NODE;i++) { node[i].flag = 0x02; node[i].l = -1; node[i].r = -1; node[i].p = -1; } } int node_getWeightFromFile(char* file, PHUFFMAN_NODE node) { FILE * sfp; int c; printf("parse file...\n"); if(!(sfp = fopen(file, "rb"))) { printError(4); return -1; } while(!feof(sfp)) { c = fgetc(sfp); if(c == -1) break; node[c].weight++; fileSize++; } fclose(sfp); return 0; } /* by haoxu 2010-07-07 if success return write number if error return -1 */ int make_huffman_code_from_file(char* file, PHUFFMAN_NODE node, UINT8* buffer, int bufSize) { int ret; node_init(node); if(node_getWeightFromFile(file, node)) return -1; while(!(ret = node_makeParentNode(node))){} #if defined(_DEBUG) printf("header %04x\n", nodeHeader); #endif if(ret == -1) return -1; else if(ret == 1) { #ifdef CANONICAL_HUFFMAN return node_buildHuffmanTree(node, nodeHeader, buffer, bufSize); #else node_buildHuffmanTree(node, nodeHeader, buffer, 0); disHuffmanTree(node, nodeHeader, buffer, 0); #endif } return 0; } /* by haoxu 2010-07-08 canonical huffman struct | encrpyt flag(1 byte) | file size(4 bytes) | |list length=x(1 byte) | list (x bytes) | code (sum list bytes) | */ int huffman_encrpyt_file(char* src) { FILE * sfp,* dfp; int c; UINT8 buf[512]; int offset; int i; char dst[256]; if((i = make_huffman_code_from_file(src, huffmanNode, buf, sizeof(buf))) == -1) return -1; #if defined(_DEBUG) printf("huffman code number %d\n", i); #endif if(!(sfp = fopen(src, "rb"))) { printError(4); } sprintf(dst, "%s.hfm", src); if(!(dfp=fopen(dst, "wb"))) { fclose(sfp); printError(4); } fputc(HUFFMAN_CODE, dfp); fwrite(&fileSize, sizeof(UINT32), 1, dfp); fwrite(buf, i, 1, dfp); offset = 0; while(!feof(sfp)) { c = FGET_C(sfp); if(c == -1) break; #if defined(_DEBUG) BITS_PRINT(huffmanNode[c].code,0,huffmanNode[c].bits); printf("\n"); #endif MEMCPY_BITS(buf, huffmanNode[c].code, offset, huffmanNode[c].bits); offset += huffmanNode[c].bits; if(offset > BITS_PER_BYTE*100) { fwrite(buf, 100, 1, dfp); memcpy(buf, buf+100, 4); offset -= BITS_PER_BYTE*100; } } if(offset) { fwrite(buf, offset/BITS_PER_BYTE+1, 1, dfp); } return 0; } void huffman_unencrpyt_file(FILE* sfp, FILE *dfp) { #undef NODE #define NODE huffmanNode int i, k, listLength; int last = -1; int lastBits = 0; int tmp; int offset = 0; UINT8 buf[512]; UINT8 list[32]; int ret, c; node_init(huffmanNode); /* get old file size */ ret = fread(&fileSize, sizeof(UINT32), 1, sfp); if(ret != 1) printError(2); if(fileSize == 0) return; /* get list length */ listLength = fgetc(sfp); if(listLength == -1) printError(2); if(listLength > LIST_DEEP) { lastBits = listLength&(~(1<<7)); for(i = 0;i<ASC_NUM;i++) { c = fgetc(sfp); if(c == -1) printError(2); tmp = last + 1; CONVERT_INT2BITS(NODE[c].code, tmp, lastBits); NODE[c].bits = lastBits; last = tmp; } } else { ret = fread(list, sizeof(UINT8), listLength, sfp); if(ret != listLength) printError(2); for(i=0;i<listLength;i++) { ret = list[i]; while(ret--) { c = fgetc(sfp); if(c == -1) printError(2); tmp = (last + 1)<<(i - lastBits); NODE[c].bits = i+1; CONVERT_INT2BITS(NODE[c].code, tmp, NODE[c].bits); last = tmp; lastBits = i; #if 0 printf("deep %d %02x : ",NODE[c].bits, NODE[c].c); for(k=1;k<=NODE[c].bits;k++) printf("%d", BITMAP_BITVAL(NODE[c].code, k)); printf("\n"); #endif } } } c = node_getEmptyNode(NODE); nodeHeader = c; NODE[nodeHeader].flag |= 0x04; for(i=0;i<ASC_NUM;i++) { if(NODE[i].bits) { tmp = nodeHeader; for(k=1;k<NODE[i].bits;k++) { if(BITMAP_BITVAL(NODE[i].code, k)?(NODE[tmp].r != -1):(NODE[tmp].l != -1)) { tmp = BITMAP_BITVAL(NODE[i].code, k)?(NODE[tmp].r):(NODE[tmp].l); continue; } c = node_getEmptyNode(NODE); if(c < 0) printError(3); NODE[c].flag |= 0x04; /* 0:l, 1:r */ if(BITMAP_BITVAL(NODE[i].code, k))/* r */ NODE[tmp].r = c; else NODE[tmp].l = c; tmp = c; } if(BITMAP_BITVAL(NODE[i].code, k))/* r */ NODE[tmp].r = i; else NODE[tmp].l = i; } } tmp = nodeHeader; listLength = 0; worked = 0; while(!feof(sfp)) { ret = fread(buf, sizeof(UINT8), 512, sfp); k = ret*BITS_PER_BYTE; for(i=0;i<k;i++) { if(BITMAP_BITVAL(buf, i+1)) tmp = NODE[tmp].r; else tmp = NODE[tmp].l; #if defined(_DEBUG) printf("%d", BITMAP_BITVAL(buf, i+1)); #endif if(NODE[tmp].bits) { #if defined(_DEBUG) printf("\n"); #endif worked++; listLength += NODE[tmp].bits; fputc(NODE[tmp].c, dfp); tmp = nodeHeader; if(worked == fileSize) break; } } } #if defined(_DEBUG) printf("listLength %d worked %d filesize %d\n", listLength,worked ,fileSize); #endif if(worked < fileSize) printError(2); #if 0 listLength = strlen(avg); tmp = nodeHeader; for(i = 0;i<listLength;i++) { if(avg[i] == '1') tmp = NODE[tmp].r; else tmp = NODE[tmp].l; if(NODE[tmp].c) { printf("%02x ", NODE[tmp].c); tmp = nodeHeader; } } printf("\n"); #endif } int rle_encrpyt_file(char* src) { FILE * sfp,* dfp; int keyPadding; unsigned char keyNum; int count = 0; int c; int code; int ret; UINT8 key; char dst[256]; /* get key from file */ node_init(huffmanNode); if(node_getWeightFromFile(src, huffmanNode)) return -1; if(node_getMinWeight(huffmanNode, &ret)) return -1; key = huffmanNode[ret].c; keyNum = 0; keyPadding = 0; code = -1; if(!(sfp = fopen(src, "rb"))) { printError(4); } sprintf(dst, "%s.rle", src); if(!(dfp=fopen(dst,"wb"))) { fclose(sfp); printError(4); } fputc(RLE_CODE, dfp); fputc(key, dfp); worked = 0; while(!feof(sfp)) { c = FGET_C(sfp); if(c == -1) { if(keyPadding)/* if EOF, check action */ { fputc(key, dfp); fputc(code, dfp); fputc(keyNum, dfp); } else fputc(code, dfp); break; } if(c != key)/* c is not key */ { if(keyPadding)/* if action is calc repeat char */ { if(code == c)/* if the char is repeated,incr it */ { keyNum++; if(keyNum == 0xff) { fputc(key, dfp); fputc(code, dfp); fputc(keyNum, dfp); keyNum = 0; } } else/* if the char is not repeated, record it and puts repeat data */ { fputc(key, dfp); fputc(code, dfp); fputc(keyNum, dfp); code = c; keyPadding = 0; keyNum = 0; } } else/* if first get the char */ { if(code == c)/* if the char equal last one,padding start */ { keyPadding = 1; keyNum = 2; } else/* if the char unequal last one,record it and puts last one */ { if(code != -1) fputc(code, dfp); code = c; } } } else/* if c is key */ { if(keyPadding)/* if action is padding,ignore key */ { fputc(key, dfp); fputc(code, dfp); fputc(keyNum, dfp); code = -1; keyPadding = 0; keyNum = 0; } else/* if no action, puts last code */ { if(code != -1) fputc(code, dfp); code = -1; } /* puts double key */ fputc(key, dfp); fputc(key, dfp); } } fclose(sfp); fclose(dfp); return 0; } void rle_unencrpyt_file(FILE* sfp, FILE *dfp) { UINT8 key; int c; int count; int i; if(!feof(sfp)) key = (UINT8)fgetc(sfp); while(!feof(sfp)) { c = FGET_C(sfp); if(c == -1) break; if(c == key) { c = FGET_C(sfp); if(key == c) fputc(key, dfp); else { count = FGET_C(sfp); for(i = 0; i < count; i++) fputc(c, dfp); } } else fputc(c, dfp); } } UINT32 getfilesize(char* file) { FILE* fp; UINT32 length; fp = fopen(file,"rb"); if(fp==NULL) printError(4); fseek(fp, 0L, SEEK_END); length = ftell(fp); fclose(fp); return length; } int unencrpyt_file(char* src, char* dst) { FILE * sfp,* dfp; int c; if(!(sfp = fopen(src, "rb"))) { printError(4); } if(!(dfp=fopen(dst,"wb"))) { fclose(sfp); printError(4); } while(!feof(sfp)) { c = FGET_C(sfp); if(c == -1) break; if(c == HUFFMAN_CODE ) { huffman_unencrpyt_file(sfp, dfp); break; } else if(c == RLE_CODE) { fileSize = getfilesize(src); rle_unencrpyt_file(sfp, dfp); break; } } fclose(sfp); fclose(dfp); return 0; } DWORD WINAPI printfThread ( LPVOID pvParam ) { while(1) { if(worked && fileSize) { printf("build file...%.2f%%\r",100*((float)worked)/fileSize); } #ifdef WIN32 Sleep(500); #else delay(1); #endif } return 0; } int main(int argc, char **argv) { char tmp[] = "tmp"; if(argc < 3) return 0; #ifdef WIN32 CreateThread ( NULL, 0, printfThread, NULL, 0, ( LPDWORD ) NULL ); #endif if(!strcmp("-r", argv[1])) rle_encrpyt_file(argv[2]); else if(!strcmp("-h", argv[1])) huffman_encrpyt_file(argv[2]); else { unencrpyt_file(argv[1], argv[2]); } printf("build file...100%% \n"); exit(0); }