压缩问题就救 - 数据结构与算法

问题点数：0 回复次数：2

压缩问题就救

/*========================================*/
/* 课题[3]哈夫曼编码实现文件的压缩功能 */
/* */
/* */
/*========================================*/

#include<stdio.h> //包含头文件
#include<stdlib.h>
#include<string.h>
#include<conio.h>
#include<iostream.h>

#define MAX_SINGLECODE 10 //单个字符最大码长
#define MAX_STRING 10000 //要编码的字符串的最大长度
#define MAX_CODESTRING 50000 //产生的二进制码的最大长度
#define MAX_WORDS 1000 //要编码的字符串中字符种数最大值
#define END_TREE 30000 //树部分存储的结束符
#define PATH 50 //路径的最大字符

/*======数据结构定义部分======*/

/*哈夫曼树结构定义，其中的next域用于链表的操作*/
typedef struct Huffmantree
{
char ch; //字符部分
int weight; //结点权值
int mark; //标记是否加入树中
struct Huffmantree *parent,*lchild,*rchild,*next;
}HTNode,*LinkTree;

/*****编码字典结构定义*****/
typedef struct //ch为字符值，code[]为该字符的哈夫曼编码
{
char ch; //字符部分
char code[MAX_SINGLECODE]; //编码部分
}CodeDictionary;

/*======子函数======*/

/*===================压缩功能实现部分=====================*/

/*功能：读取.TXT文件并将其中的字符中保存到string[]中*/
void readFile(char *string)
{
FILE *fp;
int i;
char ch; //记录读入的字符
char path[PATH]; //文本文件的读路径

cout<<"请输入要压缩的.txt文件地址：（无需扩展名）"<<endl;
gets(path);
if((fp=fopen(strcat(path,".txt"),"r"))==NULL) //只读方式打开一个文件
{
cout<<"\n路径不正确！\n"<<endl;
getch();
return;
}

ch=fgetc(fp); //循环将文件内的字符输入到数组string[]中
for(i=0;ch!=EOF;i++)
{
string[i]=ch;
ch=fgetc(fp);
}
string[i]='\0'; //数组末尾加上结束标志'\o'
fclose(fp);
}

/*功能对string[]中的字符处理，将相同的舍去，并增加权值*/
LinkTree setWeight(char *string)
{
int i=0; //文件字符串下标
LinkTree tree; //头指针
LinkTree ptr,beforeptr; //创建指针与其前驱
HTNode *node;

if((tree=(LinkTree)malloc(sizeof(HTNode)))==NULL)//创建链表的头结点
return NULL; //内存不足
tree->next=NULL;

for(i=0;string[i]!='\0';i++)
{
ptr=tree;
beforeptr=tree;

if((node=(HTNode *)malloc(sizeof(HTNode)))==NULL)
return NULL; //建立新的结点
node->next=NULL;
node->parent=NULL;
node->lchild=NULL;
node->rchild=NULL;
node->mark=0;
node->ch=string[i];
node->weight=1;

if(tree->next==NULL) //如果是第一个非头结点
tree->next=node;
else
{
ptr=tree->next;
while(ptr&&ptr->ch!=node->ch) //查找相同字符
{
ptr=ptr->next;
beforeptr=beforeptr->next;
}
if(ptr&&ptr->ch==node->ch) //如果链表中某结点的字符与新结点的字符相同
{
ptr->weight++; //将该结点的权加一
free(node);
}
else //将新结点插入链表后
{
node->next=beforeptr->next;
beforeptr->next=node;
}
}
}
return tree; //返回头指针
}

/*功能：将链表中的含权字符按权的从小到大排列并输入到一链表*/
LinkTree sortNode(LinkTree tree)
{
LinkTree head; //头指针
LinkTree ph,beforeph; //创建指针及其前驱
LinkTree pt;

if((head=(LinkTree)malloc(sizeof(HTNode)))==NULL)//创建新链表的头结点
return NULL;
head->next=NULL;

ph=head;
beforeph=head;

while(tree->next)
{
pt=tree->next; //取被操作链表的头结点
tree->next=pt->next;
pt->next=NULL;

ph=head->next;
beforeph=head;

if(head->next==NULL)
head->next=pt; //创建当前操作链表头结点
else
{
while(ph&&ph->weight<pt->weight) //将被操作结点插入相应位置
{
ph=ph->next;
beforeph=beforeph->next;
}
pt->next=beforeph->next;
beforeph->next=pt;
}
}
free(tree); //释放含权树
return head; //返回排序后的头指针
}

/*用排序后的链表建立哈夫曼树*/
LinkTree createHTree(LinkTree tree)
{
LinkTree pt,q,beforept;
HTNode *newnode;

for(pt=tree->next,q=pt->next;pt!=NULL&&q!=NULL;pt=tree->next,q=pt->next)
//pt、q初值为头结点后的两个结点，即最小权结点
{
tree->next=q->next; //取出将最小的两个结点
q->next=NULL;
pt->next=NULL;

if((newnode=(HTNode *)malloc(sizeof(HTNode)))==NULL)
//申请新结点作为哈夫曼树的中间结点
return NULL;
newnode->next=NULL;
newnode->mark=0; //标记

newnode->lchild=pt; //取链表头结点后的两个结点作为新结点的左、右孩子
newnode->rchild=q;
pt->parent=newnode;
q->parent=newnode;
newnode->weight=pt->weight+q->weight; //权值等于孩子权值相加

pt=tree->next;
beforept=tree;

if(pt!=NULL&&pt->weight>=newnode->weight)
{
newnode->next=beforept->next; //将新结点插入原链表的相应位置
beforept->next=newnode;
}
else
{
while(pt!=NULL&&pt->weight<newnode->weight)//循环找出newnode结点的插入位置
{
pt=pt->next;
beforept=beforept->next;
}
newnode->next=beforept->next;
beforept->next=newnode;
}
}
return (tree->next);
}

/*对哈夫曼树进行编码,并将关键字保存如数组codedictionary[]中*/
void codeHTree(LinkTree tree,CodeDictionary *codedictionary)
{
int index=0,k=0;
char code[MAX_SINGLECODE]; //用于统计每个字符的哈夫曼编码
LinkTree ptr=tree; //从树的根结点开始

if(ptr==NULL)
{
cout<<"要压缩的文件是空的!\n"<<endl;
exit(0);
}
else
{
while(ptr->lchild&&ptr->rchild&&ptr->mark==0)
{
while(ptr->lchild&&ptr->lchild->mark==0)
{
code[index++]='0'; //左支路编码为0
ptr=ptr->lchild;
if(!ptr->lchild&&!ptr->rchild) //如果没有左右孩子，即叶子结点
{
ptr->mark=1; //作标记，表明该字符已被编码
code[index]='\0'; //编码0-1字符串结束
codedictionary[k].ch=ptr->ch;//给字典赋字符值
for(index=0;code[index]!='\0';index++)
codedictionary[k].code[index]=code[index];//给字典赋码值
codedictionary[k].code[index]='\0';
k++;
ptr=tree; //指针复位
index=0;
}
}
if(ptr->rchild&&ptr->rchild->mark==0)
{
ptr=ptr->rchild;
code[index++]='1'; //右支路编码为1
}
if(!ptr->lchild&&!ptr->rchild) //如果没有左右孩子，即叶子结点
{
ptr->mark=1;
code[index++]='\0';
codedictionary[k].ch=ptr->ch; //给字典赋字符值
for(index=0;code[index]!='\0';index++)
codedictionary[k].code[index]=code[index];//给字典赋码值
codedictionary[k].code[index]='\0';
k++;
ptr=tree;
index=0;
}
if(ptr->lchild->mark==1&&ptr->rchild->mark==1)//如果左右孩子都已标记
{
ptr->mark=1;
ptr=tree;
index=0;
}
}
}
cout<<"\n"<<endl;
}

/*将整个字符串转化为0-1的字符串*/
void compressString(char *string,CodeDictionary *codedictionary,char *codestring)
{
int i=0,j=0,k=0,m;

while(string[i]) //整个文件字符串没结束时
{
while(string[i]!=codedictionary[j].ch&&j<MAX_WORDS)
//找与对应字符相同的字符
j++;
if(string[i]==codedictionary[j].ch) //如果找到与对应字符相同的字符
for(m=0;codedictionary[j].code[m];m++,k++)
codestring[k]=codedictionary[j].code[m];
j=0; //字典复位
i++;
}
codestring[k]='\0'; //标记结束
}

/*保存按权排列的链表和编码后的字符串*/
void writeCode(LinkTree tree,char *string)
{
FILE *fp;
int i;
int weight; //记录写入的权值
char ch; //记录写入的字符
LinkTree p;
char path[PATH]; //0-1码文件的写路径

cout<<"请输入压缩后的保存路径及文件名：（无需扩展名）"<<endl;
gets(path);
if((fp=fopen(strcat(path,".wp"),"w+"))==NULL)
{
cout<<"\n文件路径出错！\n"<<endl;
getch();
return;
}
p=tree->next;

/*按权排列部分写入文件前部分*/
do
{
ch=p->ch;
weight=p->weight;
fprintf(fp,"%c%d",ch,weight);
p=p->next;
}while(p);
fprintf(fp,"%c%d",'^',END_TREE);

fseek(fp,sizeof(char),1); //空出区分位，用于解码时区分链表和编码

/*0-1码写入文件后部分*/
for(i=1;string[i-1];i++)
{
if(string[i-1]=='1')
{
ch<<=1;
ch+=1;
}
if(string[i-1]=='0')
{
ch<<=1;
ch+=0;
}
if(i%8==0)
fputc(ch,fp);
}
cout<<"\n压缩成功！\n"<<endl;
getch();
fclose(fp);
}

/*释放哈夫曼树所占用的空间*/
void deleteTree(LinkTree tree)
{
LinkTree ptr=tree;
if(ptr)
{
deleteTree(ptr->lchild); //第归处理左子树
deleteTree(ptr->rchild); //第归处理右子树
free(ptr); //释放结点
}
}

搜索更多相关主题的帖子: 压缩　

第 2 楼

得分:0

/*=============实现压缩功能的子函数===============*/
void Pack()
{

char string[MAX_STRING]; //保存从文件中读取的内容
LinkTree temp; //保存赋了权值的链表表
LinkTree ht; //保存排序后的链表
LinkTree htcopy,tempcopy; //链表表备份
LinkTree htree; //保存哈夫曼树
CodeDictionary codedictionary[MAX_WORDS];//编码字典
char codestring[MAX_CODESTRING]; //保存0-1形的代码串

if((ht=(LinkTree)malloc(sizeof(HTNode)))==NULL)//创建链表的头结点
{
cout<<"内存不足！"<<endl;
getch();
exit(0);
}
readFile(string); //读取要编码的文件（字符串）
temp=setWeight(string); //得到有权值的表
tempcopy=setWeight(string);
ht=sortNode(temp); //按权值排序后的表
htcopy=sortNode(tempcopy); //用于记录解码树
htree=createHTree(ht); //得到哈夫曼树
codeHTree(htree,codedictionary); //哈夫曼编码
compressString(string,codedictionary,codestring);//压缩为0-1码
writeCode(htcopy,codestring); //将解码树和0-1码保存
deleteTree(htree); //释放空间*/
}

/*读取压缩文件，并保存其中的链表和字符编码*/
void readCode(LinkTree tree,char *string)
{

FILE *fp; //指向文件的指针
int i=0,j;
int weight; //记录读入的权值
char ch; //记录读入的字符
LinkTree ptr,beforeptr;
char path[PATH]; //0-1码文件的读路径

cout<<"请输入要解压的文件路径及文件名：（无需扩展名）"<<endl;
gets(path);
if((fp=fopen(strcat(path,".wp"),"r"))==NULL)
{
cout<<"\n文件路径出错！\n"<<endl;
getch();
return;
}
beforeptr=tree;

fscanf(fp,"%c%d",&ch,&weight); /*从文件前部分读出解码树*/

while(weight!=END_TREE)
{
if((ptr=(LinkTree)malloc(sizeof(HTNode)))==NULL) //建立新结点
{
cout<<"内存不足！"<<endl;
getch();
exit(1); //错误出口
}
ptr->ch=ch;
ptr->weight=weight;
ptr->lchild=NULL;
ptr->rchild=NULL;
ptr->parent=NULL;
ptr->mark=0;
beforeptr->next=ptr;
beforeptr=ptr;
fscanf(fp,"%c%d",&ch,&weight);
}
beforeptr->next=NULL;

fseek(fp,sizeof(char),1); //文件指针定位

/*从文件后部分读出0-1码*/
ch=fgetc(fp);
while(ch!=EOF)
{
for(j=7;j>=0;--j)
{
if(ch>>j&1==1)
string[i++]='1';
if(ch>>j&1==0)
string[i++]='0';
}
ch=fgetc(fp);
}
string[i]='\0';
fclose(fp);
}

/*利用哈夫曼编码将0-1(保存在code[]中)码转化为字符串,保存在filestring中*/
void decodeHTree(LinkTree tree,char *code,char *filestring)
{
int i=0,j=0,k=0;
char *char0_1;
LinkTree ptr=tree;
char0_1=(char *)malloc(MAX_SINGLECODE); //此数组用于统计输入的0-1序列

cout<<"预览解压后的字符：\n"<<endl;
for(j=0,ptr=tree;code[i]!='\0'&&ptr->lchild&&ptr->rchild;j=0,ptr=tree)
{
for(j=0;code[i]!='\0'&&ptr->lchild&&ptr->rchild;j++,i++)
{
if(code[i]=='0')
{
ptr=ptr->lchild;
char0_1[j]='0';
}
if(code[i]=='1')
{
ptr=ptr->rchild;
char0_1[j]='1';
}
}
if(!ptr->lchild&&!ptr->rchild)
{
printf("%c",ptr->ch); //显示解压后的字符
filestring[k++]=ptr->ch; //将字符逐一保存到字符串里
}
if(code[i]=='\0'&&ptr->lchild&&ptr->rchild)
{
char0_1[j]='\0';
printf("\n没有与最后的几个0-1序列：%s相匹配的字符!\n",char0_1);
return;
}
}
printf("\n\n");
filestring[k]='\0';
free(char0_1);
}

/*保存解码后的文件*/
void writeFile(char *string)
{
FILE *fp;
char ch; //记录写入的字符
int i;
char path[PATH]; //文本文件的写路径

cout<<"请输入解压后的保存路径及文件名：（无需扩展名）"<<endl;
gets(path);
if((fp=fopen(strcat(path,".txt"),"w+"))==NULL)
{
cout<<"\n文件路径出错！\n"<<endl;
getch();
return;
}

for(i=0;string[i];i++)
{
ch=string[i];
fputc(ch,fp);
}
cout<<"\n解压成功！\n"<<endl;
getch();
fclose(fp);
}

/*==============实现解压缩功能的子函数==================*/
void Unpack()
{
LinkTree ht;
char codestring[MAX_CODESTRING]; //保存0-1形的代码串
LinkTree htree; //保存哈夫曼树
CodeDictionary codedictionary[MAX_WORDS];//编码字典
char filestring[MAX_STRING]; //解码后要写入文件中的内容

if((ht=(LinkTree)malloc(sizeof(HTNode)))==NULL)//创建链表的头结点
{
cout<<"内存不足！"<<endl;
getch();
exit(0);
}
readCode(ht,codestring); //读取要解码的0-1码
htree=createHTree(ht); //得到哈夫曼树
codeHTree(htree,codedictionary); //哈夫曼编码
decodeHTree(htree,codestring,filestring); //解码
writeFile(filestring); //将解码文件保存
deleteTree(htree); //释放空间
}

void menu()
{
cout<<"\n"<<endl;
cout<<"\t\t\t-----** 课题[3]程序实现 **-----"<<endl;
cout<<"\n"<<endl;
cout<<"提供功能：(仅对.txt文件操作，文件不小于5K)"<<endl;
cout<<"\n"<<endl;
cout<<"\t\t\t\t <p> 压缩\n\n"<<endl;
cout<<"\t\t\t\t <u> 解压\n\n"<<endl;
cout<<"\t\t\t\t <b> 比较\n\n"<<endl;
cout<<"\t\t\t\t <e> 退出\n\n"<<endl;
cout<<"\n请按键选择："<<endl;
cout<<"\n"<<endl;
}

/*对解压前后文件大小的比较若相同则相同率为100%*/
void Comfp()
{
int i=0,j=0,w=37; //ASC2码%号为37
char ch1,ch2;
char path1[PATH],path2[PATH];
FILE *fp1,*fp2; //指向文件的指针

cout<<"请输入压缩前.txt文件的地址：（无需扩展名）"<<endl;
gets(path1);
cout<<"请输入解压后.txt文件的地址：（无需扩展名）"<<endl;
gets(path2);
if((fp1=fopen(strcat(path1,".txt"),"r"))==NULL||(fp2=fopen(strcat(path2,".txt"),"r"))==NULL) //只读方式打开一个文件
{
cout<<"\n路径不正确！\n"<<endl;
getch();
return;
}

ch1=fgetc(fp1);
ch2=fgetc(fp2);
while(ch1!=EOF) //分别比较文件1和文件2的字符
{
if(ch1==ch2)
{
i++;
j++;
}
if(ch1!=ch2)
i++;
ch1=fgetc(fp1);
ch2=fgetc(fp2);
}
printf("文件1和文件2的相同率为：%d%c",j/i*100,w); //相同率
getch();
}

void main(void)
{
char ch;
while(1)
{
menu();
ch=getch();
switch(ch)
{
case 'p':
case 'P':
Pack();
break;
case 'u':
case 'U':
Unpack();
break;
case 'b':
case 'B':
Comfp();
break;
case 'e':
case 'E':
exit(0);
default:
cout<<"选择有误,按任意键重新输入"<<endl;
getch();
break;
}
}
}

这是哈夫曼编码实现压缩功能的函数，运行结果出现压缩前后数据的丢失，
原因就是出在对数据存储和解压那里，
void writeCode(LinkTree tree,char *string)是压缩的功能
void readCode(LinkTree tree,char *string)是解压功能

这两个函数我原来是这样的
void writeCode(LinkTree tree,char *string)
{
FILE *fp;
int i;
int weight; //记录写入的权值
char ch; //记录写入的字符
LinkTree p;
char path[PATH]; //0-1码文件的写路径

/*按权排列部分写入文件前部分*/
do
{
ch=p->ch;
weight=p->weight;
fprintf(fp,"%c%d",ch,weight);
p=p->next;
}while(p);
fprintf(fp,"%c%d",'^',END_TREE);

fseek(fp,sizeof(char),1); //空出区分位，用于解码时区分链表和编码

/*0-1码写入文件后部分*/
for(i=0;string[i];i++)
{
ch=string[i];
fputc(ch,fp);
}
cout<<"\n压缩成功！\n"<<endl;
getch();
fclose(fp);
}

void readCode(LinkTree tree,char *string)
{

FILE *fp; //指向文件的指针
int i;
int weight; //记录读入的权值
char ch; //记录读入的字符
LinkTree ptr,beforeptr;
char path[PATH]; //0-1码文件的读路径

fscanf(fp,"%c%d",&ch,&weight); /*从文件前部分读出解码树*/

fseek(fp,sizeof(char),1); //文件指针定位

/*从文件后部分读出0-1码*/
ch=fgetc(fp);
for(i=0;ch!=EOF;i++)
{
string[i]=ch;
ch=fgetc(fp);
}
string[i]='\0';
fclose(fp);
}

按原来的操作可以实现解压前后数据的一致，但压缩文件非常大，原因当然就是01串是按字节存储的，不是按位存储的

真的查不出来原因了，高手给点提示吧，我被老师骂了N回了！！！