文件压缩与解压思想:
1)统计字符出现次数,结合最小堆的性质生成哈夫曼树;
2)对叶节点进行编码,结点左边编0,右边编1;
3)读取文件,把每个字符替换为它的哈夫曼编码,并将编码按位拼接成字节写入压缩文件,即压缩;
4)读取压缩文件,按位沿哈夫曼树从根走到叶节点(0向左,1向右),还原出相应字符,即解压;
例如,设字符出现次数为 a:1、b:2、c:4、d:8,生成哈夫曼树后得到的哈夫曼编码为 a=000、b=001、c=01、d=1(出现次数越多,编码越短)。
//自己写的最小堆(因为在加强巩固,所以没用STL里的)
#pragma once
#include<cstdio>
#include<iostream>
#include<utility>
#include<vector>
using namespace std;
/// Ordering functor for pointer-like handles to weighted nodes.
///
/// Compares the `_weight` members reached through `operator->`, so `T` must
/// be a pointer (or pointer-like) type whose pointee exposes a `_weight`
/// that supports `operator<`.
template<class T>
struct Less
{
    /// @return true when the object behind `left` weighs strictly less than
    ///         the object behind `right`.
    // const-qualified so the comparator can also be called on a const object.
    bool operator()(const T& left, const T& right) const
    {
        return left->_weight < right->_weight;
    }
};
template<class T,class Compare=Less<T>>
class Heap
{
public:
Heap()
{}
Heap(const T*v, int size, int valid)
{
for (int i = 0; i < size; i++)
{
if (v[i] != valid)
_v.push_back(v[i]);
}
int _size = _v.size();
int begin = (_size - 2) / 2;
for (int root = begin; root >= 0; root--)
{
AdjustDown(root);
}
}
void Insert(const T& value)
{
_v.push_back(value);
int leaf = _v.size() - 1;
AdjustUp(leaf);
}
void Remove()
{
swap(_v[0], _v[_v.size() - 1]);
_v.pop_back();
AdjustDown(0);
}
T& GetTop()
{
if (!_v.empty())
return _v[0];
}
bool Empty()
{
if (_v.empty())
return true;
return false;
}
int Size()
{
return _v.size();
}
protected:
void AdjustDown(int root)
{
size_t left = root * 2 + 1;
size_t right = left + 1;
int key = left;
while (left < _v.size())
{
if (right < _v.size() && Compare()(_v[right], _v[left]))
key = right;
if (Compare()(_v[key], _v[root]))
{
swap(_v[key], _v[root]);
root = key;
left = root * 2 + 1;
right = left + 1;
key = left;
}
else
break;
}
}
void AdjustUp(int leaf)
{
int root = (leaf - 1) / 2;
while (leaf > 0)
{
if (Compare()(_v[leaf], _v[root]))
{
swap(_v[leaf], _v[root]);
leaf = root;
root = (leaf - 1) / 2;
}
else
break;
}
}
private:
vector<T> _v;
};
//哈夫曼树的建立
#pragma once
#include<iostream>
#include<string>
#include"Heap.h"
using namespace std;
/// One node of a binary Huffman tree, carrying a weight of type `T`.
///
/// A freshly constructed node has no children, i.e. it is a leaf until the
/// owner links `_left` / `_right`.
template<class T>
struct HuffmanTreeNode
{
    HuffmanTreeNode<T>* _left;    // left child, null when absent
    HuffmanTreeNode<T>* _right;   // right child, null when absent
    T _weight;                    // payload the tree is ordered by

    /// Creates a childless node holding a copy of `weight`.
    HuffmanTreeNode(const T& weight)
        : _left(nullptr), _right(nullptr), _weight(weight)
    {
    }
};
template<class T>
class HuffmanTree
{
typedef HuffmanTreeNode<T> Node;
public:
HuffmanTree(const T*a, int size, T invaild)
{
_CreatHuffmanTree(a, size, invaild);
}
Node* GetRoot()
{
return _root;
}
protected:
void _CreatHuffmanTree(const T*a,int size,T invaild)
{
Heap<Node*,Less<Node*>>hp;
for (int i = 0; i < size; i++)
{
if (a[i] != invaild)
hp.Insert(new Node(a[i]));//建立小堆
}
//当_v中只剩下一个数据时,哈弗曼树建立完成
while (hp.Size()>1)
{
Node* left = hp.GetTop();
hp.Remove();
Node* right = hp.GetTop();
hp.Remove();
Node*parent = new Node(left->_weight + right->_weight);
parent->_left = left;
parent->_right = right;
hp.Insert(parent);
}
_root = hp.GetTop();
}
private:
Node* _root;
};
//文件压缩与解压
#pragma once
#include<iostream>
#include<string>
using namespace std;
#include"HuffmanTree.h"
/// Per-byte bookkeeping for Huffman coding: the byte value, how often it
/// occurs, and the bit string ('0'/'1' characters) assigned to it.
///
/// All comparisons and the addition look at `_count` only.
/// Note: a plain `int` argument is ambiguous between the two constructors;
/// pass a `long` or an `unsigned char`.
struct CharInfo
{
    unsigned char _ch;    // the byte value this entry describes
    long _count;          // number of occurrences
    std::string _code;    // Huffman code as text, e.g. "0110"

    /// Creates an entry with the given occurrence count; `_ch` is zeroed so
    /// it is never left indeterminate.
    CharInfo(const long count = 0)
        : _ch(0)
        , _count(count)
    {}

    /// Creates an entry for byte `ch` with an occurrence count of zero.
    CharInfo(const unsigned char ch)
        : _ch(ch)
        , _count(0)
    {}

    /// @return the sum of both occurrence counts.
    long operator+(const CharInfo& info)const
    {
        return _count + info._count;
    }

    /// @return true when the occurrence counts differ.
    bool operator!=(const CharInfo& info)const
    {
        return _count != info._count;
    }

    /// @return true when this entry occurs less often than `info`.
    bool operator<(const CharInfo& info)const
    {
        return _count < info._count;
    }
};
class FileCompress
{
public:
//对文件filename进行压缩
void Compress(const char* filename)
{
FILE* fread = fopen(filename, "rb");
if (fread == NULL)
{
cout << "打开文件失败..." << endl;
return;
}
//打开文件成功
//1、字符与下标对应,填充结构体的_ch
for (int i = 0; i < 256; i++)
{
_Info[i]._ch = i;
}
//2、统计各个字符出现的次数
unsigned char ch = fgetc(fread);
while (!feof(fread))
{
_Info[ch]._count++;
ch = fgetc(fread);
}
//3、建立哈夫曼树
HuffmanTree<CharInfo> hft(_Info, 256, CharInfo());
//4、获取各个字符的哈夫曼编码
//每次都从根节点开始,走到叶节点,即得到叶节点对应字符的编码
HuffmanTreeNode<CharInfo>*root = hft.GetRoot();
string code;
GetHuffmanCodeOfChar(root, code);
//5、从头开始读文件,把读取字符的编码重新组合写入压缩文件中
fseek(fread, 0, SEEK_SET);
//压缩产生的文件的名字
string fcp = filename;
fcp += ".compress";
FILE* fwrite = fopen(fcp.c_str(), "wb");
if (fwrite == NULL)
{
cout << "compress 文件打开失败" << endl;
return;
}
//打开文件成功
unsigned char data = 0;//写入压缩文件的字符
int offset = 7;//偏移量
ch = fgetc(fread);
while (!feof(fread))
{
const char* str = _Info[ch]._code.c_str();//获取当前字符的编码
while (*str != '\0')
{
if (offset >= 0)
{
data = data | ((*str - '0') << offset);
offset--;
}
if (offset < 0)
{
fputc(data, fwrite);
data = 0;
offset = 7;
}
str++;
}
ch = fgetc(fread);
}
fputc(data, fwrite);
WriteConfig(filename);
fclose(fread);
fclose(fwrite);
}
void UnCompress(const char* filename)
{
string fcp = filename;
fcp += ".compress";
FILE* fread = fopen(fcp.c_str(), "rb");
if (fread == NULL)
{
cout << "打开文件失败..." << endl;
return;
}
//打开文件成功
//3、建立哈夫曼树
CharInfo info[256];
ReadConfig(filename, info);
HuffmanTree<CharInfo> hft(info, 256, CharInfo());
string fucp = filename;
fucp += ".uncompress";
FILE* fwrite = fopen(fucp.c_str(), "wb");
HuffmanTreeNode<CharInfo>*root = hft.GetRoot();
HuffmanTreeNode<CharInfo>*cur = root;//每次都要从根开始读,读到叶节点即可获取一个原字符
long DataTotal = (root->_weight)._count;//原文件中的字符总数
unsigned char ch = fgetc(fread);
while (DataTotal)
{
int tmp = 1;
int offset = 7;
while (offset >= 0)
{
if (ch&(1 << offset))//检验位是否为0
{
cur = cur->_right;
offset--;
}
else
{
cur = cur->_left;
offset--;
}
if (cur->_left == NULL&&cur->_right == NULL)
{
unsigned char wch = cur->_weight._ch;
fputc(wch, fwrite);
cur = root;
DataTotal--;
//最后一个字符的编码在最后两个字节当中的情况
if (!DataTotal)
{
break;
}
}
}
ch = fgetc(fread);
}
fclose(fread);
fclose(fwrite);
}
protected:
void WriteConfig(const char*filename)
{
string Config = filename;
Config += "config";
FILE* fwrite = fopen(Config.c_str(), "wb");
if (fwrite == NULL)
{
cout << "打开文件失败" << endl;
return;
}
//打开成功
for (int i = 0; i < 256; i++)
{
if (_Info[i] ._count)
{
fputc(_Info[i]._ch, fwrite);
fputc(',', fwrite);
char count[100];
//参数说明:1.要转换的值;2.结果存放区;3.进制
_itoa(_Info[i]._count, count, 10);
fputs(count, fwrite);
fputc(',', fwrite);
fputs(_Info[i]._code.c_str(), fwrite);
fputc('\n', fwrite);
}
}
fclose(fwrite);
}
void ReadConfig(const char*filename,CharInfo *info)
{
string Config = filename;
Config += "config";
FILE* fread = fopen(Config.c_str(), "rb");
if (fread == NULL)
{
cout << "打开文件失败" << endl;
return;
}
//打开成功
char str[100];
while (fgets(str, 100, fread))
{
char*pstr = str;
unsigned char ch = (unsigned char)*pstr;//得到字符
info[ch]._ch = ch;
if (ch == '\n')
{
fgets(str, 100, fread);
pstr=str;
pstr++;
}
else
pstr+=2;//跳过分隔符(逗号)
//得到_count的字符串形式
long count = 0;
while (*pstr&&*pstr!=',')
{
count *= 10;
count += *pstr - '0';
pstr++;
}
info[ch]._count = count;//得到出现次数
pstr++;
string code(pstr);
info[ch]._code = code;
}
}
void GetHuffmanCodeOfChar(HuffmanTreeNode<CharInfo>*root, string& code)
{
if (root == NULL)
return;
if (root->_left == NULL&&root->_right == NULL)
{
unsigned char ch = root->_weight._ch;
_Info[ch]._code = code;
return;
}
GetHuffmanCodeOfChar(root->_left, code + '0');//左边编码为0
GetHuffmanCodeOfChar(root->_right, code + '1');//右边编码为1
}
private:
CharInfo _Info[256];//结构体数组
};