mirror of
https://github.com/heqin-zhu/algorithm.git
synced 2024-03-22 13:30:46 +08:00
376 lines
10 KiB
C++
376 lines
10 KiB
C++
/* mbinary
|
|
#########################################################################
|
|
# File : huffman.cc
|
|
# Author: mbinary
|
|
# Mail: zhuheqin1@gmail.com
|
|
# Blog: https://mbinary.coding.me
|
|
# Github: https://github.com/mbinary
|
|
# Created Time: 2018-04-25 22:32
|
|
# Description:
|
|
#########################################################################
|
|
*/
|
|
|
|
#include<math.h>
|
|
#include<stdio.h>
|
|
#include<iostream>
|
|
#include<algorithm>
|
|
#include<map>
|
|
#include<unistd.h>
|
|
#include<iomanip>
|
|
#include<string>
|
|
#include<vector>
|
|
#include<queue>
|
|
#include<sys/time.h>
|
|
#define numDigit 10
|
|
#define nameLength 50
|
|
#define starNum 80
|
|
using namespace std;
|
|
|
|
void cat(string s)
|
|
{
|
|
FILE* f=fopen(s.c_str(),"rb");
|
|
cout<<"file content"<<endl;
|
|
while(!feof(f)){
|
|
cout<<fgetc(f);
|
|
}
|
|
cout<<endl;
|
|
}
|
|
string uniFileName(string file)
|
|
{
|
|
FILE * check = fopen(file.c_str(),"rb");
|
|
if(check){
|
|
char c;
|
|
cout<<"the file "<<file<<" already exists! continue? [Y/n]:"<<flush;
|
|
c=cin.get();
|
|
if(c=='n')exit(0);
|
|
int p,q;
|
|
p= file.find('(');
|
|
q=file.rfind('.');
|
|
if(q==string::npos)q=file.size();
|
|
if(p==string::npos)p=q;
|
|
string name=file.substr(0,p),suffix=file.substr(q,file.size());
|
|
int n=0;
|
|
while(true){
|
|
char s[3];
|
|
n+=1;
|
|
snprintf(s,3,"%d",n);
|
|
file=(name+"("+s+")"+suffix);
|
|
FILE* f=fopen(file.c_str(),"rb");
|
|
if(!f)break;
|
|
else fclose(f);
|
|
}
|
|
}
|
|
return file;
|
|
}
|
|
template<class t1, class t2>
|
|
void mapprint(map<t1,t2> &f)
|
|
{
|
|
for(class map<t1,t2>::iterator i = f.begin();i!=f.end();++i)
|
|
cout<<i->first<<") : "<<i->second<<endl;
|
|
}
|
|
template<typename ky,typename wt>
|
|
class node
|
|
{
|
|
public:
|
|
ky key;
|
|
wt val;
|
|
bool visited;
|
|
node * left,*right;
|
|
node(const node &a){val = a.val;key= a.key;visited = a.visited;left= a.left;right=a.right;}
|
|
node(ky k=0,wt v=0):key(k),val(v),visited(false),left(NULL),right(NULL){};
|
|
bool operator<(const node<ky,wt> & a)const{return val>a.val;};
|
|
};
|
|
template<typename ky,typename wt>
|
|
class huffman
|
|
{
|
|
private:
|
|
node<ky,wt> root;
|
|
string res;
|
|
public:
|
|
long total(){return root.val;}
|
|
map<ky,string> encode_map;
|
|
map<string,ky> decode_map;
|
|
huffman(map<ky,wt>& mp);
|
|
void display();
|
|
string encode(string,long &);
|
|
string decode(string,long&);
|
|
void preOrder(node<ky,wt>*,string);
|
|
};
|
|
template<typename ky,typename wt>
|
|
huffman<ky,wt>::huffman(map<ky,wt>& mp)
|
|
{
|
|
if(mp.empty()){
|
|
cout<<"Error! No data!"<<endl;
|
|
root=NULL;
|
|
return ;
|
|
}
|
|
priority_queue<node<ky,wt> > hp;
|
|
for(typename map<ky,wt>::iterator i=mp.begin();i!=mp.end();++i){
|
|
hp.push( node<ky,wt>(i->first,i->second));
|
|
}
|
|
int n =hp.size();
|
|
if(n==1){
|
|
root = hp.top();
|
|
return;
|
|
}
|
|
while(--n>=1){
|
|
node<ky,wt> *a = new node<ky,wt>(hp.top());
|
|
hp.pop();
|
|
node<ky,wt> *b = new node<ky,wt>(hp.top());
|
|
hp.pop();
|
|
node<ky,wt> * tmp = new node<ky,wt>(0,a->val+b->val);
|
|
tmp->left = a,tmp->right = b;
|
|
hp.push(*tmp);
|
|
}
|
|
root = hp.top();
|
|
preOrder(&root,string());
|
|
}
|
|
template<typename ky,typename wt>
|
|
void huffman<ky,wt>::preOrder(node<ky, wt>* nd,string s)
|
|
{
|
|
if(nd->left == NULL){
|
|
encode_map[nd->key] =s;
|
|
decode_map[s] = nd->key;
|
|
delete nd;
|
|
return ;
|
|
}
|
|
preOrder(nd->left,s+'0');
|
|
preOrder(nd->right,s+'1');
|
|
delete nd;
|
|
}
|
|
template<typename ky,typename wt>
|
|
string huffman<ky,wt>::decode(string zipfile_name,long &charNum)
|
|
{
|
|
string uniFileName(string);
|
|
FILE * src = fopen(zipfile_name.c_str(),"rb");
|
|
char file_name[nameLength];
|
|
fgets(file_name,nameLength,src);
|
|
int ct=-1;
|
|
while(file_name[++ct]!='\n');
|
|
int pos = zipfile_name.find('.');
|
|
if(pos==string::npos)pos=zipfile_name.size();
|
|
string name(zipfile_name.substr(0,pos)) ,suffix(file_name,file_name+ct),file(name+suffix);
|
|
file=uniFileName(file);
|
|
cout<<"extracting compressed file :"<<zipfile_name<<endl;
|
|
FILE * f = fopen(file.c_str(),"wb");
|
|
char t[numDigit];
|
|
fgets(t,numDigit,src);
|
|
int sz=atoi(t);
|
|
char code[sz];
|
|
fread(code,sz,1,src);
|
|
int idx=0;
|
|
for(int i =0;i<sz;++i ){
|
|
if(code[i]==' '){
|
|
decode_map[string(code+idx,code+i)]=code[++i];
|
|
idx=i+1;
|
|
}
|
|
}
|
|
for(int i=0;i<starNum;++i)cout<<"@";
|
|
cout<<endl;
|
|
char c;
|
|
long cur=charNum,gap=charNum/starNum;
|
|
while(cur){
|
|
c=fgetc(src);
|
|
if(!((--cur)%gap))cout<<"@"<<flush;
|
|
for(int i =0;i<8;++i){
|
|
if(c&(1<<i))res.append(1,'1');
|
|
else res.append(1,'0');
|
|
if(decode_map.count(res)!=0){
|
|
fputc(decode_map[res],f);
|
|
res.clear();
|
|
}
|
|
}
|
|
}
|
|
cout<<endl;
|
|
c=fgetc(src);
|
|
int dgt=fgetc(src);
|
|
cout<<feof(f);
|
|
if((int)dgt!=-1 ){
|
|
for(int i =0;i<dgt;++i){
|
|
if(c&(1<<i))res.append(1,'1');
|
|
else res.append(1,'0');
|
|
if(decode_map.count(res)!=0){
|
|
fputc(decode_map[res],f);
|
|
res.clear();
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
fclose(src);
|
|
fclose(f);
|
|
cout<<"get "<<file <<" successfully"<<endl;
|
|
return file;
|
|
}
|
|
template<typename ky,typename wt>
|
|
string huffman<ky,wt>::encode(string file_name,long &charNum)
|
|
{
|
|
charNum=0;
|
|
string uniFileName(string);
|
|
int pos =file_name.rfind('.');
|
|
if(pos==string::npos)pos=file_name.size();
|
|
string zipfile = file_name.substr(0,pos)+string(".zzip");
|
|
zipfile = uniFileName(zipfile);
|
|
cout<<"generating zip file :"<<zipfile<<endl;
|
|
FILE * dst = fopen(zipfile.c_str(),"wb");
|
|
FILE * f = fopen(file_name.c_str(),"rb");
|
|
fputs(file_name.substr(pos).c_str(),dst);
|
|
fputc('\n',dst);
|
|
string data;
|
|
for(class map<string,ky>::iterator i=decode_map.begin();i!=decode_map.end() ;++i ){
|
|
data.append((i->first));
|
|
data.append(" ");
|
|
data+=(i->second);
|
|
}
|
|
int data_size = data.size(); // calculate the size of the code_data
|
|
char sz[numDigit];
|
|
snprintf(sz,numDigit,"%d",data_size);
|
|
int ct=0;
|
|
for(;sz[ct];++ct)fputc(sz[ct],dst);
|
|
fputc('\n',dst);
|
|
fwrite(data.c_str(),data_size,1,dst);
|
|
int sum=0,digit=0,num;
|
|
string code8;
|
|
for(int i=0;i<starNum;++i)cout<<"@";
|
|
cout<<endl;
|
|
long gap=root.val/starNum,cur=0;
|
|
while(!feof(f)){
|
|
code8=encode_map[fgetc(f)];
|
|
if(!((++cur)%gap))cout<<"@";
|
|
for(int i=0;i<code8.size();++i){
|
|
if(code8[i]=='1')sum += 1<<(digit); //mistake if(tmp[j])
|
|
++digit;
|
|
if(digit==8){
|
|
++charNum;
|
|
fputc(sum,dst);
|
|
digit=sum=0;
|
|
}
|
|
}
|
|
}
|
|
cout<<endl;
|
|
if(digit!=0){ //mark
|
|
fputc(sum,dst);
|
|
fputc(digit,dst);
|
|
}
|
|
fclose(f);
|
|
fclose(dst);
|
|
cout<<"compress "<<file_name <<" successfully"<<endl;
|
|
return zipfile;
|
|
}
|
|
template<typename ky,typename wt>
|
|
void huffman<ky,wt>::display()
|
|
{
|
|
cout<<"the encoding map,huffman codes are as bellow:"<<endl;
|
|
for (typename map<ky,string>::iterator i=encode_map.begin();i!=encode_map.end() ;++i )
|
|
cout<<i->first<<"("<<(int)i->first<<"):"<<i->second<<endl;
|
|
}
|
|
bool handle_one(string file_name,vector<long> &origin,vector<long> &compressed)
|
|
{
|
|
int name_length = file_name.size();
|
|
FILE *src=fopen(file_name.c_str(),"rb");
|
|
cout<<"opening "<<file_name<<"..."<<endl;
|
|
if(!src){
|
|
cout<<"Path Error! Opening "<<file_name<<" Failed"<<endl;
|
|
origin.push_back(0);
|
|
compressed.push_back(0);
|
|
return false;
|
|
}
|
|
char cur;
|
|
map<char,long> mp;
|
|
while(!feof(src)){
|
|
fread(&cur,sizeof(char),1,src);
|
|
if(mp.count(cur)){
|
|
mp[cur]+=1;
|
|
}
|
|
else mp[cur]=1;
|
|
}
|
|
fclose(src);
|
|
huffman<char,long> hf(mp);
|
|
long sz;
|
|
string s(hf.encode(file_name,sz));
|
|
origin.push_back(hf.total()),compressed.push_back(sz);
|
|
cout<<"\ncontinue to uncompress? [Y/n]"<<endl;
|
|
char c=cin.get();
|
|
if(c=='n')return true;
|
|
hf.decode(s,sz);
|
|
return true;
|
|
}
|
|
bool isSep(char c)
|
|
{
|
|
return c==' '||c=='\n'||c=='\t'||c==',';
|
|
}
|
|
void splitToVec(char * s,vector<string>& v)
|
|
{
|
|
int i=0,last=0;
|
|
for(;s[i];++i){
|
|
if(isSep(s[i])){
|
|
v.push_back(string(s+last,s+i));
|
|
while(s[++i]&&isSep(s[i]));
|
|
last=i;
|
|
}
|
|
}
|
|
if(s[last])v.push_back(string(s+last,s+i));
|
|
}
|
|
bool lenStr(string &a,string &b)
|
|
{
|
|
return a.size()<b.size();
|
|
}
|
|
void go(vector<string> & names)
|
|
{
|
|
vector<long> originSize,compressedSize;
|
|
vector<int> deltaTime;
|
|
double last;
|
|
vector<bool> indicator;
|
|
bool bl;
|
|
for(vector<string>::iterator i=names.begin();i!=names.end();++i){
|
|
struct timeval tv;
|
|
gettimeofday(&tv,NULL);
|
|
last=tv.tv_sec;
|
|
bl=handle_one(*i,originSize,compressedSize);
|
|
indicator.push_back(bl);
|
|
gettimeofday(&tv,NULL);
|
|
deltaTime.push_back(tv.tv_sec-last);
|
|
}
|
|
cout<<"\nDealt file number "<<originSize.size()<<fixed<<setprecision(2)<<endl;
|
|
vector<string>::iterator p=max_element(names.begin(),names.end(),lenStr);
|
|
int len = p->size()+2;
|
|
for(int i =0;i<names.size();++i){
|
|
if(! indicator[i]){continue;}
|
|
cout<<names[i]<<string(len-names[i].size(),' ');
|
|
cout<<deltaTime[i]<<"s "<<compressedSize[i]/1024.0<<"KB/"<<originSize[i]/1024.0<<"KB :";
|
|
cout<<compressedSize[i]*100.0/originSize[i]<<"%"<<endl;
|
|
}
|
|
cout<<endl;
|
|
system("pause");
|
|
}
|
|
int main(int argv,char ** argc)
|
|
{
|
|
char cwd[50];
|
|
cout<<getcwd(cwd,50)<<endl;
|
|
vector<string> names;
|
|
string file;
|
|
if(argv>1){
|
|
for(int i=1;i<argv;++i){
|
|
names.push_back(argc[i]);
|
|
}
|
|
go(names);
|
|
names.clear();
|
|
}
|
|
char mk;
|
|
while(1){
|
|
char s[201];
|
|
cout<<"Input file names separated by space "<<endl;
|
|
if(cin.peek()=='\n')names.push_back(file);
|
|
else {
|
|
cin.getline(s,200);
|
|
splitToVec(s,names);
|
|
}
|
|
cout<<endl;
|
|
go(names);
|
|
cout<<"Continue? [Y/n]:"<<flush;
|
|
mk= cin.get();
|
|
if(mk=='n')break;
|
|
names.clear();
|
|
}
|
|
return 0;
|
|
}
|