/* mbinary ######################################################################### # File : huffman.cc # Author: mbinary # Mail: zhuheqin1@gmail.com # Blog: https://mbinary.xyz # Github: https://github.com/mbinary # Created Time: 2018-04-25 22:32 # Description: ######################################################################### */ #include #include #include #include #include #include #include #include #include #include #include #define numDigit 10 #define nameLength 50 #define starNum 80 using namespace std; void cat(string s) { FILE* f = fopen(s.c_str(), "rb"); cout << "file content" << endl; while (!feof(f)) { cout << fgetc(f); } cout << endl; } string uniFileName(string file) { FILE * check = fopen(file.c_str(), "rb"); if (check) { char c; cout << "the file " << file << " already exists! continue? [Y/n]:" << flush; c = cin.get(); if (c == 'n')exit(0); int p, q; p = file.find('('); q = file.rfind('.'); if (q == string::npos)q = file.size(); if (p == string::npos)p = q; string name = file.substr(0, p), suffix = file.substr(q, file.size()); int n = 0; while (true) { char s[3]; n += 1; snprintf(s, 3, "%d", n); file = (name + "(" + s + ")" + suffix); FILE* f = fopen(file.c_str(), "rb"); if (!f)break; else fclose(f); } } return file; } template void mapprint(map &f) { for (class map::iterator i = f.begin(); i != f.end(); ++i) cout << i->first << ") : " << i->second << endl; } template class node { public: ky key; wt val; bool visited; node * left, *right; node(const node &a) { val = a.val; key = a.key; visited = a.visited; left = a.left; right = a.right; } node(ky k = 0, wt v = 0): key(k), val(v), visited(false), left(NULL), right(NULL) {}; bool operator<(const node & a)const { return val > a.val; }; }; template class huffman { private: node root; string res; public: long total() { return root.val; } map encode_map; map decode_map; huffman(map& mp); void display(); string encode(string, long &); string decode(string, long&); void preOrder(node*, string); }; template huffman::huffman(map& mp) { if (mp.empty()) { cout << "Error! No data!" << endl; root = NULL; return ; } priority_queue > hp; for (typename map::iterator i = mp.begin(); i != mp.end(); ++i) { hp.push(node(i->first, i->second)); } int n = hp.size(); if (n == 1) { root = hp.top(); return; } while (--n >= 1) { node *a = new node(hp.top()); hp.pop(); node *b = new node(hp.top()); hp.pop(); node * tmp = new node(0, a->val + b->val); tmp->left = a, tmp->right = b; hp.push(*tmp); } root = hp.top(); preOrder(&root, string()); } template void huffman::preOrder(node* nd, string s) { if (nd->left == NULL) { encode_map[nd->key] = s; decode_map[s] = nd->key; delete nd; return ; } preOrder(nd->left, s + '0'); preOrder(nd->right, s + '1'); delete nd; } template string huffman::decode(string zipfile_name, long &charNum) { string uniFileName(string); FILE * src = fopen(zipfile_name.c_str(), "rb"); char file_name[nameLength]; fgets(file_name, nameLength, src); int ct = -1; while (file_name[++ct] != '\n'); int pos = zipfile_name.find('.'); if (pos == string::npos)pos = zipfile_name.size(); string name(zipfile_name.substr(0, pos)), suffix(file_name, file_name + ct), file(name + suffix); file = uniFileName(file); cout << "extracting compressed file :" << zipfile_name << endl; FILE * f = fopen(file.c_str(), "wb"); char t[numDigit]; fgets(t, numDigit, src); int sz = atoi(t); char code[sz]; fread(code, sz, 1, src); int idx = 0; for (int i = 0; i < sz; ++i) { if (code[i] == ' ') { decode_map[string(code + idx, code + i)] = code[++i]; idx = i + 1; } } for (int i = 0; i < starNum; ++i)cout << "@"; cout << endl; char c; long cur = charNum, gap = charNum / starNum; while (cur) { c = fgetc(src); if (!((--cur) % gap))cout << "@" << flush; for (int i = 0; i < 8; ++i) { if (c & (1 << i))res.append(1, '1'); else res.append(1, '0'); if (decode_map.count(res) != 0) { fputc(decode_map[res], f); res.clear(); } } } cout << endl; c = fgetc(src); int dgt = fgetc(src); cout << feof(f); if ((int)dgt != -1) { for (int i = 0; i < dgt; ++i) { if (c & (1 << i))res.append(1, '1'); else res.append(1, '0'); if (decode_map.count(res) != 0) { fputc(decode_map[res], f); res.clear(); break; } } } fclose(src); fclose(f); cout << "get " << file << " successfully" << endl; return file; } template string huffman::encode(string file_name, long &charNum) { charNum = 0; string uniFileName(string); int pos = file_name.rfind('.'); if (pos == string::npos)pos = file_name.size(); string zipfile = file_name.substr(0, pos) + string(".zzip"); zipfile = uniFileName(zipfile); cout << "generating zip file :" << zipfile << endl; FILE * dst = fopen(zipfile.c_str(), "wb"); FILE * f = fopen(file_name.c_str(), "rb"); fputs(file_name.substr(pos).c_str(), dst); fputc('\n', dst); string data; for (class map::iterator i = decode_map.begin(); i != decode_map.end() ; ++i) { data.append((i->first)); data.append(" "); data += (i->second); } int data_size = data.size(); // calculate the size of the code_data char sz[numDigit]; snprintf(sz, numDigit, "%d", data_size); int ct = 0; for (; sz[ct]; ++ct)fputc(sz[ct], dst); fputc('\n', dst); fwrite(data.c_str(), data_size, 1, dst); int sum = 0, digit = 0, num; string code8; for (int i = 0; i < starNum; ++i)cout << "@"; cout << endl; long gap = root.val / starNum, cur = 0; while (!feof(f)) { code8 = encode_map[fgetc(f)]; if (!((++cur) % gap))cout << "@"; for (int i = 0; i < code8.size(); ++i) { if (code8[i] == '1')sum += 1 << (digit); //mistake if(tmp[j]) ++digit; if (digit == 8) { ++charNum; fputc(sum, dst); digit = sum = 0; } } } cout << endl; if (digit != 0) { //mark fputc(sum, dst); fputc(digit, dst); } fclose(f); fclose(dst); cout << "compress " << file_name << " successfully" << endl; return zipfile; } template void huffman::display() { cout << "the encoding map,huffman codes are as bellow:" << endl; for (typename map::iterator i = encode_map.begin(); i != encode_map.end() ; ++i) cout << i->first << "(" << (int)i->first << "):" << i->second << endl; } bool handle_one(string file_name, vector &origin, vector &compressed) { int name_length = file_name.size(); FILE *src = fopen(file_name.c_str(), "rb"); cout << "opening " << file_name << "..." << endl; if (!src) { cout << "Path Error! Opening " << file_name << " Failed" << endl; origin.push_back(0); compressed.push_back(0); return false; } char cur; map mp; while (!feof(src)) { fread(&cur, sizeof(char), 1, src); if (mp.count(cur)) { mp[cur] += 1; } else mp[cur] = 1; } fclose(src); huffman hf(mp); long sz; string s(hf.encode(file_name, sz)); origin.push_back(hf.total()), compressed.push_back(sz); cout << "\ncontinue to uncompress? [Y/n]" << endl; char c = cin.get(); if (c == 'n')return true; hf.decode(s, sz); return true; } bool isSep(char c) { return c == ' ' || c == '\n' || c == '\t' || c == ','; } void splitToVec(char * s, vector& v) { int i = 0, last = 0; for (; s[i]; ++i) { if (isSep(s[i])) { v.push_back(string(s + last, s + i)); while (s[++i] && isSep(s[i])); last = i; } } if (s[last])v.push_back(string(s + last, s + i)); } bool lenStr(string &a, string &b) { return a.size() < b.size(); } void go(vector & names) { vector originSize, compressedSize; vector deltaTime; double last; vector indicator; bool bl; for (vector::iterator i = names.begin(); i != names.end(); ++i) { struct timeval tv; gettimeofday(&tv, NULL); last = tv.tv_sec; bl = handle_one(*i, originSize, compressedSize); indicator.push_back(bl); gettimeofday(&tv, NULL); deltaTime.push_back(tv.tv_sec - last); } cout << "\nDealt file number " << originSize.size() << fixed << setprecision(2) << endl; vector::iterator p = max_element(names.begin(), names.end(), lenStr); int len = p->size() + 2; for (int i = 0; i < names.size(); ++i) { if (! indicator[i]) { continue; } cout << names[i] << string(len - names[i].size(), ' '); cout << deltaTime[i] << "s " << compressedSize[i] / 1024.0 << "KB/" << originSize[i] / 1024.0 << "KB :"; cout << compressedSize[i] * 100.0 / originSize[i] << "%" << endl; } cout << endl; system("pause"); } int main(int argv, char ** argc) { char cwd[50]; cout << getcwd(cwd, 50) << endl; vector names; string file; if (argv > 1) { for (int i = 1; i < argv; ++i) { names.push_back(argc[i]); } go(names); names.clear(); } char mk; while (1) { char s[201]; cout << "Input file names separated by space " << endl; if (cin.peek() == '\n')names.push_back(file); else { cin.getline(s, 200); splitToVec(s, names); } cout << endl; go(names); cout << "Continue? [Y/n]:" << flush; mk = cin.get(); if (mk == 'n')break; names.clear(); } return 0; }