diff --git a/cuckoo.cpp b/cuckoo.cpp deleted file mode 100644 index ac6630d..0000000 --- a/cuckoo.cpp +++ /dev/null @@ -1,237 +0,0 @@ -/* - * cuckoo.cpp - * - * Created on: Oct 7, 2014 - * Author: mzohner - */ - -#include "cuckoo.h" - -//returns a cuckoo hash table with the first dimension being the bins and the second dimension being the pointer to the elements -#ifndef TEST_UTILIZATION -uint8_t* -#else -uint32_t -#endif -cuckoo_hashing(uint8_t* elements, uint32_t neles, uint32_t nbins, uint32_t bitlen, uint32_t *outbitlen, uint32_t* nelesinbin, - uint32_t* perm, uint32_t ntasks, prf_state_ctx* prf_state) -{ - //The resulting hash table - uint8_t* hash_table; -#ifdef DOUBLE_TABLE - cuckoo_entry_ctx*** cuckoo_table; -#else - cuckoo_entry_ctx** cuckoo_table; -#endif - cuckoo_entry_ctx* cuckoo_entries; - uint32_t i, j; - uint32_t *perm_ptr; - pthread_t* entry_gen_tasks; - cuckoo_entry_gen_ctx* ctx; - hs_t hs; - -#ifdef COUNT_FAILS - uint32_t fails = 0; -#endif - - - init_hashing_state(&hs, neles, bitlen, nbins, prf_state); - *outbitlen = hs.outbitlen; -#ifdef DOUBLE_TABLE - cuckoo_table = (cuckoo_entry_ctx***) calloc(2, sizeof(cuckoo_entry_ctx**)); - cuckoo_table[0] = (cuckoo_entry_ctx**) calloc(nbins, sizeof(cuckoo_entry_ctx*)); - cuckoo_table[1] = (cuckoo_entry_ctx**) calloc(nbins, sizeof(cuckoo_entry_ctx*)); -#else - cuckoo_table = (cuckoo_entry_ctx**) calloc(nbins, sizeof(cuckoo_entry_ctx*)); -#endif - cuckoo_entries = (cuckoo_entry_ctx*) malloc(neles * sizeof(cuckoo_entry_ctx)); - entry_gen_tasks = (pthread_t*) malloc(sizeof(pthread_t) * ntasks); - ctx = (cuckoo_entry_gen_ctx*) malloc(sizeof(cuckoo_entry_gen_ctx) * ntasks); - -#ifndef TEST_UTILIZATION - for(i = 0; i < ntasks; i++) { - ctx[i].elements = elements; - ctx[i].cuckoo_entries = cuckoo_entries; - ctx[i].hs = &hs; - ctx[i].startpos = i * ceil_divide(neles, ntasks); - ctx[i].endpos = min(ctx[i].startpos + ceil_divide(neles, ntasks), neles); - //cout << "Thread " << i << " starting from " << ctx[i].startpos << " going to " << ctx[i].endpos << " for " << neles << " elements" << endl; - if(pthread_create(entry_gen_tasks+i, NULL, gen_cuckoo_entries, (void*) (ctx+i))) { - cerr << "Error in creating new pthread at cuckoo hashing!" << endl; - exit(0); - } - } - - for(i = 0; i < ntasks; i++) { - if(pthread_join(entry_gen_tasks[i], NULL)) { - cerr << "Error in joining pthread at cuckoo hashing!" << endl; - exit(0); - } - } -#else - ctx[0].elements = elements; - ctx[0].cuckoo_entries = cuckoo_entries; - ctx[0].hs = &hs; - ctx[0].startpos = 0; - ctx[0].endpos = neles; - gen_cuckoo_entries(ctx); -#endif - //for(i = 0; i < nbins; i++) { - // cout << "Address " << i << " mapped to " << hs.address_used[i] << " times" << endl; - //} - //insert all elements into the cuckoo hash table - for(i = 0; i < neles; i++) { - if(!(insert_element(cuckoo_table, cuckoo_entries + i, neles))) { -#ifdef COUNT_FAILS - fails++; - /*cout << "insertion failed for element " << (hex) << (*(((uint32_t*) elements)+i)) << ", inserting to address: "; - for(uint32_t j = 0; j < NUM_HASH_FUNCTIONS; j++) { - cout << (cuckoo_entries + i)->address[j] << ", "; - } - cout << (dec) << endl;*/ -#else - cerr << "Insertion not successful for element " <val, hs.outbytelen); - //cout << "copying value: " << (hex) << (unsigned int) cuckoo_table[i]->val[cuckoo_table[i]->pos][0] << (dec) << endl; - *perm_ptr = cuckoo_table[i]->eleid; - perm_ptr++; - nelesinbin[i] = 1; - } else { - memset(hash_table + i * hs.outbytelen, DUMMY_ENTRY_CLIENT, hs.outbytelen); - nelesinbin[i] = 0; - } - } - -#endif - -#ifndef TEST_UTILIZATION - - //Cleanup - for(i = 0; i < neles; i++) { - free(cuckoo_entries[i].val); - } -#endif - free(cuckoo_entries); -#ifdef DOUBLE_TABLE - free(cuckoo_table[0]); - free(cuckoo_table[1]); -#else - free(cuckoo_table); -#endif - free(entry_gen_tasks); - free(ctx); - - free_hashing_state(&hs); - -#ifdef TEST_UTILIZATION - return fails; -#else - return hash_table; -#endif -} - - -void *gen_cuckoo_entries(void *ctx_void) { - cuckoo_entry_gen_ctx* ctx = (cuckoo_entry_gen_ctx*) ctx_void; - hs_t* hs = ctx->hs; - uint32_t i, inbytelen = ceil_divide(hs->inbitlen, 8); - uint8_t* eleptr = ctx->elements + inbytelen * ctx->startpos; - - - //generate the cuckoo entries for all elements - for(i = ctx->startpos; i < ctx->endpos; i++, eleptr+=inbytelen) { - gen_cuckoo_entry(eleptr, ctx->cuckoo_entries + i, hs, i); - } -} - - -inline void gen_cuckoo_entry(uint8_t* in, cuckoo_entry_ctx* out, hs_t* hs, uint32_t ele_id) { - uint32_t i; - - out->pos = 0; - out->eleid = ele_id; - -#ifndef TEST_UTILIZATION - out->val = (uint8_t*) calloc(hs->outbytelen, sizeof(uint8_t)); -#endif - hashElement(in, out->address, out->val, hs); -} - -#ifdef DOUBLE_TABLE -inline bool insert_element(cuckoo_entry_ctx*** ctable, cuckoo_entry_ctx* element, uint32_t max_iterations) { -#else -inline bool insert_element(cuckoo_entry_ctx** ctable, cuckoo_entry_ctx* element, uint32_t max_iterations) { -#endif - cuckoo_entry_ctx *evicted, *tmp_evicted; - uint32_t i, ev_pos, iter_cnt; -#ifdef DEBUG_CUCKOO - cout << "iter_cnt = " << iter_cnt << " for element " << (hex) << (*((uint32_t*) element->element)) << (dec) << ", inserting to address: " - << element->address[element->pos] << " or " << element->address[element->pos^1] << endl; -#endif - - for(iter_cnt = 0, evicted = element; iter_cnt < max_iterations; iter_cnt++) { - //TODO: assert(addr < MAX_TAB_ENTRIES) - for(i = 0; i < NUM_HASH_FUNCTIONS; i++) {//, ele_pos=(ele_pos+1)%NUM_HASH_FUNCTIONS) { -#ifdef DOUBLE_TABLE - if(ctable[i][evicted->address[i]] == NULL) { - ctable[i][evicted->address[i]] = evicted; - evicted->pos = i; - return true; - } -#else - if(ctable[evicted->address[i]] == NULL) { - ctable[evicted->address[i]] = evicted; - evicted->pos = i; -#ifdef TEST_CHAINLEN - chain_cnt[iter_cnt]++; -#endif - return true; - } -#endif - } - - //choose random bin to evict other element -#if NUM_HASH_FUNCTIONS == 2 - ev_pos = evicted->address[evicted->pos% NUM_HASH_FUNCTIONS]; -#else - ev_pos = evicted->address[(evicted->pos^iter_cnt) % NUM_HASH_FUNCTIONS]; -#endif -#ifdef DOUBLE_TABLE - tmp_evicted = ctable[evicted->pos][ev_pos]; - ctable[evicted->pos][ev_pos] = evicted; -#else - tmp_evicted = ctable[ev_pos]; - ctable[ev_pos] = evicted; -#endif - evicted = tmp_evicted; - - //change position - if the number of HF's is increased beyond 2 this should be replaced by a different strategy - evicted->pos = (evicted->pos+1) % NUM_HASH_FUNCTIONS; - } - - //the highest number of iterations has been reached - return false; -} - -#ifdef TEST_CHAINLEN -void print_chain_cnt() { - //cout << "Chain Count: " << endl; - for(uint32_t i = 0; i < MAX_ITERATIONS; i++) { - //if(chain_cnt[i] > 0) - cout << i << "\t" << chain_cnt[i] << endl; - } -} -#endif diff --git a/cuckoo.h b/cuckoo.h deleted file mode 100644 index 54cb930..0000000 --- a/cuckoo.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * cuckoo.h - * - * Created on: Oct 7, 2014 - * Author: mzohner - */ - -#ifndef CUCKOO_H_ -#define CUCKOO_H_ - -#include "hashing_util.h" - -#define MAX_ITERATIONS 1024 -//#define DEBUG_CUCKOO -#ifdef TEST_UTILIZATION -#define COUNT_FAILS -#endif -//#define DOUBLE_TABLE -//#define TEST_CHAINLEN - -#ifdef TEST_CHAINLEN -static uint64_t chain_cnt[MAX_ITERATIONS]; -void print_chain_cnt(); -#endif - -struct cuckoo_entry_ctx { - //id of the element in the source set - uint32_t eleid; - //addresses the bin of the cuckoo entry in the cuckoo table, will only work for up to 2^{32} bins - uint32_t address[NUM_HASH_FUNCTIONS]; - //the value of the entry - uint8_t* val; - //which position is the entry currently mapped to - uint32_t pos; -#ifdef DEBUG_CUCKOO - uint8_t* element; -#endif -}; - - - -struct cuckoo_entry_gen_ctx { - //starting position in the generation process - uint32_t startpos; - //end position of entries that are generated by this thread - uint32_t endpos; - //input elements - uint8_t* elements; - //pointer to the cuckoo entries - cuckoo_entry_ctx* cuckoo_entries; - hs_t* hs; -}; - - -//returns a cuckoo hash table with the first dimension being the bins and the second dimension being the pointer to the elements -#ifndef TEST_UTILIZATION -uint8_t* -#else -uint32_t -#endif -cuckoo_hashing(uint8_t* elements, uint32_t neles, uint32_t nbins, uint32_t bitlen, uint32_t* outbitlen, uint32_t* nelesinbin, - uint32_t* perm, uint32_t ntasks, prf_state_ctx* prf_state); -//routine for generating the entries, is invoked by the threads -void *gen_cuckoo_entries(void *ctx); -inline void gen_cuckoo_entry(uint8_t* in, cuckoo_entry_ctx* out, hs_t* hs, uint32_t ele_id); -#ifdef DOUBLE_TABLE -inline bool insert_element(cuckoo_entry_ctx*** ctable, cuckoo_entry_ctx* element, uint32_t max_iterations); -#else -inline bool insert_element(cuckoo_entry_ctx** ctable, cuckoo_entry_ctx* element, uint32_t max_iterations); -#endif - - - -#endif /* CUCKOO_H_ */ diff --git a/hashing_util.h b/hashing_util.h deleted file mode 100644 index fbd5c99..0000000 --- a/hashing_util.h +++ /dev/null @@ -1,258 +0,0 @@ -/* - * hashing_util.h - * - * Created on: Oct 8, 2014 - * Author: mzohner - */ - -#ifndef HASHING_UTIL_H_ -#define HASHING_UTIL_H_ - -#include "../hashing_includes.h" - - -typedef uint16_t TABLEID_T; - -//#define TEST_UTILIZATION -#ifdef TEST_UTILIZATIN -#define NUM_HASH_FUNCTIONS 2 -#else -#define NUM_HASH_FUNCTIONS 3 -#endif -#define MAX_TABLE_SIZE_BYTES sizeof(TABLEID_T) -#define DUMMY_ENTRY_SERVER 0x00 -#define DUMMY_ENTRY_CLIENT 0xFF - -#define USE_LUBY_RACKOFF - -typedef struct hashing_state_ctx { - uint32_t** hf_values[NUM_HASH_FUNCTIONS]; - uint32_t nhfvals; - uint32_t nelements; - uint32_t nbins; - uint32_t inbitlen; - uint32_t addrbitlen; - uint32_t outbitlen; - //the byte values, are stored separately since they are needed very often - uint32_t inbytelen; - uint32_t addrbytelen; - uint32_t outbytelen; - uint32_t* address_used; - uint32_t mask; -} hs_t; - -//TODO: generate these randomly for each execution and communicate them between the parties -static const uint32_t HF_MASKS[3] = {0x00000000, 0x33333333, 0x14894568}; - -//use as mask to address the bits in a uint32_t vector -static const uint32_t SELECT_BITS[33] = \ - {0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F, \ - 0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF, 0x00001FFF, 0x00003FFF, 0x00007FFF, \ - 0x0000FFFF, 0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF, 0x001FFFFF, 0x003FFFFF, 0x007FFFFF, \ - 0x00FFFFFF, 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, \ - 0xFFFFFFFF }; - -//can also be computed as SELECT_BITS ^ 0xFFFFFFFF -static const uint32_t SELECT_BITS_INV[33] = \ - {0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFC, 0xFFFFFFF8, 0xFFFFFFF0, 0xFFFFFFE0, 0xFFFFFFC0, 0xFFFFFF80, \ - 0xFFFFFF00, 0xFFFFFE00, 0xFFFFFC00, 0xFFFFF800, 0xFFFFF000, 0xFFFFE000, 0xFFFFC000, 0xFFFF8000, \ - 0xFFFF0000, 0xFFFE0000, 0xFFFC0000, 0xFFF80000, 0xFFF00000, 0xFFE00000, 0xFFC00000, 0xFF800000, \ - 0xFF000000, 0xFE000000, 0xFC000000, 0xF8000000, 0xF0000000, 0xE0000000, 0xC0000000, 0x80000000, \ - 0x00000000 }; - -static const uint8_t BYTE_SELECT_BITS_INV[8] = {0xFF, 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01}; - -//Init the values for the hash function -static void init_hashing_state(hs_t* hs, uint32_t nelements, uint32_t inbitlen, uint32_t nbins, - prf_state_ctx* prf_state) { - uint32_t i, j, nrndbytes; - hs->nelements = nelements; - hs->nbins = nbins; - - hs->inbitlen = inbitlen; - hs->addrbitlen = min((uint32_t) ceil_log2(nbins), inbitlen); - -#ifdef USE_LUBY_RACKOFF - hs->outbitlen = hs->inbitlen - hs->addrbitlen+1; -#else - hs->outbitlen = inbitlen; -#endif - //TODO prevent too much memory utilization - //assert(hs->outbitlen < 32); - //TODO: quickfix to enable hashing for large values - //hs->outbitlen = min((double) hs->outbitlen, (double) 24); - - hs->inbytelen = ceil_divide(hs->inbitlen, 8); - hs->addrbytelen = ceil_divide(hs->addrbitlen, 8); - hs->outbytelen = ceil_divide(hs->outbitlen, 8); - - hs->nhfvals = ceil_divide(hs->outbytelen, MAX_TABLE_SIZE_BYTES); - - - nrndbytes = (1<<(8*MAX_TABLE_SIZE_BYTES)) * sizeof(uint32_t); - - //cout << " random bytes: " << nrndbytes << endl; - //cout << "inbitlen = " << hs->inbitlen << ", outbitlen = " << hs->outbitlen << ", addrbitlen = " << hs->addrbitlen << - // ", nhfvals = " << hs->nhfvals << ", nrndbytes = " << nrndbytes << endl; - - for(i = 0; i < NUM_HASH_FUNCTIONS; i++) { - hs->hf_values[i] = (uint32_t**) malloc(sizeof(uint32_t*) * hs->nhfvals); - - for(j = 0; j < hs->nhfvals; j++) { - hs->hf_values[i][j] = (uint32_t*) malloc(nrndbytes); - assert(hs->hf_values[i][j]); - gen_rnd_bytes(prf_state, (uint8_t*) hs->hf_values[i][j], nrndbytes); - } - } - //cout << "nhfvals = " << hs->nhfvals << endl; - hs->address_used = (uint32_t*) calloc(nbins, sizeof(uint32_t)); - hs->mask = 0xFFFFFFFF; - if(hs->inbytelen < sizeof(uint32_t)) { - hs->mask >>= (sizeof(uint32_t) * 8 - hs->inbitlen - hs->addrbitlen); - } -} - -static void free_hashing_state(hs_t* hs) { - uint32_t i, j; - for(i = 0; i < NUM_HASH_FUNCTIONS; i++) { - for(j = 0; j < hs->nhfvals; j++) { - free(hs->hf_values[i][j]); - } - free(hs->hf_values[i]); - } - free(hs->address_used); - //free(hs->hf_values); -} - -//reduce the bit-length of the elements if some bits are used to determine the bin and a permutation is used for hashing -//static uint32_t getOutBitLen(uint32_t inbitlen, uint32_t nelements) { -// return inbitlen - ceil_log2(nelements); -//}; - -//TODO: a generic place holder, can be replaced by any other hash function -//inline void hashElement(uint8_t* element, uint32_t* address, uint8_t* val, uint32_t hfid, hs_t* hs) { -inline void hashElement(uint8_t* element, uint32_t* address, uint8_t* val, hs_t* hs) { - -#ifdef USE_LUBY_RACKOFF - //TODO: the table-lookup hashing is only used for elements up to 32-bit length, since it gets very inefficient for larger values - uint64_t i, j, L, R; - TABLEID_T hfmaskaddr; - //Store the first hs->addrbitlen bits in L - L = *((uint32_t*) element) & SELECT_BITS[hs->addrbitlen]; - //Store the remaining hs->outbitlen bits in R and pad correspondingly - R = (*((uint32_t*) element) & SELECT_BITS_INV[hs->addrbitlen]) >> (hs->addrbitlen); - - R &= hs->mask;//mask = (1<<32-hs->addrbitlen) - - - //assert(R < (1<outbitlen)); - //cout << "R = " << R << endl; - /*if(hfid == 0) { - *address = L % hs->nbins; - *((uint32_t*) val) = R; - } else if(hfid == 1) { - *address = R % hs->nbins; - *((uint32_t*) val) = L; - } else { - *address = (L ^ R) % hs->nbins; - *((uint32_t*) val) = R; - }*/ - hfmaskaddr = R * sizeof(uint32_t); - //cout << "L = " << L << ", R = " << R << " addresses: "; - - for(i = 0; i < NUM_HASH_FUNCTIONS; i++) { - //cout << "i = " << i << ", addrbytelen = " << hs->addrbytelen << ", R = " << R << ", nbins = " << - // hs->nbins << ", L = " << L << ", addr= " << endl; - //address[i] = (L ^ *(((uint32_t*) &(hs->hf_values[i][R*hs->addrbytelen])))) % hs->nbins; - for(j = 0; j < hs->nhfvals; j++) { - //assert(hfmaskaddr < (1<<(8*MAX_TABLE_SIZE_BYTES)) * hs->addrbytelen); - //cout << "i = " << i << ", j = " << j << ", Hfmaskaddr = " << hfmaskaddr << endl; - //cout << "Hfvalue: " << hs->hf_values[i][j][hfmaskaddr] << endl; - address[i] = (L ^ *((hs->hf_values[i][j]+hfmaskaddr))) % hs->nbins; - //address[i] = (L ^ (i * R)) % hs->nbins; - } - //cout << address[i] << ", "; - //hs->address_used[address[i]]++; - } - //cout << endl; -#ifndef TEST_UTILIZATION - *((uint32_t*) val) = R; - //TODO copy remaining bits - - //if(hs->outbytelen >= sizeof(uint32_t)) - if(hs->inbitlen > sizeof(uint32_t) * 8) { - //memcpy(val + (sizeof(uint32_t) - hs->addrbytelen), element + sizeof(uint32_t), hs->outbytelen - (sizeof(uint32_t) - hs->addrbytelen)); - memcpy(val + (sizeof(uint32_t) - (hs->addrbitlen >>3)), element + sizeof(uint32_t), hs->outbytelen - (sizeof(uint32_t) - (hs->addrbitlen >>3))); - - //cout << "Element: "<< (hex) << (uint32_t) val[hs->outbytelen-1] << ", " << (uint32_t) (BYTE_SELECT_BITS_INV[hs->outbitlen & 0x03]) - // << ", " << (uint32_t) (val[hs->outbytelen-1] & (BYTE_SELECT_BITS_INV[hs->outbitlen & 0x03]) )<< (dec) << " :"; - - val[hs->outbytelen-1] &= (BYTE_SELECT_BITS_INV[hs->outbitlen & 0x03]); - - /*for(i = 0; i < hs->inbytelen; i++) { - cout << (hex) << (uint32_t) element[i]; - } - cout << ", "; - for(i = 0; i < hs->outbytelen; i++) { - cout << (hex) << (uint32_t) val[i]; - } - cout << (dec) << endl;*/ - } - - -#endif - //cout << "Address for hfid = " << hfid << ": " << *address << ", L = " << L << ", R = " << R << endl; - -#else - for(uint64_t i = 0; i < NUM_HASH_FUNCTIONS; i++) { - address[i] = ((*((uint32_t*) element+i) ^ HF_MASKS[i]) & SELECT_BITS[hs->addrbitlen]) % hs->nbins; - - #ifndef TEST_UTILIZATION - *((uint32_t*) val) = (*((uint32_t*) element) & SELECT_BITS_INV[hs->addrbitlen]) >> (hs->addrbitlen); - - //copy the remaining full bytes - if(hs->outbytelen >= sizeof(uint32_t)) - memcpy(val + (sizeof(uint32_t) - hs->addrbytelen), element + sizeof(uint32_t), hs->outbytelen - sizeof(uint32_t)); - #endif - } -#endif -} - -inline void domain_hashing(uint32_t nelements, uint8_t* elements, uint32_t elebytelen, uint8_t* result, - uint32_t resultbytelen, crypto* crypt) { - - uint8_t *eleptr, *resultptr, *hash_buf; - uint32_t i; - - eleptr=elements; - resultptr = result; -#ifndef BATCH - cout << "Hashing " << nelements << " elements from " << elebytelen << " bytes into " << resultbytelen << " bytes" << endl; -#endif - hash_buf = (uint8_t*) calloc(crypt->get_hash_bytes(), sizeof(uint8_t)); - for(i = 0; i < nelements; i++, resultptr+=resultbytelen, eleptr+=elebytelen) { - memcpy(hash_buf, eleptr, elebytelen); - crypt->hash(resultptr, resultbytelen, hash_buf, elebytelen); - } - free(hash_buf); -} - -inline void domain_hashing(uint32_t nelements, uint8_t** elements, uint32_t* elebytelens, uint8_t* result, - uint32_t resultbytelen, crypto* crypt) { - uint8_t *resultptr;//, *hash_buf; - uint32_t i; - - //eleptr=elements; - resultptr = result; -#ifndef BATCH - cout << "Hashing " << nelements << " elements from " << elebytelens << " bytes into " << resultbytelen << " bytes" << endl; -#endif - //hash_buf = (uint8_t*) calloc(crypt->get_hash_bytes(), sizeof(uint8_t)); - for(i = 0; i < nelements; i++, resultptr+=resultbytelen) { - //memcpy(hash_buf, elements[i], elebytelens[i]); - crypt->hash(resultptr, resultbytelen, elements[i], elebytelens[i]); - } - //free(hash_buf); -} - -#endif /* HASHING_UTIL_H_ */ diff --git a/simple_hashing.cpp b/simple_hashing.cpp deleted file mode 100644 index 55ce6c6..0000000 --- a/simple_hashing.cpp +++ /dev/null @@ -1,192 +0,0 @@ -/* - * simple_hashing.cpp - * - * Created on: Oct 8, 2014 - * Author: mzohner - */ - -#include "simple_hashing.h" - -uint8_t* simple_hashing(uint8_t* elements, uint32_t neles, uint32_t bitlen, uint32_t *outbitlen, uint32_t* nelesinbin, uint32_t nbins, - uint32_t ntasks, prf_state_ctx* prf_state) { - sht_ctx* table; - //uint8_t** bin_content; - uint8_t *eleptr, *bin_ptr, *result, *res_bins; - uint32_t i, j, tmpneles; - sheg_ctx* ctx; - pthread_t* entry_gen_tasks; - hs_t hs; - - init_hashing_state(&hs, neles, bitlen, nbins, prf_state); - //Set the output bit-length of the hashed elements - *outbitlen = hs.outbitlen; - - entry_gen_tasks = (pthread_t*) malloc(sizeof(pthread_t) * ntasks); - ctx = (sheg_ctx*) malloc(sizeof(sheg_ctx) * ntasks); - table = (sht_ctx*) malloc(sizeof(sht_ctx) * ntasks); - - - - for(i = 0; i < ntasks; i++) { - init_hash_table(table + i, ceil_divide(neles, ntasks), &hs); - } - - //for(i = 0; i < nbins; i++) - // pthread_mutex_init(locks+i, NULL); - - //tmpbuf = (uint8_t*) malloc(table->outbytelen); - - for(i = 0; i < ntasks; i++) { - ctx[i].elements = elements; - ctx[i].table = table + i; - ctx[i].startpos = i * ceil_divide(neles, ntasks); - ctx[i].endpos = min(ctx[i].startpos + ceil_divide(neles, ntasks), neles); - ctx[i].hs = &hs; - - //cout << "Thread " << i << " starting from " << ctx[i].startpos << " going to " << ctx[i].endpos << " for " << neles << " elements" << endl; - if(pthread_create(entry_gen_tasks+i, NULL, gen_entries, (void*) (ctx+i))) { - cerr << "Error in creating new pthread at simple hashing!" << endl; - exit(0); - } - } - - for(i = 0; i < ntasks; i++) { - if(pthread_join(entry_gen_tasks[i], NULL)) { - cerr << "Error in joining pthread at simple hashing!" << endl; - exit(0); - } - } - - //for(i = 0, eleptr=elements; i < neles; i++, eleptr+=inbytelen) { - // insert_element(table, eleptr, tmpbuf); - //} - - //malloc and copy simple hash table into hash table - //bin_content = (uint8_t**) malloc(sizeof(uint8_t*) * nbins); - //*nelesinbin = (uint32_t*) malloc(sizeof(uint32_t) * nbins); - - res_bins = (uint8_t*) malloc(neles * NUM_HASH_FUNCTIONS * hs.outbytelen); - bin_ptr = res_bins; - - - for(i = 0; i < hs.nbins; i++) { - nelesinbin[i] = 0; - for(j = 0; j < ntasks; j++) { - tmpneles = (table +j)->bins[i].nvals; - nelesinbin[i] += tmpneles; - //bin_content[i] = (uint8_t*) malloc(nelesinbin[i] * table->outbytelen); - memcpy(bin_ptr, (table + j)->bins[i].values, tmpneles * hs.outbytelen); - bin_ptr += (tmpneles * hs.outbytelen); - } - //right now only the number of elements in each bin is copied instead of the max bin size - } - - for(j = 0; j < ntasks; j++) - free_hash_table(table + j); - free(table); - free(entry_gen_tasks); - free(ctx); - - //for(i = 0; i < nbins; i++) - // pthread_mutex_destroy(locks+i); - //free(locks); - - free_hashing_state(&hs); - - return res_bins; -} - -void *gen_entries(void *ctx_tmp) { - //Insert elements in parallel, use lock to communicate - uint8_t *tmpbuf, *eleptr; - sheg_ctx* ctx = (sheg_ctx*) ctx_tmp; - uint32_t i, inbytelen, *address; - - address = (uint32_t*) malloc(NUM_HASH_FUNCTIONS * sizeof(uint32_t)); - tmpbuf = (uint8_t*) calloc(ceil_divide(ctx->hs->outbitlen, 8), sizeof(uint8_t)); //for(i = 0; i < NUM_HASH_FUNCTIONS; i++) { - // tmpbuf[i] = (uint8_t*) malloc(ceil_divide(ctx->hs->outbitlen, 8)); - //} - - for(i = ctx->startpos, eleptr=ctx->elements, inbytelen=ctx->hs->inbytelen; i < ctx->endpos; i++, eleptr+=inbytelen) { - insert_element(ctx->table, eleptr, address, tmpbuf, ctx->hs); - } - free(tmpbuf); - free(address); -} - -inline void insert_element(sht_ctx* table, uint8_t* element, uint32_t* address, uint8_t* tmpbuf, hs_t* hs) { - uint32_t i, j; - bin_ctx* tmp_bin; - - hashElement(element, address, tmpbuf, hs); - - for(i = 0; i < NUM_HASH_FUNCTIONS; i++) { - - tmp_bin=table->bins + address[i]; - //pthread_mutex_lock(locks + address[i]); - memcpy(tmp_bin->values + tmp_bin->nvals * hs->outbytelen, tmpbuf, hs->outbytelen); - for(j = 0; j < i; j++) { - if(address[i] == address[j]) { - memset(tmp_bin->values + tmp_bin->nvals * hs->outbytelen, DUMMY_ENTRY_SERVER, hs->outbytelen); - } - } - tmp_bin->nvals++; - //TODO: or simply allocate a bigger block of memory: table->maxbinsize * 2, left out for efficiency reasons - if(tmp_bin->nvals == table->maxbinsize) { - increase_max_bin_size(table, hs->outbytelen); - } - //assert(tmp_bin->nvals < table->maxbinsize); - /*cout << "Inserted into bin: " << address << ": " << (hex); - for(uint32_t j = 0; j < table->outbytelen; j++) { - cout << (unsigned int) tmpbuf[j]; - } - cout << (dec) << endl;*/ - //pthread_mutex_unlock(locks + address[i]); - } -} - -void init_hash_table(sht_ctx* table, uint32_t nelements, hs_t* hs) { - uint32_t i; - - //table->addrbitlen = ceil_log2(nbins); - //table->addrbytelen = ceil_divide(table->addrbitlen, 8); - //table->inbytelen = ceil_divide(inbitlen, 8); - if(ceil_divide(nelements, hs->nbins) < 3) - table->maxbinsize = 3*max(ceil_log2(nelements),3); - else - table->maxbinsize = 6*max((int) ceil_divide(nelements, hs->nbins), 3); - //cout << "maxbinsize = " << table->maxbinsize << endl; - //table->outbytelen = ceil_divide(getOutBitLen(inbitlen, nbins), 8); - table->nbins = hs->nbins; - - table->bins = (bin_ctx*) calloc(hs->nbins, sizeof(bin_ctx)); - - for(i = 0; i < hs->nbins; i++) { - table->bins[i].values = (uint8_t*) malloc(table->maxbinsize * hs->outbytelen); - } -} - -void free_hash_table(sht_ctx* table) { - uint32_t i; - //1. free the byte-pointers for the values in the bints - for(i = 0; i < table->nbins; i++) { - //if(table->bins[i].nvals > 0) - free(table->bins[i].values); - } - //2. free the bins - free(table->bins); - //3. free the actual table - //free(table); -} - -void increase_max_bin_size(sht_ctx* table, uint32_t valbytelen) { - uint32_t new_maxsize = table->maxbinsize * 2; - uint8_t* tmpvals; - for(uint32_t i = 0; i < table->nbins; i++) { - tmpvals = table->bins[i].values; - table->bins[i].values = (uint8_t*) malloc(new_maxsize * valbytelen); - memcpy(table->bins[i].values, tmpvals, table->bins[i].nvals * valbytelen); - free(tmpvals); - } - table->maxbinsize = new_maxsize; -} diff --git a/simple_hashing.h b/simple_hashing.h deleted file mode 100644 index d06e5d7..0000000 --- a/simple_hashing.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * simle_hashing.h - * - * Created on: Oct 8, 2014 - * Author: mzohner - */ - -#ifndef SIMLE_HASHING_H_ -#define SIMLE_HASHING_H_ - -#include "hashing_util.h" - -struct bin_ctx { - //hash-values of all elements mapped to this bin - uint8_t* values; - //number of elements stored in this bin - uint32_t nvals; -}; - -typedef struct simple_hash_table_ctx { - //pointer to the bins in the hash table - bin_ctx* bins; - //number bins in the hash table - uint32_t nbins; - //max bin size - uint32_t maxbinsize; - //uint32_t addrbitlen; - //uint32_t addrbytelen; - //uint32_t inbytelen; - //uint32_t outbytelen; -} sht_ctx; - -typedef struct simple_hash_entry_gen_ctx { - sht_ctx* table; - //input elements - uint8_t* elements; - uint32_t startpos; - uint32_t endpos; - //uint32_t inbytelen; - hs_t* hs; -} sheg_ctx; - - - -//returns a cuckoo hash table with the first dimension being the bins and the second dimension being the pointer to the elements -uint8_t* simple_hashing(uint8_t* elements, uint32_t neles, uint32_t bitlen, uint32_t* outbitlen, uint32_t* nelesinbin, uint32_t nbins, - uint32_t ntasks, prf_state_ctx* prf_state); -//routine for generating the entries, is invoked by the threads -void *gen_entries(void *ctx); -void init_hash_table(sht_ctx* table, uint32_t nelements, hs_t* hs); -void increase_max_bin_size(sht_ctx* table, uint32_t valbytelen); -void free_hash_table(sht_ctx* table); -inline void insert_element(sht_ctx* table, uint8_t* element, uint32_t* address, uint8_t* tmpbuf, hs_t* hs); - -#endif /* SIMLE_HASHING_H_ */