diff --git a/Makefile b/Makefile index 24855c7..0951e70 100644 --- a/Makefile +++ b/Makefile @@ -35,8 +35,8 @@ SOURCES_OT=${SRC}/util/ot/*.cpp OBJECTS_OT=${SRC}/util/ot/*.o SOURCES_CRYPTO=${SRC}/util/crypto/*.cpp OBJECTS_CRYPTO=${SRC}/util/crypto/*.o -SOURCES_HASHING=${EXT}/hashing/*.cpp -OBJECTS_HASHING=${EXT}/hashing/*.o +SOURCES_HASHING=${SRC}/hashing/*.cpp +OBJECTS_HASHING=${SRC}/hashing/*.o # naive hashing-based solution SOURCES_NAIVE=${SRC}/naive-hashing/*.cpp OBJECTS_NAIVE=${SRC}/naive-hashing/*.o diff --git a/src/hashing/cuckoo.cpp b/src/hashing/cuckoo.cpp new file mode 100644 index 0000000..ac6630d --- /dev/null +++ b/src/hashing/cuckoo.cpp @@ -0,0 +1,237 @@ +/* + * cuckoo.cpp + * + * Created on: Oct 7, 2014 + * Author: mzohner + */ + +#include "cuckoo.h" + +//returns a cuckoo hash table with the first dimension being the bins and the second dimension being the pointer to the elements +#ifndef TEST_UTILIZATION +uint8_t* +#else +uint32_t +#endif +cuckoo_hashing(uint8_t* elements, uint32_t neles, uint32_t nbins, uint32_t bitlen, uint32_t *outbitlen, uint32_t* nelesinbin, + uint32_t* perm, uint32_t ntasks, prf_state_ctx* prf_state) +{ + //The resulting hash table + uint8_t* hash_table; +#ifdef DOUBLE_TABLE + cuckoo_entry_ctx*** cuckoo_table; +#else + cuckoo_entry_ctx** cuckoo_table; +#endif + cuckoo_entry_ctx* cuckoo_entries; + uint32_t i, j; + uint32_t *perm_ptr; + pthread_t* entry_gen_tasks; + cuckoo_entry_gen_ctx* ctx; + hs_t hs; + +#ifdef COUNT_FAILS + uint32_t fails = 0; +#endif + + + init_hashing_state(&hs, neles, bitlen, nbins, prf_state); + *outbitlen = hs.outbitlen; +#ifdef DOUBLE_TABLE + cuckoo_table = (cuckoo_entry_ctx***) calloc(2, sizeof(cuckoo_entry_ctx**)); + cuckoo_table[0] = (cuckoo_entry_ctx**) calloc(nbins, sizeof(cuckoo_entry_ctx*)); + cuckoo_table[1] = (cuckoo_entry_ctx**) calloc(nbins, sizeof(cuckoo_entry_ctx*)); +#else + cuckoo_table = (cuckoo_entry_ctx**) calloc(nbins, sizeof(cuckoo_entry_ctx*)); +#endif + cuckoo_entries = 
(cuckoo_entry_ctx*) malloc(neles * sizeof(cuckoo_entry_ctx)); + entry_gen_tasks = (pthread_t*) malloc(sizeof(pthread_t) * ntasks); + ctx = (cuckoo_entry_gen_ctx*) malloc(sizeof(cuckoo_entry_gen_ctx) * ntasks); + +#ifndef TEST_UTILIZATION + for(i = 0; i < ntasks; i++) { + ctx[i].elements = elements; + ctx[i].cuckoo_entries = cuckoo_entries; + ctx[i].hs = &hs; + ctx[i].startpos = i * ceil_divide(neles, ntasks); + ctx[i].endpos = min(ctx[i].startpos + ceil_divide(neles, ntasks), neles); + //cout << "Thread " << i << " starting from " << ctx[i].startpos << " going to " << ctx[i].endpos << " for " << neles << " elements" << endl; + if(pthread_create(entry_gen_tasks+i, NULL, gen_cuckoo_entries, (void*) (ctx+i))) { + cerr << "Error in creating new pthread at cuckoo hashing!" << endl; + exit(0); + } + } + + for(i = 0; i < ntasks; i++) { + if(pthread_join(entry_gen_tasks[i], NULL)) { + cerr << "Error in joining pthread at cuckoo hashing!" << endl; + exit(0); + } + } +#else + ctx[0].elements = elements; + ctx[0].cuckoo_entries = cuckoo_entries; + ctx[0].hs = &hs; + ctx[0].startpos = 0; + ctx[0].endpos = neles; + gen_cuckoo_entries(ctx); +#endif + //for(i = 0; i < nbins; i++) { + // cout << "Address " << i << " mapped to " << hs.address_used[i] << " times" << endl; + //} + //insert all elements into the cuckoo hash table + for(i = 0; i < neles; i++) { + if(!(insert_element(cuckoo_table, cuckoo_entries + i, neles))) { +#ifdef COUNT_FAILS + fails++; + /*cout << "insertion failed for element " << (hex) << (*(((uint32_t*) elements)+i)) << ", inserting to address: "; + for(uint32_t j = 0; j < NUM_HASH_FUNCTIONS; j++) { + cout << (cuckoo_entries + i)->address[j] << ", "; + } + cout << (dec) << endl;*/ +#else + cerr << "Insertion not successful for element " <val, hs.outbytelen); + //cout << "copying value: " << (hex) << (unsigned int) cuckoo_table[i]->val[cuckoo_table[i]->pos][0] << (dec) << endl; + *perm_ptr = cuckoo_table[i]->eleid; + perm_ptr++; + nelesinbin[i] = 1; + } else { + 
memset(hash_table + i * hs.outbytelen, DUMMY_ENTRY_CLIENT, hs.outbytelen); + nelesinbin[i] = 0; + } + } + +#endif + +#ifndef TEST_UTILIZATION + + //Cleanup + for(i = 0; i < neles; i++) { + free(cuckoo_entries[i].val); + } +#endif + free(cuckoo_entries); +#ifdef DOUBLE_TABLE + free(cuckoo_table[0]); + free(cuckoo_table[1]); +#else + free(cuckoo_table); +#endif + free(entry_gen_tasks); + free(ctx); + + free_hashing_state(&hs); + +#ifdef TEST_UTILIZATION + return fails; +#else + return hash_table; +#endif +} + +//pthread start routine: ctx_void points to a cuckoo_entry_gen_ctx; fills ctx->cuckoo_entries for the elements in [startpos, endpos) +void *gen_cuckoo_entries(void *ctx_void) { + cuckoo_entry_gen_ctx* ctx = (cuckoo_entry_gen_ctx*) ctx_void; + hs_t* hs = ctx->hs; + uint32_t i, inbytelen = ceil_divide(hs->inbitlen, 8); + uint8_t* eleptr = ctx->elements + inbytelen * ctx->startpos; + + //generate the cuckoo entries for all elements + for(i = ctx->startpos; i < ctx->endpos; i++, eleptr+=inbytelen) { + gen_cuckoo_entry(eleptr, ctx->cuckoo_entries + i, hs, i); + } + return NULL; //a void* routine must not fall off its end (undefined behavior); callers join with a NULL result pointer and ignore the value +} + + +inline void gen_cuckoo_entry(uint8_t* in, cuckoo_entry_ctx* out, hs_t* hs, uint32_t ele_id) { + uint32_t i; + + out->pos = 0; + out->eleid = ele_id; + +#ifndef TEST_UTILIZATION + out->val = (uint8_t*) calloc(hs->outbytelen, sizeof(uint8_t)); +#endif + hashElement(in, out->address, out->val, hs); +} + +#ifdef DOUBLE_TABLE +inline bool insert_element(cuckoo_entry_ctx*** ctable, cuckoo_entry_ctx* element, uint32_t max_iterations) { +#else +inline bool insert_element(cuckoo_entry_ctx** ctable, cuckoo_entry_ctx* element, uint32_t max_iterations) { +#endif + cuckoo_entry_ctx *evicted, *tmp_evicted; + uint32_t i, ev_pos, iter_cnt; +#ifdef DEBUG_CUCKOO + cout << "iter_cnt = " << iter_cnt << " for element " << (hex) << (*((uint32_t*) element->element)) << (dec) << ", inserting to address: " + << element->address[element->pos] << " or " << element->address[element->pos^1] << endl; +#endif + + for(iter_cnt = 0, evicted = element; iter_cnt < max_iterations; iter_cnt++) { + //TODO: assert(addr < MAX_TAB_ENTRIES) + for(i = 0; i < 
NUM_HASH_FUNCTIONS; i++) {//, ele_pos=(ele_pos+1)%NUM_HASH_FUNCTIONS) { +#ifdef DOUBLE_TABLE + if(ctable[i][evicted->address[i]] == NULL) { + ctable[i][evicted->address[i]] = evicted; + evicted->pos = i; + return true; + } +#else + if(ctable[evicted->address[i]] == NULL) { + ctable[evicted->address[i]] = evicted; + evicted->pos = i; +#ifdef TEST_CHAINLEN + chain_cnt[iter_cnt]++; +#endif + return true; + } +#endif + } + + //choose random bin to evict other element +#if NUM_HASH_FUNCTIONS == 2 + ev_pos = evicted->address[evicted->pos% NUM_HASH_FUNCTIONS]; +#else + ev_pos = evicted->address[(evicted->pos^iter_cnt) % NUM_HASH_FUNCTIONS]; +#endif +#ifdef DOUBLE_TABLE + tmp_evicted = ctable[evicted->pos][ev_pos]; + ctable[evicted->pos][ev_pos] = evicted; +#else + tmp_evicted = ctable[ev_pos]; + ctable[ev_pos] = evicted; +#endif + evicted = tmp_evicted; + + //change position - if the number of HF's is increased beyond 2 this should be replaced by a different strategy + evicted->pos = (evicted->pos+1) % NUM_HASH_FUNCTIONS; + } + + //the highest number of iterations has been reached + return false; +} + +#ifdef TEST_CHAINLEN +void print_chain_cnt() { + //cout << "Chain Count: " << endl; + for(uint32_t i = 0; i < MAX_ITERATIONS; i++) { + //if(chain_cnt[i] > 0) + cout << i << "\t" << chain_cnt[i] << endl; + } +} +#endif diff --git a/src/hashing/cuckoo.h b/src/hashing/cuckoo.h new file mode 100644 index 0000000..54cb930 --- /dev/null +++ b/src/hashing/cuckoo.h @@ -0,0 +1,74 @@ +/* + * cuckoo.h + * + * Created on: Oct 7, 2014 + * Author: mzohner + */ + +#ifndef CUCKOO_H_ +#define CUCKOO_H_ + +#include "hashing_util.h" + +#define MAX_ITERATIONS 1024 +//#define DEBUG_CUCKOO +#ifdef TEST_UTILIZATION +#define COUNT_FAILS +#endif +//#define DOUBLE_TABLE +//#define TEST_CHAINLEN + +#ifdef TEST_CHAINLEN +static uint64_t chain_cnt[MAX_ITERATIONS]; +void print_chain_cnt(); +#endif + +struct cuckoo_entry_ctx { + //id of the element in the source set + uint32_t eleid; + //addresses the 
bin of the cuckoo entry in the cuckoo table, will only work for up to 2^{32} bins + uint32_t address[NUM_HASH_FUNCTIONS]; + //the value of the entry + uint8_t* val; + //which position is the entry currently mapped to + uint32_t pos; +#ifdef DEBUG_CUCKOO + uint8_t* element; +#endif +}; + + + +struct cuckoo_entry_gen_ctx { + //starting position in the generation process + uint32_t startpos; + //end position of entries that are generated by this thread + uint32_t endpos; + //input elements + uint8_t* elements; + //pointer to the cuckoo entries + cuckoo_entry_ctx* cuckoo_entries; + hs_t* hs; +}; + + +//returns a cuckoo hash table with the first dimension being the bins and the second dimension being the pointer to the elements +#ifndef TEST_UTILIZATION +uint8_t* +#else +uint32_t +#endif +cuckoo_hashing(uint8_t* elements, uint32_t neles, uint32_t nbins, uint32_t bitlen, uint32_t* outbitlen, uint32_t* nelesinbin, + uint32_t* perm, uint32_t ntasks, prf_state_ctx* prf_state); +//routine for generating the entries, is invoked by the threads +void *gen_cuckoo_entries(void *ctx); +inline void gen_cuckoo_entry(uint8_t* in, cuckoo_entry_ctx* out, hs_t* hs, uint32_t ele_id); +#ifdef DOUBLE_TABLE +inline bool insert_element(cuckoo_entry_ctx*** ctable, cuckoo_entry_ctx* element, uint32_t max_iterations); +#else +inline bool insert_element(cuckoo_entry_ctx** ctable, cuckoo_entry_ctx* element, uint32_t max_iterations); +#endif + + + +#endif /* CUCKOO_H_ */ diff --git a/src/hashing/hashing_util.h b/src/hashing/hashing_util.h new file mode 100644 index 0000000..fbd5c99 --- /dev/null +++ b/src/hashing/hashing_util.h @@ -0,0 +1,258 @@ +/* + * hashing_util.h + * + * Created on: Oct 8, 2014 + * Author: mzohner + */ + +#ifndef HASHING_UTIL_H_ +#define HASHING_UTIL_H_ + +#include "../hashing_includes.h" + + +typedef uint16_t TABLEID_T; + +//#define TEST_UTILIZATION +#ifdef TEST_UTILIZATION //the guard was misspelled TEST_UTILIZATIN, so the 2-function branch could never be selected +#define NUM_HASH_FUNCTIONS 2 +#else +#define NUM_HASH_FUNCTIONS 3 +#endif +#define 
MAX_TABLE_SIZE_BYTES sizeof(TABLEID_T) +#define DUMMY_ENTRY_SERVER 0x00 +#define DUMMY_ENTRY_CLIENT 0xFF + +#define USE_LUBY_RACKOFF + +typedef struct hashing_state_ctx { + uint32_t** hf_values[NUM_HASH_FUNCTIONS]; + uint32_t nhfvals; + uint32_t nelements; + uint32_t nbins; + uint32_t inbitlen; + uint32_t addrbitlen; + uint32_t outbitlen; + //the byte values, are stored separately since they are needed very often + uint32_t inbytelen; + uint32_t addrbytelen; + uint32_t outbytelen; + uint32_t* address_used; + uint32_t mask; +} hs_t; + +//TODO: generate these randomly for each execution and communicate them between the parties +static const uint32_t HF_MASKS[3] = {0x00000000, 0x33333333, 0x14894568}; + +//use as mask to address the bits in a uint32_t vector +static const uint32_t SELECT_BITS[33] = \ + {0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F, \ + 0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF, 0x00001FFF, 0x00003FFF, 0x00007FFF, \ + 0x0000FFFF, 0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF, 0x001FFFFF, 0x003FFFFF, 0x007FFFFF, \ + 0x00FFFFFF, 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, \ + 0xFFFFFFFF }; + +//can also be computed as SELECT_BITS ^ 0xFFFFFFFF +static const uint32_t SELECT_BITS_INV[33] = \ + {0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFC, 0xFFFFFFF8, 0xFFFFFFF0, 0xFFFFFFE0, 0xFFFFFFC0, 0xFFFFFF80, \ + 0xFFFFFF00, 0xFFFFFE00, 0xFFFFFC00, 0xFFFFF800, 0xFFFFF000, 0xFFFFE000, 0xFFFFC000, 0xFFFF8000, \ + 0xFFFF0000, 0xFFFE0000, 0xFFFC0000, 0xFFF80000, 0xFFF00000, 0xFFE00000, 0xFFC00000, 0xFF800000, \ + 0xFF000000, 0xFE000000, 0xFC000000, 0xF8000000, 0xF0000000, 0xE0000000, 0xC0000000, 0x80000000, \ + 0x00000000 }; + +static const uint8_t BYTE_SELECT_BITS_INV[8] = {0xFF, 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01}; + +//Init the values for the hash function +static void init_hashing_state(hs_t* hs, uint32_t nelements, uint32_t inbitlen, uint32_t nbins, + 
prf_state_ctx* prf_state) { + uint32_t i, j, nrndbytes; + hs->nelements = nelements; + hs->nbins = nbins; + + hs->inbitlen = inbitlen; + hs->addrbitlen = min((uint32_t) ceil_log2(nbins), inbitlen); + +#ifdef USE_LUBY_RACKOFF + hs->outbitlen = hs->inbitlen - hs->addrbitlen+1; +#else + hs->outbitlen = inbitlen; +#endif + //TODO prevent too much memory utilization + //assert(hs->outbitlen < 32); + //TODO: quickfix to enable hashing for large values + //hs->outbitlen = min((double) hs->outbitlen, (double) 24); + + hs->inbytelen = ceil_divide(hs->inbitlen, 8); + hs->addrbytelen = ceil_divide(hs->addrbitlen, 8); + hs->outbytelen = ceil_divide(hs->outbitlen, 8); + + hs->nhfvals = ceil_divide(hs->outbytelen, MAX_TABLE_SIZE_BYTES); + + + nrndbytes = (1<<(8*MAX_TABLE_SIZE_BYTES)) * sizeof(uint32_t); + + //cout << " random bytes: " << nrndbytes << endl; + //cout << "inbitlen = " << hs->inbitlen << ", outbitlen = " << hs->outbitlen << ", addrbitlen = " << hs->addrbitlen << + // ", nhfvals = " << hs->nhfvals << ", nrndbytes = " << nrndbytes << endl; + + for(i = 0; i < NUM_HASH_FUNCTIONS; i++) { + hs->hf_values[i] = (uint32_t**) malloc(sizeof(uint32_t*) * hs->nhfvals); + + for(j = 0; j < hs->nhfvals; j++) { + hs->hf_values[i][j] = (uint32_t*) malloc(nrndbytes); + assert(hs->hf_values[i][j]); + gen_rnd_bytes(prf_state, (uint8_t*) hs->hf_values[i][j], nrndbytes); + } + } + //cout << "nhfvals = " << hs->nhfvals << endl; + hs->address_used = (uint32_t*) calloc(nbins, sizeof(uint32_t)); + hs->mask = 0xFFFFFFFF; + if(hs->inbytelen < sizeof(uint32_t)) { + hs->mask >>= (sizeof(uint32_t) * 8 - hs->inbitlen - hs->addrbitlen); + } +} + +static void free_hashing_state(hs_t* hs) { + uint32_t i, j; + for(i = 0; i < NUM_HASH_FUNCTIONS; i++) { + for(j = 0; j < hs->nhfvals; j++) { + free(hs->hf_values[i][j]); + } + free(hs->hf_values[i]); + } + free(hs->address_used); + //free(hs->hf_values); +} + +//reduce the bit-length of the elements if some bits are used to determine the bin and a 
permutation is used for hashing +//static uint32_t getOutBitLen(uint32_t inbitlen, uint32_t nelements) { +// return inbitlen - ceil_log2(nelements); +//}; + +//TODO: a generic place holder, can be replaced by any other hash function +//inline void hashElement(uint8_t* element, uint32_t* address, uint8_t* val, uint32_t hfid, hs_t* hs) { +inline void hashElement(uint8_t* element, uint32_t* address, uint8_t* val, hs_t* hs) { + +#ifdef USE_LUBY_RACKOFF + //TODO: the table-lookup hashing is only used for elements up to 32-bit length, since it gets very inefficient for larger values + uint64_t i, j, L, R; + TABLEID_T hfmaskaddr; + //Store the first hs->addrbitlen bits in L + L = *((uint32_t*) element) & SELECT_BITS[hs->addrbitlen]; + //Store the remaining hs->outbitlen bits in R and pad correspondingly + R = (*((uint32_t*) element) & SELECT_BITS_INV[hs->addrbitlen]) >> (hs->addrbitlen); + + R &= hs->mask;//mask = (1<<32-hs->addrbitlen) + + + //assert(R < (1<outbitlen)); + //cout << "R = " << R << endl; + /*if(hfid == 0) { + *address = L % hs->nbins; + *((uint32_t*) val) = R; + } else if(hfid == 1) { + *address = R % hs->nbins; + *((uint32_t*) val) = L; + } else { + *address = (L ^ R) % hs->nbins; + *((uint32_t*) val) = R; + }*/ + hfmaskaddr = R * sizeof(uint32_t); + //cout << "L = " << L << ", R = " << R << " addresses: "; + + for(i = 0; i < NUM_HASH_FUNCTIONS; i++) { + //cout << "i = " << i << ", addrbytelen = " << hs->addrbytelen << ", R = " << R << ", nbins = " << + // hs->nbins << ", L = " << L << ", addr= " << endl; + //address[i] = (L ^ *(((uint32_t*) &(hs->hf_values[i][R*hs->addrbytelen])))) % hs->nbins; + for(j = 0; j < hs->nhfvals; j++) { + //assert(hfmaskaddr < (1<<(8*MAX_TABLE_SIZE_BYTES)) * hs->addrbytelen); + //cout << "i = " << i << ", j = " << j << ", Hfmaskaddr = " << hfmaskaddr << endl; + //cout << "Hfvalue: " << hs->hf_values[i][j][hfmaskaddr] << endl; + address[i] = (L ^ *((hs->hf_values[i][j]+hfmaskaddr))) % hs->nbins; + //address[i] = (L ^ (i * R)) % 
hs->nbins; + } + //cout << address[i] << ", "; + //hs->address_used[address[i]]++; + } + //cout << endl; +#ifndef TEST_UTILIZATION + *((uint32_t*) val) = R; + //TODO copy remaining bits + + //if(hs->outbytelen >= sizeof(uint32_t)) + if(hs->inbitlen > sizeof(uint32_t) * 8) { + //memcpy(val + (sizeof(uint32_t) - hs->addrbytelen), element + sizeof(uint32_t), hs->outbytelen - (sizeof(uint32_t) - hs->addrbytelen)); + memcpy(val + (sizeof(uint32_t) - (hs->addrbitlen >>3)), element + sizeof(uint32_t), hs->outbytelen - (sizeof(uint32_t) - (hs->addrbitlen >>3))); + + //cout << "Element: "<< (hex) << (uint32_t) val[hs->outbytelen-1] << ", " << (uint32_t) (BYTE_SELECT_BITS_INV[hs->outbitlen & 0x03]) + // << ", " << (uint32_t) (val[hs->outbytelen-1] & (BYTE_SELECT_BITS_INV[hs->outbitlen & 0x03]) )<< (dec) << " :"; + + val[hs->outbytelen-1] &= (BYTE_SELECT_BITS_INV[hs->outbitlen & 0x03]); + + /*for(i = 0; i < hs->inbytelen; i++) { + cout << (hex) << (uint32_t) element[i]; + } + cout << ", "; + for(i = 0; i < hs->outbytelen; i++) { + cout << (hex) << (uint32_t) val[i]; + } + cout << (dec) << endl;*/ + } + + +#endif + //cout << "Address for hfid = " << hfid << ": " << *address << ", L = " << L << ", R = " << R << endl; + +#else + for(uint64_t i = 0; i < NUM_HASH_FUNCTIONS; i++) { + address[i] = ((*((uint32_t*) element+i) ^ HF_MASKS[i]) & SELECT_BITS[hs->addrbitlen]) % hs->nbins; + + #ifndef TEST_UTILIZATION + *((uint32_t*) val) = (*((uint32_t*) element) & SELECT_BITS_INV[hs->addrbitlen]) >> (hs->addrbitlen); + + //copy the remaining full bytes + if(hs->outbytelen >= sizeof(uint32_t)) + memcpy(val + (sizeof(uint32_t) - hs->addrbytelen), element + sizeof(uint32_t), hs->outbytelen - sizeof(uint32_t)); + #endif + } +#endif +} + +inline void domain_hashing(uint32_t nelements, uint8_t* elements, uint32_t elebytelen, uint8_t* result, + uint32_t resultbytelen, crypto* crypt) { + + uint8_t *eleptr, *resultptr, *hash_buf; + uint32_t i; + + eleptr=elements; + resultptr = result; +#ifndef 
BATCH + cout << "Hashing " << nelements << " elements from " << elebytelen << " bytes into " << resultbytelen << " bytes" << endl; +#endif + hash_buf = (uint8_t*) calloc(crypt->get_hash_bytes(), sizeof(uint8_t)); + for(i = 0; i < nelements; i++, resultptr+=resultbytelen, eleptr+=elebytelen) { + memcpy(hash_buf, eleptr, elebytelen); + crypt->hash(resultptr, resultbytelen, hash_buf, elebytelen); + } + free(hash_buf); +} + +inline void domain_hashing(uint32_t nelements, uint8_t** elements, uint32_t* elebytelens, uint8_t* result, + uint32_t resultbytelen, crypto* crypt) { + uint8_t *resultptr;//, *hash_buf; + uint32_t i; + + //eleptr=elements; + resultptr = result; +#ifndef BATCH + cout << "Hashing " << nelements << " elements from " << elebytelens << " bytes into " << resultbytelen << " bytes" << endl; +#endif + //hash_buf = (uint8_t*) calloc(crypt->get_hash_bytes(), sizeof(uint8_t)); + for(i = 0; i < nelements; i++, resultptr+=resultbytelen) { + //memcpy(hash_buf, elements[i], elebytelens[i]); + crypt->hash(resultptr, resultbytelen, elements[i], elebytelens[i]); + } + //free(hash_buf); +} + +#endif /* HASHING_UTIL_H_ */ diff --git a/src/hashing/simple_hashing.cpp b/src/hashing/simple_hashing.cpp new file mode 100644 index 0000000..55ce6c6 --- /dev/null +++ b/src/hashing/simple_hashing.cpp @@ -0,0 +1,192 @@ +/* + * simple_hashing.cpp + * + * Created on: Oct 8, 2014 + * Author: mzohner + */ + +#include "simple_hashing.h" + +uint8_t* simple_hashing(uint8_t* elements, uint32_t neles, uint32_t bitlen, uint32_t *outbitlen, uint32_t* nelesinbin, uint32_t nbins, + uint32_t ntasks, prf_state_ctx* prf_state) { + sht_ctx* table; + //uint8_t** bin_content; + uint8_t *eleptr, *bin_ptr, *result, *res_bins; + uint32_t i, j, tmpneles; + sheg_ctx* ctx; + pthread_t* entry_gen_tasks; + hs_t hs; + + init_hashing_state(&hs, neles, bitlen, nbins, prf_state); + //Set the output bit-length of the hashed elements + *outbitlen = hs.outbitlen; + + entry_gen_tasks = (pthread_t*) 
malloc(sizeof(pthread_t) * ntasks); + ctx = (sheg_ctx*) malloc(sizeof(sheg_ctx) * ntasks); + table = (sht_ctx*) malloc(sizeof(sht_ctx) * ntasks); + + + + for(i = 0; i < ntasks; i++) { + init_hash_table(table + i, ceil_divide(neles, ntasks), &hs); + } + + //for(i = 0; i < nbins; i++) + // pthread_mutex_init(locks+i, NULL); + + //tmpbuf = (uint8_t*) malloc(table->outbytelen); + + for(i = 0; i < ntasks; i++) { + ctx[i].elements = elements; + ctx[i].table = table + i; + ctx[i].startpos = i * ceil_divide(neles, ntasks); + ctx[i].endpos = min(ctx[i].startpos + ceil_divide(neles, ntasks), neles); + ctx[i].hs = &hs; + + //cout << "Thread " << i << " starting from " << ctx[i].startpos << " going to " << ctx[i].endpos << " for " << neles << " elements" << endl; + if(pthread_create(entry_gen_tasks+i, NULL, gen_entries, (void*) (ctx+i))) { + cerr << "Error in creating new pthread at simple hashing!" << endl; + exit(0); + } + } + + for(i = 0; i < ntasks; i++) { + if(pthread_join(entry_gen_tasks[i], NULL)) { + cerr << "Error in joining pthread at simple hashing!" 
<< endl; + exit(0); + } + } + + //for(i = 0, eleptr=elements; i < neles; i++, eleptr+=inbytelen) { + // insert_element(table, eleptr, tmpbuf); + //} + + //malloc and copy simple hash table into hash table + //bin_content = (uint8_t**) malloc(sizeof(uint8_t*) * nbins); + //*nelesinbin = (uint32_t*) malloc(sizeof(uint32_t) * nbins); + + res_bins = (uint8_t*) malloc(neles * NUM_HASH_FUNCTIONS * hs.outbytelen); + bin_ptr = res_bins; + + + for(i = 0; i < hs.nbins; i++) { + nelesinbin[i] = 0; + for(j = 0; j < ntasks; j++) { + tmpneles = (table +j)->bins[i].nvals; + nelesinbin[i] += tmpneles; + //bin_content[i] = (uint8_t*) malloc(nelesinbin[i] * table->outbytelen); + memcpy(bin_ptr, (table + j)->bins[i].values, tmpneles * hs.outbytelen); + bin_ptr += (tmpneles * hs.outbytelen); + } + //right now only the number of elements in each bin is copied instead of the max bin size + } + + for(j = 0; j < ntasks; j++) + free_hash_table(table + j); + free(table); + free(entry_gen_tasks); + free(ctx); + + //for(i = 0; i < nbins; i++) + // pthread_mutex_destroy(locks+i); + //free(locks); + + free_hashing_state(&hs); + + return res_bins; +} + +void *gen_entries(void *ctx_tmp) { + //pthread start routine: ctx_tmp points to a sheg_ctx; inserts the elements in [startpos, endpos) into ctx->table + uint8_t *tmpbuf, *eleptr; + sheg_ctx* ctx = (sheg_ctx*) ctx_tmp; + uint32_t i, inbytelen, *address; + + address = (uint32_t*) malloc(NUM_HASH_FUNCTIONS * sizeof(uint32_t)); + tmpbuf = (uint8_t*) calloc(ceil_divide(ctx->hs->outbitlen, 8), sizeof(uint8_t)); + + //ctx->elements is the base of the whole input array, so skip ahead to the first element of this task's slice + for(i = ctx->startpos, inbytelen=ctx->hs->inbytelen, eleptr=ctx->elements + inbytelen * ctx->startpos; i < ctx->endpos; i++, eleptr+=inbytelen) { + insert_element(ctx->table, eleptr, address, tmpbuf, ctx->hs); + } + free(tmpbuf); + free(address); + return NULL; //a void* routine must not fall off its end (undefined behavior); callers join with a NULL result pointer and ignore the value +} + +inline void insert_element(sht_ctx* table, uint8_t* element, uint32_t* address, uint8_t* tmpbuf, hs_t* hs) { + uint32_t i, j; + bin_ctx* tmp_bin; + + 
hashElement(element, address, tmpbuf, hs); + + for(i = 0; i < NUM_HASH_FUNCTIONS; i++) { + + tmp_bin=table->bins + address[i]; + //pthread_mutex_lock(locks + address[i]); + memcpy(tmp_bin->values + tmp_bin->nvals * hs->outbytelen, tmpbuf, hs->outbytelen); + for(j = 0; j < i; j++) { + if(address[i] == address[j]) { + memset(tmp_bin->values + tmp_bin->nvals * hs->outbytelen, DUMMY_ENTRY_SERVER, hs->outbytelen); + } + } + tmp_bin->nvals++; + //TODO: or simply allocate a bigger block of memory: table->maxbinsize * 2, left out for efficiency reasons + if(tmp_bin->nvals == table->maxbinsize) { + increase_max_bin_size(table, hs->outbytelen); + } + //assert(tmp_bin->nvals < table->maxbinsize); + /*cout << "Inserted into bin: " << address << ": " << (hex); + for(uint32_t j = 0; j < table->outbytelen; j++) { + cout << (unsigned int) tmpbuf[j]; + } + cout << (dec) << endl;*/ + //pthread_mutex_unlock(locks + address[i]); + } +} + +void init_hash_table(sht_ctx* table, uint32_t nelements, hs_t* hs) { + uint32_t i; + + //table->addrbitlen = ceil_log2(nbins); + //table->addrbytelen = ceil_divide(table->addrbitlen, 8); + //table->inbytelen = ceil_divide(inbitlen, 8); + if(ceil_divide(nelements, hs->nbins) < 3) + table->maxbinsize = 3*max(ceil_log2(nelements),3); + else + table->maxbinsize = 6*max((int) ceil_divide(nelements, hs->nbins), 3); + //cout << "maxbinsize = " << table->maxbinsize << endl; + //table->outbytelen = ceil_divide(getOutBitLen(inbitlen, nbins), 8); + table->nbins = hs->nbins; + + table->bins = (bin_ctx*) calloc(hs->nbins, sizeof(bin_ctx)); + + for(i = 0; i < hs->nbins; i++) { + table->bins[i].values = (uint8_t*) malloc(table->maxbinsize * hs->outbytelen); + } +} + +void free_hash_table(sht_ctx* table) { + uint32_t i; + //1. free the byte-pointers for the values in the bints + for(i = 0; i < table->nbins; i++) { + //if(table->bins[i].nvals > 0) + free(table->bins[i].values); + } + //2. free the bins + free(table->bins); + //3. 
free the actual table + //free(table); +} + +void increase_max_bin_size(sht_ctx* table, uint32_t valbytelen) { + uint32_t new_maxsize = table->maxbinsize * 2; + uint8_t* tmpvals; + for(uint32_t i = 0; i < table->nbins; i++) { + tmpvals = table->bins[i].values; + table->bins[i].values = (uint8_t*) malloc(new_maxsize * valbytelen); + memcpy(table->bins[i].values, tmpvals, table->bins[i].nvals * valbytelen); + free(tmpvals); + } + table->maxbinsize = new_maxsize; +} diff --git a/src/hashing/simple_hashing.h b/src/hashing/simple_hashing.h new file mode 100644 index 0000000..d06e5d7 --- /dev/null +++ b/src/hashing/simple_hashing.h @@ -0,0 +1,55 @@ +/* + * simle_hashing.h + * + * Created on: Oct 8, 2014 + * Author: mzohner + */ + +#ifndef SIMLE_HASHING_H_ +#define SIMLE_HASHING_H_ + +#include "hashing_util.h" + +struct bin_ctx { + //hash-values of all elements mapped to this bin + uint8_t* values; + //number of elements stored in this bin + uint32_t nvals; +}; + +typedef struct simple_hash_table_ctx { + //pointer to the bins in the hash table + bin_ctx* bins; + //number bins in the hash table + uint32_t nbins; + //max bin size + uint32_t maxbinsize; + //uint32_t addrbitlen; + //uint32_t addrbytelen; + //uint32_t inbytelen; + //uint32_t outbytelen; +} sht_ctx; + +typedef struct simple_hash_entry_gen_ctx { + sht_ctx* table; + //input elements + uint8_t* elements; + uint32_t startpos; + uint32_t endpos; + //uint32_t inbytelen; + hs_t* hs; +} sheg_ctx; + + + +//returns a cuckoo hash table with the first dimension being the bins and the second dimension being the pointer to the elements +uint8_t* simple_hashing(uint8_t* elements, uint32_t neles, uint32_t bitlen, uint32_t* outbitlen, uint32_t* nelesinbin, uint32_t nbins, + uint32_t ntasks, prf_state_ctx* prf_state); +//routine for generating the entries, is invoked by the threads +void *gen_entries(void *ctx); +void init_hash_table(sht_ctx* table, uint32_t nelements, hs_t* hs); +void increase_max_bin_size(sht_ctx* table, 
uint32_t valbytelen); +void free_hash_table(sht_ctx* table); +inline void insert_element(sht_ctx* table, uint8_t* element, uint32_t* address, uint8_t* tmpbuf, hs_t* hs); + +#endif /* SIMLE_HASHING_H_ */ diff --git a/src/externals/hashing_includes.h b/src/hashing_includes.h similarity index 86% rename from src/externals/hashing_includes.h rename to src/hashing_includes.h index 61d0eaf..01e7089 100644 --- a/src/externals/hashing_includes.h +++ b/src/hashing_includes.h @@ -13,7 +13,7 @@ #ifndef HASHING_INCLUDES_H_ #define HASHING_INCLUDES_H_ -#include "../util/typedefs.h" -#include "../util/crypto/crypto.h" +#include "util/typedefs.h" +#include "util/crypto/crypto.h" #endif /* HASHING_INCLUDES_H_ */ diff --git a/src/ot-based/ot-psi.h b/src/ot-based/ot-psi.h index fe022cb..4ef3a39 100644 --- a/src/ot-based/ot-psi.h +++ b/src/ot-based/ot-psi.h @@ -13,8 +13,8 @@ #include "../util/cbitvector.h" #include "../util/socket.h" #include "../util/connection.h" -#include "../externals/hashing/cuckoo.h" -#include "../externals/hashing/simple_hashing.h" +#include "../hashing/cuckoo.h" +#include "../hashing/simple_hashing.h" #include "../util/ot/ot-extension-1oon-ecc.h" #include "../util/ot/naor-pinkas.h" #include "../util/ot/opemasking.h"