From 95fae73833b9781a5bb5fd4fb13d985aad3058c4 Mon Sep 17 00:00:00 2001 From: dehnert Date: Tue, 14 Nov 2017 22:23:05 +0100 Subject: [PATCH] slight improvements to bit vector hashmap --- src/storm/storage/BitVector.cpp | 109 +++++++++++++++++++++- src/storm/storage/BitVector.h | 9 +- src/storm/storage/BitVectorHashMap.cpp | 120 +++++++------------------ src/storm/storage/BitVectorHashMap.h | 50 +++++------ 4 files changed, 165 insertions(+), 123 deletions(-) diff --git a/src/storm/storage/BitVector.cpp b/src/storm/storage/BitVector.cpp index 3b3f9c478..fdc8f27e6 100644 --- a/src/storm/storage/BitVector.cpp +++ b/src/storm/storage/BitVector.cpp @@ -1018,7 +1018,7 @@ namespace storm { return seed; } - inline __attribute__((always_inline)) uint32_t fmix32 (uint32_t h) { + inline __attribute__((always_inline)) uint32_t fmix32(uint32_t h) { h ^= h >> 16; h *= 0x85ebca6b; h ^= h >> 13; @@ -1028,16 +1028,36 @@ namespace storm { return h; } + inline __attribute__((always_inline)) uint64_t fmix64(uint64_t k) { + k ^= k >> 33; + k *= 0xff51afd7ed558ccdull; + k ^= k >> 33; + k *= 0xc4ceb9fe1a85ec53ull; + k ^= k >> 33; + + return k; + } + inline uint32_t rotl32(uint32_t x, int8_t r) { return (x << r) | (x >> (32 - r)); } - inline __attribute__((always_inline)) uint32_t getblock32 (uint32_t const* p, int i) { + inline uint64_t rotl64 ( uint64_t x, int8_t r ) + { + return (x << r) | (x >> (64 - r)); + } + + inline __attribute__((always_inline)) uint32_t getblock32(uint32_t const* p, int i) { return p[i]; } - std::size_t Murmur3_32_BitVectorHash::operator()(storm::storage::BitVector const& bv) const { - const uint8_t * data = reinterpret_cast(bv.buckets); + inline __attribute__((always_inline)) uint32_t getblock64(uint64_t const* p, int i) { + return p[i]; + } + + template<> + uint32_t Murmur3BitVectorHash::operator()(storm::storage::BitVector const& bv) const { + uint8_t const* data = reinterpret_cast(bv.buckets); uint32_t len = bv.bucketCount() * 8; const int nblocks = bv.bucketCount() * 2; @@ -1074,6 +1094,84 @@ namespace storm { return h1; } + template<> + uint64_t Murmur3BitVectorHash::operator()(storm::storage::BitVector const& bv) const { + uint8_t const* data = reinterpret_cast(bv.buckets); + uint64_t len = bv.bucketCount() * 8; + const int nblocks = bv.bucketCount() / 2; + + uint64_t h1 = 0; + uint64_t h2 = 0; + + const uint64_t c1 = 0x87c37b91114253d5ull; + const uint64_t c2 = 0x4cf5ad432745937full; + + //---------- + // body + + const uint64_t * blocks = (const uint64_t *)(data); + + for (int i = 0; i < nblocks; i++) { + uint64_t k1 = getblock64(blocks,i*2+0); + uint64_t k2 = getblock64(blocks,i*2+1); + + k1 *= c1; k1 = rotl64(k1,31); k1 *= c2; h1 ^= k1; + + h1 = rotl64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; + + k2 *= c2; k2 = rotl64(k2,33); k2 *= c1; h2 ^= k2; + + h2 = rotl64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; + } + + //---------- + // tail + + uint8_t const* tail = reinterpret_cast(data + nblocks*16); + + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch(len & 15) + { + case 15: k2 ^= ((uint64_t)tail[14]) << 48; + case 14: k2 ^= ((uint64_t)tail[13]) << 40; + case 13: k2 ^= ((uint64_t)tail[12]) << 32; + case 12: k2 ^= ((uint64_t)tail[11]) << 24; + case 11: k2 ^= ((uint64_t)tail[10]) << 16; + case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; + case 9: k2 ^= ((uint64_t)tail[ 8]) << 0; + k2 *= c2; k2 = rotl64(k2,33); k2 *= c1; h2 ^= k2; + + case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; + case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; + case 6: k1 ^= ((uint64_t)tail[ 5]) << 40; + case 5: k1 ^= ((uint64_t)tail[ 4]) << 32; + case 4: k1 ^= ((uint64_t)tail[ 3]) << 24; + case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; + case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; + case 1: k1 ^= ((uint64_t)tail[ 0]) << 0; + k1 *= c1; k1 = rotl64(k1,31); k1 *= c2; h1 ^= k1; + }; + + + //---------- + // finalization + + h1 ^= len; h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + h2 += h1; + + return h1 ^ h2; + } + // All necessary explicit template instantiations. template BitVector::BitVector(uint_fast64_t length, std::vector::iterator begin, std::vector::iterator end); template BitVector::BitVector(uint_fast64_t length, std::vector::const_iterator begin, std::vector::const_iterator end); @@ -1083,6 +1181,9 @@ namespace storm { template void BitVector::set(std::vector::const_iterator begin, std::vector::const_iterator end, bool value); template void BitVector::set(boost::container::flat_set::iterator begin, boost::container::flat_set::iterator end, bool value); template void BitVector::set(boost::container::flat_set::const_iterator begin, boost::container::flat_set::const_iterator end, bool value); + + template struct Murmur3BitVectorHash; + template struct Murmur3BitVectorHash; } } diff --git a/src/storm/storage/BitVector.h b/src/storm/storage/BitVector.h index 8a5c07719..9d7ec5fe7 100644 --- a/src/storm/storage/BitVector.h +++ b/src/storm/storage/BitVector.h @@ -502,7 +502,9 @@ namespace storm { friend std::ostream& operator<<(std::ostream& out, BitVector const& bitVector); friend struct std::hash; friend struct FNV1aBitVectorHash; - friend struct Murmur3_32_BitVectorHash; + + template + friend struct Murmur3BitVectorHash; private: /*! @@ -576,8 +578,9 @@ namespace storm { std::size_t operator()(storm::storage::BitVector const& bv) const; }; - struct Murmur3_32_BitVectorHash { - std::size_t operator()(storm::storage::BitVector const& bv) const; + template + struct Murmur3BitVectorHash { + StateType operator()(storm::storage::BitVector const& bv) const; }; } // namespace storage diff --git a/src/storm/storage/BitVectorHashMap.cpp b/src/storm/storage/BitVectorHashMap.cpp index 5409e2cef..019023f04 100644 --- a/src/storm/storage/BitVectorHashMap.cpp +++ b/src/storm/storage/BitVectorHashMap.cpp @@ -69,12 +69,12 @@ namespace storm { } template - std::size_t BitVectorHashMap::size() const { + uint64_t BitVectorHashMap::size() const { return numberOfElements; } template - std::size_t BitVectorHashMap::capacity() const { + uint64_t BitVectorHashMap::capacity() const { return 1ull << currentSize; } @@ -88,7 +88,6 @@ namespace storm { #endif // Create new containers and swap them with the old ones. - numberOfElements = 0; storm::storage::BitVector oldBuckets(bucketSize * (1ull << currentSize)); std::swap(oldBuckets, buckets); storm::storage::BitVector oldOccupied = storm::storage::BitVector(1ull << currentSize); @@ -97,29 +96,9 @@ namespace storm { std::swap(oldValues, values); // Now iterate through the elements and reinsert them in the new storage. - bool fail = false; + numberOfElements = 0; for (auto bucketIndex : oldOccupied) { - fail = !this->insertWithoutIncreasingSize(oldBuckets.get(bucketIndex * bucketSize, bucketSize), oldValues[bucketIndex]); - STORM_LOG_ASSERT(!fail, "Expected to be able to insert all elements."); - } - } - - template - bool BitVectorHashMap::insertWithoutIncreasingSize(storm::storage::BitVector const& key, ValueType const& value) { - std::tuple flagBucketTuple = this->findBucketToInsert(key); - if (std::get<2>(flagBucketTuple)) { - return false; - } - - if (std::get<0>(flagBucketTuple)) { - return true; - } else { - // Insert the new bits into the bucket. - buckets.set(std::get<1>(flagBucketTuple) * bucketSize, key); - occupied.set(std::get<1>(flagBucketTuple)); - values[std::get<1>(flagBucketTuple)] = value; - ++numberOfElements; - return true; + findOrAddAndGetBucket(oldBuckets.get(bucketIndex * bucketSize, bucketSize), oldValues[bucketIndex]); } } @@ -129,59 +108,41 @@ namespace storm { } template - void BitVectorHashMap::setOrAdd(storm::storage::BitVector const& key, ValueType const& value) { - setOrAddAndGetBucket(key, value); - } - - template - std::pair BitVectorHashMap::findOrAddAndGetBucket(storm::storage::BitVector const& key, ValueType const& value) { - // If the load of the map is too high, we increase the size. - if (numberOfElements >= loadFactor * (1ull << currentSize)) { - this->increaseSize(); - } + std::pair BitVectorHashMap::findOrAddAndGetBucket(storm::storage::BitVector const& key, ValueType const& value) { + checkIncreaseSize(); - std::tuple flagBucketTuple = this->findBucketToInsert(key); - STORM_LOG_ASSERT(!std::get<2>(flagBucketTuple), "Failed to find bucket for insertion."); - if (std::get<0>(flagBucketTuple)) { - return std::make_pair(values[std::get<1>(flagBucketTuple)], std::get<1>(flagBucketTuple)); + std::pair flagAndBucket = this->findBucketToInsert(key); + if (flagAndBucket.first) { + return std::make_pair(values[flagAndBucket.second], flagAndBucket.second); } else { // Insert the new bits into the bucket. - buckets.set(std::get<1>(flagBucketTuple) * bucketSize, key); - occupied.set(std::get<1>(flagBucketTuple)); - values[std::get<1>(flagBucketTuple)] = value; + buckets.set(flagAndBucket.second * bucketSize, key); + occupied.set(flagAndBucket.second); + values[flagAndBucket.second] = value; ++numberOfElements; - return std::make_pair(value, std::get<1>(flagBucketTuple)); + return std::make_pair(value, flagAndBucket.second); } } template - std::size_t BitVectorHashMap::setOrAddAndGetBucket(storm::storage::BitVector const& key, ValueType const& value) { + bool BitVectorHashMap::checkIncreaseSize() { // If the load of the map is too high, we increase the size. if (numberOfElements >= loadFactor * (1ull << currentSize)) { this->increaseSize(); + return true; } - - std::tuple flagBucketTuple = this->findBucketToInsert(key); - STORM_LOG_ASSERT(!std::get<2>(flagBucketTuple), "Failed to find bucket for insertion."); - if (!std::get<0>(flagBucketTuple)) { - // Insert the new bits into the bucket. - buckets.set(std::get<1>(flagBucketTuple) * bucketSize, key); - occupied.set(std::get<1>(flagBucketTuple)); - ++numberOfElements; - } - values[std::get<1>(flagBucketTuple)] = value; - return std::get<1>(flagBucketTuple); + return false; } template ValueType BitVectorHashMap::getValue(storm::storage::BitVector const& key) const { - std::pair flagBucketPair = this->findBucket(key); + std::pair flagBucketPair = this->findBucket(key); STORM_LOG_ASSERT(flagBucketPair.first, "Unknown key."); return values[flagBucketPair.second]; } template - ValueType BitVectorHashMap::getValue(std::size_t bucket) const { + ValueType BitVectorHashMap::getValue(uint64_t bucket) const { return values[bucket]; } @@ -201,73 +162,58 @@ namespace storm { } template - std::pair BitVectorHashMap::findBucket(storm::storage::BitVector const& key) const { + uint64_t BitVectorHashMap::getCurrentShiftWidth() const { + return (sizeof(decltype(hasher(storm::storage::BitVector()))) * 8 - currentSize); + } + + template + std::pair BitVectorHashMap::findBucket(storm::storage::BitVector const& key) const { #ifndef NDEBUG ++numberOfFinds; #endif - uint_fast64_t initialHash = hasher(key) % (1ull << currentSize); - uint_fast64_t bucket = initialHash; + uint64_t bucket = hasher(key) >> this->getCurrentShiftWidth(); - uint_fast64_t i = 0; while (isBucketOccupied(bucket)) { - ++i; #ifndef NDEBUG ++numberOfFindProbingSteps; #endif if (buckets.matches(bucket * bucketSize, key)) { return std::make_pair(true, bucket); } - bucket += 1; + ++bucket; if (bucket == (1ull << currentSize)) { bucket = 0; } - - if (bucket == initialHash) { - return std::make_pair(false, bucket); - } } return std::make_pair(false, bucket); } template - template - std::tuple BitVectorHashMap::findBucketToInsert(storm::storage::BitVector const& key) { + std::pair BitVectorHashMap::findBucketToInsert(storm::storage::BitVector const& key) { #ifndef NDEBUG ++numberOfInsertions; #endif - uint_fast64_t initialHash = hasher(key) % (1ull << currentSize); - uint_fast64_t bucket = initialHash; + uint64_t bucket = hasher(key) >> this->getCurrentShiftWidth(); - uint64_t i = 0; while (isBucketOccupied(bucket)) { - ++i; #ifndef NDEBUG ++numberOfInsertionProbingSteps; #endif if (buckets.matches(bucket * bucketSize, key)) { - return std::make_tuple(true, bucket, false); + return std::make_pair(true, bucket); } - bucket += 1; + ++bucket; if (bucket == (1ull << currentSize)) { bucket = 0; } - - if (bucket == initialHash) { - if (increaseStorage) { - this->increaseSize(); - bucket = initialHash = hasher(key) % (1ull << currentSize); - } else { - return std::make_tuple(false, bucket, true); - } - } } - return std::make_tuple(false, bucket, false); + return std::make_tuple(false, bucket); } template - std::pair BitVectorHashMap::getBucketAndValue(std::size_t bucket) const { + std::pair BitVectorHashMap::getBucketAndValue(uint64_t bucket) const { return std::make_pair(buckets.get(bucket * bucketSize, bucketSize), values[bucket]); } @@ -278,7 +224,7 @@ namespace storm { } } - template class BitVectorHashMap; + template class BitVectorHashMap; template class BitVectorHashMap; } } diff --git a/src/storm/storage/BitVectorHashMap.h b/src/storm/storage/BitVectorHashMap.h index ba76e0504..38310aa4f 100644 --- a/src/storm/storage/BitVectorHashMap.h +++ b/src/storm/storage/BitVectorHashMap.h @@ -16,7 +16,7 @@ namespace storm { */ // template> // template - template + template> class BitVectorHashMap { public: class BitVectorHashMapIterator { @@ -73,15 +73,6 @@ namespace storm { * @return The found value if the key is already contained in the map and the provided new value otherwise. */ ValueType findOrAdd(storm::storage::BitVector const& key, ValueType const& value); - - /*! - * Sets the given key value pain in the map. If the key is found in the map, the corresponding value is - * overwritten with the given value. Otherwise, the key is inserted with the given value. - * - * @param key The key to search or insert. - * @param value The value to set. - */ - void setOrAdd(storm::storage::BitVector const& key, ValueType const& value); /*! * Searches for the given key in the map. If it is found, the mapped-to value is returned. Otherwise, the @@ -93,17 +84,7 @@ namespace storm { * the provided new value otherwise and whose second component is the index of the bucket into which the key * was inserted. */ - std::pair findOrAddAndGetBucket(storm::storage::BitVector const& key, ValueType const& value); - - /*! - * Sets the given key value pain in the map. If the key is found in the map, the corresponding value is - * overwritten with the given value. Otherwise, the key is inserted with the given value. - * - * @param key The key to search or insert. - * @param value The value to set. - * @return The index of the bucket into which the key was inserted. - */ - std::size_t setOrAddAndGetBucket(storm::storage::BitVector const& key, ValueType const& value); + std::pair findOrAddAndGetBucket(storm::storage::BitVector const& key, ValueType const& value); /*! * Retrieves the key stored in the given bucket (if any) and the value it is mapped to. @@ -111,7 +92,7 @@ namespace storm { * @param bucket The index of the bucket. * @return The content and value of the named bucket. */ - std::pair getBucketAndValue(std::size_t bucket) const; + std::pair getBucketAndValue(uint64_t bucket) const; /*! * Retrieves the value associated with the given key (if any). If the key does not exist, the behaviour is @@ -126,7 +107,7 @@ namespace storm { * * @return The value associated with the given bucket (if any). */ - ValueType getValue(std::size_t bucket) const; + ValueType getValue(uint64_t bucket) const; /*! * Checks if the given key is already contained in the map. @@ -155,14 +136,14 @@ namespace storm { * * @return The size of the map. */ - std::size_t size() const; + uint64_t size() const; /*! * Retrieves the capacity of the underlying container. * * @return The capacity of the underlying container. */ - std::size_t capacity() const; + uint64_t capacity() const; /*! * Performs a remapping of all values stored by applying the given remapping. @@ -187,7 +168,7 @@ namespace storm { * @return A pair whose first component indicates whether the key is already contained in the map and whose * second component indicates in which bucket the key is stored. */ - std::pair findBucket(storm::storage::BitVector const& key) const; + std::pair findBucket(storm::storage::BitVector const& key) const; /*! * Searches for the bucket into which the given key can be inserted. If no empty bucket can be found, the @@ -200,8 +181,7 @@ namespace storm { * an error flag indicating that the bucket could not be found (e.g. due to the restriction that the storage * must not be increased). */ - template - std::tuple findBucketToInsert(storm::storage::BitVector const& key); + std::pair findBucketToInsert(storm::storage::BitVector const& key); /*! * Inserts the given key-value pair without resizing the underlying storage. If that fails, this is @@ -218,6 +198,18 @@ namespace storm { */ void increaseSize(); + /*! + * Checks whether the size should be increased and does so if necessary. + * + * @return True iff the storage was increased. + */ + bool checkIncreaseSize(); + + /*! + * Determines the number of bits by which the hash value must be shifted to obtain a value in the legal range. + */ + uint64_t getCurrentShiftWidth() const; + // The load factor determining when the size of the map is increased. double loadFactor; @@ -237,7 +229,7 @@ namespace storm { std::vector values; // The number of elements in this map. - std::size_t numberOfElements; + uint64_t numberOfElements; // Functor object that are used to perform the actual hashing. Hash hasher;