diff --git a/resources/3rdparty/sparsepp/LICENSE b/resources/3rdparty/sparsepp/LICENSE new file mode 100644 index 000000000..865d273ff --- /dev/null +++ b/resources/3rdparty/sparsepp/LICENSE @@ -0,0 +1,36 @@ +// ---------------------------------------------------------------------- +// Copyright (c) 2016, Gregory Popovitch - greg7mdp@gmail.com +// All rights reserved. +// +// This work is derived from Google's sparsehash library +// +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// ---------------------------------------------------------------------- + diff --git a/resources/3rdparty/sparsepp/README.md b/resources/3rdparty/sparsepp/README.md new file mode 100644 index 000000000..df473bed9 --- /dev/null +++ b/resources/3rdparty/sparsepp/README.md @@ -0,0 +1,288 @@ +[![Build Status](https://travis-ci.org/greg7mdp/sparsepp.svg?branch=master)](https://travis-ci.org/greg7mdp/sparsepp) + +# Sparsepp: A fast, memory efficient hash map for C++ + +Sparsepp is derived from Google's excellent [sparsehash](https://github.com/sparsehash/sparsehash) implementation. It aims to achieve the following objectives: + +- A drop-in alternative for unordered_map and unordered_set. +- **Extremely low memory usage** (typically about one byte overhead per entry). +- **Very efficient**, typically faster than your compiler's unordered map/set or Boost's. +- **C++11 support** (if supported by compiler). +- **Single header** implementation - just copy `sparsepp.h` to your project and include it. +- **Tested** on Windows (vs2010-2015, g++), linux (g++, clang++) and MacOS (clang++). + +We believe Sparsepp provides an unparalleled combination of performance and memory usage, and will outperform your compiler's unordered_map on both counts. Only Google's `dense_hash_map` is consistently faster, at the cost of much greater memory usage (especially when the final size of the map is not known in advance). + +For a detailed comparison of various hash implementations, including Sparsepp, please see our [write-up](bench.md). 
+
+## Example
+
+```c++
+#include <iostream>
+#include <string>
+#include <sparsepp.h>
+
+using spp::sparse_hash_map;
+
+int main()
+{
+    // Create an unordered_map of three strings (that map to strings)
+    sparse_hash_map<std::string, std::string> email =
+    {
+        { "tom",  "tom@gmail.com"},
+        { "jeff", "jk@gmail.com"},
+        { "jim",  "jimg@microsoft.com"}
+    };
+
+    // Iterate and print keys and values
+    for (const auto& n : email)
+        std::cout << n.first << "'s email is: " << n.second << "\n";
+
+    // Add a new entry
+    email["bill"] = "bg@whatever.com";
+
+    // and print it
+    std::cout << "bill's email is: " << email["bill"] << "\n";
+
+    return 0;
+}
+```
+
+## Installation
+
+Since the full Sparsepp implementation is contained in a single header file `sparsepp.h`, installation consists of copying this header file wherever it will be convenient to include in your project(s).
+
+Optionally, a second header file `spp_utils.h` is provided, which implements only the spp::hash_combine() functionality. This is useful when we want to specify a hash function for a user-defined class in a header file, without including the full `sparsepp.h` header (this is demonstrated in [example 2](#example-2---providing-a-hash-function-for-a-user-defined-class) below).
+
+## Usage
+
+As shown in the example above, you need to include the header file: `#include <sparsepp.h>`
+
+This provides the implementation for the following classes:
+
+```c++
+namespace spp
+{
+    template <class Key, class T,
+              class HashFcn  = spp_hash<Key>,
+              class EqualKey = std::equal_to<Key>,
+              class Alloc    = libc_allocator_with_realloc<std::pair<const Key, T>>>
+    class sparse_hash_map;
+
+    template <class Value,
+              class HashFcn  = spp_hash<Value>,
+              class EqualKey = std::equal_to<Value>,
+              class Alloc    = libc_allocator_with_realloc<Value>>
+    class sparse_hash_set;
+};
+```
+
+These classes provide the same interface as std::unordered_map and std::unordered_set, with the following differences:
+
+- Calls to erase() may invalidate iterators. However, as required by the C++11 standard, the position and range erase functions return an iterator pointing to the position immediately following the last of the elements erased. This makes it easy to traverse a sparse hash table and delete elements matching a condition. For example, to delete all odd values:
+
+```c++
+    for (auto it = c.begin(); it != c.end(); )
+        if (it->first % 2 == 1)
+            it = c.erase(it);
+        else
+            ++it;
+```
+
+- Since items are not grouped into buckets, the bucket APIs have been adapted: `max_bucket_count` is equivalent to `max_size`, and `bucket_count` returns the sparsetable size, which is normally at least twice the number of items inserted into the hash_map.
+
+## Example 2 - providing a hash function for a user-defined class
+
+In order to use a sparse_hash_set or sparse_hash_map, a hash function should be provided. Even though the hash function can be provided via the HashFcn template parameter, we recommend injecting a specialization of `std::hash` for the class into the "std" namespace.
+For example:
+
+```c++
+#include <iostream>
+#include <functional>
+#include <string>
+#include "sparsepp.h"
+
+using std::string;
+
+struct Person
+{
+    bool operator==(const Person &o) const
+    { return _first == o._first && _last == o._last; }
+
+    string _first;
+    string _last;
+};
+
+namespace std
+{
+    // inject specialization of std::hash for Person into namespace std
+    // ----------------------------------------------------------------
+    template<>
+    struct hash<Person>
+    {
+        std::size_t operator()(Person const &p) const
+        {
+            std::size_t seed = 0;
+            spp::hash_combine(seed, p._first);
+            spp::hash_combine(seed, p._last);
+            return seed;
+        }
+    };
+}
+
+int main()
+{
+    // As we have defined a specialization of std::hash() for Person,
+    // we can now create sparse_hash_set or sparse_hash_map of Persons
+    // ----------------------------------------------------------------
+    spp::sparse_hash_set<Person> persons = { { "John", "Galt" },
+                                             { "Jane", "Doe" } };
+    for (auto& p: persons)
+        std::cout << p._first << ' ' << p._last << '\n';
+}
+```
+
+The `std::hash<Person>` specialization combines the hash values of both the first and last name using the convenient spp::hash_combine function, and returns the combined hash value.
+
+spp::hash_combine is provided by the header `sparsepp.h`. However, class definitions often appear in header files, and it is desirable to limit the size of the headers included in such header files, so we provide the very small header `spp_utils.h` for that purpose:
+
+```c++
+#include <string>
+#include "spp_utils.h"
+
+using std::string;
+
+struct Person
+{
+    bool operator==(const Person &o) const
+    {
+        return _first == o._first && _last == o._last && _age == o._age;
+    }
+
+    string _first;
+    string _last;
+    int    _age;
+};
+
+namespace std
+{
+    // inject specialization of std::hash for Person into namespace std
+    // ----------------------------------------------------------------
+    template<>
+    struct hash<Person>
+    {
+        std::size_t operator()(Person const &p) const
+        {
+            std::size_t seed = 0;
+            spp::hash_combine(seed, p._first);
+            spp::hash_combine(seed, p._last);
+            spp::hash_combine(seed, p._age);
+            return seed;
+        }
+    };
+}
+```
+
+## Example 3 - serialization
+
+sparse_hash_set and sparse_hash_map can easily be serialized/unserialized to a file or a network connection.
+This support is implemented in the following APIs:
+
+```c++
+    template <class Serializer, class OUTPUT>
+    bool serialize(Serializer serializer, OUTPUT *stream);
+
+    template <class Serializer, class INPUT>
+    bool unserialize(Serializer serializer, INPUT *stream);
+```
+
+The following example demonstrates how a simple sparse_hash_map can be written to a file, and then read back.
+The serializer we use reads from and writes to a file using the stdio APIs, but it would be equally simple to write a serializer using the stream APIs:
+
+```c++
+#include <cstdio>
+
+#include "sparsepp.h"
+
+using spp::sparse_hash_map;
+using namespace std;
+
+class FileSerializer
+{
+public:
+    // serialize basic types to FILE
+    // -----------------------------
+    template <class T>
+    bool operator()(FILE *fp, const T& value)
+    {
+        return fwrite((const void *)&value, sizeof(value), 1, fp) == 1;
+    }
+
+    template <class T>
+    bool operator()(FILE *fp, T* value)
+    {
+        return fread((void *)value, sizeof(*value), 1, fp) == 1;
+    }
+
+    // serialize std::string to FILE
+    // -----------------------------
+    bool operator()(FILE *fp, const string& value)
+    {
+        const size_t size = value.size();
+        return (*this)(fp, size) && fwrite(value.c_str(), size, 1, fp) == 1;
+    }
+
+    bool operator()(FILE *fp, string* value)
+    {
+        size_t size;
+        if (!(*this)(fp, &size))
+            return false;
+        char* buf = new char[size];
+        if (fread(buf, size, 1, fp) != 1)
+        {
+            delete [] buf;
+            return false;
+        }
+        new (value) string(buf, (size_t)size);
+        delete[] buf;
+        return true;
+    }
+
+    // serialize std::pair<const A, B> to FILE - needed for maps
+    // ---------------------------------------------------------
+    template <class A, class B>
+    bool operator()(FILE *fp, const std::pair<const A, B>& value)
+    {
+        return (*this)(fp, value.first) && (*this)(fp, value.second);
+    }
+
+    template <class A, class B>
+    bool operator()(FILE *fp, std::pair<const A, B> *value)
+    {
+        return (*this)(fp, (A *)&value->first) && (*this)(fp, &value->second);
+    }
+};
+
+int main(int argc, char* argv[])
+{
+    sparse_hash_map<string, int> age{ { "John", 12 }, {"Jane", 13 }, { "Fred", 8 } };
+
+    // serialize age hash_map to "ages.dmp" file
+    FILE *out = fopen("ages.dmp", "wb");
+    age.serialize(FileSerializer(), out);
+    fclose(out);
+
+    sparse_hash_map<string, int> age_read;
+
+    // read from "ages.dmp" file into age_read hash_map
+    FILE *input = fopen("ages.dmp", "rb");
+    age_read.unserialize(FileSerializer(), input);
+    fclose(input);
+
+    // print out contents of age_read to verify correct serialization
+    for (auto& v : age_read)
+        printf("age_read: %s -> %d\n", v.first.c_str(), v.second);
+}
+```
+
+
diff --git a/resources/3rdparty/sparsepp/bench.md b/resources/3rdparty/sparsepp/bench.md
new file mode 100644
index 000000000..779f25c19
--- /dev/null
+++ b/resources/3rdparty/sparsepp/bench.md
@@ -0,0 +1,221 @@
+# Improving on Google's excellent Sparsehash
+
+[tl;dr]
+
+ 1. Looking for a great hash map
+ 2. Google Sparsehash: brilliant idea, sparse version a bit slow and dated
+ 3. Introducing Sparsepp: fast, memory efficient, C++11, single header
+
+
+### Hashtables, sparse and dense, maps and btrees - Memory usage
+
+First, let's compare two separate versions of std::unordered_map, from Boost and g++ (the test was done using Boost version 1.55 and g++ version 4.8.4 on Ubuntu 14.02, running on a VM with 5.8GB of total memory space (under VirtualBox) and 5.7GB free before the benchmarks). For all tests that follow, hash entries were inserted into an initially empty, default-sized container, without calling resize to preallocate the necessary memory. The code for the benchmarks is listed at the end of this article.
+
+The graph below shows the memory usage when inserting `std::pair` into the unordered_map.
+
+![unordered_map memory usage](https://github.com/greg7mdp2/img/blob/master/sparsepp/umap_mem.PNG?raw=true)
+
+With this test, we see that Boost's implementation uses significantly more memory than the g++ version, and indeed it is unable to insert 100 Million entries into the map without running out of memory. Since the pairs we insert into the map are 16 bytes each, the minimum expected memory usage would be 1.6 GB. We see on the graph that g++ needs just a hair over 4 GB.
+
+Now, let's add to the comparison Google's offerings: the [Sparsehash](https://github.com/sparsehash/sparsehash) and [cpp-btree](https://code.google.com/archive/p/cpp-btree/) libraries.
+
+The [Sparsehash](https://github.com/sparsehash/sparsehash) library is a header-only library, open-sourced by Google in 2005. It offers two separate hash map implementations with very different performance characteristics. sparse_hash_map is designed to use as little memory as possible, at the expense of speed if necessary. dense_hash_map is extremely fast, but gobbles memory.
+
+The [cpp-btree](https://code.google.com/archive/p/cpp-btree/) library was open-sourced by Google in 2013. Although it is not a hash map but a map storing ordered elements, I thought it would be interesting to include it because it claims low memory usage, and good performance thanks to cache friendliness.
+
+
+![Google memory usage](https://github.com/greg7mdp2/img/blob/master/sparsepp/goog_mem.PNG?raw=true)
+
+So what do we see here?
+
+Google's dense_hash_map (blue curve) was doing well below 60 Million entries, using an amount of memory somewhere in between the Boost and g++ unordered_map implementations, but ran out of memory when trying to insert 70 Million entries.
+
+This is easily understood because Google's dense_hash_map stores all the entries in a large contiguous array, and resizes the array by doubling its size when the array is 50% full.
+
+For 40M to 60M elements, the dense_hash_map used 2GB. Indeed, 60M entries, each 16 bytes in size, would occupy 960MB. So 70M entries would require over 50% of 2GB, causing a resize to 4GB. And when the resize occurs, a total of 6GB is allocated, as the entries have to be transferred from the 2GB array to the 4GB array.
+
+So we see that the dense_hash_map has pretty dramatic spikes in memory usage when resizing, equal to six times the space required for the actual entries. For big data cases, unless the final size of the container can be accurately predicted and the container sized appropriately, the memory demands of dense_hash_map when resizing may remove it from consideration for many applications.
+
+On the contrary, both Google's sparse_hash_map and btree_map (from cpp-btree) have excellent memory usage characteristics. The sparse_hash_map, as promised, has a very small overhead (it uses about 1.9GB, just 18% higher than the theoretical minimum of 1.6GB). The btree_map uses a little over 2GB, still excellent.
+
+Interestingly, the memory usage of both sparse_hash_map and btree_map increases regularly without significant spikes when resizing, which allows them to grow gracefully and use all the memory available.
+
+The two std::unordered_map implementations do have memory usage spikes when resizing, but less drastic ones than Google's dense_hash_map. This is because std::unordered_map implementations typically implement the hash map as an array of buckets (all entries with the same hash value go into the same bucket), and each bucket stores its entries in something equivalent to a std::forward_list.
+So, when resizing, while the bucket array has to be reallocated, the actual map entries can be moved to their new buckets without requiring extra memory.
+
+
+### What about performance?
+
+Memory usage is one thing, but we also need efficient containers allowing fast insertion and lookup. We ran a series of three benchmarks, still using the same `std::pair` value_type. While randomized, the sequence of keys was the same for each container tested.
+
+1. Random Insert: we measured the time needed to insert N entries into an initially empty, default-sized container, without calling resize to preallocate the necessary memory. Keys were inserted in random order, i.e. the integer keys were not sorted.
+   API used: `pair<iterator, bool> insert(const value_type& val);`
+
+
+   ![insert](https://github.com/greg7mdp2/img/blob/master/sparsepp/goog_insert.PNG?raw=true)
+
+
+2. Random Lookup: we measured the time needed to retrieve N entries known to be present in the map, plus N entries with only a 10% probability of being present.
+   API used: `iterator find(const key_type& k);`
+
+
+   ![lookup](https://github.com/greg7mdp2/img/blob/master/sparsepp/goog_lookup.PNG?raw=true)
+
+
+3. Delete: we measured the time needed to delete the N entries known to be present in the map. Entries had been inserted in random order, and are deleted in a different random order.
+   API used: `size_type erase(const key_type& k);`
+
+   ![delete](https://github.com/greg7mdp2/img/blob/master/sparsepp/goog_delete.PNG?raw=true)
+
+
+What can we conclude from these tests? Here are my observations:
+
+- Until it runs out of memory at 60M entries, Google's dense_hash_map is consistently the fastest, often by a quite significant margin.
+
+- Both the Boost and g++ unordered_maps have very similar performance, which I would qualify as average among the alternatives tested.
+
+- Google's btree_map (from cpp-btree) does not perform very well on these tests (where the fact that it maintains the ordering of entries is not used). While it is competitive with the sparse_hash_map for insertion, the lookup time is typically at least 3 times higher than that of the slowest hash map, and deletion is slower as well. This is not unexpected, as the btree complexity on insert, lookup and delete is O(log n).
+
+- Google's sparse_hash_map is very competitive: as fast as the std::unordered_maps on lookup, faster at deletion, but slower on insert. Considering its excellent memory usage characteristics, I would say it is the best compromise.
+
+So, if we are looking for a non-ordered associative container on linux[^1], I think the two Google offerings are the most attractive:
+
+- Google's dense_hash_map: **extremely fast, very high memory requirement** (unless the maximum number of entries is known in advance, and is not just a little bit greater than a power of two).
+
+- Google's sparse_hash_map: **very memory efficient, fast lookup and deletion**, however slower than std::unordered_map on insertion.
+
+
+### Introducing [Sparsepp](https://github.com/greg7mdp/sparsepp)
+
+At this point, I started wondering about the large speed difference between the sparse and dense hash_maps by Google. Could that performance gap be reduced somewhat? After all, both use [open addressing](https://en.wikipedia.org/wiki/Open_addressing) with internal [quadratic probing](https://en.wikipedia.org/wiki/Quadratic_probing).
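+
+For readers unfamiliar with the term, the sketch below shows what open addressing with quadratic probing looks like in its simplest form (triangular-number steps over a power-of-two table). It is only an illustration of the general technique; the names are made up and this is not the actual probing code used by sparsehash or Sparsepp.
+
+```c++
+#include <cstddef>
+#include <functional>
+#include <vector>
+
+struct Slot { bool used = false; int key = 0; };
+
+// Return the index of the slot holding 'key', or of the empty slot where it
+// would be inserted. Probe offsets 1, 2, 3, ... produce the triangular-number
+// sequence h, h+1, h+3, h+6, ..., which visits every slot when the table size
+// is a power of two. Assumes the table always keeps at least one empty slot.
+std::size_t find_slot(const std::vector<Slot>& table, int key)
+{
+    const std::size_t mask = table.size() - 1;        // table.size() is a power of two
+    std::size_t idx = std::hash<int>()(key) & mask;
+    for (std::size_t i = 1; ; ++i)
+    {
+        if (!table[idx].used || table[idx].key == key)
+            return idx;                               // empty slot (miss) or matching key (hit)
+        idx = (idx + i) & mask;                       // quadratic probe step
+    }
+}
+```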
+
+I was also intrigued by the remarkable sparse_hash_map memory efficiency, and wanted to fully understand its implementation, which is based on a [sparsetable](http://htmlpreview.github.io/?https://github.com/sparsehash/sparsehash/blob/master/doc/implementation.html).
+
+As I read the code and followed it under the debugger, I started having ideas on how to speed up the sparse_hash_map without significantly increasing its memory usage. That little game was addictive, and I found myself trying various ideas: some of which provided significant benefits, and some that didn't pan out.
+
+Regardless, after a few months of work on evenings and weekends, I am proud to present [Sparsepp](https://github.com/greg7mdp/sparsepp), a heavily modified version of Google's sparse_hash_map which offers significant performance improvements, while maintaining a very low memory profile.
+
+The graphs below show the relative performance (purple line) of the [Sparsepp](https://github.com/greg7mdp/sparsepp) sparse_hash_map compared to the other implementations:
+
+`Note: "Sparse++" in the graph legends is actually "Sparsepp".`
+
+1. Random Insert: [Sparsepp](https://github.com/greg7mdp/sparsepp), while still slower than the dense_hash_map, is significantly faster than the original sparse_hash_map and the btree_map, and as fast as the two std::unordered_map implementations.
+
+   ![insert](https://github.com/greg7mdp2/img/blob/master/sparsepp/spp_insert.PNG?raw=true)
+
+2. Random Lookup (find): [Sparsepp](https://github.com/greg7mdp/sparsepp) is faster than all other alternatives, except for dense_hash_map.
+
+   ![lookup](https://github.com/greg7mdp2/img/blob/master/sparsepp/spp_lookup.PNG?raw=true)
+
+3. Delete (erase): [Sparsepp](https://github.com/greg7mdp/sparsepp) is again doing very well, outperformed only by dense_hash_map. We should note that unlike the original sparse_hash_map, [Sparsepp](https://github.com/greg7mdp/sparsepp)'s sparse_hash_map does release the memory on erase, instead of just overwriting the memory with the deleted key value. Indeed, the non-standard APIs set_deleted_key() and set_empty_key(), while still present for compatibility with the original sparse_hash_map, are no longer necessary or useful.
+
+   ![delete](https://github.com/greg7mdp2/img/blob/master/sparsepp/spp_delete.PNG?raw=true)
+
+
+Looks good, but what is the cost of using [Sparsepp](https://github.com/greg7mdp/sparsepp) versus the original sparse_hash_map in terms of memory usage?
+
+   ![memory](https://github.com/greg7mdp2/img/blob/master/sparsepp/spp_mem.PNG?raw=true)
+
+Not bad! While [Sparsepp](https://github.com/greg7mdp/sparsepp)'s memory usage is a little bit higher than the original sparse_hash_map's, it is still memory friendly, and there are no memory spikes when the map resizes. We can see that when moving from 60M entries to 70M entries, both Google's dense and [Sparsepp](https://github.com/greg7mdp/sparsepp)'s hash_maps needed a resize to accommodate the 70M elements. The resize proved fatal for the dense_hash_map, which could not allocate the 6GB needed for the resize + copy, while the [Sparsepp](https://github.com/greg7mdp/sparsepp) sparse_hash_map had no problem.
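+
+In practice this means the deleted-key boilerplate required by the original sparse_hash_map can simply be dropped. A minimal sketch of the difference, assuming both Google's sparsehash headers and `sparsepp.h` are available on the include path:
+
+```c++
+#include <string>
+#include <sparsehash/sparse_hash_map>   // Google's original implementation
+#include "sparsepp.h"                   // Sparsepp
+
+void erase_example()
+{
+    google::sparse_hash_map<std::string, int> g;
+    g.set_deleted_key("");      // required by sparsehash before calling erase()
+    g["john"] = 12;
+    g.erase("john");            // the slot is overwritten with the deleted key
+
+    spp::sparse_hash_map<std::string, int> s;
+    s["john"] = 12;
+    s.erase("john");            // no set_deleted_key() needed; the memory is released
+}
+```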
+
+In order to validate that the sparse hash tables can indeed grow to accommodate many more entries than regular hash tables, we decided to run a test that keeps inserting items until every table runs out of memory; the result is presented in the two graphs below:
+
+   ![SPP_ALLOC_SZ_0](https://github.com/greg7mdp2/img/blob/master/sparsepp/insert_large_0.PNG?raw=true)
+
+The table below displays the maximum number of entries that could be added to each map before it ran out of memory. As a reminder, the VM had 5.7GB free before each test, and each entry is 16 bytes.
+
+ Max entries | Google's dense_hash | Boost unordered | g++ unordered | Google's btree_map | Sparsepp | Google's sparse_hash
+------------ | -------------- | --------------- | -------------- | --------- | -------- | ---------------
+ in millions | 60 M | 80 M | 120 M | 220 M | 220 M | 240 M
+
+
+Both sparse hash implementations, as well as Google's btree_map, are significantly more memory efficient than the classic unordered_maps. They are also significantly slower than [Sparsepp](https://github.com/greg7mdp/sparsepp).
+
+If we are willing to sacrifice a little bit of insertion performance for improved memory efficiency, it is easily done with a simple change in [Sparsepp](https://github.com/greg7mdp/sparsepp)'s header file `sparsepp.h`. Just change:
+
+`#define SPP_ALLOC_SZ 0`
+
+to
+
+`#define SPP_ALLOC_SZ 1`
+
+With this change, we get the graphs below:
+
+   ![SPP_ALLOC_SZ_1](https://github.com/greg7mdp2/img/blob/master/sparsepp/insert_large_1.PNG?raw=true)
+
+Now the memory usage of [Sparsepp](https://github.com/greg7mdp/sparsepp) is reduced to just a little bit more than Google's sparse_hash_map's, and both sparse map implementations are able to insert 240 Million entries, but choke at 260 Million. [Sparsepp](https://github.com/greg7mdp/sparsepp) is a little bit slower on insert, but still significantly faster than Google's sparse_hash_map. Lookup performance (not graphed) is unchanged.
+
+To conclude, we feel that [Sparsepp](https://github.com/greg7mdp/sparsepp) provides an unusual combination of performance and memory economy, and will be a useful addition to every developer's toolkit.
+
+Here are some other features of [Sparsepp](https://github.com/greg7mdp/sparsepp) you may find attractive:
+
+* Single header implementation: the full [Sparsepp](https://github.com/greg7mdp/sparsepp) resides in the single header file sparsepp.h. Just copy this file into your project, #include it, and you are good to go.
+
+* C++11 compatible: move semantics, cbegin()/cend(), stateful allocators
+
+
+### Benchmarks code
+
+```c++
+template <class T>
+void _fill(vector<T> &v)
+{
+    srand(1);   // for a fair/deterministic comparison
+    for (size_t i = 0, sz = v.size(); i < sz; ++i)
+        v[i] = (T)(i * 10 + rand() % 10);
+}
+
+template <class T>
+void _shuffle(vector<T> &v)
+{
+    for (size_t n = v.size(); n >= 2; --n)
+        std::swap(v[n - 1], v[static_cast<unsigned>(rand()) % n]);
+}
+
+template <class T, class HT>
+double _fill_random(vector<T> &v, HT &hash)
+{
+    _fill(v);
+    _shuffle(v);
+
+    double start_time = get_time();
+
+    for (size_t i = 0, sz = v.size(); i < sz; ++i)
+        hash.insert(typename HT::value_type(v[i], 0));
+    return start_time;
+}
+
+template <class T, class HT>
+double _lookup(vector<T> &v, HT &hash, size_t &num_present)
+{
+    _fill_random(v, hash);
+
+    num_present = 0;
+    size_t max_val = v.size() * 10;
+    double start_time = get_time();
+
+    for (size_t i = 0, sz = v.size(); i < sz; ++i)
+    {
+        num_present += (size_t)(hash.find(v[i]) != hash.end());
+        num_present += (size_t)(hash.find((T)(rand() % max_val)) != hash.end());
+    }
+    return start_time;
+}
+
+template <class T, class HT>
+double _delete(vector<T> &v, HT &hash)
+{
+    _fill_random(v, hash);
+    _shuffle(v);        // don't delete in insertion order
+
+    double start_time = get_time();
+
+    for(size_t i = 0, sz = v.size(); i < sz; ++i)
+        hash.erase(v[i]);
+    return start_time;
+}
+```
+
+[^1]: Google's hash maps were most likely developed on linux/g++. When built on Windows with Visual Studio (2015), the lookup of items is very slow, to the point that the sparse hashtable is not much faster than Google's btree_map (from the cpp-btree library). The reason for this poor performance is that Google's implementation uses by default the hash functions provided by the compiler, and those provided by the Visual C++ compiler turn out to be very inefficient. If you are using Google's sparse_hash_map on Windows, you can look forward to an even greater performance increase when switching to [Sparsepp](https://github.com/greg7mdp/sparsepp).
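+
+The timing functions in the benchmark code above call a `get_time()` helper that is not shown; any monotonic-clock wrapper returning seconds as a double will do. A possible implementation (not necessarily the one used for these benchmarks) based on std::chrono:
+
+```c++
+#include <chrono>
+
+// Returns the current time in seconds as a double; callers subtract two
+// readings (e.g. get_time() - start_time) to obtain an elapsed duration.
+double get_time()
+{
+    using namespace std::chrono;
+    return duration_cast<duration<double>>(steady_clock::now().time_since_epoch()).count();
+}
+```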
+ + + diff --git a/resources/3rdparty/sparsepp/docs/.gitignore b/resources/3rdparty/sparsepp/docs/.gitignore new file mode 100644 index 000000000..cd2946ad7 --- /dev/null +++ b/resources/3rdparty/sparsepp/docs/.gitignore @@ -0,0 +1,47 @@ +# Windows image file caches +Thumbs.db +ehthumbs.db + +# Folder config file +Desktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msm +*.msp + +# Windows shortcuts +*.lnk + +# ========================= +# Operating System Files +# ========================= + +# OSX +# ========================= + +.DS_Store +.AppleDouble +.LSOverride + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk diff --git a/resources/3rdparty/sparsepp/makefile b/resources/3rdparty/sparsepp/makefile new file mode 100644 index 000000000..3443f0e27 --- /dev/null +++ b/resources/3rdparty/sparsepp/makefile @@ -0,0 +1,11 @@ +all: spp_test + +clean: + /bin/rm spp_test + +test: + ./spp_test + +spp_test: spp_test.cc sparsepp.h makefile + $(CXX) -O2 -std=c++0x -D_CRT_SECURE_NO_WARNINGS spp_test.cc -o spp_test + diff --git a/resources/3rdparty/sparsepp/sparsepp.h b/resources/3rdparty/sparsepp/sparsepp.h new file mode 100644 index 000000000..5706adb0d --- /dev/null +++ b/resources/3rdparty/sparsepp/sparsepp.h @@ -0,0 +1,5626 @@ +#if !defined(sparsepp_h_guard_) +#define sparsepp_h_guard_ + + +// ---------------------------------------------------------------------- +// Copyright (c) 2016, Gregory Popovitch - greg7mdp@gmail.com +// All rights reserved. +// +// This work is derived from Google's sparsehash library +// +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// ---------------------------------------------------------------------- + + +// --------------------------------------------------------------------------- +// Compiler detection code (SPP_ proprocessor macros) derived from Boost +// libraries. Therefore Boost software licence reproduced below. +// --------------------------------------------------------------------------- +// Boost Software License - Version 1.0 - August 17th, 2003 +// +// Permission is hereby granted, free of charge, to any person or organization +// obtaining a copy of the software and accompanying documentation covered by +// this license (the "Software") to use, reproduce, display, distribute, +// execute, and transmit the Software, and to prepare derivative works of the +// Software, and to permit third-parties to whom the Software is furnished to +// do so, all subject to the following: +// +// The copyright notices in the Software and this entire statement, including +// the above license grant, this restriction and the following disclaimer, +// must be included in all copies of the Software, in whole or in part, and +// all derivative works of the Software, unless such copies or derivative +// works are solely in the form of machine-executable object code generated by +// a source language processor. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// --------------------------------------------------------------------------- + + +// some macros for portability +// --------------------------- +#define spp_ spp +#define SPP_NAMESPACE spp_ +#define SPP_START_NAMESPACE namespace spp { +#define SPP_END_NAMESPACE } +#define SPP_GROUP_SIZE 32 // must be 32 or 64 +#define SPP_ALLOC_SZ 0 // must be power of 2 (0 = agressive alloc, 1 = smallest memory usage, 2 = good compromise) +#define SPP_STORE_NUM_ITEMS 1 // little bit more memory, but faster!! 
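+
+// Note: SPP_SHIFT_ and SPP_MASK_ below are simply log2(SPP_GROUP_SIZE) and
+// SPP_GROUP_SIZE - 1, so an item index can be split into a group number
+// (index >> SPP_SHIFT_) and a bit position within that group's bitmap
+// (index & SPP_MASK_).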
+ +#if (SPP_GROUP_SIZE == 32) + #define SPP_SHIFT_ 5 + #define SPP_MASK_ 0x1F +#elif (SPP_GROUP_SIZE == 64) + #define SPP_SHIFT_ 6 + #define SPP_MASK_ 0x3F +#else + #error "SPP_GROUP_SIZE must be either 32 or 64" +#endif + +// Boost like configuration +// ------------------------ +#if defined __clang__ + + #if defined(i386) + #include + inline void spp_cpuid(int info[4], int InfoType) { + __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]); + } + #endif + + #define SPP_POPCNT __builtin_popcount + #define SPP_POPCNT64 __builtin_popcountll + + #define SPP_HAS_CSTDINT + + #ifndef __has_extension + #define __has_extension __has_feature + #endif + + #if !__has_feature(cxx_exceptions) && !defined(SPP_NO_EXCEPTIONS) + #define SPP_NO_EXCEPTIONS + #endif + + #if !__has_feature(cxx_rtti) && !defined(SPP_NO_RTTI) + #define SPP_NO_RTTI + #endif + + #if !__has_feature(cxx_rtti) && !defined(SPP_NO_TYPEID) + #define SPP_NO_TYPEID + #endif + + #if defined(__int64) && !defined(__GNUC__) + #define SPP_HAS_MS_INT64 + #endif + + #define SPP_HAS_NRVO + + // Branch prediction hints + #if defined(__has_builtin) + #if __has_builtin(__builtin_expect) + #define SPP_LIKELY(x) __builtin_expect(x, 1) + #define SPP_UNLIKELY(x) __builtin_expect(x, 0) + #endif + #endif + + // Clang supports "long long" in all compilation modes. + #define SPP_HAS_LONG_LONG + + #if !__has_feature(cxx_constexpr) + #define SPP_NO_CXX11_CONSTEXPR + #endif + + #if !__has_feature(cxx_decltype) + #define SPP_NO_CXX11_DECLTYPE + #endif + + #if !__has_feature(cxx_decltype_incomplete_return_types) + #define SPP_NO_CXX11_DECLTYPE_N3276 + #endif + + #if !__has_feature(cxx_defaulted_functions) + #define SPP_NO_CXX11_DEFAULTED_FUNCTIONS + #endif + + #if !__has_feature(cxx_deleted_functions) + #define SPP_NO_CXX11_DELETED_FUNCTIONS + #endif + + #if !__has_feature(cxx_explicit_conversions) + #define SPP_NO_CXX11_EXPLICIT_CONVERSION_OPERATORS + #endif + + #if !__has_feature(cxx_default_function_template_args) + #define SPP_NO_CXX11_FUNCTION_TEMPLATE_DEFAULT_ARGS + #endif + + #if !__has_feature(cxx_generalized_initializers) + #define SPP_NO_CXX11_HDR_INITIALIZER_LIST + #endif + + #if !__has_feature(cxx_lambdas) + #define SPP_NO_CXX11_LAMBDAS + #endif + + #if !__has_feature(cxx_local_type_template_args) + #define SPP_NO_CXX11_LOCAL_CLASS_TEMPLATE_PARAMETERS + #endif + + #if !__has_feature(cxx_nullptr) + #define SPP_NO_CXX11_NULLPTR + #endif + + #if !__has_feature(cxx_range_for) + #define SPP_NO_CXX11_RANGE_BASED_FOR + #endif + + #if !__has_feature(cxx_raw_string_literals) + #define SPP_NO_CXX11_RAW_LITERALS + #endif + + #if !__has_feature(cxx_reference_qualified_functions) + #define SPP_NO_CXX11_REF_QUALIFIERS + #endif + + #if !__has_feature(cxx_generalized_initializers) + #define SPP_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX + #endif + + #if !__has_feature(cxx_rvalue_references) + #define SPP_NO_CXX11_RVALUE_REFERENCES + #endif + + #if !__has_feature(cxx_strong_enums) + #define SPP_NO_CXX11_SCOPED_ENUMS + #endif + + #if !__has_feature(cxx_static_assert) + #define SPP_NO_CXX11_STATIC_ASSERT + #endif + + #if !__has_feature(cxx_alias_templates) + #define SPP_NO_CXX11_TEMPLATE_ALIASES + #endif + + #if !__has_feature(cxx_unicode_literals) + #define SPP_NO_CXX11_UNICODE_LITERALS + #endif + + #if !__has_feature(cxx_variadic_templates) + #define SPP_NO_CXX11_VARIADIC_TEMPLATES + #endif + + #if !__has_feature(cxx_user_literals) + #define SPP_NO_CXX11_USER_DEFINED_LITERALS + #endif + + #if !__has_feature(cxx_alignas) + #define SPP_NO_CXX11_ALIGNAS + #endif + 
+ #if !__has_feature(cxx_trailing_return) + #define SPP_NO_CXX11_TRAILING_RESULT_TYPES + #endif + + #if !__has_feature(cxx_inline_namespaces) + #define SPP_NO_CXX11_INLINE_NAMESPACES + #endif + + #if !__has_feature(cxx_override_control) + #define SPP_NO_CXX11_FINAL + #endif + + #if !(__has_feature(__cxx_binary_literals__) || __has_extension(__cxx_binary_literals__)) + #define SPP_NO_CXX14_BINARY_LITERALS + #endif + + #if !__has_feature(__cxx_decltype_auto__) + #define SPP_NO_CXX14_DECLTYPE_AUTO + #endif + + #if !__has_feature(__cxx_aggregate_nsdmi__) + #define SPP_NO_CXX14_AGGREGATE_NSDMI + #endif + + #if !__has_feature(__cxx_init_captures__) + #define SPP_NO_CXX14_INITIALIZED_LAMBDA_CAPTURES + #endif + + #if !__has_feature(__cxx_generic_lambdas__) + #define SPP_NO_CXX14_GENERIC_LAMBDAS + #endif + + + #if !__has_feature(__cxx_generic_lambdas__) || !__has_feature(__cxx_relaxed_constexpr__) + #define SPP_NO_CXX14_CONSTEXPR + #endif + + #if !__has_feature(__cxx_return_type_deduction__) + #define SPP_NO_CXX14_RETURN_TYPE_DEDUCTION + #endif + + #if !__has_feature(__cxx_variable_templates__) + #define SPP_NO_CXX14_VARIABLE_TEMPLATES + #endif + + #if __cplusplus < 201400 + #define SPP_NO_CXX14_DIGIT_SEPARATORS + #endif + + #if defined(__has_builtin) && __has_builtin(__builtin_unreachable) + #define SPP_UNREACHABLE_RETURN(x) __builtin_unreachable(); + #endif + + #define SPP_ATTRIBUTE_UNUSED __attribute__((__unused__)) + + #ifndef SPP_COMPILER + #define SPP_COMPILER "Clang version " __clang_version__ + #endif + + #define SPP_CLANG 1 + + +#elif defined __GNUC__ + + #define SPP_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) + + // definition to expand macro then apply to pragma message + // #define VALUE_TO_STRING(x) #x + // #define VALUE(x) VALUE_TO_STRING(x) + // #define VAR_NAME_VALUE(var) #var "=" VALUE(var) + // #pragma message(VAR_NAME_VALUE(SPP_GCC_VERSION)) + + #if defined(i386) + #include + inline void spp_cpuid(int info[4], int InfoType) { + __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]); + } + #endif + + // __POPCNT__ defined when the compiled with popcount support + // (-mpopcnt compiler option is given for example) + #ifdef __POPCNT__ + // slower unless compiled iwith -mpopcnt + #define SPP_POPCNT __builtin_popcount + #define SPP_POPCNT64 __builtin_popcountll + #endif + + #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (__cplusplus >= 201103L) + #define SPP_GCC_CXX11 + #endif + + #if __GNUC__ == 3 + #if defined (__PATHSCALE__) + #define SPP_NO_TWO_PHASE_NAME_LOOKUP + #define SPP_NO_IS_ABSTRACT + #endif + + #if __GNUC_MINOR__ < 4 + #define SPP_NO_IS_ABSTRACT + #endif + + #define SPP_NO_CXX11_EXTERN_TEMPLATE + #endif + + #if __GNUC__ < 4 + // + // All problems to gcc-3.x and earlier here: + // + #define SPP_NO_TWO_PHASE_NAME_LOOKUP + #ifdef __OPEN64__ + #define SPP_NO_IS_ABSTRACT + #endif + #endif + + // GCC prior to 3.4 had #pragma once too but it didn't work well with filesystem links + #if SPP_GCC_VERSION >= 30400 + #define SPP_HAS_PRAGMA_ONCE + #endif + + #if SPP_GCC_VERSION < 40400 + // Previous versions of GCC did not completely implement value-initialization: + // GCC Bug 30111, "Value-initialization of POD base class doesn't initialize + // members", reported by Jonathan Wakely in 2006, + // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=30111 (fixed for GCC 4.4) + // GCC Bug 33916, "Default constructor fails to initialize array members", + // reported by Michael Elizabeth Chastain in 2007, + // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33916 
(fixed for GCC 4.2.4) + // See also: http://www.boost.org/libs/utility/value_init.htm #compiler_issues + #define SPP_NO_COMPLETE_VALUE_INITIALIZATION + #endif + + #if !defined(__EXCEPTIONS) && !defined(SPP_NO_EXCEPTIONS) + #define SPP_NO_EXCEPTIONS + #endif + + // + // Threading support: Turn this on unconditionally here (except for + // those platforms where we can know for sure). It will get turned off again + // later if no threading API is detected. + // + #if !defined(__MINGW32__) && !defined(linux) && !defined(__linux) && !defined(__linux__) + #define SPP_HAS_THREADS + #endif + + // + // gcc has "long long" + // Except on Darwin with standard compliance enabled (-pedantic) + // Apple gcc helpfully defines this macro we can query + // + #if !defined(__DARWIN_NO_LONG_LONG) + #define SPP_HAS_LONG_LONG + #endif + + // + // gcc implements the named return value optimization since version 3.1 + // + #define SPP_HAS_NRVO + + // Branch prediction hints + #define SPP_LIKELY(x) __builtin_expect(x, 1) + #define SPP_UNLIKELY(x) __builtin_expect(x, 0) + + // + // Dynamic shared object (DSO) and dynamic-link library (DLL) support + // + #if __GNUC__ >= 4 + #if (defined(_WIN32) || defined(__WIN32__) || defined(WIN32)) && !defined(__CYGWIN__) + // All Win32 development environments, including 64-bit Windows and MinGW, define + // _WIN32 or one of its variant spellings. Note that Cygwin is a POSIX environment, + // so does not define _WIN32 or its variants. + #define SPP_HAS_DECLSPEC + #define SPP_SYMBOL_EXPORT __attribute__((__dllexport__)) + #define SPP_SYMBOL_IMPORT __attribute__((__dllimport__)) + #else + #define SPP_SYMBOL_EXPORT __attribute__((__visibility__("default"))) + #define SPP_SYMBOL_IMPORT + #endif + + #define SPP_SYMBOL_VISIBLE __attribute__((__visibility__("default"))) + #else + // config/platform/win32.hpp will define SPP_SYMBOL_EXPORT, etc., unless already defined + #define SPP_SYMBOL_EXPORT + #endif + + // + // RTTI and typeinfo detection is possible post gcc-4.3: + // + #if SPP_GCC_VERSION > 40300 + #ifndef __GXX_RTTI + #ifndef SPP_NO_TYPEID + #define SPP_NO_TYPEID + #endif + #ifndef SPP_NO_RTTI + #define SPP_NO_RTTI + #endif + #endif + #endif + + // + // Recent GCC versions have __int128 when in 64-bit mode. + // + // We disable this if the compiler is really nvcc with C++03 as it + // doesn't actually support __int128 as of CUDA_VERSION=7500 + // even though it defines __SIZEOF_INT128__. + // See https://svn.boost.org/trac/boost/ticket/8048 + // https://svn.boost.org/trac/boost/ticket/11852 + // Only re-enable this for nvcc if you're absolutely sure + // of the circumstances under which it's supported: + // + #if defined(__CUDACC__) + #if defined(SPP_GCC_CXX11) + #define SPP_NVCC_CXX11 + #else + #define SPP_NVCC_CXX03 + #endif + #endif + + #if defined(__SIZEOF_INT128__) && !defined(SPP_NVCC_CXX03) + #define SPP_HAS_INT128 + #endif + // + // Recent GCC versions have a __float128 native type, we need to + // include a std lib header to detect this - not ideal, but we'll + // be including later anyway when we select the std lib. + // + // Nevertheless, as of CUDA 7.5, using __float128 with the host + // compiler in pre-C++11 mode is still not supported. 
+ // See https://svn.boost.org/trac/boost/ticket/11852 + // + #ifdef __cplusplus + #include + #else + #include + #endif + + #if defined(_GLIBCXX_USE_FLOAT128) && !defined(__STRICT_ANSI__) && !defined(SPP_NVCC_CXX03) + #define SPP_HAS_FLOAT128 + #endif + + // C++0x features in 4.3.n and later + // + #if (SPP_GCC_VERSION >= 40300) && defined(SPP_GCC_CXX11) + // C++0x features are only enabled when -std=c++0x or -std=gnu++0x are + // passed on the command line, which in turn defines + // __GXX_EXPERIMENTAL_CXX0X__. + #define SPP_HAS_DECLTYPE + #define SPP_HAS_RVALUE_REFS + #define SPP_HAS_STATIC_ASSERT + #define SPP_HAS_VARIADIC_TMPL + #define SPP_HAS_CSTDINT + #else + #define SPP_NO_CXX11_DECLTYPE + #define SPP_NO_CXX11_FUNCTION_TEMPLATE_DEFAULT_ARGS + #define SPP_NO_CXX11_RVALUE_REFERENCES + #define SPP_NO_CXX11_STATIC_ASSERT + #endif + + // C++0x features in 4.4.n and later + // + #if (SPP_GCC_VERSION < 40400) || !defined(SPP_GCC_CXX11) + #define SPP_NO_CXX11_AUTO_DECLARATIONS + #define SPP_NO_CXX11_AUTO_MULTIDECLARATIONS + #define SPP_NO_CXX11_CHAR16_T + #define SPP_NO_CXX11_CHAR32_T + #define SPP_NO_CXX11_HDR_INITIALIZER_LIST + #define SPP_NO_CXX11_DEFAULTED_FUNCTIONS + #define SPP_NO_CXX11_DELETED_FUNCTIONS + #define SPP_NO_CXX11_TRAILING_RESULT_TYPES + #define SPP_NO_CXX11_INLINE_NAMESPACES + #define SPP_NO_CXX11_VARIADIC_TEMPLATES + #endif + + #if SPP_GCC_VERSION < 40500 + #define SPP_NO_SFINAE_EXPR + #endif + + // GCC 4.5 forbids declaration of defaulted functions in private or protected sections + #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ == 5) || !defined(SPP_GCC_CXX11) + #define SPP_NO_CXX11_NON_PUBLIC_DEFAULTED_FUNCTIONS + #endif + + // C++0x features in 4.5.0 and later + // + #if (SPP_GCC_VERSION < 40500) || !defined(SPP_GCC_CXX11) + #define SPP_NO_CXX11_EXPLICIT_CONVERSION_OPERATORS + #define SPP_NO_CXX11_LAMBDAS + #define SPP_NO_CXX11_LOCAL_CLASS_TEMPLATE_PARAMETERS + #define SPP_NO_CXX11_RAW_LITERALS + #define SPP_NO_CXX11_UNICODE_LITERALS + #endif + + // C++0x features in 4.5.1 and later + // + #if (SPP_GCC_VERSION < 40501) || !defined(SPP_GCC_CXX11) + // scoped enums have a serious bug in 4.4.0, so define SPP_NO_CXX11_SCOPED_ENUMS before 4.5.1 + // See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=38064 + #define SPP_NO_CXX11_SCOPED_ENUMS + #endif + + // C++0x features in 4.6.n and later + // + #if (SPP_GCC_VERSION < 40600) || !defined(SPP_GCC_CXX11) + #define SPP_NO_CXX11_CONSTEXPR + #define SPP_NO_CXX11_NULLPTR + #define SPP_NO_CXX11_RANGE_BASED_FOR + #define SPP_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX + #endif + + // C++0x features in 4.7.n and later + // + #if (SPP_GCC_VERSION < 40700) || !defined(SPP_GCC_CXX11) + #define SPP_NO_CXX11_FINAL + #define SPP_NO_CXX11_TEMPLATE_ALIASES + #define SPP_NO_CXX11_USER_DEFINED_LITERALS + #define SPP_NO_CXX11_FIXED_LENGTH_VARIADIC_TEMPLATE_EXPANSION_PACKS + #endif + + // C++0x features in 4.8.n and later + // + #if (SPP_GCC_VERSION < 40800) || !defined(SPP_GCC_CXX11) + #define SPP_NO_CXX11_ALIGNAS + #endif + + // C++0x features in 4.8.1 and later + // + #if (SPP_GCC_VERSION < 40801) || !defined(SPP_GCC_CXX11) + #define SPP_NO_CXX11_DECLTYPE_N3276 + #define SPP_NO_CXX11_REF_QUALIFIERS + #define SPP_NO_CXX14_BINARY_LITERALS + #endif + + // C++14 features in 4.9.0 and later + // + #if (SPP_GCC_VERSION < 40900) || (__cplusplus < 201300) + #define SPP_NO_CXX14_RETURN_TYPE_DEDUCTION + #define SPP_NO_CXX14_GENERIC_LAMBDAS + #define SPP_NO_CXX14_DIGIT_SEPARATORS + #define SPP_NO_CXX14_DECLTYPE_AUTO + #if !((SPP_GCC_VERSION >= 40801) 
&& (SPP_GCC_VERSION < 40900) && defined(SPP_GCC_CXX11)) + #define SPP_NO_CXX14_INITIALIZED_LAMBDA_CAPTURES + #endif + #endif + + + // C++ 14: + #if !defined(__cpp_aggregate_nsdmi) || (__cpp_aggregate_nsdmi < 201304) + #define SPP_NO_CXX14_AGGREGATE_NSDMI + #endif + #if !defined(__cpp_constexpr) || (__cpp_constexpr < 201304) + #define SPP_NO_CXX14_CONSTEXPR + #endif + #if !defined(__cpp_variable_templates) || (__cpp_variable_templates < 201304) + #define SPP_NO_CXX14_VARIABLE_TEMPLATES + #endif + + // + // Unused attribute: + #if __GNUC__ >= 4 + #define SPP_ATTRIBUTE_UNUSED __attribute__((__unused__)) + #endif + // + // __builtin_unreachable: + #if SPP_GCC_VERSION >= 40800 + #define SPP_UNREACHABLE_RETURN(x) __builtin_unreachable(); + #endif + + #ifndef SPP_COMPILER + #define SPP_COMPILER "GNU C++ version " __VERSION__ + #endif + + // ConceptGCC compiler: + // http://www.generic-programming.org/software/ConceptGCC/ + #ifdef __GXX_CONCEPTS__ + #define SPP_HAS_CONCEPTS + #define SPP_COMPILER "ConceptGCC version " __VERSION__ + #endif + + +#elif defined _MSC_VER + + #include // for __popcnt() + + #define SPP_POPCNT_CHECK // slower when defined, but we have to check! + #define spp_cpuid(info, x) __cpuid(info, x) + + #define SPP_POPCNT __popcnt + #if (SPP_GROUP_SIZE == 64 && INTPTR_MAX == INT64_MAX) + #define SPP_POPCNT64 __popcnt64 + #endif + + // Attempt to suppress VC6 warnings about the length of decorated names (obsolete): + #pragma warning( disable : 4503 ) // warning: decorated name length exceeded + + #define SPP_HAS_PRAGMA_ONCE + #define SPP_HAS_CSTDINT + + // + // versions check: + // we don't support Visual C++ prior to version 7.1: + #if _MSC_VER < 1310 + #error "Antique compiler not supported" + #endif + + #if _MSC_FULL_VER < 180020827 + #define SPP_NO_FENV_H + #endif + + #if _MSC_VER < 1400 + // although a conforming signature for swprint exists in VC7.1 + // it appears not to actually work: + #define SPP_NO_SWPRINTF + + // Our extern template tests also fail for this compiler: + #define SPP_NO_CXX11_EXTERN_TEMPLATE + + // Variadic macros do not exist for VC7.1 and lower + #define SPP_NO_CXX11_VARIADIC_MACROS + #endif + + #if _MSC_VER < 1500 // 140X == VC++ 8.0 + #undef SPP_HAS_CSTDINT + #define SPP_NO_MEMBER_TEMPLATE_FRIENDS + #endif + + #if _MSC_VER < 1600 // 150X == VC++ 9.0 + // A bug in VC9: + #define SPP_NO_ADL_BARRIER + #endif + + + // MSVC (including the latest checked version) has not yet completely + // implemented value-initialization, as is reported: + // "VC++ does not value-initialize members of derived classes without + // user-declared constructor", reported in 2009 by Sylvester Hesp: + // https: //connect.microsoft.com/VisualStudio/feedback/details/484295 + // "Presence of copy constructor breaks member class initialization", + // reported in 2009 by Alex Vakulenko: + // https: //connect.microsoft.com/VisualStudio/feedback/details/499606 + // "Value-initialization in new-expression", reported in 2005 by + // Pavel Kuznetsov (MetaCommunications Engineering): + // https: //connect.microsoft.com/VisualStudio/feedback/details/100744 + // See also: http: //www.boost.org/libs/utility/value_init.htm #compiler_issues + // (Niels Dekker, LKEB, May 2010) + #define SPP_NO_COMPLETE_VALUE_INITIALIZATION + + #ifndef _NATIVE_WCHAR_T_DEFINED + #define SPP_NO_INTRINSIC_WCHAR_T + #endif + + // + // check for exception handling support: + #if !defined(_CPPUNWIND) && !defined(SPP_NO_EXCEPTIONS) + #define SPP_NO_EXCEPTIONS + #endif + + // + // __int64 support: + // + #define 
SPP_HAS_MS_INT64 + #if defined(_MSC_EXTENSIONS) || (_MSC_VER >= 1400) + #define SPP_HAS_LONG_LONG + #else + #define SPP_NO_LONG_LONG + #endif + + #if (_MSC_VER >= 1400) && !defined(_DEBUG) + #define SPP_HAS_NRVO + #endif + + #if _MSC_VER >= 1500 // 150X == VC++ 9.0 + #define SPP_HAS_PRAGMA_DETECT_MISMATCH + #endif + + // + // disable Win32 API's if compiler extensions are + // turned off: + // + #if !defined(_MSC_EXTENSIONS) && !defined(SPP_DISABLE_WIN32) + #define SPP_DISABLE_WIN32 + #endif + + #if !defined(_CPPRTTI) && !defined(SPP_NO_RTTI) + #define SPP_NO_RTTI + #endif + + // + // TR1 features: + // + #if _MSC_VER >= 1700 + // #define SPP_HAS_TR1_HASH // don't know if this is true yet. + // #define SPP_HAS_TR1_TYPE_TRAITS // don't know if this is true yet. + #define SPP_HAS_TR1_UNORDERED_MAP + #define SPP_HAS_TR1_UNORDERED_SET + #endif + + // + // C++0x features + // + // See above for SPP_NO_LONG_LONG + + // C++ features supported by VC++ 10 (aka 2010) + // + #if _MSC_VER < 1600 + #define SPP_NO_CXX11_AUTO_DECLARATIONS + #define SPP_NO_CXX11_AUTO_MULTIDECLARATIONS + #define SPP_NO_CXX11_LAMBDAS + #define SPP_NO_CXX11_RVALUE_REFERENCES + #define SPP_NO_CXX11_STATIC_ASSERT + #define SPP_NO_CXX11_NULLPTR + #define SPP_NO_CXX11_DECLTYPE + #endif // _MSC_VER < 1600 + + #if _MSC_VER >= 1600 + #define SPP_HAS_STDINT_H + #endif + + // C++11 features supported by VC++ 11 (aka 2012) + // + #if _MSC_VER < 1700 + #define SPP_NO_CXX11_FINAL + #define SPP_NO_CXX11_RANGE_BASED_FOR + #define SPP_NO_CXX11_SCOPED_ENUMS + #endif // _MSC_VER < 1700 + + // C++11 features supported by VC++ 12 (aka 2013). + // + #if _MSC_FULL_VER < 180020827 + #define SPP_NO_CXX11_DEFAULTED_FUNCTIONS + #define SPP_NO_CXX11_DELETED_FUNCTIONS + #define SPP_NO_CXX11_EXPLICIT_CONVERSION_OPERATORS + #define SPP_NO_CXX11_FUNCTION_TEMPLATE_DEFAULT_ARGS + #define SPP_NO_CXX11_RAW_LITERALS + #define SPP_NO_CXX11_TEMPLATE_ALIASES + #define SPP_NO_CXX11_TRAILING_RESULT_TYPES + #define SPP_NO_CXX11_VARIADIC_TEMPLATES + #define SPP_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX + #define SPP_NO_CXX11_DECLTYPE_N3276 + #endif + + // C++11 features supported by VC++ 14 (aka 2014) CTP1 + #if (_MSC_FULL_VER < 190021730) + #define SPP_NO_CXX11_REF_QUALIFIERS + #define SPP_NO_CXX11_USER_DEFINED_LITERALS + #define SPP_NO_CXX11_ALIGNAS + #define SPP_NO_CXX11_INLINE_NAMESPACES + #define SPP_NO_CXX14_DECLTYPE_AUTO + #define SPP_NO_CXX14_INITIALIZED_LAMBDA_CAPTURES + #define SPP_NO_CXX14_RETURN_TYPE_DEDUCTION + #define SPP_NO_CXX11_HDR_INITIALIZER_LIST + #endif + + // C++11 features not supported by any versions + #define SPP_NO_CXX11_CHAR16_T + #define SPP_NO_CXX11_CHAR32_T + #define SPP_NO_CXX11_CONSTEXPR + #define SPP_NO_CXX11_UNICODE_LITERALS + #define SPP_NO_SFINAE_EXPR + #define SPP_NO_TWO_PHASE_NAME_LOOKUP + + // C++ 14: + #if !defined(__cpp_aggregate_nsdmi) || (__cpp_aggregate_nsdmi < 201304) + #define SPP_NO_CXX14_AGGREGATE_NSDMI + #endif + + #if !defined(__cpp_binary_literals) || (__cpp_binary_literals < 201304) + #define SPP_NO_CXX14_BINARY_LITERALS + #endif + + #if !defined(__cpp_constexpr) || (__cpp_constexpr < 201304) + #define SPP_NO_CXX14_CONSTEXPR + #endif + + #if (__cplusplus < 201304) // There's no SD6 check for this.... 
+ #define SPP_NO_CXX14_DIGIT_SEPARATORS + #endif + + #if !defined(__cpp_generic_lambdas) || (__cpp_generic_lambdas < 201304) + #define SPP_NO_CXX14_GENERIC_LAMBDAS + #endif + + #if !defined(__cpp_variable_templates) || (__cpp_variable_templates < 201304) + #define SPP_NO_CXX14_VARIABLE_TEMPLATES + #endif + +#endif + +// from boost/config/suffix.hpp +// ---------------------------- +#ifndef SPP_ATTRIBUTE_UNUSED + #define SPP_ATTRIBUTE_UNUSED +#endif + +// includes +// -------- +#if defined(SPP_HAS_CSTDINT) && (__cplusplus >= 201103) + #include +#else + #if defined(__FreeBSD__) || defined(__IBMCPP__) || defined(_AIX) + #include + #else + #include + #endif +#endif + +#include +#include +#include +#include // for numeric_limits +#include // For swap(), eg +#include // for iterator tags +#include // for equal_to<>, select1st<>, std::unary_function, etc +#include // for alloc, uninitialized_copy, uninitialized_fill +#include // for malloc/realloc/free +#include // for ptrdiff_t +#include // for placement new +#include // For length_error +#include // for pair<> +#include +#include +#include + +#if !defined(SPP_NO_CXX11_HDR_INITIALIZER_LIST) + #include +#endif + +#if (SPP_GROUP_SIZE == 32) + typedef uint32_t group_bm_type; +#else + typedef uint64_t group_bm_type; +#endif + +template class HashObject; // for Google's benchmark, not in spp namespace! + +// ---------------------------------------------------------------------- +// H A S H F U N C T I O N S +// ---------------------------- +// +// Implements spp::spp_hash() and spp::hash_combine() +// +// This is exactly the content of spp_utils.h, except for the copyright +// attributions at the beginning +// +// WARNING: Any change here has to be duplicated in spp_utils.h. +// ---------------------------------------------------------------------- + +#if !defined(spp_utils_h_guard_) +#define spp_utils_h_guard_ + +#if defined(_MSC_VER) + #if (_MSC_VER >= 1600 ) // vs2010 (1900 is vs2015) + #include + #define SPP_HASH_CLASS std::hash + #else + #include + #define SPP_HASH_CLASS stdext::hash_compare + #endif + #if (_MSC_FULL_VER < 190021730) + #define SPP_NO_CXX11_NOEXCEPT + #endif +#elif defined(__GNUC__) + #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (__cplusplus >= 201103L) + #include + #define SPP_HASH_CLASS std::hash + + #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) < 40600 + #define SPP_NO_CXX11_NOEXCEPT + #endif + #else + #include + #define SPP_HASH_CLASS std::tr1::hash + #define SPP_NO_CXX11_NOEXCEPT + #endif +#elif defined __clang__ + #include + #define SPP_HASH_CLASS std::hash + + #if !__has_feature(cxx_noexcept) + #define SPP_NO_CXX11_NOEXCEPT + #endif +#else + #include + #define SPP_HASH_CLASS std::hash +#endif + +#ifdef SPP_NO_CXX11_NOEXCEPT + #define SPP_NOEXCEPT +#else + #define SPP_NOEXCEPT noexcept +#endif + +#define SPP_INLINE + +#ifndef SPP_NAMESPACE + #define SPP_NAMESPACE spp +#endif + +namespace SPP_NAMESPACE +{ + +template +struct spp_hash +{ + SPP_INLINE size_t operator()(const T &__v) const SPP_NOEXCEPT + { + SPP_HASH_CLASS hasher; + return hasher(__v); + } +}; + +template +struct spp_hash +{ + static size_t spp_log2 (size_t val) SPP_NOEXCEPT + { + size_t res = 0; + while (val > 1) + { + val >>= 1; + res++; + } + return res; + } + + SPP_INLINE size_t operator()(const T *__v) const SPP_NOEXCEPT + { + static const size_t shift = spp_log2(1 + sizeof(T)); + return static_cast((*(reinterpret_cast(&__v))) >> shift); + } +}; + +template <> +struct spp_hash : public std::unary_function +{ + SPP_INLINE size_t operator()(bool __v) const 
SPP_NOEXCEPT {return static_cast(__v);} +}; + +template <> +struct spp_hash : public std::unary_function +{ + SPP_INLINE size_t operator()(char __v) const SPP_NOEXCEPT {return static_cast(__v);} +}; + +template <> +struct spp_hash : public std::unary_function +{ + SPP_INLINE size_t operator()(signed char __v) const SPP_NOEXCEPT {return static_cast(__v);} +}; + +template <> +struct spp_hash : public std::unary_function +{ + SPP_INLINE size_t operator()(unsigned char __v) const SPP_NOEXCEPT {return static_cast(__v);} +}; + +template <> +struct spp_hash : public std::unary_function +{ + SPP_INLINE size_t operator()(wchar_t __v) const SPP_NOEXCEPT {return static_cast(__v);} +}; + +template <> +struct spp_hash : public std::unary_function +{ + SPP_INLINE size_t operator()(short __v) const SPP_NOEXCEPT {return static_cast(__v);} +}; + +template <> +struct spp_hash : public std::unary_function +{ + SPP_INLINE size_t operator()(unsigned short __v) const SPP_NOEXCEPT {return static_cast(__v);} +}; + +template <> +struct spp_hash : public std::unary_function +{ + SPP_INLINE size_t operator()(int __v) const SPP_NOEXCEPT {return static_cast(__v);} +}; + +template <> +struct spp_hash : public std::unary_function +{ + SPP_INLINE size_t operator()(unsigned int __v) const SPP_NOEXCEPT {return static_cast(__v);} +}; + +template <> +struct spp_hash : public std::unary_function +{ + SPP_INLINE size_t operator()(long __v) const SPP_NOEXCEPT {return static_cast(__v);} +}; + +template <> +struct spp_hash : public std::unary_function +{ + SPP_INLINE size_t operator()(unsigned long __v) const SPP_NOEXCEPT {return static_cast(__v);} +}; + +template <> +struct spp_hash : public std::unary_function +{ + SPP_INLINE size_t operator()(float __v) const SPP_NOEXCEPT + { + // -0.0 and 0.0 should return same hash + uint32_t *as_int = reinterpret_cast(&__v); + return (__v == 0) ? static_cast(0) : static_cast(*as_int); + } +}; + +#if 0 +// todo: we should not ignore half of the double => see libcxx/include/functional +template <> +struct spp_hash : public std::unary_function +{ + SPP_INLINE size_t operator()(double __v) const SPP_NOEXCEPT + { + // -0.0 and 0.0 should return same hash + return (__v == 0) ? 
(size_t)0 : (size_t)*((uint64_t *)&__v); + } +}; +#endif + +template struct Combiner +{ + inline void operator()(T& seed, T value); +}; + +template struct Combiner +{ + inline void operator()(T& seed, T value) + { + seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } +}; + +template struct Combiner +{ + inline void operator()(T& seed, T value) + { + seed ^= value + T(0xc6a4a7935bd1e995) + (seed << 6) + (seed >> 2); + } +}; + +template +inline void hash_combine(std::size_t& seed, T const& v) +{ + spp::spp_hash hasher; + Combiner combiner; + + combiner(seed, hasher(v)); +} + +}; + +#endif // spp_utils_h_guard_ + +SPP_START_NAMESPACE + +// ---------------------------------------------------------------------- +// U T I L F U N C T I O N S +// ---------------------------------------------------------------------- +template +inline void throw_exception(const E& exception) +{ +#if !defined(SPP_NO_EXCEPTIONS) + throw exception; +#else + assert(0); + abort(); +#endif +} + +// ---------------------------------------------------------------------- +// M U T A B L E P A I R H A C K +// turn mutable std::pair into correct value_type std::pair +// ---------------------------------------------------------------------- +template +struct cvt +{ + typedef T type; +}; + +template +struct cvt > +{ + typedef std::pair type; +}; + +template +struct cvt > +{ + typedef const std::pair type; +}; + +// ---------------------------------------------------------------------- +// M O V E I T E R A T O R +// ---------------------------------------------------------------------- +#ifdef SPP_NO_CXX11_RVALUE_REFERENCES + #define MK_MOVE_IT(p) (p) +#else + #define MK_MOVE_IT(p) std::make_move_iterator(p) +#endif + + +// ---------------------------------------------------------------------- +// A L L O C A T O R S T U F F +// ---------------------------------------------------------------------- +template +class libc_allocator_with_realloc +{ +public: + typedef T value_type; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + + libc_allocator_with_realloc() {} + libc_allocator_with_realloc(const libc_allocator_with_realloc& /*unused*/) {} + ~libc_allocator_with_realloc() {} + + pointer address(reference r) const { return &r; } + const_pointer address(const_reference r) const { return &r; } + + pointer allocate(size_type n, const_pointer /*unused*/= 0) + { + return static_cast(malloc(n * sizeof(value_type))); + } + + void deallocate(pointer p, size_type /*unused*/) + { + free(p); + } + + pointer reallocate(pointer p, size_type n) + { + return static_cast(realloc(p, n * sizeof(value_type))); + } + + size_type max_size() const + { + return static_cast(-1) / sizeof(value_type); + } + + void construct(pointer p, const value_type& val) + { + new(p) value_type(val); + } + + void destroy(pointer p) { p->~value_type(); } + + template + explicit libc_allocator_with_realloc(const libc_allocator_with_realloc& /*unused*/) {} + + template + struct rebind + { + typedef libc_allocator_with_realloc other; + }; +}; + +// ---------------------------------------------------------------------- +// libc_allocator_with_realloc specialization. 
+// ---------------------------------------------------------------------- +template<> +class libc_allocator_with_realloc +{ +public: + typedef void value_type; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef void* pointer; + typedef const void* const_pointer; + + template + struct rebind + { + typedef libc_allocator_with_realloc other; + }; +}; + +template +inline bool operator==(const libc_allocator_with_realloc& /*unused*/, + const libc_allocator_with_realloc& /*unused*/) +{ + return true; +} + +template +inline bool operator!=(const libc_allocator_with_realloc& /*unused*/, + const libc_allocator_with_realloc& /*unused*/) +{ + return false; +} + +// ---------------------------------------------------------------------- +// I N T E R N A L S T U F F +// ---------------------------------------------------------------------- +#ifdef SPP_NO_CXX11_STATIC_ASSERT + template struct SppCompileAssert { }; + #define SPP_COMPILE_ASSERT(expr, msg) \ + SPP_ATTRIBUTE_UNUSED typedef SppCompileAssert<(bool(expr))> spp_bogus_[bool(expr) ? 1 : -1] +#else + #define SPP_COMPILE_ASSERT static_assert +#endif + +namespace sparsehash_internal +{ + +// Adaptor methods for reading/writing data from an INPUT or OUPTUT +// variable passed to serialize() or unserialize(). For now we +// have implemented INPUT/OUTPUT for FILE*, istream*/ostream* (note +// they are pointers, unlike typical use), or else a pointer to +// something that supports a Read()/Write() method. +// +// For technical reasons, we implement read_data/write_data in two +// stages. The actual work is done in *_data_internal, which takes +// the stream argument twice: once as a template type, and once with +// normal type information. (We only use the second version.) We do +// this because of how C++ picks what function overload to use. If we +// implemented this the naive way: +// bool read_data(istream* is, const void* data, size_t length); +// template read_data(T* fp, const void* data, size_t length); +// C++ would prefer the second version for every stream type except +// istream. However, we want C++ to prefer the first version for +// streams that are *subclasses* of istream, such as istringstream. +// This is not possible given the way template types are resolved. So +// we split the stream argument in two, one of which is templated and +// one of which is not. The specialized functions (like the istream +// version above) ignore the template arg and use the second, 'type' +// arg, getting subclass matching as normal. The 'catch-all' +// functions (the second version above) use the template arg to deduce +// the type, and use a second, void* arg to achieve the desired +// 'catch-all' semantics. + + // ----- low-level I/O for FILE* ---- + + template + inline bool read_data_internal(Ignored* /*unused*/, FILE* fp, + void* data, size_t length) + { + return fread(data, length, 1, fp) == 1; + } + + template + inline bool write_data_internal(Ignored* /*unused*/, FILE* fp, + const void* data, size_t length) + { + return fwrite(data, length, 1, fp) == 1; + } + + // ----- low-level I/O for iostream ---- + + // We want the caller to be responsible for #including , not + // us, because iostream is a big header! According to the standard, + // it's only legal to delay the instantiation the way we want to if + // the istream/ostream is a template type. So we jump through hoops. 
+ template + inline bool read_data_internal_for_istream(ISTREAM* fp, + void* data, size_t length) + { + return fp->read(reinterpret_cast(data), + static_cast(length)).good(); + } + template + inline bool read_data_internal(Ignored* /*unused*/, std::istream* fp, + void* data, size_t length) + { + return read_data_internal_for_istream(fp, data, length); + } + + template + inline bool write_data_internal_for_ostream(OSTREAM* fp, + const void* data, size_t length) + { + return fp->write(reinterpret_cast(data), + static_cast(length)).good(); + } + template + inline bool write_data_internal(Ignored* /*unused*/, std::ostream* fp, + const void* data, size_t length) + { + return write_data_internal_for_ostream(fp, data, length); + } + + // ----- low-level I/O for custom streams ---- + + // The INPUT type needs to support a Read() method that takes a + // buffer and a length and returns the number of bytes read. + template + inline bool read_data_internal(INPUT* fp, void* /*unused*/, + void* data, size_t length) + { + return static_cast(fp->Read(data, length)) == length; + } + + // The OUTPUT type needs to support a Write() operation that takes + // a buffer and a length and returns the number of bytes written. + template + inline bool write_data_internal(OUTPUT* fp, void* /*unused*/, + const void* data, size_t length) + { + return static_cast(fp->Write(data, length)) == length; + } + + // ----- low-level I/O: the public API ---- + + template + inline bool read_data(INPUT* fp, void* data, size_t length) + { + return read_data_internal(fp, fp, data, length); + } + + template + inline bool write_data(OUTPUT* fp, const void* data, size_t length) + { + return write_data_internal(fp, fp, data, length); + } + + // Uses read_data() and write_data() to read/write an integer. + // length is the number of bytes to read/write (which may differ + // from sizeof(IntType), allowing us to save on a 32-bit system + // and load on a 64-bit system). Excess bytes are taken to be 0. + // INPUT and OUTPUT must match legal inputs to read/write_data (above). + // -------------------------------------------------------------------- + template + bool read_bigendian_number(INPUT* fp, IntType* value, size_t length) + { + *value = 0; + unsigned char byte; + // We require IntType to be unsigned or else the shifting gets all screwy. + SPP_COMPILE_ASSERT(static_cast(-1) > static_cast(0), "serializing_int_requires_an_unsigned_type"); + for (size_t i = 0; i < length; ++i) + { + if (!read_data(fp, &byte, sizeof(byte))) + return false; + *value |= static_cast(byte) << ((length - 1 - i) * 8); + } + return true; + } + + template + bool write_bigendian_number(OUTPUT* fp, IntType value, size_t length) + { + unsigned char byte; + // We require IntType to be unsigned or else the shifting gets all screwy. + SPP_COMPILE_ASSERT(static_cast(-1) > static_cast(0), "serializing_int_requires_an_unsigned_type"); + for (size_t i = 0; i < length; ++i) + { + byte = (sizeof(value) <= length-1 - i) + ? static_cast(0) : static_cast((value >> ((length-1 - i) * 8)) & 255); + if (!write_data(fp, &byte, sizeof(byte))) return false; + } + return true; + } + + // If your keys and values are simple enough, you can pass this + // serializer to serialize()/unserialize(). "Simple enough" means + // value_type is a POD type that contains no pointers. Note, + // however, we don't try to normalize endianness. + // This is the type used for NopointerSerializer. 
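+  // As a rough usage sketch (assuming POD key/value types, and assuming the
+  // serialize()/unserialize() members and the NopointerSerializer typedef
+  // mentioned above are exposed by the map as in the rest of this header),
+  // one might write something along these lines:
+  //
+  //     spp::sparse_hash_map<int, int> m;
+  //     FILE *fp = fopen("hashtable.bin", "wb");
+  //     m.serialize(spp::sparse_hash_map<int, int>::NopointerSerializer(), fp);
+  //     fclose(fp);
+  //
+  // pod_serializer itself just pushes each value_type through
+  // read_data()/write_data() verbatim, with no endianness normalization.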
+ // --------------------------------------------------------------- + template struct pod_serializer + { + template + bool operator()(INPUT* fp, value_type* value) const + { + return read_data(fp, value, sizeof(*value)); + } + + template + bool operator()(OUTPUT* fp, const value_type& value) const + { + return write_data(fp, &value, sizeof(value)); + } + }; + + + // Settings contains parameters for growing and shrinking the table. + // It also packages zero-size functor (ie. hasher). + // + // It does some munging of the hash value in cases where we think + // (fear) the original hash function might not be very good. In + // particular, the default hash of pointers is the identity hash, + // so probably all the low bits are 0. We identify when we think + // we're hashing a pointer, and chop off the low bits. Note this + // isn't perfect: even when the key is a pointer, we can't tell + // for sure that the hash is the identity hash. If it's not, this + // is needless work (and possibly, though not likely, harmful). + // --------------------------------------------------------------- + template + class sh_hashtable_settings : public HashFunc + { + private: + template struct Mixer + { + inline T operator()(T h) const; + }; + + template struct Mixer + { + inline T operator()(T h) const + { + return h + (h >> 7) + (h >> 13) + (h >> 23); + } + }; + + template struct Mixer + { + inline T operator()(T h) const + { + return h + (h >> 7) + (h >> 13) + (h >> 23) + (h >> 32); + } + }; + + public: + typedef Key key_type; + typedef HashFunc hasher; + typedef SizeType size_type; + + public: + sh_hashtable_settings(const hasher& hf, + const float ht_occupancy_flt, + const float ht_empty_flt) + : hasher(hf), + enlarge_threshold_(0), + shrink_threshold_(0), + consider_shrink_(false), + num_ht_copies_(0) + { + set_enlarge_factor(ht_occupancy_flt); + set_shrink_factor(ht_empty_flt); + } + + size_t hash(const key_type& v) const + { + size_t h = hasher::operator()(v); + Mixer mixer; + + return mixer(h); + } + + float enlarge_factor() const { return enlarge_factor_; } + void set_enlarge_factor(float f) { enlarge_factor_ = f; } + float shrink_factor() const { return shrink_factor_; } + void set_shrink_factor(float f) { shrink_factor_ = f; } + + size_type enlarge_threshold() const { return enlarge_threshold_; } + void set_enlarge_threshold(size_type t) { enlarge_threshold_ = t; } + size_type shrink_threshold() const { return shrink_threshold_; } + void set_shrink_threshold(size_type t) { shrink_threshold_ = t; } + + size_type enlarge_size(size_type x) const { return static_cast(x * enlarge_factor_); } + size_type shrink_size(size_type x) const { return static_cast(x * shrink_factor_); } + + bool consider_shrink() const { return consider_shrink_; } + void set_consider_shrink(bool t) { consider_shrink_ = t; } + + unsigned int num_ht_copies() const { return num_ht_copies_; } + void inc_num_ht_copies() { ++num_ht_copies_; } + + // Reset the enlarge and shrink thresholds + void reset_thresholds(size_type num_buckets) + { + set_enlarge_threshold(enlarge_size(num_buckets)); + set_shrink_threshold(shrink_size(num_buckets)); + // whatever caused us to reset already considered + set_consider_shrink(false); + } + + // Caller is resposible for calling reset_threshold right after + // set_resizing_parameters. 
+ // ------------------------------------------------------------ + void set_resizing_parameters(float shrink, float grow) + { + assert(shrink >= 0.0); + assert(grow <= 1.0); + if (shrink > grow/2.0f) + shrink = grow / 2.0f; // otherwise we thrash hashtable size + set_shrink_factor(shrink); + set_enlarge_factor(grow); + } + + // This is the smallest size a hashtable can be without being too crowded + // If you like, you can give a min #buckets as well as a min #elts + // ---------------------------------------------------------------------- + size_type min_buckets(size_type num_elts, size_type min_buckets_wanted) + { + float enlarge = enlarge_factor(); + size_type sz = HT_MIN_BUCKETS; // min buckets allowed + while (sz < min_buckets_wanted || + num_elts >= static_cast(sz * enlarge)) + { + // This just prevents overflowing size_type, since sz can exceed + // max_size() here. + // ------------------------------------------------------------- + if (static_cast(sz * 2) < sz) + throw_exception(std::length_error("resize overflow")); // protect against overflow + sz *= 2; + } + return sz; + } + + private: + size_type enlarge_threshold_; // table.size() * enlarge_factor + size_type shrink_threshold_; // table.size() * shrink_factor + float enlarge_factor_; // how full before resize + float shrink_factor_; // how empty before resize + bool consider_shrink_; // if we should try to shrink before next insert + + unsigned int num_ht_copies_; // num_ht_copies is a counter incremented every Copy/Move + }; + +} // namespace sparsehash_internal + +#undef SPP_COMPILE_ASSERT + +// ---------------------------------------------------------------------- +// S P A R S E T A B L E +// ---------------------------------------------------------------------- +// +// A sparsetable is a random container that implements a sparse array, +// that is, an array that uses very little memory to store unassigned +// indices (in this case, between 1-2 bits per unassigned index). For +// instance, if you allocate an array of size 5 and assign a[2] = , then a[2] will take up a lot of memory but a[0], a[1], +// a[3], and a[4] will not. Array elements that have a value are +// called "assigned". Array elements that have no value yet, or have +// had their value cleared using erase() or clear(), are called +// "unassigned". +// +// Unassigned values seem to have the default value of T (see below). +// Nevertheless, there is a difference between an unassigned index and +// one explicitly assigned the value of T(). The latter is considered +// assigned. +// +// Access to an array element is constant time, as is insertion and +// deletion. Insertion and deletion may be fairly slow, however: +// because of this container's memory economy, each insert and delete +// causes a memory reallocation. +// +// NOTE: You should not test(), get(), or set() any index that is +// greater than sparsetable.size(). If you need to do that, call +// resize() first. +// +// --- Template parameters +// PARAMETER DESCRIPTION DEFAULT +// T The value of the array: the type of -- +// object that is stored in the array. +// +// Alloc: Allocator to use to allocate memory. libc_allocator_with_realloc +// +// --- Model of +// Random Access Container +// +// --- Type requirements +// T must be Copy Constructible. It need not be Assignable. +// +// --- Public base classes +// None. +// +// --- Members +// +// [*] All iterators are const in a sparsetable (though nonempty_iterators +// may not be). Use get() and set() to assign values, not iterators. 
+// +// [+] iterators are random-access iterators. nonempty_iterators are +// bidirectional iterators. + +// [*] If you shrink a sparsetable using resize(), assigned elements +// past the end of the table are removed using erase(). If you grow +// a sparsetable, new unassigned indices are created. +// +// [+] Note that operator[] returns a const reference. You must use +// set() to change the value of a table element. +// +// [!] Unassignment also calls the destructor. +// +// Iterators are invalidated whenever an item is inserted or +// deleted (ie set() or erase() is used) or when the size of +// the table changes (ie resize() or clear() is used). + + +// --------------------------------------------------------------------------- +// type_traits we need +// --------------------------------------------------------------------------- +template +struct integral_constant { static const T value = v; }; + +template const T integral_constant::value; + +typedef integral_constant true_type; +typedef integral_constant false_type; + +template struct is_same : public false_type { }; +template struct is_same : public true_type { }; + +template struct remove_const { typedef T type; }; +template struct remove_const { typedef T type; }; + +template struct remove_volatile { typedef T type; }; +template struct remove_volatile { typedef T type; }; + +template struct remove_cv { + typedef typename remove_const::type>::type type; +}; + +// ---------------- is_integral ---------------------------------------- +template struct is_integral; +template struct is_integral : false_type { }; +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +#ifdef SPP_HAS_LONG_LONG + template<> struct is_integral : true_type { }; + template<> struct is_integral : true_type { }; +#endif +template struct is_integral : is_integral { }; +template struct is_integral : is_integral { }; +template struct is_integral : is_integral { }; + +// ---------------- is_floating_point ---------------------------------------- +template struct is_floating_point; +template struct is_floating_point : false_type { }; +template<> struct is_floating_point : true_type { }; +template<> struct is_floating_point : true_type { }; +template<> struct is_floating_point : true_type { }; +template struct is_floating_point : is_floating_point { }; +template struct is_floating_point : is_floating_point { }; +template struct is_floating_point : is_floating_point { }; + +// ---------------- is_pointer ---------------------------------------- +template struct is_pointer; +template struct is_pointer : false_type { }; +template struct is_pointer : true_type { }; +template struct is_pointer : is_pointer { }; +template struct is_pointer : is_pointer { }; +template struct is_pointer : is_pointer { }; + +// ---------------- is_reference ---------------------------------------- +template struct is_reference; +template struct is_reference : false_type {}; +template struct is_reference : true_type {}; + +// ---------------- is_relocatable ---------------------------------------- +// relocatable values can be moved around in memory using memcpy and remain +// 
correct. Most types are relocatable, an example of a type who is not would +// be a struct which contains a pointer to a buffer inside itself - this is the +// case for std::string in gcc 5. +// ------------------------------------------------------------------------ +template struct is_relocatable; +template struct is_relocatable : + integral_constant::value || is_floating_point::value)> +{ }; + +template struct is_relocatable > : true_type { }; + +template struct is_relocatable : is_relocatable { }; +template struct is_relocatable : is_relocatable { }; +template struct is_relocatable : is_relocatable { }; +template struct is_relocatable : is_relocatable { }; +template struct is_relocatable > : + integral_constant::value && is_relocatable::value)> +{ }; + +// --------------------------------------------------------------------------- +// Our iterator as simple as iterators can be: basically it's just +// the index into our table. Dereference, the only complicated +// thing, we punt to the table class. This just goes to show how +// much machinery STL requires to do even the most trivial tasks. +// +// A NOTE ON ASSIGNING: +// A sparse table does not actually allocate memory for entries +// that are not filled. Because of this, it becomes complicated +// to have a non-const iterator: we don't know, if the iterator points +// to a not-filled bucket, whether you plan to fill it with something +// or whether you plan to read its value (in which case you'll get +// the default bucket value). Therefore, while we can define const +// operations in a pretty 'normal' way, for non-const operations, we +// define something that returns a helper object with operator= and +// operator& that allocate a bucket lazily. We use this for table[] +// and also for regular table iterators. + +// --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +template +class table_element_adaptor +{ +public: + typedef typename tabletype::value_type value_type; + typedef typename tabletype::size_type size_type; + typedef typename tabletype::reference reference; + typedef typename tabletype::pointer pointer; + + table_element_adaptor(tabletype *tbl, size_type p) : + table(tbl), pos(p) + { } + + table_element_adaptor& operator=(const value_type &val) + { + table->set(pos, val, false); + return *this; + } + + operator value_type() { return table->get(pos); } // we look like a value + + pointer operator& () { return &table->mutating_get(pos); } + +private: + tabletype* table; + size_type pos; +}; + +// Our iterator as simple as iterators can be: basically it's just +// the index into our table. Dereference, the only complicated +// thing, we punt to the table class. This just goes to show how +// much machinery STL requires to do even the most trivial tasks. +// +// By templatizing over tabletype, we have one iterator type which +// we can use for both sparsetables and sparsebins. In fact it +// works on any class that allows size() and operator[] (eg vector), +// as long as it does the standard STL typedefs too (eg value_type). 
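+// As a small illustration of the adaptor above (a sketch, using a
+// hypothetical sparsetable<int> t): writing through the non-const
+// operator[], as in
+//     t[3] = 7;
+// goes through table_element_adaptor::operator=() and fills bucket 3,
+// whereas reading it back, as in
+//     int x = t[3];
+// converts the adaptor via operator value_type() (i.e. get()) and does
+// not allocate anything for still-unassigned buckets.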
+ +// --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +template +class table_iterator +{ +public: + typedef table_iterator iterator; + + typedef std::random_access_iterator_tag iterator_category; + typedef typename tabletype::value_type value_type; + typedef typename tabletype::difference_type difference_type; + typedef typename tabletype::size_type size_type; + typedef table_element_adaptor reference; + typedef table_element_adaptor* pointer; + + explicit table_iterator(tabletype *tbl = 0, size_type p = 0) : + table(tbl), pos(p) + { } + + // The main thing our iterator does is dereference. If the table entry + // we point to is empty, we return the default value type. + // This is the big different function from the const iterator. + reference operator*() + { + return table_element_adaptor(table, pos); + } + + pointer operator->() { return &(operator*()); } + + // Helper function to assert things are ok; eg pos is still in range + void check() const + { + assert(table); + assert(pos <= table->size()); + } + + // Arithmetic: we just do arithmetic on pos. We don't even need to + // do bounds checking, since STL doesn't consider that its job. :-) + iterator& operator+=(size_type t) { pos += t; check(); return *this; } + iterator& operator-=(size_type t) { pos -= t; check(); return *this; } + iterator& operator++() { ++pos; check(); return *this; } + iterator& operator--() { --pos; check(); return *this; } + iterator operator++(int) + { + iterator tmp(*this); // for x++ + ++pos; check(); return tmp; + } + + iterator operator--(int) + { + iterator tmp(*this); // for x-- + --pos; check(); return tmp; + } + + iterator operator+(difference_type i) const + { + iterator tmp(*this); + tmp += i; return tmp; + } + + iterator operator-(difference_type i) const + { + iterator tmp(*this); + tmp -= i; return tmp; + } + + difference_type operator-(iterator it) const + { // for "x = it2 - it" + assert(table == it.table); + return pos - it.pos; + } + + reference operator[](difference_type n) const + { + return *(*this + n); // simple though not totally efficient + } + + // Comparisons. 
+ bool operator==(const iterator& it) const + { + return table == it.table && pos == it.pos; + } + + bool operator<(const iterator& it) const + { + assert(table == it.table); // life is bad bad bad otherwise + return pos < it.pos; + } + + bool operator!=(const iterator& it) const { return !(*this == it); } + bool operator<=(const iterator& it) const { return !(it < *this); } + bool operator>(const iterator& it) const { return it < *this; } + bool operator>=(const iterator& it) const { return !(*this < it); } + + // Here's the info we actually need to be an iterator + tabletype *table; // so we can dereference and bounds-check + size_type pos; // index into the table +}; + +// --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +template +class const_table_iterator +{ +public: + typedef table_iterator iterator; + typedef const_table_iterator const_iterator; + + typedef std::random_access_iterator_tag iterator_category; + typedef typename tabletype::value_type value_type; + typedef typename tabletype::difference_type difference_type; + typedef typename tabletype::size_type size_type; + typedef typename tabletype::const_reference reference; // we're const-only + typedef typename tabletype::const_pointer pointer; + + // The "real" constructor + const_table_iterator(const tabletype *tbl, size_type p) + : table(tbl), pos(p) { } + + // The default constructor, used when I define vars of type table::iterator + const_table_iterator() : table(NULL), pos(0) { } + + // The copy constructor, for when I say table::iterator foo = tbl.begin() + // Also converts normal iterators to const iterators // not explicit on purpose + const_table_iterator(const iterator &from) + : table(from.table), pos(from.pos) { } + + // The default destructor is fine; we don't define one + // The default operator= is fine; we don't define one + + // The main thing our iterator does is dereference. If the table entry + // we point to is empty, we return the default value type. + reference operator*() const { return (*table)[pos]; } + pointer operator->() const { return &(operator*()); } + + // Helper function to assert things are ok; eg pos is still in range + void check() const + { + assert(table); + assert(pos <= table->size()); + } + + // Arithmetic: we just do arithmetic on pos. We don't even need to + // do bounds checking, since STL doesn't consider that its job. :-) + const_iterator& operator+=(size_type t) { pos += t; check(); return *this; } + const_iterator& operator-=(size_type t) { pos -= t; check(); return *this; } + const_iterator& operator++() { ++pos; check(); return *this; } + const_iterator& operator--() { --pos; check(); return *this; } + const_iterator operator++(int) { const_iterator tmp(*this); // for x++ + ++pos; check(); return tmp; } + const_iterator operator--(int) { const_iterator tmp(*this); // for x-- + --pos; check(); return tmp; } + const_iterator operator+(difference_type i) const + { + const_iterator tmp(*this); + tmp += i; + return tmp; + } + const_iterator operator-(difference_type i) const + { + const_iterator tmp(*this); + tmp -= i; + return tmp; + } + difference_type operator-(const_iterator it) const + { // for "x = it2 - it" + assert(table == it.table); + return pos - it.pos; + } + reference operator[](difference_type n) const + { + return *(*this + n); // simple though not totally efficient + } + + // Comparisons. 
+ bool operator==(const const_iterator& it) const + { + return table == it.table && pos == it.pos; + } + + bool operator<(const const_iterator& it) const + { + assert(table == it.table); // life is bad bad bad otherwise + return pos < it.pos; + } + bool operator!=(const const_iterator& it) const { return !(*this == it); } + bool operator<=(const const_iterator& it) const { return !(it < *this); } + bool operator>(const const_iterator& it) const { return it < *this; } + bool operator>=(const const_iterator& it) const { return !(*this < it); } + + // Here's the info we actually need to be an iterator + const tabletype *table; // so we can dereference and bounds-check + size_type pos; // index into the table +}; + +// --------------------------------------------------------------------------- +// This is a 2-D iterator. You specify a begin and end over a list +// of *containers*. We iterate over each container by iterating over +// it. It's actually simple: +// VECTOR.begin() VECTOR[0].begin() --------> VECTOR[0].end() ---, +// | ________________________________________________/ +// | \_> VECTOR[1].begin() --------> VECTOR[1].end() -, +// | ___________________________________________________/ +// v \_> ...... +// VECTOR.end() +// +// It's impossible to do random access on one of these things in constant +// time, so it's just a bidirectional iterator. +// +// Unfortunately, because we need to use this for a non-empty iterator, +// we use ne_begin() and ne_end() instead of begin() and end() +// (though only going across, not down). +// --------------------------------------------------------------------------- + +// --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +template +class Two_d_iterator : public std::iterator +{ +public: + typedef Two_d_iterator iterator; + + // T can be std::pair, but we need to return std::pair + // --------------------------------------------------------------------- + typedef typename spp_::cvt::type value_type; + typedef value_type& reference; + typedef value_type* pointer; + + explicit Two_d_iterator(row_it curr) : row_current(curr), col_current(0) + { + if (row_current && !row_current->is_marked()) + { + col_current = row_current->ne_begin(); + advance_past_end(); // in case cur->begin() == cur->end() + } + } + + explicit Two_d_iterator(row_it curr, col_it col) : row_current(curr), col_current(col) + { + assert(col); + } + + // The default constructor + Two_d_iterator() : row_current(0), col_current(0) { } + + // Need this explicitly so we can convert normal iterators <=> const iterators + // not explicit on purpose + // --------------------------------------------------------------------------- + template + Two_d_iterator(const Two_d_iterator& it) : + row_current (*(row_it *)&it.row_current), + col_current (*(col_it *)&it.col_current) + { } + + // The default destructor is fine; we don't define one + // The default operator= is fine; we don't define one + + reference operator*() const { return *(col_current); } + pointer operator->() const { return &(operator*()); } + + // Arithmetic: we just do arithmetic on pos. We don't even need to + // do bounds checking, since STL doesn't consider that its job. :-) + // NOTE: this is not amortized constant time! What do we do about it? 
+ // ------------------------------------------------------------------ + void advance_past_end() + { + // used when col_current points to end() + while (col_current == row_current->ne_end()) + { + // end of current row + // ------------------ + ++row_current; // go to beginning of next + if (!row_current->is_marked()) // col is irrelevant at end + col_current = row_current->ne_begin(); + else + break; // don't go past row_end + } + } + + friend size_t operator-(iterator l, iterator f) + { + if (f.row_current->is_marked()) + return 0; + + size_t diff(0); + while (f != l) + { + ++diff; + ++f; + } + return diff; + } + + iterator& operator++() + { + // assert(!row_current->is_marked()); // how to ++ from there? + ++col_current; + advance_past_end(); // in case col_current is at end() + return *this; + } + + iterator& operator--() + { + while (row_current->is_marked() || + col_current == row_current->ne_begin()) + { + --row_current; + col_current = row_current->ne_end(); // this is 1 too far + } + --col_current; + return *this; + } + iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; } + iterator operator--(int) { iterator tmp(*this); --*this; return tmp; } + + + // Comparisons. + bool operator==(const iterator& it) const + { + return (row_current == it.row_current && + (!row_current || row_current->is_marked() || col_current == it.col_current)); + } + + bool operator!=(const iterator& it) const { return !(*this == it); } + + // Here's the info we actually need to be an iterator + // These need to be public so we convert from iterator to const_iterator + // --------------------------------------------------------------------- + row_it row_current; + col_it col_current; +}; + + +// --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +template +class Two_d_destructive_iterator : public Two_d_iterator +{ +public: + typedef Two_d_destructive_iterator iterator; + + Two_d_destructive_iterator(Alloc &alloc, row_it curr) : + _alloc(alloc) + { + this->row_current = curr; + this->col_current = 0; + if (this->row_current && !this->row_current->is_marked()) + { + this->col_current = this->row_current->ne_begin(); + advance_past_end(); // in case cur->begin() == cur->end() + } + } + + // Arithmetic: we just do arithmetic on pos. We don't even need to + // do bounds checking, since STL doesn't consider that its job. :-) + // NOTE: this is not amortized constant time! What do we do about it? + // ------------------------------------------------------------------ + void advance_past_end() + { + // used when col_current points to end() + while (this->col_current == this->row_current->ne_end()) + { + this->row_current->clear(_alloc, true); // This is what differs from non-destructive iterators above + + // end of current row + // ------------------ + ++this->row_current; // go to beginning of next + if (!this->row_current->is_marked()) // col is irrelevant at end + this->col_current = this->row_current->ne_begin(); + else + break; // don't go past row_end + } + } + + iterator& operator++() + { + // assert(!this->row_current->is_marked()); // how to ++ from there? 
+ ++this->col_current; + advance_past_end(); // in case col_current is at end() + return *this; + } + +private: + Two_d_destructive_iterator& operator=(const Two_d_destructive_iterator &o); + + Alloc &_alloc; +}; + + +// --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +static const char spp_bits_in[256] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, +}; + +static inline uint32_t s_spp_popcount_default_lut(uint32_t i) +{ + uint32_t res = static_cast(spp_bits_in[i & 0xFF]); + res += static_cast(spp_bits_in[(i >> 8) & 0xFF]); + res += static_cast(spp_bits_in[(i >> 16) & 0xFF]); + res += static_cast(spp_bits_in[i >> 24]); + return res; +} + +static inline uint32_t s_spp_popcount_default_lut(uint64_t i) +{ + uint32_t res = static_cast(spp_bits_in[i & 0xFF]); + res += static_cast(spp_bits_in[(i >> 8) & 0xFF]); + res += static_cast(spp_bits_in[(i >> 16) & 0xFF]); + res += static_cast(spp_bits_in[(i >> 24) & 0xFF]); + res += static_cast(spp_bits_in[(i >> 32) & 0xFF]); + res += static_cast(spp_bits_in[(i >> 40) & 0xFF]); + res += static_cast(spp_bits_in[(i >> 48) & 0xFF]); + res += static_cast(spp_bits_in[i >> 56]); + return res; +} + +// faster than the lookup table (LUT) +// ---------------------------------- +static inline uint32_t s_spp_popcount_default(uint32_t i) +{ + i = i - ((i >> 1) & 0x55555555); + i = (i & 0x33333333) + ((i >> 2) & 0x33333333); + return (((i + (i >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; +} + +// faster than the lookup table (LUT) +// ---------------------------------- +static inline uint32_t s_spp_popcount_default(uint64_t x) +{ + const uint64_t m1 = uint64_t(0x5555555555555555); // binary: 0101... + const uint64_t m2 = uint64_t(0x3333333333333333); // binary: 00110011.. + const uint64_t m4 = uint64_t(0x0f0f0f0f0f0f0f0f); // binary: 4 zeros, 4 ones ... + const uint64_t h01 = uint64_t(0x0101010101010101); // the sum of 256 to the power of 0,1,2,3... + + x -= (x >> 1) & m1; // put count of each 2 bits into those 2 bits + x = (x & m2) + ((x >> 2) & m2); // put count of each 4 bits into those 4 bits + x = (x + (x >> 4)) & m4; // put count of each 8 bits into those 8 bits + return (x * h01)>>56; // returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24)+... +} + +#if defined(SPP_POPCNT_CHECK) +static inline bool spp_popcount_check() +{ + int cpuInfo[4] = { -1 }; + spp_cpuid(cpuInfo, 1); + if (cpuInfo[2] & (1 << 23)) + return true; // means SPP_POPCNT supported + return false; +} +#endif + +#if defined(SPP_POPCNT_CHECK) && defined(SPP_POPCNT) + +static inline uint32_t spp_popcount(uint32_t i) +{ + static const bool s_ok = spp_popcount_check(); + return s_ok ? 
SPP_POPCNT(i) : s_spp_popcount_default(i); +} + +#else + +static inline uint32_t spp_popcount(uint32_t i) +{ +#if defined(SPP_POPCNT) + return static_cast(SPP_POPCNT(i)); +#else + return s_spp_popcount_default(i); +#endif +} + +#endif + +#if defined(SPP_POPCNT_CHECK) && defined(SPP_POPCNT64) + +static inline uint32_t spp_popcount(uint64_t i) +{ + static const bool s_ok = spp_popcount_check(); + return s_ok ? (uint32_t)SPP_POPCNT64(i) : s_spp_popcount_default(i); +} + +#else + +static inline uint32_t spp_popcount(uint64_t i) +{ +#if defined(SPP_POPCNT64) + return static_cast(SPP_POPCNT64(i)); +#elif 1 + return s_spp_popcount_default(i); +#endif +} + +#endif + +// --------------------------------------------------------------------------- +// SPARSE-TABLE +// ------------ +// The idea is that a table with (logically) t buckets is divided +// into t/M *groups* of M buckets each. (M is a constant, typically +// 32) Each group is stored sparsely. +// Thus, inserting into the table causes some array to grow, which is +// slow but still constant time. Lookup involves doing a +// logical-position-to-sparse-position lookup, which is also slow but +// constant time. The larger M is, the slower these operations are +// but the less overhead (slightly). +// +// To store the sparse array, we store a bitmap B, where B[i] = 1 iff +// bucket i is non-empty. Then to look up bucket i we really look up +// array[# of 1s before i in B]. This is constant time for fixed M. +// +// Terminology: the position of an item in the overall table (from +// 1 .. t) is called its "location." The logical position in a group +// (from 1 .. M) is called its "position." The actual location in +// the array (from 1 .. # of non-empty buckets in the group) is +// called its "offset." +// --------------------------------------------------------------------------- + +template +class sparsegroup +{ +public: + // Basic types + typedef typename spp::cvt::type value_type; + typedef Alloc allocator_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef value_type* pointer; + typedef const value_type* const_pointer; + + typedef table_element_adaptor > element_adaptor; + typedef uint8_t size_type; // max # of buckets + + // These are our special iterators, that go over non-empty buckets in a + // group. These aren't const-only because you can change non-empty bcks. 
+ // --------------------------------------------------------------------- + typedef pointer ne_iterator; + typedef const_pointer const_ne_iterator; + typedef std::reverse_iterator reverse_ne_iterator; + typedef std::reverse_iterator const_reverse_ne_iterator; + + // We'll have versions for our special non-empty iterator too + // ---------------------------------------------------------- + ne_iterator ne_begin() { return reinterpret_cast(_group); } + const_ne_iterator ne_begin() const { return reinterpret_cast(_group); } + const_ne_iterator ne_cbegin() const { return reinterpret_cast(_group); } + ne_iterator ne_end() { return reinterpret_cast(_group + _num_items()); } + const_ne_iterator ne_end() const { return reinterpret_cast(_group + _num_items()); } + const_ne_iterator ne_cend() const { return reinterpret_cast(_group + _num_items()); } + reverse_ne_iterator ne_rbegin() { return reverse_ne_iterator(ne_end()); } + const_reverse_ne_iterator ne_rbegin() const { return const_reverse_ne_iterator(ne_cend()); } + const_reverse_ne_iterator ne_crbegin() const { return const_reverse_ne_iterator(ne_cend()); } + reverse_ne_iterator ne_rend() { return reverse_ne_iterator(ne_begin()); } + const_reverse_ne_iterator ne_rend() const { return const_reverse_ne_iterator(ne_cbegin()); } + const_reverse_ne_iterator ne_crend() const { return const_reverse_ne_iterator(ne_cbegin()); } + + + // This gives us the "default" value to return for an empty bucket. + // We just use the default constructor on T, the template type + // ---------------------------------------------------------------- + const_reference default_value() const + { + static value_type defaultval = value_type(); + return defaultval; + } + +private: + // T can be std::pair, but we need to return std::pair + // --------------------------------------------------------------------- + typedef T mutable_value_type; + typedef mutable_value_type& mutable_reference; + typedef const mutable_value_type& const_mutable_reference; + typedef mutable_value_type* mutable_pointer; + typedef const mutable_value_type* const_mutable_pointer; + +#define spp_mutable_ref(x) (*(reinterpret_cast(&(x)))) +#define spp_const_mutable_ref(x) (*(reinterpret_cast(&(x)))) + + typedef typename Alloc::template rebind::other value_alloc_type; + + bool _bmtest(size_type i) const { return !!(_bitmap & (static_cast(1) << i)); } + void _bmset(size_type i) { _bitmap |= static_cast(1) << i; } + void _bmclear(size_type i) { _bitmap &= ~(static_cast(1) << i); } + + bool _bme_test(size_type i) const { return !!(_bm_erased & (static_cast(1) << i)); } + void _bme_set(size_type i) { _bm_erased |= static_cast(1) << i; } + void _bme_clear(size_type i) { _bm_erased &= ~(static_cast(1) << i); } + + bool _bmtest_strict(size_type i) const + { return !!((_bitmap | _bm_erased) & (static_cast(1) << i)); } + + + static uint32_t _sizing(uint32_t n) + { +#if !defined(SPP_ALLOC_SZ) || (SPP_ALLOC_SZ == 0) + // aggressive allocation first, then decreasing as sparsegroups fill up + // -------------------------------------------------------------------- + static uint8_t s_alloc_batch_sz[SPP_GROUP_SIZE] = { 0 }; + if (!s_alloc_batch_sz[0]) + { + // 32 bit bitmap + // ........ .... .... .. .. .. .. . . . . . . . . + // 8 12 16 18 20 22 24 25 26 ... 
32 + // ------------------------------------------------------ + uint8_t group_sz = SPP_GROUP_SIZE / 4; + uint8_t group_start_alloc = SPP_GROUP_SIZE / 8; //4; + uint8_t alloc_sz = group_start_alloc; + for (int i=0; i<4; ++i) + { + for (int j=0; j 2) + group_start_alloc /= 2; + alloc_sz += group_start_alloc; + } + } + + return n ? static_cast(s_alloc_batch_sz[n-1]) : 0; // more aggressive alloc at the beginning + +#elif (SPP_ALLOC_SZ == 1) + // use as little memory as possible - slowest insert/delete in table + // ----------------------------------------------------------------- + return n; +#else + // decent compromise when SPP_ALLOC_SZ == 2 + // ---------------------------------------- + static size_type sz_minus_1 = SPP_ALLOC_SZ - 1; + return (n + sz_minus_1) & ~sz_minus_1; +#endif + } + + mutable_pointer _allocate_group(Alloc &alloc, uint32_t n /* , bool tight = false */) + { + // ignore tight since we don't store num_alloc + // num_alloc = (uint8_t)(tight ? n : _sizing(n)); + + uint32_t num_alloc = (uint8_t)_sizing(n); + _set_num_alloc(num_alloc); + mutable_pointer retval = alloc.allocate(static_cast(num_alloc)); + if (retval == NULL) + { + // the allocator is supposed to throw an exception if the allocation fails. + fprintf(stderr, "sparsehash FATAL ERROR: failed to allocate %d groups\n", num_alloc); + exit(1); + } + return retval; + } + + void _free_group(Alloc &alloc, uint32_t num_alloc) + { + if (_group) + { + uint32_t num_buckets = _num_items(); + if (num_buckets) + { + mutable_pointer end_it = _group + num_buckets; + for (mutable_pointer p = _group; p != end_it; ++p) + p->~mutable_value_type(); + } + alloc.deallocate(_group, (typename allocator_type::size_type)num_alloc); + _group = NULL; + } + } + + // private because should not be called - no allocator! + sparsegroup &operator=(const sparsegroup& x); + + static size_type _pos_to_offset(group_bm_type bm, size_type pos) + { + //return (size_type)((uint32_t)~((int32_t(-1) + pos) >> 31) & spp_popcount(bm << (SPP_GROUP_SIZE - pos))); + //return (size_type)(pos ? spp_popcount(bm << (SPP_GROUP_SIZE - pos)) : 0); + return static_cast(spp_popcount(bm & ((static_cast(1) << pos) - 1))); + } + +public: + + // get_iter() in sparsetable needs it + size_type pos_to_offset(size_type pos) const + { + return _pos_to_offset(_bitmap, pos); + } + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4146) +#endif + + // Returns the (logical) position in the bm[] array, i, such that + // bm[i] is the offset-th set bit in the array. It is the inverse + // of pos_to_offset. get_pos() uses this function to find the index + // of an ne_iterator in the table. Bit-twiddling from + // http://hackersdelight.org/basics.pdf + // ----------------------------------------------------------------- + static size_type offset_to_pos(group_bm_type bm, size_type offset) + { + for (; offset > 0; offset--) + bm &= (bm-1); // remove right-most set bit + + // Clear all bits to the left of the rightmost bit (the &), + // and then clear the rightmost bit but set all bits to the + // right of it (the -1). 
+ // -------------------------------------------------------- + bm = (bm & -bm) - 1; + return static_cast(spp_popcount(bm)); + } + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + size_type offset_to_pos(size_type offset) const + { + return offset_to_pos(_bitmap, offset); + } + +public: + // Constructors -- default and copy -- and destructor + explicit sparsegroup() : + _group(0), _bitmap(0), _bm_erased(0) + { + _set_num_items(0); + _set_num_alloc(0); + } + + sparsegroup(const sparsegroup& x) : + _group(0), _bitmap(x._bitmap), _bm_erased(x._bm_erased) + { + _set_num_items(0); + _set_num_alloc(0); + assert(_group == 0); if (_group) exit(1); + } + + sparsegroup(const sparsegroup& x, allocator_type& a) : + _group(0), _bitmap(x._bitmap), _bm_erased(x._bm_erased) + { + _set_num_items(0); + _set_num_alloc(0); + + uint32_t num_items = x._num_items(); + if (num_items) + { + _group = _allocate_group(a, num_items /* , true */); + _set_num_items(num_items); + std::uninitialized_copy(x._group, x._group + num_items, _group); + } + } + + ~sparsegroup() { assert(_group == 0); if (_group) exit(1); } + + void destruct(allocator_type& a) { _free_group(a, _num_alloc()); } + + // Many STL algorithms use swap instead of copy constructors + void swap(sparsegroup& x) + { + using std::swap; + + swap(_group, x._group); + swap(_bitmap, x._bitmap); + swap(_bm_erased, x._bm_erased); +#ifdef SPP_STORE_NUM_ITEMS + swap(_num_buckets, x._num_buckets); + swap(_num_allocated, x._num_allocated); +#endif + } + + // It's always nice to be able to clear a table without deallocating it + void clear(Alloc &alloc, bool erased) + { + _free_group(alloc, _num_alloc()); + _bitmap = 0; + if (erased) + _bm_erased = 0; + _set_num_items(0); + _set_num_alloc(0); + } + + // Functions that tell you about size. Alas, these aren't so useful + // because our table is always fixed size. + size_type size() const { return static_cast(SPP_GROUP_SIZE); } + size_type max_size() const { return static_cast(SPP_GROUP_SIZE); } + + bool empty() const { return false; } + + // We also may want to know how many *used* buckets there are + size_type num_nonempty() const { return (size_type)_num_items(); } + + // get()/set() are explicitly const/non-const. You can use [] if + // you want something that can be either (potentially more expensive). + const_reference get(size_type i) const + { + if (_bmtest(i)) // bucket i is occupied + return (const_reference)_group[pos_to_offset(i)]; + else + return default_value(); // return the default reference + } + + // TODO(csilvers): make protected + friend + // This is used by sparse_hashtable to get an element from the table + // when we know it exists. + reference unsafe_get(size_type i) const + { + // assert(_bmtest(i)); + return (reference)_group[pos_to_offset(i)]; + } + + typedef std::pair SetResult; + + // returns a reference which can be assigned, so we have to create an entry if not + // already there + // ------------------------------------------------------------------------------- + reference mutating_get(Alloc &alloc, size_type i) + { + // fills bucket i before getting + if (!_bmtest(i)) + { + SetResult sr = set(alloc, i, false); + if (!sr.second) + ::new (sr.first) mutable_value_type(); + return *((pointer)sr.first); + } + + return _group[pos_to_offset(i)]; + } + + // Syntactic sugar. It's easy to return a const reference. To + // return a non-const reference, we need to use the assigner adaptor. 
+ const_reference operator[](size_type i) const + { + return get(i); + } + + element_adaptor operator[](size_type i) + { + return element_adaptor(this, i); + } + +private: + typedef spp_::integral_constant::value && + spp_::is_same >::value)> + realloc_and_memmove_ok; + + // Our default allocator - try to merge memory buffers + // right now it uses Google's traits, but we should use something like folly::IsRelocatable + // return true if the slot was constructed (i.e. contains a valid mutable_value_type + // --------------------------------------------------------------------------------- + bool _set_aux(Alloc &alloc, size_type offset, spp_::true_type) + { + //static int x=0; if (++x < 10) printf("x\n"); // check we are getting here + + uint32_t num_items = _num_items(); + uint32_t num_alloc = _sizing(num_items); + + if (num_items == num_alloc) + { + num_alloc = _sizing(num_items + 1); + _group = alloc.reallocate(_group, num_alloc); + _set_num_alloc(num_alloc); + } + + for (uint32_t i = num_items; i > offset; --i) + memcpy(_group + i, _group + i-1, sizeof(*_group)); + return false; + } + + // Create space at _group[offset], without special assumptions about value_type + // and allocator_type, with a default value + // return true if the slot was constructed (i.e. contains a valid mutable_value_type + // --------------------------------------------------------------------------------- + bool _set_aux(Alloc &alloc, size_type offset, spp_::false_type) + { + uint32_t num_items = _num_items(); + uint32_t num_alloc = _sizing(num_items); + + //assert(num_alloc == (uint32_t)_num_allocated); + if (num_items < num_alloc) + { + // create new object at end and rotate it to position + ::new (&_group[num_items]) mutable_value_type(); + std::rotate(_group + offset, _group + num_items, _group + num_items + 1); + return true; + } + + // This is valid because 0 <= offset <= num_items + mutable_pointer p = _allocate_group(alloc, _sizing(num_items + 1)); + if (offset) + std::uninitialized_copy(MK_MOVE_IT(_group), + MK_MOVE_IT(_group + offset), + p); + if (num_items > offset) + std::uninitialized_copy(MK_MOVE_IT(_group + offset), + MK_MOVE_IT(_group + num_items), + p + offset + 1); + _free_group(alloc, num_alloc); + _group = p; + return false; + } + +public: + + // TODO(austern): Make this exception safe: handle exceptions from + // value_type's copy constructor. + // return true if the slot was constructed (i.e. contains a valid mutable_value_type) + // ---------------------------------------------------------------------------------- + bool _set(Alloc &alloc, size_type i, size_type offset, bool erased) + { + if (erased) + { + // assert(_bme_test(i)); + _bme_clear(i); + } + + if (!_bmtest(i)) + { + bool res = _set_aux(alloc, offset, realloc_and_memmove_ok()); + _incr_num_items(); + _bmset(i); + return res; + } + return true; + } + + // This returns a pair (first is a pointer to the item's location, second is whether + // that location is constructed (i.e. 
contains a valid mutable_value_type) + // --------------------------------------------------------------------------------- + SetResult set(Alloc &alloc, size_type i, bool erased) + { + size_type offset = pos_to_offset(i); + bool constructed = _set(alloc, i, offset, erased); // may change _group pointer + return std::make_pair(_group + offset, constructed); + } + + // used in _move_from (where we can move the old value instead of copying it + // ------------------------------------------------------------------------- + void move(Alloc &alloc, size_type i, reference val) + { + // assert(!_bmtest(i)); + + size_type offset = pos_to_offset(i); + if (!_set(alloc, i, offset, false)) + ::new (&_group[offset]) mutable_value_type(); + + using std::swap; + swap(_group[offset], spp_mutable_ref(val)); // called from _move_from, OK to swap + } + + // We let you see if a bucket is non-empty without retrieving it + // ------------------------------------------------------------- + bool test(size_type i) const + { + return _bmtest(i); + } + + // also tests for erased values + // ---------------------------- + bool test_strict(size_type i) const + { + return _bmtest_strict(i); + } + +private: + // Shrink the array, assuming value_type has trivial copy + // constructor and destructor, and the allocator_type is the default + // libc_allocator_with_alloc. + // ----------------------------------------------------------------------- + void _group_erase_aux(Alloc &alloc, size_type offset, spp_::true_type) + { + // static int x=0; if (++x < 10) printf("Y\n"); // check we are getting here + uint32_t num_items = _num_items(); + uint32_t num_alloc = _sizing(num_items); + + if (num_items == 1) + { + assert(offset == 0); + _free_group(alloc, num_alloc); + _set_num_alloc(0); + return; + } + + _group[offset].~mutable_value_type(); + + for (size_type i = offset; i < num_items - 1; ++i) + memcpy(_group + i, _group + i + 1, sizeof(*_group)); + + if (_sizing(num_items - 1) != num_alloc) + { + num_alloc = _sizing(num_items - 1); + assert(num_alloc); // because we have at least 1 item left + _set_num_alloc(num_alloc); + _group = alloc.reallocate(_group, num_alloc); + } + } + + // Shrink the array, without any special assumptions about value_type and + // allocator_type. 
+ // -------------------------------------------------------------------------- + void _group_erase_aux(Alloc &alloc, size_type offset, spp_::false_type) + { + uint32_t num_items = _num_items(); + uint32_t num_alloc = _sizing(num_items); + + if (_sizing(num_items - 1) != num_alloc) + { + mutable_pointer p = 0; + if (num_items > 1) + { + p = _allocate_group(alloc, num_items - 1); + if (offset) + std::uninitialized_copy(MK_MOVE_IT(_group), + MK_MOVE_IT(_group + offset), + p); + if (static_cast(offset + 1) < num_items) + std::uninitialized_copy(MK_MOVE_IT(_group + offset + 1), + MK_MOVE_IT(_group + num_items), + p + offset); + } + else + { + assert(offset == 0); + _set_num_alloc(0); + } + _free_group(alloc, num_alloc); + _group = p; + } + else + { + std::rotate(_group + offset, _group + offset + 1, _group + num_items); + _group[num_items - 1].~mutable_value_type(); + } + } + + void _group_erase(Alloc &alloc, size_type offset) + { + _group_erase_aux(alloc, offset, realloc_and_memmove_ok()); + } + +public: + template + bool erase_ne(Alloc &alloc, twod_iter &it) + { + assert(_group && it.col_current != ne_end()); + size_type offset = (size_type)(it.col_current - ne_begin()); + size_type pos = offset_to_pos(offset); + + if (_num_items() <= 1) + { + clear(alloc, false); + it.col_current = 0; + } + else + { + _group_erase(alloc, offset); + _decr_num_items(); + _bmclear(pos); + + // in case _group_erase reallocated the buffer + it.col_current = reinterpret_cast(_group) + offset; + } + _bme_set(pos); // remember that this position has been erased + it.advance_past_end(); + return true; + } + + + // This takes the specified elements out of the group. This is + // "undefining", rather than "clearing". + // TODO(austern): Make this exception safe: handle exceptions from + // value_type's copy constructor. + // --------------------------------------------------------------- + void erase(Alloc &alloc, size_type i) + { + if (_bmtest(i)) + { + // trivial to erase empty bucket + if (_num_items() == 1) + clear(alloc, false); + else + { + _group_erase(alloc, pos_to_offset(i)); + _decr_num_items(); + _bmclear(i); + } + _bme_set(i); // remember that this position has been erased + } + } + + // I/O + // We support reading and writing groups to disk. We don't store + // the actual array contents (which we don't know how to store), + // just the bitmap and size. Meant to be used with table I/O. + // -------------------------------------------------------------- + template bool write_metadata(OUTPUT *fp) const + { + // warning: we write 4 or 8 bytes for the bitmap, instead of 6 in the + // original google sparsehash + // ------------------------------------------------------------------ + if (!sparsehash_internal::write_data(fp, &_bitmap, sizeof(_bitmap))) + return false; + + return true; + } + + // Reading destroys the old group contents! Returns true if all was ok. + template bool read_metadata(Alloc &alloc, INPUT *fp) + { + clear(alloc, true); + + if (!sparsehash_internal::read_data(fp, &_bitmap, sizeof(_bitmap))) + return false; + + // We'll allocate the space, but we won't fill it: it will be + // left as uninitialized raw memory. + uint32_t num_items = spp_popcount(_bitmap); // yes, _num_buckets not set + _set_num_items(num_items); + _group = num_items ? _allocate_group(alloc, num_items/* , true */) : 0; + return true; + } + + // Again, only meaningful if value_type is a POD. 
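+    // ---------------------------------------------------------------------
+    // Illustrative example of the group layout assumed by the metadata I/O
+    // above: a group whose occupancy bitmap is 0b1011 holds three items,
+    // packed contiguously at offsets 0, 1 and 2 of _group; bucket position 3
+    // maps to offset 2 (the popcount of the set bits below it), which is
+    // also how read_metadata recovers the item count via
+    // spp_popcount(_bitmap).
+    // ---------------------------------------------------------------------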
+ template bool read_nopointer_data(INPUT *fp) + { + for (ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!sparsehash_internal::read_data(fp, &(*it), sizeof(*it))) + return false; + return true; + } + + // If your keys and values are simple enough, we can write them + // to disk for you. "simple enough" means POD and no pointers. + // However, we don't try to normalize endianness. + // ------------------------------------------------------------ + template bool write_nopointer_data(OUTPUT *fp) const + { + for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!sparsehash_internal::write_data(fp, &(*it), sizeof(*it))) + return false; + return true; + } + + + // Comparisons. We only need to define == and < -- we get + // != > <= >= via relops.h (which we happily included above). + // Note the comparisons are pretty arbitrary: we compare + // values of the first index that isn't equal (using default + // value for empty buckets). + // --------------------------------------------------------- + bool operator==(const sparsegroup& x) const + { + return (_bitmap == x._bitmap && + _bm_erased == x._bm_erased && + std::equal(_group, _group + _num_items(), x._group)); + } + + bool operator<(const sparsegroup& x) const + { + // also from + return std::lexicographical_compare(_group, _group + _num_items(), + x._group, x._group + x._num_items()); + } + + bool operator!=(const sparsegroup& x) const { return !(*this == x); } + bool operator<=(const sparsegroup& x) const { return !(x < *this); } + bool operator> (const sparsegroup& x) const { return x < *this; } + bool operator>=(const sparsegroup& x) const { return !(*this < x); } + + void mark() { _group = (mutable_value_type *)static_cast(-1); } + bool is_marked() const { return _group == (mutable_value_type *)static_cast(-1); } + +private: + // --------------------------------------------------------------------------- + template + class alloc_impl : public A + { + public: + typedef typename A::pointer pointer; + typedef typename A::size_type size_type; + + // Convert a normal allocator to one that has realloc_or_die() + explicit alloc_impl(const A& a) : A(a) { } + + // realloc_or_die should only be used when using the default + // allocator (libc_allocator_with_realloc). + pointer realloc_or_die(pointer /*ptr*/, size_type /*n*/) + { + fprintf(stderr, "realloc_or_die is only supported for " + "libc_allocator_with_realloc\n"); + exit(1); + return NULL; + } + }; + + // A template specialization of alloc_impl for + // libc_allocator_with_realloc that can handle realloc_or_die. 
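+    // ---------------------------------------------------------------------
+    // Illustrative note: mark()/is_marked() above are used by sparsetable,
+    // which allocates one extra sparsegroup past the end of its group array
+    // and marks it (its _group pointer is set to -1) so the non-empty
+    // iterators know where to stop.
+    // ---------------------------------------------------------------------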
+ // ----------------------------------------------------------- + template + class alloc_impl > + : public libc_allocator_with_realloc + { + public: + typedef typename libc_allocator_with_realloc::pointer pointer; + typedef typename libc_allocator_with_realloc::size_type size_type; + + explicit alloc_impl(const libc_allocator_with_realloc& a) + : libc_allocator_with_realloc(a) + { } + + pointer realloc_or_die(pointer ptr, size_type n) + { + pointer retval = this->reallocate(ptr, n); + if (retval == NULL) { + fprintf(stderr, "sparsehash: FATAL ERROR: failed to reallocate " + "%lu elements for ptr %p", static_cast(n), ptr); + exit(1); + } + return retval; + } + }; + +#ifdef SPP_STORE_NUM_ITEMS + uint32_t _num_items() const { return (uint32_t)_num_buckets; } + void _set_num_items(uint32_t val) { _num_buckets = static_cast(val); } + void _incr_num_items() { ++_num_buckets; } + void _decr_num_items() { --_num_buckets; } + uint32_t _num_alloc() const { return (uint32_t)_num_allocated; } + void _set_num_alloc(uint32_t val) { _num_allocated = static_cast(val); } +#else + uint32_t _num_items() const { return spp_popcount(_bitmap); } + void _set_num_items(uint32_t ) { } + void _incr_num_items() { } + void _decr_num_items() { } + uint32_t _num_alloc() const { return _sizing(_num_items()); } + void _set_num_alloc(uint32_t val) { } +#endif + + // The actual data + // --------------- + mutable_value_type * _group; // (small) array of T's + group_bm_type _bitmap; + group_bm_type _bm_erased; // ones where items have been erased + +#ifdef SPP_STORE_NUM_ITEMS + size_type _num_buckets; + size_type _num_allocated; +#endif +}; + +// --------------------------------------------------------------------------- +// We need a global swap as well +// --------------------------------------------------------------------------- +template +inline void swap(sparsegroup &x, sparsegroup &y) +{ + x.swap(y); +} + +// --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +template > +class sparsetable +{ +private: + typedef typename Alloc::template rebind::other value_alloc_type; + + typedef typename Alloc::template rebind< + sparsegroup >::other group_alloc_type; + typedef typename group_alloc_type::size_type group_size_type; + + typedef T mutable_value_type; + typedef mutable_value_type* mutable_pointer; + typedef const mutable_value_type* const_mutable_pointer; + +public: + // Basic types + // ----------- + typedef typename spp::cvt::type value_type; + typedef Alloc allocator_type; + typedef typename value_alloc_type::size_type size_type; + typedef typename value_alloc_type::difference_type difference_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef value_type* pointer; + typedef const value_type* const_pointer; + + typedef sparsegroup group_type; + + typedef group_type& GroupsReference; + typedef const group_type& GroupsConstReference; + + typedef typename group_type::ne_iterator ColIterator; + typedef typename group_type::const_ne_iterator ColConstIterator; + + typedef table_iterator > iterator; // defined with index + typedef const_table_iterator > const_iterator; // defined with index + typedef table_element_adaptor > element_adaptor; + typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; + + // These are our special iterators, that go over non-empty buckets in a + // table. 
These aren't const only because you can change non-empty bcks. + // ---------------------------------------------------------------------- + typedef Two_d_iterator ne_iterator; + + typedef Two_d_iterator const_ne_iterator; + + // Another special iterator: it frees memory as it iterates (used to resize). + // Obviously, you can only iterate over it once, which is why it's an input iterator + // --------------------------------------------------------------------------------- + typedef Two_d_destructive_iterator destructive_iterator; + + typedef std::reverse_iterator reverse_ne_iterator; + typedef std::reverse_iterator const_reverse_ne_iterator; + + + // Iterator functions + // ------------------ + iterator begin() { return iterator(this, 0); } + const_iterator begin() const { return const_iterator(this, 0); } + const_iterator cbegin() const { return const_iterator(this, 0); } + iterator end() { return iterator(this, size()); } + const_iterator end() const { return const_iterator(this, size()); } + const_iterator cend() const { return const_iterator(this, size()); } + reverse_iterator rbegin() { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const { return const_reverse_iterator(cend()); } + const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); } + reverse_iterator rend() { return reverse_iterator(begin()); } + const_reverse_iterator rend() const { return const_reverse_iterator(cbegin()); } + const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); } + + // Versions for our special non-empty iterator + // ------------------------------------------ + ne_iterator ne_begin() { return ne_iterator (_first_group); } + const_ne_iterator ne_begin() const { return const_ne_iterator(_first_group); } + const_ne_iterator ne_cbegin() const { return const_ne_iterator(_first_group); } + ne_iterator ne_end() { return ne_iterator (_last_group); } + const_ne_iterator ne_end() const { return const_ne_iterator(_last_group); } + const_ne_iterator ne_cend() const { return const_ne_iterator(_last_group); } + + reverse_ne_iterator ne_rbegin() { return reverse_ne_iterator(ne_end()); } + const_reverse_ne_iterator ne_rbegin() const { return const_reverse_ne_iterator(ne_end()); } + const_reverse_ne_iterator ne_crbegin() const { return const_reverse_ne_iterator(ne_end()); } + reverse_ne_iterator ne_rend() { return reverse_ne_iterator(ne_begin()); } + const_reverse_ne_iterator ne_rend() const { return const_reverse_ne_iterator(ne_begin()); } + const_reverse_ne_iterator ne_crend() const { return const_reverse_ne_iterator(ne_begin()); } + + destructive_iterator destructive_begin() + { + return destructive_iterator(_alloc, _first_group); + } + + destructive_iterator destructive_end() + { + return destructive_iterator(_alloc, _last_group); + } + + // How to deal with the proper group + static group_size_type num_groups(group_size_type num) + { + // how many to hold num buckets + return num == 0 ? 
(group_size_type)0 : + (group_size_type)(((num-1) / SPP_GROUP_SIZE) + 1); + } + + typename group_type::size_type pos_in_group(size_type i) const + { + return static_cast(i & SPP_MASK_); + } + + size_type group_num(size_type i) const + { + return (size_type)(i >> SPP_SHIFT_); + } + + GroupsReference which_group(size_type i) + { + return _first_group[group_num(i)]; + } + + GroupsConstReference which_group(size_type i) const + { + return _first_group[group_num(i)]; + } + + void _alloc_group_array(group_size_type sz, group_type *&first, group_type *&last) + { + if (sz) + { + first = _group_alloc.allocate((size_type)(sz + 1)); // + 1 for end marker + first[sz].mark(); // for the ne_iterator + last = first + sz; + } + } + + void _free_group_array(group_type *&first, group_type *&last) + { + if (first) + { + _group_alloc.deallocate(first, (group_size_type)(last - first + 1)); // + 1 for end marker + first = last = 0; + } + } + + void _allocate_groups(size_type sz) + { + if (sz) + { + _alloc_group_array(sz, _first_group, _last_group); + std::uninitialized_fill(_first_group, _last_group, group_type()); + } + } + + void _free_groups() + { + if (_first_group) + { + for (group_type *g = _first_group; g != _last_group; ++g) + g->destruct(_alloc); + _free_group_array(_first_group, _last_group); + } + } + + void _cleanup() + { + _free_groups(); // sets _first_group = _last_group = 0 + _table_size = 0; + _num_buckets = 0; + } + + void _init() + { + _first_group = 0; + _last_group = 0; + _table_size = 0; + _num_buckets = 0; + } + + void _copy(const sparsetable &o) + { + _table_size = o._table_size; + _num_buckets = o._num_buckets; + _alloc = o._alloc; // todo - copy or move allocator according to... + _group_alloc = o._group_alloc; // http://en.cppreference.com/w/cpp/container/unordered_map/unordered_map + + group_size_type sz = (group_size_type)(o._last_group - o._first_group); + if (sz) + { + _alloc_group_array(sz, _first_group, _last_group); + for (group_size_type i=0; iswap(o); + } + + sparsetable(sparsetable&& o, const Alloc &alloc) + { + _init(); + this->swap(o); + _alloc = alloc; // [gp todo] is this correct? + } + + sparsetable& operator=(sparsetable&& o) + { + _cleanup(); + this->swap(o); + return *this; + } +#endif + + // Many STL algorithms use swap instead of copy constructors + void swap(sparsetable& o) + { + using std::swap; + + swap(_first_group, o._first_group); + swap(_last_group, o._last_group); + swap(_table_size, o._table_size); + swap(_num_buckets, o._num_buckets); + if (_alloc != o._alloc) + swap(_alloc, o._alloc); + if (_group_alloc != o._group_alloc) + swap(_group_alloc, o._group_alloc); + } + + // It's always nice to be able to clear a table without deallocating it + void clear() + { + _free_groups(); + _num_buckets = 0; + _table_size = 0; + } + + inline allocator_type get_allocator() const + { + return _alloc; + } + + + // Functions that tell you about size. + // NOTE: empty() is non-intuitive! It does not tell you the number + // of not-empty buckets (use num_nonempty() for that). Instead + // it says whether you've allocated any buckets or not. 
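+    // For example (illustrative): after resize(100) and a set() on three
+    // different buckets, size() == 100 and empty() is false, while
+    // num_nonempty() == 3 -- size()/empty() describe how many buckets were
+    // allocated, num_nonempty() how many of them actually hold a value.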
+ // ---------------------------------------------------------------- + size_type size() const { return _table_size; } + size_type max_size() const { return _alloc.max_size(); } + bool empty() const { return _table_size == 0; } + size_type num_nonempty() const { return _num_buckets; } + + // OK, we'll let you resize one of these puppies + void resize(size_type new_size) + { + group_size_type sz = num_groups(new_size); + group_size_type old_sz = (group_size_type)(_last_group - _first_group); + + if (sz != old_sz) + { + // resize group array + // ------------------ + group_type *first = 0, *last = 0; + if (sz) + { + _alloc_group_array(sz, first, last); + memcpy(first, _first_group, sizeof(*first) * (std::min)(sz, old_sz)); + } + + if (sz < old_sz) + { + for (group_type *g = _first_group + sz; g != _last_group; ++g) + g->destruct(_alloc); + } + else + std::uninitialized_fill(first + old_sz, last, group_type()); + + _free_group_array(_first_group, _last_group); + _first_group = first; + _last_group = last; + } +#if 0 + // used only in test program + // todo: fix if sparsetable to be used directly + // -------------------------------------------- + if (new_size < _table_size) + { + // lower num_buckets, clear last group + if (pos_in_group(new_size) > 0) // need to clear inside last group + groups.back().erase(_alloc, groups.back().begin() + pos_in_group(new_size), + groups.back().end()); + _num_buckets = 0; // refigure # of used buckets + for (const group_type *group = _first_group; group != _last_group; ++group) + _num_buckets += group->num_nonempty(); + } +#endif + _table_size = new_size; + } + + // We let you see if a bucket is non-empty without retrieving it + // ------------------------------------------------------------- + bool test(size_type i) const + { + // assert(i < _table_size); + return which_group(i).test(pos_in_group(i)); + } + + // also tests for erased values + // ---------------------------- + bool test_strict(size_type i) const + { + // assert(i < _table_size); + return which_group(i).test_strict(pos_in_group(i)); + } + + friend struct GrpPos; + + struct GrpPos + { + typedef typename sparsetable::ne_iterator ne_iter; + GrpPos(const sparsetable &table, size_type i) : + grp(table.which_group(i)), pos(table.pos_in_group(i)) {} + + bool test_strict() const { return grp.test_strict(pos); } + bool test() const { return grp.test(pos); } + typename sparsetable::reference unsafe_get() const { return grp.unsafe_get(pos); } + ne_iter get_iter(typename sparsetable::reference ref) + { + return ne_iter((group_type *)&grp, &ref); + } + + void erase(sparsetable &table) // item *must* be present + { + assert(table._num_buckets); + ((group_type &)grp).erase(table._alloc, pos); + --table._num_buckets; + } + + private: + GrpPos* operator=(const GrpPos&); + + const group_type &grp; + typename group_type::size_type pos; + }; + + bool test(iterator pos) const + { + return which_group(pos.pos).test(pos_in_group(pos.pos)); + } + + bool test(const_iterator pos) const + { + return which_group(pos.pos).test(pos_in_group(pos.pos)); + } + + // We only return const_references because it's really hard to + // return something settable for empty buckets. Use set() instead. + const_reference get(size_type i) const + { + assert(i < _table_size); + return which_group(i).get(pos_in_group(i)); + } + + // TODO(csilvers): make protected + friend + // This is used by sparse_hashtable to get an element from the table + // when we know it exists (because the caller has called test(i)). 
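+    // ---------------------------------------------------------------------
+    // Illustrative note on the two tests above: test(i) is true only for a
+    // bucket that currently holds a value, while test_strict(i) is also
+    // true for a bucket whose value was erased.  The probe loops in
+    // sparse_hashtable rely on this: they stop at a bucket only when
+    // test_strict() returns false, so buckets that merely had their value
+    // erased do not cut a probe chain short.
+    // ---------------------------------------------------------------------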
+ // ----------------------------------------------------------------- + reference unsafe_get(size_type i) const + { + assert(i < _table_size); + // assert(test(i)); + return which_group(i).unsafe_get(pos_in_group(i)); + } + + // TODO(csilvers): make protected + friend element_adaptor + reference mutating_get(size_type i) + { + // fills bucket i before getting + assert(i < _table_size); + + GroupsReference grp(which_group(i)); + typename group_type::size_type old_numbuckets = grp.num_nonempty(); + reference retval = grp.mutating_get(_alloc, pos_in_group(i)); + _num_buckets += grp.num_nonempty() - old_numbuckets; + return retval; + } + + // Syntactic sugar. As in sparsegroup, the non-const version is harder + const_reference operator[](size_type i) const + { + return get(i); + } + + element_adaptor operator[](size_type i) + { + return element_adaptor(this, i); + } + + // Needed for hashtables, gets as a ne_iterator. Crashes for empty bcks + const_ne_iterator get_iter(size_type i) const + { + //assert(test(i)); // how can a ne_iterator point to an empty bucket? + + size_type grp_idx = group_num(i); + + return const_ne_iterator(_first_group + grp_idx, + (_first_group[grp_idx].ne_begin() + + _first_group[grp_idx].pos_to_offset(pos_in_group(i)))); + } + + const_ne_iterator get_iter(size_type i, ColIterator col_it) const + { + return const_ne_iterator(_first_group + group_num(i), col_it); + } + + // For nonempty we can return a non-const version + ne_iterator get_iter(size_type i) + { + //assert(test(i)); // how can a nonempty_iterator point to an empty bucket? + + size_type grp_idx = group_num(i); + + return ne_iterator(_first_group + grp_idx, + (_first_group[grp_idx].ne_begin() + + _first_group[grp_idx].pos_to_offset(pos_in_group(i)))); + } + + ne_iterator get_iter(size_type i, ColIterator col_it) + { + return ne_iterator(_first_group + group_num(i), col_it); + } + + // And the reverse transformation. + size_type get_pos(const const_ne_iterator& it) const + { + difference_type current_row = it.row_current - _first_group; + difference_type current_col = (it.col_current - _first_group[current_row].ne_begin()); + return ((current_row * SPP_GROUP_SIZE) + + _first_group[current_row].offset_to_pos(current_col)); + } + + // This returns a reference to the inserted item (which is a copy of val) + // The trick is to figure out whether we're replacing or inserting anew + // ---------------------------------------------------------------------- + reference set(size_type i, const_reference val, bool erased = false) + { + assert(i < _table_size); + group_type &group = which_group(i); + typename group_type::size_type old_numbuckets = group.num_nonempty(); + typename group_type::SetResult sr(group.set(_alloc, pos_in_group(i), erased)); + if (!sr.second) + ::new (sr.first) mutable_value_type(val); + else + *sr.first = spp_const_mutable_ref(val); + _num_buckets += group.num_nonempty() - old_numbuckets; + return *((pointer)sr.first); + } + + // used in _move_from (where we can move the old value instead of copying it + void move(size_type i, reference val) + { + assert(i < _table_size); + which_group(i).move(_alloc, pos_in_group(i), val); + ++_num_buckets; + } + + // This takes the specified elements out of the table. 
+ // -------------------------------------------------- + void erase(size_type i) + { + assert(i < _table_size); + + GroupsReference grp(which_group(i)); + typename group_type::size_type old_numbuckets = grp.num_nonempty(); + grp.erase(_alloc, pos_in_group(i)); + _num_buckets += grp.num_nonempty() - old_numbuckets; + } + + void erase(iterator pos) + { + erase(pos.pos); + } + + void erase(iterator start_it, iterator end_it) + { + // This could be more efficient, but then we'd need to figure + // out if we spanned groups or not. Doesn't seem worth it. + for (; start_it != end_it; ++start_it) + erase(start_it); + } + + const_ne_iterator erase(const_ne_iterator it) + { + ne_iterator res(it); + if (res.row_current->erase_ne(_alloc, res)) + _num_buckets--; + return res; + } + + const_ne_iterator erase(const_ne_iterator f, const_ne_iterator l) + { + size_t diff = l - f; + while (diff--) + f = erase(f); + return f; + } + + // We support reading and writing tables to disk. We don't store + // the actual array contents (which we don't know how to store), + // just the groups and sizes. Returns true if all went ok. + +private: + // Every time the disk format changes, this should probably change too + typedef unsigned long MagicNumberType; + static const MagicNumberType MAGIC_NUMBER = 0x24687531; + + // Old versions of this code write all data in 32 bits. We need to + // support these files as well as having support for 64-bit systems. + // So we use the following encoding scheme: for values < 2^32-1, we + // store in 4 bytes in big-endian order. For values > 2^32, we + // store 0xFFFFFFF followed by 8 bytes in big-endian order. This + // causes us to mis-read old-version code that stores exactly + // 0xFFFFFFF, but I don't think that is likely to have happened for + // these particular values. + template + static bool write_32_or_64(OUTPUT* fp, IntType value) + { + if (value < 0xFFFFFFFFULL) { // fits in 4 bytes + if (!sparsehash_internal::write_bigendian_number(fp, value, 4)) + return false; + } + else + { + if (!sparsehash_internal::write_bigendian_number(fp, 0xFFFFFFFFUL, 4)) + return false; + if (!sparsehash_internal::write_bigendian_number(fp, value, 8)) + return false; + } + return true; + } + + template + static bool read_32_or_64(INPUT* fp, IntType *value) + { // reads into value + MagicNumberType first4 = 0; // a convenient 32-bit unsigned type + if (!sparsehash_internal::read_bigendian_number(fp, &first4, 4)) + return false; + + if (first4 < 0xFFFFFFFFULL) + { + *value = first4; + } + else + { + if (!sparsehash_internal::read_bigendian_number(fp, value, 8)) + return false; + } + return true; + } + +public: + // read/write_metadata() and read_write/nopointer_data() are DEPRECATED. + // Use serialize() and unserialize(), below, for new code. + + template + bool write_metadata(OUTPUT *fp) const + { + if (!write_32_or_64(fp, MAGIC_NUMBER)) return false; + if (!write_32_or_64(fp, _table_size)) return false; + if (!write_32_or_64(fp, _num_buckets)) return false; + + for (const group_type *group = _first_group; group != _last_group; ++group) + if (group->write_metadata(fp) == false) + return false; + return true; + } + + // Reading destroys the old table contents! Returns true if read ok. 
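+    // ---------------------------------------------------------------------
+    // Worked example of the 32/64-bit encoding above (illustrative): a
+    // length of 70000 fits in 32 bits and is written as the four big-endian
+    // bytes 00 01 11 70, while a length of 2^32 is written as FF FF FF FF
+    // followed by the eight bytes 00 00 00 01 00 00 00 00.
+    // ---------------------------------------------------------------------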
+ template + bool read_metadata(INPUT *fp) + { + size_type magic_read = 0; + if (!read_32_or_64(fp, &magic_read)) return false; + if (magic_read != MAGIC_NUMBER) + { + clear(); // just to be consistent + return false; + } + + if (!read_32_or_64(fp, &_table_size)) return false; + if (!read_32_or_64(fp, &_num_buckets)) return false; + + resize(_table_size); // so the vector's sized ok + for (group_type *group = _first_group; group != _last_group; ++group) + if (group->read_metadata(_alloc, fp) == false) + return false; + return true; + } + + // This code is identical to that for SparseGroup + // If your keys and values are simple enough, we can write them + // to disk for you. "simple enough" means no pointers. + // However, we don't try to normalize endianness + bool write_nopointer_data(FILE *fp) const + { + for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!fwrite(&*it, sizeof(*it), 1, fp)) + return false; + return true; + } + + // When reading, we have to override the potential const-ness of *it + bool read_nopointer_data(FILE *fp) + { + for (ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!fread(reinterpret_cast(&(*it)), sizeof(*it), 1, fp)) + return false; + return true; + } + + // INPUT and OUTPUT must be either a FILE, *or* a C++ stream + // (istream, ostream, etc) *or* a class providing + // Read(void*, size_t) and Write(const void*, size_t) + // (respectively), which writes a buffer into a stream + // (which the INPUT/OUTPUT instance presumably owns). + + typedef sparsehash_internal::pod_serializer NopointerSerializer; + + // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&) + template + bool serialize(ValueSerializer serializer, OUTPUT *fp) + { + if (!write_metadata(fp)) + return false; + for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!serializer(fp, *it)) + return false; + return true; + } + + // ValueSerializer: a functor. operator()(INPUT*, value_type*) + template + bool unserialize(ValueSerializer serializer, INPUT *fp) + { + clear(); + if (!read_metadata(fp)) + return false; + for (ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!serializer(fp, &*it)) + return false; + return true; + } + + // Comparisons. Note the comparisons are pretty arbitrary: we + // compare values of the first index that isn't equal (using default + // value for empty buckets). 
+ bool operator==(const sparsetable& x) const + { + return (_table_size == x._table_size && + _num_buckets == x._num_buckets && + _first_group == x._first_group); + } + + bool operator<(const sparsetable& x) const + { + return std::lexicographical_compare(begin(), end(), x.begin(), x.end()); + } + bool operator!=(const sparsetable& x) const { return !(*this == x); } + bool operator<=(const sparsetable& x) const { return !(x < *this); } + bool operator>(const sparsetable& x) const { return x < *this; } + bool operator>=(const sparsetable& x) const { return !(*this < x); } + + +private: + // The actual data + // --------------- + group_type * _first_group; + group_type * _last_group; + size_type _table_size; // how many buckets they want + size_type _num_buckets; // number of non-empty buckets + group_alloc_type _group_alloc; + value_alloc_type _alloc; +}; + +// We need a global swap as well +// --------------------------------------------------------------------------- +template +inline void swap(sparsetable &x, sparsetable &y) +{ + x.swap(y); +} + + +// ---------------------------------------------------------------------- +// S P A R S E _ H A S H T A B L E +// ---------------------------------------------------------------------- +// Hashtable class, used to implement the hashed associative containers +// hash_set and hash_map. +// +// Value: what is stored in the table (each bucket is a Value). +// Key: something in a 1-to-1 correspondence to a Value, that can be used +// to search for a Value in the table (find() takes a Key). +// HashFcn: Takes a Key and returns an integer, the more unique the better. +// ExtractKey: given a Value, returns the unique Key associated with it. +// Must inherit from unary_function, or at least have a +// result_type enum indicating the return type of operator(). +// EqualKey: Given two Keys, says whether they are the same (that is, +// if they are both associated with the same Value). +// Alloc: STL allocator to use to allocate memory. +// +// ---------------------------------------------------------------------- + +// The probing method +// ------------------ +// Linear probing +// #define JUMP_(key, num_probes) ( 1 ) +// Quadratic probing +#define JUMP_(key, num_probes) ( num_probes ) + + +// ------------------------------------------------------------------- +// ------------------------------------------------------------------- +template +class sparse_hashtable +{ +private: + typedef Value mutable_value_type; + typedef typename Alloc::template rebind::other value_alloc_type; + +public: + typedef Key key_type; + typedef typename spp::cvt::type value_type; + typedef HashFcn hasher; + typedef EqualKey key_equal; + typedef Alloc allocator_type; + + typedef typename value_alloc_type::size_type size_type; + typedef typename value_alloc_type::difference_type difference_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef value_type* pointer; + typedef const value_type* const_pointer; + + // Table is the main storage class. + typedef sparsetable Table; + typedef typename Table::ne_iterator ne_it; + typedef typename Table::const_ne_iterator cne_it; + typedef typename Table::destructive_iterator dest_it; + typedef typename Table::ColIterator ColIterator; + + typedef ne_it iterator; + typedef cne_it const_iterator; + typedef dest_it destructive_iterator; + + // These come from tr1. For us they're the same as regular iterators. 
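+    // ---------------------------------------------------------------------
+    // Illustrative note on the probing scheme selected by JUMP_ above: with
+    // quadratic probing the n-th probe advances by n, so a lookup that
+    // hashes to bucket b visits b, b+1, b+3, b+6, b+10, ... (modulo the
+    // table size).  Because the table size is kept a power of two, this
+    // triangular-number sequence eventually visits every bucket.
+    // ---------------------------------------------------------------------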
+ // ------------------------------------------------------------------- + typedef iterator local_iterator; + typedef const_iterator const_local_iterator; + + // How full we let the table get before we resize + // ---------------------------------------------- + static const int HT_OCCUPANCY_PCT; // = 80 (out of 100); + + // How empty we let the table get before we resize lower, by default. + // (0.0 means never resize lower.) + // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing + // ------------------------------------------------------------------ + static const int HT_EMPTY_PCT; // = 0.4 * HT_OCCUPANCY_PCT; + + // Minimum size we're willing to let hashtables be. + // Must be a power of two, and at least 4. + // Note, however, that for a given hashtable, the initial size is a + // function of the first constructor arg, and may be >HT_MIN_BUCKETS. + // ------------------------------------------------------------------ + static const size_type HT_MIN_BUCKETS = 4; + + // By default, if you don't specify a hashtable size at + // construction-time, we use this size. Must be a power of two, and + // at least HT_MIN_BUCKETS. + // ----------------------------------------------------------------- + static const size_type HT_DEFAULT_STARTING_BUCKETS = 32; + + // iterators + // --------- + iterator begin() { return _mk_iterator(table.ne_begin()); } + iterator end() { return _mk_iterator(table.ne_end()); } + const_iterator begin() const { return _mk_const_iterator(table.ne_cbegin()); } + const_iterator end() const { return _mk_const_iterator(table.ne_cend()); } + const_iterator cbegin() const { return _mk_const_iterator(table.ne_cbegin()); } + const_iterator cend() const { return _mk_const_iterator(table.ne_cend()); } + + // These come from tr1 unordered_map. They iterate over 'bucket' n. + // For sparsehashtable, we could consider each 'group' to be a bucket, + // I guess, but I don't really see the point. We'll just consider + // bucket n to be the n-th element of the sparsetable, if it's occupied, + // or some empty element, otherwise. + // --------------------------------------------------------------------- + local_iterator begin(size_type i) + { + return _mk_iterator(table.test(i) ? table.get_iter(i) : table.ne_end()); + } + + local_iterator end(size_type i) + { + local_iterator it = begin(i); + if (table.test(i)) + ++it; + return _mk_iterator(it); + } + + const_local_iterator begin(size_type i) const + { + return _mk_const_iterator(table.test(i) ? table.get_iter(i) : table.ne_cend()); + } + + const_local_iterator end(size_type i) const + { + const_local_iterator it = begin(i); + if (table.test(i)) + ++it; + return _mk_const_iterator(it); + } + + const_local_iterator cbegin(size_type i) const { return begin(i); } + const_local_iterator cend(size_type i) const { return end(i); } + + // This is used when resizing + // -------------------------- + destructive_iterator destructive_begin() { return _mk_destructive_iterator(table.destructive_begin()); } + destructive_iterator destructive_end() { return _mk_destructive_iterator(table.destructive_end()); } + + + // accessor functions for the things we templatize on, basically + // ------------------------------------------------------------- + hasher hash_funct() const { return settings; } + key_equal key_eq() const { return key_info; } + allocator_type get_allocator() const { return table.get_allocator(); } + + // Accessor function for statistics gathering. 
+ unsigned int num_table_copies() const { return settings.num_ht_copies(); } + +private: + // This is used as a tag for the copy constructor, saying to destroy its + // arg We have two ways of destructively copying: with potentially growing + // the hashtable as we copy, and without. To make sure the outside world + // can't do a destructive copy, we make the typename private. + // ----------------------------------------------------------------------- + enum MoveDontCopyT {MoveDontCopy, MoveDontGrow}; + + void _squash_deleted() + { + // gets rid of any deleted entries we have + // --------------------------------------- + if (num_deleted) + { + // get rid of deleted before writing + sparse_hashtable tmp(MoveDontGrow, *this); + swap(tmp); // now we are tmp + } + assert(num_deleted == 0); + } + + // creating iterators from sparsetable::ne_iterators + // ------------------------------------------------- + iterator _mk_iterator(ne_it it) const { return it; } + const_iterator _mk_const_iterator(cne_it it) const { return it; } + destructive_iterator _mk_destructive_iterator(dest_it it) const { return it; } + +public: + size_type size() const { return table.num_nonempty(); } + size_type max_size() const { return table.max_size(); } + bool empty() const { return size() == 0; } + size_type bucket_count() const { return table.size(); } + size_type max_bucket_count() const { return max_size(); } + // These are tr1 methods. Their idea of 'bucket' doesn't map well to + // what we do. We just say every bucket has 0 or 1 items in it. + size_type bucket_size(size_type i) const + { + return (size_type)(begin(i) == end(i) ? 0 : 1); + } + +private: + // Because of the above, size_type(-1) is never legal; use it for errors + // --------------------------------------------------------------------- + static const size_type ILLEGAL_BUCKET = size_type(-1); + + // Used after a string of deletes. Returns true if we actually shrunk. + // TODO(csilvers): take a delta so we can take into account inserts + // done after shrinking. Maybe make part of the Settings class? + // -------------------------------------------------------------------- + bool _maybe_shrink() + { + assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two + assert(bucket_count() >= HT_MIN_BUCKETS); + bool retval = false; + + // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS, + // we'll never shrink until you get relatively big, and we'll never + // shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something + // like "dense_hash_set x; x.insert(4); x.erase(4);" will + // shrink us down to HT_MIN_BUCKETS buckets, which is too small. + // --------------------------------------------------------------- + const size_type num_remain = table.num_nonempty(); + const size_type shrink_threshold = settings.shrink_threshold(); + if (shrink_threshold > 0 && num_remain < shrink_threshold && + bucket_count() > HT_DEFAULT_STARTING_BUCKETS) + { + const float shrink_factor = settings.shrink_factor(); + size_type sz = (size_type)(bucket_count() / 2); // find how much we should shrink + while (sz > HT_DEFAULT_STARTING_BUCKETS && + num_remain < static_cast(sz * shrink_factor)) + { + sz /= 2; // stay a power of 2 + } + sparse_hashtable tmp(MoveDontCopy, *this, sz); + swap(tmp); // now we are tmp + retval = true; + } + settings.set_consider_shrink(false); // because we just considered it + return retval; + } + + // We'll let you resize a hashtable -- though this makes us copy all! 
+ // When you resize, you say, "make it big enough for this many more elements" + // Returns true if we actually resized, false if size was already ok. + // -------------------------------------------------------------------------- + bool _resize_delta(size_type delta) + { + bool did_resize = false; + if (settings.consider_shrink()) + { + // see if lots of deletes happened + if (_maybe_shrink()) + did_resize = true; + } + if (table.num_nonempty() >= + (std::numeric_limits::max)() - delta) + { + throw_exception(std::length_error("resize overflow")); + } + + size_type num_occupied = (size_type)(table.num_nonempty() + num_deleted); + + if (bucket_count() >= HT_MIN_BUCKETS && + (num_occupied + delta) <= settings.enlarge_threshold()) + return did_resize; // we're ok as we are + + // Sometimes, we need to resize just to get rid of all the + // "deleted" buckets that are clogging up the hashtable. So when + // deciding whether to resize, count the deleted buckets (which + // are currently taking up room). + // ------------------------------------------------------------- + const size_type needed_size = + settings.min_buckets((size_type)(num_occupied + delta), (size_type)0); + + if (needed_size <= bucket_count()) // we have enough buckets + return did_resize; + + size_type resize_to = settings.min_buckets((size_type)(num_occupied + delta), bucket_count()); + + if (resize_to < needed_size && // may double resize_to + resize_to < (std::numeric_limits::max)() / 2) + { + // This situation means that we have enough deleted elements, + // that once we purge them, we won't actually have needed to + // grow. But we may want to grow anyway: if we just purge one + // element, say, we'll have to grow anyway next time we + // insert. Might as well grow now, since we're already going + // through the trouble of copying (in order to purge the + // deleted elements). + const size_type target = + static_cast(settings.shrink_size((size_type)(resize_to*2))); + if (table.num_nonempty() + delta >= target) + { + // Good, we won't be below the shrink threshhold even if we double. 
+ resize_to *= 2; + } + } + + sparse_hashtable tmp(MoveDontCopy, *this, resize_to); + swap(tmp); // now we are tmp + return true; + } + + // Used to actually do the rehashing when we grow/shrink a hashtable + // ----------------------------------------------------------------- + void _copy_from(const sparse_hashtable &ht, size_type min_buckets_wanted) + { + clear(); // clear table, set num_deleted to 0 + + // If we need to change the size of our table, do it now + const size_type resize_to = settings.min_buckets(ht.size(), min_buckets_wanted); + + if (resize_to > bucket_count()) + { + // we don't have enough buckets + table.resize(resize_to); // sets the number of buckets + settings.reset_thresholds(bucket_count()); + } + + // We use a normal iterator to get bcks from ht + // We could use insert() here, but since we know there are + // no duplicates, we can be more efficient + assert((bucket_count() & (bucket_count()-1)) == 0); // a power of two + for (const_iterator it = ht.begin(); it != ht.end(); ++it) + { + size_type num_probes = 0; // how many times we've probed + size_type bucknum; + const size_type bucket_count_minus_one = bucket_count() - 1; + for (bucknum = hash(get_key(*it)) & bucket_count_minus_one; + table.test(bucknum); // table.test() OK since no erase() + bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) + { + ++num_probes; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + table.set(bucknum, *it, false); // copies the value to here + } + settings.inc_num_ht_copies(); + } + + // Implementation is like _copy_from, but it destroys the table of the + // "from" guy by freeing sparsetable memory as we iterate. This is + // useful in resizing, since we're throwing away the "from" guy anyway. + // -------------------------------------------------------------------- + void _move_from(MoveDontCopyT mover, sparse_hashtable &ht, + size_type min_buckets_wanted) + { + clear(); + + // If we need to change the size of our table, do it now + size_type resize_to; + if (mover == MoveDontGrow) + resize_to = ht.bucket_count(); // keep same size as old ht + else // MoveDontCopy + resize_to = settings.min_buckets(ht.size(), min_buckets_wanted); + if (resize_to > bucket_count()) + { + // we don't have enough buckets + table.resize(resize_to); // sets the number of buckets + settings.reset_thresholds(bucket_count()); + } + + // We use a normal iterator to get bcks from ht + // We could use insert() here, but since we know there are + // no duplicates, we can be more efficient + assert((bucket_count() & (bucket_count()-1)) == 0); // a power of two + const size_type bucket_count_minus_one = (const size_type)(bucket_count() - 1); + + // THIS IS THE MAJOR LINE THAT DIFFERS FROM COPY_FROM(): + for (destructive_iterator it = ht.destructive_begin(); + it != ht.destructive_end(); ++it) + { + size_type num_probes = 0; + size_type bucknum; + for (bucknum = hash(get_key(*it)) & bucket_count_minus_one; + table.test(bucknum); // table.test() OK since no erase() + bucknum = (size_type)((bucknum + JUMP_(key, num_probes)) & (bucket_count()-1))) + { + ++num_probes; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + table.move(bucknum, *it); // moves the value to here + } + settings.inc_num_ht_copies(); + } + + + // Required by the spec for hashed associative container +public: + // Though the docs say this should be num_buckets, I think it's much + // more useful as num_elements. 
As a special feature, calling with + // req_elements==0 will cause us to shrink if we can, saving space. + // ----------------------------------------------------------------- + void resize(size_type req_elements) + { + // resize to this or larger + if (settings.consider_shrink() || req_elements == 0) + _maybe_shrink(); + if (req_elements > table.num_nonempty()) // we only grow + _resize_delta((size_type)(req_elements - table.num_nonempty())); + } + + // Get and change the value of shrink_factor and enlarge_factor. The + // description at the beginning of this file explains how to choose + // the values. Setting the shrink parameter to 0.0 ensures that the + // table never shrinks. + // ------------------------------------------------------------------ + void get_resizing_parameters(float* shrink, float* grow) const + { + *shrink = settings.shrink_factor(); + *grow = settings.enlarge_factor(); + } + + float get_shrink_factor() const { return settings.shrink_factor(); } + float get_enlarge_factor() const { return settings.enlarge_factor(); } + + void set_resizing_parameters(float shrink, float grow) { + settings.set_resizing_parameters(shrink, grow); + settings.reset_thresholds(bucket_count()); + } + + void set_shrink_factor(float shrink) + { + set_resizing_parameters(shrink, get_enlarge_factor()); + } + + void set_enlarge_factor(float grow) + { + set_resizing_parameters(get_shrink_factor(), grow); + } + + // CONSTRUCTORS -- as required by the specs, we take a size, + // but also let you specify a hashfunction, key comparator, + // and key extractor. We also define a copy constructor and =. + // DESTRUCTOR -- the default is fine, surprisingly. + // ------------------------------------------------------------ + explicit sparse_hashtable(size_type expected_max_items_in_table = 0, + const HashFcn& hf = HashFcn(), + const EqualKey& eql = EqualKey(), + const ExtractKey& ext = ExtractKey(), + const SetKey& set = SetKey(), + const Alloc& alloc = Alloc()) + : settings(hf), + key_info(ext, set, eql), + num_deleted(0), + table((expected_max_items_in_table == 0 + ? HT_DEFAULT_STARTING_BUCKETS + : settings.min_buckets(expected_max_items_in_table, 0)), + value_alloc_type(alloc)) + { + settings.reset_thresholds(bucket_count()); + } + + // As a convenience for resize(), we allow an optional second argument + // which lets you make this new hashtable a different size than ht. + // We also provide a mechanism of saying you want to "move" the ht argument + // into us instead of copying. 
+ // ------------------------------------------------------------------------ + sparse_hashtable(const sparse_hashtable& ht, + size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS) + : settings(ht.settings), + key_info(ht.key_info), + num_deleted(0), + table(0) + { + settings.reset_thresholds(bucket_count()); + _copy_from(ht, min_buckets_wanted); + } + +#if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) + + sparse_hashtable(sparse_hashtable&& o) : + settings(std::move(o.settings)), + key_info(std::move(o.key_info)), + num_deleted(o.num_deleted), + table(std::move(o.table)) + { + } + + sparse_hashtable(sparse_hashtable&& o, const Alloc& alloc) : + settings(std::move(o.settings)), + key_info(std::move(o.key_info)), + num_deleted(o.num_deleted), + table(std::move(o.table), alloc) + { + } + + sparse_hashtable& operator=(sparse_hashtable&& o) + { + using std::swap; + + sparse_hashtable tmp(std::move(o)); + swap(tmp, *this); + return *this; + } +#endif + + sparse_hashtable(MoveDontCopyT mover, + sparse_hashtable& ht, + size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS) + : settings(ht.settings), + key_info(ht.key_info), + num_deleted(0), + table(min_buckets_wanted, ht.table.get_allocator()) + { + settings.reset_thresholds(bucket_count()); + _move_from(mover, ht, min_buckets_wanted); + } + + sparse_hashtable& operator=(const sparse_hashtable& ht) + { + if (&ht == this) + return *this; // don't copy onto ourselves + settings = ht.settings; + key_info = ht.key_info; + num_deleted = ht.num_deleted; + + // _copy_from() calls clear and sets num_deleted to 0 too + _copy_from(ht, HT_MIN_BUCKETS); + + // we purposefully don't copy the allocator, which may not be copyable + return *this; + } + + // Many STL algorithms use swap instead of copy constructors + void swap(sparse_hashtable& ht) + { + using std::swap; + + swap(settings, ht.settings); + swap(key_info, ht.key_info); + swap(num_deleted, ht.num_deleted); + table.swap(ht.table); + settings.reset_thresholds(bucket_count()); // also resets consider_shrink + ht.settings.reset_thresholds(ht.bucket_count()); + // we purposefully don't swap the allocator, which may not be swap-able + } + + // It's always nice to be able to clear a table without deallocating it + void clear() + { + if (!empty() || num_deleted != 0) + { + table.clear(); + table = Table(HT_DEFAULT_STARTING_BUCKETS); + } + settings.reset_thresholds(bucket_count()); + num_deleted = 0; + } + + // LOOKUP ROUTINES +private: + + enum pos_type { pt_empty = 0, pt_erased, pt_full }; + // ------------------------------------------------------------------- + class Position + { + public: + + Position() : _t(pt_empty) {} + Position(pos_type t, size_type idx) : _t(t), _idx(idx) {} + + pos_type _t; + size_type _idx; + }; + + // Returns a pair: + // - 'first' is a code, 2 if key already present, 0 or 1 otherwise. 
+ // - 'second' is a position, where the key should go + // Note: because of deletions where-to-insert is not trivial: it's the + // first deleted bucket we see, as long as we don't find the key later + // ------------------------------------------------------------------- + Position _find_position(const key_type &key) const + { + size_type num_probes = 0; // how many times we've probed + const size_type bucket_count_minus_one = (const size_type)(bucket_count() - 1); + size_type bucknum = hash(key) & bucket_count_minus_one; + Position pos; + + while (1) + { + // probe until something happens + // ----------------------------- + typename Table::GrpPos grp_pos(table, bucknum); + + if (!grp_pos.test_strict()) + { + // bucket is empty => key not present + return pos._t ? pos : Position(pt_empty, bucknum); + } + else if (grp_pos.test()) + { + reference ref(grp_pos.unsafe_get()); + + if (equals(key, get_key(ref))) + return Position(pt_full, bucknum); + } + else if (pos._t == pt_empty) + { + // first erased position + pos._t = pt_erased; + pos._idx = bucknum; + } + + ++num_probes; // we're doing another probe + bucknum = (size_type)((bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one); + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + } + +public: + // I hate to duplicate find() like that, but it is + // significantly faster to not have the intermediate pair + // ------------------------------------------------------------------ + iterator find(const key_type& key) + { + size_type num_probes = 0; // how many times we've probed + const size_type bucket_count_minus_one = bucket_count() - 1; + size_type bucknum = hash(key) & bucket_count_minus_one; + + while (1) // probe until something happens + { + typename Table::GrpPos grp_pos(table, bucknum); + + if (!grp_pos.test_strict()) + return end(); // bucket is empty + if (grp_pos.test()) + { + reference ref(grp_pos.unsafe_get()); + + if (equals(key, get_key(ref))) + return grp_pos.get_iter(ref); + } + ++num_probes; // we're doing another probe + bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + } + + // Wish I could avoid the duplicate find() const and non-const. + // ------------------------------------------------------------ + const_iterator find(const key_type& key) const + { + size_type num_probes = 0; // how many times we've probed + const size_type bucket_count_minus_one = bucket_count() - 1; + size_type bucknum = hash(key) & bucket_count_minus_one; + + while (1) // probe until something happens + { + typename Table::GrpPos grp_pos(table, bucknum); + + if (!grp_pos.test_strict()) + return end(); // bucket is empty + else if (grp_pos.test()) + { + reference ref(grp_pos.unsafe_get()); + + if (equals(key, get_key(ref))) + return _mk_const_iterator(table.get_iter(bucknum, &ref)); + } + ++num_probes; // we're doing another probe + bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + } + + // This is a tr1 method: the bucket a given key is in, or what bucket + // it would be put in, if it were to be inserted. Shrug. + // ------------------------------------------------------------------ + size_type bucket(const key_type& key) const + { + Position pos = _find_position(key); + return pos._idx; + } + + // Counts how many elements have key key. 
For maps, it's either 0 or 1. + // --------------------------------------------------------------------- + size_type count(const key_type &key) const + { + Position pos = _find_position(key); + return (size_type)(pos._t == pt_full ? 1 : 0); + } + + // Likewise, equal_range doesn't really make sense for us. Oh well. + // ----------------------------------------------------------------- + std::pair equal_range(const key_type& key) + { + iterator pos = find(key); // either an iterator or end + if (pos == end()) + return std::pair(pos, pos); + else + { + const iterator startpos = pos++; + return std::pair(startpos, pos); + } + } + + std::pair equal_range(const key_type& key) const + { + const_iterator pos = find(key); // either an iterator or end + if (pos == end()) + return std::pair(pos, pos); + else + { + const const_iterator startpos = pos++; + return std::pair(startpos, pos); + } + } + + + // INSERTION ROUTINES +private: + // Private method used by insert_noresize and find_or_insert. + reference _insert_at(const_reference obj, size_type pos, bool erased) + { + if (size() >= max_size()) + { + throw_exception(std::length_error("insert overflow")); + } + if (erased) + { + assert(num_deleted); + --num_deleted; + } + return table.set(pos, obj, erased); + } + + // If you know *this is big enough to hold obj, use this routine + std::pair _insert_noresize(const_reference obj) + { + Position pos = _find_position(get_key(obj)); + bool already_there = (pos._t == pt_full); + + if (!already_there) + { + reference ref(_insert_at(obj, pos._idx, pos._t == pt_erased)); + return std::pair(_mk_iterator(table.get_iter(pos._idx, &ref)), true); + } + return std::pair(_mk_iterator(table.get_iter(pos._idx)), false); + } + + // Specializations of insert(it, it) depending on the power of the iterator: + // (1) Iterator supports operator-, resize before inserting + template + void _insert(ForwardIterator f, ForwardIterator l, std::forward_iterator_tag /*unused*/) + { + int64_t dist = std::distance(f, l); + if (dist < 0 || static_cast(dist) >= (std::numeric_limits::max)()) + throw_exception(std::length_error("insert-range overflow")); + + _resize_delta(static_cast(dist)); + + for (; dist > 0; --dist, ++f) + _insert_noresize(*f); + } + + // (2) Arbitrary iterator, can't tell how much to resize + template + void _insert(InputIterator f, InputIterator l, std::input_iterator_tag /*unused*/) + { + for (; f != l; ++f) + _insert(*f); + } + +public: + +#if 0 && !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES) + template + pair emplace(Args&&... args) + { + return rep.emplace_unique(std::forward(args)...); + } + + template + iterator emplace_hint(const_iterator p, Args&&... args) + { + return rep.emplace_unique(std::forward(args)...).first; + } +#endif + + // This is the normal insert routine, used by the outside world + std::pair insert(const_reference obj) + { + _resize_delta(1); // adding an object, grow if need be + return _insert_noresize(obj); + } + + // When inserting a lot at a time, we specialize on the type of iterator + template + void insert(InputIterator f, InputIterator l) + { + // specializes on iterator type + _insert(f, l, + typename std::iterator_traits::iterator_category()); + } + + // DefaultValue is a functor that takes a key and returns a value_type + // representing the default value to be inserted if none is found. 
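+    // A minimal sketch of such a functor (illustrative only, not the one
+    // the library defines): for a map from Key to T it might be
+    //
+    //     struct DefaultValue
+    //     {
+    //         std::pair<const Key, T> operator()(const Key& k) const
+    //         { return std::make_pair(k, T()); }
+    //     };
+    //
+    // so that find_or_insert<DefaultValue>(key) inserts { key, T() } when
+    // the key is absent and returns a reference to the stored element.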
+ template + value_type& find_or_insert(const key_type& key) + { + size_type num_probes = 0; // how many times we've probed + const size_type bucket_count_minus_one = bucket_count() - 1; + size_type bucknum = hash(key) & bucket_count_minus_one; + DefaultValue default_value; + size_type erased_pos = 0; + bool erased = false; + + while (1) // probe until something happens + { + typename Table::GrpPos grp_pos(table, bucknum); + + if (!grp_pos.test_strict()) + { + // not found + if (_resize_delta(1)) + { + // needed to rehash to make room + // Since we resized, we can't use pos, so recalculate where to insert. + return *(_insert_noresize(default_value(key)).first); + } + else + { + // no need to rehash, insert right here + return _insert_at(default_value(key), erased ? erased_pos : bucknum, erased); + } + } + if (grp_pos.test()) + { + reference ref(grp_pos.unsafe_get()); + + if (equals(key, get_key(ref))) + return ref; + } + else if (!erased) + { + // first erased position + erased_pos = bucknum; + erased = true; + } + + ++num_probes; // we're doing another probe + bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + } + + size_type erase(const key_type& key) + { + size_type num_probes = 0; // how many times we've probed + const size_type bucket_count_minus_one = bucket_count() - 1; + size_type bucknum = hash(key) & bucket_count_minus_one; + + while (1) // probe until something happens + { + typename Table::GrpPos grp_pos(table, bucknum); + + if (!grp_pos.test_strict()) + return 0; // bucket is empty, we deleted nothing + if (grp_pos.test()) + { + reference ref(grp_pos.unsafe_get()); + + if (equals(key, get_key(ref))) + { + grp_pos.erase(table); + ++num_deleted; + settings.set_consider_shrink(true); // will think about shrink after next insert + return 1; // because we deleted one thing + } + } + ++num_probes; // we're doing another probe + bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + } + + const_iterator erase(const_iterator pos) + { + if (pos == cend()) + return cend(); // sanity check + + const_iterator nextpos = table.erase(pos); + ++num_deleted; + settings.set_consider_shrink(true); + return nextpos; + } + + const_iterator erase(const_iterator f, const_iterator l) + { + if (f == cend()) + return cend(); // sanity check + + size_type num_before = table.num_nonempty(); + const_iterator nextpos = table.erase(f, l); + num_deleted += num_before - table.num_nonempty(); + settings.set_consider_shrink(true); + return nextpos; + } + + // Deleted key routines - just to keep google test framework happy + // we don't actually use the deleted key + // --------------------------------------------------------------- + void set_deleted_key(const key_type& key) + { + _squash_deleted(); + key_info.delkey = key; + } + + void clear_deleted_key() + { + _squash_deleted(); + } + + key_type deleted_key() const + { + return key_info.delkey; + } + + + bool operator==(const sparse_hashtable& ht) const + { + if (this == &ht) + return true; + + if (size() != ht.size()) + return false; + + for (const_iterator it = begin(); it != end(); ++it) + { + const_iterator it2 = ht.find(get_key(*it)); + if ((it2 == ht.end()) || (*it != *it2)) + return false; + } + + return true; + } + + bool operator!=(const sparse_hashtable& ht) const + { + return !(*this == ht); + } + + + // 
I/O + // We support reading and writing hashtables to disk. NOTE that + // this only stores the hashtable metadata, not the stuff you've + // actually put in the hashtable! Alas, since I don't know how to + // write a hasher or key_equal, you have to make sure everything + // but the table is the same. We compact before writing. + // + // The OUTPUT type needs to support a Write() operation. File and + // OutputBuffer are appropriate types to pass in. + // + // The INPUT type needs to support a Read() operation. File and + // InputBuffer are appropriate types to pass in. + // ------------------------------------------------------------- + template + bool write_metadata(OUTPUT *fp) + { + _squash_deleted(); // so we don't have to worry about delkey + return table.write_metadata(fp); + } + + template + bool read_metadata(INPUT *fp) + { + num_deleted = 0; // since we got rid before writing + const bool result = table.read_metadata(fp); + settings.reset_thresholds(bucket_count()); + return result; + } + + // Only meaningful if value_type is a POD. + template + bool write_nopointer_data(OUTPUT *fp) + { + return table.write_nopointer_data(fp); + } + + // Only meaningful if value_type is a POD. + template + bool read_nopointer_data(INPUT *fp) + { + return table.read_nopointer_data(fp); + } + + // INPUT and OUTPUT must be either a FILE, *or* a C++ stream + // (istream, ostream, etc) *or* a class providing + // Read(void*, size_t) and Write(const void*, size_t) + // (respectively), which writes a buffer into a stream + // (which the INPUT/OUTPUT instance presumably owns). + + typedef sparsehash_internal::pod_serializer NopointerSerializer; + + // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&) + template + bool serialize(ValueSerializer serializer, OUTPUT *fp) + { + _squash_deleted(); // so we don't have to worry about delkey + return table.serialize(serializer, fp); + } + + // ValueSerializer: a functor. operator()(INPUT*, value_type*) + template + bool unserialize(ValueSerializer serializer, INPUT *fp) + { + num_deleted = 0; // since we got rid before writing + const bool result = table.unserialize(serializer, fp); + settings.reset_thresholds(bucket_count()); + return result; + } + +private: + + // Package templated functors with the other types to eliminate memory + // needed for storing these zero-size operators. Since ExtractKey and + // hasher's operator() might have the same function signature, they + // must be packaged in different classes. + // ------------------------------------------------------------------------- + struct Settings : + sparsehash_internal::sh_hashtable_settings + { + explicit Settings(const hasher& hf) + : sparsehash_internal::sh_hashtable_settings + (hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {} + }; + + // KeyInfo stores delete key and packages zero-size functors: + // ExtractKey and SetKey. 
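+    // Illustrative note (not from the library): inheriting from the empty
+    // functors relies on the empty-base optimization, e.g.
+    //
+    //     struct E {};                           // stateless functor
+    //     struct AsBase   : E { int delkey; };   // typically sizeof(int)
+    //     struct AsMember { E e; int delkey; };  // at least sizeof(int) + 1, plus padding
+    //
+    // so KeyInfo below costs no more than the deleted key it has to store.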
+ // --------------------------------------------------------- + class KeyInfo : public ExtractKey, public SetKey, public EqualKey + { + public: + KeyInfo(const ExtractKey& ek, const SetKey& sk, const EqualKey& eq) + : ExtractKey(ek), SetKey(sk), EqualKey(eq) + { + } + + // We want to return the exact same type as ExtractKey: Key or const Key& + typename ExtractKey::result_type get_key(const_reference v) const + { + return ExtractKey::operator()(v); + } + + bool equals(const key_type& a, const key_type& b) const + { + return EqualKey::operator()(a, b); + } + + typename spp_::remove_const::type delkey; + }; + + // Utility functions to access the templated operators + size_t hash(const key_type& v) const + { + return settings.hash(v); + } + + bool equals(const key_type& a, const key_type& b) const + { + return key_info.equals(a, b); + } + + typename ExtractKey::result_type get_key(const_reference v) const + { + return key_info.get_key(v); + } + +private: + // Actual data + // ----------- + Settings settings; + KeyInfo key_info; + size_type num_deleted; + Table table; // holds num_buckets and num_elements too +}; + + +// We need a global swap as well +// ----------------------------- +template +inline void swap(sparse_hashtable &x, + sparse_hashtable &y) +{ + x.swap(y); +} + +#undef JUMP_ + +// ----------------------------------------------------------------------------- +template +const typename sparse_hashtable::size_type +sparse_hashtable::ILLEGAL_BUCKET; + +// How full we let the table get before we resize. Knuth says .8 is +// good -- higher causes us to probe too much, though saves memory +// ----------------------------------------------------------------------------- +template +const int sparse_hashtable::HT_OCCUPANCY_PCT = 50; + +// How empty we let the table get before we resize lower. +// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing +// ----------------------------------------------------------------------------- +template +const int sparse_hashtable::HT_EMPTY_PCT += static_cast(0.4 * + sparse_hashtable::HT_OCCUPANCY_PCT); + + + + +// ---------------------------------------------------------------------- +// S P A R S E _ H A S H _ M A P +// ---------------------------------------------------------------------- +template , + class EqualKey = std::equal_to, + class Alloc = libc_allocator_with_realloc > > +class sparse_hash_map +{ +private: + // Apparently select1st is not stl-standard, so we define our own + struct SelectKey + { + typedef const Key& result_type; + + inline const Key& operator()(const std::pair& p) const + { + return p.first; + } + }; + + struct SetKey + { + inline void operator()(std::pair* value, const Key& new_key) const + { + *const_cast(&value->first) = new_key; + } + }; + + // For operator[]. 
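+    // Illustrative example (not part of this patch): with
+    //
+    //     spp::sparse_hash_map<std::string, int> counts;
+    //     counts["x"] += 1;
+    //
+    // the first access default-constructs the mapped int to 0 through this
+    // functor, then += bumps it; later accesses find the existing entry.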
+ struct DefaultValue + { + inline std::pair operator()(const Key& key) const + { + return std::make_pair(key, T()); + } + }; + + // The actual data + typedef sparse_hashtable::type, T>, Key, HashFcn, SelectKey, + SetKey, EqualKey, Alloc> ht; + +public: + typedef typename ht::key_type key_type; + typedef T data_type; + typedef T mapped_type; + typedef typename std::pair value_type; + typedef typename ht::hasher hasher; + typedef typename ht::key_equal key_equal; + typedef Alloc allocator_type; + + typedef typename ht::size_type size_type; + typedef typename ht::difference_type difference_type; + typedef typename ht::pointer pointer; + typedef typename ht::const_pointer const_pointer; + typedef typename ht::reference reference; + typedef typename ht::const_reference const_reference; + + typedef typename ht::iterator iterator; + typedef typename ht::const_iterator const_iterator; + typedef typename ht::local_iterator local_iterator; + typedef typename ht::const_local_iterator const_local_iterator; + + // Iterator functions + iterator begin() { return rep.begin(); } + iterator end() { return rep.end(); } + const_iterator begin() const { return rep.cbegin(); } + const_iterator end() const { return rep.cend(); } + const_iterator cbegin() const { return rep.cbegin(); } + const_iterator cend() const { return rep.cend(); } + + // These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements. + local_iterator begin(size_type i) { return rep.begin(i); } + local_iterator end(size_type i) { return rep.end(i); } + const_local_iterator begin(size_type i) const { return rep.begin(i); } + const_local_iterator end(size_type i) const { return rep.end(i); } + const_local_iterator cbegin(size_type i) const { return rep.cbegin(i); } + const_local_iterator cend(size_type i) const { return rep.cend(i); } + + // Accessor functions + // ------------------ + allocator_type get_allocator() const { return rep.get_allocator(); } + hasher hash_funct() const { return rep.hash_funct(); } + hasher hash_function() const { return hash_funct(); } + key_equal key_eq() const { return rep.key_eq(); } + + + // Constructors + // ------------ + explicit sparse_hash_map(size_type n = 0, + const hasher& hf = hasher(), + const key_equal& eql = key_equal(), + const allocator_type& alloc = allocator_type()) + : rep(n, hf, eql, SelectKey(), SetKey(), alloc) + { + } + + explicit sparse_hash_map(const allocator_type& alloc) : + rep(0, hasher(), key_equal(), SelectKey(), SetKey(), alloc) + { + } + + sparse_hash_map(size_type n, const allocator_type& alloc) : + rep(n, hasher(), key_equal(), SelectKey(), SetKey(), alloc) + { + } + + sparse_hash_map(size_type n, const hasher& hf, const allocator_type& alloc) : + rep(n, hf, key_equal(), SelectKey(), SetKey(), alloc) + { + } + + template + sparse_hash_map(InputIterator f, InputIterator l, + size_type n = 0, + const hasher& hf = hasher(), + const key_equal& eql = key_equal(), + const allocator_type& alloc = allocator_type()) + : rep(n, hf, eql, SelectKey(), SetKey(), alloc) + { + rep.insert(f, l); + } + + template + sparse_hash_map(InputIterator f, InputIterator l, + size_type n, const allocator_type& alloc) + : rep(n, hasher(), key_equal(), SelectKey(), SetKey(), alloc) + { + rep.insert(f, l); + } + + template + sparse_hash_map(InputIterator f, InputIterator l, + size_type n, const hasher& hf, const allocator_type& alloc) + : rep(n, hf, key_equal(), SelectKey(), SetKey(), alloc) + { + rep.insert(f, l); + } + + sparse_hash_map(const sparse_hash_map &o) : + rep(o.rep) + {} + + 
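+    // Illustrative use of the constructors above (not part of this patch):
+    //
+    //     spp::sparse_hash_map<std::string, int> m(100);  // ~100 entries expected
+    //     std::vector<std::pair<std::string, int> > v;    // any input range works
+    //     spp::sparse_hash_map<std::string, int> m2(v.begin(), v.end(), v.size());
+    //
+    // The size argument is only a sizing hint; the table still grows as needed.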
sparse_hash_map(const sparse_hash_map &o, + const allocator_type& alloc) : + rep(o.rep, alloc) + {} + +#if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) + sparse_hash_map(const sparse_hash_map &&o) : + rep(std::move(o.rep)) + {} + + sparse_hash_map(const sparse_hash_map &&o, + const allocator_type& alloc) : + rep(std::move(o.rep), alloc) + {} +#endif + +#if !defined(SPP_NO_CXX11_HDR_INITIALIZER_LIST) + sparse_hash_map(std::initializer_list init, + size_type n = 0, + const hasher& hf = hasher(), + const key_equal& eql = key_equal(), + const allocator_type& alloc = allocator_type()) + : rep(n, hf, eql, SelectKey(), SetKey(), alloc) + { + rep.insert(init.begin(), init.end()); + } + + sparse_hash_map(std::initializer_list init, + size_type n, const allocator_type& alloc) : + rep(n, hasher(), key_equal(), SelectKey(), SetKey(), alloc) + { + rep.insert(init.begin(), init.end()); + } + + sparse_hash_map(std::initializer_list init, + size_type n, const hasher& hf, const allocator_type& alloc) : + rep(n, hf, key_equal(), SelectKey(), SetKey(), alloc) + { + rep.insert(init.begin(), init.end()); + } + + sparse_hash_map& operator=(std::initializer_list init) + { + rep.clear(); + rep.insert(init.begin(), init.end()); + return *this; + } + + void insert(std::initializer_list init) + { + rep.insert(init.begin(), init.end()); + } +#endif + + sparse_hash_map& operator=(const sparse_hash_map &o) + { + rep = o.rep; + return *this; + } + + void clear() { rep.clear(); } + void swap(sparse_hash_map& hs) { rep.swap(hs.rep); } + + // Functions concerning size + // ------------------------- + size_type size() const { return rep.size(); } + size_type max_size() const { return rep.max_size(); } + bool empty() const { return rep.empty(); } + size_type bucket_count() const { return rep.bucket_count(); } + size_type max_bucket_count() const { return rep.max_bucket_count(); } + + size_type bucket_size(size_type i) const { return rep.bucket_size(i); } + size_type bucket(const key_type& key) const { return rep.bucket(key); } + float load_factor() const { return size() * 1.0f / bucket_count(); } + + float max_load_factor() const { return rep.get_enlarge_factor(); } + void max_load_factor(float grow) { rep.set_enlarge_factor(grow); } + + float min_load_factor() const { return rep.get_shrink_factor(); } + void min_load_factor(float shrink){ rep.set_shrink_factor(shrink); } + + void set_resizing_parameters(float shrink, float grow) + { + rep.set_resizing_parameters(shrink, grow); + } + + void resize(size_type cnt) { rep.resize(cnt); } + void rehash(size_type cnt) { resize(cnt); } // c++11 name + void reserve(size_type cnt) { resize(cnt); } // c++11 + + // Lookup + // ------ + iterator find(const key_type& key) { return rep.find(key); } + const_iterator find(const key_type& key) const { return rep.find(key); } + + mapped_type& operator[](const key_type& key) + { + return rep.template find_or_insert(key).second; + } + + size_type count(const key_type& key) const { return rep.count(key); } + + std::pair + equal_range(const key_type& key) { return rep.equal_range(key); } + + std::pair + equal_range(const key_type& key) const { return rep.equal_range(key); } + + mapped_type& at(const key_type& key) + { + iterator it = rep.find(key); + if (it == rep.end()) + throw_exception(std::out_of_range("at: key not present")); + return it->second; + } + + const mapped_type& at(const key_type& key) const + { + const_iterator it = rep.find(key); + if (it == rep.cend()) + throw_exception(std::out_of_range("at: key not present")); + return it->second; 
+ } + + // Insert + // ------ + std::pair + insert(const value_type& obj) { return rep.insert(obj); } + + template + void insert(InputIterator f, InputIterator l) { rep.insert(f, l); } + + void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } + + iterator insert(iterator /*unused*/, const value_type& obj) { return insert(obj).first; } + iterator insert(const_iterator /*unused*/, const value_type& obj) { return insert(obj).first; } + + // Deleted key routines - just to keep google test framework happy + // we don't actually use the deleted key + // --------------------------------------------------------------- + void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } + void clear_deleted_key() { rep.clear_deleted_key(); } + key_type deleted_key() const { return rep.deleted_key(); } + + // Erase + // ----- + size_type erase(const key_type& key) { return rep.erase(key); } + iterator erase(iterator it) { return rep.erase(it); } + iterator erase(iterator f, iterator l) { return rep.erase(f, l); } + iterator erase(const_iterator it) { return rep.erase(it); } + iterator erase(const_iterator f, const_iterator l){ return rep.erase(f, l); } + + // Comparison + // ---------- + bool operator==(const sparse_hash_map& hs) const { return rep == hs.rep; } + bool operator!=(const sparse_hash_map& hs) const { return rep != hs.rep; } + + + // I/O -- this is an add-on for writing metainformation to disk + // + // For maximum flexibility, this does not assume a particular + // file type (though it will probably be a FILE *). We just pass + // the fp through to rep. + + // If your keys and values are simple enough, you can pass this + // serializer to serialize()/unserialize(). "Simple enough" means + // value_type is a POD type that contains no pointers. Note, + // however, we don't try to normalize endianness. + // --------------------------------------------------------------- + typedef typename ht::NopointerSerializer NopointerSerializer; + + // serializer: a class providing operator()(OUTPUT*, const value_type&) + // (writing value_type to OUTPUT). You can specify a + // NopointerSerializer object if appropriate (see above). + // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a + // pointer to a class providing size_t Write(const void*, size_t), + // which writes a buffer into a stream (which fp presumably + // owns) and returns the number of bytes successfully written. + // Note basic_ostream is not currently supported. + // --------------------------------------------------------------- + template + bool serialize(ValueSerializer serializer, OUTPUT* fp) + { + return rep.serialize(serializer, fp); + } + + // serializer: a functor providing operator()(INPUT*, value_type*) + // (reading from INPUT and into value_type). You can specify a + // NopointerSerializer object if appropriate (see above). + // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a + // pointer to a class providing size_t Read(void*, size_t), + // which reads into a buffer from a stream (which fp presumably + // owns) and returns the number of bytes successfully read. + // Note basic_istream is not currently supported. + // NOTE: Since value_type is std::pair, ValueSerializer + // may need to do a const cast in order to fill in the key. + // NOTE: if Key or T are not POD types, the serializer MUST use + // placement-new to initialize their values, rather than a normal + // equals-assignment or similar. (The value_type* passed into the + // serializer points to garbage memory.) 
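+    // Illustrative sketch of such a serializer (not part of this patch),
+    // assuming a sparse_hash_map<std::string, int>, a FILE* stream, and a
+    // made-up length-prefixed format:
+    //
+    //     struct StringIntSerializer
+    //     {
+    //         bool operator()(FILE* fp, const std::pair<const std::string, int>& v) const
+    //         {
+    //             uint32_t len = (uint32_t)v.first.size();
+    //             return fwrite(&len, sizeof(len), 1, fp) == 1
+    //                 && fwrite(v.first.data(), 1, len, fp) == len
+    //                 && fwrite(&v.second, sizeof(v.second), 1, fp) == 1;
+    //         }
+    //         bool operator()(FILE* fp, std::pair<const std::string, int>* v) const
+    //         {
+    //             uint32_t len;
+    //             if (fread(&len, sizeof(len), 1, fp) != 1) return false;
+    //             std::vector<char> buf(len);
+    //             if (len && fread(&buf[0], 1, len, fp) != len) return false;
+    //             // the key is not a POD: placement-new into the garbage memory
+    //             new (const_cast<std::string*>(&v->first)) std::string(buf.begin(), buf.end());
+    //             return fread(&v->second, sizeof(v->second), 1, fp) == 1;
+    //         }
+    //     };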
+ // --------------------------------------------------------------- + template + bool unserialize(ValueSerializer serializer, INPUT* fp) + { + return rep.unserialize(serializer, fp); + } + + // The four methods below are DEPRECATED. + // Use serialize() and unserialize() for new code. + // ----------------------------------------------- + template + bool write_metadata(OUTPUT *fp) { return rep.write_metadata(fp); } + + template + bool read_metadata(INPUT *fp) { return rep.read_metadata(fp); } + + template + bool write_nopointer_data(OUTPUT *fp) { return rep.write_nopointer_data(fp); } + + template + bool read_nopointer_data(INPUT *fp) { return rep.read_nopointer_data(fp); } + + +private: + // The actual data + // --------------- + ht rep; +}; + +// We need a global swap as well +template +inline void swap(sparse_hash_map& hm1, + sparse_hash_map& hm2) +{ + hm1.swap(hm2); +} + +// ---------------------------------------------------------------------- +// S P A R S E _ H A S H _ S E T +// ---------------------------------------------------------------------- + +template , + class EqualKey = std::equal_to, + class Alloc = libc_allocator_with_realloc > +class sparse_hash_set +{ +private: + // Apparently identity is not stl-standard, so we define our own + struct Identity + { + typedef const Value& result_type; + const Value& operator()(const Value& v) const { return v; } + }; + + struct SetKey + { + void operator()(Value* value, const Value& new_key) const + { + *value = new_key; + } + }; + + typedef sparse_hashtable ht; + +public: + typedef typename ht::key_type key_type; + typedef typename ht::value_type value_type; + typedef typename ht::hasher hasher; + typedef typename ht::key_equal key_equal; + typedef Alloc allocator_type; + + typedef typename ht::size_type size_type; + typedef typename ht::difference_type difference_type; + typedef typename ht::const_pointer pointer; + typedef typename ht::const_pointer const_pointer; + typedef typename ht::const_reference reference; + typedef typename ht::const_reference const_reference; + + typedef typename ht::const_iterator iterator; + typedef typename ht::const_iterator const_iterator; + typedef typename ht::const_local_iterator local_iterator; + typedef typename ht::const_local_iterator const_local_iterator; + + + // Iterator functions -- recall all iterators are const + iterator begin() const { return rep.begin(); } + iterator end() const { return rep.end(); } + const_iterator cbegin() const { return rep.cbegin(); } + const_iterator cend() const { return rep.cend(); } + + // These come from tr1's unordered_set. For us, a bucket has 0 or 1 elements. 
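+    // Illustrative use (not part of this patch): with s a sparse_hash_set and
+    // b a bucket index, the loop body runs zero times or once:
+    //
+    //     for (const_local_iterator it = s.cbegin(b); it != s.cend(b); ++it)
+    //         use(*it);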
+ local_iterator begin(size_type i) const { return rep.begin(i); } + local_iterator end(size_type i) const { return rep.end(i); } + local_iterator cbegin(size_type i) const { return rep.cbegin(i); } + local_iterator cend(size_type i) const { return rep.cend(i); } + + + // Accessor functions + // ------------------ + allocator_type get_allocator() const { return rep.get_allocator(); } + hasher hash_funct() const { return rep.hash_funct(); } + hasher hash_function() const { return hash_funct(); } // tr1 name + key_equal key_eq() const { return rep.key_eq(); } + + + // Constructors + // ------------ + explicit sparse_hash_set(size_type n = 0, + const hasher& hf = hasher(), + const key_equal& eql = key_equal(), + const allocator_type& alloc = allocator_type()) : + rep(n, hf, eql, Identity(), SetKey(), alloc) + { + } + + explicit sparse_hash_set(const allocator_type& alloc) : + rep(0, hasher(), key_equal(), Identity(), SetKey(), alloc) + { + } + + sparse_hash_set(size_type n, const allocator_type& alloc) : + rep(n, hasher(), key_equal(), Identity(), SetKey(), alloc) + { + } + + sparse_hash_set(size_type n, const hasher& hf, + const allocator_type& alloc) : + rep(n, hf, key_equal(), Identity(), SetKey(), alloc) + { + } + + template + sparse_hash_set(InputIterator f, InputIterator l, + size_type n = 0, + const hasher& hf = hasher(), + const key_equal& eql = key_equal(), + const allocator_type& alloc = allocator_type()) + : rep(n, hf, eql, Identity(), SetKey(), alloc) + { + rep.insert(f, l); + } + + template + sparse_hash_set(InputIterator f, InputIterator l, + size_type n, const allocator_type& alloc) + : rep(n, hasher(), key_equal(), Identity(), SetKey(), alloc) + { + rep.insert(f, l); + } + + template + sparse_hash_set(InputIterator f, InputIterator l, + size_type n, const hasher& hf, const allocator_type& alloc) + : rep(n, hf, key_equal(), Identity(), SetKey(), alloc) + { + rep.insert(f, l); + } + + sparse_hash_set(const sparse_hash_set &o) : + rep(o.rep) + {} + + sparse_hash_set(const sparse_hash_set &o, + const allocator_type& alloc) : + rep(o.rep, alloc) + {} + +#if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) + sparse_hash_set(const sparse_hash_set &&o) : + rep(std::move(o.rep)) + {} + + sparse_hash_set(const sparse_hash_set &&o, + const allocator_type& alloc) : + rep(std::move(o.rep), alloc) + {} +#endif + +#if !defined(SPP_NO_CXX11_HDR_INITIALIZER_LIST) + sparse_hash_set(std::initializer_list init, + size_type n = 0, + const hasher& hf = hasher(), + const key_equal& eql = key_equal(), + const allocator_type& alloc = allocator_type()) : + rep(n, hf, eql, Identity(), SetKey(), alloc) + { + rep.insert(init.begin(), init.end()); + } + + sparse_hash_set(std::initializer_list init, + size_type n, const allocator_type& alloc) : + rep(n, hasher(), key_equal(), Identity(), SetKey(), alloc) + { + rep.insert(init.begin(), init.end()); + } + + sparse_hash_set(std::initializer_list init, + size_type n, const hasher& hf, + const allocator_type& alloc) : + rep(n, hf, key_equal(), Identity(), SetKey(), alloc) + { + rep.insert(init.begin(), init.end()); + } + + sparse_hash_set& operator=(std::initializer_list init) + { + rep.clear(); + rep.insert(init.begin(), init.end()); + return *this; + } + + void insert(std::initializer_list init) + { + rep.insert(init.begin(), init.end()); + } + +#endif + + sparse_hash_set& operator=(const sparse_hash_set &o) + { + rep = o.rep; + return *this; + } + + void clear() { rep.clear(); } + void swap(sparse_hash_set& hs) { rep.swap(hs.rep); } + + + // Functions concerning size 
+ // ------------------------- + size_type size() const { return rep.size(); } + size_type max_size() const { return rep.max_size(); } + bool empty() const { return rep.empty(); } + size_type bucket_count() const { return rep.bucket_count(); } + size_type max_bucket_count() const { return rep.max_bucket_count(); } + + size_type bucket_size(size_type i) const { return rep.bucket_size(i); } + size_type bucket(const key_type& key) const { return rep.bucket(key); } + + float load_factor() const { return size() * 1.0f / bucket_count(); } + + float max_load_factor() const { return rep.get_enlarge_factor(); } + void max_load_factor(float grow) { rep.set_enlarge_factor(grow); } + + float min_load_factor() const { return rep.get_shrink_factor(); } + void min_load_factor(float shrink){ rep.set_shrink_factor(shrink); } + + void set_resizing_parameters(float shrink, float grow) + { + rep.set_resizing_parameters(shrink, grow); + } + + void resize(size_type cnt) { rep.resize(cnt); } + void rehash(size_type cnt) { resize(cnt); } // c++11 name + void reserve(size_type cnt) { resize(cnt); } // c++11 + + // Lookup + // ------ + iterator find(const key_type& key) const { return rep.find(key); } + + size_type count(const key_type& key) const { return rep.count(key); } + + std::pair + equal_range(const key_type& key) const { return rep.equal_range(key); } + +#if 0 && !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES) + template + pair emplace(Args&&... args) + { + return rep.emplace_unique(std::forward(args)...); + } + + template + iterator emplace_hint(const_iterator p, Args&&... args) + { + return rep.emplace_unique(std::forward(args)...).first; + } +#endif + + // Insert + // ------ + std::pair insert(const value_type& obj) + { + std::pair p = rep.insert(obj); + return std::pair(p.first, p.second); // const to non-const + } + + template + void insert(InputIterator f, InputIterator l) { rep.insert(f, l); } + + void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } + + iterator insert(iterator /*unused*/, const value_type& obj) { return insert(obj).first; } + + // Deleted key - do nothing - just to keep google test framework happy + // ------------------------------------------------------------------- + void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } + void clear_deleted_key() { rep.clear_deleted_key(); } + key_type deleted_key() const { return rep.deleted_key(); } + + // Erase + // ----- + size_type erase(const key_type& key) { return rep.erase(key); } + iterator erase(iterator it) { return rep.erase(it); } + iterator erase(iterator f, iterator l) { return rep.erase(f, l); } + + // Comparison + // ---------- + bool operator==(const sparse_hash_set& hs) const { return rep == hs.rep; } + bool operator!=(const sparse_hash_set& hs) const { return rep != hs.rep; } + + + // I/O -- this is an add-on for writing metainformation to disk + // + // For maximum flexibility, this does not assume a particular + // file type (though it will probably be a FILE *). We just pass + // the fp through to rep. + + // If your keys and values are simple enough, you can pass this + // serializer to serialize()/unserialize(). "Simple enough" means + // value_type is a POD type that contains no pointers. Note, + // however, we don't try to normalize endianness. + // --------------------------------------------------------------- + typedef typename ht::NopointerSerializer NopointerSerializer; + + // serializer: a class providing operator()(OUTPUT*, const value_type&) + // (writing value_type to OUTPUT). 
You can specify a + // NopointerSerializer object if appropriate (see above). + // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a + // pointer to a class providing size_t Write(const void*, size_t), + // which writes a buffer into a stream (which fp presumably + // owns) and returns the number of bytes successfully written. + // Note basic_ostream is not currently supported. + // --------------------------------------------------------------- + template + bool serialize(ValueSerializer serializer, OUTPUT* fp) + { + return rep.serialize(serializer, fp); + } + + // serializer: a functor providing operator()(INPUT*, value_type*) + // (reading from INPUT and into value_type). You can specify a + // NopointerSerializer object if appropriate (see above). + // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a + // pointer to a class providing size_t Read(void*, size_t), + // which reads into a buffer from a stream (which fp presumably + // owns) and returns the number of bytes successfully read. + // Note basic_istream is not currently supported. + // NOTE: Since value_type is const Key, ValueSerializer + // may need to do a const cast in order to fill in the key. + // NOTE: if Key is not a POD type, the serializer MUST use + // placement-new to initialize its value, rather than a normal + // equals-assignment or similar. (The value_type* passed into + // the serializer points to garbage memory.) + // --------------------------------------------------------------- + template + bool unserialize(ValueSerializer serializer, INPUT* fp) + { + return rep.unserialize(serializer, fp); + } + + // The four methods below are DEPRECATED. + // Use serialize() and unserialize() for new code. + // ----------------------------------------------- + template + bool write_metadata(OUTPUT *fp) { return rep.write_metadata(fp); } + + template + bool read_metadata(INPUT *fp) { return rep.read_metadata(fp); } + + template + bool write_nopointer_data(OUTPUT *fp) { return rep.write_nopointer_data(fp); } + + template + bool read_nopointer_data(INPUT *fp) { return rep.read_nopointer_data(fp); } + +private: + // The actual data + // --------------- + ht rep; +}; + +template +inline void swap(sparse_hash_set& hs1, + sparse_hash_set& hs2) +{ + hs1.swap(hs2); +} + + +SPP_END_NAMESPACE + +#endif // sparsepp_h_guard_ diff --git a/resources/3rdparty/sparsepp/spp_test.cc b/resources/3rdparty/sparsepp/spp_test.cc new file mode 100644 index 000000000..281db9154 --- /dev/null +++ b/resources/3rdparty/sparsepp/spp_test.cc @@ -0,0 +1,2923 @@ +// ---------------------------------------------------------------------- +// Copyright (c) 2016, Steven Gregory Popovitch - greg7mdp@gmail.com +// All rights reserved. +// +// This work is derived from Google's sparsehash library +// (see https://github.com/sparsehash/sparsehash) whose copyright appears +// below this one. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// * The name of Steven Gregory Popovitch may not be used to +// endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// ---------------------------------------------------------------------- + +// ---------------------------------------------------------------------- +// Copyright (c) 2010, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// ---------------------------------------------------------------------- + +#ifdef _MSC_VER + #pragma warning( disable : 4820 ) // '6' bytes padding added after data member... 
+ #pragma warning( disable : 4710 ) // function not inlined + #pragma warning( disable : 4514 ) // unreferenced inline function has been removed + #pragma warning( disable : 4996 ) // 'fopen': This function or variable may be unsafe +#endif + +#include "sparsepp.h" + +#ifdef _MSC_VER + #pragma warning( disable : 4127 ) // conditional expression is constant + #pragma warning(push, 0) +#endif + + +#include +#include // for size_t +#include +#include +#include +#include +#include +#include +#include // for class typeinfo (returned by typeid) +#include +#include // for length_error + +namespace sparsehash_internal = SPP_NAMESPACE::sparsehash_internal; +using SPP_NAMESPACE::sparsetable; +using SPP_NAMESPACE::sparse_hashtable; +using SPP_NAMESPACE::sparse_hash_map; +using SPP_NAMESPACE::sparse_hash_set; + + + +// --------------------------------------------------------------------- +// --------------------------------------------------------------------- +#ifndef _MSC_VER // windows defines its own version + #define _strdup strdup + #ifdef __MINGW32__ // mingw has trouble writing to /tmp + static std::string TmpFile(const char* basename) + { + return std::string("./#") + basename; + } + #endif +#else + #pragma warning(disable : 4996) + #define snprintf sprintf_s + #define WIN32_LEAN_AND_MEAN /* We always want minimal includes */ + #include + std::string TmpFile(const char* basename) + { + char tmppath_buffer[1024]; + int tmppath_len = GetTempPathA(sizeof(tmppath_buffer), tmppath_buffer); + if (tmppath_len <= 0 || tmppath_len >= sizeof(tmppath_buffer)) + return basename; // an error, so just bail on tmppath + + sprintf_s(tmppath_buffer + tmppath_len, 1024 - tmppath_len, "\\%s", basename); + return tmppath_buffer; + } +#endif + +#ifdef _MSC_VER + #pragma warning(pop) +#endif + + +// --------------------------------------------------------------------- +// This is the "default" interface, which just passes everything +// through to the underlying hashtable. You'll need to subclass it to +// specialize behavior for an individual hashtable. 
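+// Roughly speaking (an illustrative sketch, not from the library), a subclass
+// supplies the key-extraction hooks and capability flags that are declared
+// pure virtual below, along the lines of:
+//
+//     class MyMapInterface : public BaseHashtableInterface<SomeHashMap>
+//     {
+//         key_type get_key(const value_type& v) const  { return v.first; }
+//         key_type it_to_key(const iterator& it) const { return it->first; }
+//         // ... same for the const_/local_/const_local_ iterator overloads
+//         bool supports_brackets() const               { return true; }
+//         // ... and the remaining supports_*() flags
+//     };
+//
+// MyMapInterface and SomeHashMap are placeholder names for the example.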
+// --------------------------------------------------------------------- +template +class BaseHashtableInterface +{ +public: + virtual ~BaseHashtableInterface() {} + + typedef typename HT::key_type key_type; + typedef typename HT::value_type value_type; + typedef typename HT::hasher hasher; + typedef typename HT::key_equal key_equal; + typedef typename HT::allocator_type allocator_type; + + typedef typename HT::size_type size_type; + typedef typename HT::difference_type difference_type; + typedef typename HT::pointer pointer; + typedef typename HT::const_pointer const_pointer; + typedef typename HT::reference reference; + typedef typename HT::const_reference const_reference; + + class const_iterator; + + class iterator : public HT::iterator + { + public: + iterator() : parent_(NULL) { } // this allows code like "iterator it;" + iterator(typename HT::iterator it, const BaseHashtableInterface* parent) + : HT::iterator(it), parent_(parent) { } + key_type key() { return parent_->it_to_key(*this); } + + private: + friend class BaseHashtableInterface::const_iterator; // for its ctor + const BaseHashtableInterface* parent_; + }; + + class const_iterator : public HT::const_iterator + { + public: + const_iterator() : parent_(NULL) { } + const_iterator(typename HT::const_iterator it, + const BaseHashtableInterface* parent) + : HT::const_iterator(it), parent_(parent) { } + + const_iterator(typename HT::iterator it, + BaseHashtableInterface* parent) + : HT::const_iterator(it), parent_(parent) { } + + // The parameter type here *should* just be "iterator", but MSVC + // gets confused by that, so I'm overly specific. + const_iterator(typename BaseHashtableInterface::iterator it) + : HT::const_iterator(it), parent_(it.parent_) { } + + key_type key() { return parent_->it_to_key(*this); } + + private: + const BaseHashtableInterface* parent_; + }; + + class const_local_iterator; + + class local_iterator : public HT::local_iterator + { + public: + local_iterator() : parent_(NULL) { } + local_iterator(typename HT::local_iterator it, + const BaseHashtableInterface* parent) + : HT::local_iterator(it), parent_(parent) { } + key_type key() { return parent_->it_to_key(*this); } + + private: + friend class BaseHashtableInterface::const_local_iterator; // for its ctor + const BaseHashtableInterface* parent_; + }; + + class const_local_iterator : public HT::const_local_iterator + { + public: + const_local_iterator() : parent_(NULL) { } + const_local_iterator(typename HT::const_local_iterator it, + const BaseHashtableInterface* parent) + : HT::const_local_iterator(it), parent_(parent) { } + const_local_iterator(typename HT::local_iterator it, + BaseHashtableInterface* parent) + : HT::const_local_iterator(it), parent_(parent) { } + const_local_iterator(local_iterator it) + : HT::const_local_iterator(it), parent_(it.parent_) { } + key_type key() { return parent_->it_to_key(*this); } + + private: + const BaseHashtableInterface* parent_; + }; + + iterator begin() { return iterator(ht_.begin(), this); } + iterator end() { return iterator(ht_.end(), this); } + const_iterator begin() const { return const_iterator(ht_.begin(), this); } + const_iterator end() const { return const_iterator(ht_.end(), this); } + local_iterator begin(size_type i) { return local_iterator(ht_.begin(i), this); } + local_iterator end(size_type i) { return local_iterator(ht_.end(i), this); } + const_local_iterator begin(size_type i) const { return const_local_iterator(ht_.begin(i), this); } + const_local_iterator end(size_type i) const { return 
const_local_iterator(ht_.end(i), this); } + + hasher hash_funct() const { return ht_.hash_funct(); } + hasher hash_function() const { return ht_.hash_function(); } + key_equal key_eq() const { return ht_.key_eq(); } + allocator_type get_allocator() const { return ht_.get_allocator(); } + + BaseHashtableInterface(size_type expected_max_items_in_table, + const hasher& hf, + const key_equal& eql, + const allocator_type& alloc) + : ht_(expected_max_items_in_table, hf, eql, alloc) { } + + // Not all ht_'s support this constructor: you should only call it + // from a subclass if you know your ht supports it. Otherwise call + // the previous constructor, followed by 'insert(f, l);'. + template + BaseHashtableInterface(InputIterator f, InputIterator l, + size_type expected_max_items_in_table, + const hasher& hf, + const key_equal& eql, + const allocator_type& alloc) + : ht_(f, l, expected_max_items_in_table, hf, eql, alloc) { + } + + // This is the version of the constructor used by dense_*, which + // requires an empty key in the constructor. + template + BaseHashtableInterface(InputIterator f, InputIterator l, key_type empty_k, + size_type expected_max_items_in_table, + const hasher& hf, + const key_equal& eql, + const allocator_type& alloc) + : ht_(f, l, empty_k, expected_max_items_in_table, hf, eql, alloc) { + } + + // This is the constructor appropriate for {dense,sparse}hashtable. + template + BaseHashtableInterface(size_type expected_max_items_in_table, + const hasher& hf, + const key_equal& eql, + const ExtractKey& ek, + const SetKey& sk, + const allocator_type& alloc) + : ht_(expected_max_items_in_table, hf, eql, ek, sk, alloc) { } + + + void clear() { ht_.clear(); } + void swap(BaseHashtableInterface& other) { ht_.swap(other.ht_); } + + // Only part of the API for some hashtable implementations. + void clear_no_resize() { clear(); } + + size_type size() const { return ht_.size(); } + size_type max_size() const { return ht_.max_size(); } + bool empty() const { return ht_.empty(); } + size_type bucket_count() const { return ht_.bucket_count(); } + size_type max_bucket_count() const { return ht_.max_bucket_count(); } + + size_type bucket_size(size_type i) const { + return ht_.bucket_size(i); + } + size_type bucket(const key_type& key) const { + return ht_.bucket(key); + } + + float load_factor() const { return ht_.load_factor(); } + float max_load_factor() const { return ht_.max_load_factor(); } + void max_load_factor(float grow) { ht_.max_load_factor(grow); } + float min_load_factor() const { return ht_.min_load_factor(); } + void min_load_factor(float shrink) { ht_.min_load_factor(shrink); } + void set_resizing_parameters(float shrink, float grow) { + ht_.set_resizing_parameters(shrink, grow); + } + + void resize(size_type hint) { ht_.resize(hint); } + void rehash(size_type hint) { ht_.rehash(hint); } + + iterator find(const key_type& key) { + return iterator(ht_.find(key), this); + } + + const_iterator find(const key_type& key) const { + return const_iterator(ht_.find(key), this); + } + + // Rather than try to implement operator[], which doesn't make much + // sense for set types, we implement two methods: bracket_equal and + // bracket_assign. By default, bracket_equal(a, b) returns true if + // ht[a] == b, and false otherwise. (Note that this follows + // operator[] semantics exactly, including inserting a if it's not + // already in the hashtable, before doing the equality test.) 
For + // sets, which have no operator[], b is ignored, and bracket_equal + // returns true if key is in the set and false otherwise. + // bracket_assign(a, b) is equivalent to ht[a] = b. For sets, b is + // ignored, and bracket_assign is equivalent to ht.insert(a). + template + bool bracket_equal(const key_type& key, const AssignValue& expected) { + return ht_[key] == expected; + } + + template + void bracket_assign(const key_type& key, const AssignValue& value) { + ht_[key] = value; + } + + size_type count(const key_type& key) const { return ht_.count(key); } + + std::pair equal_range(const key_type& key) + { + std::pair r + = ht_.equal_range(key); + return std::pair(iterator(r.first, this), + iterator(r.second, this)); + } + std::pair equal_range(const key_type& key) const + { + std::pair r + = ht_.equal_range(key); + return std::pair( + const_iterator(r.first, this), const_iterator(r.second, this)); + } + + const_iterator random_element(class ACMRandom* r) const { + return const_iterator(ht_.random_element(r), this); + } + + iterator random_element(class ACMRandom* r) { + return iterator(ht_.random_element(r), this); + } + + std::pair insert(const value_type& obj) { + std::pair r = ht_.insert(obj); + return std::pair(iterator(r.first, this), r.second); + } + template + void insert(InputIterator f, InputIterator l) { + ht_.insert(f, l); + } + void insert(typename HT::const_iterator f, typename HT::const_iterator l) { + ht_.insert(f, l); + } + iterator insert(typename HT::iterator, const value_type& obj) { + return iterator(insert(obj).first, this); + } + + // These will commonly need to be overridden by the child. + void set_empty_key(const key_type& k) { ht_.set_empty_key(k); } + void clear_empty_key() { ht_.clear_empty_key(); } + key_type empty_key() const { return ht_.empty_key(); } + + void set_deleted_key(const key_type& k) { ht_.set_deleted_key(k); } + void clear_deleted_key() { ht_.clear_deleted_key(); } + key_type deleted_key() const { return ht_.deleted_key(); } + + size_type erase(const key_type& key) { return ht_.erase(key); } + void erase(typename HT::iterator it) { ht_.erase(it); } + void erase(typename HT::iterator f, typename HT::iterator l) { + ht_.erase(f, l); + } + + bool operator==(const BaseHashtableInterface& other) const { + return ht_ == other.ht_; + } + bool operator!=(const BaseHashtableInterface& other) const { + return ht_ != other.ht_; + } + + template + bool serialize(ValueSerializer serializer, OUTPUT *fp) { + return ht_.serialize(serializer, fp); + } + template + bool unserialize(ValueSerializer serializer, INPUT *fp) { + return ht_.unserialize(serializer, fp); + } + + template + bool write_metadata(OUTPUT *fp) { + return ht_.write_metadata(fp); + } + template + bool read_metadata(INPUT *fp) { + return ht_.read_metadata(fp); + } + template + bool write_nopointer_data(OUTPUT *fp) { + return ht_.write_nopointer_data(fp); + } + template + bool read_nopointer_data(INPUT *fp) { + return ht_.read_nopointer_data(fp); + } + + // low-level stats + int num_table_copies() const { return (int)ht_.num_table_copies(); } + + // Not part of the hashtable API, but is provided to make testing easier. + virtual key_type get_key(const value_type& value) const = 0; + // All subclasses should define get_data(value_type) as well. I don't + // provide an abstract-virtual definition here, because the return type + // differs between subclasses (not all subclasses define data_type). 
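+    // For example, the map interface further down returns the mapped value,
+    //     typename ht::data_type get_data(const value_type& v) const { return v.second; }
+    // while the set interface just reports presence,
+    //     bool get_data(const value_type&) const { return true; }
+    // so no single virtual signature fits both.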
+ //virtual data_type get_data(const value_type& value) const = 0; + //virtual data_type default_data() const = 0; + + // These allow introspection into the interface. "Supports" means + // that the implementation of this functionality isn't a noop. + virtual bool supports_clear_no_resize() const = 0; + virtual bool supports_empty_key() const = 0; + virtual bool supports_deleted_key() const = 0; + virtual bool supports_brackets() const = 0; // has a 'real' operator[] + virtual bool supports_readwrite() const = 0; + virtual bool supports_num_table_copies() const = 0; + virtual bool supports_serialization() const = 0; + +protected: + HT ht_; + + // These are what subclasses have to define to get class-specific behavior + virtual key_type it_to_key(const iterator& it) const = 0; + virtual key_type it_to_key(const const_iterator& it) const = 0; + virtual key_type it_to_key(const local_iterator& it) const = 0; + virtual key_type it_to_key(const const_local_iterator& it) const = 0; +}; + +// --------------------------------------------------------------------- +// --------------------------------------------------------------------- +template , + class EqualKey = std::equal_to, + class Alloc = spp::libc_allocator_with_realloc > > +class HashtableInterface_SparseHashMap + : public BaseHashtableInterface< sparse_hash_map > +{ +private: + typedef sparse_hash_map ht; + typedef BaseHashtableInterface p; // parent + +public: + explicit HashtableInterface_SparseHashMap( + typename p::size_type expected_max_items = 0, + const typename p::hasher& hf = typename p::hasher(), + const typename p::key_equal& eql = typename p::key_equal(), + const typename p::allocator_type& alloc = typename p::allocator_type()) + : BaseHashtableInterface(expected_max_items, hf, eql, alloc) { } + + template + HashtableInterface_SparseHashMap( + InputIterator f, InputIterator l, + typename p::size_type expected_max_items = 0, + const typename p::hasher& hf = typename p::hasher(), + const typename p::key_equal& eql = typename p::key_equal(), + const typename p::allocator_type& alloc = typename p::allocator_type()) + : BaseHashtableInterface(f, l, expected_max_items, hf, eql, alloc) { } + + typename p::key_type get_key(const typename p::value_type& value) const { + return value.first; + } + typename ht::data_type get_data(const typename p::value_type& value) const { + return value.second; + } + typename ht::data_type default_data() const { + return typename ht::data_type(); + } + + bool supports_clear_no_resize() const { return false; } + bool supports_empty_key() const { return false; } + bool supports_deleted_key() const { return false; } + bool supports_brackets() const { return true; } + bool supports_readwrite() const { return true; } + bool supports_num_table_copies() const { return false; } + bool supports_serialization() const { return true; } + + void set_empty_key(const typename p::key_type&) { } + void clear_empty_key() { } + typename p::key_type empty_key() const { return typename p::key_type(); } + + int num_table_copies() const { return 0; } + + typedef typename ht::NopointerSerializer NopointerSerializer; + +protected: + template + friend void swap(HashtableInterface_SparseHashMap& a, + HashtableInterface_SparseHashMap& b); + + typename p::key_type it_to_key(const typename p::iterator& it) const { + return it->first; + } + typename p::key_type it_to_key(const typename p::const_iterator& it) const { + return it->first; + } + typename p::key_type it_to_key(const typename p::local_iterator& it) const { + return 
it->first; + } + typename p::key_type it_to_key(const typename p::const_local_iterator& it) const { + return it->first; + } +}; + +// --------------------------------------------------------------------- +// --------------------------------------------------------------------- +template +void swap(HashtableInterface_SparseHashMap& a, + HashtableInterface_SparseHashMap& b) +{ + swap(a.ht_, b.ht_); +} + + +// --------------------------------------------------------------------- +// --------------------------------------------------------------------- +template , + class EqualKey = std::equal_to, + class Alloc = spp::libc_allocator_with_realloc > +class HashtableInterface_SparseHashSet + : public BaseHashtableInterface< sparse_hash_set > +{ +private: + typedef sparse_hash_set ht; + typedef BaseHashtableInterface p; // parent + +public: + explicit HashtableInterface_SparseHashSet( + typename p::size_type expected_max_items = 0, + const typename p::hasher& hf = typename p::hasher(), + const typename p::key_equal& eql = typename p::key_equal(), + const typename p::allocator_type& alloc = typename p::allocator_type()) + : BaseHashtableInterface(expected_max_items, hf, eql, alloc) { } + + template + HashtableInterface_SparseHashSet( + InputIterator f, InputIterator l, + typename p::size_type expected_max_items = 0, + const typename p::hasher& hf = typename p::hasher(), + const typename p::key_equal& eql = typename p::key_equal(), + const typename p::allocator_type& alloc = typename p::allocator_type()) + : BaseHashtableInterface(f, l, expected_max_items, hf, eql, alloc) { } + + template + bool bracket_equal(const typename p::key_type& key, const AssignValue&) { + return this->ht_.find(key) != this->ht_.end(); + } + + template + void bracket_assign(const typename p::key_type& key, const AssignValue&) { + this->ht_.insert(key); + } + + typename p::key_type get_key(const typename p::value_type& value) const { + return value; + } + // For sets, the only 'data' is that an item is actually inserted. 
+ bool get_data(const typename p::value_type&) const { + return true; + } + bool default_data() const { + return true; + } + + bool supports_clear_no_resize() const { return false; } + bool supports_empty_key() const { return false; } + bool supports_deleted_key() const { return false; } + bool supports_brackets() const { return false; } + bool supports_readwrite() const { return true; } + bool supports_num_table_copies() const { return false; } + bool supports_serialization() const { return true; } + + void set_empty_key(const typename p::key_type&) { } + void clear_empty_key() { } + typename p::key_type empty_key() const { return typename p::key_type(); } + + int num_table_copies() const { return 0; } + + typedef typename ht::NopointerSerializer NopointerSerializer; + +protected: + template + friend void swap(HashtableInterface_SparseHashSet& a, + HashtableInterface_SparseHashSet& b); + + typename p::key_type it_to_key(const typename p::iterator& it) const { + return *it; + } + typename p::key_type it_to_key(const typename p::const_iterator& it) const { + return *it; + } + typename p::key_type it_to_key(const typename p::local_iterator& it) const { + return *it; + } + typename p::key_type it_to_key(const typename p::const_local_iterator& it) + const { + return *it; + } +}; + +// --------------------------------------------------------------------- +// --------------------------------------------------------------------- +template +void swap(HashtableInterface_SparseHashSet& a, + HashtableInterface_SparseHashSet& b) +{ + swap(a.ht_, b.ht_); +} + +// --------------------------------------------------------------------- +// --------------------------------------------------------------------- +template +class HashtableInterface_SparseHashtable + : public BaseHashtableInterface< sparse_hashtable > +{ +private: + typedef sparse_hashtable ht; + typedef BaseHashtableInterface p; // parent + +public: + explicit HashtableInterface_SparseHashtable( + typename p::size_type expected_max_items = 0, + const typename p::hasher& hf = typename p::hasher(), + const typename p::key_equal& eql = typename p::key_equal(), + const typename p::allocator_type& alloc = typename p::allocator_type()) + : BaseHashtableInterface(expected_max_items, hf, eql, + ExtractKey(), SetKey(), alloc) { } + + template + HashtableInterface_SparseHashtable( + InputIterator f, InputIterator l, + typename p::size_type expected_max_items = 0, + const typename p::hasher& hf = typename p::hasher(), + const typename p::key_equal& eql = typename p::key_equal(), + const typename p::allocator_type& alloc = typename p::allocator_type()) + : BaseHashtableInterface(expected_max_items, hf, eql, + ExtractKey(), SetKey(), alloc) { + this->insert(f, l); + } + + float max_load_factor() const { + float shrink, grow; + this->ht_.get_resizing_parameters(&shrink, &grow); + return grow; + } + void max_load_factor(float new_grow) { + float shrink, grow; + this->ht_.get_resizing_parameters(&shrink, &grow); + this->ht_.set_resizing_parameters(shrink, new_grow); + } + float min_load_factor() const { + float shrink, grow; + this->ht_.get_resizing_parameters(&shrink, &grow); + return shrink; + } + void min_load_factor(float new_shrink) { + float shrink, grow; + this->ht_.get_resizing_parameters(&shrink, &grow); + this->ht_.set_resizing_parameters(new_shrink, grow); + } + + template + bool bracket_equal(const typename p::key_type&, const AssignValue&) { + return false; + } + + template + void bracket_assign(const typename p::key_type&, const AssignValue&) { + 
} + + typename p::key_type get_key(const typename p::value_type& value) const { + return extract_key(value); + } + typename p::value_type get_data(const typename p::value_type& value) const { + return value; + } + typename p::value_type default_data() const { + return typename p::value_type(); + } + + bool supports_clear_no_resize() const { return false; } + bool supports_empty_key() const { return false; } + bool supports_deleted_key() const { return false; } + bool supports_brackets() const { return false; } + bool supports_readwrite() const { return true; } + bool supports_num_table_copies() const { return true; } + bool supports_serialization() const { return true; } + + void set_empty_key(const typename p::key_type&) { } + void clear_empty_key() { } + typename p::key_type empty_key() const { return typename p::key_type(); } + + // These tr1 names aren't defined for sparse_hashtable. + typename p::hasher hash_function() { return this->hash_funct(); } + void rehash(typename p::size_type hint) { this->resize(hint); } + + // TODO(csilvers): also support/test destructive_begin()/destructive_end()? + + typedef typename ht::NopointerSerializer NopointerSerializer; + +protected: + template + friend void swap( + HashtableInterface_SparseHashtable& a, + HashtableInterface_SparseHashtable& b); + + typename p::key_type it_to_key(const typename p::iterator& it) const { + return extract_key(*it); + } + typename p::key_type it_to_key(const typename p::const_iterator& it) const { + return extract_key(*it); + } + typename p::key_type it_to_key(const typename p::local_iterator& it) const { + return extract_key(*it); + } + typename p::key_type it_to_key(const typename p::const_local_iterator& it) + const { + return extract_key(*it); + } + +private: + ExtractKey extract_key; +}; + +// --------------------------------------------------------------------- +// --------------------------------------------------------------------- +template +void swap(HashtableInterface_SparseHashtable& a, + HashtableInterface_SparseHashtable& b) { + swap(a.ht_, b.ht_); +} + +void EXPECT_TRUE(bool cond) +{ + if (!cond) + { + ::fputs("Test failed:\n", stderr); + ::exit(1); + } +} + +SPP_START_NAMESPACE + + +namespace testing +{ + +#define EXPECT_FALSE(a) EXPECT_TRUE(!(a)) +#define EXPECT_EQ(a, b) EXPECT_TRUE((a) == (b)) +#define EXPECT_NE(a, b) EXPECT_TRUE((a) != (b)) +#define EXPECT_LT(a, b) EXPECT_TRUE((a) < (b)) +#define EXPECT_GT(a, b) EXPECT_TRUE((a) > (b)) +#define EXPECT_LE(a, b) EXPECT_TRUE((a) <= (b)) +#define EXPECT_GE(a, b) EXPECT_TRUE((a) >= (b)) + +#define EXPECT_DEATH(cmd, expected_error_string) \ + try { \ + cmd; \ + EXPECT_FALSE("did not see expected error: " #expected_error_string); \ + } catch (const std::length_error&) { \ + /* Good, the cmd failed. */ \ + } + +#define TEST(suitename, testname) \ + class TEST_##suitename##_##testname { \ + public: \ + TEST_##suitename##_##testname() { \ + ::fputs("Running " #suitename "." #testname "\n", stderr); \ + Run(); \ + } \ + void Run(); \ + }; \ + static TEST_##suitename##_##testname \ + test_instance_##suitename##_##testname; \ + void TEST_##suitename##_##testname::Run() + + +template +struct TypeList3 +{ + typedef C1 type1; + typedef C2 type2; + typedef C3 type3; +}; + +// I need to list 9 types here, for code below to compile, though +// only the first 3 are ever used. 
+#define TYPED_TEST_CASE_3(classname, typelist) \ + typedef typelist::type1 classname##_type1; \ + typedef typelist::type2 classname##_type2; \ + typedef typelist::type3 classname##_type3; \ + SPP_ATTRIBUTE_UNUSED static const int classname##_numtypes = 3; \ + typedef typelist::type1 classname##_type4; \ + typedef typelist::type1 classname##_type5; \ + typedef typelist::type1 classname##_type6; \ + typedef typelist::type1 classname##_type7; \ + typedef typelist::type1 classname##_type8; \ + typedef typelist::type1 classname##_type9; + +template +struct TypeList9 +{ + typedef C1 type1; + typedef C2 type2; + typedef C3 type3; + typedef C4 type4; + typedef C5 type5; + typedef C6 type6; + typedef C7 type7; + typedef C8 type8; + typedef C9 type9; +}; + +#define TYPED_TEST_CASE_9(classname, typelist) \ + typedef typelist::type1 classname##_type1; \ + typedef typelist::type2 classname##_type2; \ + typedef typelist::type3 classname##_type3; \ + typedef typelist::type4 classname##_type4; \ + typedef typelist::type5 classname##_type5; \ + typedef typelist::type6 classname##_type6; \ + typedef typelist::type7 classname##_type7; \ + typedef typelist::type8 classname##_type8; \ + typedef typelist::type9 classname##_type9; \ + static const int classname##_numtypes = 9; + +#define TYPED_TEST(superclass, testname) \ + template \ + class TEST_onetype_##superclass##_##testname : \ + public superclass { \ + public: \ + TEST_onetype_##superclass##_##testname() { \ + Run(); \ + } \ + private: \ + void Run(); \ + }; \ + class TEST_typed_##superclass##_##testname { \ + public: \ + explicit TEST_typed_##superclass##_##testname() { \ + if (superclass##_numtypes >= 1) { \ + ::fputs("Running " #superclass "." #testname ".1\n", stderr); \ + TEST_onetype_##superclass##_##testname t; \ + } \ + if (superclass##_numtypes >= 2) { \ + ::fputs("Running " #superclass "." #testname ".2\n", stderr); \ + TEST_onetype_##superclass##_##testname t; \ + } \ + if (superclass##_numtypes >= 3) { \ + ::fputs("Running " #superclass "." #testname ".3\n", stderr); \ + TEST_onetype_##superclass##_##testname t; \ + } \ + if (superclass##_numtypes >= 4) { \ + ::fputs("Running " #superclass "." #testname ".4\n", stderr); \ + TEST_onetype_##superclass##_##testname t; \ + } \ + if (superclass##_numtypes >= 5) { \ + ::fputs("Running " #superclass "." #testname ".5\n", stderr); \ + TEST_onetype_##superclass##_##testname t; \ + } \ + if (superclass##_numtypes >= 6) { \ + ::fputs("Running " #superclass "." #testname ".6\n", stderr); \ + TEST_onetype_##superclass##_##testname t; \ + } \ + if (superclass##_numtypes >= 7) { \ + ::fputs("Running " #superclass "." #testname ".7\n", stderr); \ + TEST_onetype_##superclass##_##testname t; \ + } \ + if (superclass##_numtypes >= 8) { \ + ::fputs("Running " #superclass "." #testname ".8\n", stderr); \ + TEST_onetype_##superclass##_##testname t; \ + } \ + if (superclass##_numtypes >= 9) { \ + ::fputs("Running " #superclass "." #testname ".9\n", stderr); \ + TEST_onetype_##superclass##_##testname t; \ + } \ + } \ + }; \ + static TEST_typed_##superclass##_##testname \ + test_instance_typed_##superclass##_##testname; \ + template \ + void TEST_onetype_##superclass##_##testname::Run() + +// This is a dummy class just to make converting from internal-google +// to opensourcing easier. 
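+// As an illustrative aside (not part of the original test suite), this is
+// roughly how the machinery above is meant to be used; DummyTest and
+// DummyTypes are hypothetical names introduced only for this sketch:
+//
+//   template <class T> struct DummyTest { T value_; };
+//   typedef TypeList3<char, int, long> DummyTypes;
+//   TYPED_TEST_CASE_3(DummyTest, DummyTypes);
+//   TYPED_TEST(DummyTest, DefaultConstructs) {
+//     TypeParam v = TypeParam();   // TypeParam is char, then int, then long
+//     this->value_ = v;            // 'this' is the DummyTest<TypeParam> fixture
+//     EXPECT_EQ(TypeParam(), v);
+//   }
+//
+// (The dummy Test class mentioned in the comment above follows immediately
+// after this aside.)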
+class Test { }; + +} // namespace testing + +SPP_END_NAMESPACE + + +namespace testing = SPP_NAMESPACE::testing; + +using std::cout; +using std::pair; +using std::set; +using std::string; +using std::vector; + +typedef unsigned char uint8; + +#ifdef _MSC_VER +// Below, we purposefully test having a very small allocator size. +// This causes some "type conversion too small" errors when using this +// allocator with sparsetable buckets. We're testing to make sure we +// handle that situation ok, so we don't need the compiler warnings. +#pragma warning(disable:4244) +#define ATTRIBUTE_UNUSED +#else +#define ATTRIBUTE_UNUSED __attribute__((unused)) +#endif + +namespace { + +#ifndef _MSC_VER // windows defines its own version +# ifdef __MINGW32__ // mingw has trouble writing to /tmp +static string TmpFile(const char* basename) { + return string("./#") + basename; +} +# else +static string TmpFile(const char* basename) { + string kTmpdir = "/tmp"; + return kTmpdir + "/" + basename; +} +# endif +#endif + +// Used as a value in some of the hashtable tests. It's just some +// arbitrary user-defined type with non-trivial memory management. +// --------------------------------------------------------------- +struct ValueType +{ +public: + ValueType() : s_(kDefault) { } + ValueType(const char* init_s) : s_(kDefault) { set_s(init_s); } + ~ValueType() { set_s(NULL); } + ValueType(const ValueType& that) : s_(kDefault) { operator=(that); } + void operator=(const ValueType& that) { set_s(that.s_); } + bool operator==(const ValueType& that) const { + return strcmp(this->s(), that.s()) == 0; + } + void set_s(const char* new_s) { + if (s_ != kDefault) + free(const_cast(s_)); + s_ = (new_s == NULL ? kDefault : reinterpret_cast(_strdup(new_s))); + } + const char* s() const { return s_; } +private: + const char* s_; + static const char* const kDefault; +}; + +const char* const ValueType::kDefault = "hi"; + +// This is used by the low-level sparse/dense_hashtable classes, +// which support the most general relationship between keys and +// values: the key is derived from the value through some arbitrary +// function. (For classes like sparse_hash_map, the 'value' is a +// key/data pair, and the function to derive the key is +// FirstElementOfPair.) KeyToValue is the inverse of this function, +// so GetKey(KeyToValue(key)) == key. To keep the tests a bit +// simpler, we've chosen to make the key and value actually be the +// same type, which is why we need only one template argument for the +// types, rather than two (one for the key and one for the value). +template +struct SetKey +{ + void operator()(KeyAndValueT* value, const KeyAndValueT& new_key) const + { + *value = KeyToValue()(new_key); + } +}; + +// A hash function that keeps track of how often it's called. We use +// a simple djb-hash so we don't depend on how STL hashes. We use +// this same method to do the key-comparison, so we can keep track +// of comparison-counts too. 
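+// For concreteness, the update used below is h = 33*h + c, seeded with 0
+// rather than djb2's traditional 5381 (determinism, not hash quality, is what
+// matters here). Worked through for the two-character string "ab":
+//   h = 33*0  + 'a'(97) = 97
+//   h = 33*97 + 'b'(98) = 3299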
+struct Hasher +{ + explicit Hasher(int i=0) : id_(i), num_hashes_(0), num_compares_(0) { } + int id() const { return id_; } + int num_hashes() const { return num_hashes_; } + int num_compares() const { return num_compares_; } + + size_t operator()(int a) const { + num_hashes_++; + return static_cast(a); + } + size_t operator()(const char* a) const { + num_hashes_++; + size_t hash = 0; + for (size_t i = 0; a[i]; i++ ) + hash = 33 * hash + a[i]; + return hash; + } + size_t operator()(const string& a) const { + num_hashes_++; + size_t hash = 0; + for (size_t i = 0; i < a.length(); i++ ) + hash = 33 * hash + a[i]; + return hash; + } + size_t operator()(const int* a) const { + num_hashes_++; + return static_cast(reinterpret_cast(a)); + } + bool operator()(int a, int b) const { + num_compares_++; + return a == b; + } + bool operator()(const string& a, const string& b) const { + num_compares_++; + return a == b; + } + bool operator()(const char* a, const char* b) const { + num_compares_++; + // The 'a == b' test is necessary, in case a and b are both NULL. + return (a == b || (a && b && strcmp(a, b) == 0)); + } + +private: + mutable int id_; + mutable int num_hashes_; + mutable int num_compares_; +}; + +// Allocator that allows controlling its size in various ways, to test +// allocator overflow. Because we use this allocator in a vector, we +// need to define != and swap for gcc. +// ------------------------------------------------------------------ +template(~0)> +struct Alloc +{ + typedef T value_type; + typedef SizeT size_type; + typedef ptrdiff_t difference_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + + explicit Alloc(int i=0, int* count=NULL) : id_(i), count_(count) {} + ~Alloc() {} + pointer address(reference r) const { return &r; } + const_pointer address(const_reference r) const { return &r; } + pointer allocate(size_type n, const_pointer = 0) { + if (count_) ++(*count_); + return static_cast(malloc(n * sizeof(value_type))); + } + void deallocate(pointer p, size_type) { + free(p); + } + pointer reallocate(pointer p, size_type n) { + if (count_) ++(*count_); + return static_cast(realloc(p, n * sizeof(value_type))); + } + size_type max_size() const { + return static_cast(MAX_SIZE); + } + void construct(pointer p, const value_type& val) { + new(p) value_type(val); + } + void destroy(pointer p) { p->~value_type(); } + + bool is_custom_alloc() const { return true; } + + template + Alloc(const Alloc& that) + : id_(that.id_), count_(that.count_) { + } + + template + struct rebind { + typedef Alloc other; + }; + + bool operator==(const Alloc& that) const { + return this->id_ == that.id_ && this->count_ == that.count_; + } + bool operator!=(const Alloc& that) const { + return !this->operator==(that); + } + + int id() const { return id_; } + + // I have to make these public so the constructor used for rebinding + // can see them. Normally, I'd just make them private and say: + // template friend struct Alloc; + // but MSVC 7.1 barfs on that. So public it is. But no peeking! +public: + int id_; + int* count_; +}; + + +// Below are a few fun routines that convert a value into a key, used +// for dense_hashtable and sparse_hashtable. It's our responsibility +// to make sure, when we insert values into these objects, that the +// values match the keys we insert them under. To allow us to use +// these routines for SetKey as well, we require all these functions +// be their own inverse: f(f(x)) == x. 
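+// For example, with the converters defined below:
+//   Negation:  -(-5) == 5
+//   Capital:   only the first character is flipped, and 'h' ^ 32 == 'H' while
+//              'H' ^ 32 == 'h', so Cap(Cap("hi")) == "hi"
+//   Identity:  trivially its own inverse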
+template +struct Negation { + typedef Value result_type; + Value operator()(Value& v) { return -v; } + const Value operator()(const Value& v) const { return -v; } +}; + +struct Capital +{ + typedef string result_type; + string operator()(string& s) { + return string(1, s[0] ^ 32) + s.substr(1); + } + const string operator()(const string& s) const { + return string(1, s[0] ^ 32) + s.substr(1); + } +}; + +struct Identity +{ // lame, I know, but an important case to test. + typedef const char* result_type; + const char* operator()(const char* s) const { + return s; + } +}; + +// This is just to avoid memory leaks -- it's a global pointer to +// all the memory allocated by UniqueObjectHelper. We'll use it +// to semi-test sparsetable as well. :-) +sparsetable g_unique_charstar_objects(16); + +// This is an object-generator: pass in an index, and it will return a +// unique object of type ItemType. We provide specializations for the +// types we actually support. +template ItemType UniqueObjectHelper(int index); +template<> int UniqueObjectHelper(int index) +{ + return index; +} +template<> string UniqueObjectHelper(int index) +{ + char buffer[64]; + snprintf(buffer, sizeof(buffer), "%d", index); + return buffer; +} +template<> char* UniqueObjectHelper(int index) +{ + // First grow the table if need be. + sparsetable::size_type table_size = g_unique_charstar_objects.size(); + while (index >= static_cast(table_size)) { + assert(table_size * 2 > table_size); // avoid overflow problems + table_size *= 2; + } + if (table_size > g_unique_charstar_objects.size()) + g_unique_charstar_objects.resize(table_size); + + if (!g_unique_charstar_objects.test((size_t)index)) { + char buffer[64]; + snprintf(buffer, sizeof(buffer), "%d", index); + g_unique_charstar_objects[(size_t)index] = _strdup(buffer); + } + return g_unique_charstar_objects.get((size_t)index); +} +template<> const char* UniqueObjectHelper(int index) { + return UniqueObjectHelper(index); +} +template<> ValueType UniqueObjectHelper(int index) { + return ValueType(UniqueObjectHelper(index).c_str()); +} +template<> pair UniqueObjectHelper(int index) { + return pair(index, index + 1); +} +template<> pair UniqueObjectHelper(int index) +{ + return pair( + UniqueObjectHelper(index), UniqueObjectHelper(index + 1)); +} +template<> pair UniqueObjectHelper(int index) +{ + return pair( + UniqueObjectHelper(index), UniqueObjectHelper(index+1)); +} + +class ValueSerializer +{ +public: + bool operator()(FILE* fp, const int& value) { + return fwrite(&value, sizeof(value), 1, fp) == 1; + } + bool operator()(FILE* fp, int* value) { + return fread(value, sizeof(*value), 1, fp) == 1; + } + bool operator()(FILE* fp, const string& value) { + const size_t size = value.size(); + return (*this)(fp, (int)size) && fwrite(value.c_str(), size, 1, fp) == 1; + } + bool operator()(FILE* fp, string* value) { + int size; + if (!(*this)(fp, &size)) return false; + char* buf = new char[(size_t)size]; + if (fread(buf, (size_t)size, 1, fp) != 1) { + delete[] buf; + return false; + } + new (value) string(buf, (size_t)size); + delete[] buf; + return true; + } + template + bool operator()(OUTPUT* fp, const ValueType& v) { + return (*this)(fp, string(v.s())); + } + template + bool operator()(INPUT* fp, ValueType* v) { + string data; + if (!(*this)(fp, &data)) return false; + new(v) ValueType(data.c_str()); + return true; + } + template + bool operator()(OUTPUT* fp, const char* const& value) { + // Just store the index. 
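+    // (char* values in these tests always come from UniqueObjectHelper<char*>,
+    // which interns the decimal string for an index in
+    // g_unique_charstar_objects; writing just that index lets the reader
+    // below recover an equivalent pointer via the same helper instead of
+    // trying to persist a raw pointer.)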
+ return (*this)(fp, atoi(value)); + } + template + bool operator()(INPUT* fp, const char** value) { + // Look up via index. + int index; + if (!(*this)(fp, &index)) return false; + *value = UniqueObjectHelper(index); + return true; + } + template + bool operator()(OUTPUT* fp, std::pair* value) { + return (*this)(fp, const_cast(&value->first)) + && (*this)(fp, &value->second); + } + template + bool operator()(INPUT* fp, const std::pair& value) { + return (*this)(fp, value.first) && (*this)(fp, value.second); + } +}; + +template +class HashtableTest : public ::testing::Test +{ +public: + HashtableTest() : ht_() { } + // Give syntactically-prettier access to UniqueObjectHelper. + typename HashtableType::value_type UniqueObject(int index) { + return UniqueObjectHelper(index); + } + typename HashtableType::key_type UniqueKey(int index) { + return this->ht_.get_key(this->UniqueObject(index)); + } +protected: + HashtableType ht_; +}; + +} + +// These are used to specify the empty key and deleted key in some +// contexts. They can't be in the unnamed namespace, or static, +// because the template code requires external linkage. +extern const string kEmptyString("--empty string--"); +extern const string kDeletedString("--deleted string--"); +extern const int kEmptyInt = 0; +extern const int kDeletedInt = -1234676543; // an unlikely-to-pick int +extern const char* const kEmptyCharStar = "--empty char*--"; +extern const char* const kDeletedCharStar = "--deleted char*--"; + +namespace { + +#define INT_HASHTABLES \ + HashtableInterface_SparseHashMap >, \ + HashtableInterface_SparseHashSet >, \ + /* This is a table where the key associated with a value is -value */ \ + HashtableInterface_SparseHashtable, \ + SetKey >, \ + Hasher, Alloc > + +#define STRING_HASHTABLES \ + HashtableInterface_SparseHashMap >, \ + HashtableInterface_SparseHashSet >, \ + /* This is a table where the key associated with a value is Cap(value) */ \ + HashtableInterface_SparseHashtable, \ + Hasher, Alloc > + +// --------------------------------------------------------------------- +// I'd like to use ValueType keys for SparseHashtable<> and +// DenseHashtable<> but I can't due to memory-management woes (nobody +// really owns the char* involved). So instead I do something simpler. +// --------------------------------------------------------------------- +#define CHARSTAR_HASHTABLES \ + HashtableInterface_SparseHashMap >, \ + HashtableInterface_SparseHashSet >, \ + HashtableInterface_SparseHashtable, \ + Hasher, Alloc > + +// --------------------------------------------------------------------- +// This is the list of types we run each test against. +// We need to define the same class 4 times due to limitations in the +// testing framework. Basically, we associate each class below with +// the set of types we want to run tests on it with. 
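+// Concretely, the three single-flavor fixtures below each run over the three
+// table types (hash_map, hash_set, raw hashtable) for one key/value flavor,
+// while HashtableAllTest runs over all 3 x 3 = 9 combinations via TypeList9.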
+// --------------------------------------------------------------------- +template class HashtableIntTest + : public HashtableTest { }; + +template class HashtableStringTest + : public HashtableTest { }; + +template class HashtableCharStarTest + : public HashtableTest { }; + +template class HashtableAllTest + : public HashtableTest { }; + +typedef testing::TypeList3 IntHashtables; +typedef testing::TypeList3 StringHashtables; +typedef testing::TypeList3 CharStarHashtables; +typedef testing::TypeList9 AllHashtables; + +TYPED_TEST_CASE_3(HashtableIntTest, IntHashtables); +TYPED_TEST_CASE_3(HashtableStringTest, StringHashtables); +TYPED_TEST_CASE_3(HashtableCharStarTest, CharStarHashtables); +TYPED_TEST_CASE_9(HashtableAllTest, AllHashtables); + +// ------------------------------------------------------------------------ +// First, some testing of the underlying infrastructure. + +#if 0 + +TEST(HashtableCommonTest, HashMunging) +{ + const Hasher hasher; + + // We don't munge the hash value on non-pointer template types. + { + const sparsehash_internal::sh_hashtable_settings + settings(hasher, 0.0, 0.0); + const int v = 1000; + EXPECT_EQ(hasher(v), settings.hash(v)); + } + + { + // We do munge the hash value on pointer template types. + const sparsehash_internal::sh_hashtable_settings + settings(hasher, 0.0, 0.0); + int* v = NULL; + v += 0x10000; // get a non-trivial pointer value + EXPECT_NE(hasher(v), settings.hash(v)); + } + { + const sparsehash_internal::sh_hashtable_settings + settings(hasher, 0.0, 0.0); + const int* v = NULL; + v += 0x10000; // get a non-trivial pointer value + EXPECT_NE(hasher(v), settings.hash(v)); + } +} + +#endif + +// ------------------------------------------------------------------------ +// If the first arg to TYPED_TEST is HashtableIntTest, it will run +// this test on all the hashtable types, with key=int and value=int. +// Likewise, HashtableStringTest will have string key/values, and +// HashtableCharStarTest will have char* keys and -- just to mix it up +// a little -- ValueType values. HashtableAllTest will run all three +// key/value types on all 6 hashtables types, for 9 test-runs total +// per test. +// +// In addition, TYPED_TEST makes available the magic keyword +// TypeParam, which is the type being used for the current test. + +// This first set of tests just tests the public API, going through +// the public typedefs and methods in turn. It goes approximately +// in the definition-order in sparse_hash_map.h. +// ------------------------------------------------------------------------ +TYPED_TEST(HashtableIntTest, Typedefs) +{ + // Make sure all the standard STL-y typedefs are defined. The exact + // key/value types don't matter here, so we only bother testing on + // the int tables. This is just a compile-time "test"; nothing here + // can fail at runtime. + this->ht_.set_deleted_key(-2); // just so deleted_key succeeds + typename TypeParam::key_type kt; + typename TypeParam::value_type vt; + typename TypeParam::hasher h; + typename TypeParam::key_equal ke; + typename TypeParam::allocator_type at; + + typename TypeParam::size_type st; + typename TypeParam::difference_type dt; + typename TypeParam::pointer p; + typename TypeParam::const_pointer cp; + // I can't declare variables of reference-type, since I have nothing + // to point them to, so I just make sure that these types exist. 
+ ATTRIBUTE_UNUSED typedef typename TypeParam::reference r; + ATTRIBUTE_UNUSED typedef typename TypeParam::const_reference cf; + + typename TypeParam::iterator i; + typename TypeParam::const_iterator ci; + typename TypeParam::local_iterator li; + typename TypeParam::const_local_iterator cli; + + // Now make sure the variables are used, so the compiler doesn't + // complain. Where possible, I "use" the variable by calling the + // method that's supposed to return the unique instance of the + // relevant type (eg. get_allocator()). Otherwise, I try to call a + // different, arbitrary function that returns the type. Sometimes + // the type isn't used at all, and there's no good way to use the + // variable. + kt = this->ht_.deleted_key(); + (void)vt; // value_type may not be copyable. Easiest not to try. + h = this->ht_.hash_funct(); + ke = this->ht_.key_eq(); + at = this->ht_.get_allocator(); + st = this->ht_.size(); + (void)dt; + (void)p; + (void)cp; + i = this->ht_.begin(); + ci = this->ht_.begin(); + li = this->ht_.begin(0); + cli = this->ht_.begin(0); +} + +TYPED_TEST(HashtableAllTest, NormalIterators) +{ + EXPECT_TRUE(this->ht_.begin() == this->ht_.end()); + this->ht_.insert(this->UniqueObject(1)); + { + typename TypeParam::iterator it = this->ht_.begin(); + EXPECT_TRUE(it != this->ht_.end()); + ++it; + EXPECT_TRUE(it == this->ht_.end()); + } +} + +TEST(HashtableTest, ModifyViaIterator) +{ + // This only works for hash-maps, since only they have non-const values. + { + sparse_hash_map ht; + ht[1] = 2; + sparse_hash_map::iterator it = ht.find(1); + EXPECT_TRUE(it != ht.end()); + EXPECT_EQ(1, it->first); + EXPECT_EQ(2, it->second); + it->second = 5; + it = ht.find(1); + EXPECT_TRUE(it != ht.end()); + EXPECT_EQ(5, it->second); + } +} + +TYPED_TEST(HashtableAllTest, ConstIterators) +{ + this->ht_.insert(this->UniqueObject(1)); + typename TypeParam::const_iterator it = this->ht_.begin(); + EXPECT_TRUE(it != (typename TypeParam::const_iterator)this->ht_.end()); + ++it; + EXPECT_TRUE(it == (typename TypeParam::const_iterator)this->ht_.end()); +} + +TYPED_TEST(HashtableAllTest, LocalIterators) +{ + // Now, tr1 begin/end (the local iterator that takes a bucket-number). + // ht::bucket() returns the bucket that this key would be inserted in. + this->ht_.insert(this->UniqueObject(1)); + const typename TypeParam::size_type bucknum = + this->ht_.bucket(this->UniqueKey(1)); + typename TypeParam::local_iterator b = this->ht_.begin(bucknum); + typename TypeParam::local_iterator e = this->ht_.end(bucknum); + EXPECT_TRUE(b != e); + b++; + EXPECT_TRUE(b == e); + + // Check an empty bucket. We can just xor the bottom bit and be sure + // of getting a legal bucket, since #buckets is always a power of 2. + EXPECT_TRUE(this->ht_.begin(bucknum ^ 1) == this->ht_.end(bucknum ^ 1)); + // Another test, this time making sure we're using the right types. 
+ typename TypeParam::local_iterator b2 = this->ht_.begin(bucknum ^ 1); + typename TypeParam::local_iterator e2 = this->ht_.end(bucknum ^ 1); + EXPECT_TRUE(b2 == e2); +} + +TYPED_TEST(HashtableAllTest, ConstLocalIterators) +{ + this->ht_.insert(this->UniqueObject(1)); + const typename TypeParam::size_type bucknum = + this->ht_.bucket(this->UniqueKey(1)); + typename TypeParam::const_local_iterator b = this->ht_.begin(bucknum); + typename TypeParam::const_local_iterator e = this->ht_.end(bucknum); + EXPECT_TRUE(b != e); + b++; + EXPECT_TRUE(b == e); + typename TypeParam::const_local_iterator b2 = this->ht_.begin(bucknum ^ 1); + typename TypeParam::const_local_iterator e2 = this->ht_.end(bucknum ^ 1); + EXPECT_TRUE(b2 == e2); +} + +TYPED_TEST(HashtableAllTest, Iterating) +{ + // Test a bit more iterating than just one ++. + this->ht_.insert(this->UniqueObject(1)); + this->ht_.insert(this->UniqueObject(11)); + this->ht_.insert(this->UniqueObject(111)); + this->ht_.insert(this->UniqueObject(1111)); + this->ht_.insert(this->UniqueObject(11111)); + this->ht_.insert(this->UniqueObject(111111)); + this->ht_.insert(this->UniqueObject(1111111)); + this->ht_.insert(this->UniqueObject(11111111)); + this->ht_.insert(this->UniqueObject(111111111)); + typename TypeParam::iterator it = this->ht_.begin(); + for (int i = 1; i <= 9; i++) { // start at 1 so i is never 0 + // && here makes it easier to tell what loop iteration the test failed on. + EXPECT_TRUE(i && (it++ != this->ht_.end())); + } + EXPECT_TRUE(it == this->ht_.end()); +} + +TYPED_TEST(HashtableIntTest, Constructors) +{ + // The key/value types don't matter here, so I just test on one set + // of tables, the ones with int keys, which can easily handle the + // placement-news we have to do below. + Hasher hasher(1); // 1 is a unique id + int alloc_count = 0; + Alloc alloc(2, &alloc_count); + + TypeParam ht_noarg; + TypeParam ht_onearg(100); + TypeParam ht_twoarg(100, hasher); + TypeParam ht_threearg(100, hasher, hasher); // hasher serves as key_equal too + TypeParam ht_fourarg(100, hasher, hasher, alloc); + + // The allocator should have been called at most once, for the last ht. + EXPECT_GE(1, alloc_count); + int old_alloc_count = alloc_count; + + const typename TypeParam::value_type input[] = { + this->UniqueObject(1), + this->UniqueObject(2), + this->UniqueObject(4), + this->UniqueObject(8) + }; + const int num_inputs = sizeof(input) / sizeof(input[0]); + const typename TypeParam::value_type *begin = &input[0]; + const typename TypeParam::value_type *end = begin + num_inputs; + TypeParam ht_iter_noarg(begin, end); + TypeParam ht_iter_onearg(begin, end, 100); + TypeParam ht_iter_twoarg(begin, end, 100, hasher); + TypeParam ht_iter_threearg(begin, end, 100, hasher, hasher); + TypeParam ht_iter_fourarg(begin, end, 100, hasher, hasher, alloc); + // Now the allocator should have been called more. 
+ EXPECT_GT(alloc_count, old_alloc_count); + old_alloc_count = alloc_count; + + // Let's do a lot more inserting and make sure the alloc-count goes up + for (int i = 2; i < 2000; i++) + ht_fourarg.insert(this->UniqueObject(i)); + EXPECT_GT(alloc_count, old_alloc_count); + + EXPECT_LT(ht_noarg.bucket_count(), 100u); + EXPECT_GE(ht_onearg.bucket_count(), 100u); + EXPECT_GE(ht_twoarg.bucket_count(), 100u); + EXPECT_GE(ht_threearg.bucket_count(), 100u); + EXPECT_GE(ht_fourarg.bucket_count(), 100u); + EXPECT_GE(ht_iter_onearg.bucket_count(), 100u); + + // When we pass in a hasher -- it can serve both as the hash-function + // and the key-equal function -- its id should be 1. Where we don't + // pass it in and use the default Hasher object, the id should be 0. + EXPECT_EQ(0, ht_noarg.hash_funct().id()); + EXPECT_EQ(0, ht_noarg.key_eq().id()); + EXPECT_EQ(0, ht_onearg.hash_funct().id()); + EXPECT_EQ(0, ht_onearg.key_eq().id()); + EXPECT_EQ(1, ht_twoarg.hash_funct().id()); + EXPECT_EQ(0, ht_twoarg.key_eq().id()); + EXPECT_EQ(1, ht_threearg.hash_funct().id()); + EXPECT_EQ(1, ht_threearg.key_eq().id()); + + EXPECT_EQ(0, ht_iter_noarg.hash_funct().id()); + EXPECT_EQ(0, ht_iter_noarg.key_eq().id()); + EXPECT_EQ(0, ht_iter_onearg.hash_funct().id()); + EXPECT_EQ(0, ht_iter_onearg.key_eq().id()); + EXPECT_EQ(1, ht_iter_twoarg.hash_funct().id()); + EXPECT_EQ(0, ht_iter_twoarg.key_eq().id()); + EXPECT_EQ(1, ht_iter_threearg.hash_funct().id()); + EXPECT_EQ(1, ht_iter_threearg.key_eq().id()); + + // Likewise for the allocator + EXPECT_EQ(0, ht_threearg.get_allocator().id()); + EXPECT_EQ(0, ht_iter_threearg.get_allocator().id()); + EXPECT_EQ(2, ht_fourarg.get_allocator().id()); + EXPECT_EQ(2, ht_iter_fourarg.get_allocator().id()); +} + +TYPED_TEST(HashtableAllTest, OperatorEquals) +{ + { + TypeParam ht1, ht2; + ht1.set_deleted_key(this->UniqueKey(1)); + ht2.set_deleted_key(this->UniqueKey(2)); + + ht1.insert(this->UniqueObject(10)); + ht2.insert(this->UniqueObject(20)); + EXPECT_FALSE(ht1 == ht2); + ht1 = ht2; + EXPECT_TRUE(ht1 == ht2); + } + { + TypeParam ht1, ht2; + ht1.insert(this->UniqueObject(30)); + ht1 = ht2; + EXPECT_EQ(0u, ht1.size()); + } + { + TypeParam ht1, ht2; + ht1.set_deleted_key(this->UniqueKey(1)); + ht2.insert(this->UniqueObject(1)); // has same key as ht1.delkey + ht1 = ht2; // should reset deleted-key to 'unset' + EXPECT_EQ(1u, ht1.size()); + EXPECT_EQ(1u, ht1.count(this->UniqueKey(1))); + } +} + +TYPED_TEST(HashtableAllTest, Clear) +{ + for (int i = 1; i < 200; i++) { + this->ht_.insert(this->UniqueObject(i)); + } + this->ht_.clear(); + EXPECT_EQ(0u, this->ht_.size()); + // TODO(csilvers): do we want to enforce that the hashtable has or + // has not shrunk? It does for dense_* but not sparse_*. +} + +TYPED_TEST(HashtableAllTest, ClearNoResize) +{ + if (!this->ht_.supports_clear_no_resize()) + return; + typename TypeParam::size_type empty_bucket_count = this->ht_.bucket_count(); + int last_element = 1; + while (this->ht_.bucket_count() == empty_bucket_count) { + this->ht_.insert(this->UniqueObject(last_element)); + ++last_element; + } + typename TypeParam::size_type last_bucket_count = this->ht_.bucket_count(); + this->ht_.clear_no_resize(); + EXPECT_EQ(last_bucket_count, this->ht_.bucket_count()); + EXPECT_TRUE(this->ht_.empty()); + + // When inserting the same number of elements again, no resize + // should be necessary. 
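+    // (clear_no_resize() left bucket_count at the already-grown
+    // last_bucket_count, so re-inserting the same number of elements stays
+    // below the grow threshold for that larger table.)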
+ for (int i = 1; i < last_element; ++i) { + this->ht_.insert(this->UniqueObject(last_element + i)); + EXPECT_EQ(last_bucket_count, this->ht_.bucket_count()); + } +} + +TYPED_TEST(HashtableAllTest, Swap) +{ + // Let's make a second hashtable with its own hasher, key_equal, etc. + Hasher hasher(1); // 1 is a unique id + TypeParam other_ht(200, hasher, hasher); + + this->ht_.set_deleted_key(this->UniqueKey(1)); + other_ht.set_deleted_key(this->UniqueKey(2)); + + for (int i = 3; i < 2000; i++) { + this->ht_.insert(this->UniqueObject(i)); + } + this->ht_.erase(this->UniqueKey(1000)); + other_ht.insert(this->UniqueObject(2001)); + typename TypeParam::size_type expected_buckets = other_ht.bucket_count(); + + this->ht_.swap(other_ht); + + EXPECT_EQ(this->UniqueKey(2), this->ht_.deleted_key()); + EXPECT_EQ(this->UniqueKey(1), other_ht.deleted_key()); + + EXPECT_EQ(1, this->ht_.hash_funct().id()); + EXPECT_EQ(0, other_ht.hash_funct().id()); + + EXPECT_EQ(1, this->ht_.key_eq().id()); + EXPECT_EQ(0, other_ht.key_eq().id()); + + EXPECT_EQ(expected_buckets, this->ht_.bucket_count()); + EXPECT_GT(other_ht.bucket_count(), 200u); + + EXPECT_EQ(1u, this->ht_.size()); + EXPECT_EQ(1996u, other_ht.size()); // because we erased 1000 + + EXPECT_EQ(0u, this->ht_.count(this->UniqueKey(111))); + EXPECT_EQ(1u, other_ht.count(this->UniqueKey(111))); + EXPECT_EQ(1u, this->ht_.count(this->UniqueKey(2001))); + EXPECT_EQ(0u, other_ht.count(this->UniqueKey(2001))); + EXPECT_EQ(0u, this->ht_.count(this->UniqueKey(1000))); + EXPECT_EQ(0u, other_ht.count(this->UniqueKey(1000))); + + // We purposefully don't swap allocs -- they're not necessarily swappable. + + // Now swap back, using the free-function swap + // NOTE: MSVC seems to have trouble with this free swap, not quite + // sure why. I've given up trying to fix it though. +#ifdef _MSC_VER + other_ht.swap(this->ht_); +#else + std::swap(this->ht_, other_ht); +#endif + + EXPECT_EQ(this->UniqueKey(1), this->ht_.deleted_key()); + EXPECT_EQ(this->UniqueKey(2), other_ht.deleted_key()); + EXPECT_EQ(0, this->ht_.hash_funct().id()); + EXPECT_EQ(1, other_ht.hash_funct().id()); + EXPECT_EQ(1996u, this->ht_.size()); + EXPECT_EQ(1u, other_ht.size()); + EXPECT_EQ(1u, this->ht_.count(this->UniqueKey(111))); + EXPECT_EQ(0u, other_ht.count(this->UniqueKey(111))); + + // A user reported a crash with this code using swap to clear. + // We've since fixed the bug; this prevents a regression. + TypeParam swap_to_clear_ht; + swap_to_clear_ht.set_deleted_key(this->UniqueKey(1)); + for (int i = 2; i < 10000; ++i) { + swap_to_clear_ht.insert(this->UniqueObject(i)); + } + TypeParam empty_ht; + empty_ht.swap(swap_to_clear_ht); + swap_to_clear_ht.set_deleted_key(this->UniqueKey(1)); + for (int i = 2; i < 10000; ++i) { + swap_to_clear_ht.insert(this->UniqueObject(i)); + } +} + +TYPED_TEST(HashtableAllTest, Size) +{ + EXPECT_EQ(0u, this->ht_.size()); + for (int i = 1; i < 1000; i++) { // go through some resizes + this->ht_.insert(this->UniqueObject(i)); + EXPECT_EQ(static_cast(i), this->ht_.size()); + } + this->ht_.clear(); + EXPECT_EQ(0u, this->ht_.size()); + + this->ht_.set_deleted_key(this->UniqueKey(1)); + EXPECT_EQ(0u, this->ht_.size()); // deleted key doesn't count + for (int i = 2; i < 1000; i++) { // go through some resizes + this->ht_.insert(this->UniqueObject(i)); + this->ht_.erase(this->UniqueKey(i)); + EXPECT_EQ(0u, this->ht_.size()); + } +} + +TEST(HashtableTest, MaxSizeAndMaxBucketCount) +{ + // The max size depends on the allocator. 
So we can't use the + // built-in allocator type; instead, we make our own types. + sparse_hash_set > ht_default; + sparse_hash_set > ht_char; + sparse_hash_set > ht_104; + + EXPECT_GE(ht_default.max_size(), 256u); + EXPECT_EQ(255u, ht_char.max_size()); + EXPECT_EQ(104u, ht_104.max_size()); + + // In our implementations, MaxBucketCount == MaxSize. + EXPECT_EQ(ht_default.max_size(), ht_default.max_bucket_count()); + EXPECT_EQ(ht_char.max_size(), ht_char.max_bucket_count()); + EXPECT_EQ(ht_104.max_size(), ht_104.max_bucket_count()); +} + +TYPED_TEST(HashtableAllTest, Empty) +{ + EXPECT_TRUE(this->ht_.empty()); + + this->ht_.insert(this->UniqueObject(1)); + EXPECT_FALSE(this->ht_.empty()); + + this->ht_.clear(); + EXPECT_TRUE(this->ht_.empty()); + + TypeParam empty_ht; + this->ht_.insert(this->UniqueObject(1)); + this->ht_.swap(empty_ht); + EXPECT_TRUE(this->ht_.empty()); +} + +TYPED_TEST(HashtableAllTest, BucketCount) +{ + TypeParam ht(100); + // constructor arg is number of *items* to be inserted, not the + // number of buckets, so we expect more buckets. + EXPECT_GT(ht.bucket_count(), 100u); + for (int i = 1; i < 200; i++) { + ht.insert(this->UniqueObject(i)); + } + EXPECT_GT(ht.bucket_count(), 200u); +} + +TYPED_TEST(HashtableAllTest, BucketAndBucketSize) +{ + const typename TypeParam::size_type expected_bucknum = this->ht_.bucket( + this->UniqueKey(1)); + EXPECT_EQ(0u, this->ht_.bucket_size(expected_bucknum)); + + this->ht_.insert(this->UniqueObject(1)); + EXPECT_EQ(expected_bucknum, this->ht_.bucket(this->UniqueKey(1))); + EXPECT_EQ(1u, this->ht_.bucket_size(expected_bucknum)); + + // Check that a bucket we didn't insert into, has a 0 size. Since + // we have an even number of buckets, bucknum^1 is guaranteed in range. + EXPECT_EQ(0u, this->ht_.bucket_size(expected_bucknum ^ 1)); +} + +TYPED_TEST(HashtableAllTest, LoadFactor) +{ + const typename TypeParam::size_type kSize = 16536; + // Check growing past various thresholds and then shrinking below + // them. + for (float grow_threshold = 0.2f; + grow_threshold <= 0.8f; + grow_threshold += 0.2f) + { + TypeParam ht; + ht.set_deleted_key(this->UniqueKey(1)); + ht.max_load_factor(grow_threshold); + ht.min_load_factor(0.0); + EXPECT_EQ(grow_threshold, ht.max_load_factor()); + EXPECT_EQ(0.0, ht.min_load_factor()); + + ht.resize(kSize); + size_t bucket_count = ht.bucket_count(); + // Erase and insert an element to set consider_shrink = true, + // which should not cause a shrink because the threshold is 0.0. + ht.insert(this->UniqueObject(2)); + ht.erase(this->UniqueKey(2)); + for (int i = 2;; ++i) + { + ht.insert(this->UniqueObject(i)); + if (static_cast(ht.size())/bucket_count < grow_threshold) { + EXPECT_EQ(bucket_count, ht.bucket_count()); + } else { + EXPECT_GT(ht.bucket_count(), bucket_count); + break; + } + } + // Now set a shrink threshold 1% below the current size and remove + // items until the size falls below that. + const float shrink_threshold = static_cast(ht.size()) / + ht.bucket_count() - 0.01f; + + // This time around, check the old set_resizing_parameters interface. + ht.set_resizing_parameters(shrink_threshold, 1.0); + EXPECT_EQ(1.0, ht.max_load_factor()); + EXPECT_EQ(shrink_threshold, ht.min_load_factor()); + + bucket_count = ht.bucket_count(); + for (int i = 2;; ++i) + { + ht.erase(this->UniqueKey(i)); + // A resize is only triggered by an insert, so add and remove a + // value every iteration to trigger the shrink as soon as the + // threshold is passed. 
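+        // (Illustrative numbers: with, say, 1024 buckets and a shrink
+        // threshold of 0.30, the rehash to a smaller table happens on the
+        // first insert after size drops below roughly 0.30 * 1024 = 307.)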
+ ht.erase(this->UniqueKey(i+1)); + ht.insert(this->UniqueObject(i+1)); + if (static_cast(ht.size())/bucket_count > shrink_threshold) { + EXPECT_EQ(bucket_count, ht.bucket_count()); + } else { + EXPECT_LT(ht.bucket_count(), bucket_count); + break; + } + } + } +} + +TYPED_TEST(HashtableAllTest, ResizeAndRehash) +{ + // resize() and rehash() are synonyms. rehash() is the tr1 name. + TypeParam ht(10000); + ht.max_load_factor(0.8f); // for consistency's sake + + for (int i = 1; i < 100; ++i) + ht.insert(this->UniqueObject(i)); + ht.resize(0); + // Now ht should be as small as possible. + EXPECT_LT(ht.bucket_count(), 300u); + + ht.rehash(9000); // use the 'rehash' version of the name. + // Bucket count should be next power of 2, after considering max_load_factor. + EXPECT_EQ(16384u, ht.bucket_count()); + for (int i = 101; i < 200; ++i) + ht.insert(this->UniqueObject(i)); + // Adding a few hundred buckets shouldn't have caused a resize yet. + EXPECT_EQ(ht.bucket_count(), 16384u); +} + +TYPED_TEST(HashtableAllTest, FindAndCountAndEqualRange) +{ + pair eq_pair; + pair const_eq_pair; + + EXPECT_TRUE(this->ht_.empty()); + EXPECT_TRUE(this->ht_.find(this->UniqueKey(1)) == this->ht_.end()); + EXPECT_EQ(0u, this->ht_.count(this->UniqueKey(1))); + eq_pair = this->ht_.equal_range(this->UniqueKey(1)); + EXPECT_TRUE(eq_pair.first == eq_pair.second); + + this->ht_.insert(this->UniqueObject(1)); + EXPECT_FALSE(this->ht_.empty()); + this->ht_.insert(this->UniqueObject(11)); + this->ht_.insert(this->UniqueObject(111)); + this->ht_.insert(this->UniqueObject(1111)); + this->ht_.insert(this->UniqueObject(11111)); + this->ht_.insert(this->UniqueObject(111111)); + this->ht_.insert(this->UniqueObject(1111111)); + this->ht_.insert(this->UniqueObject(11111111)); + this->ht_.insert(this->UniqueObject(111111111)); + EXPECT_EQ(9u, this->ht_.size()); + typename TypeParam::const_iterator it = this->ht_.find(this->UniqueKey(1)); + EXPECT_EQ(it.key(), this->UniqueKey(1)); + + // Allow testing the const version of the methods as well. + const TypeParam ht = this->ht_; + + // Some successful lookups (via find, count, and equal_range). 
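+    // (Keys are unique in these containers, so count() is either 0 or 1 and
+    // equal_range() spans at most one element; the ++first / == second checks
+    // below verify exactly that.)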
+ EXPECT_TRUE(this->ht_.find(this->UniqueKey(1)) != this->ht_.end()); + EXPECT_EQ(1u, this->ht_.count(this->UniqueKey(1))); + eq_pair = this->ht_.equal_range(this->UniqueKey(1)); + EXPECT_TRUE(eq_pair.first != eq_pair.second); + EXPECT_EQ(eq_pair.first.key(), this->UniqueKey(1)); + ++eq_pair.first; + EXPECT_TRUE(eq_pair.first == eq_pair.second); + + EXPECT_TRUE(ht.find(this->UniqueKey(1)) != ht.end()); + EXPECT_EQ(1u, ht.count(this->UniqueKey(1))); + const_eq_pair = ht.equal_range(this->UniqueKey(1)); + EXPECT_TRUE(const_eq_pair.first != const_eq_pair.second); + EXPECT_EQ(const_eq_pair.first.key(), this->UniqueKey(1)); + ++const_eq_pair.first; + EXPECT_TRUE(const_eq_pair.first == const_eq_pair.second); + + EXPECT_TRUE(this->ht_.find(this->UniqueKey(11111)) != this->ht_.end()); + EXPECT_EQ(1u, this->ht_.count(this->UniqueKey(11111))); + eq_pair = this->ht_.equal_range(this->UniqueKey(11111)); + EXPECT_TRUE(eq_pair.first != eq_pair.second); + EXPECT_EQ(eq_pair.first.key(), this->UniqueKey(11111)); + ++eq_pair.first; + EXPECT_TRUE(eq_pair.first == eq_pair.second); + + EXPECT_TRUE(ht.find(this->UniqueKey(11111)) != ht.end()); + EXPECT_EQ(1u, ht.count(this->UniqueKey(11111))); + const_eq_pair = ht.equal_range(this->UniqueKey(11111)); + EXPECT_TRUE(const_eq_pair.first != const_eq_pair.second); + EXPECT_EQ(const_eq_pair.first.key(), this->UniqueKey(11111)); + ++const_eq_pair.first; + EXPECT_TRUE(const_eq_pair.first == const_eq_pair.second); + + // Some unsuccessful lookups (via find, count, and equal_range). + EXPECT_TRUE(this->ht_.find(this->UniqueKey(11112)) == this->ht_.end()); + EXPECT_EQ(0u, this->ht_.count(this->UniqueKey(11112))); + eq_pair = this->ht_.equal_range(this->UniqueKey(11112)); + EXPECT_TRUE(eq_pair.first == eq_pair.second); + + EXPECT_TRUE(ht.find(this->UniqueKey(11112)) == ht.end()); + EXPECT_EQ(0u, ht.count(this->UniqueKey(11112))); + const_eq_pair = ht.equal_range(this->UniqueKey(11112)); + EXPECT_TRUE(const_eq_pair.first == const_eq_pair.second); + + EXPECT_TRUE(this->ht_.find(this->UniqueKey(11110)) == this->ht_.end()); + EXPECT_EQ(0u, this->ht_.count(this->UniqueKey(11110))); + eq_pair = this->ht_.equal_range(this->UniqueKey(11110)); + EXPECT_TRUE(eq_pair.first == eq_pair.second); + + EXPECT_TRUE(ht.find(this->UniqueKey(11110)) == ht.end()); + EXPECT_EQ(0u, ht.count(this->UniqueKey(11110))); + const_eq_pair = ht.equal_range(this->UniqueKey(11110)); + EXPECT_TRUE(const_eq_pair.first == const_eq_pair.second); +} + +TYPED_TEST(HashtableAllTest, BracketInsert) +{ + // tests operator[], for those types that support it. + if (!this->ht_.supports_brackets()) + return; + + // bracket_equal is equivalent to ht_[a] == b. It should insert a if + // it doesn't already exist. + EXPECT_TRUE(this->ht_.bracket_equal(this->UniqueKey(1), + this->ht_.default_data())); + EXPECT_TRUE(this->ht_.find(this->UniqueKey(1)) != this->ht_.end()); + + // bracket_assign is equivalent to ht_[a] = b. + this->ht_.bracket_assign(this->UniqueKey(2), + this->ht_.get_data(this->UniqueObject(4))); + EXPECT_TRUE(this->ht_.find(this->UniqueKey(2)) != this->ht_.end()); + EXPECT_TRUE(this->ht_.bracket_equal( + this->UniqueKey(2), this->ht_.get_data(this->UniqueObject(4)))); + + this->ht_.bracket_assign( + this->UniqueKey(2), this->ht_.get_data(this->UniqueObject(6))); + EXPECT_TRUE(this->ht_.bracket_equal( + this->UniqueKey(2), this->ht_.get_data(this->UniqueObject(6)))); + // bracket_equal shouldn't have modified the value. 
+ EXPECT_TRUE(this->ht_.bracket_equal( + this->UniqueKey(2), this->ht_.get_data(this->UniqueObject(6)))); + + // Verify that an operator[] that doesn't cause a resize, also + // doesn't require an extra rehash. + TypeParam ht(100); + EXPECT_EQ(0, ht.hash_funct().num_hashes()); + ht.bracket_assign(this->UniqueKey(2), ht.get_data(this->UniqueObject(2))); + EXPECT_EQ(1, ht.hash_funct().num_hashes()); + + // And overwriting, likewise, should only cause one extra hash. + ht.bracket_assign(this->UniqueKey(2), ht.get_data(this->UniqueObject(2))); + EXPECT_EQ(2, ht.hash_funct().num_hashes()); +} + + +TYPED_TEST(HashtableAllTest, InsertValue) +{ + // First, try some straightforward insertions. + EXPECT_TRUE(this->ht_.empty()); + this->ht_.insert(this->UniqueObject(1)); + EXPECT_FALSE(this->ht_.empty()); + this->ht_.insert(this->UniqueObject(11)); + this->ht_.insert(this->UniqueObject(111)); + this->ht_.insert(this->UniqueObject(1111)); + this->ht_.insert(this->UniqueObject(11111)); + this->ht_.insert(this->UniqueObject(111111)); + this->ht_.insert(this->UniqueObject(1111111)); + this->ht_.insert(this->UniqueObject(11111111)); + this->ht_.insert(this->UniqueObject(111111111)); + EXPECT_EQ(9u, this->ht_.size()); + EXPECT_EQ(1u, this->ht_.count(this->UniqueKey(1))); + EXPECT_EQ(1u, this->ht_.count(this->UniqueKey(1111))); + + // Check the return type. + pair insert_it; + insert_it = this->ht_.insert(this->UniqueObject(1)); + EXPECT_EQ(false, insert_it.second); // false: already present + EXPECT_TRUE(*insert_it.first == this->UniqueObject(1)); + + insert_it = this->ht_.insert(this->UniqueObject(2)); + EXPECT_EQ(true, insert_it.second); // true: not already present + EXPECT_TRUE(*insert_it.first == this->UniqueObject(2)); +} + +TYPED_TEST(HashtableIntTest, InsertRange) +{ + // We just test the ints here, to make the placement-new easier. + TypeParam ht_source; + ht_source.insert(this->UniqueObject(10)); + ht_source.insert(this->UniqueObject(100)); + ht_source.insert(this->UniqueObject(1000)); + ht_source.insert(this->UniqueObject(10000)); + ht_source.insert(this->UniqueObject(100000)); + ht_source.insert(this->UniqueObject(1000000)); + + const typename TypeParam::value_type input[] = { + // This is a copy of the first element in ht_source. + *ht_source.begin(), + this->UniqueObject(2), + this->UniqueObject(4), + this->UniqueObject(8) + }; + + set set_input; + set_input.insert(this->UniqueObject(1111111)); + set_input.insert(this->UniqueObject(111111)); + set_input.insert(this->UniqueObject(11111)); + set_input.insert(this->UniqueObject(1111)); + set_input.insert(this->UniqueObject(111)); + set_input.insert(this->UniqueObject(11)); + + // Insert from ht_source, an iterator of the same type as us. + typename TypeParam::const_iterator begin = ht_source.begin(); + typename TypeParam::const_iterator end = begin; + std::advance(end, 3); + this->ht_.insert(begin, end); // insert 3 elements from ht_source + EXPECT_EQ(3u, this->ht_.size()); + EXPECT_TRUE(*this->ht_.begin() == this->UniqueObject(10) || + *this->ht_.begin() == this->UniqueObject(100) || + *this->ht_.begin() == this->UniqueObject(1000) || + *this->ht_.begin() == this->UniqueObject(10000) || + *this->ht_.begin() == this->UniqueObject(100000) || + *this->ht_.begin() == this->UniqueObject(1000000)); + + // And insert from set_input, a separate, non-random-access iterator. 
+ typename set::const_iterator set_begin; + typename set::const_iterator set_end; + set_begin = set_input.begin(); + set_end = set_begin; + std::advance(set_end, 3); + this->ht_.insert(set_begin, set_end); + EXPECT_EQ(6u, this->ht_.size()); + + // Insert from input as well, a separate, random-access iterator. + // The first element of input overlaps with an existing element + // of ht_, so this should only up the size by 2. + this->ht_.insert(&input[0], &input[3]); + EXPECT_EQ(8u, this->ht_.size()); +} + +TEST(HashtableTest, InsertValueToMap) +{ + // For the maps in particular, ensure that inserting doesn't change + // the value. + sparse_hash_map shm; + pair::iterator, bool> shm_it; + shm[1] = 2; // test a different method of inserting + shm_it = shm.insert(pair(1, 3)); + EXPECT_EQ(false, shm_it.second); + EXPECT_EQ(1, shm_it.first->first); + EXPECT_EQ(2, shm_it.first->second); + shm_it.first->second = 20; + EXPECT_EQ(20, shm[1]); + + shm_it = shm.insert(pair(2, 4)); + EXPECT_EQ(true, shm_it.second); + EXPECT_EQ(2, shm_it.first->first); + EXPECT_EQ(4, shm_it.first->second); + EXPECT_EQ(4, shm[2]); +} + +TYPED_TEST(HashtableStringTest, EmptyKey) +{ + // Only run the string tests, to make it easier to know what the + // empty key should be. + if (!this->ht_.supports_empty_key()) + return; + EXPECT_EQ(kEmptyString, this->ht_.empty_key()); +} + +TYPED_TEST(HashtableAllTest, DeletedKey) +{ + if (!this->ht_.supports_deleted_key()) + return; + this->ht_.insert(this->UniqueObject(10)); + this->ht_.insert(this->UniqueObject(20)); + this->ht_.set_deleted_key(this->UniqueKey(1)); + EXPECT_EQ(this->ht_.deleted_key(), this->UniqueKey(1)); + EXPECT_EQ(2u, this->ht_.size()); + this->ht_.erase(this->UniqueKey(20)); + EXPECT_EQ(1u, this->ht_.size()); + + // Changing the deleted key is fine. + this->ht_.set_deleted_key(this->UniqueKey(2)); + EXPECT_EQ(this->ht_.deleted_key(), this->UniqueKey(2)); + EXPECT_EQ(1u, this->ht_.size()); +} + +TYPED_TEST(HashtableAllTest, Erase) +{ + this->ht_.set_deleted_key(this->UniqueKey(1)); + EXPECT_EQ(0u, this->ht_.erase(this->UniqueKey(20))); + this->ht_.insert(this->UniqueObject(10)); + this->ht_.insert(this->UniqueObject(20)); + EXPECT_EQ(1u, this->ht_.erase(this->UniqueKey(20))); + EXPECT_EQ(1u, this->ht_.size()); + EXPECT_EQ(0u, this->ht_.erase(this->UniqueKey(20))); + EXPECT_EQ(1u, this->ht_.size()); + EXPECT_EQ(0u, this->ht_.erase(this->UniqueKey(19))); + EXPECT_EQ(1u, this->ht_.size()); + + typename TypeParam::iterator it = this->ht_.find(this->UniqueKey(10)); + EXPECT_TRUE(it != this->ht_.end()); + this->ht_.erase(it); + EXPECT_EQ(0u, this->ht_.size()); + + for (int i = 10; i < 100; i++) + this->ht_.insert(this->UniqueObject(i)); + EXPECT_EQ(90u, this->ht_.size()); + this->ht_.erase(this->ht_.begin(), this->ht_.end()); + EXPECT_EQ(0u, this->ht_.size()); +} + +TYPED_TEST(HashtableAllTest, EraseDoesNotResize) +{ + this->ht_.set_deleted_key(this->UniqueKey(1)); + for (int i = 10; i < 2000; i++) { + this->ht_.insert(this->UniqueObject(i)); + } + const typename TypeParam::size_type old_count = this->ht_.bucket_count(); + for (int i = 10; i < 1000; i++) { // erase half one at a time + EXPECT_EQ(1u, this->ht_.erase(this->UniqueKey(i))); + } + this->ht_.erase(this->ht_.begin(), this->ht_.end()); // and the rest at once + EXPECT_EQ(0u, this->ht_.size()); + EXPECT_EQ(old_count, this->ht_.bucket_count()); +} + +TYPED_TEST(HashtableAllTest, Equals) +{ + // The real test here is whether two hashtables are equal if they + // have the same items but in a different order. 
+ TypeParam ht1; + TypeParam ht2; + + EXPECT_TRUE(ht1 == ht1); + EXPECT_FALSE(ht1 != ht1); + EXPECT_TRUE(ht1 == ht2); + EXPECT_FALSE(ht1 != ht2); + ht1.set_deleted_key(this->UniqueKey(1)); + // Only the contents affect equality, not things like deleted-key. + EXPECT_TRUE(ht1 == ht2); + EXPECT_FALSE(ht1 != ht2); + ht1.resize(2000); + EXPECT_TRUE(ht1 == ht2); + + // The choice of allocator/etc doesn't matter either. + Hasher hasher(1); + Alloc alloc(2, NULL); + TypeParam ht3(5, hasher, hasher, alloc); + EXPECT_TRUE(ht1 == ht3); + EXPECT_FALSE(ht1 != ht3); + + ht1.insert(this->UniqueObject(2)); + EXPECT_TRUE(ht1 != ht2); + EXPECT_FALSE(ht1 == ht2); // this should hold as well! + + ht2.insert(this->UniqueObject(2)); + EXPECT_TRUE(ht1 == ht2); + + for (int i = 3; i <= 2000; i++) { + ht1.insert(this->UniqueObject(i)); + } + for (int i = 2000; i >= 3; i--) { + ht2.insert(this->UniqueObject(i)); + } + EXPECT_TRUE(ht1 == ht2); +} + +TEST(HashtableTest, IntIO) +{ + // Since the set case is just a special (easier) case than the map case, I + // just test on sparse_hash_map. This handles the easy case where we can + // use the standard reader and writer. + sparse_hash_map ht_out; + ht_out.set_deleted_key(0); + for (int i = 1; i < 1000; i++) { + ht_out[i] = i * i; + } + ht_out.erase(563); // just to test having some erased keys when we write. + ht_out.erase(22); + + string file(TmpFile("intio")); + FILE* fp = fopen(file.c_str(), "wb"); + if (fp) + { + EXPECT_TRUE(fp != NULL); + EXPECT_TRUE(ht_out.write_metadata(fp)); + EXPECT_TRUE(ht_out.write_nopointer_data(fp)); + fclose(fp); + } + + sparse_hash_map ht_in; + fp = fopen(file.c_str(), "rb"); + if (fp) + { + EXPECT_TRUE(fp != NULL); + EXPECT_TRUE(ht_in.read_metadata(fp)); + EXPECT_TRUE(ht_in.read_nopointer_data(fp)); + fclose(fp); + } + + EXPECT_EQ(1, ht_in[1]); + EXPECT_EQ(998001, ht_in[999]); + EXPECT_EQ(100, ht_in[10]); + EXPECT_EQ(441, ht_in[21]); + EXPECT_EQ(0, ht_in[22]); // should not have been saved + EXPECT_EQ(0, ht_in[563]); +} + +TEST(HashtableTest, StringIO) +{ + // Since the set case is just a special (easier) case than the map case, + // I just test on sparse_hash_map. This handles the difficult case where + // we have to write our own custom reader/writer for the data. + typedef sparse_hash_map SP; + SP ht_out; + ht_out.set_deleted_key(string("")); + + for (int i = 32; i < 128; i++) { + // This maps 'a' to 32 a's, 'b' to 33 b's, etc. + ht_out[string(1, (char)i)] = string((size_t)i, (char)i); + } + ht_out.erase("c"); // just to test having some erased keys when we write. 
+ ht_out.erase("y"); + + string file(TmpFile("stringio")); + FILE* fp = fopen(file.c_str(), "wb"); + if (fp) + { + EXPECT_TRUE(fp != NULL); + EXPECT_TRUE(ht_out.write_metadata(fp)); + + for (SP::const_iterator it = ht_out.cbegin(); it != ht_out.cend(); ++it) + { + const string::size_type first_size = it->first.length(); + fwrite(&first_size, sizeof(first_size), 1, fp); // ignore endianness issues + fwrite(it->first.c_str(), first_size, 1, fp); + + const string::size_type second_size = it->second.length(); + fwrite(&second_size, sizeof(second_size), 1, fp); + fwrite(it->second.c_str(), second_size, 1, fp); + } + fclose(fp); + } + + sparse_hash_map ht_in; + fp = fopen(file.c_str(), "rb"); + if (fp) + { + EXPECT_TRUE(fp != NULL); + EXPECT_TRUE(ht_in.read_metadata(fp)); + for (sparse_hash_map::iterator + it = ht_in.begin(); it != ht_in.end(); ++it) { + string::size_type first_size; + EXPECT_EQ(1u, fread(&first_size, sizeof(first_size), 1, fp)); + char* first = new char[first_size]; + EXPECT_EQ(1u, fread(first, first_size, 1, fp)); + + string::size_type second_size; + EXPECT_EQ(1u, fread(&second_size, sizeof(second_size), 1, fp)); + char* second = new char[second_size]; + EXPECT_EQ(1u, fread(second, second_size, 1, fp)); + + // it points to garbage, so we have to use placement-new to initialize. + // We also have to use const-cast since it->first is const. + new(const_cast(&it->first)) string(first, first_size); + new(&it->second) string(second, second_size); + delete[] first; + delete[] second; + } + fclose(fp); + } + EXPECT_EQ(string(" "), ht_in[" "]); + EXPECT_EQ(string("+++++++++++++++++++++++++++++++++++++++++++"), ht_in["+"]); + EXPECT_EQ(string(""), ht_in["c"]); // should not have been saved + EXPECT_EQ(string(""), ht_in["y"]); +} + +TYPED_TEST(HashtableAllTest, Serialization) +{ + if (!this->ht_.supports_serialization()) return; + TypeParam ht_out; + ht_out.set_deleted_key(this->UniqueKey(2000)); + for (int i = 1; i < 100; i++) { + ht_out.insert(this->UniqueObject(i)); + } + // just to test having some erased keys when we write. + ht_out.erase(this->UniqueKey(56)); + ht_out.erase(this->UniqueKey(22)); + + string file(TmpFile("serialization")); + FILE* fp = fopen(file.c_str(), "wb"); + if (fp) + { + EXPECT_TRUE(fp != NULL); + EXPECT_TRUE(ht_out.serialize(ValueSerializer(), fp)); + fclose(fp); + } + + TypeParam ht_in; + fp = fopen(file.c_str(), "rb"); + if (fp) + { + EXPECT_TRUE(fp != NULL); + EXPECT_TRUE(ht_in.unserialize(ValueSerializer(), fp)); + fclose(fp); + } + + EXPECT_EQ(this->UniqueObject(1), *ht_in.find(this->UniqueKey(1))); + EXPECT_EQ(this->UniqueObject(99), *ht_in.find(this->UniqueKey(99))); + EXPECT_FALSE(ht_in.count(this->UniqueKey(100))); + EXPECT_EQ(this->UniqueObject(21), *ht_in.find(this->UniqueKey(21))); + // should not have been saved + EXPECT_FALSE(ht_in.count(this->UniqueKey(22))); + EXPECT_FALSE(ht_in.count(this->UniqueKey(56))); +} + +TYPED_TEST(HashtableIntTest, NopointerSerialization) +{ + if (!this->ht_.supports_serialization()) return; + TypeParam ht_out; + ht_out.set_deleted_key(this->UniqueKey(2000)); + for (int i = 1; i < 100; i++) { + ht_out.insert(this->UniqueObject(i)); + } + // just to test having some erased keys when we write. 
+ ht_out.erase(this->UniqueKey(56)); + ht_out.erase(this->UniqueKey(22)); + + string file(TmpFile("nopointer_serialization")); + FILE* fp = fopen(file.c_str(), "wb"); + if (fp) + { + EXPECT_TRUE(fp != NULL); + EXPECT_TRUE(ht_out.serialize(typename TypeParam::NopointerSerializer(), fp)); + fclose(fp); + } + + TypeParam ht_in; + fp = fopen(file.c_str(), "rb"); + if (fp) + { + EXPECT_TRUE(fp != NULL); + EXPECT_TRUE(ht_in.unserialize(typename TypeParam::NopointerSerializer(), fp)); + fclose(fp); + } + + EXPECT_EQ(this->UniqueObject(1), *ht_in.find(this->UniqueKey(1))); + EXPECT_EQ(this->UniqueObject(99), *ht_in.find(this->UniqueKey(99))); + EXPECT_FALSE(ht_in.count(this->UniqueKey(100))); + EXPECT_EQ(this->UniqueObject(21), *ht_in.find(this->UniqueKey(21))); + // should not have been saved + EXPECT_FALSE(ht_in.count(this->UniqueKey(22))); + EXPECT_FALSE(ht_in.count(this->UniqueKey(56))); +} + +// We don't support serializing to a string by default, but you can do +// it by writing your own custom input/output class. +class StringIO { + public: + explicit StringIO(string* s) : s_(s) {} + size_t Write(const void* buf, size_t len) { + s_->append(reinterpret_cast(buf), len); + return len; + } + size_t Read(void* buf, size_t len) { + if (s_->length() < len) + len = s_->length(); + memcpy(reinterpret_cast(buf), s_->data(), len); + s_->erase(0, len); + return len; + } + private: + StringIO& operator=(const StringIO&); + string* const s_; +}; + +TYPED_TEST(HashtableIntTest, SerializingToString) +{ + if (!this->ht_.supports_serialization()) return; + TypeParam ht_out; + ht_out.set_deleted_key(this->UniqueKey(2000)); + for (int i = 1; i < 100; i++) { + ht_out.insert(this->UniqueObject(i)); + } + // just to test having some erased keys when we write. + ht_out.erase(this->UniqueKey(56)); + ht_out.erase(this->UniqueKey(22)); + + string stringbuf; + StringIO stringio(&stringbuf); + EXPECT_TRUE(ht_out.serialize(typename TypeParam::NopointerSerializer(), + &stringio)); + + TypeParam ht_in; + EXPECT_TRUE(ht_in.unserialize(typename TypeParam::NopointerSerializer(), + &stringio)); + + EXPECT_EQ(this->UniqueObject(1), *ht_in.find(this->UniqueKey(1))); + EXPECT_EQ(this->UniqueObject(99), *ht_in.find(this->UniqueKey(99))); + EXPECT_FALSE(ht_in.count(this->UniqueKey(100))); + EXPECT_EQ(this->UniqueObject(21), *ht_in.find(this->UniqueKey(21))); + // should not have been saved + EXPECT_FALSE(ht_in.count(this->UniqueKey(22))); + EXPECT_FALSE(ht_in.count(this->UniqueKey(56))); +} + +// An easier way to do the above would be to use the existing stream methods. +TYPED_TEST(HashtableIntTest, SerializingToStringStream) +{ + if (!this->ht_.supports_serialization()) return; + TypeParam ht_out; + ht_out.set_deleted_key(this->UniqueKey(2000)); + for (int i = 1; i < 100; i++) { + ht_out.insert(this->UniqueObject(i)); + } + // just to test having some erased keys when we write. 
+ ht_out.erase(this->UniqueKey(56)); + ht_out.erase(this->UniqueKey(22)); + + std::stringstream string_buffer; + EXPECT_TRUE(ht_out.serialize(typename TypeParam::NopointerSerializer(), + &string_buffer)); + + TypeParam ht_in; + EXPECT_TRUE(ht_in.unserialize(typename TypeParam::NopointerSerializer(), + &string_buffer)); + + EXPECT_EQ(this->UniqueObject(1), *ht_in.find(this->UniqueKey(1))); + EXPECT_EQ(this->UniqueObject(99), *ht_in.find(this->UniqueKey(99))); + EXPECT_FALSE(ht_in.count(this->UniqueKey(100))); + EXPECT_EQ(this->UniqueObject(21), *ht_in.find(this->UniqueKey(21))); + // should not have been saved + EXPECT_FALSE(ht_in.count(this->UniqueKey(22))); + EXPECT_FALSE(ht_in.count(this->UniqueKey(56))); +} + +// Verify that the metadata serialization is endianness and word size +// agnostic. +TYPED_TEST(HashtableAllTest, MetadataSerializationAndEndianness) +{ + TypeParam ht_out; + string kExpectedDense("\x13W\x86""B\0\0\0\0\0\0\0 \0\0\0\0\0\0\0\0\0\0\0\0", + 24); + + // GP change - switched size from 20 to formula, because the sparsegroup bitmap is 4 or 8 bytes and not 6 + string kExpectedSparse("$hu1\0\0\0 \0\0\0\0\0\0\0\0\0\0\0", 12 + sizeof(group_bm_type)); + + if (ht_out.supports_readwrite()) { + size_t num_bytes = 0; + string file(TmpFile("metadata_serialization")); + FILE* fp = fopen(file.c_str(), "wb"); + if (fp) + { + EXPECT_TRUE(fp != NULL); + + EXPECT_TRUE(ht_out.write_metadata(fp)); + EXPECT_TRUE(ht_out.write_nopointer_data(fp)); + + num_bytes = (const size_t)ftell(fp); + fclose(fp); + } + + char contents[24] = {0}; + fp = fopen(file.c_str(), "rb"); + if (fp) + { + EXPECT_LE(num_bytes, static_cast(24)); + EXPECT_EQ(num_bytes, fread(contents, 1, num_bytes <= 24 ? num_bytes : 24, fp)); + EXPECT_EQ(EOF, fgetc(fp)); // check we're *exactly* the right size + fclose(fp); + } + // TODO(csilvers): check type of ht_out instead of looking at the 1st byte. + if (contents[0] == kExpectedDense[0]) { + EXPECT_EQ(kExpectedDense, string(contents, num_bytes)); + } else { + EXPECT_EQ(kExpectedSparse, string(contents, num_bytes)); + } + } + + // Do it again with new-style serialization. Here we can use StringIO. + if (ht_out.supports_serialization()) { + string stringbuf; + StringIO stringio(&stringbuf); + EXPECT_TRUE(ht_out.serialize(typename TypeParam::NopointerSerializer(), + &stringio)); + if (stringbuf[0] == kExpectedDense[0]) { + EXPECT_EQ(kExpectedDense, stringbuf); + } else { + EXPECT_EQ(kExpectedSparse, stringbuf); + } + } +} + + +// ------------------------------------------------------------------------ +// The above tests test the general API for correctness. These tests +// test a few corner cases that have tripped us up in the past, and +// more general, cross-API issues like memory management. + +TYPED_TEST(HashtableAllTest, BracketOperatorCrashing) +{ + this->ht_.set_deleted_key(this->UniqueKey(1)); + for (int iters = 0; iters < 10; iters++) { + // We start at 33 because after shrinking, we'll be at 32 buckets. + for (int i = 33; i < 133; i++) { + this->ht_.bracket_assign(this->UniqueKey(i), + this->ht_.get_data(this->UniqueObject(i))); + } + this->ht_.clear_no_resize(); + // This will force a shrink on the next insert, which we want to test. + this->ht_.bracket_assign(this->UniqueKey(2), + this->ht_.get_data(this->UniqueObject(2))); + this->ht_.erase(this->UniqueKey(2)); + } +} + +// For data types with trivial copy-constructors and destructors, we +// should use an optimized routine for data-copying, that involves +// memmove. 
We test this by keeping count of how many times the +// copy-constructor is called; it should be much less with the +// optimized code. +struct Memmove +{ +public: + Memmove(): i(0) {} + explicit Memmove(int ival): i(ival) {} + Memmove(const Memmove& that) { this->i = that.i; num_copies++; } + int i; + static int num_copies; +}; +int Memmove::num_copies = 0; + +struct NoMemmove +{ +public: + NoMemmove(): i(0) {} + explicit NoMemmove(int ival): i(ival) {} + NoMemmove(const NoMemmove& that) { this->i = that.i; num_copies++; } + int i; + static int num_copies; +}; +int NoMemmove::num_copies = 0; + +} // unnamed namespace + +#if 0 +// This is what tells the hashtable code it can use memmove for this class: +namespace google { + +template<> struct has_trivial_copy : true_type { }; +template<> struct has_trivial_destructor : true_type { }; + +}; +#endif + +namespace +{ + +TEST(HashtableTest, SimpleDataTypeOptimizations) +{ + // Only sparsehashtable optimizes moves in this way. + sparse_hash_map memmove; + sparse_hash_map nomemmove; + sparse_hash_map > + memmove_nonstandard_alloc; + + Memmove::num_copies = 0; + for (int i = 10000; i > 0; i--) { + memmove[i] = Memmove(i); + } + // GP change - const int memmove_copies = Memmove::num_copies; + + NoMemmove::num_copies = 0; + for (int i = 10000; i > 0; i--) { + nomemmove[i] = NoMemmove(i); + } + // GP change - const int nomemmove_copies = NoMemmove::num_copies; + + Memmove::num_copies = 0; + for (int i = 10000; i > 0; i--) { + memmove_nonstandard_alloc[i] = Memmove(i); + } + // GP change - const int memmove_nonstandard_alloc_copies = Memmove::num_copies; + + // GP change - commented out following two lines + //EXPECT_GT(nomemmove_copies, memmove_copies); + //EXPECT_EQ(nomemmove_copies, memmove_nonstandard_alloc_copies); +} + +TYPED_TEST(HashtableAllTest, ResizeHysteresis) +{ + // We want to make sure that when we create a hashtable, and then + // add and delete one element, the size of the hashtable doesn't + // change. + this->ht_.set_deleted_key(this->UniqueKey(1)); + typename TypeParam::size_type old_bucket_count = this->ht_.bucket_count(); + this->ht_.insert(this->UniqueObject(4)); + this->ht_.erase(this->UniqueKey(4)); + this->ht_.insert(this->UniqueObject(4)); + this->ht_.erase(this->UniqueKey(4)); + EXPECT_EQ(old_bucket_count, this->ht_.bucket_count()); + + // Try it again, but with a hashtable that starts very small + TypeParam ht(2); + EXPECT_LT(ht.bucket_count(), 32u); // verify we really do start small + ht.set_deleted_key(this->UniqueKey(1)); + old_bucket_count = ht.bucket_count(); + ht.insert(this->UniqueObject(4)); + ht.erase(this->UniqueKey(4)); + ht.insert(this->UniqueObject(4)); + ht.erase(this->UniqueKey(4)); + EXPECT_EQ(old_bucket_count, ht.bucket_count()); +} + +TEST(HashtableTest, ConstKey) +{ + // Sometimes people write hash_map, even though the + // const isn't necessary. Make sure we handle this cleanly. + sparse_hash_map shm; + shm.set_deleted_key(1); + shm[10] = 20; +} + +TYPED_TEST(HashtableAllTest, ResizeActuallyResizes) +{ + // This tests for a problem we had where we could repeatedly "resize" + // a hashtable to the same size it was before, on every insert. + // ----------------------------------------------------------------- + const typename TypeParam::size_type kSize = 1<<10; // Pick any power of 2 + const float kResize = 0.8f; // anything between 0.5 and 1 is fine. 
+ const int kThreshold = static_cast(kSize * kResize - 1); + this->ht_.set_resizing_parameters(0, kResize); + this->ht_.set_deleted_key(this->UniqueKey(kThreshold + 100)); + + // Get right up to the resizing threshold. + for (int i = 0; i <= kThreshold; i++) { + this->ht_.insert(this->UniqueObject(i+1)); + } + // The bucket count should equal kSize. + EXPECT_EQ(kSize, this->ht_.bucket_count()); + + // Now start doing erase+insert pairs. This should cause us to + // copy the hashtable at most once. + const int pre_copies = this->ht_.num_table_copies(); + for (int i = 0; i < static_cast(kSize); i++) { + this->ht_.erase(this->UniqueKey(kThreshold)); + this->ht_.insert(this->UniqueObject(kThreshold)); + } + EXPECT_LT(this->ht_.num_table_copies(), pre_copies + 2); + + // Now create a hashtable where we go right to the threshold, then + // delete everything and do one insert. Even though our hashtable + // is now tiny, we should still have at least kSize buckets, because + // our shrink threshhold is 0. + // ----------------------------------------------------------------- + TypeParam ht2; + ht2.set_deleted_key(this->UniqueKey(kThreshold + 100)); + ht2.set_resizing_parameters(0, kResize); + EXPECT_LT(ht2.bucket_count(), kSize); + for (int i = 0; i <= kThreshold; i++) { + ht2.insert(this->UniqueObject(i+1)); + } + EXPECT_EQ(ht2.bucket_count(), kSize); + for (int i = 0; i <= kThreshold; i++) { + ht2.erase(this->UniqueKey(i+1)); + EXPECT_EQ(ht2.bucket_count(), kSize); + } + ht2.insert(this->UniqueObject(kThreshold+2)); + EXPECT_GE(ht2.bucket_count(), kSize); +} + +TEST(HashtableTest, CXX11) +{ +#if !defined(SPP_NO_CXX11_HDR_INITIALIZER_LIST) + { + // Initializer lists + // ----------------- + typedef sparse_hash_map Smap; + + Smap smap({ {1, 1}, {2, 2} }); + EXPECT_EQ(smap.size(), 2); + + smap = { {1, 1}, {2, 2}, {3, 4} }; + EXPECT_EQ(smap.size(), 3); + + smap.insert({{5, 1}, {6, 1}}); + EXPECT_EQ(smap.size(), 5); + EXPECT_EQ(smap[6], 1); + EXPECT_EQ(smap.at(6), 1); + try + { + EXPECT_EQ(smap.at(999), 1); + } + catch (...) + {}; + + sparse_hash_set sset({ 1, 3, 4, 5 }); + EXPECT_EQ(sset.size(), 4); + } +#endif +} + + + +TEST(HashtableTest, NestedHashtables) +{ + // People can do better than to have a hash_map of hash_maps, but we + // should still support it. I try a few different mappings. 
+ sparse_hash_map, Hasher, Hasher> ht1; + + ht1["hi"]; // create a sub-ht with the default values + ht1["lo"][1] = "there"; + sparse_hash_map, Hasher, Hasher> + ht1copy = ht1; +} + +TEST(HashtableDeathTest, ResizeOverflow) +{ + sparse_hash_map ht2; + EXPECT_DEATH(ht2.resize(static_cast(-1)), "overflows size_type"); +} + +TEST(HashtableDeathTest, InsertSizeTypeOverflow) +{ + static const int kMax = 256; + vector test_data(kMax); + for (int i = 0; i < kMax; ++i) { + test_data[(size_t)i] = i+1000; + } + + sparse_hash_set > shs; + + // Test we are using the correct allocator + EXPECT_TRUE(shs.get_allocator().is_custom_alloc()); + + // Test size_type overflow in insert(it, it) + EXPECT_DEATH(shs.insert(test_data.begin(), test_data.end()), "overflows size_type"); +} + +TEST(HashtableDeathTest, InsertMaxSizeOverflow) +{ + static const int kMax = 256; + vector test_data(kMax); + for (int i = 0; i < kMax; ++i) { + test_data[(size_t)i] = i+1000; + } + + sparse_hash_set > shs; + + // Test max_size overflow + EXPECT_DEATH(shs.insert(test_data.begin(), test_data.begin() + 11), "exceed max_size"); +} + +TEST(HashtableDeathTest, ResizeSizeTypeOverflow) +{ + // Test min-buckets overflow, when we want to resize too close to size_type + sparse_hash_set > shs; + + EXPECT_DEATH(shs.resize(250), "overflows size_type"); +} + +TEST(HashtableDeathTest, ResizeDeltaOverflow) +{ + static const int kMax = 256; + vector test_data(kMax); + for (int i = 0; i < kMax; ++i) { + test_data[(size_t)i] = i+1000; + } + + sparse_hash_set > shs; + + for (int i = 0; i < 9; i++) { + shs.insert(i); + } + EXPECT_DEATH(shs.insert(test_data.begin(), test_data.begin() + 250), + "overflows size_type"); +} + +// ------------------------------------------------------------------------ +// This informational "test" comes last so it's easy to see. +// Also, benchmarks. + +TYPED_TEST(HashtableAllTest, ClassSizes) +{ + std::cout << "sizeof(" << typeid(TypeParam).name() << "): " + << sizeof(this->ht_) << "\n"; +} + +} // unnamed namespace + +int main(int, char **) +{ + // All the work is done in the static constructors. If they don't + // die, the tests have all passed. + cout << "PASS\n"; + return 0; +} diff --git a/resources/3rdparty/sparsepp/spp_utils.h b/resources/3rdparty/sparsepp/spp_utils.h new file mode 100644 index 000000000..6b627233c --- /dev/null +++ b/resources/3rdparty/sparsepp/spp_utils.h @@ -0,0 +1,280 @@ +// ---------------------------------------------------------------------- +// Copyright (c) 2016, Steven Gregory Popovitch - greg7mdp@gmail.com +// All rights reserved. +// +// Code derived derived from Boost libraries. +// Boost software licence reproduced below. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * The name of Steven Gregory Popovitch may not be used to +// endorse or promote products derived from this software without +// specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// ---------------------------------------------------------------------- + +// --------------------------------------------------------------------------- +// Boost Software License - Version 1.0 - August 17th, 2003 +// +// Permission is hereby granted, free of charge, to any person or organization +// obtaining a copy of the software and accompanying documentation covered by +// this license (the "Software") to use, reproduce, display, distribute, +// execute, and transmit the Software, and to prepare derivative works of the +// Software, and to permit third-parties to whom the Software is furnished to +// do so, all subject to the following: +// +// The copyright notices in the Software and this entire statement, including +// the above license grant, this restriction and the following disclaimer, +// must be included in all copies of the Software, in whole or in part, and +// all derivative works of the Software, unless such copies or derivative +// works are solely in the form of machine-executable object code generated by +// a source language processor. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// --------------------------------------------------------------------------- + +// ---------------------------------------------------------------------- +// H A S H F U N C T I O N S +// ---------------------------- +// +// Implements spp::spp_hash() and spp::hash_combine() +// +// The exact same code is duplicated in sparsepp.h. +// +// WARNING: Any change here has to be duplicated in sparsepp.h. 
+// ----------------------------------------------------------------------
+
+#if !defined(spp_utils_h_guard_)
+#define spp_utils_h_guard_
+
+#if defined(_MSC_VER)
+    #if (_MSC_VER >= 1600 )                      // vs2010 (1900 is vs2015)
+        #include <functional>
+        #define SPP_HASH_CLASS std::hash
+    #else
+        #include <hash_map>
+        #define SPP_HASH_CLASS stdext::hash_compare
+    #endif
+    #if (_MSC_FULL_VER < 190021730)
+        #define SPP_NO_CXX11_NOEXCEPT
+    #endif
+#elif defined(__GNUC__)
+    #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (__cplusplus >= 201103L)
+        #include <functional>
+        #define SPP_HASH_CLASS std::hash
+
+        #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) < 40600
+            #define SPP_NO_CXX11_NOEXCEPT
+        #endif
+    #else
+        #include <tr1/unordered_map>
+        #define SPP_HASH_CLASS std::tr1::hash
+        #define SPP_NO_CXX11_NOEXCEPT
+    #endif
+#elif defined __clang__
+    #include <functional>
+    #define SPP_HASH_CLASS std::hash
+
+    #if !__has_feature(cxx_noexcept)
+        #define SPP_NO_CXX11_NOEXCEPT
+    #endif
+#else
+    #include <functional>
+    #define SPP_HASH_CLASS std::hash
+#endif
+
+#ifdef SPP_NO_CXX11_NOEXCEPT
+    #define SPP_NOEXCEPT
+#else
+    #define SPP_NOEXCEPT noexcept
+#endif
+
+#define SPP_INLINE
+
+#ifndef SPP_NAMESPACE
+    #define SPP_NAMESPACE spp
+#endif
+
+namespace SPP_NAMESPACE
+{
+
+template <class T>
+struct spp_hash
+{
+    SPP_INLINE size_t operator()(const T &__v) const SPP_NOEXCEPT
+    {
+        SPP_HASH_CLASS<T> hasher;
+        return hasher(__v);
+    }
+};
+
+template <class T>
+struct spp_hash<T *>
+{
+    static size_t spp_log2 (size_t val) SPP_NOEXCEPT
+    {
+        size_t res = 0;
+        while (val > 1)
+        {
+            val >>= 1;
+            res++;
+        }
+        return res;
+    }
+
+    SPP_INLINE size_t operator()(const T *__v) const SPP_NOEXCEPT
+    {
+        static const size_t shift = spp_log2(1 + sizeof(T));
+        return static_cast<size_t>((*(reinterpret_cast<const uintptr_t *>(&__v))) >> shift);
+    }
+};
+
+template <>
+struct spp_hash<bool> : public std::unary_function<bool, size_t>
+{
+    SPP_INLINE size_t operator()(bool __v) const SPP_NOEXCEPT {return static_cast<size_t>(__v);}
+};
+
+template <>
+struct spp_hash<char> : public std::unary_function<char, size_t>
+{
+    SPP_INLINE size_t operator()(char __v) const SPP_NOEXCEPT {return static_cast<size_t>(__v);}
+};
+
+template <>
+struct spp_hash<signed char> : public std::unary_function<signed char, size_t>
+{
+    SPP_INLINE size_t operator()(signed char __v) const SPP_NOEXCEPT {return static_cast<size_t>(__v);}
+};
+
+template <>
+struct spp_hash<unsigned char> : public std::unary_function<unsigned char, size_t>
+{
+    SPP_INLINE size_t operator()(unsigned char __v) const SPP_NOEXCEPT {return static_cast<size_t>(__v);}
+};
+
+template <>
+struct spp_hash<wchar_t> : public std::unary_function<wchar_t, size_t>
+{
+    SPP_INLINE size_t operator()(wchar_t __v) const SPP_NOEXCEPT {return static_cast<size_t>(__v);}
+};
+
+template <>
+struct spp_hash<short> : public std::unary_function<short, size_t>
+{
+    SPP_INLINE size_t operator()(short __v) const SPP_NOEXCEPT {return static_cast<size_t>(__v);}
+};
+
+template <>
+struct spp_hash<unsigned short> : public std::unary_function<unsigned short, size_t>
+{
+    SPP_INLINE size_t operator()(unsigned short __v) const SPP_NOEXCEPT {return static_cast<size_t>(__v);}
+};
+
+template <>
+struct spp_hash<int> : public std::unary_function<int, size_t>
+{
+    SPP_INLINE size_t operator()(int __v) const SPP_NOEXCEPT {return static_cast<size_t>(__v);}
+};
+
+template <>
+struct spp_hash<unsigned int> : public std::unary_function<unsigned int, size_t>
+{
+    SPP_INLINE size_t operator()(unsigned int __v) const SPP_NOEXCEPT {return static_cast<size_t>(__v);}
+};
+
+template <>
+struct spp_hash<long> : public std::unary_function<long, size_t>
+{
+    SPP_INLINE size_t operator()(long __v) const SPP_NOEXCEPT {return static_cast<size_t>(__v);}
+};
+
+template <>
+struct spp_hash<unsigned long> : public std::unary_function<unsigned long, size_t>
+{
+    SPP_INLINE size_t operator()(unsigned long __v) const SPP_NOEXCEPT {return static_cast<size_t>(__v);}
+};
+
+template <>
+struct spp_hash<float> : public std::unary_function<float, size_t>
+{
+    SPP_INLINE size_t operator()(float __v) const SPP_NOEXCEPT
+    {
+        // -0.0 and 0.0 should return same hash
+        uint32_t *as_int = reinterpret_cast<uint32_t *>(&__v);
+        return (__v == 0) ? static_cast<size_t>(0) : static_cast<size_t>(*as_int);
+    }
+};
+
+#if 0
+// todo: we should not ignore half of the double => see libcxx/include/functional
+template <>
+struct spp_hash<double> : public std::unary_function<double, size_t>
+{
+    SPP_INLINE size_t operator()(double __v) const SPP_NOEXCEPT
+    {
+        // -0.0 and 0.0 should return same hash
+        return (__v == 0) ? (size_t)0 : (size_t)*((uint64_t *)&__v);
+    }
+};
+#endif
+
+template <class T, int sz> struct Combiner
+{
+    inline void operator()(T& seed, T value);
+};
+
+template <class T> struct Combiner<T, 4>
+{
+    inline void operator()(T& seed, T value)
+    {
+        seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+    }
+};
+
+template <class T> struct Combiner<T, 8>
+{
+    inline void operator()(T& seed, T value)
+    {
+        seed ^= value + T(0xc6a4a7935bd1e995) + (seed << 6) + (seed >> 2);
+    }
+};
+
+template <class T>
+inline void hash_combine(std::size_t& seed, T const& v)
+{
+    spp::spp_hash<T> hasher;
+    Combiner<std::size_t, sizeof(std::size_t)> combiner;
+
+    combiner(seed, hasher(v));
+}
+
+};
+
+#endif // spp_utils_h_guard_
+
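
Below is a minimal usage sketch (not part of the vendored files) showing how the `spp::hash_combine()` helper defined in `spp_utils.h` can fold the hashes of several members into a single value for a user-defined type; the `Point` struct and the `hash_value()` helper are hypothetical names chosen purely for illustration.

```c++
#include <cstddef>
#include <string>

#include "spp_utils.h"

// Hypothetical user-defined type used only to illustrate hash_combine().
struct Point
{
    int         x;
    int         y;
    std::string label;
};

// Fold each member's hash into a running seed with spp::hash_combine().
inline std::size_t hash_value(const Point& p)
{
    std::size_t seed = 0;
    spp::hash_combine(seed, p.x);
    spp::hash_combine(seed, p.y);
    spp::hash_combine(seed, p.label);
    return seed;
}
```

Because `hash_combine()` instantiates `Combiner<std::size_t, sizeof(std::size_t)>`, the 32-bit or 64-bit mixing constant is selected automatically, so the same call works unchanged on both word sizes.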