commit 5934a428980fb37cf0994a90621cc1c084e1013a Author: dehnert Date: Fri May 6 19:23:57 2016 +0200 Squashed 'resources/3rdparty/sylvan/' content from commit d91f6ac git-subtree-dir: resources/3rdparty/sylvan git-subtree-split: d91f6acb554fd7de603ab80522507f41f2faa04b diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..d45dee77e --- /dev/null +++ b/.gitignore @@ -0,0 +1,43 @@ +# autotools +**/Makefile +/autom4te.cache/ +config.* +.dirstamp +aclocal.m4 +configure +m4/* +tools +Makefile.in + +# cmake +**/CMakeCache.txt +**/CMakeFiles +**/cmake_install.cmake + +# libtool +.deps/ +.libs/ +/libtool + +# object files +*.lo +*.o +*.la + +# output files +examples/mc +examples/lddmc +test/sylvan_test +test/test_cxx +src/libsylvan.a + +# MacOS file +.DS_Store + +# eclipse files +.cproject +.project +.settings + +# coverage output +coverage diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..ac0da9fb9 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,100 @@ +sudo: false + +matrix: + include: + - os: linux + env: TOOLSET=gcc CC=gcc-4.7 CXX=g++-4.7 BUILD_TYPE="Release" HWLOC="ON" SYLVAN_STATS="ON" + addons: + apt: + packages: ["gcc-4.7", "g++-4.7", "libstd++-4.7-dev", "libgmp-dev", "cmake", "libhwloc-dev"] + sources: ["ubuntu-toolchain-r-test"] + - os: linux + env: TOOLSET=gcc CC=gcc-4.8 CXX=g++-4.8 BUILD_TYPE="Release" HWLOC="ON" SYLVAN_STATS="ON" + addons: + apt: + packages: ["gcc-4.8", "g++-4.8", "libstd++-4.8-dev", "libgmp-dev", "cmake", "libhwloc-dev"] + sources: ["ubuntu-toolchain-r-test"] + - os: linux + env: TOOLSET=gcc CC=gcc-4.9 CXX=g++-4.9 BUILD_TYPE="Release" HWLOC="ON" SYLVAN_STATS="ON" + addons: + apt: + packages: ["gcc-4.9", "g++-4.9", "libstd++-4.9-dev", "libgmp-dev", "cmake", "libhwloc-dev"] + sources: ["ubuntu-toolchain-r-test"] + - os: linux + env: TOOLSET=gcc CC=gcc-5 CXX=g++-5 BUILD_TYPE="Debug" HWLOC="OFF" SYLVAN_STATS="OFF" + addons: + apt: + packages: ["gcc-5", "g++-5", "libstd++-5-dev", "libgmp-dev", "cmake", "libhwloc-dev"] + sources: ["ubuntu-toolchain-r-test"] + - os: linux + env: TOOLSET=gcc CC=gcc-5 CXX=g++-5 BUILD_TYPE="Debug" HWLOC="ON" SYLVAN_STATS="ON" + addons: + apt: + packages: ["gcc-5", "g++-5", "libstd++-5-dev", "libgmp-dev", "cmake", "libhwloc-dev"] + sources: ["ubuntu-toolchain-r-test"] + - os: linux + env: TOOLSET=gcc CC=gcc-5 CXX=g++-5 BUILD_TYPE="Release" HWLOC="ON" SYLVAN_STATS="ON" + addons: + apt: + packages: ["gcc-5", "g++-5", "libstd++-5-dev", "libgmp-dev", "cmake", "libhwloc-dev"] + sources: ["ubuntu-toolchain-r-test"] + - os: linux + env: TOOLSET=gcc CC=gcc-5 CXX=g++-5 BUILD_TYPE="Release" HWLOC="ON" SYLVAN_STATS="OFF" + addons: + apt: + packages: ["gcc-5", "g++-5", "libstd++-5-dev", "libgmp-dev", "cmake", "libhwloc-dev"] + sources: ["ubuntu-toolchain-r-test"] + - os: linux + env: TOOLSET=gcc CC=gcc-5 CXX=g++-5 BUILD_TYPE="Release" HWLOC="ON" SYLVAN_STATS="ON" VARIANT="coverage" + addons: + apt: + packages: ["gcc-5", "g++-5", "libstd++-5-dev", "libgmp-dev", "cmake", "libhwloc-dev"] + sources: ["ubuntu-toolchain-r-test"] + - os: linux + env: TOOLSET=clang CC=/usr/local/clang-3.4/bin/clang CXX=/usr/local/clang-3.4/bin/clang++ BUILD_TYPE="Release" HWLOC="ON" SYLVAN_STATS="ON" + addons: + apt: + packages: ["clang-3.4", "libstdc++-5-dev", "libgmp-dev", "cmake", "libhwloc-dev"] + sources: ["ubuntu-toolchain-r-test", "llvm-toolchain-precise-3.4"] + - os: linux + env: TOOLSET=clang CC=clang-3.6 CXX=clang++-3.6 BUILD_TYPE="Release" HWLOC="ON" SYLVAN_STATS="ON" + addons: + apt: + packages: ["clang-3.6", "libstdc++-5-dev", 
"libgmp-dev", "cmake", "libhwloc-dev"] + sources: ["ubuntu-toolchain-r-test", "llvm-toolchain-precise-3.6"] + - os: linux + env: TOOLSET=clang CC=clang-3.7 CXX=clang++-3.7 BUILD_TYPE="Release" HWLOC="ON" SYLVAN_STATS="ON" + addons: + apt: + packages: ["clang-3.7", "libstdc++-5-dev", "libgmp-dev", "cmake", "libhwloc-dev"] + sources: ["ubuntu-toolchain-r-test", "llvm-toolchain-precise-3.7"] + - os: osx + env: TOOLSET=clang CC=clang CXX=clang++ BUILD_TYPE="Debug" HWLOC="ON" SYLVAN_STATS="ON" + - os: osx + env: TOOLSET=clang CC=clang CXX=clang++ BUILD_TYPE="Release" HWLOC="ON" SYLVAN_STATS="OFF" + - os: osx + env: TOOLSET=gcc CC=gcc-4.9 CXX=g++-4.9 BUILD_TYPE="Debug" HWLOC="ON" SYLVAN_STATS="OFF" + - os: osx + env: TOOLSET=gcc CC=gcc-4.9 CXX=g++-4.9 BUILD_TYPE="Release" HWLOC="ON" SYLVAN_STATS="OFF" + - os: osx + env: TOOLSET=gcc CC=gcc-5 CXX=g++-5 BUILD_TYPE="Debug" HWLOC="ON" SYLVAN_STATS="OFF" + - os: osx + env: TOOLSET=gcc CC=gcc-5 CXX=g++-5 BUILD_TYPE="Release" HWLOC="ON" SYLVAN_STATS="OFF" + +install: +- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; brew install argp-standalone homebrew/science/hwloc; fi +- if [[ "$TRAVIS_OS_NAME" == "osx" && "$CC" == "gcc-5" ]]; then brew install homebrew/versions/gcc5; fi + +script: +- ${CC} --version +- ${CXX} --version +- cmake . -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DUSE_HWLOC=${HWLOC} -DSYLVAN_STATS=${SYLVAN_STATS} -DWITH_COVERAGE=${COVERAGE} +- make -j 2 +- make test +- examples/simple +- examples/mc models/schedule_world.2.8-rgs.bdd -w 2 | tee /dev/fd/2 | grep -q "1,570,340" +- examples/lddmc models/blocks.2.ldd -w 2 | tee /dev/fd/2 | grep -q "7057 states" + +notifications: + email: false + diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 000000000..27762655b --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,50 @@ +cmake_minimum_required(VERSION 2.6) +project(sylvan C CXX) +enable_testing() + +set(CMAKE_C_FLAGS "-O3 -Wextra -Wall -Werror -fno-strict-aliasing -std=gnu11") +set(CMAKE_CXX_FLAGS "-O3 -Wextra -Wall -Werror -fno-strict-aliasing -Wno-deprecated-register -std=gnu++11") + +option(WITH_COVERAGE "Add generation of test coverage" OFF) +if(WITH_COVERAGE) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O0 -g -coverage") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -coverage") + + find_program(GCOV_PATH gcov) + find_program(LCOV_PATH lcov) + find_program(GENHTML_PATH genhtml) + + add_custom_target(coverage + # Cleanup lcov + ${LCOV_PATH} --directory . --zerocounters + # Run tests + COMMAND make test + # Capture counters + COMMAND ${LCOV_PATH} --gcov-tool ${GCOV_PATH} --directory . 
--capture --output-file coverage.info + COMMAND ${LCOV_PATH} --remove coverage.info 'test/*' '/usr/*' 'examples/*' 'src/sylvan_mtbdd*' 'src/lace*' 'src/sylvan_ldd*' 'src/avl.h' 'src/sha2.c' --output-file coverage.info.cleaned + COMMAND ${GENHTML_PATH} -o coverage coverage.info.cleaned + COMMAND ${CMAKE_COMMAND} -E remove coverage.info coverage.info.cleaned + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) +endif() + +set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) +find_package(GMP REQUIRED) +include_directories(${GMP_INCLUDE_DIR}) +include_directories(src) + +include_directories(src) + +add_subdirectory(src) + +option(SYLVAN_BUILD_TEST "Build test programs" ON) + +if(SYLVAN_BUILD_TEST) + add_subdirectory(test) +endif() + +option(SYLVAN_BUILD_EXAMPLES "Build example tools" OFF) + +if(SYLVAN_BUILD_EXAMPLES) + add_subdirectory(examples) +endif() diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 000000000..6e1cf8acc
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,5 @@
+ACLOCAL_AMFLAGS = -I m4
+
+AM_CFLAGS = -g -O2 -Wall -Wextra -Werror -std=gnu11
+
+SUBDIRS = src
diff --git a/README.md b/README.md
new file mode 100644
index 000000000..f0f88b851
--- /dev/null
+++ b/README.md
@@ -0,0 +1,97 @@
+Sylvan [![Build Status](https://travis-ci.org/trolando/sylvan.svg?branch=master)](https://travis-ci.org/trolando/sylvan)
+======
+Sylvan is a parallel (multi-core) BDD library in C. Sylvan allows both sequential and parallel BDD-based algorithms to benefit from parallelism. Sylvan uses the work-stealing framework Lace and a lockless hashtable to implement scalable multi-core BDD operations.
+
+Sylvan is developed (© 2011-2016) by the [Formal Methods and Tools](http://fmt.ewi.utwente.nl/) group at the University of Twente as part of the MaDriD project, which is funded by NWO. Sylvan is licensed under the Apache 2.0 license.
+
+You can contact the main author of Sylvan, Tom van Dijk. Please let us know if you use Sylvan in your projects.
+
+Sylvan is available at: https://github.com/utwente-fmt/sylvan
+Java/JNI bindings: https://github.com/trolando/jsylvan
+Haskell bindings: https://github.com/adamwalker/sylvan-haskell
+
+Publications
+------------
+T. van Dijk and J. van de Pol (2015) [Sylvan: Multi-core Decision Diagrams](http://dx.doi.org/10.1007/978-3-662-46681-0_60). In: TACAS 2015, LNCS 9035. Springer.
+
+T. van Dijk and A.W. Laarman and J. van de Pol (2012) [Multi-Core BDD Operations for Symbolic Reachability](http://eprints.eemcs.utwente.nl/22166/). In: PDMC 2012, ENTCS. Elsevier.
+
+Usage
+-----
+Simple examples can be found in the `examples` subdirectory. The file `simple.cpp` contains a toy program that
+uses the C++ objects to perform basic BDD manipulation.
+The `mc.c` and `lddmc.c` programs are more advanced examples of symbolic model checking (with example models in the `models` subdirectory).
+
+Sylvan depends on the [work-stealing framework Lace](http://fmt.ewi.utwente.nl/tools/lace) for its implementation. Lace is embedded in the Sylvan distribution.
+To use Sylvan, Lace must be initialized first.
+For more details, see the comments in `src/sylvan.h`.
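Lace must be started before the Sylvan package is initialized, and both before any BDD operation. The following is a minimal sketch distilled from `examples/mc.c` in this commit; the table sizes and the granularity value are the ones that example uses, not requirements:

```c
#include <sylvan.h>

int main()
{
    // Start Lace: 0 = autodetect worker count, 1000000 = task deque size
    lace_init(0, 1000000);
    // Spawn workers; with a NULL root task, this thread becomes a worker
    lace_startup(0, NULL, NULL);
    LACE_ME; // make Lace macros (and thus Sylvan operations) usable here

    // Nodes table: initial 2^21, maximum 2^27 buckets.
    // Operation cache: initial 2^20, maximum 2^26 buckets.
    sylvan_init_package(1LL<<21, 1LL<<27, 1LL<<20, 1LL<<26);
    sylvan_init_bdd(6); // granularity 6: consult the cache about once per 6 levels

    /* ... BDD work goes here ... */

    sylvan_quit(); // free Sylvan's tables
    return 0;
}
```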
+### Basic functionality
+
+To create new BDDs, you can use:
+- `sylvan_true`: representation of constant `true`.
+- `sylvan_false`: representation of constant `false`.
+- `sylvan_ithvar(var)`: representation of literal <var> (negated: `sylvan_nithvar(var)`).
+
+To follow the BDD edges and obtain the variable at the root of a BDD, you can use:
+- `sylvan_var(bdd)`: obtain the variable of the root node of <bdd>; requires that <bdd> is not constant `true` or `false`.
+- `sylvan_high(bdd)`: follow the high edge of <bdd>.
+- `sylvan_low(bdd)`: follow the low edge of <bdd>.
+
+You need to manually reference BDDs that you want to keep during garbage collection:
+- `sylvan_ref(bdd)`: add a reference to <bdd>.
+- `sylvan_deref(bdd)`: remove a reference to <bdd>.
+- `sylvan_protect(bddptr)`: add a pointer reference to the BDD variable <bddptr>.
+- `sylvan_unprotect(bddptr)`: remove a pointer reference to the BDD variable <bddptr>.
+
+It is recommended to use `sylvan_protect` and `sylvan_unprotect`.
+The C++ objects handle this automatically.
+
+The following 'primitives' are implemented (a self-contained sketch that ties them together appears at the end of this README):
+- `sylvan_not(bdd)`: negation of <bdd>.
+- `sylvan_ite(a,b,c)`: calculate 'if <a> then <b> else <c>'.
+- `sylvan_and(a, b)`: calculate `a and b`.
+- `sylvan_or(a, b)`: calculate `a or b`.
+- `sylvan_nand(a, b)`: calculate `not (a and b)`.
+- `sylvan_nor(a, b)`: calculate `not (a or b)`.
+- `sylvan_imp(a, b)`: calculate `a implies b`.
+- `sylvan_invimp(a, b)`: calculate `b implies a`.
+- `sylvan_xor(a, b)`: calculate `a xor b`.
+- `sylvan_equiv(a, b)`: calculate `a = b`.
+- `sylvan_diff(a, b)`: calculate `a and not b`.
+- `sylvan_less(a, b)`: calculate `b and not a`.
+- `sylvan_exists(bdd, vars)`: existential quantification of <bdd> with respect to the variables <vars>. Here, <vars> is a conjunction of literals.
+- `sylvan_forall(bdd, vars)`: universal quantification of <bdd> with respect to the variables <vars>. Here, <vars> is a conjunction of literals.
+
+### Other BDD operations
+
+See `src/sylvan_bdd.h`, `src/sylvan_mtbdd.h` and `src/sylvan_ldd.h` for other implemented operations.
+See `src/sylvan_obj.hpp` for the C++ interface.
+
+### Garbage collection
+
+Garbage collection is triggered when trying to insert a new node and no empty bucket can be found within a reasonable upper bound.
+Garbage collection is stop-the-world and all workers must cooperate on it. (Beware of deadlocks if you use Sylvan operations in critical sections!)
+- `sylvan_gc()`: manually trigger garbage collection.
+- `sylvan_gc_enable()`: enable garbage collection.
+- `sylvan_gc_disable()`: disable garbage collection.
+
+### Table resizing
+
+During garbage collection, it is possible to resize the nodes table and the cache.
+Sylvan provides two default implementations: an aggressive version that resizes every time garbage collection is performed,
+and a less aggressive version that only resizes when at least half the table is full.
+This can be configured in `src/sylvan_config.h`.
+It is not possible to decrease the size of the nodes table and the cache.
+
+### Dynamic reordering
+
+Dynamic reordering is currently not supported.
+For now, we suggest users find a good static variable ordering.
+
+Troubleshooting
+---------------
+Sylvan may require a larger than normal program stack. You may need to increase the program stack size on your system using `ulimit -s`. Segmentation faults on large computations typically indicate a program stack overflow.
+
+### I am getting the error "unable to allocate memory: ...!"
+Sylvan allocates virtual memory using mmap. If you specify a combined size for the cache and node table that is larger than your actual available memory, you may need to set `vm.overcommit_memory` to `1`, e.g. `echo 1 > /proc/sys/vm/overcommit_memory`. You can make this setting permanent with `echo "vm.overcommit_memory = 1" > /etc/sysctl.d/99-sylvan.conf` and verify it with `cat /proc/sys/vm/overcommit_memory`; it should report `1`.
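Tying the sections above together (and as referenced from the list of primitives), here is a minimal sketch of creating, combining, protecting, and quantifying BDDs with the C API; it assumes the package was initialized as in the sketch near the top of this README:

```c
#include <assert.h>
#include <sylvan.h>

void demo(void)
{
    LACE_ME; // Sylvan operations are Lace tasks; needed once per function

    BDD a = sylvan_ithvar(0); // the literal x_0
    BDD b = sylvan_ithvar(1); // the literal x_1
    sylvan_protect(&a);       // keep these BDDs alive across garbage collection
    sylvan_protect(&b);

    BDD f = sylvan_and(a, b); // x_0 and x_1
    sylvan_protect(&f);

    // De Morgan: not(x_0 and x_1) == (not x_0) or (not x_1).
    // sylvan_not is allocation-free (complement edges), so it needs no protection.
    BDD g = sylvan_or(sylvan_not(a), sylvan_not(b));
    assert(sylvan_not(f) == g);

    // Existential quantification; <vars> is a conjunction of literals (here just x_0)
    assert(sylvan_exists(f, a) == b);

    sylvan_unprotect(&a);
    sylvan_unprotect(&b);
    sylvan_unprotect(&f);
}
```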
diff --git a/cmake/FindGMP.cmake b/cmake/FindGMP.cmake
new file mode 100644
index 000000000..62c75c034
--- /dev/null
+++ b/cmake/FindGMP.cmake
@@ -0,0 +1,20 @@
+FIND_PATH(GMP_INCLUDE_DIR
+    gmp.h )
+
+FIND_LIBRARY(GMP_LIBRARIES
+    NAMES gmp
+    HINTS /usr/local/lib )
+
+IF (GMP_INCLUDE_DIR AND GMP_LIBRARIES)
+    SET(GMP_FOUND TRUE)
+ENDIF (GMP_INCLUDE_DIR AND GMP_LIBRARIES)
+
+IF (GMP_FOUND)
+    IF (NOT GMP_FIND_QUIETLY)
+        MESSAGE(STATUS "Found GMP: ${GMP_LIBRARIES}")
+    ENDIF (NOT GMP_FIND_QUIETLY)
+ELSE (GMP_FOUND)
+    IF (GMP_FIND_REQUIRED)
+        MESSAGE(FATAL_ERROR "Could not find GMP")
+    ENDIF (GMP_FIND_REQUIRED)
+ENDIF (GMP_FOUND)
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 000000000..5363e6daa
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,21 @@
+AC_PREREQ([2.60])
+AC_INIT([sylvan], [1.0])
+AC_CONFIG_MACRO_DIR([m4])
+AC_CONFIG_AUX_DIR([tools])
+AM_INIT_AUTOMAKE([foreign])
+
+AC_PROG_CC
+AX_CHECK_COMPILE_FLAG([-std=c11],,[AC_MSG_FAILURE([no acceptable C11 compiler found.])])
+AC_PROG_CXX
+LT_INIT
+
+AC_CHECKING([for any suitable hwloc installation])
+AC_CHECK_LIB([hwloc], [hwloc_topology_init], [AC_CHECK_HEADER([hwloc.h], [hwloc=yes])])
+AM_CONDITIONAL([HAVE_LIBHWLOC], [test "$hwloc" = "yes"])
+
+AC_CANONICAL_HOST
+AM_CONDITIONAL([DARWIN], [case $host_os in darwin*) true;; *) false;; esac])
+# test x$(uname) == "xDarwin"])
+
+AC_CONFIG_FILES([Makefile src/Makefile])
+AC_OUTPUT
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 000000000..bb335935d
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,32 @@
+cmake_minimum_required(VERSION 2.6)
+project(sylvan C CXX)
+
+include_directories(.)
+
+add_executable(mc mc.c getrss.h getrss.c)
+target_link_libraries(mc sylvan)
+
+add_executable(lddmc lddmc.c getrss.h getrss.c)
+target_link_libraries(lddmc sylvan)
+
+add_executable(simple simple.cpp)
+target_link_libraries(simple sylvan stdc++)
+
+include(CheckIncludeFiles)
+check_include_files("gperftools/profiler.h" HAVE_PROFILER)
+
+if(HAVE_PROFILER)
+    set_target_properties(mc PROPERTIES COMPILE_DEFINITIONS "HAVE_PROFILER")
+    target_link_libraries(mc profiler)
+
+    set_target_properties(lddmc PROPERTIES COMPILE_DEFINITIONS "HAVE_PROFILER")
+    target_link_libraries(lddmc profiler)
+endif()
+
+if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+    # add argp library for OSX
+    target_link_libraries(mc argp)
+    target_link_libraries(lddmc argp)
+endif()
diff --git a/examples/getrss.c b/examples/getrss.c
new file mode 100644
index 000000000..f3aa5e381
--- /dev/null
+++ b/examples/getrss.c
@@ -0,0 +1,68 @@
+/*
+ * Author:  David Robert Nadeau
+ * Site:    http://NadeauSoftware.com/
+ * License: Creative Commons Attribution 3.0 Unported License
+ *          http://creativecommons.org/licenses/by/3.0/deed.en_US
+ */
+
+/*
+ * Modified by Tom van Dijk to remove WIN32 and solaris code
+ */
+
+/* headers restored: getrusage, task_info, sysconf, and the FILE I/O
+   below need these (the exact original list is assumed) */
+#if defined(__APPLE__) && defined(__MACH__)
+#include <unistd.h>
+#include <sys/resource.h>
+#include <mach/mach.h>
+#elif defined(__linux__) || defined(__linux) || defined(linux) || defined(__gnu_linux__)
+#include <unistd.h>
+#include <sys/resource.h>
+#include <stdio.h>
+#else
+#error "Cannot define getPeakRSS( ) or getCurrentRSS( ) for an unknown OS."
+#endif
+
+/**
+ * Returns the peak (maximum so far) resident set size (physical
+ * memory use) measured in bytes, or zero if the value cannot be
+ * determined on this OS.
+ */
+size_t
+getPeakRSS()
+{
+    struct rusage rusage;
+    getrusage(RUSAGE_SELF, &rusage);
+#if defined(__APPLE__) && defined(__MACH__)
+    return (size_t)rusage.ru_maxrss;
+#else
+    return (size_t)(rusage.ru_maxrss * 1024L);
+#endif
+}
+
+/**
+ * Returns the current resident set size (physical memory use) measured
+ * in bytes, or zero if the value cannot be determined on this OS.
+ */
+size_t
+getCurrentRSS()
+{
+#if defined(__APPLE__) && defined(__MACH__)
+    /* OSX ------------------------------------------------------ */
+    struct mach_task_basic_info info;
+    mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
+    if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) != KERN_SUCCESS)
+        return (size_t)0L; /* Can't access? */
+    return (size_t)info.resident_size;
+#else
+    /* Linux ---------------------------------------------------- */
+    long rss = 0L;
+    FILE *fp = NULL;
+    if ((fp = fopen("/proc/self/statm", "r")) == NULL)
+        return (size_t)0L; /* Can't open? */
+    if (fscanf(fp, "%*s%ld", &rss) != 1) {
+        fclose(fp);
+        return (size_t)0L; /* Can't read? */
+    }
+    fclose(fp);
+    return (size_t)rss * (size_t)sysconf(_SC_PAGESIZE);
+#endif
+}
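`mc.c` and `lddmc.c` use these helpers for their memory reports; a small usage sketch follows (the `report_memory` wrapper and the MiB formatting are illustrative, not part of the examples):

```c
#include <stdio.h>
#include "getrss.h"

/* Print current and peak resident set size in MiB. */
static void report_memory(void)
{
    size_t cur = getCurrentRSS();  // bytes, or 0 if unavailable on this OS
    size_t peak = getPeakRSS();    // bytes, or 0 if unavailable on this OS
    printf("RSS: current %.1f MiB, peak %.1f MiB\n",
           cur / (1024.0 * 1024.0), peak / (1024.0 * 1024.0));
}
```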
diff --git a/examples/getrss.h b/examples/getrss.h
new file mode 100644
index 000000000..653e78e76
--- /dev/null
+++ b/examples/getrss.h
@@ -0,0 +1,26 @@
+#ifndef GETRSS_H
+#define GETRSS_H
+
+#include <stddef.h> /* size_t */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * Returns the peak (maximum so far) resident set size (physical
+ * memory use) measured in bytes, or zero if the value cannot be
+ * determined on this OS.
+ */
+size_t getPeakRSS();
+
+/**
+ * Returns the current resident set size (physical memory use) measured
+ * in bytes, or zero if the value cannot be determined on this OS.
+ */
+size_t getCurrentRSS();
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+
+#endif
diff --git a/examples/lddmc.c b/examples/lddmc.c
new file mode 100644
index 000000000..9e9c9c008
--- /dev/null
+++ b/examples/lddmc.c
@@ -0,0 +1,499 @@
+/* headers restored to match what the code uses: argp, assert, PRIu32,
+   stdio, malloc/atoi, strcmp, gettimeofday (exact original list assumed) */
+#include <argp.h>
+#include <assert.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+
+#ifdef HAVE_PROFILER
+#include <gperftools/profiler.h>
+#endif
+
+#include <sylvan.h>
+#include <sylvan_ldd.h>
+#include "getrss.h"
+
+/* Configuration */
+static int report_levels = 0; // report states at start of every level
+static int report_table = 0; // report table size at end of every level
+static int strategy = 1; // set to 1 = use PAR strategy; set to 0 = use BFS strategy
+static int check_deadlocks = 0; // set to 1 to check for deadlocks
+static int print_transition_matrix = 1; // print transition relation matrix
+static int workers = 0; // autodetect
+static char* model_filename = NULL; // filename of model
+#ifdef HAVE_PROFILER
+static char* profile_filename = NULL; // filename for profiling
+#endif
+
+/* argp configuration */
+static struct argp_option options[] =
+{
+    {"workers", 'w', "<workers>", 0, "Number of workers (default=0: autodetect)", 0},
+    {"strategy", 's', "<bfs|par|sat>", 0, "Strategy for reachability (default=par)", 0},
+#ifdef HAVE_PROFILER
+    {"profiler", 'p', "<filename>", 0, "Filename for profiling", 0},
+#endif
+    {"deadlocks", 3, 0, 0, "Check for deadlocks", 1},
+    {"count-states", 1, 0, 0, "Report #states at each level", 1},
+    {"count-table", 2, 0, 0, "Report table usage at each level", 1},
+    {0, 0, 0, 0, 0, 0}
+};
+static error_t
+parse_opt(int key, char *arg, struct argp_state *state)
+{
+    switch (key) {
+    case 'w':
+        workers = atoi(arg);
+        break;
+    case 's':
+        if (strcmp(arg, "bfs")==0) strategy = 0;
+        else if (strcmp(arg, "par")==0) strategy = 1;
+        else if (strcmp(arg, "sat")==0) strategy = 2;
+        else argp_usage(state);
+        break;
+    case 3:
+        check_deadlocks = 1;
+        break;
+    case 1:
+        report_levels = 1;
+        break;
+    case 2:
+        report_table = 1;
+        break;
+#ifdef HAVE_PROFILER
+    case 'p':
+        profile_filename = arg;
+        break;
+#endif
+    case ARGP_KEY_ARG:
+        if (state->arg_num >= 1) argp_usage(state);
+        model_filename = arg;
+        break;
+    case ARGP_KEY_END:
+        if (state->arg_num < 1) argp_usage(state);
+        break;
+    default:
+        return ARGP_ERR_UNKNOWN;
+    }
+    return 0;
+}
+static struct argp argp = { options, parse_opt, "<model>", 0, 0, 0, 0 };
+
+/* Globals */
+typedef struct set
+{
+    MDD mdd;
+    MDD proj;
+    int size;
+} *set_t;
+
+typedef struct relation
+{
+    MDD mdd;
+    MDD meta;
+    int size;
+} *rel_t;
+
+static size_t vector_size; // size of vector
+static int next_count; // number of partitions of the transition relation
+static rel_t *next; // each partition of the transition relation
+
+#define Abort(...) \
{ fprintf(stderr, __VA_ARGS__); exit(-1); } + +/* Load a set from file */ +static set_t +set_load(FILE* f) +{ + lddmc_serialize_fromfile(f); + + size_t mdd; + size_t proj; + int size; + + if (fread(&mdd, sizeof(size_t), 1, f) != 1) Abort("Invalid input file!\n"); + if (fread(&proj, sizeof(size_t), 1, f) != 1) Abort("Invalid input file!\n"); + if (fread(&size, sizeof(int), 1, f) != 1) Abort("Invalid input file!\n"); + + LACE_ME; + + set_t set = (set_t)malloc(sizeof(struct set)); + set->mdd = lddmc_ref(lddmc_serialize_get_reversed(mdd)); + set->proj = lddmc_ref(lddmc_serialize_get_reversed(proj)); + set->size = size; + + return set; +} + +static int +calculate_size(MDD meta) +{ + int result = 0; + uint32_t val = lddmc_getvalue(meta); + while (val != (uint32_t)-1) { + if (val != 0) result += 1; + meta = lddmc_follow(meta, val); + assert(meta != lddmc_true && meta != lddmc_false); + val = lddmc_getvalue(meta); + } + return result; +} + +/* Load a relation from file */ +static rel_t +rel_load(FILE* f) +{ + lddmc_serialize_fromfile(f); + + size_t mdd; + size_t meta; + + if (fread(&mdd, sizeof(size_t), 1, f) != 1) Abort("Invalid input file!\n"); + if (fread(&meta, sizeof(size_t), 1, f) != 1) Abort("Invalid input file!\n"); + + LACE_ME; + + rel_t rel = (rel_t)malloc(sizeof(struct relation)); + rel->mdd = lddmc_ref(lddmc_serialize_get_reversed(mdd)); + rel->meta = lddmc_ref(lddmc_serialize_get_reversed(meta)); + rel->size = calculate_size(rel->meta); + + return rel; +} + +static void +print_example(MDD example) +{ + if (example != lddmc_false) { + LACE_ME; + uint32_t vec[vector_size]; + lddmc_sat_one(example, vec, vector_size); + + size_t i; + printf("["); + for (i=0; i0) printf(","); + printf("%" PRIu32, vec[i]); + } + printf("]"); + } +} + +static void +print_matrix(size_t size, MDD meta) +{ + if (size == 0) return; + uint32_t val = lddmc_getvalue(meta); + if (val == 1) { + printf("+"); + print_matrix(size-1, lddmc_follow(lddmc_follow(meta, 1), 2)); + } else { + if (val == (uint32_t)-1) printf("-"); + else if (val == 0) printf("-"); + else if (val == 3) printf("r"); + else if (val == 4) printf("w"); + print_matrix(size-1, lddmc_follow(meta, val)); + } +} + +static char* +to_h(double size, char *buf) +{ + const char* units[] = {"B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}; + int i = 0; + for (;size>1024;size/=1024) i++; + sprintf(buf, "%.*f %s", i, size, units[i]); + return buf; +} + +static int +get_first(MDD meta) +{ + uint32_t val = lddmc_getvalue(meta); + if (val != 0) return 0; + return 1+get_first(lddmc_follow(meta, val)); +} + +/* Straight-forward implementation of parallel reduction */ +TASK_5(MDD, go_par, MDD, cur, MDD, visited, size_t, from, size_t, len, MDD*, deadlocks) +{ + if (len == 1) { + // Calculate NEW successors (not in visited) + MDD succ = lddmc_ref(lddmc_relprod(cur, next[from]->mdd, next[from]->meta)); + if (deadlocks) { + // check which MDDs in deadlocks do not have a successor in this relation + MDD anc = lddmc_ref(lddmc_relprev(succ, next[from]->mdd, next[from]->meta, cur)); + *deadlocks = lddmc_ref(lddmc_minus(*deadlocks, anc)); + lddmc_deref(anc); + } + MDD result = lddmc_ref(lddmc_minus(succ, visited)); + lddmc_deref(succ); + return result; + } else { + MDD deadlocks_left; + MDD deadlocks_right; + if (deadlocks) { + deadlocks_left = *deadlocks; + deadlocks_right = *deadlocks; + } + + // Recursively calculate left+right + SPAWN(go_par, cur, visited, from, (len+1)/2, deadlocks ? 
&deadlocks_left: NULL); + MDD right = CALL(go_par, cur, visited, from+(len+1)/2, len/2, deadlocks ? &deadlocks_right : NULL); + MDD left = SYNC(go_par); + + // Merge results of left+right + MDD result = lddmc_ref(lddmc_union(left, right)); + lddmc_deref(left); + lddmc_deref(right); + + if (deadlocks) { + *deadlocks = lddmc_ref(lddmc_intersect(deadlocks_left, deadlocks_right)); + lddmc_deref(deadlocks_left); + lddmc_deref(deadlocks_right); + } + + return result; + } +} + +/* PAR strategy, parallel strategy (operations called in parallel *and* parallelized by Sylvan) */ +VOID_TASK_1(par, set_t, set) +{ + MDD visited = set->mdd; + MDD new = lddmc_ref(visited); + size_t counter = 1; + do { + char buf[32]; + to_h(getCurrentRSS(), buf); + printf("Memory usage: %s\n", buf); + printf("Level %zu... ", counter++); + if (report_levels) { + printf("%zu states... ", (size_t)lddmc_satcount_cached(visited)); + } + fflush(stdout); + + // calculate successors in parallel + MDD cur = new; + MDD deadlocks = cur; + new = CALL(go_par, cur, visited, 0, next_count, check_deadlocks ? &deadlocks : NULL); + lddmc_deref(cur); + + if (check_deadlocks) { + printf("found %zu deadlock states... ", (size_t)lddmc_satcount_cached(deadlocks)); + if (deadlocks != lddmc_false) { + printf("example: "); + print_example(deadlocks); + printf("... "); + check_deadlocks = 0; + } + } + + // visited = visited + new + MDD old_visited = visited; + visited = lddmc_ref(lddmc_union(visited, new)); + lddmc_deref(old_visited); + + if (report_table) { + size_t filled, total; + sylvan_table_usage(&filled, &total); + printf("done, table: %0.1f%% full (%zu nodes).\n", 100.0*(double)filled/total, filled); + } else { + printf("done.\n"); + } + } while (new != lddmc_false); + lddmc_deref(new); + set->mdd = visited; +} + +/* Sequential version of merge-reduction */ +TASK_5(MDD, go_bfs, MDD, cur, MDD, visited, size_t, from, size_t, len, MDD*, deadlocks) +{ + if (len == 1) { + // Calculate NEW successors (not in visited) + MDD succ = lddmc_ref(lddmc_relprod(cur, next[from]->mdd, next[from]->meta)); + if (deadlocks) { + // check which MDDs in deadlocks do not have a successor in this relation + MDD anc = lddmc_ref(lddmc_relprev(succ, next[from]->mdd, next[from]->meta, cur)); + *deadlocks = lddmc_ref(lddmc_minus(*deadlocks, anc)); + lddmc_deref(anc); + } + MDD result = lddmc_ref(lddmc_minus(succ, visited)); + lddmc_deref(succ); + return result; + } else { + MDD deadlocks_left; + MDD deadlocks_right; + if (deadlocks) { + deadlocks_left = *deadlocks; + deadlocks_right = *deadlocks; + } + + // Recursively calculate left+right + MDD left = CALL(go_bfs, cur, visited, from, (len+1)/2, deadlocks ? &deadlocks_left : NULL); + MDD right = CALL(go_bfs, cur, visited, from+(len+1)/2, len/2, deadlocks ? &deadlocks_right : NULL); + + // Merge results of left+right + MDD result = lddmc_ref(lddmc_union(left, right)); + lddmc_deref(left); + lddmc_deref(right); + + if (deadlocks) { + *deadlocks = lddmc_ref(lddmc_intersect(deadlocks_left, deadlocks_right)); + lddmc_deref(deadlocks_left); + lddmc_deref(deadlocks_right); + } + + return result; + } +} + +/* BFS strategy, sequential strategy (but operations are parallelized by Sylvan) */ +VOID_TASK_1(bfs, set_t, set) +{ + MDD visited = set->mdd; + MDD new = lddmc_ref(visited); + size_t counter = 1; + do { + char buf[32]; + to_h(getCurrentRSS(), buf); + printf("Memory usage: %s\n", buf); + printf("Level %zu... ", counter++); + if (report_levels) { + printf("%zu states... 
", (size_t)lddmc_satcount_cached(visited)); + } + fflush(stdout); + + MDD cur = new; + MDD deadlocks = cur; + new = CALL(go_bfs, cur, visited, 0, next_count, check_deadlocks ? &deadlocks : NULL); + lddmc_deref(cur); + + if (check_deadlocks) { + printf("found %zu deadlock states... ", (size_t)lddmc_satcount_cached(deadlocks)); + if (deadlocks != lddmc_false) { + printf("example: "); + print_example(deadlocks); + printf("... "); + check_deadlocks = 0; + } + } + + // visited = visited + new + MDD old_visited = visited; + visited = lddmc_ref(lddmc_union(visited, new)); + lddmc_deref(old_visited); + + if (report_table) { + size_t filled, total; + sylvan_table_usage(&filled, &total); + printf("done, table: %0.1f%% full (%zu nodes).\n", 100.0*(double)filled/total, filled); + } else { + printf("done.\n"); + } + } while (new != lddmc_false); + lddmc_deref(new); + set->mdd = visited; +} + +/* Obtain current wallclock time */ +static double +wctime() +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return (tv.tv_sec + 1E-6 * tv.tv_usec); +} + +int +main(int argc, char **argv) +{ + argp_parse(&argp, argc, argv, 0, 0, 0); + + FILE *f = fopen(model_filename, "r"); + if (f == NULL) { + fprintf(stderr, "Cannot open file '%s'!\n", model_filename); + return -1; + } + + // Init Lace + lace_init(workers, 1000000); // auto-detect number of workers, use a 1,000,000 size task queue + lace_startup(0, NULL, NULL); // auto-detect program stack, do not use a callback for startup + + // Init Sylvan LDDmc + // Nodes table size: 24 bytes * 2**N_nodes + // Cache table size: 36 bytes * 2**N_cache + // With: N_nodes=25, N_cache=24: 1.3 GB memory + sylvan_init_package(1LL<<21, 1LL<<27, 1LL<<20, 1LL<<26); + sylvan_init_ldd(); + + // Read and report domain info (integers per vector and bits per integer) + if (fread(&vector_size, sizeof(size_t), 1, f) != 1) Abort("Invalid input file!\n"); + + printf("Vector size: %zu\n", vector_size); + + // Read initial state + printf("Loading initial state... "); + fflush(stdout); + set_t states = set_load(f); + printf("done.\n"); + + // Read transitions + if (fread(&next_count, sizeof(int), 1, f) != 1) Abort("Invalid input file!\n"); + next = (rel_t*)malloc(sizeof(rel_t) * next_count); + + printf("Loading transition relations... 
"); + fflush(stdout); + int i; + for (i=0; imdd)); + for (i=0; imdd)); + } + + if (print_transition_matrix) { + for (i=0; imeta); + printf(" (%d)\n", get_first(next[i]->meta)); + } + } + + LACE_ME; + +#ifdef HAVE_PROFILER + if (profile_filename != NULL) ProfilerStart(profile_filename); +#endif + if (strategy == 1) { + double t1 = wctime(); + CALL(par, states); + double t2 = wctime(); + printf("PAR Time: %f\n", t2-t1); + } else { + double t1 = wctime(); + CALL(bfs, states); + double t2 = wctime(); + printf("BFS Time: %f\n", t2-t1); + } +#ifdef HAVE_PROFILER + if (profile_filename != NULL) ProfilerStop(); +#endif + + // Now we just have states + printf("Final states: %zu states\n", (size_t)lddmc_satcount_cached(states->mdd)); + printf("Final states: %zu MDD nodes\n", lddmc_nodecount(states->mdd)); + + sylvan_stats_report(stdout, 1); + + return 0; +} diff --git a/examples/mc.c b/examples/mc.c new file mode 100644 index 000000000..2e7938fd9 --- /dev/null +++ b/examples/mc.c @@ -0,0 +1,616 @@ +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_PROFILER +#include +#endif + +#include +#include + +/* Configuration */ +static int report_levels = 0; // report states at end of every level +static int report_table = 0; // report table size at end of every level +static int report_nodes = 0; // report number of nodes of BDDs +static int strategy = 1; // set to 1 = use PAR strategy; set to 0 = use BFS strategy +static int check_deadlocks = 0; // set to 1 to check for deadlocks +static int merge_relations = 0; // merge relations to 1 relation +static int print_transition_matrix = 0; // print transition relation matrix +static int workers = 0; // autodetect +static char* model_filename = NULL; // filename of model +#ifdef HAVE_PROFILER +static char* profile_filename = NULL; // filename for profiling +#endif + +/* argp configuration */ +static struct argp_option options[] = +{ + {"workers", 'w', "", 0, "Number of workers (default=0: autodetect)", 0}, + {"strategy", 's', "", 0, "Strategy for reachability (default=par)", 0}, +#ifdef HAVE_PROFILER + {"profiler", 'p', "", 0, "Filename for profiling", 0}, +#endif + {"deadlocks", 3, 0, 0, "Check for deadlocks", 1}, + {"count-nodes", 5, 0, 0, "Report #nodes for BDDs", 1}, + {"count-states", 1, 0, 0, "Report #states at each level", 1}, + {"count-table", 2, 0, 0, "Report table usage at each level", 1}, + {"merge-relations", 6, 0, 0, "Merge transition relations into one transition relation", 1}, + {"print-matrix", 4, 0, 0, "Print transition matrix", 1}, + {0, 0, 0, 0, 0, 0} +}; +static error_t +parse_opt(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case 'w': + workers = atoi(arg); + break; + case 's': + if (strcmp(arg, "bfs")==0) strategy = 0; + else if (strcmp(arg, "par")==0) strategy = 1; + else if (strcmp(arg, "sat")==0) strategy = 2; + else argp_usage(state); + break; + case 4: + print_transition_matrix = 1; + break; + case 3: + check_deadlocks = 1; + break; + case 1: + report_levels = 1; + break; + case 2: + report_table = 1; + break; + case 6: + merge_relations = 1; + break; +#ifdef HAVE_PROFILER + case 'p': + profile_filename = arg; + break; +#endif + case ARGP_KEY_ARG: + if (state->arg_num >= 1) argp_usage(state); + model_filename = arg; + break; + case ARGP_KEY_END: + if (state->arg_num < 1) argp_usage(state); + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} +static struct argp argp = { options, parse_opt, "", 0, 0, 0, 0 }; + +/* Globals */ +typedef struct set +{ + BDD bdd; + BDD 
variables; // all variables in the set (used by satcount) +} *set_t; + +typedef struct relation +{ + BDD bdd; + BDD variables; // all variables in the relation (used by relprod) +} *rel_t; + +static int vector_size; // size of vector +static int statebits, actionbits; // number of bits for state, number of bits for action +static int bits_per_integer; // number of bits per integer in the vector +static int next_count; // number of partitions of the transition relation +static rel_t *next; // each partition of the transition relation + +/* Obtain current wallclock time */ +static double +wctime() +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return (tv.tv_sec + 1E-6 * tv.tv_usec); +} + +static double t_start; +#define INFO(s, ...) fprintf(stdout, "[% 8.2f] " s, wctime()-t_start, ##__VA_ARGS__) +#define Abort(...) { fprintf(stderr, __VA_ARGS__); exit(-1); } + +/* Load a set from file */ +#define set_load(f) CALL(set_load, f) +TASK_1(set_t, set_load, FILE*, f) +{ + sylvan_serialize_fromfile(f); + + size_t set_bdd, set_vector_size, set_state_vars; + if ((fread(&set_bdd, sizeof(size_t), 1, f) != 1) || + (fread(&set_vector_size, sizeof(size_t), 1, f) != 1) || + (fread(&set_state_vars, sizeof(size_t), 1, f) != 1)) { + Abort("Invalid input file!\n"); + } + + set_t set = (set_t)malloc(sizeof(struct set)); + set->bdd = sylvan_serialize_get_reversed(set_bdd); + set->variables = sylvan_support(sylvan_serialize_get_reversed(set_state_vars)); + + sylvan_protect(&set->bdd); + sylvan_protect(&set->variables); + + return set; +} + +/* Load a relation from file */ +#define rel_load(f) CALL(rel_load, f) +TASK_1(rel_t, rel_load, FILE*, f) +{ + sylvan_serialize_fromfile(f); + + size_t rel_bdd, rel_vars; + if ((fread(&rel_bdd, sizeof(size_t), 1, f) != 1) || + (fread(&rel_vars, sizeof(size_t), 1, f) != 1)) { + Abort("Invalid input file!\n"); + } + + rel_t rel = (rel_t)malloc(sizeof(struct relation)); + rel->bdd = sylvan_serialize_get_reversed(rel_bdd); + rel->variables = sylvan_support(sylvan_serialize_get_reversed(rel_vars)); + + sylvan_protect(&rel->bdd); + sylvan_protect(&rel->variables); + + return rel; +} + +#define print_example(example, variables) CALL(print_example, example, variables) +VOID_TASK_2(print_example, BDD, example, BDDSET, variables) +{ + uint8_t str[vector_size * bits_per_integer]; + + if (example != sylvan_false) { + sylvan_sat_one(example, variables, str); + printf("["); + for (int i=0; i0) printf(","); + printf("%" PRIu32, res); + } + printf("]"); + } +} + +/* Straight-forward implementation of parallel reduction */ +TASK_5(BDD, go_par, BDD, cur, BDD, visited, size_t, from, size_t, len, BDD*, deadlocks) +{ + if (len == 1) { + // Calculate NEW successors (not in visited) + BDD succ = sylvan_relnext(cur, next[from]->bdd, next[from]->variables); + bdd_refs_push(succ); + if (deadlocks) { + // check which BDDs in deadlocks do not have a successor in this relation + BDD anc = sylvan_relprev(next[from]->bdd, succ, next[from]->variables); + bdd_refs_push(anc); + *deadlocks = sylvan_diff(*deadlocks, anc); + bdd_refs_pop(1); + } + BDD result = sylvan_diff(succ, visited); + bdd_refs_pop(1); + return result; + } else { + BDD deadlocks_left; + BDD deadlocks_right; + if (deadlocks) { + deadlocks_left = *deadlocks; + deadlocks_right = *deadlocks; + sylvan_protect(&deadlocks_left); + sylvan_protect(&deadlocks_right); + } + + // Recursively calculate left+right + bdd_refs_spawn(SPAWN(go_par, cur, visited, from, (len+1)/2, deadlocks ? 
&deadlocks_left: NULL)); + BDD right = bdd_refs_push(CALL(go_par, cur, visited, from+(len+1)/2, len/2, deadlocks ? &deadlocks_right : NULL)); + BDD left = bdd_refs_push(bdd_refs_sync(SYNC(go_par))); + + // Merge results of left+right + BDD result = sylvan_or(left, right); + bdd_refs_pop(2); + + if (deadlocks) { + bdd_refs_push(result); + *deadlocks = sylvan_and(deadlocks_left, deadlocks_right); + sylvan_unprotect(&deadlocks_left); + sylvan_unprotect(&deadlocks_right); + bdd_refs_pop(1); + } + + return result; + } +} + +/* PAR strategy, parallel strategy (operations called in parallel *and* parallelized by Sylvan) */ +VOID_TASK_1(par, set_t, set) +{ + BDD visited = set->bdd; + BDD next_level = visited; + BDD cur_level = sylvan_false; + BDD deadlocks = sylvan_false; + + sylvan_protect(&visited); + sylvan_protect(&next_level); + sylvan_protect(&cur_level); + sylvan_protect(&deadlocks); + + int iteration = 1; + do { + // calculate successors in parallel + cur_level = next_level; + deadlocks = cur_level; + + next_level = CALL(go_par, cur_level, visited, 0, next_count, check_deadlocks ? &deadlocks : NULL); + + if (check_deadlocks && deadlocks != sylvan_false) { + INFO("Found %'0.0f deadlock states... ", sylvan_satcount(deadlocks, set->variables)); + if (deadlocks != sylvan_false) { + printf("example: "); + print_example(deadlocks, set->variables); + check_deadlocks = 0; + } + printf("\n"); + } + + // visited = visited + new + visited = sylvan_or(visited, next_level); + + if (report_table && report_levels) { + size_t filled, total; + sylvan_table_usage(&filled, &total); + INFO("Level %d done, %'0.0f states explored, table: %0.1f%% full (%'zu nodes)\n", + iteration, sylvan_satcount(visited, set->variables), + 100.0*(double)filled/total, filled); + } else if (report_table) { + size_t filled, total; + sylvan_table_usage(&filled, &total); + INFO("Level %d done, table: %0.1f%% full (%'zu nodes)\n", + iteration, + 100.0*(double)filled/total, filled); + } else if (report_levels) { + INFO("Level %d done, %'0.0f states explored\n", iteration, sylvan_satcount(visited, set->variables)); + } else { + INFO("Level %d done\n", iteration); + } + iteration++; + } while (next_level != sylvan_false); + + set->bdd = visited; + + sylvan_unprotect(&visited); + sylvan_unprotect(&next_level); + sylvan_unprotect(&cur_level); + sylvan_unprotect(&deadlocks); +} + +/* Sequential version of merge-reduction */ +TASK_5(BDD, go_bfs, BDD, cur, BDD, visited, size_t, from, size_t, len, BDD*, deadlocks) +{ + if (len == 1) { + // Calculate NEW successors (not in visited) + BDD succ = sylvan_relnext(cur, next[from]->bdd, next[from]->variables); + bdd_refs_push(succ); + if (deadlocks) { + // check which BDDs in deadlocks do not have a successor in this relation + BDD anc = sylvan_relprev(next[from]->bdd, succ, next[from]->variables); + bdd_refs_push(anc); + *deadlocks = sylvan_diff(*deadlocks, anc); + bdd_refs_pop(1); + } + BDD result = sylvan_diff(succ, visited); + bdd_refs_pop(1); + return result; + } else { + BDD deadlocks_left; + BDD deadlocks_right; + if (deadlocks) { + deadlocks_left = *deadlocks; + deadlocks_right = *deadlocks; + sylvan_protect(&deadlocks_left); + sylvan_protect(&deadlocks_right); + } + + // Recursively calculate left+right + BDD left = CALL(go_bfs, cur, visited, from, (len+1)/2, deadlocks ? &deadlocks_left : NULL); + bdd_refs_push(left); + BDD right = CALL(go_bfs, cur, visited, from+(len+1)/2, len/2, deadlocks ? 
&deadlocks_right : NULL); + bdd_refs_push(right); + + // Merge results of left+right + BDD result = sylvan_or(left, right); + bdd_refs_pop(2); + + if (deadlocks) { + bdd_refs_push(result); + *deadlocks = sylvan_and(deadlocks_left, deadlocks_right); + sylvan_unprotect(&deadlocks_left); + sylvan_unprotect(&deadlocks_right); + bdd_refs_pop(1); + } + + return result; + } +} + +/* BFS strategy, sequential strategy (but operations are parallelized by Sylvan) */ +VOID_TASK_1(bfs, set_t, set) +{ + BDD visited = set->bdd; + BDD next_level = visited; + BDD cur_level = sylvan_false; + BDD deadlocks = sylvan_false; + + sylvan_protect(&visited); + sylvan_protect(&next_level); + sylvan_protect(&cur_level); + sylvan_protect(&deadlocks); + + int iteration = 1; + do { + // calculate successors in parallel + cur_level = next_level; + deadlocks = cur_level; + + next_level = CALL(go_bfs, cur_level, visited, 0, next_count, check_deadlocks ? &deadlocks : NULL); + + if (check_deadlocks && deadlocks != sylvan_false) { + INFO("Found %'0.0f deadlock states... ", sylvan_satcount(deadlocks, set->variables)); + if (deadlocks != sylvan_false) { + printf("example: "); + print_example(deadlocks, set->variables); + check_deadlocks = 0; + } + printf("\n"); + } + + // visited = visited + new + visited = sylvan_or(visited, next_level); + + if (report_table && report_levels) { + size_t filled, total; + sylvan_table_usage(&filled, &total); + INFO("Level %d done, %'0.0f states explored, table: %0.1f%% full (%'zu nodes)\n", + iteration, sylvan_satcount(visited, set->variables), + 100.0*(double)filled/total, filled); + } else if (report_table) { + size_t filled, total; + sylvan_table_usage(&filled, &total); + INFO("Level %d done, table: %0.1f%% full (%'zu nodes)\n", + iteration, + 100.0*(double)filled/total, filled); + } else if (report_levels) { + INFO("Level %d done, %'0.0f states explored\n", iteration, sylvan_satcount(visited, set->variables)); + } else { + INFO("Level %d done\n", iteration); + } + iteration++; + } while (next_level != sylvan_false); + + set->bdd = visited; + + sylvan_unprotect(&visited); + sylvan_unprotect(&next_level); + sylvan_unprotect(&cur_level); + sylvan_unprotect(&deadlocks); +} + +/** + * Extend a transition relation to a larger domain (using s=s') + */ +#define extend_relation(rel, vars) CALL(extend_relation, rel, vars) +TASK_2(BDD, extend_relation, BDD, relation, BDDSET, variables) +{ + /* first determine which state BDD variables are in rel */ + int has[statebits]; + for (int i=0; i= (unsigned)statebits) break; // action labels + has[v/2] = 1; + s = sylvan_set_next(s); + } + + /* create "s=s'" for all variables not in rel */ + BDD eq = sylvan_true; + for (int i=statebits-1; i>=0; i--) { + if (has[i]) continue; + BDD low = sylvan_makenode(2*i+1, eq, sylvan_false); + bdd_refs_push(low); + BDD high = sylvan_makenode(2*i+1, sylvan_false, eq); + bdd_refs_pop(1); + eq = sylvan_makenode(2*i, low, high); + } + + bdd_refs_push(eq); + BDD result = sylvan_and(relation, eq); + bdd_refs_pop(1); + + return result; +} + +/** + * Compute \BigUnion ( sets[i] ) + */ +#define big_union(first, count) CALL(big_union, first, count) +TASK_2(BDD, big_union, int, first, int, count) +{ + if (count == 1) return next[first]->bdd; + + bdd_refs_spawn(SPAWN(big_union, first, count/2)); + BDD right = bdd_refs_push(CALL(big_union, first+count/2, count-count/2)); + BDD left = bdd_refs_push(bdd_refs_sync(SYNC(big_union))); + BDD result = sylvan_or(left, right); + bdd_refs_pop(2); + return result; +} + +static void +print_matrix(BDD 
vars) +{ + for (int i=0; i= next_s) break; + } + } else { + fprintf(stdout, "-"); + } + } + } +} + +VOID_TASK_0(gc_start) +{ + INFO("(GC) Starting garbage collection...\n"); +} + +VOID_TASK_0(gc_end) +{ + INFO("(GC) Garbage collection done.\n"); +} + +int +main(int argc, char **argv) +{ + argp_parse(&argp, argc, argv, 0, 0, 0); + setlocale(LC_NUMERIC, "en_US.utf-8"); + t_start = wctime(); + + FILE *f = fopen(model_filename, "r"); + if (f == NULL) { + fprintf(stderr, "Cannot open file '%s'!\n", model_filename); + return -1; + } + + // Init Lace + lace_init(workers, 1000000); // auto-detect number of workers, use a 1,000,000 size task queue + lace_startup(0, NULL, NULL); // auto-detect program stack, do not use a callback for startup + + LACE_ME; + + // Init Sylvan + // Nodes table size: 24 bytes * 2**N_nodes + // Cache table size: 36 bytes * 2**N_cache + // With: N_nodes=25, N_cache=24: 1.3 GB memory + sylvan_init_package(1LL<<21, 1LL<<27, 1LL<<20, 1LL<<26); + sylvan_init_bdd(6); // granularity 6 is decent default value - 1 means "use cache for every operation" + sylvan_gc_add_mark(0, TASK(gc_start)); + sylvan_gc_add_mark(40, TASK(gc_end)); + + /* Load domain information */ + if ((fread(&vector_size, sizeof(int), 1, f) != 1) || + (fread(&statebits, sizeof(int), 1, f) != 1) || + (fread(&actionbits, sizeof(int), 1, f) != 1)) { + Abort("Invalid input file!\n"); + } + + bits_per_integer = statebits; + statebits *= vector_size; + + // Read initial state + set_t states = set_load(f); + + // Read transitions + if (fread(&next_count, sizeof(int), 1, f) != 1) Abort("Invalid input file!\n"); + next = (rel_t*)malloc(sizeof(rel_t) * next_count); + + int i; + for (i=0; ivariables); + fprintf(stdout, "\n"); + } + } + + // Report statistics + INFO("Read file '%s'\n", model_filename); + INFO("%d integers per state, %d bits per integer, %d transition groups\n", vector_size, bits_per_integer, next_count); + + if (merge_relations) { + BDD prime_variables = sylvan_set_empty(); + for (int i=statebits-1; i>=0; i--) { + bdd_refs_push(prime_variables); + prime_variables = sylvan_set_add(prime_variables, i*2+1); + bdd_refs_pop(1); + } + + bdd_refs_push(prime_variables); + + INFO("Extending transition relations to full domain.\n"); + for (int i=0; ibdd = extend_relation(next[i]->bdd, next[i]->variables); + next[i]->variables = prime_variables; + } + + INFO("Taking union of all transition relations.\n"); + next[0]->bdd = big_union(0, next_count); + next_count = 1; + } + + if (report_nodes) { + INFO("BDD nodes:\n"); + INFO("Initial states: %zu BDD nodes\n", sylvan_nodecount(states->bdd)); + for (i=0; ibdd)); + } + } + +#ifdef HAVE_PROFILER + if (profile_filename != NULL) ProfilerStart(profile_filename); +#endif + if (strategy == 1) { + double t1 = wctime(); + CALL(par, states); + double t2 = wctime(); + INFO("PAR Time: %f\n", t2-t1); + } else { + double t1 = wctime(); + CALL(bfs, states); + double t2 = wctime(); + INFO("BFS Time: %f\n", t2-t1); + } +#ifdef HAVE_PROFILER + if (profile_filename != NULL) ProfilerStop(); +#endif + + // Now we just have states + INFO("Final states: %'0.0f states\n", sylvan_satcount(states->bdd, states->variables)); + if (report_nodes) { + INFO("Final states: %'zu BDD nodes\n", sylvan_nodecount(states->bdd)); + } + + sylvan_stats_report(stdout, 1); + + return 0; +} diff --git a/examples/simple.cpp b/examples/simple.cpp new file mode 100644 index 000000000..22bd9eb8b --- /dev/null +++ b/examples/simple.cpp @@ -0,0 +1,121 @@ +#include +#include +#include + +#include +#include + +using 
namespace sylvan; + +VOID_TASK_0(simple_cxx) +{ + Bdd one = Bdd::bddOne(); // the True terminal + Bdd zero = Bdd::bddZero(); // the False terminal + + // check if they really are the True/False terminal + assert(one.GetBDD() == sylvan_true); + assert(zero.GetBDD() == sylvan_false); + + Bdd a = Bdd::bddVar(0); // create a BDD variable x_0 + Bdd b = Bdd::bddVar(1); // create a BDD variable x_1 + + // check if a really is the Boolean formula "x_0" + assert(!a.isConstant()); + assert(a.TopVar() == 0); + assert(a.Then() == one); + assert(a.Else() == zero); + + // check if b really is the Boolean formula "x_1" + assert(!b.isConstant()); + assert(b.TopVar() == 1); + assert(b.Then() == one); + assert(b.Else() == zero); + + // compute !a + Bdd not_a = !a; + + // check if !!a is really a + assert((!not_a) == a); + + // compute a * b and !(!a + !b) and check if they are equivalent + Bdd a_and_b = a * b; + Bdd not_not_a_or_not_b = !(!a + !b); + assert(a_and_b == not_not_a_or_not_b); + + // perform some simple quantification and check the results + Bdd ex = a_and_b.ExistAbstract(a); // \exists a . a * b + assert(ex == b); + Bdd andabs = a.AndAbstract(b, a); // \exists a . a * b using AndAbstract + assert(ex == andabs); + Bdd univ = a_and_b.UnivAbstract(a); // \forall a . a * b + assert(univ == zero); + + // alternative method to get the cube "ab" using bddCube + BddSet variables = a * b; + std::vector<uint8_t> vec = {1, 1}; + assert(a_and_b == Bdd::bddCube(variables, vec)); + + // test the bddCube method for all combinations + assert((!a * !b) == Bdd::bddCube(variables, std::vector<uint8_t>({0, 0}))); + assert((!a * b) == Bdd::bddCube(variables, std::vector<uint8_t>({0, 1}))); + assert((!a) == Bdd::bddCube(variables, std::vector<uint8_t>({0, 2}))); + assert((a * !b) == Bdd::bddCube(variables, std::vector<uint8_t>({1, 0}))); + assert((a * b) == Bdd::bddCube(variables, std::vector<uint8_t>({1, 1}))); + assert((a) == Bdd::bddCube(variables, std::vector<uint8_t>({1, 2}))); + assert((!b) == Bdd::bddCube(variables, std::vector<uint8_t>({2, 0}))); + assert((b) == Bdd::bddCube(variables, std::vector<uint8_t>({2, 1}))); + assert(one == Bdd::bddCube(variables, std::vector<uint8_t>({2, 2}))); +} + +VOID_TASK_1(_main, void*, arg) +{ + // Initialize Sylvan + // With starting size of the nodes table 1 << 22, and maximum size 1 << 26. + // With starting size of the cache table 1 << 22, and maximum size 1 << 26. + // Memory usage: 24 bytes per node, and 36 bytes per cache bucket + // - 1<<24 nodes: 384 MB + // - 1<<25 nodes: 768 MB + // - 1<<26 nodes: 1536 MB + // - 1<<27 nodes: 3072 MB + // - 1<<24 cache: 576 MB + // - 1<<25 cache: 1152 MB + // - 1<<26 cache: 2304 MB + // - 1<<27 cache: 4608 MB + sylvan_init_package(1LL<<22, 1LL<<26, 1LL<<22, 1LL<<26); + + // Initialize the BDD module with granularity 1 (cache every operation) + // A higher granularity (e.g. 6) often results in better performance in practice + sylvan_init_bdd(1); + + // Now we can do some simple stuff using the C++ objects. + CALL(simple_cxx); + + // Report statistics (if SYLVAN_STATS is 1 in the configuration) + sylvan_stats_report(stdout, 1); + + // And quit, freeing memory + sylvan_quit(); + + // We didn't use arg + (void)arg; +} + +int +main (int argc, char *argv[]) +{ + int n_workers = 0; // automatically detect number of workers + size_t deque_size = 0; // default value for the size of task deques for the workers + size_t program_stack_size = 0; // default value for the program stack of each pthread + + // Initialize the Lace framework for <n_workers> workers. 
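+ // (Editorial note, illustrative only and not part of the upstream example: + // passing 0 for n_workers lets lace_init() auto-detect the number of CPUs, + // and 0 for deque_size selects the built-in default; a fixed configuration + // would be e.g. lace_init(4, 1000000) -- four workers with 1,000,000-entry + // task deques, the same deque size used by the mc.c example above.)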
+ lace_init(n_workers, deque_size); + + // Spawn and start all worker pthreads; suspends current thread until done. + lace_startup(program_stack_size, TASK(_main), NULL); + + // The lace_startup command also exits Lace after _main is completed. + + return 0; + (void)argc; // unused variable + (void)argv; // unused variable +} diff --git a/m4/.gitignore b/m4/.gitignore new file mode 100644 index 000000000..5590b8bc5 --- /dev/null +++ b/m4/.gitignore @@ -0,0 +1,5 @@ +# Ignore everything in this directory +* +# Except: +!.gitignore +!m4_ax_check_compile_flag.m4 diff --git a/m4/m4_ax_check_compile_flag.m4 b/m4/m4_ax_check_compile_flag.m4 new file mode 100644 index 000000000..c3a8d695a --- /dev/null +++ b/m4/m4_ax_check_compile_flag.m4 @@ -0,0 +1,72 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS]) +# +# DESCRIPTION +# +# Check whether the given FLAG works with the current language's compiler +# or gives an error. (Warnings, however, are ignored) +# +# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on +# success/failure. +# +# If EXTRA-FLAGS is defined, it is added to the current language's default +# flags (e.g. CFLAGS) when the check is done. The check is thus made with +# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to +# force the compiler to issue an error when a bad flag is given. +# +# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this +# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG. +# +# LICENSE +# +# Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de> +# Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com> +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. 
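+# Example usage (editorial illustration; not part of the upstream macro): +# +#   AX_CHECK_COMPILE_FLAG([-mavx], +#     [CFLAGS="$CFLAGS -mavx"], +#     [AC_MSG_WARN([compiler does not accept -mavx])], +#     [-Werror]) +# +# Passing -Werror as EXTRA-FLAGS makes GCC and Clang reject unknown flags +# with a hard error instead of a mere warning, which makes the probe reliable.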
+ +#serial 2 + +AC_DEFUN([AX_CHECK_COMPILE_FLAG], +[AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX +AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl +AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ + ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS + _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], + [AS_VAR_SET(CACHEVAR,[yes])], + [AS_VAR_SET(CACHEVAR,[no])]) + _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) +AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes], + [m4_default([$2], :)], + [m4_default([$3], :)]) +AS_VAR_POPDEF([CACHEVAR])dnl +])dnl AX_CHECK_COMPILE_FLAGS diff --git a/models/at.5.8-rgs.bdd b/models/at.5.8-rgs.bdd new file mode 100644 index 000000000..8a0c19500 Binary files /dev/null and b/models/at.5.8-rgs.bdd differ diff --git a/models/at.6.8-rgs.bdd b/models/at.6.8-rgs.bdd new file mode 100644 index 000000000..71ef84a77 Binary files /dev/null and b/models/at.6.8-rgs.bdd differ diff --git a/models/at.7.8-rgs.bdd b/models/at.7.8-rgs.bdd new file mode 100644 index 000000000..c8c29628e Binary files /dev/null and b/models/at.7.8-rgs.bdd differ diff --git a/models/blocks.2.ldd b/models/blocks.2.ldd new file mode 100644 index 000000000..1379ed1a3 Binary files /dev/null and b/models/blocks.2.ldd differ diff --git a/models/blocks.4.ldd b/models/blocks.4.ldd new file mode 100644 index 000000000..7270052c4 Binary files /dev/null and b/models/blocks.4.ldd differ diff --git a/models/collision.4.9-rgs.bdd b/models/collision.4.9-rgs.bdd new file mode 100644 index 000000000..6db7602d1 Binary files /dev/null and b/models/collision.4.9-rgs.bdd differ diff --git a/models/collision.5.9-rgs.bdd b/models/collision.5.9-rgs.bdd new file mode 100644 index 000000000..7c32c293e Binary files /dev/null and b/models/collision.5.9-rgs.bdd differ diff --git a/models/schedule_world.2.8-rgs.bdd b/models/schedule_world.2.8-rgs.bdd new file mode 100644 index 000000000..7c5354aef Binary files /dev/null and b/models/schedule_world.2.8-rgs.bdd differ diff --git a/models/schedule_world.3.8-rgs.bdd b/models/schedule_world.3.8-rgs.bdd new file mode 100644 index 000000000..a4e3e444d Binary files /dev/null and b/models/schedule_world.3.8-rgs.bdd differ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 000000000..50a645f22 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,81 @@ +cmake_minimum_required(VERSION 2.6) +project(sylvan C CXX) + +add_library(sylvan + avl.h + lace.h + lace.c + llmsset.c + llmsset.h + refs.h + refs.c + sha2.h + sha2.c + stats.h + stats.c + sylvan.h + sylvan_bdd.h + sylvan_bdd.c + sylvan_cache.h + sylvan_cache.c + sylvan_config.h + sylvan_common.h + sylvan_common.c + sylvan_gmp.h + sylvan_gmp.c + sylvan_ldd.h + sylvan_ldd.c + sylvan_mtbdd.h + sylvan_mtbdd.c + sylvan_mtbdd_int.h + sylvan_obj.hpp + sylvan_obj.cpp + tls.h +) + +# We need to make sure that the binary is put into a folder that is independent of the +# build type. Otherwise -- for example when using Xcode -- the binary might end up in a +# sub-folder "Debug" or "Release". 
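+# (Editorial illustration; not part of the upstream file: a project embedding +# this directory would typically consume the target along these lines, where +# the path and the "myapp" target are hypothetical: +#   add_subdirectory(sylvan/src) +#   target_link_libraries(myapp sylvan) +# thereby inheriting the m/pthread link dependencies declared below.)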
+set_target_properties(sylvan PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_CURRENT_BINARY_DIR} + ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${CMAKE_CURRENT_BINARY_DIR}) + +target_link_libraries(sylvan m pthread) + +if(UNIX AND NOT APPLE) + target_link_libraries(sylvan rt) +endif() + +option(USE_HWLOC "Use HWLOC library if available" ON) + +if(USE_HWLOC) + include(CheckIncludeFiles) + check_include_files(hwloc.h HAVE_HWLOC) + if(HAVE_HWLOC) + set_target_properties(sylvan PROPERTIES COMPILE_DEFINITIONS "USE_HWLOC=1") + target_link_libraries(sylvan hwloc) + endif() +endif() + +option(SYLVAN_STATS "Collect statistics" OFF) +if(SYLVAN_STATS) + set_target_properties(sylvan PROPERTIES COMPILE_DEFINITIONS "SYLVAN_STATS") +endif() + +install(TARGETS + sylvan + DESTINATION "lib") + +install(FILES + lace.h + llmsset.h + sylvan.h + sylvan_cache.h + sylvan_common.h + sylvan_config.h + sylvan_bdd.h + sylvan_ldd.h + sylvan_mtbdd.h + sylvan_obj.hpp + tls.h + DESTINATION "include") diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 000000000..5894c000f --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,39 @@ +lib_LTLIBRARIES = libsylvan.la + +libsylvan_la_CFLAGS = $(AM_CFLAGS) -fno-strict-aliasing -std=gnu11 + +libsylvan_la_SOURCES = \ + avl.h \ + lace.c \ + lace.h \ + llmsset.c \ + llmsset.h \ + refs.h \ + refs.c \ + sha2.c \ + sha2.h \ + stats.h \ + stats.c \ + sylvan.h \ + sylvan_config.h \ + sylvan_bdd.h \ + sylvan_bdd.c \ + sylvan_ldd.h \ + sylvan_ldd.c \ + sylvan_cache.h \ + sylvan_cache.c \ + sylvan_common.c \ + sylvan_common.h \ + sylvan_mtbdd.h \ + sylvan_mtbdd.c \ + sylvan_mtbdd_int.h \ + sylvan_obj.hpp \ + sylvan_obj.cpp \ + tls.h + +libsylvan_la_LIBADD = -lm + +if HAVE_LIBHWLOC +libsylvan_la_LIBADD += -lhwloc +libsylvan_la_CFLAGS += -DUSE_HWLOC=1 +endif diff --git a/src/avl.h b/src/avl.h new file mode 100644 index 000000000..68b4ea220 --- /dev/null +++ b/src/avl.h @@ -0,0 +1,398 @@ +/* + * Copyright 2011-2014 Formal Methods and Tools, University of Twente + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Self-balancing binary tree implemented using C macros + * + * See also: http://en.wikipedia.org/wiki/AVL_tree + * Data structure originally by Adelson-Velskii, Landis, 1962. + * + * Usage of this AVL implementation: + * + * AVL(some_name, some_type) + * { + * Compare some_type *left with some_type *right + * Return <0 when left<right, >0 when left>right, 0 when left=right + * } + * + * You get: + * - some_type *some_name_put(avl_node_t **root_node, some_type *data, int *inserted); + * Either insert new or retrieve existing key, if non-NULL receives 0 or 1. + * - int some_name_insert(avl_node_t **root_node, some_type *data); + * Try to insert, return 1 if successful, 0 if existed. + * - int some_name_delete(avl_node_t **root_node, some_type *data); + * Try to delete, return 1 if deleted, 0 if no match. 
* - some_type *some_name_search(avl_node_t *root_node, some_type *data); + * Retrieve existing data, returns NULL if unsuccessful + * - void some_name_free(avl_node_t **root_node); + * Free all memory used by the AVL tree + * - some_type *some_name_toarray(avl_node_t *root_node); + * Malloc an array and put the sorted data in it... + * - size_t avl_count(avl_node_t *root_node); + * Returns the number of items in the tree + * + * For example: + * struct my_struct { ... }; + * AVL(some_name, struct my_struct) + * { + * Compare struct my_struct *left with struct my_struct *right + * Return <0 when left<right, >0 when left>right, 0 when left=right + * } + * + * avl_node_t *the_root = NULL; + * struct my_struct mystuff; + * if (!some_name_search(the_root, &mystuff)) some_name_insert(&the_root, &mystuff); + * some_name_free(&the_root); + * + * For questions, feedback, etc: t.vandijk@utwente.nl + */ + +#include <stdlib.h> +#include <string.h> + +#ifndef __AVL_H__ +#define __AVL_H__ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +typedef struct avl_node +{ + struct avl_node *left, *right; + unsigned int height; + char pad[8-sizeof(unsigned int)]; + char data[0]; +} avl_node_t; + +/* Retrieve the height of a tree */ +static inline int +avl_get_height(avl_node_t *node) +{ + return node == NULL ? 0 : node->height; +} + +/* Helper for rotations to update the heights of trees */ +static inline void +avl_update_height(avl_node_t *node) +{ + int h1 = avl_get_height(node->left); + int h2 = avl_get_height(node->right); + node->height = 1 + (h1 > h2 ? h1 : h2); +} + +/* Helper for avl_balance_tree */ +static inline int +avl_update_height_get_balance(avl_node_t *node) +{ + int h1 = avl_get_height(node->left); + int h2 = avl_get_height(node->right); + node->height = 1 + (h1 > h2 ? h1 : h2); + return h1 - h2; +} + +/* Helper for avl_check_consistent */ +static inline int +avl_verify_height(avl_node_t *node) +{ + int h1 = avl_get_height(node->left); + int h2 = avl_get_height(node->right); + int expected_height = 1 + (h1 > h2 ? 
h1 : h2); + return expected_height == avl_get_height(node); +} + +/* Optional consistency check */ +static inline int __attribute__((unused)) +avl_check_consistent(avl_node_t *root) +{ + if (root == NULL) return 1; + if (!avl_check_consistent(root->left)) return 0; + if (!avl_check_consistent(root->right)) return 0; + if (!avl_verify_height(root)) return 0; + return 1; +} + +/* Perform LL rotation, returns the new root */ +static avl_node_t* +avl_rotate_LL(avl_node_t *parent) +{ + avl_node_t *child = parent->left; + parent->left = child->right; + child->right = parent; + avl_update_height(parent); + avl_update_height(child); + return child; +} + +/* Perform RR rotation, returns the new root */ +static avl_node_t* +avl_rotate_RR(avl_node_t *parent) +{ + avl_node_t *child = parent->right; + parent->right = child->left; + child->left = parent; + avl_update_height(parent); + avl_update_height(child); + return child; +} + +/* Perform RL rotation, returns the new root */ +static avl_node_t* +avl_rotate_RL(avl_node_t *parent) +{ + avl_node_t *child = parent->right; + parent->right = avl_rotate_LL(child); + return avl_rotate_RR(parent); +} + +/* Perform LR rotation, returns the new root */ +static avl_node_t* +avl_rotate_LR(avl_node_t *parent) +{ + avl_node_t *child = parent->left; + parent->left = avl_rotate_RR(child); + return avl_rotate_LL(parent); +} + +/* Calculate balance factor */ +static inline int +avl_get_balance(avl_node_t *node) +{ + if (node == NULL) return 0; + return avl_get_height(node->left) - avl_get_height(node->right); +} + +/* Balance the tree */ +static void +avl_balance_tree(avl_node_t **node) +{ + int factor = avl_update_height_get_balance(*node); + + if (factor > 1) { + if (avl_get_balance((*node)->left) > 0) *node = avl_rotate_LL(*node); + else *node = avl_rotate_LR(*node); + } else if (factor < -1) { + if (avl_get_balance((*node)->right) < 0) *node = avl_rotate_RR(*node); + else *node = avl_rotate_RL(*node); + } +} + +/* Get number of items in the AVL */ +static size_t +avl_count(avl_node_t *node) +{ + if (node == NULL) return 0; + return 1 + avl_count(node->left) + avl_count(node->right); +} + +/* Structure for iterator */ +typedef struct avl_iter +{ + size_t height; + avl_node_t *nodes[0]; +} avl_iter_t; + +/** + * nodes[0] = root node + * nodes[1] = some node + * nodes[2] = some node + * nodes[3] = leaf node (height = 4) + * nodes[4] = NULL (max = height + 1) + */ + +/* Create a new iterator */ +static inline avl_iter_t* +avl_iter(avl_node_t *node) +{ + size_t max = node ? node->height+1 : 1; + avl_iter_t *result = (avl_iter_t*)malloc(sizeof(avl_iter_t) + sizeof(avl_node_t*) * max); + result->height = 0; + result->nodes[0] = node; + return result; +} + +/* Get the next node during iteration */ +static inline avl_node_t* +avl_iter_next(avl_iter_t *iter) +{ + /* when first node is NULL, we're done */ + if (iter->nodes[0] == NULL) return NULL; + + /* if the head is not NULL, first entry... 
*/ + while (iter->nodes[iter->height] != NULL) { + iter->nodes[iter->height+1] = iter->nodes[iter->height]->left; + iter->height++; + } + + /* head is now NULL, take parent as result */ + avl_node_t *result = iter->nodes[iter->height-1]; + + if (result->right != NULL) { + /* if we can go right, do that */ + iter->nodes[iter->height] = result->right; + } else { + /* cannot go right, backtrack */ + do { + iter->height--; + } while (iter->height > 0 && iter->nodes[iter->height] == iter->nodes[iter->height-1]->right); + iter->nodes[iter->height] = NULL; /* set head to NULL: second entry */ + } + + return result; +} + +#define AVL(NAME, TYPE) \ +static inline int \ +NAME##_AVL_compare(TYPE *left, TYPE *right); \ +static __attribute__((unused)) TYPE* \ +NAME##_put(avl_node_t **root, TYPE *data, int *inserted) \ +{ \ + if (inserted && *inserted) *inserted = 0; /* reset inserted once */ \ + TYPE *result; \ + avl_node_t *it = *root; \ + if (it == NULL) { \ + *root = it = (avl_node_t*)malloc(sizeof(struct avl_node)+sizeof(TYPE)); \ + it->left = it->right = NULL; \ + it->height = 1; \ + memcpy(it->data, data, sizeof(TYPE)); \ + result = (TYPE *)it->data; \ + if (inserted) *inserted = 1; \ + } else { \ + int cmp = NAME##_AVL_compare(data, (TYPE*)(it->data)); \ + if (cmp == 0) return (TYPE *)it->data; \ + if (cmp < 0) result = NAME##_put(&it->left, data, inserted); \ + else result = NAME##_put(&it->right, data, inserted); \ + avl_balance_tree(root); \ + } \ + return result; \ +} \ +static __attribute__((unused)) int \ +NAME##_insert(avl_node_t **root, TYPE *data) \ +{ \ + int inserted; \ + NAME##_put(root, data, &inserted); \ + return inserted; \ +} \ +static void \ +NAME##_exchange_and_balance(avl_node_t *target, avl_node_t **node) \ +{ \ + avl_node_t *it = *node; \ + if (it->left == 0) { /* leftmost node contains lowest value */ \ + memcpy(target->data, it->data, sizeof(TYPE)); \ + *node = it->right; \ + free(it); \ + } else { \ + NAME##_exchange_and_balance(target, &it->left); \ + } \ + avl_balance_tree(node); \ +} \ +static __attribute__((unused)) int \ +NAME##_delete(avl_node_t **node, TYPE *data) \ +{ \ + avl_node_t *it = *node; \ + if (it == NULL) return 0; \ + int cmp = NAME##_AVL_compare(data, (TYPE *)((*node)->data)), res; \ + if (cmp < 0) res = NAME##_delete(&it->left, data); \ + else if (cmp > 0) res = NAME##_delete(&it->right, data); \ + else { \ + int h_left = avl_get_height(it->left); \ + int h_right = avl_get_height(it->right); \ + if (h_left == 0) { \ + if (h_right == 0) { /* Leaf */ \ + *node = NULL; \ + free(it); \ + return 1; \ + } else { /* Only right child */ \ + *node = it->right; \ + free(it); \ + return 1; \ + } \ + } else if (h_right == 0) { /* Only left child */ \ + *node = it->left; \ + free(it); \ + return 1; \ + } else { /* Exchange with successor */ \ + NAME##_exchange_and_balance(it, &it->right); \ + res = 1; \ + } \ + } \ + if (res) avl_balance_tree(node); \ + return res; \ +} \ +static __attribute__((unused)) TYPE* \ +NAME##_search(avl_node_t *node, TYPE *data) \ +{ \ + while (node != NULL) { \ + int result = NAME##_AVL_compare((TYPE *)node->data, data); \ + if (result == 0) return (TYPE *)node->data; \ + if (result > 0) node = node->left; \ + else node = node->right; \ + } \ + return NULL; \ +} \ +static __attribute__((unused)) void \ +NAME##_free(avl_node_t **node) \ +{ \ + avl_node_t *it = *node; \ + if (it) { \ + NAME##_free(&it->left); \ + NAME##_free(&it->right); \ + free(it); \ + *node = NULL; \ + } \ +} \ +static void \ +NAME##_toarray_rec(avl_node_t *node, 
TYPE **ptr) \ +{ \ + if (node->left != NULL) NAME##_toarray_rec(node->left, ptr); \ + memcpy(*ptr, node->data, sizeof(TYPE)); \ + (*ptr)++; \ + if (node->right != NULL) NAME##_toarray_rec(node->right, ptr); \ +} \ +static __attribute__((unused)) TYPE* \ +NAME##_toarray(avl_node_t *node) \ +{ \ + size_t count = avl_count(node); \ + TYPE *arr = (TYPE *)malloc(sizeof(TYPE) * count); \ + TYPE *ptr = arr; \ + NAME##_toarray_rec(node, &ptr); \ + return arr; \ +} \ +static __attribute__((unused)) avl_iter_t* \ +NAME##_iter(avl_node_t *node) \ +{ \ + return avl_iter(node); \ +} \ +static __attribute__((unused)) TYPE* \ +NAME##_iter_next(avl_iter_t *iter) \ +{ \ + avl_node_t *result = avl_iter_next(iter); \ + if (result == NULL) return NULL; \ + return (TYPE*)(result->data); \ +} \ +static __attribute__((unused)) void \ +NAME##_iter_free(avl_iter_t *iter) \ +{ \ + free(iter); \ +} \ +static inline int \ +NAME##_AVL_compare(TYPE *left, TYPE *right) + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif diff --git a/src/lace.c b/src/lace.c new file mode 100644 index 000000000..9a0b3cb82 --- /dev/null +++ b/src/lace.c @@ -0,0 +1,1045 @@ +/* + * Copyright 2013-2015 Formal Methods and Tools, University of Twente + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <errno.h> // for errno +#include <sched.h> // for sched_getaffinity +#include <stdio.h> // for fprintf +#include <stdlib.h> // for memalign, malloc +#include <string.h> // for memset +#include <sys/mman.h> // for mprotect +#include <sys/time.h> // for gettimeofday +#include <pthread.h> +#include <unistd.h> +#include <assert.h> + +#include <lace.h> + +#ifndef USE_HWLOC +#define USE_HWLOC 0 +#endif + +#if USE_HWLOC +#include <hwloc.h> +#endif + +// public Worker data +static Worker **workers; +static size_t default_stacksize = 0; // set by lace_init +static size_t default_dqsize = 100000; + +#if USE_HWLOC +static hwloc_topology_t topo; +static unsigned int n_nodes, n_cores, n_pus; +#endif + +static int verbosity = 0; + +static int n_workers = 0; +static int enabled_workers = 0; + +// private Worker data (just for stats at end ) +static WorkerP **workers_p; + +// set to 1 when quitting +static int lace_quits = 0; + +// for storing private Worker data +#ifdef __linux__ // use gcc thread-local storage (i.e. 
__thread variables) +static __thread WorkerP *current_worker; +#else +static pthread_key_t worker_key; +#endif +static pthread_attr_t worker_attr; + +static pthread_cond_t wait_until_done = PTHREAD_COND_INITIALIZER; +static pthread_mutex_t wait_until_done_mutex = PTHREAD_MUTEX_INITIALIZER; + +struct lace_worker_init +{ + void* stack; + size_t stacksize; +}; + +static struct lace_worker_init *workers_init; + +lace_newframe_t lace_newframe; + +WorkerP* +lace_get_worker() +{ +#ifdef __linux__ + return current_worker; +#else + return (WorkerP*)pthread_getspecific(worker_key); +#endif +} + +Task* +lace_get_head(WorkerP *self) +{ + Task *dq = self->dq; + if (dq[0].thief == 0) return dq; + if (dq[1].thief == 0) return dq+1; + if (dq[2].thief == 0) return dq+2; + + size_t low = 2; + size_t high = self->end - self->dq; + + for (;;) { + if (low*2 >= high) { + break; + } else if (dq[low*2].thief == 0) { + high=low*2; + break; + } else { + low*=2; + } + } + + while (low < high) { + size_t mid = low + (high-low)/2; + if (dq[mid].thief == 0) high = mid; + else low = mid + 1; + } + + return dq+low; +} + +size_t +lace_workers() +{ + return n_workers; +} + +size_t +lace_default_stacksize() +{ + return default_stacksize; +} + +#ifndef cas +#define cas(ptr, old, new) (__sync_bool_compare_and_swap((ptr),(old),(new))) +#endif + +#if LACE_PIE_TIMES +static uint64_t count_at_start, count_at_end; +static long long unsigned us_elapsed_timer; + +static void +us_elapsed_start(void) +{ + struct timeval now; + gettimeofday(&now, NULL); + us_elapsed_timer = now.tv_sec * 1000000LL + now.tv_usec; +} + +static long long unsigned +us_elapsed(void) +{ + struct timeval now; + long long unsigned t; + + gettimeofday( &now, NULL ); + + t = now.tv_sec * 1000000LL + now.tv_usec; + + return t - us_elapsed_timer; +} +#endif + +#if USE_HWLOC +// Lock used only during parallel lace_init_worker... +static volatile int __attribute__((aligned(64))) lock = 0; +static inline void +lock_acquire() +{ + while (1) { + while (lock) {} + if (cas(&lock, 0, 1)) return; + } +} +static inline void +lock_release() +{ + lock=0; +} +#endif + +/* Barrier */ +#define BARRIER_MAX_THREADS 128 + +typedef union __attribute__((__packed__)) +{ + volatile size_t val; + char pad[LINE_SIZE]; +} asize_t; + +typedef struct { + volatile int __attribute__((aligned(LINE_SIZE))) count; + volatile int __attribute__((aligned(LINE_SIZE))) wait; + /* the following is needed only for destroy: */ + asize_t entered[BARRIER_MAX_THREADS]; +} barrier_t; + +barrier_t lace_bar; + +void +lace_barrier() +{ + int id = lace_get_worker()->worker; + + lace_bar.entered[id].val = 1; // signal entry + + int wait = lace_bar.wait; + if (enabled_workers == __sync_add_and_fetch(&lace_bar.count, 1)) { + lace_bar.count = 0; // reset counter + lace_bar.wait = 1 - wait; // flip wait + lace_bar.entered[id].val = 0; // signal exit + } else { + while (wait == lace_bar.wait) {} // wait + lace_bar.entered[id].val = 0; // signal exit + } +} + +static void +lace_barrier_init() +{ + assert(n_workers <= BARRIER_MAX_THREADS); + memset(&lace_bar, 0, sizeof(barrier_t)); +} + +static void +lace_barrier_destroy() +{ + // wait for all to exit + for (int i=0; i<n_workers; i++) { + while (lace_bar.entered[i].val) {} + } +} + +void +lace_init_worker(int worker, size_t dq_size) +{ + Worker *wt = NULL; + WorkerP *w = NULL; + + if (dq_size == 0) dq_size = default_dqsize; + +#if USE_HWLOC + // Get our logical processor + hwloc_obj_t pu = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, worker % n_pus); + + // Pin our thread... + hwloc_set_cpubind(topo, pu->cpuset, HWLOC_CPUBIND_THREAD); + + // Allocate memory on our node... 
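+ // (Editorial note on the design; not in the upstream source: the + // hwloc_alloc_membind calls below bind the Worker, WorkerP and task deque + // allocations to the NUMA node of this worker's processing unit, so each + // worker's deque lives in memory local to the core that owns it; the #else + // branch falls back to posix_memalign without that locality guarantee.)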
+ lock_acquire(); + wt = (Worker *)hwloc_alloc_membind(topo, sizeof(Worker), pu->cpuset, HWLOC_MEMBIND_BIND, 0); + w = (WorkerP *)hwloc_alloc_membind(topo, sizeof(WorkerP), pu->cpuset, HWLOC_MEMBIND_BIND, 0); + if (wt == NULL || w == NULL || (w->dq = (Task*)hwloc_alloc_membind(topo, dq_size * sizeof(Task), pu->cpuset, HWLOC_MEMBIND_BIND, 0)) == NULL) { + fprintf(stderr, "Lace error: Unable to allocate memory for the Lace worker!\n"); + exit(1); + } + lock_release(); +#else + // Allocate memory... + if (posix_memalign((void**)&wt, LINE_SIZE, sizeof(Worker)) || + posix_memalign((void**)&w, LINE_SIZE, sizeof(WorkerP)) || + posix_memalign((void**)&w->dq, LINE_SIZE, dq_size * sizeof(Task))) { + fprintf(stderr, "Lace error: Unable to allocate memory for the Lace worker!\n"); + exit(1); + } +#endif + + // Initialize public worker data + wt->dq = w->dq; + wt->ts.v = 0; + wt->allstolen = 0; + wt->movesplit = 0; + + // Initialize private worker data + w->_public = wt; + w->end = w->dq + dq_size; + w->split = w->dq; + w->allstolen = 0; + w->worker = worker; +#if USE_HWLOC + w->pu = worker % n_pus; +#else + w->pu = -1; +#endif + w->enabled = 1; + if (workers_init[worker].stack != 0) { + w->stack_trigger = ((size_t)workers_init[worker].stack) + workers_init[worker].stacksize/20; + } else { + w->stack_trigger = 0; + } + +#if LACE_COUNT_EVENTS + // Reset counters + { int k; for (k=0; k<CTR_MAX; k++) w->ctr[k] = 0; } +#endif + + // Set pointers +#ifdef __linux__ + current_worker = w; +#else + pthread_setspecific(worker_key, w); +#endif + workers[worker] = wt; + workers_p[worker] = w; + + // Synchronize with others + lace_barrier(); + +#if LACE_PIE_TIMES + w->time = gethrtime(); + w->level = 0; +#endif +} + +#if defined(__APPLE__) && !defined(pthread_barrier_t) + +typedef int pthread_barrierattr_t; +typedef struct +{ + pthread_mutex_t mutex; + pthread_cond_t cond; + int count; + int tripCount; +} pthread_barrier_t; + +static int +pthread_barrier_init(pthread_barrier_t *barrier, const pthread_barrierattr_t *attr, unsigned int count) +{ + if(count == 0) + { + errno = EINVAL; + return -1; + } + if(pthread_mutex_init(&barrier->mutex, 0) < 0) + { + return -1; + } + if(pthread_cond_init(&barrier->cond, 0) < 0) + { + pthread_mutex_destroy(&barrier->mutex); + return -1; + } + barrier->tripCount = count; + barrier->count = 0; + + return 0; + (void)attr; +} + +static int +pthread_barrier_destroy(pthread_barrier_t *barrier) +{ + pthread_cond_destroy(&barrier->cond); + pthread_mutex_destroy(&barrier->mutex); + return 0; +} + +static int +pthread_barrier_wait(pthread_barrier_t *barrier) +{ + pthread_mutex_lock(&barrier->mutex); + ++(barrier->count); + if(barrier->count >= barrier->tripCount) + { + barrier->count = 0; + pthread_cond_broadcast(&barrier->cond); + pthread_mutex_unlock(&barrier->mutex); + return 1; + } + else + { + pthread_cond_wait(&barrier->cond, &(barrier->mutex)); + pthread_mutex_unlock(&barrier->mutex); + return 0; + } +} + +#endif // defined(__APPLE__) && !defined(pthread_barrier_t) + +static pthread_barrier_t suspend_barrier; +static volatile int must_suspend = 0, suspended = 0; + +void +lace_suspend() +{ + if (suspended == 0) { + suspended = 1; + must_suspend = 1; + lace_barrier(); + must_suspend = 0; + } +} + +void +lace_resume() +{ + if (suspended == 1) { + suspended = 0; + pthread_barrier_wait(&suspend_barrier); + } +} + +/** + * With set_workers, all workers 0..(N-1) are enabled and N..max are disabled. + * You can never disable the current worker or reduce the number of workers below 1. 
*/ +void +lace_disable_worker(int worker) +{ + int self = lace_get_worker()->worker; + if (worker == self) return; + if (workers_p[worker]->enabled == 1) { + workers_p[worker]->enabled = 0; + enabled_workers--; + } +} + +void +lace_enable_worker(int worker) +{ + int self = lace_get_worker()->worker; + if (worker == self) return; + if (workers_p[worker]->enabled == 0) { + workers_p[worker]->enabled = 1; + enabled_workers++; + } +} + +void +lace_set_workers(int workercount) +{ + if (workercount < 1) workercount = 1; + if (workercount > n_workers) workercount = n_workers; + enabled_workers = workercount; + int self = lace_get_worker()->worker; + if (self >= workercount) workercount--; + for (int i=0; i<n_workers; i++) { + workers_p[i]->enabled = (i < workercount || i == self) ? 1 : 0; + } +} + +int +lace_enabled_workers() +{ + return enabled_workers; +} + +static inline uint32_t +rng(uint32_t *seed, int max) +{ + uint32_t next = *seed; + + next *= 1103515245; + next += 12345; + + *seed = next; + + return next % max; +} + +VOID_TASK_IMPL_0(lace_steal_random) +{ + Worker *victim = workers[(__lace_worker->worker + 1 + rng(&__lace_worker->seed, n_workers-1)) % n_workers]; + + YIELD_NEWFRAME(); + + PR_COUNTSTEALS(__lace_worker, CTR_steal_tries); + Worker *res = lace_steal(__lace_worker, __lace_dq_head, victim); + if (res == LACE_STOLEN) { + PR_COUNTSTEALS(__lace_worker, CTR_steals); + } else if (res == LACE_BUSY) { + PR_COUNTSTEALS(__lace_worker, CTR_steal_busy); + } +} + +VOID_TASK_IMPL_1(lace_steal_random_loop, int*, quit) +{ + while(!(*(volatile int*)quit)) { + lace_steal_random(); + + if (must_suspend) { + lace_barrier(); + do { + pthread_barrier_wait(&suspend_barrier); + } while (__lace_worker->enabled == 0); + } + } +} + +static lace_startup_cb main_cb; + +static void* +lace_main_wrapper(void *arg) +{ + lace_init_worker(0, 0); + WorkerP *self = lace_get_worker(); + +#if LACE_PIE_TIMES + self->time = gethrtime(); +#endif + + lace_time_event(self, 1); + main_cb(self, self->dq, arg); + lace_exit(); + pthread_cond_broadcast(&wait_until_done); + + return NULL; +} + +VOID_TASK_IMPL_1(lace_steal_loop, int*, quit) +{ + // Determine who I am + const int worker_id = __lace_worker->worker; + + // Prepare self, victim + Worker ** const self = &workers[worker_id]; + Worker **victim = self; + +#if LACE_PIE_TIMES + __lace_worker->time = gethrtime(); +#endif + + uint32_t seed = worker_id; + unsigned int n = n_workers; + int i=0; + + while(*(volatile int*)quit == 0) { + // Select victim + if( i>0 ) { + i--; + victim++; + if (victim == self) victim++; + if (victim >= workers + n) victim = workers; + if (victim == self) victim++; + } else { + i = rng(&seed, 40); // compute random i 0..40 + victim = workers + (rng(&seed, n-1) + worker_id + 1) % n; + } + + PR_COUNTSTEALS(__lace_worker, CTR_steal_tries); + Worker *res = lace_steal(__lace_worker, __lace_dq_head, *victim); + if (res == LACE_STOLEN) { + PR_COUNTSTEALS(__lace_worker, CTR_steals); + } else if (res == LACE_BUSY) { + PR_COUNTSTEALS(__lace_worker, CTR_steal_busy); + } + + YIELD_NEWFRAME(); + + if (must_suspend) { + lace_barrier(); + do { + pthread_barrier_wait(&suspend_barrier); + } while (__lace_worker->enabled == 0); + } + } +} + +static void* +lace_default_worker(void* arg) +{ + lace_init_worker((size_t)arg, 0); + WorkerP *__lace_worker = lace_get_worker(); + Task *__lace_dq_head = __lace_worker->dq; + lace_steal_loop(&lace_quits); + lace_time_event(__lace_worker, 9); + lace_barrier(); + return NULL; +} + +pthread_t +lace_spawn_worker(int worker, size_t stacksize, void* 
(*fun)(void*), void* arg) +{ + // Determine stack size + if (stacksize == 0) stacksize = default_stacksize; + + size_t pagesize = sysconf(_SC_PAGESIZE); + stacksize = (stacksize + pagesize - 1) & ~(pagesize - 1); // ceil(stacksize, pagesize) + +#if USE_HWLOC + // Get our logical processor + hwloc_obj_t pu = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, worker % n_pus); + + // Allocate memory for the program stack + lock_acquire(); + void *stack_location = hwloc_alloc_membind(topo, stacksize + pagesize, pu->cpuset, HWLOC_MEMBIND_BIND, 0); + lock_release(); + if (stack_location == 0) { + fprintf(stderr, "Lace error: Unable to allocate memory for the pthread stack!\n"); + exit(1); + } +#else + void *stack_location = mmap(NULL, stacksize + pagesize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); + if (stack_location == MAP_FAILED) { + fprintf(stderr, "Lace error: Cannot allocate program stack: %s!\n", strerror(errno)); + exit(1); + } +#endif + + if (0 != mprotect(stack_location, pagesize, PROT_NONE)) { + fprintf(stderr, "Lace error: Unable to protect the allocated program stack with a guard page!\n"); + exit(1); + } + stack_location = (uint8_t *)stack_location + pagesize; // skip protected page. + if (0 != pthread_attr_setstack(&worker_attr, stack_location, stacksize)) { + fprintf(stderr, "Lace error: Unable to set the pthread stack in Lace!\n"); + exit(1); + } + + workers_init[worker].stack = stack_location; + workers_init[worker].stacksize = stacksize; + + if (fun == 0) { + fun = lace_default_worker; + arg = (void*)(size_t)worker; + } + + pthread_t res; + pthread_create(&res, &worker_attr, fun, arg); + return res; +} + +static int +get_cpu_count() +{ +#if USE_HWLOC + int count = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU); +#elif defined(sched_getaffinity) + /* Best solution: find actual available cpus */ + cpu_set_t cs; + CPU_ZERO(&cs); + sched_getaffinity(0, sizeof(cs), &cs); + int count = CPU_COUNT(&cs); +#elif defined(_SC_NPROCESSORS_ONLN) + /* Fallback */ + int count = sysconf(_SC_NPROCESSORS_ONLN); +#else + /* Okay... */ + int count = 1; +#endif + return count < 1 ? 
1 : count; +} + +void +lace_set_verbosity(int level) +{ + verbosity = level; +} + +void +lace_init(int n, size_t dqsize) +{ +#if USE_HWLOC + hwloc_topology_init(&topo); + hwloc_topology_load(topo); + + n_nodes = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_NODE); + n_cores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); + n_pus = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU); +#endif + + // Initialize globals + n_workers = n; + if (n_workers == 0) n_workers = get_cpu_count(); + enabled_workers = n_workers; + if (dqsize != 0) default_dqsize = dqsize; + lace_quits = 0; + + // Create barrier for all workers + lace_barrier_init(); + + // Create suspend barrier + pthread_barrier_init(&suspend_barrier, NULL, n_workers); + + // Allocate array with all workers + if (posix_memalign((void**)&workers, LINE_SIZE, n_workers*sizeof(Worker*)) != 0 || + posix_memalign((void**)&workers_p, LINE_SIZE, n_workers*sizeof(WorkerP*)) != 0) { + fprintf(stderr, "Lace error: unable to allocate memory!\n"); + exit(1); + } + + // Create pthread key +#ifndef __linux__ + pthread_key_create(&worker_key, NULL); +#endif + + // Prepare structures for thread creation + pthread_attr_init(&worker_attr); + + // Set contention scope to system (instead of process) + pthread_attr_setscope(&worker_attr, PTHREAD_SCOPE_SYSTEM); + + // Get default stack size + if (pthread_attr_getstacksize(&worker_attr, &default_stacksize) != 0) { + fprintf(stderr, "Lace warning: pthread_attr_getstacksize returned error!\n"); + default_stacksize = 1048576; // 1 megabyte default + } + + if (verbosity) { +#if USE_HWLOC + fprintf(stderr, "Initializing Lace, %u nodes, %u cores, %u logical processors, %d workers.\n", n_nodes, n_cores, n_pus, n_workers); +#else + fprintf(stderr, "Initializing Lace, %d workers.\n", n_workers); +#endif + } + + // Prepare lace_init structure + workers_init = (struct lace_worker_init*)calloc(1, sizeof(struct lace_worker_init) * n_workers); + + lace_newframe.t = NULL; + +#if LACE_PIE_TIMES + // Initialize counters for pie times + us_elapsed_start(); + count_at_start = gethrtime(); +#endif +} + +void +lace_startup(size_t stacksize, lace_startup_cb cb, void *arg) +{ + if (stacksize == 0) stacksize = default_stacksize; + + if (verbosity) { + if (cb != 0) { + fprintf(stderr, "Lace startup, creating %d worker threads with program stack %zu bytes.\n", n_workers, stacksize); + } else if (n_workers == 1) { + fprintf(stderr, "Lace startup, creating 0 worker threads.\n"); + } else { + fprintf(stderr, "Lace startup, creating %d worker threads with program stack %zu bytes.\n", n_workers-1, stacksize); + } + } + + /* Spawn workers */ + int i; + for (i=1; i<n_workers; i++) lace_spawn_worker(i, stacksize, 0, 0); + + if (cb != 0) { + // Suspend this thread until cb returns + pthread_mutex_lock(&wait_until_done_mutex); + main_cb = cb; + lace_spawn_worker(0, stacksize, lace_main_wrapper, arg); + pthread_cond_wait(&wait_until_done, &wait_until_done_mutex); + pthread_mutex_unlock(&wait_until_done_mutex); + } else { + // Initialize the current thread as worker 0 + lace_init_worker(0, 0); + } +} + +static uint64_t ctr_all[CTR_MAX]; + +void +lace_count_reset() +{ +#if LACE_COUNT_EVENTS + int i; + size_t j; + + for (i=0;i<n_workers;i++) { + for (j=0;j<CTR_MAX;j++) { + workers_p[i]->ctr[j] = 0; + } + } + +#if LACE_PIE_TIMES + for (i=0;i<n_workers;i++) { + workers_p[i]->time = gethrtime(); + if (i != 0) workers_p[i]->level = 0; + } + + us_elapsed_start(); + count_at_start = gethrtime(); +#endif +#endif +} + +void +lace_count_report_file(FILE *file) +{ +#if LACE_COUNT_EVENTS + int i; + size_t j; + + for (j=0;j<CTR_MAX;j++) ctr_all[j] = 0; + for (i=0;i<n_workers;i++) { + uint64_t *wctr = workers_p[i]->ctr; + for (j=0;j<CTR_MAX;j++) ctr_all[j] += wctr[j]; + } + +#if LACE_COUNT_TASKS + for (i=0;i<n_workers;i++) { + fprintf(file, "Tasks (%d): %zu\n", i, workers_p[i]->ctr[CTR_tasks]); + } + fprintf(file, "Tasks (sum): %zu\n", ctr_all[CTR_tasks]); + fprintf(file, "\n"); +#endif + +#if LACE_COUNT_STEALS + for (i=0;i<n_workers;i++) { + fprintf(file, "Steals (%d): %zu good/%zu busy of %zu tries; leaps: %zu good/%zu busy of %zu tries\n", i, + workers_p[i]->ctr[CTR_steals], workers_p[i]->ctr[CTR_steal_busy], + workers_p[i]->ctr[CTR_steal_tries], workers_p[i]->ctr[CTR_leaps], + workers_p[i]->ctr[CTR_leap_busy], workers_p[i]->ctr[CTR_leap_tries]); + } + fprintf(file, "Steals (sum): %zu good/%zu busy of %zu tries; leaps: %zu good/%zu busy of %zu tries\n", + ctr_all[CTR_steals], ctr_all[CTR_steal_busy], + ctr_all[CTR_steal_tries], ctr_all[CTR_leaps], + ctr_all[CTR_leap_busy], 
ctr_all[CTR_leap_tries]); + fprintf(file, "\n"); +#endif + +#if LACE_COUNT_STEALS && LACE_COUNT_TASKS + for (i=0;i<n_workers;i++) { + fprintf(file, "Tasks per steal (%d): %zu\n", i, + workers_p[i]->ctr[CTR_tasks]/(workers_p[i]->ctr[CTR_steals]+workers_p[i]->ctr[CTR_leaps])); + } + fprintf(file, "Tasks per steal (sum): %zu\n", ctr_all[CTR_tasks]/(ctr_all[CTR_steals]+ctr_all[CTR_leaps])); + fprintf(file, "\n"); +#endif + +#if LACE_COUNT_SPLITS + for (i=0;i<n_workers;i++) { + fprintf(file, "Splits (%d): %zu shrinks, %zu grows, %zu outgoing requests\n", i, + workers_p[i]->ctr[CTR_split_shrink], workers_p[i]->ctr[CTR_split_grow], workers_p[i]->ctr[CTR_split_req]); + } + fprintf(file, "Splits (sum): %zu shrinks, %zu grows, %zu outgoing requests\n", + ctr_all[CTR_split_shrink], ctr_all[CTR_split_grow], ctr_all[CTR_split_req]); + fprintf(file, "\n"); +#endif + +#if LACE_PIE_TIMES + count_at_end = gethrtime(); + + uint64_t count_per_ms = (count_at_end - count_at_start) / (us_elapsed() / 1000); + double dcpm = (double)count_per_ms; + + uint64_t sum_count; + sum_count = ctr_all[CTR_init] + ctr_all[CTR_wapp] + ctr_all[CTR_lapp] + ctr_all[CTR_wsteal] + ctr_all[CTR_lsteal] + + ctr_all[CTR_close] + ctr_all[CTR_wstealsucc] + ctr_all[CTR_lstealsucc] + ctr_all[CTR_wsignal] + + ctr_all[CTR_lsignal]; + + fprintf(file, "Measured clock (tick) frequency: %.2f GHz\n", count_per_ms / 1000000.0); + fprintf(file, "Aggregated time per pie slice, total time: %.2f CPU seconds\n\n", sum_count / (1000*dcpm)); + + for (i=0;i<n_workers;i++) { + fprintf(file, "Startup time (%d): %10.2f ms\n", i, workers_p[i]->ctr[CTR_init] / dcpm); + fprintf(file, "Steal work (%d): %10.2f ms\n", i, workers_p[i]->ctr[CTR_wapp] / dcpm); + fprintf(file, "Leap work (%d): %10.2f ms\n", i, workers_p[i]->ctr[CTR_lapp] / dcpm); + fprintf(file, "Steal overhead (%d): %10.2f ms\n", i, (workers_p[i]->ctr[CTR_wstealsucc]+workers_p[i]->ctr[CTR_wsignal]) / dcpm); + fprintf(file, "Leap overhead (%d): %10.2f ms\n", i, (workers_p[i]->ctr[CTR_lstealsucc]+workers_p[i]->ctr[CTR_lsignal]) / dcpm); + fprintf(file, "Steal search (%d): %10.2f ms\n", i, (workers_p[i]->ctr[CTR_wsteal]-workers_p[i]->ctr[CTR_wstealsucc]-workers_p[i]->ctr[CTR_wsignal]) / dcpm); + fprintf(file, "Leap search (%d): %10.2f ms\n", i, (workers_p[i]->ctr[CTR_lsteal]-workers_p[i]->ctr[CTR_lstealsucc]-workers_p[i]->ctr[CTR_lsignal]) / dcpm); + fprintf(file, "Exit time (%d): %10.2f ms\n", i, workers_p[i]->ctr[CTR_close] / dcpm); + fprintf(file, "\n"); + } + + fprintf(file, "Startup time (sum): %10.2f ms\n", ctr_all[CTR_init] / dcpm); + fprintf(file, "Steal work (sum): %10.2f ms\n", ctr_all[CTR_wapp] / dcpm); + fprintf(file, "Leap work (sum): %10.2f ms\n", ctr_all[CTR_lapp] / dcpm); + fprintf(file, "Steal overhead (sum): %10.2f ms\n", (ctr_all[CTR_wstealsucc]+ctr_all[CTR_wsignal]) / dcpm); + fprintf(file, "Leap overhead (sum): %10.2f ms\n", (ctr_all[CTR_lstealsucc]+ctr_all[CTR_lsignal]) / dcpm); + fprintf(file, "Steal search (sum): %10.2f ms\n", (ctr_all[CTR_wsteal]-ctr_all[CTR_wstealsucc]-ctr_all[CTR_wsignal]) / dcpm); + fprintf(file, "Leap search (sum): %10.2f ms\n", (ctr_all[CTR_lsteal]-ctr_all[CTR_lstealsucc]-ctr_all[CTR_lsignal]) / dcpm); + fprintf(file, "Exit time (sum): %10.2f ms\n", ctr_all[CTR_close] / dcpm); + fprintf(file, "\n" ); +#endif +#endif + return; + (void)file; +} + +void lace_exit() +{ + lace_time_event(lace_get_worker(), 2); + + // first suspend all other threads + lace_suspend(); + + // now enable all threads and tell them to quit + lace_set_workers(n_workers); + lace_quits = 1; + + // now resume all threads and wait until they all pass the barrier + lace_resume(); + lace_barrier(); + + // finally, destroy the barriers + lace_barrier_destroy(); + pthread_barrier_destroy(&suspend_barrier); + +#if LACE_COUNT_EVENTS + lace_count_report_file(stderr); +#endif 
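+ // (Editorial sketch; not in the upstream source: the full lifecycle as used + // by the examples in this tree is + //     lace_init(0, 0);                    // detect workers, default deque size + //     lace_startup(0, TASK(_main), NULL); // blocks until _main returns + // where lace_exit() has then already run via lace_main_wrapper; a program + // that starts Lace without a startup callback must call lace_exit() itself.)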
+} + +void +lace_exec_in_new_frame(WorkerP *__lace_worker, Task *__lace_dq_head, Task *root) +{ + TailSplit old; + uint8_t old_as; + + // save old tail, split, allstolen and initiate new frame + { + Worker *wt = __lace_worker->_public; + + old_as = wt->allstolen; + wt->allstolen = 1; + old.ts.split = wt->ts.ts.split; + wt->ts.ts.split = 0; + mfence(); + old.ts.tail = wt->ts.ts.tail; + + TailSplit ts_new; + ts_new.ts.tail = __lace_dq_head - __lace_worker->dq; + ts_new.ts.split = __lace_dq_head - __lace_worker->dq; + wt->ts.v = ts_new.v; + + __lace_worker->split = __lace_dq_head; + __lace_worker->allstolen = 1; + } + + // wait until all workers are ready + lace_barrier(); + + // execute task + root->f(__lace_worker, __lace_dq_head, root); + compiler_barrier(); + + // wait until all workers are back (else they may steal from previous frame) + lace_barrier(); + + // restore tail, split, allstolen + { + Worker *wt = __lace_worker->_public; + wt->allstolen = old_as; + wt->ts.v = old.v; + __lace_worker->split = __lace_worker->dq + old.ts.split; + __lace_worker->allstolen = old_as; + } +} + +VOID_TASK_IMPL_2(lace_steal_loop_root, Task*, t, int*, done) +{ + t->f(__lace_worker, __lace_dq_head, t); + *done = 1; +} + +VOID_TASK_2(lace_together_helper, Task*, t, volatile int*, finished) +{ + t->f(__lace_worker, __lace_dq_head, t); + + for (;;) { + int f = *finished; + if (cas(finished, f, f-1)) break; + } + + while (*finished != 0) STEAL_RANDOM(); +} + +static void +lace_sync_and_exec(WorkerP *__lace_worker, Task *__lace_dq_head, Task *root) +{ + // wait until other workers have made a local copy + lace_barrier(); + + // one worker sets t to 0 again + if (LACE_WORKER_ID == 0) lace_newframe.t = 0; + // else while (*(volatile Task**)&lace_newframe.t != 0) {} + + // the above line is commented out since lace_exec_in_new_frame includes + // a lace_barrier before the task is executed + + lace_exec_in_new_frame(__lace_worker, __lace_dq_head, root); +} + +void +lace_yield(WorkerP *__lace_worker, Task *__lace_dq_head) +{ + // make a local copy of the task + Task _t; + memcpy(&_t, lace_newframe.t, sizeof(Task)); + + // wait until all workers have made a local copy + lace_barrier(); + + // one worker sets t to 0 again + if (LACE_WORKER_ID == 0) lace_newframe.t = 0; + // else while (*(volatile Task**)&lace_newframe.t != 0) {} + + // the above line is commented out since lace_exec_in_new_frame includes + // a lace_barrier before the task is executed + + lace_exec_in_new_frame(__lace_worker, __lace_dq_head, &_t); +} + +void +lace_do_together(WorkerP *__lace_worker, Task *__lace_dq_head, Task *t) +{ + /* synchronization integer */ + int done = n_workers; + + /* wrap task in lace_together_helper */ + Task _t2; + TD_lace_together_helper *t2 = (TD_lace_together_helper *)&_t2; + t2->f = lace_together_helper_WRAP; + t2->thief = THIEF_TASK; + t2->d.args.arg_1 = t; + t2->d.args.arg_2 = &done; + + while (!cas(&lace_newframe.t, 0, &_t2)) lace_yield(__lace_worker, __lace_dq_head); + lace_sync_and_exec(__lace_worker, __lace_dq_head, &_t2); +} + +void +lace_do_newframe(WorkerP *__lace_worker, Task *__lace_dq_head, Task *t) +{ + /* synchronization integer */ + int done = 0; + + /* wrap task in lace_steal_loop_root */ + Task _t2; + TD_lace_steal_loop_root *t2 = (TD_lace_steal_loop_root *)&_t2; + t2->f = lace_steal_loop_root_WRAP; + t2->thief = THIEF_TASK; + t2->d.args.arg_1 = t; + t2->d.args.arg_2 = &done; + + /* and create the lace_steal_loop task for other workers */ + Task _s; + TD_lace_steal_loop *s = (TD_lace_steal_loop 
*)&_s; + s->f = &lace_steal_loop_WRAP; + s->thief = THIEF_TASK; + s->d.args.arg_1 = &done; + + compiler_barrier(); + + while (!cas(&lace_newframe.t, 0, &_s)) lace_yield(__lace_worker, __lace_dq_head); + lace_sync_and_exec(__lace_worker, __lace_dq_head, &_t2); +} diff --git a/src/lace.h b/src/lace.h new file mode 100644 index 000000000..8d6d1b645 --- /dev/null +++ b/src/lace.h @@ -0,0 +1,2743 @@ +/* + * Copyright 2013-2015 Formal Methods and Tools, University of Twente + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <pthread.h> /* for pthread_t */ + +#ifndef __LACE_H__ +#define __LACE_H__ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* Some flags */ + +#ifndef LACE_DEBUG_PROGRAMSTACK /* Write to stderr when 95% program stack reached */ +#define LACE_DEBUG_PROGRAMSTACK 0 +#endif + +#ifndef LACE_LEAP_RANDOM /* Use random leaping when leapfrogging fails */ +#define LACE_LEAP_RANDOM 1 +#endif + +#ifndef LACE_PIE_TIMES /* Record time spent stealing and leapfrogging */ +#define LACE_PIE_TIMES 0 +#endif + +#ifndef LACE_COUNT_TASKS /* Count number of tasks executed */ +#define LACE_COUNT_TASKS 0 +#endif + +#ifndef LACE_COUNT_STEALS /* Count number of steals performed */ +#define LACE_COUNT_STEALS 0 +#endif + +#ifndef LACE_COUNT_SPLITS /* Count number of times the split point is moved */ +#define LACE_COUNT_SPLITS 0 +#endif + +#ifndef LACE_COUNT_EVENTS +#define LACE_COUNT_EVENTS (LACE_PIE_TIMES || LACE_COUNT_TASKS || LACE_COUNT_STEALS || LACE_COUNT_SPLITS) +#endif + +/* Typical cacheline size of system architectures */ +#ifndef LINE_SIZE +#define LINE_SIZE 64 +#endif + +/* The size of a pointer, 8 bytes on a 64-bit architecture */ +#define P_SZ (sizeof(void *)) + +#define PAD(x,b) ( ( (b) - ((x)%(b)) ) & ((b)-1) ) /* b must be power of 2 */ +#define ROUND(x,b) ( (x) + PAD( (x), (b) ) ) + +/* The size is in bytes. Note that this is without the extra overhead from Lace. + The value must be greater than or equal to the maximum size of your tasks. + The task size is the maximum of the size of the result or of the sum of the parameter sizes. 
*/ +#ifndef LACE_TASKSIZE +#define LACE_TASKSIZE (6)*P_SZ +#endif + +/* Some fences */ +#ifndef compiler_barrier +#define compiler_barrier() { asm volatile("" ::: "memory"); } +#endif + +#ifndef mfence +#define mfence() { asm volatile("mfence" ::: "memory"); } +#endif + +/* Compiler specific branch prediction optimization */ +#ifndef likely +#define likely(x) __builtin_expect((x),1) +#endif + +#ifndef unlikely +#define unlikely(x) __builtin_expect((x),0) +#endif + +#if LACE_PIE_TIMES +/* High resolution timer */ +static inline uint64_t gethrtime() +{ + uint32_t hi, lo; + asm volatile ("rdtsc" : "=a"(lo), "=d"(hi) :: "memory"); + return (uint64_t)hi<<32 | lo; +} +#endif + +#if LACE_COUNT_EVENTS +void lace_count_reset(); +void lace_count_report_file(FILE *file); +#endif + +#if LACE_COUNT_TASKS +#define PR_COUNTTASK(s) PR_INC(s,CTR_tasks) +#else +#define PR_COUNTTASK(s) /* Empty */ +#endif + +#if LACE_COUNT_STEALS +#define PR_COUNTSTEALS(s,i) PR_INC(s,i) +#else +#define PR_COUNTSTEALS(s,i) /* Empty */ +#endif + +#if LACE_COUNT_SPLITS +#define PR_COUNTSPLITS(s,i) PR_INC(s,i) +#else +#define PR_COUNTSPLITS(s,i) /* Empty */ +#endif + +#if LACE_COUNT_EVENTS +#define PR_ADD(s,i,k) ( ((s)->ctr[i])+=k ) +#else +#define PR_ADD(s,i,k) /* Empty */ +#endif +#define PR_INC(s,i) PR_ADD(s,i,1) + +typedef enum { +#ifdef LACE_COUNT_TASKS + CTR_tasks, /* Number of tasks spawned */ +#endif +#ifdef LACE_COUNT_STEALS + CTR_steal_tries, /* Number of steal attempts */ + CTR_leap_tries, /* Number of leap attempts */ + CTR_steals, /* Number of succesful steals */ + CTR_leaps, /* Number of succesful leaps */ + CTR_steal_busy, /* Number of steal busies */ + CTR_leap_busy, /* Number of leap busies */ +#endif +#ifdef LACE_COUNT_SPLITS + CTR_split_grow, /* Number of split right */ + CTR_split_shrink,/* Number of split left */ + CTR_split_req, /* Number of split requests */ +#endif + CTR_fast_sync, /* Number of fast syncs */ + CTR_slow_sync, /* Number of slow syncs */ +#ifdef LACE_PIE_TIMES + CTR_init, /* Timer for initialization */ + CTR_close, /* Timer for shutdown */ + CTR_wapp, /* Timer for application code (steal) */ + CTR_lapp, /* Timer for application code (leap) */ + CTR_wsteal, /* Timer for steal code (steal) */ + CTR_lsteal, /* Timer for steal code (leap) */ + CTR_wstealsucc, /* Timer for succesful steal code (steal) */ + CTR_lstealsucc, /* Timer for succesful steal code (leap) */ + CTR_wsignal, /* Timer for signal after work (steal) */ + CTR_lsignal, /* Timer for signal after work (leap) */ +#endif + CTR_MAX +} CTR_index; + +struct _WorkerP; +struct _Worker; +struct _Task; + +#define THIEF_EMPTY ((struct _Worker*)0x0) +#define THIEF_TASK ((struct _Worker*)0x1) +#define THIEF_COMPLETED ((struct _Worker*)0x2) + +#define TASK_COMMON_FIELDS(type) \ + void (*f)(struct _WorkerP *, struct _Task *, struct type *); \ + struct _Worker * volatile thief; + +struct __lace_common_fields_only { TASK_COMMON_FIELDS(_Task) }; +#define LACE_COMMON_FIELD_SIZE sizeof(struct __lace_common_fields_only) + +typedef struct _Task { + TASK_COMMON_FIELDS(_Task); + char p1[PAD(LACE_COMMON_FIELD_SIZE, P_SZ)]; + char d[LACE_TASKSIZE]; + char p2[PAD(ROUND(LACE_COMMON_FIELD_SIZE, P_SZ) + LACE_TASKSIZE, LINE_SIZE)]; +} Task; + +typedef union __attribute__((packed)) { + struct { + uint32_t tail; + uint32_t split; + } ts; + uint64_t v; +} TailSplit; + +typedef struct _Worker { + Task *dq; + TailSplit ts; + uint8_t allstolen; + + char pad1[PAD(P_SZ+sizeof(TailSplit)+1, LINE_SIZE)]; + + uint8_t movesplit; +} Worker; + +typedef struct _WorkerP { + 
Task *dq; // same as dq + Task *split; // same as dq+ts.ts.split + Task *end; // dq+dq_size + Worker *_public; // pointer to public Worker struct + size_t stack_trigger; // for stack overflow detection + int16_t worker; // what is my worker id? + int16_t pu; // my pu (for HWLOC) + uint8_t allstolen; // my allstolen + volatile int8_t enabled; // if this worker is enabled + +#if LACE_COUNT_EVENTS + uint64_t ctr[CTR_MAX]; // counters + volatile uint64_t time; + volatile int level; +#endif + + uint32_t seed; // my random seed (for lace_steal_random) +} WorkerP; + +#define LACE_TYPEDEF_CB(t, f, ...) typedef t (*f)(WorkerP *, Task *, ##__VA_ARGS__); +LACE_TYPEDEF_CB(void, lace_startup_cb, void*); + +/** + * Set verbosity level (0 = no startup messages, 1 = startup messages) + * Default level: 0 + */ +void lace_set_verbosity(int level); + +/** + * Initialize master structures for Lace with <n_workers> workers + * and default deque size of <dqsize>. + * Does not create new threads. + * Tries to detect number of cpus, if n_workers equals 0. + */ +void lace_init(int n_workers, size_t dqsize); + +/** + * After lace_init, start all worker threads. + * If cb,arg are set, suspend this thread, call cb(arg) in a new thread + * and exit Lace upon return + * Otherwise, the current thread is initialized as a Lace thread. + */ +void lace_startup(size_t stacksize, lace_startup_cb, void* arg); + +/** + * Initialize current thread as worker <idx> and allocate a deque with size <dqsize>. + * Use this when manually creating worker threads. + */ +void lace_init_worker(int idx, size_t dqsize); + +/** + * Manually spawn worker <idx> with (optional) program stack size <stacksize>. + * If fun,arg are set, overrides default startup method. + * Typically: for workers 1...(n_workers-1): lace_spawn_worker(i, stack_size, 0, 0); + */ +pthread_t lace_spawn_worker(int idx, size_t stacksize, void *(*fun)(void*), void* arg); + +/** + * Steal a random task. + */ +#define lace_steal_random() CALL(lace_steal_random) +void lace_steal_random_CALL(WorkerP*, Task*); + +/** + * Steal random tasks until parameter *quit is set + * Note: task declarations at end; quit is of type int* + */ +#define lace_steal_random_loop(quit) CALL(lace_steal_random_loop, quit) +#define lace_steal_loop(quit) CALL(lace_steal_loop, quit) + +/** + * Barrier (all workers must enter it before progressing) + */ +void lace_barrier(); + +/** + * Suspend and resume all other workers. + * May only be used when all other workers are idle. + */ +void lace_suspend(); +void lace_resume(); + +/** + * When all tasks are suspended, workers can be temporarily disabled. + * With set_workers, all workers 0..(N-1) are enabled and N..max are disabled. + * You can never disable the current worker or reduce the number of workers below 1. + * You cannot add workers. + */ +void lace_disable_worker(int worker); +void lace_enable_worker(int worker); +void lace_set_workers(int workercount); +int lace_enabled_workers(); + +/** + * Retrieve number of Lace workers + */ +size_t lace_workers(); + +/** + * Retrieve default program stack size + */ +size_t lace_default_stacksize(); + +/** + * Retrieve current worker. + */ +WorkerP *lace_get_worker(); + +/** + * Retrieve the current head of the deque + */ +Task *lace_get_head(WorkerP *); + +/** + * Exit Lace. Automatically called when started with cb,arg. + */ +void lace_exit(); + +#define LACE_STOLEN ((Worker*)0) +#define LACE_BUSY ((Worker*)1) +#define LACE_NOWORK ((Worker*)2) + +#define TASK(f) ( f##_CALL ) +#define WRAP(f, ...) 
+#define TASK(f)          ( f##_CALL )
+#define WRAP(f, ...)     ( f((WorkerP *)__lace_worker, (Task *)__lace_dq_head, ##__VA_ARGS__) )
+#define SYNC(f)          ( __lace_dq_head--, WRAP(f##_SYNC) )
+#define DROP()           ( __lace_dq_head--, WRAP(lace_drop) )
+#define SPAWN(f, ...)    ( WRAP(f##_SPAWN, ##__VA_ARGS__), __lace_dq_head++ )
+#define CALL(f, ...)     ( WRAP(f##_CALL, ##__VA_ARGS__) )
+#define TOGETHER(f, ...) ( WRAP(f##_TOGETHER, ##__VA_ARGS__) )
+#define NEWFRAME(f, ...) ( WRAP(f##_NEWFRAME, ##__VA_ARGS__) )
+#define STEAL_RANDOM()   ( CALL(lace_steal_random) )
+#define LACE_WORKER_ID   ( __lace_worker->worker )
+#define LACE_WORKER_PU   ( __lace_worker->pu )
+
+/* Use LACE_ME to initialize the Lace variables, in case you want to call multiple Lace tasks */
+#define LACE_ME WorkerP * __attribute__((unused)) __lace_worker = lace_get_worker(); Task * __attribute__((unused)) __lace_dq_head = lace_get_head(__lace_worker);
+
+#define TASK_IS_STOLEN(t)    ((size_t)t->thief > 1)
+#define TASK_IS_COMPLETED(t) ((size_t)t->thief == 2)
+#define TASK_RESULT(t)       (&t->d[0])
+
+#if LACE_DEBUG_PROGRAMSTACK
+static inline void CHECKSTACK(WorkerP *w)
+{
+    if (w->stack_trigger != 0) {
+        register size_t rsp;
+        asm volatile("movq %%rsp, %0" : "+r"(rsp) : : "cc");
+        if (rsp < w->stack_trigger) {
+            fputs("Warning: program stack 95% used!\n", stderr);
+            w->stack_trigger = 0;
+        }
+    }
+}
+#else
+#define CHECKSTACK(w) {}
+#endif
+
+typedef struct
+{
+    Task *t;
+    uint8_t all;
+    char pad[64-sizeof(Task *)-sizeof(uint8_t)];
+} lace_newframe_t;
+
+extern lace_newframe_t lace_newframe;
+
+/**
+ * Internal functions to start participating in a task in a new frame.
+ * Usually, task is set to NULL and the task is copied from lace_newframe.t;
+ * it is possible to override the start task by setting task.
+ */
+void lace_do_together(WorkerP *__lace_worker, Task *__lace_dq_head, Task *task);
+void lace_do_newframe(WorkerP *__lace_worker, Task *__lace_dq_head, Task *task);
+
+void lace_yield(WorkerP *__lace_worker, Task *__lace_dq_head);
+#define YIELD_NEWFRAME() { if (unlikely((*(volatile Task**)&lace_newframe.t) != NULL)) lace_yield(__lace_worker, __lace_dq_head); }
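+
+/*
+ * Editor's sketch (not part of the original header): how the macros above
+ * fit together for a task of arity 1 (TASK_1 is defined further down in
+ * this file).  SPAWN pushes a subtask on the worker's own deque, CALL runs
+ * a task directly, and SYNC pops (or leapfrog-steals back) the most
+ * recently spawned task.  The names pfib and main_worker are hypothetical.
+ */
+#if 0 /* illustration only */
+TASK_1(long, pfib, long, n)
+{
+    if (n < 2) return n;
+    SPAWN(pfib, n - 1);         /* push one half on our deque */
+    long b = CALL(pfib, n - 2); /* run the other half directly */
+    long a = SYNC(pfib);        /* matches the most recent SPAWN */
+    return a + b;
+}
+
+void main_worker(void)
+{
+    LACE_ME;                    /* bind __lace_worker and __lace_dq_head */
+    long r = CALL(pfib, 40);
+    (void)r;
+}
+#endif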
+#if LACE_PIE_TIMES
+static void lace_time_event( WorkerP *w, int event )
+{
+    uint64_t now = gethrtime(),
+             prev = w->time;
+
+    switch( event ) {
+
+        // Enter application code
+        case 1 :
+            if( w->level == 0 ) {
+                PR_ADD( w, CTR_init, now - prev );
+                w->level = 1;
+            } else if( w->level == 1 ) {
+                PR_ADD( w, CTR_wsteal, now - prev );
+                PR_ADD( w, CTR_wstealsucc, now - prev );
+            } else {
+                PR_ADD( w, CTR_lsteal, now - prev );
+                PR_ADD( w, CTR_lstealsucc, now - prev );
+            }
+            break;
+
+        // Exit application code
+        case 2 :
+            if( w->level == 1 ) {
+                PR_ADD( w, CTR_wapp, now - prev );
+            } else {
+                PR_ADD( w, CTR_lapp, now - prev );
+            }
+            break;
+
+        // Enter sync on stolen
+        case 3 :
+            if( w->level == 1 ) {
+                PR_ADD( w, CTR_wapp, now - prev );
+            } else {
+                PR_ADD( w, CTR_lapp, now - prev );
+            }
+            w->level++;
+            break;
+
+        // Exit sync on stolen
+        case 4 :
+            if( w->level == 1 ) {
+                fprintf( stderr, "This should not happen, level = %d\n", w->level );
+            } else {
+                PR_ADD( w, CTR_lsteal, now - prev );
+            }
+            w->level--;
+            break;
+
+        // Return from failed steal
+        case 7 :
+            if( w->level == 0 ) {
+                PR_ADD( w, CTR_init, now - prev );
+            } else if( w->level == 1 ) {
+                PR_ADD( w, CTR_wsteal, now - prev );
+            } else {
+                PR_ADD( w, CTR_lsteal, now - prev );
+            }
+            break;
+
+        // Signalling time
+        case 8 :
+            if( w->level == 1 ) {
+                PR_ADD( w, CTR_wsignal, now - prev );
+                PR_ADD( w, CTR_wsteal, now - prev );
+            } else {
+                PR_ADD( w, CTR_lsignal, now - prev );
+                PR_ADD( w, CTR_lsteal, now - prev );
+            }
+            break;
+
+        // Done
+        case 9 :
+            if( w->level == 0 ) {
+                PR_ADD( w, CTR_init, now - prev );
+            } else {
+                PR_ADD( w, CTR_close, now - prev );
+            }
+            break;
+
+        default: return;
+    }
+
+    w->time = now;
+}
+#else
+#define lace_time_event( w, e ) /* Empty */
+#endif
+
+static Worker* __attribute__((noinline))
+lace_steal(WorkerP *self, Task *__dq_head, Worker *victim)
+{
+    if (!victim->allstolen) {
+        /* The read must be volatile. With GCC 4.8, if it is not declared
+           volatile, the compiler optimizes this into extra memory accesses
+           to victim->ts instead of comparing the local values ts.ts.tail
+           and ts.ts.split, causing thieves to steal non-existent tasks! */
+        register TailSplit ts;
+        ts.v = *(volatile uint64_t *)&victim->ts.v;
+        if (ts.ts.tail < ts.ts.split) {
+            register TailSplit ts_new;
+            ts_new.v = ts.v;
+            ts_new.ts.tail++;
+            if (__sync_bool_compare_and_swap(&victim->ts.v, ts.v, ts_new.v)) {
+                // Stolen
+                Task *t = &victim->dq[ts.ts.tail];
+                t->thief = self->_public;
+                lace_time_event(self, 1);
+                t->f(self, __dq_head, t);
+                lace_time_event(self, 2);
+                t->thief = THIEF_COMPLETED;
+                lace_time_event(self, 8);
+                return LACE_STOLEN;
+            }
+
+            lace_time_event(self, 7);
+            return LACE_BUSY;
+        }
+
+        if (victim->movesplit == 0) {
+            victim->movesplit = 1;
+            PR_COUNTSPLITS(self, CTR_split_req);
+        }
+    }
+
+    lace_time_event(self, 7);
+    return LACE_NOWORK;
+}
+
+static int
+lace_shrink_shared(WorkerP *w)
+{
+    Worker *wt = w->_public;
+    TailSplit ts;
+    ts.v = wt->ts.v; /* Force a single memory read */
+    uint32_t tail = ts.ts.tail;
+    uint32_t split = ts.ts.split;
+
+    if (tail != split) {
+        uint32_t newsplit = (tail + split)/2;
+        wt->ts.ts.split = newsplit;
+        mfence();
+        tail = *(volatile uint32_t *)&(wt->ts.ts.tail);
+        if (tail != split) {
+            if (unlikely(tail > newsplit)) {
+                newsplit = (tail + split) / 2;
+                wt->ts.ts.split = newsplit;
+            }
+            w->split = w->dq + newsplit;
+            PR_COUNTSPLITS(w, CTR_split_shrink);
+            return 0;
+        }
+    }
+
+    wt->allstolen = 1;
+    w->allstolen = 1;
+    return 1;
+}
+
+static inline void
+lace_leapfrog(WorkerP *__lace_worker, Task *__lace_dq_head)
+{
+    lace_time_event(__lace_worker, 3);
+    Task *t = __lace_dq_head;
+    Worker *thief = t->thief;
+    if (thief != THIEF_COMPLETED) {
+        while ((size_t)thief <= 1) thief = t->thief;
+
+        /* PRE-LEAP: increase head again */
+        __lace_dq_head += 1;
+
+        /* Now leapfrog */
+        int attempts = 32;
+        while (thief != THIEF_COMPLETED) {
+            PR_COUNTSTEALS(__lace_worker, CTR_leap_tries);
+            Worker *res = lace_steal(__lace_worker, __lace_dq_head, thief);
+            if (res == LACE_NOWORK) {
+                YIELD_NEWFRAME();
+                if ((LACE_LEAP_RANDOM) && (--attempts == 0)) { lace_steal_random(); attempts = 32; }
+            } else if (res == LACE_STOLEN) {
+                PR_COUNTSTEALS(__lace_worker, CTR_leaps);
+            } else if (res == LACE_BUSY) {
+                PR_COUNTSTEALS(__lace_worker, CTR_leap_busy);
+            }
+            compiler_barrier();
+            thief = t->thief;
+        }
+
+        /* POST-LEAP: really pop the finished task */
+        /* no need to decrease __lace_dq_head, since it is a local variable */
+        compiler_barrier();
+        if (__lace_worker->allstolen == 0) {
+            /* Assume: tail = split = head (pre-pop) */
+            /* Now we do a real pop, so either decrease tail/split/head or declare allstolen */
+            Worker *wt = __lace_worker->_public;
+            wt->allstolen = 1;
+            __lace_worker->allstolen = 1;
+        }
+    }
+
+    compiler_barrier();
+    t->thief = THIEF_EMPTY;
+    lace_time_event(__lace_worker, 4);
+}
+
+static __attribute__((noinline))
+void lace_drop_slow(WorkerP *w, 
Task *__dq_head) +{ + if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) lace_leapfrog(w, __dq_head); +} + +static inline __attribute__((unused)) +void lace_drop(WorkerP *w, Task *__dq_head) +{ + if (likely(0 == w->_public->movesplit)) { + if (likely(w->split <= __dq_head)) { + return; + } + } + lace_drop_slow(w, __dq_head); +} + + + +// Task macros for tasks of arity 0 + +#define TASK_DECL_0(RTYPE, NAME) \ + \ +typedef struct _TD_##NAME { \ + TASK_COMMON_FIELDS(_TD_##NAME) \ + union { RTYPE res; } d; \ +} TD_##NAME; \ + \ +/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ +typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ + \ +void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \ +RTYPE NAME##_CALL(WorkerP *, Task * ); \ +static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \ +static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \ + \ +static inline __attribute__((unused)) \ +void NAME##_SPAWN(WorkerP *w, Task *__dq_head ) \ +{ \ + PR_COUNTTASK(w); \ + \ + TD_##NAME *t; \ + TailSplit ts; \ + uint32_t head, split, newsplit; \ + \ + /* assert(__dq_head < w->end); */ /* Assuming to be true */ \ + \ + t = (TD_##NAME *)__dq_head; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (unlikely(w->allstolen)) { \ + if (wt->movesplit) wt->movesplit = 0; \ + head = __dq_head - w->dq; \ + ts = (TailSplit){{head,head+1}}; \ + wt->ts.v = ts.v; \ + compiler_barrier(); \ + wt->allstolen = 0; \ + w->split = __dq_head+1; \ + w->allstolen = 0; \ + } else if (unlikely(wt->movesplit)) { \ + head = __dq_head - w->dq; \ + split = w->split - w->dq; \ + newsplit = (split + head + 2)/2; \ + wt->ts.ts.split = newsplit; \ + w->split = w->dq + newsplit; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ +} \ + \ +static inline __attribute__((unused)) \ +RTYPE NAME##_NEWFRAME(WorkerP *w, Task *__dq_head ) \ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + \ + \ + lace_do_newframe(w, __dq_head, &_t); \ + return ((TD_##NAME *)t)->d.res; \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_TOGETHER(WorkerP *w, Task *__dq_head ) \ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + \ + \ + lace_do_together(w, __dq_head, &_t); \ +} \ + \ +static __attribute__((noinline)) \ +RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \ +{ \ + TD_##NAME *t; \ + \ + if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \ + lace_leapfrog(w, __dq_head); \ + t = (TD_##NAME *)__dq_head; \ + return ((TD_##NAME *)t)->d.res; \ + } \ + \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (wt->movesplit) { \ + Task *t = w->split; \ + size_t diff = __dq_head - t; \ + diff = (diff + 1) / 2; \ + w->split = t + diff; \ + wt->ts.ts.split += diff; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ + \ + compiler_barrier(); \ + \ + t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head ); \ +} \ + \ +static inline __attribute__((unused)) \ +RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \ +{ \ + /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ + \ + if (likely(0 == w->_public->movesplit)) { \ + if (likely(w->split <= __dq_head)) { \ + TD_##NAME *t = 
(TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head ); \ + } \ + } \ + \ + return NAME##_SYNC_SLOW(w, __dq_head); \ +} \ + \ + \ + +#define TASK_IMPL_0(RTYPE, NAME) \ +void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \ +{ \ + t->d.res = NAME##_CALL(w, __dq_head ); \ +} \ + \ +static inline __attribute__((always_inline)) \ +RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head ); \ + \ +/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\ +RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head ) \ +{ \ + CHECKSTACK(w); \ + return NAME##_WORK(w, __dq_head ); \ +} \ + \ +static inline __attribute__((always_inline)) \ +RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) )\ + +#define TASK_0(RTYPE, NAME) TASK_DECL_0(RTYPE, NAME) TASK_IMPL_0(RTYPE, NAME) + +#define VOID_TASK_DECL_0(NAME) \ + \ +typedef struct _TD_##NAME { \ + TASK_COMMON_FIELDS(_TD_##NAME) \ + \ +} TD_##NAME; \ + \ +/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ +typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ + \ +void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \ +void NAME##_CALL(WorkerP *, Task * ); \ +static inline void NAME##_SYNC(WorkerP *, Task *); \ +static void NAME##_SYNC_SLOW(WorkerP *, Task *); \ + \ +static inline __attribute__((unused)) \ +void NAME##_SPAWN(WorkerP *w, Task *__dq_head ) \ +{ \ + PR_COUNTTASK(w); \ + \ + TD_##NAME *t; \ + TailSplit ts; \ + uint32_t head, split, newsplit; \ + \ + /* assert(__dq_head < w->end); */ /* Assuming to be true */ \ + \ + t = (TD_##NAME *)__dq_head; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (unlikely(w->allstolen)) { \ + if (wt->movesplit) wt->movesplit = 0; \ + head = __dq_head - w->dq; \ + ts = (TailSplit){{head,head+1}}; \ + wt->ts.v = ts.v; \ + compiler_barrier(); \ + wt->allstolen = 0; \ + w->split = __dq_head+1; \ + w->allstolen = 0; \ + } else if (unlikely(wt->movesplit)) { \ + head = __dq_head - w->dq; \ + split = w->split - w->dq; \ + newsplit = (split + head + 2)/2; \ + wt->ts.ts.split = newsplit; \ + w->split = w->dq + newsplit; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_NEWFRAME(WorkerP *w, Task *__dq_head ) \ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + \ + \ + lace_do_newframe(w, __dq_head, &_t); \ + return ; \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_TOGETHER(WorkerP *w, Task *__dq_head ) \ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + \ + \ + lace_do_together(w, __dq_head, &_t); \ +} \ + \ +static __attribute__((noinline)) \ +void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \ +{ \ + TD_##NAME *t; \ + \ + if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \ + lace_leapfrog(w, __dq_head); \ + t = (TD_##NAME *)__dq_head; \ + return ; \ + } \ + \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (wt->movesplit) { \ + Task *t = w->split; \ + size_t diff = __dq_head - t; \ + diff = (diff + 1) / 2; \ + w->split = t + diff; \ + wt->ts.ts.split += diff; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + 
PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ + \ + compiler_barrier(); \ + \ + t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head ); \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_SYNC(WorkerP *w, Task *__dq_head) \ +{ \ + /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ + \ + if (likely(0 == w->_public->movesplit)) { \ + if (likely(w->split <= __dq_head)) { \ + TD_##NAME *t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head ); \ + } \ + } \ + \ + return NAME##_SYNC_SLOW(w, __dq_head); \ +} \ + \ + \ + +#define VOID_TASK_IMPL_0(NAME) \ +void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \ +{ \ + NAME##_CALL(w, __dq_head ); \ +} \ + \ +static inline __attribute__((always_inline)) \ +void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head ); \ + \ +/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\ +void NAME##_CALL(WorkerP *w, Task *__dq_head ) \ +{ \ + CHECKSTACK(w); \ + return NAME##_WORK(w, __dq_head ); \ +} \ + \ +static inline __attribute__((always_inline)) \ +void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) )\ + +#define VOID_TASK_0(NAME) VOID_TASK_DECL_0(NAME) VOID_TASK_IMPL_0(NAME) + + +// Task macros for tasks of arity 1 + +#define TASK_DECL_1(RTYPE, NAME, ATYPE_1) \ + \ +typedef struct _TD_##NAME { \ + TASK_COMMON_FIELDS(_TD_##NAME) \ + union { struct { ATYPE_1 arg_1; } args; RTYPE res; } d; \ +} TD_##NAME; \ + \ +/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ +typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ + \ +void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \ +RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1); \ +static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \ +static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \ + \ +static inline __attribute__((unused)) \ +void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \ +{ \ + PR_COUNTTASK(w); \ + \ + TD_##NAME *t; \ + TailSplit ts; \ + uint32_t head, split, newsplit; \ + \ + /* assert(__dq_head < w->end); */ /* Assuming to be true */ \ + \ + t = (TD_##NAME *)__dq_head; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (unlikely(w->allstolen)) { \ + if (wt->movesplit) wt->movesplit = 0; \ + head = __dq_head - w->dq; \ + ts = (TailSplit){{head,head+1}}; \ + wt->ts.v = ts.v; \ + compiler_barrier(); \ + wt->allstolen = 0; \ + w->split = __dq_head+1; \ + w->allstolen = 0; \ + } else if (unlikely(wt->movesplit)) { \ + head = __dq_head - w->dq; \ + split = w->split - w->dq; \ + newsplit = (split + head + 2)/2; \ + wt->ts.ts.split = newsplit; \ + w->split = w->dq + newsplit; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ +} \ + \ +static inline __attribute__((unused)) \ +RTYPE NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; \ + \ + lace_do_newframe(w, __dq_head, &_t); \ + return ((TD_##NAME *)t)->d.res; \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; \ + \ + lace_do_together(w, __dq_head, &_t); \ +} \ + \ +static __attribute__((noinline)) \ +RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \ +{ \ + TD_##NAME *t; \ + \ + if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \ + lace_leapfrog(w, __dq_head); \ + t = (TD_##NAME *)__dq_head; \ + return ((TD_##NAME *)t)->d.res; \ + } \ + \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (wt->movesplit) { \ + Task *t = w->split; \ + size_t diff = __dq_head - t; \ + diff = (diff + 1) / 2; \ + w->split = t + diff; \ + wt->ts.ts.split += diff; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ + \ + compiler_barrier(); \ + \ + t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1); \ +} \ + \ +static inline __attribute__((unused)) \ +RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \ +{ \ + /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ + \ + if (likely(0 == w->_public->movesplit)) { \ + if (likely(w->split <= __dq_head)) { \ + TD_##NAME *t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1); \ + } \ + } \ + \ + return NAME##_SYNC_SLOW(w, __dq_head); \ +} \ + \ + \ + +#define TASK_IMPL_1(RTYPE, NAME, ATYPE_1, ARG_1) \ +void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \ +{ \ + t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1); \ +} \ + \ +static inline __attribute__((always_inline)) \ +RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1); \ + \ +/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task 
will disappear */\ +RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \ +{ \ + CHECKSTACK(w); \ + return NAME##_WORK(w, __dq_head , arg_1); \ +} \ + \ +static inline __attribute__((always_inline)) \ +RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1)\ + +#define TASK_1(RTYPE, NAME, ATYPE_1, ARG_1) TASK_DECL_1(RTYPE, NAME, ATYPE_1) TASK_IMPL_1(RTYPE, NAME, ATYPE_1, ARG_1) + +#define VOID_TASK_DECL_1(NAME, ATYPE_1) \ + \ +typedef struct _TD_##NAME { \ + TASK_COMMON_FIELDS(_TD_##NAME) \ + union { struct { ATYPE_1 arg_1; } args; } d; \ +} TD_##NAME; \ + \ +/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ +typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ + \ +void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \ +void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1); \ +static inline void NAME##_SYNC(WorkerP *, Task *); \ +static void NAME##_SYNC_SLOW(WorkerP *, Task *); \ + \ +static inline __attribute__((unused)) \ +void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \ +{ \ + PR_COUNTTASK(w); \ + \ + TD_##NAME *t; \ + TailSplit ts; \ + uint32_t head, split, newsplit; \ + \ + /* assert(__dq_head < w->end); */ /* Assuming to be true */ \ + \ + t = (TD_##NAME *)__dq_head; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (unlikely(w->allstolen)) { \ + if (wt->movesplit) wt->movesplit = 0; \ + head = __dq_head - w->dq; \ + ts = (TailSplit){{head,head+1}}; \ + wt->ts.v = ts.v; \ + compiler_barrier(); \ + wt->allstolen = 0; \ + w->split = __dq_head+1; \ + w->allstolen = 0; \ + } else if (unlikely(wt->movesplit)) { \ + head = __dq_head - w->dq; \ + split = w->split - w->dq; \ + newsplit = (split + head + 2)/2; \ + wt->ts.ts.split = newsplit; \ + w->split = w->dq + newsplit; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; \ + \ + lace_do_newframe(w, __dq_head, &_t); \ + return ; \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; \ + \ + lace_do_together(w, __dq_head, &_t); \ +} \ + \ +static __attribute__((noinline)) \ +void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \ +{ \ + TD_##NAME *t; \ + \ + if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \ + lace_leapfrog(w, __dq_head); \ + t = (TD_##NAME *)__dq_head; \ + return ; \ + } \ + \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (wt->movesplit) { \ + Task *t = w->split; \ + size_t diff = __dq_head - t; \ + diff = (diff + 1) / 2; \ + w->split = t + diff; \ + wt->ts.ts.split += diff; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ + \ + compiler_barrier(); \ + \ + t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1); \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_SYNC(WorkerP *w, Task 
*__dq_head) \ +{ \ + /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ + \ + if (likely(0 == w->_public->movesplit)) { \ + if (likely(w->split <= __dq_head)) { \ + TD_##NAME *t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1); \ + } \ + } \ + \ + return NAME##_SYNC_SLOW(w, __dq_head); \ +} \ + \ + \ + +#define VOID_TASK_IMPL_1(NAME, ATYPE_1, ARG_1) \ +void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \ +{ \ + NAME##_CALL(w, __dq_head , t->d.args.arg_1); \ +} \ + \ +static inline __attribute__((always_inline)) \ +void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1); \ + \ +/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\ +void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \ +{ \ + CHECKSTACK(w); \ + return NAME##_WORK(w, __dq_head , arg_1); \ +} \ + \ +static inline __attribute__((always_inline)) \ +void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1)\ + +#define VOID_TASK_1(NAME, ATYPE_1, ARG_1) VOID_TASK_DECL_1(NAME, ATYPE_1) VOID_TASK_IMPL_1(NAME, ATYPE_1, ARG_1) + + +// Task macros for tasks of arity 2 + +#define TASK_DECL_2(RTYPE, NAME, ATYPE_1, ATYPE_2) \ + \ +typedef struct _TD_##NAME { \ + TASK_COMMON_FIELDS(_TD_##NAME) \ + union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; } args; RTYPE res; } d; \ +} TD_##NAME; \ + \ +/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ +typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ + \ +void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \ +RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2); \ +static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \ +static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \ + \ +static inline __attribute__((unused)) \ +void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \ +{ \ + PR_COUNTTASK(w); \ + \ + TD_##NAME *t; \ + TailSplit ts; \ + uint32_t head, split, newsplit; \ + \ + /* assert(__dq_head < w->end); */ /* Assuming to be true */ \ + \ + t = (TD_##NAME *)__dq_head; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (unlikely(w->allstolen)) { \ + if (wt->movesplit) wt->movesplit = 0; \ + head = __dq_head - w->dq; \ + ts = (TailSplit){{head,head+1}}; \ + wt->ts.v = ts.v; \ + compiler_barrier(); \ + wt->allstolen = 0; \ + w->split = __dq_head+1; \ + w->allstolen = 0; \ + } else if (unlikely(wt->movesplit)) { \ + head = __dq_head - w->dq; \ + split = w->split - w->dq; \ + newsplit = (split + head + 2)/2; \ + wt->ts.ts.split = newsplit; \ + w->split = w->dq + newsplit; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ +} \ + \ +static inline __attribute__((unused)) \ +RTYPE NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \ + \ + lace_do_newframe(w, __dq_head, &_t); \ + return ((TD_##NAME *)t)->d.res; \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \ +{ \ + Task _t; \ + 
TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \ + \ + lace_do_together(w, __dq_head, &_t); \ +} \ + \ +static __attribute__((noinline)) \ +RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \ +{ \ + TD_##NAME *t; \ + \ + if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \ + lace_leapfrog(w, __dq_head); \ + t = (TD_##NAME *)__dq_head; \ + return ((TD_##NAME *)t)->d.res; \ + } \ + \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (wt->movesplit) { \ + Task *t = w->split; \ + size_t diff = __dq_head - t; \ + diff = (diff + 1) / 2; \ + w->split = t + diff; \ + wt->ts.ts.split += diff; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ + \ + compiler_barrier(); \ + \ + t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \ +} \ + \ +static inline __attribute__((unused)) \ +RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \ +{ \ + /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ + \ + if (likely(0 == w->_public->movesplit)) { \ + if (likely(w->split <= __dq_head)) { \ + TD_##NAME *t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \ + } \ + } \ + \ + return NAME##_SYNC_SLOW(w, __dq_head); \ +} \ + \ + \ + +#define TASK_IMPL_2(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2) \ +void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \ +{ \ + t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \ +} \ + \ +static inline __attribute__((always_inline)) \ +RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2); \ + \ +/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\ +RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \ +{ \ + CHECKSTACK(w); \ + return NAME##_WORK(w, __dq_head , arg_1, arg_2); \ +} \ + \ +static inline __attribute__((always_inline)) \ +RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2)\ + +#define TASK_2(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2) TASK_DECL_2(RTYPE, NAME, ATYPE_1, ATYPE_2) TASK_IMPL_2(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2) + +#define VOID_TASK_DECL_2(NAME, ATYPE_1, ATYPE_2) \ + \ +typedef struct _TD_##NAME { \ + TASK_COMMON_FIELDS(_TD_##NAME) \ + union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; } args; } d; \ +} TD_##NAME; \ + \ +/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ +typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ + \ +void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \ +void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2); \ +static inline void NAME##_SYNC(WorkerP *, Task *); \ +static void NAME##_SYNC_SLOW(WorkerP *, Task *); \ + \ +static inline __attribute__((unused)) \ +void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \ +{ \ + PR_COUNTTASK(w); \ + \ + TD_##NAME *t; \ + TailSplit ts; \ + uint32_t head, split, newsplit; \ + \ + /* assert(__dq_head < w->end); */ /* Assuming to be true */ \ + \ + t = (TD_##NAME *)__dq_head; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (unlikely(w->allstolen)) { \ + if (wt->movesplit) wt->movesplit = 0; \ + head = __dq_head - w->dq; \ + ts = (TailSplit){{head,head+1}}; \ + wt->ts.v = ts.v; \ + compiler_barrier(); \ + wt->allstolen = 0; \ + w->split = __dq_head+1; \ + w->allstolen = 0; \ + } else if (unlikely(wt->movesplit)) { \ + head = __dq_head - w->dq; \ + split = w->split - w->dq; \ + newsplit = (split + head + 2)/2; \ + wt->ts.ts.split = newsplit; \ + w->split = w->dq + newsplit; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \ + \ + lace_do_newframe(w, __dq_head, &_t); \ + return ; \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \ + \ + lace_do_together(w, __dq_head, &_t); \ +} \ + \ +static __attribute__((noinline)) \ +void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \ +{ \ + TD_##NAME *t; \ + \ + if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \ + lace_leapfrog(w, __dq_head); \ + t = (TD_##NAME *)__dq_head; \ + return ; \ + } \ + \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (wt->movesplit) { \ + Task *t = w->split; \ + size_t diff = __dq_head - t; \ + diff = (diff + 1) / 2; \ + w->split = t + diff; \ + wt->ts.ts.split += diff; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ + \ + compiler_barrier(); \ + \ + t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_SYNC(WorkerP *w, Task *__dq_head) \ +{ \ + /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ + \ + if (likely(0 == w->_public->movesplit)) { \ + if (likely(w->split <= __dq_head)) { \ + TD_##NAME *t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \ + } \ + } \ + \ + return NAME##_SYNC_SLOW(w, __dq_head); \ +} \ + \ + \ + +#define VOID_TASK_IMPL_2(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2) \ +void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \ +{ \ + NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \ +} \ + \ +static inline __attribute__((always_inline)) \ +void 
NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2); \ + \ +/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\ +void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \ +{ \ + CHECKSTACK(w); \ + return NAME##_WORK(w, __dq_head , arg_1, arg_2); \ +} \ + \ +static inline __attribute__((always_inline)) \ +void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2)\ + +#define VOID_TASK_2(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2) VOID_TASK_DECL_2(NAME, ATYPE_1, ATYPE_2) VOID_TASK_IMPL_2(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2) + + +// Task macros for tasks of arity 3 + +#define TASK_DECL_3(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3) \ + \ +typedef struct _TD_##NAME { \ + TASK_COMMON_FIELDS(_TD_##NAME) \ + union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; } args; RTYPE res; } d;\ +} TD_##NAME; \ + \ +/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ +typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ + \ +void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \ +RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3); \ +static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \ +static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \ + \ +static inline __attribute__((unused)) \ +void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\ +{ \ + PR_COUNTTASK(w); \ + \ + TD_##NAME *t; \ + TailSplit ts; \ + uint32_t head, split, newsplit; \ + \ + /* assert(__dq_head < w->end); */ /* Assuming to be true */ \ + \ + t = (TD_##NAME *)__dq_head; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (unlikely(w->allstolen)) { \ + if (wt->movesplit) wt->movesplit = 0; \ + head = __dq_head - w->dq; \ + ts = (TailSplit){{head,head+1}}; \ + wt->ts.v = ts.v; \ + compiler_barrier(); \ + wt->allstolen = 0; \ + w->split = __dq_head+1; \ + w->allstolen = 0; \ + } else if (unlikely(wt->movesplit)) { \ + head = __dq_head - w->dq; \ + split = w->split - w->dq; \ + newsplit = (split + head + 2)/2; \ + wt->ts.ts.split = newsplit; \ + w->split = w->dq + newsplit; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ +} \ + \ +static inline __attribute__((unused)) \ +RTYPE NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \ + \ + lace_do_newframe(w, __dq_head, &_t); \ + return ((TD_##NAME *)t)->d.res; \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \ + \ + lace_do_together(w, __dq_head, &_t); \ +} \ + \ +static __attribute__((noinline)) \ +RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \ +{ \ + TD_##NAME *t; \ + \ + if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \ + 
lace_leapfrog(w, __dq_head); \ + t = (TD_##NAME *)__dq_head; \ + return ((TD_##NAME *)t)->d.res; \ + } \ + \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (wt->movesplit) { \ + Task *t = w->split; \ + size_t diff = __dq_head - t; \ + diff = (diff + 1) / 2; \ + w->split = t + diff; \ + wt->ts.ts.split += diff; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ + \ + compiler_barrier(); \ + \ + t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\ +} \ + \ +static inline __attribute__((unused)) \ +RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \ +{ \ + /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ + \ + if (likely(0 == w->_public->movesplit)) { \ + if (likely(w->split <= __dq_head)) { \ + TD_##NAME *t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\ + } \ + } \ + \ + return NAME##_SYNC_SLOW(w, __dq_head); \ +} \ + \ + \ + +#define TASK_IMPL_3(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3) \ +void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \ +{ \ + t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\ +} \ + \ +static inline __attribute__((always_inline)) \ +RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3);\ + \ +/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\ +RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\ +{ \ + CHECKSTACK(w); \ + return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3); \ +} \ + \ +static inline __attribute__((always_inline)) \ +RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3)\ + +#define TASK_3(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3) TASK_DECL_3(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3) TASK_IMPL_3(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3) + +#define VOID_TASK_DECL_3(NAME, ATYPE_1, ATYPE_2, ATYPE_3) \ + \ +typedef struct _TD_##NAME { \ + TASK_COMMON_FIELDS(_TD_##NAME) \ + union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; } args; } d; \ +} TD_##NAME; \ + \ +/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ +typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ + \ +void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \ +void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3); \ +static inline void NAME##_SYNC(WorkerP *, Task *); \ +static void NAME##_SYNC_SLOW(WorkerP *, Task *); \ + \ +static inline __attribute__((unused)) \ +void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\ +{ \ + PR_COUNTTASK(w); \ + \ + TD_##NAME *t; \ + TailSplit ts; \ + uint32_t head, split, newsplit; \ + \ + /* assert(__dq_head < w->end); */ /* Assuming to be true */ \ + \ + t = (TD_##NAME *)__dq_head; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (unlikely(w->allstolen)) { \ + if (wt->movesplit) wt->movesplit = 0; \ + head = __dq_head - w->dq; \ + ts = (TailSplit){{head,head+1}}; \ + wt->ts.v = ts.v; \ + compiler_barrier(); \ + wt->allstolen = 0; \ + w->split = __dq_head+1; \ + w->allstolen = 0; \ + } else if (unlikely(wt->movesplit)) { \ + head = __dq_head - w->dq; \ + split = w->split - w->dq; \ + newsplit = (split + head + 2)/2; \ + wt->ts.ts.split = newsplit; \ + w->split = w->dq + newsplit; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \ + \ + lace_do_newframe(w, __dq_head, &_t); \ + return ; \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \ + \ + lace_do_together(w, __dq_head, &_t); \ +} \ + \ +static __attribute__((noinline)) \ +void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \ +{ \ + TD_##NAME *t; \ + \ + if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \ + lace_leapfrog(w, __dq_head); \ + t = (TD_##NAME *)__dq_head; \ + return ; \ + } \ + \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (wt->movesplit) { \ + Task *t = w->split; \ + size_t diff = __dq_head - t; \ + diff = (diff + 1) / 2; \ + w->split = t + diff; \ + wt->ts.ts.split += diff; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ + \ + compiler_barrier(); \ + \ + t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_SYNC(WorkerP *w, Task *__dq_head) \ +{ \ + /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ + \ + if (likely(0 == w->_public->movesplit)) { \ + if (likely(w->split <= __dq_head)) { \ + TD_##NAME *t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\ + } \ + } \ + \ + return NAME##_SYNC_SLOW(w, __dq_head); \ +} \ + \ + \ + +#define VOID_TASK_IMPL_3(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3) \ +void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME 
*t __attribute__((unused))) \ +{ \ + NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \ +} \ + \ +static inline __attribute__((always_inline)) \ +void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3);\ + \ +/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\ +void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\ +{ \ + CHECKSTACK(w); \ + return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3); \ +} \ + \ +static inline __attribute__((always_inline)) \ +void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3)\ + +#define VOID_TASK_3(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3) VOID_TASK_DECL_3(NAME, ATYPE_1, ATYPE_2, ATYPE_3) VOID_TASK_IMPL_3(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3) + + +// Task macros for tasks of arity 4 + +#define TASK_DECL_4(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4) \ + \ +typedef struct _TD_##NAME { \ + TASK_COMMON_FIELDS(_TD_##NAME) \ + union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; } args; RTYPE res; } d;\ +} TD_##NAME; \ + \ +/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ +typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\ + \ +void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \ +RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4);\ +static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \ +static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \ + \ +static inline __attribute__((unused)) \ +void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\ +{ \ + PR_COUNTTASK(w); \ + \ + TD_##NAME *t; \ + TailSplit ts; \ + uint32_t head, split, newsplit; \ + \ + /* assert(__dq_head < w->end); */ /* Assuming to be true */ \ + \ + t = (TD_##NAME *)__dq_head; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (unlikely(w->allstolen)) { \ + if (wt->movesplit) wt->movesplit = 0; \ + head = __dq_head - w->dq; \ + ts = (TailSplit){{head,head+1}}; \ + wt->ts.v = ts.v; \ + compiler_barrier(); \ + wt->allstolen = 0; \ + w->split = __dq_head+1; \ + w->allstolen = 0; \ + } else if (unlikely(wt->movesplit)) { \ + head = __dq_head - w->dq; \ + split = w->split - w->dq; \ + newsplit = (split + head + 2)/2; \ + wt->ts.ts.split = newsplit; \ + w->split = w->dq + newsplit; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ +} \ + \ +static inline __attribute__((unused)) \ +RTYPE NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\ + \ + lace_do_newframe(w, __dq_head, &_t); \ + return ((TD_##NAME *)t)->d.res; \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ 
+ t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\ + \ + lace_do_together(w, __dq_head, &_t); \ +} \ + \ +static __attribute__((noinline)) \ +RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \ +{ \ + TD_##NAME *t; \ + \ + if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \ + lace_leapfrog(w, __dq_head); \ + t = (TD_##NAME *)__dq_head; \ + return ((TD_##NAME *)t)->d.res; \ + } \ + \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (wt->movesplit) { \ + Task *t = w->split; \ + size_t diff = __dq_head - t; \ + diff = (diff + 1) / 2; \ + w->split = t + diff; \ + wt->ts.ts.split += diff; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ + \ + compiler_barrier(); \ + \ + t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ +} \ + \ +static inline __attribute__((unused)) \ +RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \ +{ \ + /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ + \ + if (likely(0 == w->_public->movesplit)) { \ + if (likely(w->split <= __dq_head)) { \ + TD_##NAME *t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ + } \ + } \ + \ + return NAME##_SYNC_SLOW(w, __dq_head); \ +} \ + \ + \ + +#define TASK_IMPL_4(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4)\ +void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \ +{ \ + t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ +} \ + \ +static inline __attribute__((always_inline)) \ +RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4);\ + \ +/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\ +RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\ +{ \ + CHECKSTACK(w); \ + return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4); \ +} \ + \ +static inline __attribute__((always_inline)) \ +RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4)\ + +#define TASK_4(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4) TASK_DECL_4(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4) TASK_IMPL_4(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4) + +#define VOID_TASK_DECL_4(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4) \ + \ +typedef struct _TD_##NAME { \ + TASK_COMMON_FIELDS(_TD_##NAME) \ + union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; } args; } d;\ +} TD_##NAME; \ + \ +/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ +typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ + \ +void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \ +void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4);\ +static inline void NAME##_SYNC(WorkerP *, Task *); \ +static void NAME##_SYNC_SLOW(WorkerP *, Task *); \ + \ +static inline __attribute__((unused)) \ +void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\ +{ \ + PR_COUNTTASK(w); \ + \ + TD_##NAME *t; \ + TailSplit ts; \ + uint32_t head, split, newsplit; \ + \ + /* assert(__dq_head < w->end); */ /* Assuming to be true */ \ + \ + t = (TD_##NAME *)__dq_head; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (unlikely(w->allstolen)) { \ + if (wt->movesplit) wt->movesplit = 0; \ + head = __dq_head - w->dq; \ + ts = (TailSplit){{head,head+1}}; \ + wt->ts.v = ts.v; \ + compiler_barrier(); \ + wt->allstolen = 0; \ + w->split = __dq_head+1; \ + w->allstolen = 0; \ + } else if (unlikely(wt->movesplit)) { \ + head = __dq_head - w->dq; \ + split = w->split - w->dq; \ + newsplit = (split + head + 2)/2; \ + wt->ts.ts.split = newsplit; \ + w->split = w->dq + newsplit; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\ + \ + lace_do_newframe(w, __dq_head, &_t); \ + return ; \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\ + \ + lace_do_together(w, __dq_head, &_t); \ +} \ + \ +static __attribute__((noinline)) \ +void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \ +{ \ + TD_##NAME *t; \ + \ + if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \ + lace_leapfrog(w, __dq_head); \ + t = (TD_##NAME *)__dq_head; \ + return ; \ + } \ + \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (wt->movesplit) { \ + Task *t = w->split; \ + size_t diff = __dq_head - t; \ + diff = (diff + 1) / 2; \ + w->split = t + diff; \ + wt->ts.ts.split += diff; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ + \ + compiler_barrier(); \ + \ + t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_SYNC(WorkerP *w, Task *__dq_head) \ +{ \ + /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ + \ + if (likely(0 == w->_public->movesplit)) { \ + if (likely(w->split <= __dq_head)) { \ + TD_##NAME *t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ + } \ + } \ + \ + return NAME##_SYNC_SLOW(w, 
__dq_head); \ +} \ + \ + \ + +#define VOID_TASK_IMPL_4(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4)\ +void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \ +{ \ + NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\ +} \ + \ +static inline __attribute__((always_inline)) \ +void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4);\ + \ +/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\ +void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\ +{ \ + CHECKSTACK(w); \ + return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4); \ +} \ + \ +static inline __attribute__((always_inline)) \ +void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4)\ + +#define VOID_TASK_4(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4) VOID_TASK_DECL_4(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4) VOID_TASK_IMPL_4(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4) + + +// Task macros for tasks of arity 5 + +#define TASK_DECL_5(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5) \ + \ +typedef struct _TD_##NAME { \ + TASK_COMMON_FIELDS(_TD_##NAME) \ + union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; ATYPE_5 arg_5; } args; RTYPE res; } d;\ +} TD_##NAME; \ + \ +/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ +typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ + \ +void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \ +RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5);\ +static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \ +static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \ + \ +static inline __attribute__((unused)) \ +void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ +{ \ + PR_COUNTTASK(w); \ + \ + TD_##NAME *t; \ + TailSplit ts; \ + uint32_t head, split, newsplit; \ + \ + /* assert(__dq_head < w->end); */ /* Assuming to be true */ \ + \ + t = (TD_##NAME *)__dq_head; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (unlikely(w->allstolen)) { \ + if (wt->movesplit) wt->movesplit = 0; \ + head = __dq_head - w->dq; \ + ts = (TailSplit){{head,head+1}}; \ + wt->ts.v = ts.v; \ + compiler_barrier(); \ + wt->allstolen = 0; \ + w->split = __dq_head+1; \ + w->allstolen = 0; \ + } else if (unlikely(wt->movesplit)) { \ + head = __dq_head - w->dq; \ + split = w->split - w->dq; \ + newsplit = (split + head + 2)/2; \ + wt->ts.ts.split = newsplit; \ + w->split = w->dq + newsplit; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ +} \ + \ +static inline __attribute__((unused)) \ +RTYPE NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\ + \ + lace_do_newframe(w, __dq_head, &_t); \ + return ((TD_##NAME *)t)->d.res; \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\ + \ + lace_do_together(w, __dq_head, &_t); \ +} \ + \ +static __attribute__((noinline)) \ +RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \ +{ \ + TD_##NAME *t; \ + \ + if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \ + lace_leapfrog(w, __dq_head); \ + t = (TD_##NAME *)__dq_head; \ + return ((TD_##NAME *)t)->d.res; \ + } \ + \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (wt->movesplit) { \ + Task *t = w->split; \ + size_t diff = __dq_head - t; \ + diff = (diff + 1) / 2; \ + w->split = t + diff; \ + wt->ts.ts.split += diff; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ + \ + compiler_barrier(); \ + \ + t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ +} \ + \ +static inline __attribute__((unused)) \ +RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \ +{ \ + /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ + \ + if (likely(0 == w->_public->movesplit)) { \ + if (likely(w->split <= __dq_head)) { \ + TD_##NAME *t = 
(TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ + } \ + } \ + \ + return NAME##_SYNC_SLOW(w, __dq_head); \ +} \ + \ + \ + +#define TASK_IMPL_5(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5)\ +void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \ +{ \ + t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ +} \ + \ +static inline __attribute__((always_inline)) \ +RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5);\ + \ +/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\ +RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ +{ \ + CHECKSTACK(w); \ + return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4, arg_5); \ +} \ + \ +static inline __attribute__((always_inline)) \ +RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4, ATYPE_5 ARG_5)\ + +#define TASK_5(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5) TASK_DECL_5(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5) TASK_IMPL_5(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5) + +#define VOID_TASK_DECL_5(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5) \ + \ +typedef struct _TD_##NAME { \ + TASK_COMMON_FIELDS(_TD_##NAME) \ + union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; ATYPE_5 arg_5; } args; } d;\ +} TD_##NAME; \ + \ +/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ +typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ + \ +void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \ +void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5);\ +static inline void NAME##_SYNC(WorkerP *, Task *); \ +static void NAME##_SYNC_SLOW(WorkerP *, Task *); \ + \ +static inline __attribute__((unused)) \ +void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ +{ \ + PR_COUNTTASK(w); \ + \ + TD_##NAME *t; \ + TailSplit ts; \ + uint32_t head, split, newsplit; \ + \ + /* assert(__dq_head < w->end); */ /* Assuming to be true */ \ + \ + t = (TD_##NAME *)__dq_head; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (unlikely(w->allstolen)) { \ + if (wt->movesplit) wt->movesplit = 0; \ + head = __dq_head - w->dq; \ + ts = (TailSplit){{head,head+1}}; \ + wt->ts.v = ts.v; \ + compiler_barrier(); \ + wt->allstolen = 0; \ + w->split = __dq_head+1; \ + w->allstolen = 0; \ + } else if (unlikely(wt->movesplit)) { \ + head = __dq_head - w->dq; \ + split = w->split - w->dq; \ + newsplit = (split + head + 2)/2; \ + wt->ts.ts.split = newsplit; \ + w->split = w->dq + newsplit; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\ + \ + lace_do_newframe(w, __dq_head, &_t); \ + return ; \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\ + \ + lace_do_together(w, __dq_head, &_t); \ +} \ + \ +static __attribute__((noinline)) \ +void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \ +{ \ + TD_##NAME *t; \ + \ + if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \ + lace_leapfrog(w, __dq_head); \ + t = (TD_##NAME *)__dq_head; \ + return ; \ + } \ + \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (wt->movesplit) { \ + Task *t = w->split; \ + size_t diff = __dq_head - t; \ + diff = (diff + 1) / 2; \ + w->split = t + diff; \ + wt->ts.ts.split += diff; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ + \ + compiler_barrier(); \ + \ + t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_SYNC(WorkerP *w, Task *__dq_head) \ +{ \ + /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ + \ + if (likely(0 == w->_public->movesplit)) { \ + if (likely(w->split <= __dq_head)) { \ + TD_##NAME *t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + 
return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ + } \ + } \ + \ + return NAME##_SYNC_SLOW(w, __dq_head); \ +} \ + \ + \ + +#define VOID_TASK_IMPL_5(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5)\ +void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \ +{ \ + NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\ +} \ + \ +static inline __attribute__((always_inline)) \ +void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5);\ + \ +/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\ +void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\ +{ \ + CHECKSTACK(w); \ + return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4, arg_5); \ +} \ + \ +static inline __attribute__((always_inline)) \ +void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4, ATYPE_5 ARG_5)\ + +#define VOID_TASK_5(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5) VOID_TASK_DECL_5(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5) VOID_TASK_IMPL_5(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5) + + +// Task macros for tasks of arity 6 + +#define TASK_DECL_6(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6)\ + \ +typedef struct _TD_##NAME { \ + TASK_COMMON_FIELDS(_TD_##NAME) \ + union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; ATYPE_5 arg_5; ATYPE_6 arg_6; } args; RTYPE res; } d;\ +} TD_##NAME; \ + \ +/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ +typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ + \ +void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \ +RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6);\ +static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \ +static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \ + \ +static inline __attribute__((unused)) \ +void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ +{ \ + PR_COUNTTASK(w); \ + \ + TD_##NAME *t; \ + TailSplit ts; \ + uint32_t head, split, newsplit; \ + \ + /* assert(__dq_head < w->end); */ /* Assuming to be true */ \ + \ + t = (TD_##NAME *)__dq_head; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (unlikely(w->allstolen)) { \ + if (wt->movesplit) wt->movesplit = 0; \ + head = __dq_head - w->dq; \ + ts = (TailSplit){{head,head+1}}; \ + wt->ts.v = ts.v; \ + compiler_barrier(); \ + wt->allstolen = 0; \ + w->split = __dq_head+1; \ + w->allstolen = 0; \ + } else if (unlikely(wt->movesplit)) { \ + head = __dq_head - w->dq; \ + split = w->split - w->dq; \ + newsplit = (split + head + 2)/2; \ + wt->ts.ts.split = newsplit; \ + w->split = w->dq + newsplit; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ +} \ + \ +static inline __attribute__((unused)) \ +RTYPE NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\ + \ + lace_do_newframe(w, __dq_head, &_t); \ + return ((TD_##NAME *)t)->d.res; \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\ + \ + lace_do_together(w, __dq_head, &_t); \ +} \ + \ +static __attribute__((noinline)) \ +RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \ +{ \ + TD_##NAME *t; \ + \ + if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \ + lace_leapfrog(w, __dq_head); \ + t = (TD_##NAME *)__dq_head; \ + return ((TD_##NAME *)t)->d.res; \ + } \ + \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (wt->movesplit) { \ + Task *t = w->split; \ + size_t diff = __dq_head - t; \ + diff = (diff + 1) / 2; \ + w->split = t + diff; \ + wt->ts.ts.split += diff; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ + \ + compiler_barrier(); \ + \ + t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ +} \ + \ +static inline __attribute__((unused)) \ +RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \ +{ \ + /* assert (__dq_head > 0); */ /* 
Commented out because we assume contract */ \ + \ + if (likely(0 == w->_public->movesplit)) { \ + if (likely(w->split <= __dq_head)) { \ + TD_##NAME *t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ + } \ + } \ + \ + return NAME##_SYNC_SLOW(w, __dq_head); \ +} \ + \ + \ + +#define TASK_IMPL_6(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6)\ +void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \ +{ \ + t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ +} \ + \ +static inline __attribute__((always_inline)) \ +RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6);\ + \ +/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\ +RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ +{ \ + CHECKSTACK(w); \ + return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4, arg_5, arg_6); \ +} \ + \ +static inline __attribute__((always_inline)) \ +RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4, ATYPE_5 ARG_5, ATYPE_6 ARG_6)\ + +#define TASK_6(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6) TASK_DECL_6(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6) TASK_IMPL_6(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6) + +#define VOID_TASK_DECL_6(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6) \ + \ +typedef struct _TD_##NAME { \ + TASK_COMMON_FIELDS(_TD_##NAME) \ + union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; ATYPE_5 arg_5; ATYPE_6 arg_6; } args; } d;\ +} TD_##NAME; \ + \ +/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\ +typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 
0 : -1];\ + \ +void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \ +void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6);\ +static inline void NAME##_SYNC(WorkerP *, Task *); \ +static void NAME##_SYNC_SLOW(WorkerP *, Task *); \ + \ +static inline __attribute__((unused)) \ +void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ +{ \ + PR_COUNTTASK(w); \ + \ + TD_##NAME *t; \ + TailSplit ts; \ + uint32_t head, split, newsplit; \ + \ + /* assert(__dq_head < w->end); */ /* Assuming to be true */ \ + \ + t = (TD_##NAME *)__dq_head; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (unlikely(w->allstolen)) { \ + if (wt->movesplit) wt->movesplit = 0; \ + head = __dq_head - w->dq; \ + ts = (TailSplit){{head,head+1}}; \ + wt->ts.v = ts.v; \ + compiler_barrier(); \ + wt->allstolen = 0; \ + w->split = __dq_head+1; \ + w->allstolen = 0; \ + } else if (unlikely(wt->movesplit)) { \ + head = __dq_head - w->dq; \ + split = w->split - w->dq; \ + newsplit = (split + head + 2)/2; \ + wt->ts.ts.split = newsplit; \ + w->split = w->dq + newsplit; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\ + \ + lace_do_newframe(w, __dq_head, &_t); \ + return ; \ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ +{ \ + Task _t; \ + TD_##NAME *t = (TD_##NAME *)&_t; \ + t->f = &NAME##_WRAP; \ + t->thief = THIEF_TASK; \ + t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\ + \ + lace_do_together(w, __dq_head, &_t); \ +} \ + \ +static __attribute__((noinline)) \ +void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \ +{ \ + TD_##NAME *t; \ + \ + if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \ + lace_leapfrog(w, __dq_head); \ + t = (TD_##NAME *)__dq_head; \ + return ; \ + } \ + \ + compiler_barrier(); \ + \ + Worker *wt = w->_public; \ + if (wt->movesplit) { \ + Task *t = w->split; \ + size_t diff = __dq_head - t; \ + diff = (diff + 1) / 2; \ + w->split = t + diff; \ + wt->ts.ts.split += diff; \ + compiler_barrier(); \ + wt->movesplit = 0; \ + PR_COUNTSPLITS(w, CTR_split_grow); \ + } \ + \ + compiler_barrier(); \ + \ + t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ +} \ + \ +static inline __attribute__((unused)) \ +void NAME##_SYNC(WorkerP *w, Task *__dq_head) \ +{ \ + /* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \ + \ + if 
(likely(0 == w->_public->movesplit)) { \ + if (likely(w->split <= __dq_head)) { \ + TD_##NAME *t = (TD_##NAME *)__dq_head; \ + t->thief = THIEF_EMPTY; \ + return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ + } \ + } \ + \ + return NAME##_SYNC_SLOW(w, __dq_head); \ +} \ + \ + \ + +#define VOID_TASK_IMPL_6(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6)\ +void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \ +{ \ + NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\ +} \ + \ +static inline __attribute__((always_inline)) \ +void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6);\ + \ +/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\ +void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\ +{ \ + CHECKSTACK(w); \ + return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4, arg_5, arg_6); \ +} \ + \ +static inline __attribute__((always_inline)) \ +void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4, ATYPE_5 ARG_5, ATYPE_6 ARG_6)\ + +#define VOID_TASK_6(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6) VOID_TASK_DECL_6(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6) VOID_TASK_IMPL_6(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6) + + +VOID_TASK_DECL_0(lace_steal_random); +VOID_TASK_DECL_1(lace_steal_random_loop, int*); +VOID_TASK_DECL_1(lace_steal_loop, int*); +VOID_TASK_DECL_2(lace_steal_loop_root, Task *, int*); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif diff --git a/src/llmsset.c b/src/llmsset.c new file mode 100644 index 000000000..9c21f2e94 --- /dev/null +++ b/src/llmsset.c @@ -0,0 +1,564 @@ +/* + * Copyright 2011-2015 Formal Methods and Tools, University of Twente + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <sylvan_config.h>
+
+#include <errno.h>  // for errno
+#include <stdint.h> // for uint64_t etc
+#include <stdio.h>  // for printf
+#include <stdlib.h>
+#include <string.h> // memset
+#include <sys/mman.h> // for mmap
+
+#include <lace.h>
+#include <llmsset.h>
+#include <sylvan_stats.h>
+
+#ifndef USE_HWLOC
+#define USE_HWLOC 0
+#endif
+
+#if USE_HWLOC
+#include <hwloc.h>
+
+static hwloc_topology_t topo;
+#endif
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef cas
+#define cas(ptr, old, new) (__sync_bool_compare_and_swap((ptr),(old),(new)))
+#endif
+
+DECLARE_THREAD_LOCAL(my_region, uint64_t);
+
+VOID_TASK_0(llmsset_reset_region)
+{
+    LOCALIZE_THREAD_LOCAL(my_region, uint64_t);
+    my_region = (uint64_t)-1; // no region
+    SET_THREAD_LOCAL(my_region, my_region);
+}
+
+static uint64_t
+claim_data_bucket(const llmsset_t dbs)
+{
+    LOCALIZE_THREAD_LOCAL(my_region, uint64_t);
+
+    for (;;) {
+        if (my_region != (uint64_t)-1) {
+            // find empty bucket in region
+            uint64_t *ptr = dbs->bitmap2 + (my_region*8);
+            int i=0;
+            for (;i<8;) {
+                uint64_t v = *ptr;
+                if (v != 0xffffffffffffffffLL) {
+                    int j = __builtin_clzll(~v);
+                    *ptr |= (0x8000000000000000LL>>j);
+                    return (8 * my_region + i) * 64 + j;
+                }
+                i++;
+                ptr++;
+            }
+        } else {
+            // special case on startup or after garbage collection
+            my_region += (lace_get_worker()->worker*(dbs->table_size/(64*8)))/lace_workers();
+        }
+        uint64_t count = dbs->table_size/(64*8);
+        for (;;) {
+            // check if table maybe full
+            if (count-- == 0) return (uint64_t)-1;
+
+            my_region += 1;
+            if (my_region >= (dbs->table_size/(64*8))) my_region = 0;
+
+            // try to claim it
+            uint64_t *ptr = dbs->bitmap1 + (my_region/64);
+            uint64_t mask = 0x8000000000000000LL >> (my_region&63);
+            uint64_t v;
+restart:
+            v = *ptr;
+            if (v & mask) continue; // taken
+            if (cas(ptr, v, v|mask)) break;
+            else goto restart;
+        }
+        SET_THREAD_LOCAL(my_region, my_region);
+    }
+}
+
+static void
+release_data_bucket(const llmsset_t dbs, uint64_t index)
+{
+    uint64_t *ptr = dbs->bitmap2 + (index/64);
+    uint64_t mask = 0x8000000000000000LL >> (index&63);
+    *ptr &= ~mask;
+}
+
+static void
+set_custom_bucket(const llmsset_t dbs, uint64_t index, int on)
+{
+    uint64_t *ptr = dbs->bitmapc + (index/64);
+    uint64_t mask = 0x8000000000000000LL >> (index&63);
+    if (on) *ptr |= mask;
+    else *ptr &= ~mask;
+}
+
+static int
+get_custom_bucket(const llmsset_t dbs, uint64_t index)
+{
+    uint64_t *ptr = dbs->bitmapc + (index/64);
+    uint64_t mask = 0x8000000000000000LL >> (index&63);
+    return (*ptr & mask) ? 1 : 0;
+}
+
+#ifndef rotl64
+static inline uint64_t
+rotl64(uint64_t x, int8_t r)
+{
+    return ((x<<r) | (x>>(64-r)));
+}
+#endif
+
+uint64_t
+llmsset_hash(const uint64_t a, const uint64_t b, const uint64_t seed)
+{
+    const uint64_t prime = 1099511628211;
+
+    uint64_t hash = seed;
+    hash = hash ^ a;
+    hash = rotl64(hash, 47);
+    hash = hash * prime;
+    hash = hash ^ b;
+    hash = rotl64(hash, 31);
+    hash = hash * prime;
+
+    return hash ^ (hash >> 32);
+}
+
+/*
+ * CL_MASK and CL_MASK_R are for the probe sequence calculation.
+ * With 64 bytes per cacheline, there are 8 64-bit values per cacheline.
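+ *
+ * Editor's note -- an illustrative walk of the probe sequence, assuming
+ * LINE_SIZE == 64: CL_MASK clears the low 3 bits of a bucket index and
+ * CL_MASK_R keeps them, so idx = (idx & CL_MASK) | ((idx+1) & CL_MASK_R)
+ * stays within a single cacheline. Starting at idx = 13 the sequence visits
+ * 13, 14, 15, 8, 9, 10, 11, 12 and then returns to 13 (== last), at which
+ * point the lookup loop computes a fresh hash to jump to another cacheline.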
+ */ +// The LINE_SIZE is defined in lace.h +static const uint64_t CL_MASK = ~(((LINE_SIZE) / 8) - 1); +static const uint64_t CL_MASK_R = ((LINE_SIZE) / 8) - 1; + +/* 40 bits for the index, 24 bits for the hash */ +#define MASK_INDEX ((uint64_t)0x000000ffffffffff) +#define MASK_HASH ((uint64_t)0xffffff0000000000) + +static inline uint64_t +llmsset_lookup2(const llmsset_t dbs, uint64_t a, uint64_t b, int* created, const int custom) +{ + uint64_t hash_rehash = 14695981039346656037LLU; + if (custom) hash_rehash = dbs->hash_cb(a, b, hash_rehash); + else hash_rehash = llmsset_hash(a, b, hash_rehash); + + const uint64_t hash = hash_rehash & MASK_HASH; + uint64_t idx, last, cidx = 0; + int i=0; + +#if LLMSSET_MASK + last = idx = hash_rehash & dbs->mask; +#else + last = idx = hash_rehash % dbs->table_size; +#endif + + for (;;) { + volatile uint64_t *bucket = dbs->table + idx; + uint64_t v = *bucket; + + if (v == 0) { + if (cidx == 0) { + cidx = claim_data_bucket(dbs); + if (cidx == (uint64_t)-1) return 0; // failed to claim a data bucket + if (custom) dbs->create_cb(&a, &b); + uint64_t *d_ptr = ((uint64_t*)dbs->data) + 2*cidx; + d_ptr[0] = a; + d_ptr[1] = b; + } + if (cas(bucket, 0, hash | cidx)) { + if (custom) set_custom_bucket(dbs, cidx, custom); + *created = 1; + return cidx; + } else { + v = *bucket; + } + } + + if (hash == (v & MASK_HASH)) { + uint64_t d_idx = v & MASK_INDEX; + uint64_t *d_ptr = ((uint64_t*)dbs->data) + 2*d_idx; + if (custom) { + if (dbs->equals_cb(a, b, d_ptr[0], d_ptr[1])) { + if (cidx != 0) { + dbs->destroy_cb(a, b); + release_data_bucket(dbs, cidx); + } + *created = 0; + return d_idx; + } + } else { + if (d_ptr[0] == a && d_ptr[1] == b) { + if (cidx != 0) release_data_bucket(dbs, cidx); + *created = 0; + return d_idx; + } + } + } + + sylvan_stats_count(LLMSSET_LOOKUP); + + // find next idx on probe sequence + idx = (idx & CL_MASK) | ((idx+1) & CL_MASK_R); + if (idx == last) { + if (++i == dbs->threshold) return 0; // failed to find empty spot in probe sequence + + // go to next cache line in probe sequence + if (custom) hash_rehash = dbs->hash_cb(a, b, hash_rehash); + else hash_rehash = llmsset_hash(a, b, hash_rehash); + +#if LLMSSET_MASK + last = idx = hash_rehash & dbs->mask; +#else + last = idx = hash_rehash % dbs->table_size; +#endif + } + } +} + +uint64_t +llmsset_lookup(const llmsset_t dbs, const uint64_t a, const uint64_t b, int* created) +{ + return llmsset_lookup2(dbs, a, b, created, 0); +} + +uint64_t +llmsset_lookupc(const llmsset_t dbs, const uint64_t a, const uint64_t b, int* created) +{ + return llmsset_lookup2(dbs, a, b, created, 1); +} + +static inline int +llmsset_rehash_bucket(const llmsset_t dbs, uint64_t d_idx) +{ + const uint64_t * const d_ptr = ((uint64_t*)dbs->data) + 2*d_idx; + const uint64_t a = d_ptr[0]; + const uint64_t b = d_ptr[1]; + + uint64_t hash_rehash = 14695981039346656037LLU; + const int custom = get_custom_bucket(dbs, d_idx) ? 
1 : 0; + if (custom) hash_rehash = dbs->hash_cb(a, b, hash_rehash); + else hash_rehash = llmsset_hash(a, b, hash_rehash); + const uint64_t new_v = (hash_rehash & MASK_HASH) | d_idx; + int i=0; + + uint64_t idx, last; +#if LLMSSET_MASK + last = idx = hash_rehash & dbs->mask; +#else + last = idx = hash_rehash % dbs->table_size; +#endif + + for (;;) { + volatile uint64_t *bucket = &dbs->table[idx]; + if (*bucket == 0 && cas(bucket, 0, new_v)) return 1; + + // find next idx on probe sequence + idx = (idx & CL_MASK) | ((idx+1) & CL_MASK_R); + if (idx == last) { + if (++i == dbs->threshold) return 0; // failed to find empty spot in probe sequence + + // go to next cache line in probe sequence + if (custom) hash_rehash = dbs->hash_cb(a, b, hash_rehash); + else hash_rehash = llmsset_hash(a, b, hash_rehash); + +#if LLMSSET_MASK + last = idx = hash_rehash & dbs->mask; +#else + last = idx = hash_rehash % dbs->table_size; +#endif + } + } +} + +llmsset_t +llmsset_create(size_t initial_size, size_t max_size) +{ +#if USE_HWLOC + hwloc_topology_init(&topo); + hwloc_topology_load(topo); +#endif + + llmsset_t dbs = NULL; + if (posix_memalign((void**)&dbs, LINE_SIZE, sizeof(struct llmsset)) != 0) { + fprintf(stderr, "llmsset_create: Unable to allocate memory!\n"); + exit(1); + } + +#if LLMSSET_MASK + /* Check if initial_size and max_size are powers of 2 */ + if (__builtin_popcountll(initial_size) != 1) { + fprintf(stderr, "llmsset_create: initial_size is not a power of 2!\n"); + exit(1); + } + + if (__builtin_popcountll(max_size) != 1) { + fprintf(stderr, "llmsset_create: max_size is not a power of 2!\n"); + exit(1); + } +#endif + + if (initial_size > max_size) { + fprintf(stderr, "llmsset_create: initial_size > max_size!\n"); + exit(1); + } + + // minimum size is now 512 buckets (region size, but of course, n_workers * 512 is suggested as minimum) + + if (initial_size < 512) { + fprintf(stderr, "llmsset_create: initial_size too small!\n"); + exit(1); + } + + dbs->max_size = max_size; + llmsset_set_size(dbs, initial_size); + + /* This implementation of "resizable hash table" allocates the max_size table in virtual memory, + but only uses the "actual size" part in real memory */ + + dbs->table = (uint64_t*)mmap(0, dbs->max_size * 8, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + dbs->data = (uint8_t*)mmap(0, dbs->max_size * 16, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + /* Also allocate bitmaps. Each region is 64*8 = 512 buckets. + Overhead of bitmap1: 1 bit per 4096 bucket. + Overhead of bitmap2: 1 bit per bucket. + Overhead of bitmapc: 1 bit per bucket. 
*/ + + dbs->bitmap1 = (uint64_t*)mmap(0, dbs->max_size / (512*8), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + dbs->bitmap2 = (uint64_t*)mmap(0, dbs->max_size / 8, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + dbs->bitmapc = (uint64_t*)mmap(0, dbs->max_size / 8, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (dbs->table == (uint64_t*)-1 || dbs->data == (uint8_t*)-1 || dbs->bitmap1 == (uint64_t*)-1 || dbs->bitmap2 == (uint64_t*)-1 || dbs->bitmapc == (uint64_t*)-1) { + fprintf(stderr, "llmsset_create: Unable to allocate memory: %s!\n", strerror(errno)); + exit(1); + } + +#if defined(madvise) && defined(MADV_RANDOM) + madvise(dbs->table, dbs->max_size * 8, MADV_RANDOM); +#endif + +#if USE_HWLOC + hwloc_set_area_membind(topo, dbs->table, dbs->max_size * 8, hwloc_topology_get_allowed_cpuset(topo), HWLOC_MEMBIND_INTERLEAVE, 0); + hwloc_set_area_membind(topo, dbs->data, dbs->max_size * 16, hwloc_topology_get_allowed_cpuset(topo), HWLOC_MEMBIND_FIRSTTOUCH, 0); + hwloc_set_area_membind(topo, dbs->bitmap1, dbs->max_size / (512*8), hwloc_topology_get_allowed_cpuset(topo), HWLOC_MEMBIND_INTERLEAVE, 0); + hwloc_set_area_membind(topo, dbs->bitmap2, dbs->max_size / 8, hwloc_topology_get_allowed_cpuset(topo), HWLOC_MEMBIND_FIRSTTOUCH, 0); + hwloc_set_area_membind(topo, dbs->bitmapc, dbs->max_size / 8, hwloc_topology_get_allowed_cpuset(topo), HWLOC_MEMBIND_FIRSTTOUCH, 0); +#endif + + // forbid first two positions (index 0 and 1) + dbs->bitmap2[0] = 0xc000000000000000LL; + + dbs->hash_cb = NULL; + dbs->equals_cb = NULL; + dbs->create_cb = NULL; + dbs->destroy_cb = NULL; + + // yes, ugly. for now, we use a global thread-local value. + // that is a problem with multiple tables. + // so, for now, do NOT use multiple tables!! + + LACE_ME; + INIT_THREAD_LOCAL(my_region); + TOGETHER(llmsset_reset_region); + + return dbs; +} + +void +llmsset_free(llmsset_t dbs) +{ + munmap(dbs->table, dbs->max_size * 8); + munmap(dbs->data, dbs->max_size * 16); + munmap(dbs->bitmap1, dbs->max_size / (512*8)); + munmap(dbs->bitmap2, dbs->max_size / 8); + munmap(dbs->bitmapc, dbs->max_size / 8); + free(dbs); +} + +VOID_TASK_IMPL_1(llmsset_clear, llmsset_t, dbs) +{ + // just reallocate... + if (mmap(dbs->table, dbs->max_size * 8, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) != (void*)-1) { +#if defined(madvise) && defined(MADV_RANDOM) + madvise(dbs->table, sizeof(uint64_t[dbs->max_size]), MADV_RANDOM); +#endif +#if USE_HWLOC + hwloc_set_area_membind(topo, dbs->table, sizeof(uint64_t[dbs->max_size]), hwloc_topology_get_allowed_cpuset(topo), HWLOC_MEMBIND_INTERLEAVE, 0); +#endif + } else { + // reallocate failed... 
expensive fallback
+        memset(dbs->table, 0, dbs->max_size * 8);
+    }
+
+    if (mmap(dbs->bitmap1, dbs->max_size / (512*8), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) != (void*)-1) {
+#if USE_HWLOC
+        hwloc_set_area_membind(topo, dbs->bitmap1, dbs->max_size / (512*8), hwloc_topology_get_allowed_cpuset(topo), HWLOC_MEMBIND_INTERLEAVE, 0);
+#endif
+    } else {
+        memset(dbs->bitmap1, 0, dbs->max_size / (512*8));
+    }
+
+    if (mmap(dbs->bitmap2, dbs->max_size / 8, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) != (void*)-1) {
+#if USE_HWLOC
+        hwloc_set_area_membind(topo, dbs->bitmap2, dbs->max_size / 8, hwloc_topology_get_allowed_cpuset(topo), HWLOC_MEMBIND_FIRSTTOUCH, 0);
+#endif
+    } else {
+        memset(dbs->bitmap2, 0, dbs->max_size / 8);
+    }
+
+    // forbid first two positions (index 0 and 1)
+    dbs->bitmap2[0] = 0xc000000000000000LL;
+
+    TOGETHER(llmsset_reset_region);
+}
+
+int
+llmsset_is_marked(const llmsset_t dbs, uint64_t index)
+{
+    volatile uint64_t *ptr = dbs->bitmap2 + (index/64);
+    uint64_t mask = 0x8000000000000000LL >> (index&63);
+    return (*ptr & mask) ? 1 : 0;
+}
+
+int
+llmsset_mark(const llmsset_t dbs, uint64_t index)
+{
+    volatile uint64_t *ptr = dbs->bitmap2 + (index/64);
+    uint64_t mask = 0x8000000000000000LL >> (index&63);
+    for (;;) {
+        uint64_t v = *ptr;
+        if (v & mask) return 0;
+        if (cas(ptr, v, v|mask)) return 1;
+    }
+}
+
+VOID_TASK_3(llmsset_rehash_par, llmsset_t, dbs, size_t, first, size_t, count)
+{
+    if (count > 512) {
+        size_t split = count/2;
+        SPAWN(llmsset_rehash_par, dbs, first, split);
+        CALL(llmsset_rehash_par, dbs, first + split, count - split);
+        SYNC(llmsset_rehash_par);
+    } else {
+        uint64_t *ptr = dbs->bitmap2 + (first / 64);
+        uint64_t mask = 0x8000000000000000LL >> (first & 63);
+        for (size_t k=0; k<count; k++) {
+            if (*ptr & mask) llmsset_rehash_bucket(dbs, first+k);
+            mask >>= 1;
+            if (mask == 0) {
+                ptr++;
+                mask = 0x8000000000000000LL;
+            }
+        }
+    }
+}
+
+VOID_TASK_IMPL_1(llmsset_rehash, llmsset_t, dbs)
+{
+    CALL(llmsset_rehash_par, dbs, 0, dbs->table_size);
+}
+
+TASK_3(size_t, llmsset_count_marked_par, llmsset_t, dbs, size_t, first, size_t, count)
+{
+    if (count > 512) {
+        size_t split = count/2;
+        SPAWN(llmsset_count_marked_par, dbs, first, split);
+        size_t right = CALL(llmsset_count_marked_par, dbs, first + split, count - split);
+        size_t left = SYNC(llmsset_count_marked_par);
+        return left + right;
+    } else {
+        size_t result = 0;
+        uint64_t *ptr = dbs->bitmap2 + (first / 64);
+        if (count == 512) {
+            result += __builtin_popcountll(ptr[0]);
+            result += __builtin_popcountll(ptr[1]);
+            result += __builtin_popcountll(ptr[2]);
+            result += __builtin_popcountll(ptr[3]);
+            result += __builtin_popcountll(ptr[4]);
+            result += __builtin_popcountll(ptr[5]);
+            result += __builtin_popcountll(ptr[6]);
+            result += __builtin_popcountll(ptr[7]);
+        } else {
+            uint64_t mask = 0x8000000000000000LL >> (first & 63);
+            for (size_t k=0; k<count; k++) {
+                if (*ptr & mask) result += 1;
+                mask >>= 1;
+                if (mask == 0) {
+                    ptr++;
+                    mask = 0x8000000000000000LL;
+                }
+            }
+        }
+        return result;
+    }
+}
+
+TASK_IMPL_1(size_t, llmsset_count_marked, llmsset_t, dbs)
+{
+    return CALL(llmsset_count_marked_par, dbs, 0, dbs->table_size);
+}
+
+VOID_TASK_3(llmsset_destroy_par, llmsset_t, dbs, size_t, first, size_t, count)
+{
+    if (count > 1024) {
+        size_t split = count/2;
+        SPAWN(llmsset_destroy_par, dbs, first, split);
+        CALL(llmsset_destroy_par, dbs, first + split, count - split);
+        SYNC(llmsset_destroy_par);
+    } else {
+        for (size_t k=first; k<first+count; k++) {
+            volatile uint64_t *ptr2 = dbs->bitmap2 + (k/64);
+            volatile uint64_t *ptrc = dbs->bitmapc + (k/64);
+            uint64_t mask = 0x8000000000000000LL >> (k&63);
+
+            // if not marked but is custom
+            if ((*ptr2 & mask) == 0 && (*ptrc & mask)) {
+                uint64_t *d_ptr = ((uint64_t*)dbs->data) + 2*k;
+                dbs->destroy_cb(d_ptr[0], d_ptr[1]);
+                *ptrc &= ~mask;
+            }
+        }
+    }
+}
+
+VOID_TASK_IMPL_1(llmsset_destroy_unmarked, llmsset_t, dbs)
+{
+    if (dbs->destroy_cb == NULL) return; // no custom function
+    CALL(llmsset_destroy_par, dbs, 0, dbs->table_size);
+}
+
+/**
+ * Set custom functions
+ */
+void llmsset_set_custom(const llmsset_t dbs, llmsset_hash_cb hash_cb, llmsset_equals_cb equals_cb, llmsset_create_cb create_cb, llmsset_destroy_cb destroy_cb)
+{
+    dbs->hash_cb = hash_cb;
+    dbs->equals_cb = equals_cb;
+    dbs->create_cb = create_cb;
+    dbs->destroy_cb = destroy_cb;
+}
diff --git a/src/llmsset.h b/src/llmsset.h
new file mode 100644
index 000000000..84c55e23b
--- /dev/null
+++ b/src/llmsset.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright 2011-2014 Formal Methods and Tools, University of Twente
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <lace.h>
+
+#ifndef LLMSSET_H
+#define LLMSSET_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef LLMSSET_MASK
+#define LLMSSET_MASK 0 // set to 1 to use bit mask instead of modulo
+#endif
+
+/**
+ * Lockless hash table (set) to store 16-byte keys.
+ * Each unique key is associated with a 40-bit number.
+ *
+ * The set has support for stop-the-world garbage collection.
+ * Methods llmsset_clear, llmsset_mark and llmsset_rehash implement garbage collection.
+ * During their execution, llmsset_lookup is not allowed.
+ */
+
+/**
+ * hash(a, b, seed)
+ * equals(lhs_a, lhs_b, rhs_a, rhs_b)
+ * create(a, b) -- with a,b pointers, allows changing pointers on create of node,
+ *                 but must keep hash/equals same!
+ * destroy(a, b)
+ */
+typedef uint64_t (*llmsset_hash_cb)(uint64_t, uint64_t, uint64_t);
+typedef int (*llmsset_equals_cb)(uint64_t, uint64_t, uint64_t, uint64_t);
+typedef void (*llmsset_create_cb)(uint64_t *, uint64_t *);
+typedef void (*llmsset_destroy_cb)(uint64_t, uint64_t);
+
+typedef struct llmsset
+{
+    uint64_t *table;              // table with hashes
+    uint8_t *data;                // table with values
+    uint64_t *bitmap1;            // ownership bitmap (per 512 buckets)
+    uint64_t *bitmap2;            // bitmap for "contains data"
+    uint64_t *bitmapc;            // bitmap for "use custom functions"
+    size_t max_size;              // maximum size of the hash table (for resizing)
+    size_t table_size;            // size of the hash table (number of slots) --> power of 2!
+#if LLMSSET_MASK
+    size_t mask;                  // size-1
+#endif
+    size_t f_size;
+    llmsset_hash_cb hash_cb;      // custom hash function
+    llmsset_equals_cb equals_cb;  // custom equals function
+    llmsset_create_cb create_cb;  // custom create function
+    llmsset_destroy_cb destroy_cb; // custom destroy function
+    int16_t threshold;            // number of iterations for insertion until returning error
+} *llmsset_t;
+
+/**
+ * Retrieve a pointer to the data associated with the 40-bit value.
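+ *
+ * Editor's note -- a minimal usage sketch (a and b are hypothetical key
+ * words), reading the stored 16-byte key back after a lookup:
+ *
+ *     int created;
+ *     uint64_t idx = llmsset_lookup(dbs, a, b, &created);
+ *     if (idx != 0) {
+ *         uint64_t *d = (uint64_t*)llmsset_index_to_ptr(dbs, idx);
+ *         // for non-custom entries, d[0] == a and d[1] == b
+ *     }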
+ */
+static inline void*
+llmsset_index_to_ptr(const llmsset_t dbs, size_t index)
+{
+    return dbs->data + index * 16;
+}
+
+/**
+ * Create the set.
+ * This will allocate a set of max_size buckets in virtual memory.
+ * The actual space used is initial_size buckets.
+ */
+llmsset_t llmsset_create(size_t initial_size, size_t max_size);
+
+/**
+ * Free the set.
+ */
+void llmsset_free(llmsset_t dbs);
+
+/**
+ * Retrieve the maximum size of the set.
+ */
+static inline size_t
+llmsset_get_max_size(const llmsset_t dbs)
+{
+    return dbs->max_size;
+}
+
+/**
+ * Retrieve the current size of the lockless MS set.
+ */
+static inline size_t
+llmsset_get_size(const llmsset_t dbs)
+{
+    return dbs->table_size;
+}
+
+/**
+ * Set the table size of the set.
+ * Typically called during garbage collection, after clear and before rehash.
+ * The requested size is ignored unless 128 < size <= dbs->max_size.
+ */
+static inline void
+llmsset_set_size(llmsset_t dbs, size_t size)
+{
+    /* check bounds (don't be ridiculous) */
+    if (size > 128 && size <= dbs->max_size) {
+        dbs->table_size = size;
+#if LLMSSET_MASK
+        /* Warning: if size is not a power of two, you will get interesting behavior */
+        dbs->mask = dbs->table_size - 1;
+#endif
+        dbs->threshold = (64 - __builtin_clzll(dbs->table_size)) + 4; // doubling table_size increases threshold by 1
+    }
+}
+
+/**
+ * Core function: find existing data or add new.
+ * Returns the unique 40-bit value associated with the data, or 0 when the table is full.
+ * This value will never equal 0 or 1.
+ * Note: garbage collection during lookup is strictly forbidden.
+ */
+uint64_t llmsset_lookup(const llmsset_t dbs, const uint64_t a, const uint64_t b, int *created);
+
+/**
+ * Same as lookup, but use the custom functions
+ */
+uint64_t llmsset_lookupc(const llmsset_t dbs, const uint64_t a, const uint64_t b, int *created);
+
+/**
+ * To perform garbage collection, the user is responsible that no lookups are performed during the process.
+ *
+ * 1) call llmsset_clear
+ * 2) call llmsset_mark for every bucket to rehash
+ * 3) call llmsset_rehash
+ */
+VOID_TASK_DECL_1(llmsset_clear, llmsset_t);
+#define llmsset_clear(dbs) CALL(llmsset_clear, dbs)
+
+/**
+ * Check if a certain data bucket is marked (in use).
+ */
+int llmsset_is_marked(const llmsset_t dbs, uint64_t index);
+
+/**
+ * During garbage collection, buckets are marked (for rehashing) with this function.
+ * Returns 0 if the node was already marked, or non-zero if it was not marked.
+ * May also return non-zero if multiple workers marked at the same time.
+ */
+int llmsset_mark(const llmsset_t dbs, uint64_t index);
+
+/**
+ * Rehash all marked buckets.
+ */
+VOID_TASK_DECL_1(llmsset_rehash, llmsset_t);
+#define llmsset_rehash(dbs) CALL(llmsset_rehash, dbs)
+
+/**
+ * Retrieve number of marked buckets.
+ */
+TASK_DECL_1(size_t, llmsset_count_marked, llmsset_t);
+#define llmsset_count_marked(dbs) CALL(llmsset_count_marked, dbs)
+
+/**
+ * During garbage collection, this method calls the destroy callback
+ * for all 'custom' data that is not kept.
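+ *
+ * Editor's note -- a hedged sketch of one full collection cycle following
+ * steps 1-3 above; how buckets get marked is application-specific:
+ *
+ *     llmsset_clear(dbs);            // wipe hash table and ownership bitmaps
+ *     // ... llmsset_mark(dbs, idx) for every bucket that must survive ...
+ *     llmsset_destroy_unmarked(dbs); // destroy callback for dropped custom data
+ *     llmsset_rehash(dbs);           // reinsert all marked buckets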
+ */
+VOID_TASK_DECL_1(llmsset_destroy_unmarked, llmsset_t);
+#define llmsset_destroy_unmarked(dbs) CALL(llmsset_destroy_unmarked, dbs)
+
+/**
+ * Set custom functions
+ */
+void llmsset_set_custom(const llmsset_t dbs, llmsset_hash_cb hash_cb, llmsset_equals_cb equals_cb, llmsset_create_cb create_cb, llmsset_destroy_cb destroy_cb);
+
+/**
+ * Default hashing function
+ */
+uint64_t llmsset_hash(const uint64_t a, const uint64_t b, const uint64_t seed);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+
+#endif
diff --git a/src/refs.c b/src/refs.c
new file mode 100644
index 000000000..0d6963bfe
--- /dev/null
+++ b/src/refs.c
@@ -0,0 +1,598 @@
+/*
+ * Copyright 2011-2015 Formal Methods and Tools, University of Twente
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sylvan_config.h>
+
+#include <assert.h> // for assert
+#include <errno.h>  // for errno
+#include <stdio.h>  // for fprintf
+#include <stdint.h> // for uint32_t etc
+#include <stdlib.h> // for exit
+#include <string.h> // for strerror
+#include <sys/mman.h> // for mmap
+
+#include <refs.h>
+
+#ifndef compiler_barrier
+#define compiler_barrier() { asm volatile("" ::: "memory"); }
+#endif
+
+#ifndef cas
+#define cas(ptr, old, new) (__sync_bool_compare_and_swap((ptr),(old),(new)))
+#endif
+
+/**
+ * Implementation of external references
+ * Based on a hash table for 40-bit non-null values, linear probing
+ * Use tombstones for deleting, higher bits for reference count
+ */
+static const uint64_t refs_ts = 0x7fffffffffffffff; // tombstone
+
+/* FNV-1a 64-bit hash */
+static inline uint64_t
+fnv_hash(uint64_t a)
+{
+    const uint64_t prime = 1099511628211;
+    uint64_t hash = 14695981039346656037LLU;
+    hash = (hash ^ a) * prime;
+    hash = (hash ^ ((a << 25) | (a >> 39))) * prime;
+    return hash ^ (hash >> 32);
+}
+
+// Count number of unique entries (not number of references)
+size_t
+refs_count(refs_table_t *tbl)
+{
+    size_t count = 0;
+    uint64_t *bucket = tbl->refs_table;
+    uint64_t * const end = bucket + tbl->refs_size;
+    while (bucket != end) {
+        if (*bucket != 0 && *bucket != refs_ts) count++;
+        bucket++;
+    }
+    return count;
+}
+
+static inline void
+refs_rehash(refs_table_t *tbl, uint64_t v)
+{
+    if (v == 0) return; // do not rehash empty value
+    if (v == refs_ts) return; // do not rehash tombstone
+
+    volatile uint64_t *bucket = tbl->refs_table + (fnv_hash(v & 0x000000ffffffffff) % tbl->refs_size);
+    uint64_t * const end = tbl->refs_table + tbl->refs_size;
+
+    int i = 128; // try 128 times linear probing
+    while (i--) {
+        if (*bucket == 0) { if (cas(bucket, 0, v)) return; }
+        if (++bucket == end) bucket = tbl->refs_table;
+    }
+
+    // assert(0); // impossible!
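+
+    // Editor's note: reaching this point would require 128 consecutive
+    // occupied buckets; refs_resize() doubles the table as soon as it is
+    // more than a quarter full, so the assert above is left disabled on
+    // the assumption that this cannot happen in practice.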
+} + +/** + * Called internally to assist resize operations + * Returns 1 for retry, 0 for done + */ +static int +refs_resize_help(refs_table_t *tbl) +{ + if (0 == (tbl->refs_control & 0xf0000000)) return 0; // no resize in progress (anymore) + if (tbl->refs_control & 0x80000000) return 1; // still waiting for preparation + + if (tbl->refs_resize_part >= tbl->refs_resize_size / 128) return 1; // all parts claimed + size_t part = __sync_fetch_and_add(&tbl->refs_resize_part, 1); + if (part >= tbl->refs_resize_size/128) return 1; // all parts claimed + + // rehash all + int i; + volatile uint64_t *bucket = tbl->refs_resize_table + part * 128; + for (i=0; i<128; i++) refs_rehash(tbl, *bucket++); + + __sync_fetch_and_add(&tbl->refs_resize_done, 1); + return 1; +} + +static void +refs_resize(refs_table_t *tbl) +{ + while (1) { + uint32_t v = tbl->refs_control; + if (v & 0xf0000000) { + // someone else started resize + // just rehash blocks until done + while (refs_resize_help(tbl)) continue; + return; + } + if (cas(&tbl->refs_control, v, 0x80000000 | v)) { + // wait until all users gone + while (tbl->refs_control != 0x80000000) continue; + break; + } + } + + tbl->refs_resize_table = tbl->refs_table; + tbl->refs_resize_size = tbl->refs_size; + tbl->refs_resize_part = 0; + tbl->refs_resize_done = 0; + + // calculate new size + size_t new_size = tbl->refs_size; + size_t count = refs_count(tbl); + if (count*4 > tbl->refs_size) new_size *= 2; + + // allocate new table + uint64_t *new_table = (uint64_t*)mmap(0, new_size * sizeof(uint64_t), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (new_table == (uint64_t*)-1) { + fprintf(stderr, "refs: Unable to allocate memory: %s!\n", strerror(errno)); + exit(1); + } + + // set new data and go + tbl->refs_table = new_table; + tbl->refs_size = new_size; + compiler_barrier(); + tbl->refs_control = 0x40000000; + + // until all parts are done, rehash blocks + while (tbl->refs_resize_done != tbl->refs_resize_size/128) refs_resize_help(tbl); + + // done! 
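+    // Editor's note: the compiler barrier below keeps the rehash writes
+    // ordered before the store that clears refs_control, so a thread that
+    // observes refs_control == 0 also observes the repopulated table; the
+    // code appears to rely on x86-style store ordering for the rest.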
+ compiler_barrier(); + tbl->refs_control = 0; + + // unmap old table + munmap(tbl->refs_resize_table, tbl->refs_resize_size * sizeof(uint64_t)); +} + +/* Enter refs_modify */ +static inline void +refs_enter(refs_table_t *tbl) +{ + for (;;) { + uint32_t v = tbl->refs_control; + if (v & 0xf0000000) { + while (refs_resize_help(tbl)) continue; + } else { + if (cas(&tbl->refs_control, v, v+1)) return; + } + } +} + +/* Leave refs_modify */ +static inline void +refs_leave(refs_table_t *tbl) +{ + for (;;) { + uint32_t v = tbl->refs_control; + if (cas(&tbl->refs_control, v, v-1)) return; + } +} + +static inline int +refs_modify(refs_table_t *tbl, const uint64_t a, const int dir) +{ + volatile uint64_t *bucket; + volatile uint64_t *ts_bucket; + uint64_t v, new_v; + int res, i; + + refs_enter(tbl); + +ref_retry: + bucket = tbl->refs_table + (fnv_hash(a) & (tbl->refs_size - 1)); + ts_bucket = NULL; // tombstone + i = 128; // try 128 times linear probing + + while (i--) { +ref_restart: + v = *bucket; + if (v == refs_ts) { + if (ts_bucket == NULL) ts_bucket = bucket; + } else if (v == 0) { + // not found + res = 0; + if (dir < 0) goto ref_exit; + if (ts_bucket != NULL) { + bucket = ts_bucket; + ts_bucket = NULL; + v = refs_ts; + } + new_v = a | (1ULL << 40); + goto ref_mod; + } else if ((v & 0x000000ffffffffff) == a) { + // found + res = 1; + uint64_t count = v >> 40; + if (count == 0x7fffff) goto ref_exit; + count += dir; + if (count == 0) new_v = refs_ts; + else new_v = a | (count << 40); + goto ref_mod; + } + + if (++bucket == tbl->refs_table + tbl->refs_size) bucket = tbl->refs_table; + } + + // not found after linear probing + if (dir < 0) { + res = 0; + goto ref_exit; + } else if (ts_bucket != NULL) { + bucket = ts_bucket; + ts_bucket = NULL; + v = refs_ts; + new_v = a | (1ULL << 40); + if (!cas(bucket, v, new_v)) goto ref_retry; + res = 1; + goto ref_exit; + } else { + // hash table full + refs_leave(tbl); + refs_resize(tbl); + return refs_modify(tbl, a, dir); + } + +ref_mod: + if (!cas(bucket, v, new_v)) goto ref_restart; + +ref_exit: + refs_leave(tbl); + return res; +} + +void +refs_up(refs_table_t *tbl, uint64_t a) +{ + refs_modify(tbl, a, 1); +} + +void +refs_down(refs_table_t *tbl, uint64_t a) +{ +#ifdef NDEBUG + refs_modify(tbl, a, -1); +#else + int res = refs_modify(tbl, a, -1); + assert(res != 0); +#endif +} + +uint64_t* +refs_iter(refs_table_t *tbl, size_t first, size_t end) +{ + // assert(first < tbl->refs_size); + // assert(end <= tbl->refs_size); + + uint64_t *bucket = tbl->refs_table + first; + while (bucket != tbl->refs_table + end) { + if (*bucket != 0 && *bucket != refs_ts) return bucket; + bucket++; + } + return NULL; +} + +uint64_t +refs_next(refs_table_t *tbl, uint64_t **_bucket, size_t end) +{ + uint64_t *bucket = *_bucket; + // assert(bucket != NULL); + // assert(end <= tbl->refs_size); + uint64_t result = *bucket & 0x000000ffffffffff; + bucket++; + while (bucket != tbl->refs_table + end) { + if (*bucket != 0 && *bucket != refs_ts) { + *_bucket = bucket; + return result; + } + bucket++; + } + *_bucket = NULL; + return result; +} + +void +refs_create(refs_table_t *tbl, size_t _refs_size) +{ + if (__builtin_popcountll(_refs_size) != 1) { + fprintf(stderr, "refs: Table size must be a power of 2!\n"); + exit(1); + } + + tbl->refs_size = _refs_size; + tbl->refs_table = (uint64_t*)mmap(0, tbl->refs_size * sizeof(uint64_t), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (tbl->refs_table == (uint64_t*)-1) { + fprintf(stderr, "refs: Unable to allocate memory: %s!\n", 
strerror(errno)); + exit(1); + } +} + +void +refs_free(refs_table_t *tbl) +{ + munmap(tbl->refs_table, tbl->refs_size * sizeof(uint64_t)); +} + +/** + * Simple implementation of a 64-bit resizable hash-table + * No idea if this is scalable... :( but it seems thread-safe + */ + +// Count number of unique entries (not number of references) +size_t +protect_count(refs_table_t *tbl) +{ + size_t count = 0; + uint64_t *bucket = tbl->refs_table; + uint64_t * const end = bucket + tbl->refs_size; + while (bucket != end) { + if (*bucket != 0 && *bucket != refs_ts) count++; + bucket++; + } + return count; +} + +static inline void +protect_rehash(refs_table_t *tbl, uint64_t v) +{ + if (v == 0) return; // do not rehash empty value + if (v == refs_ts) return; // do not rehash tombstone + + volatile uint64_t *bucket = tbl->refs_table + (fnv_hash(v) % tbl->refs_size); + uint64_t * const end = tbl->refs_table + tbl->refs_size; + + int i = 128; // try 128 times linear probing + while (i--) { + if (*bucket == 0 && cas(bucket, 0, v)) return; + if (++bucket == end) bucket = tbl->refs_table; + } + + assert(0); // whoops! +} + +/** + * Called internally to assist resize operations + * Returns 1 for retry, 0 for done + */ +static int +protect_resize_help(refs_table_t *tbl) +{ + if (0 == (tbl->refs_control & 0xf0000000)) return 0; // no resize in progress (anymore) + if (tbl->refs_control & 0x80000000) return 1; // still waiting for preparation + if (tbl->refs_resize_part >= tbl->refs_resize_size / 128) return 1; // all parts claimed + size_t part = __sync_fetch_and_add(&tbl->refs_resize_part, 1); + if (part >= tbl->refs_resize_size/128) return 1; // all parts claimed + + // rehash all + int i; + volatile uint64_t *bucket = tbl->refs_resize_table + part * 128; + for (i=0; i<128; i++) protect_rehash(tbl, *bucket++); + + __sync_fetch_and_add(&tbl->refs_resize_done, 1); + return 1; +} + +static void +protect_resize(refs_table_t *tbl) +{ + while (1) { + uint32_t v = tbl->refs_control; + if (v & 0xf0000000) { + // someone else started resize + // just rehash blocks until done + while (protect_resize_help(tbl)) continue; + return; + } + if (cas(&tbl->refs_control, v, 0x80000000 | v)) { + // wait until all users gone + while (tbl->refs_control != 0x80000000) continue; + break; + } + } + + tbl->refs_resize_table = tbl->refs_table; + tbl->refs_resize_size = tbl->refs_size; + tbl->refs_resize_part = 0; + tbl->refs_resize_done = 0; + + // calculate new size + size_t new_size = tbl->refs_size; + size_t count = refs_count(tbl); + if (count*4 > tbl->refs_size) new_size *= 2; + + // allocate new table + uint64_t *new_table = (uint64_t*)mmap(0, new_size * sizeof(uint64_t), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (new_table == (uint64_t*)-1) { + fprintf(stderr, "refs: Unable to allocate memory: %s!\n", strerror(errno)); + exit(1); + } + + // set new data and go + tbl->refs_table = new_table; + tbl->refs_size = new_size; + compiler_barrier(); + tbl->refs_control = 0x40000000; + + // until all parts are done, rehash blocks + while (tbl->refs_resize_done < tbl->refs_resize_size/128) protect_resize_help(tbl); + + // done! 
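+    // Editor's note: same publication scheme as refs_resize() above; the
+    // only difference is that protect entries are raw 64-bit values, so
+    // there is no packed reference count to carry over during the rehash.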
+ compiler_barrier(); + tbl->refs_control = 0; + + // unmap old table + munmap(tbl->refs_resize_table, tbl->refs_resize_size * sizeof(uint64_t)); +} + +static inline void +protect_enter(refs_table_t *tbl) +{ + for (;;) { + uint32_t v = tbl->refs_control; + if (v & 0xf0000000) { + while (protect_resize_help(tbl)) continue; + } else { + if (cas(&tbl->refs_control, v, v+1)) return; + } + } +} + +static inline void +protect_leave(refs_table_t *tbl) +{ + for (;;) { + uint32_t v = tbl->refs_control; + if (cas(&tbl->refs_control, v, v-1)) return; + } +} + +void +protect_up(refs_table_t *tbl, uint64_t a) +{ + volatile uint64_t *bucket; + volatile uint64_t *ts_bucket; + uint64_t v; + int i; + + protect_enter(tbl); + +ref_retry: + bucket = tbl->refs_table + (fnv_hash(a) & (tbl->refs_size - 1)); + ts_bucket = NULL; // tombstone + i = 128; // try 128 times linear probing + + while (i--) { +ref_restart: + v = *bucket; + if (v == refs_ts) { + if (ts_bucket == NULL) ts_bucket = bucket; + } else if (v == 0) { + // go go go + if (ts_bucket != NULL) { + if (cas(ts_bucket, refs_ts, a)) { + protect_leave(tbl); + return; + } else { + goto ref_retry; + } + } else { + if (cas(bucket, 0, a)) { + protect_leave(tbl); + return; + } else { + goto ref_restart; + } + } + } + + if (++bucket == tbl->refs_table + tbl->refs_size) bucket = tbl->refs_table; + } + + // not found after linear probing + if (ts_bucket != NULL) { + if (cas(ts_bucket, refs_ts, a)) { + protect_leave(tbl); + return; + } else { + goto ref_retry; + } + } else { + // hash table full + protect_leave(tbl); + protect_resize(tbl); + protect_enter(tbl); + goto ref_retry; + } +} + +void +protect_down(refs_table_t *tbl, uint64_t a) +{ + volatile uint64_t *bucket; + protect_enter(tbl); + + bucket = tbl->refs_table + (fnv_hash(a) & (tbl->refs_size - 1)); + int i = 128; // try 128 times linear probing + + while (i--) { + if (*bucket == a) { + *bucket = refs_ts; + protect_leave(tbl); + return; + } + if (++bucket == tbl->refs_table + tbl->refs_size) bucket = tbl->refs_table; + } + + // not found after linear probing + assert(0); +} + +uint64_t* +protect_iter(refs_table_t *tbl, size_t first, size_t end) +{ + // assert(first < tbl->refs_size); + // assert(end <= tbl->refs_size); + + uint64_t *bucket = tbl->refs_table + first; + while (bucket != tbl->refs_table + end) { + if (*bucket != 0 && *bucket != refs_ts) return bucket; + bucket++; + } + return NULL; +} + +uint64_t +protect_next(refs_table_t *tbl, uint64_t **_bucket, size_t end) +{ + uint64_t *bucket = *_bucket; + // assert(bucket != NULL); + // assert(end <= tbl->refs_size); + uint64_t result = *bucket; + bucket++; + while (bucket != tbl->refs_table + end) { + if (*bucket != 0 && *bucket != refs_ts) { + *_bucket = bucket; + return result; + } + bucket++; + } + *_bucket = NULL; + return result; +} + +void +protect_create(refs_table_t *tbl, size_t _refs_size) +{ + if (__builtin_popcountll(_refs_size) != 1) { + fprintf(stderr, "refs: Table size must be a power of 2!\n"); + exit(1); + } + + tbl->refs_size = _refs_size; + tbl->refs_table = (uint64_t*)mmap(0, tbl->refs_size * sizeof(uint64_t), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (tbl->refs_table == (uint64_t*)-1) { + fprintf(stderr, "refs: Unable to allocate memory: %s!\n", strerror(errno)); + exit(1); + } +} + +void +protect_free(refs_table_t *tbl) +{ + munmap(tbl->refs_table, tbl->refs_size * sizeof(uint64_t)); + tbl->refs_table = 0; +} diff --git a/src/refs.h b/src/refs.h new file mode 100644 index 000000000..928948c90 --- /dev/null +++ 
b/src/refs.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2011-2015 Formal Methods and Tools, University of Twente
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+#include <stdint.h> // for uint32_t etc
+
+#ifndef REFS_INLINE_H
+#define REFS_INLINE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * Implementation of external references
+ * Based on a hash table for 40-bit non-null values, linear probing
+ * Use tombstones for deleting, higher bits for reference count
+ */
+typedef struct
+{
+    uint64_t *refs_table;           // table itself
+    size_t refs_size;               // number of buckets
+
+    /* helpers during resize operation */
+    volatile uint32_t refs_control; // control field
+    uint64_t *refs_resize_table;    // previous table
+    size_t refs_resize_size;        // size of previous table
+    size_t refs_resize_part;        // which part is next
+    size_t refs_resize_done;        // how many parts are done
+} refs_table_t;
+
+// Count number of unique entries (not number of references)
+size_t refs_count(refs_table_t *tbl);
+
+// Increase or decrease reference to 40-bit value a
+// Will fail (assertion) if more down than up are called for a
+void refs_up(refs_table_t *tbl, uint64_t a);
+void refs_down(refs_table_t *tbl, uint64_t a);
+
+// Return a bucket or NULL to start iterating
+uint64_t *refs_iter(refs_table_t *tbl, size_t first, size_t end);
+
+// Continue iterating, set bucket to next bucket or NULL
+uint64_t refs_next(refs_table_t *tbl, uint64_t **bucket, size_t end);
+
+// User must supply a pointer, refs_create and refs_free handle initialization/destruction
+void refs_create(refs_table_t *tbl, size_t _refs_size);
+void refs_free(refs_table_t *tbl);
+
+// The same, but now for 64-bit values ("protect pointers")
+size_t protect_count(refs_table_t *tbl);
+void protect_up(refs_table_t *tbl, uint64_t a);
+void protect_down(refs_table_t *tbl, uint64_t a);
+uint64_t *protect_iter(refs_table_t *tbl, size_t first, size_t end);
+uint64_t protect_next(refs_table_t *tbl, uint64_t **bucket, size_t end);
+void protect_create(refs_table_t *tbl, size_t _refs_size);
+void protect_free(refs_table_t *tbl);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+
+#endif
diff --git a/src/sha2.c b/src/sha2.c
new file mode 100644
index 000000000..db17dbb2d
--- /dev/null
+++ b/src/sha2.c
@@ -0,0 +1,1067 @@
+/*
+ * FILE: sha2.c
+ * AUTHOR: Aaron D. Gifford - http://www.aarongifford.com/
+ *
+ * Copyright (c) 2000-2001, Aaron D. Gifford
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the copyright holder nor the names of contributors
Neither the name of the copyright holder nor the names of contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTOR(S) ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTOR(S) BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include <string.h> /* memcpy()/memset() or bcopy()/bzero() */ +#include <assert.h> /* assert() */ +#include "sha2.h" + +/* + * ASSERT NOTE: + * Some sanity checking code is included using assert(). On my FreeBSD + * system, this additional code can be removed by compiling with NDEBUG + * defined. Check your own system's manpage on assert() to see how to + * compile WITHOUT the sanity checking code on your system. + * + * UNROLLED TRANSFORM LOOP NOTE: + * You can define SHA2_UNROLL_TRANSFORM to use the unrolled transform + * loop version for the hash transform rounds (defined using macros + * later in this file). Either define on the command line, for example: + * + * cc -DSHA2_UNROLL_TRANSFORM -o sha2 sha2.c sha2prog.c + * + * or define below: + * + * #define SHA2_UNROLL_TRANSFORM + * + */ + + +/*** SHA-256/384/512 Machine Architecture Definitions *****************/ +/* + * BYTE_ORDER NOTE: + * + * Please make sure that your system defines BYTE_ORDER. If your + * architecture is little-endian, make sure it also defines + * LITTLE_ENDIAN and that the two (BYTE_ORDER and LITTLE_ENDIAN) are + * equivalent. + * + * If your system does not define the above, then you can do so by + * hand like this: + * + * #define LITTLE_ENDIAN 1234 + * #define BIG_ENDIAN 4321 + * + * And for little-endian machines, add: + * + * #define BYTE_ORDER LITTLE_ENDIAN + * + * Or for big-endian machines: + * + * #define BYTE_ORDER BIG_ENDIAN + * + * The FreeBSD machine this was written on defines BYTE_ORDER + * appropriately by including <sys/types.h> (which in turn includes + * <machine/endian.h> where the appropriate definitions are actually + * made). + */ +#if !defined(BYTE_ORDER) || (BYTE_ORDER != LITTLE_ENDIAN && BYTE_ORDER != BIG_ENDIAN) +#error Define BYTE_ORDER to be equal to either LITTLE_ENDIAN or BIG_ENDIAN +#endif + +/* + * Define the following sha2_* types to types of the correct length on + * the native architecture. Most BSD systems and Linux define u_intXX_t + * types. Machines with very recent ANSI C headers can use the + * uintXX_t definitions from inttypes.h by defining SHA2_USE_INTTYPES_H + * during compile or in the sha.h header file. + * + * Machines that support neither u_intXX_t nor inttypes.h's uintXX_t + * will need to define these three typedefs below (and the appropriate + * ones in sha.h too) by hand according to their system architecture. + * + * Thank you, Jun-ichiro itojun Hagino, for suggesting using u_intXX_t + * types and pointing out recent ANSI C support for uintXX_t in inttypes.h. 
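For a machine that supports neither u_intXX_t nor <inttypes.h>, the three typedefs described above would be supplied by hand; on a typical ILP32/LP64 compiler that would be (a sketch, matching the sizes the comments below require):

typedef unsigned char      sha2_byte;    /* exactly 1 byte  */
typedef unsigned int       sha2_word32;  /* exactly 4 bytes */
typedef unsigned long long sha2_word64;  /* exactly 8 bytes */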
+ */ +#ifdef SHA2_USE_INTTYPES_H + +typedef uint8_t sha2_byte; /* Exactly 1 byte */ +typedef uint32_t sha2_word32; /* Exactly 4 bytes */ +typedef uint64_t sha2_word64; /* Exactly 8 bytes */ + +#else /* SHA2_USE_INTTYPES_H */ + +typedef u_int8_t sha2_byte; /* Exactly 1 byte */ +typedef u_int32_t sha2_word32; /* Exactly 4 bytes */ +typedef u_int64_t sha2_word64; /* Exactly 8 bytes */ + +#endif /* SHA2_USE_INTTYPES_H */ + + +/*** SHA-256/384/512 Various Length Definitions ***********************/ +/* NOTE: Most of these are in sha2.h */ +#define SHA256_SHORT_BLOCK_LENGTH (SHA256_BLOCK_LENGTH - 8) +#define SHA384_SHORT_BLOCK_LENGTH (SHA384_BLOCK_LENGTH - 16) +#define SHA512_SHORT_BLOCK_LENGTH (SHA512_BLOCK_LENGTH - 16) + + +/*** ENDIAN REVERSAL MACROS *******************************************/ +#if BYTE_ORDER == LITTLE_ENDIAN +#define REVERSE32(w,x) { \ + sha2_word32 tmp = (w); \ + tmp = (tmp >> 16) | (tmp << 16); \ + (x) = ((tmp & 0xff00ff00UL) >> 8) | ((tmp & 0x00ff00ffUL) << 8); \ +} +#define REVERSE64(w,x) { \ + sha2_word64 tmp = (w); \ + tmp = (tmp >> 32) | (tmp << 32); \ + tmp = ((tmp & 0xff00ff00ff00ff00ULL) >> 8) | \ + ((tmp & 0x00ff00ff00ff00ffULL) << 8); \ + (x) = ((tmp & 0xffff0000ffff0000ULL) >> 16) | \ + ((tmp & 0x0000ffff0000ffffULL) << 16); \ +} +#endif /* BYTE_ORDER == LITTLE_ENDIAN */ + +/* + * Macro for incrementally adding the unsigned 64-bit integer n to the + * unsigned 128-bit integer (represented using a two-element array of + * 64-bit words): + */ +#define ADDINC128(w,n) { \ + (w)[0] += (sha2_word64)(n); \ + if ((w)[0] < (n)) { \ + (w)[1]++; \ + } \ +} + +/* + * Macros for copying blocks of memory and for zeroing out ranges + * of memory. Using these macros makes it easy to switch from + * using memset()/memcpy() and using bzero()/bcopy(). + * + * Please define either SHA2_USE_MEMSET_MEMCPY or define + * SHA2_USE_BZERO_BCOPY depending on which function set you + * choose to use: + */ +#if !defined(SHA2_USE_MEMSET_MEMCPY) && !defined(SHA2_USE_BZERO_BCOPY) +/* Default to memset()/memcpy() if no option is specified */ +#define SHA2_USE_MEMSET_MEMCPY 1 +#endif +#if defined(SHA2_USE_MEMSET_MEMCPY) && defined(SHA2_USE_BZERO_BCOPY) +/* Abort with an error if BOTH options are defined */ +#error Define either SHA2_USE_MEMSET_MEMCPY or SHA2_USE_BZERO_BCOPY, not both! +#endif + +#ifdef SHA2_USE_MEMSET_MEMCPY +#define MEMSET_BZERO(p,l) memset((p), 0, (l)) +#define MEMCPY_BCOPY(d,s,l) memcpy((d), (s), (l)) +#endif +#ifdef SHA2_USE_BZERO_BCOPY +#define MEMSET_BZERO(p,l) bzero((p), (l)) +#define MEMCPY_BCOPY(d,s,l) bcopy((s), (d), (l)) +#endif + + +/*** THE SIX LOGICAL FUNCTIONS ****************************************/ +/* + * Bit shifting and rotation (used by the six SHA-XYZ logical functions: + * + * NOTE: The naming of R and S appears backwards here (R is a SHIFT and + * S is a ROTATION) because the SHA-256/384/512 description document + * (see http://csrc.nist.gov/cryptval/shs/sha256-384-512.pdf) uses this + * same "backwards" definition. 
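As a quick check of the ADDINC128 macro above: the two-word counter carries into the high word exactly when the low word wraps. A standalone sketch, assuming the macro and the sha2_word64 typedef above are in scope:

#include <assert.h>

int main(void) {
    sha2_word64 w[2] = { (sha2_word64)-2, 0 };  /* low word two below wrap-around */
    ADDINC128(w, 3);                            /* low wraps to 1, carry bumps high */
    assert(w[0] == 1 && w[1] == 1);
    return 0;
}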
+ */ +/* Shift-right (used in SHA-256, SHA-384, and SHA-512): */ +#define R(b,x) ((x) >> (b)) +/* 32-bit Rotate-right (used in SHA-256): */ +#define S32(b,x) (((x) >> (b)) | ((x) << (32 - (b)))) +/* 64-bit Rotate-right (used in SHA-384 and SHA-512): */ +#define S64(b,x) (((x) >> (b)) | ((x) << (64 - (b)))) + +/* Two of six logical functions used in SHA-256, SHA-384, and SHA-512: */ +#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z))) +#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + +/* Four of six logical functions used in SHA-256: */ +#define Sigma0_256(x) (S32(2, (x)) ^ S32(13, (x)) ^ S32(22, (x))) +#define Sigma1_256(x) (S32(6, (x)) ^ S32(11, (x)) ^ S32(25, (x))) +#define sigma0_256(x) (S32(7, (x)) ^ S32(18, (x)) ^ R(3 , (x))) +#define sigma1_256(x) (S32(17, (x)) ^ S32(19, (x)) ^ R(10, (x))) + +/* Four of six logical functions used in SHA-384 and SHA-512: */ +#define Sigma0_512(x) (S64(28, (x)) ^ S64(34, (x)) ^ S64(39, (x))) +#define Sigma1_512(x) (S64(14, (x)) ^ S64(18, (x)) ^ S64(41, (x))) +#define sigma0_512(x) (S64( 1, (x)) ^ S64( 8, (x)) ^ R( 7, (x))) +#define sigma1_512(x) (S64(19, (x)) ^ S64(61, (x)) ^ R( 6, (x))) + +/*** INTERNAL FUNCTION PROTOTYPES *************************************/ +/* NOTE: These should not be accessed directly from outside this + * library -- they are intended for private internal visibility/use + * only. + */ +void SHA512_Last(SHA512_CTX*); +void SHA256_Transform(SHA256_CTX*, const sha2_word32*); +void SHA512_Transform(SHA512_CTX*, const sha2_word64*); + + +/*** SHA-XYZ INITIAL HASH VALUES AND CONSTANTS ************************/ +/* Hash constant words K for SHA-256: */ +static const sha2_word32 K256[64] = { + 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, + 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, + 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL, + 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL, + 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, + 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, + 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, + 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL, + 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL, + 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, + 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, + 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, + 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL, + 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL, + 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, + 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL +}; + +/* Initial hash value H for SHA-256: */ +static const sha2_word32 sha256_initial_hash_value[8] = { + 0x6a09e667UL, + 0xbb67ae85UL, + 0x3c6ef372UL, + 0xa54ff53aUL, + 0x510e527fUL, + 0x9b05688cUL, + 0x1f83d9abUL, + 0x5be0cd19UL +}; + +/* Hash constant words K for SHA-384 and SHA-512: */ +static const sha2_word64 K512[80] = { + 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, + 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, + 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, + 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, + 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, + 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, + 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, + 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, + 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, + 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, + 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, + 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, + 0x983e5152ee66dfabULL, 
0xa831c66d2db43210ULL, + 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, + 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, + 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, + 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, + 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, + 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, + 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, + 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, + 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, + 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, + 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, + 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, + 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, + 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, + 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, + 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, + 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, + 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, + 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, + 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, + 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, + 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, + 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, + 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, + 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, + 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, + 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL +}; + +/* Initial hash value H for SHA-384 */ +static const sha2_word64 sha384_initial_hash_value[8] = { + 0xcbbb9d5dc1059ed8ULL, + 0x629a292a367cd507ULL, + 0x9159015a3070dd17ULL, + 0x152fecd8f70e5939ULL, + 0x67332667ffc00b31ULL, + 0x8eb44a8768581511ULL, + 0xdb0c2e0d64f98fa7ULL, + 0x47b5481dbefa4fa4ULL +}; + +/* Initial hash value H for SHA-512 */ +static const sha2_word64 sha512_initial_hash_value[8] = { + 0x6a09e667f3bcc908ULL, + 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, + 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, + 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, + 0x5be0cd19137e2179ULL +}; + +/* + * Constant used by SHA256/384/512_End() functions for converting the + * digest to a readable hexadecimal character string: + */ +static const char *sha2_hex_digits = "0123456789abcdef"; + + +/*** SHA-256: *********************************************************/ +void SHA256_Init(SHA256_CTX* context) { + if (context == (SHA256_CTX*)0) { + return; + } + MEMCPY_BCOPY(context->state, sha256_initial_hash_value, SHA256_DIGEST_LENGTH); + MEMSET_BZERO(context->buffer, SHA256_BLOCK_LENGTH); + context->bitcount = 0; +} + +#ifdef SHA2_UNROLL_TRANSFORM + +/* Unrolled SHA-256 round macros: */ + +#if BYTE_ORDER == LITTLE_ENDIAN + +#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h) \ + REVERSE32(*data++, W256[j]); \ + T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + \ + K256[j] + W256[j]; \ + (d) += T1; \ + (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \ + j++ + + +#else /* BYTE_ORDER == LITTLE_ENDIAN */ + +#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h) \ + T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + \ + K256[j] + (W256[j] = *data++); \ + (d) += T1; \ + (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \ + j++ + +#endif /* BYTE_ORDER == LITTLE_ENDIAN */ + +#define ROUND256(a,b,c,d,e,f,g,h) \ + s0 = W256[(j+1)&0x0f]; \ + s0 = sigma0_256(s0); \ + s1 = W256[(j+14)&0x0f]; \ + s1 = sigma1_256(s1); \ + T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + K256[j] + \ + (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); \ + (d) += T1; \ + (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \ + j++ + +void SHA256_Transform(SHA256_CTX* context, const sha2_word32* data) { + sha2_word32 a, b, c, d, e, f, g, h, s0, s1; + sha2_word32 T1, *W256; 
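/* Note on what follows (editorially added comment): W256 aliases
 * context->buffer, so the 64-byte input block doubles as the 16-word
 * message schedule and is extended in place by the ROUND256 macro for
 * rounds 16..63. */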
+ int j; + + W256 = (sha2_word32*)context->buffer; + + /* Initialize registers with the prev. intermediate value */ + a = context->state[0]; + b = context->state[1]; + c = context->state[2]; + d = context->state[3]; + e = context->state[4]; + f = context->state[5]; + g = context->state[6]; + h = context->state[7]; + + j = 0; + do { + /* Rounds 0 to 15 (unrolled): */ + ROUND256_0_TO_15(a,b,c,d,e,f,g,h); + ROUND256_0_TO_15(h,a,b,c,d,e,f,g); + ROUND256_0_TO_15(g,h,a,b,c,d,e,f); + ROUND256_0_TO_15(f,g,h,a,b,c,d,e); + ROUND256_0_TO_15(e,f,g,h,a,b,c,d); + ROUND256_0_TO_15(d,e,f,g,h,a,b,c); + ROUND256_0_TO_15(c,d,e,f,g,h,a,b); + ROUND256_0_TO_15(b,c,d,e,f,g,h,a); + } while (j < 16); + + /* Now for the remaining rounds to 64: */ + do { + ROUND256(a,b,c,d,e,f,g,h); + ROUND256(h,a,b,c,d,e,f,g); + ROUND256(g,h,a,b,c,d,e,f); + ROUND256(f,g,h,a,b,c,d,e); + ROUND256(e,f,g,h,a,b,c,d); + ROUND256(d,e,f,g,h,a,b,c); + ROUND256(c,d,e,f,g,h,a,b); + ROUND256(b,c,d,e,f,g,h,a); + } while (j < 64); + + /* Compute the current intermediate hash value */ + context->state[0] += a; + context->state[1] += b; + context->state[2] += c; + context->state[3] += d; + context->state[4] += e; + context->state[5] += f; + context->state[6] += g; + context->state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = 0; +} + +#else /* SHA2_UNROLL_TRANSFORM */ + +void SHA256_Transform(SHA256_CTX* context, const sha2_word32* data) { + sha2_word32 a, b, c, d, e, f, g, h, s0, s1; + sha2_word32 T1, T2, *W256; + int j; + + W256 = (sha2_word32*)context->buffer; + + /* Initialize registers with the prev. intermediate value */ + a = context->state[0]; + b = context->state[1]; + c = context->state[2]; + d = context->state[3]; + e = context->state[4]; + f = context->state[5]; + g = context->state[6]; + h = context->state[7]; + + j = 0; + do { +#if BYTE_ORDER == LITTLE_ENDIAN + /* Copy data while converting to host byte order */ + REVERSE32(*data++,W256[j]); + /* Apply the SHA-256 compression function to update a..h */ + T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + W256[j]; +#else /* BYTE_ORDER == LITTLE_ENDIAN */ + /* Apply the SHA-256 compression function to update a..h with copy */ + T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + (W256[j] = *data++); +#endif /* BYTE_ORDER == LITTLE_ENDIAN */ + T2 = Sigma0_256(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 16); + + do { + /* Part of the message block expansion: */ + s0 = W256[(j+1)&0x0f]; + s0 = sigma0_256(s0); + s1 = W256[(j+14)&0x0f]; + s1 = sigma1_256(s1); + + /* Apply the SHA-256 compression function to update a..h */ + T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + + (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); + T2 = Sigma0_256(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 64); + + /* Compute the current intermediate hash value */ + context->state[0] += a; + context->state[1] += b; + context->state[2] += c; + context->state[3] += d; + context->state[4] += e; + context->state[5] += f; + context->state[6] += g; + context->state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = T2 = 0; +} + +#endif /* SHA2_UNROLL_TRANSFORM */ + +void SHA256_Update(SHA256_CTX* context, const sha2_byte *data, size_t len) { + unsigned int freespace, usedspace; + + if (len == 0) { + /* Calling with no data is valid - we do nothing */ + return; + } + + /* Sanity check: */ + assert(context != (SHA256_CTX*)0 && data 
!= (sha2_byte*)0); + + usedspace = (context->bitcount >> 3) % SHA256_BLOCK_LENGTH; + if (usedspace > 0) { + /* Calculate how much free space is available in the buffer */ + freespace = SHA256_BLOCK_LENGTH - usedspace; + + if (len >= freespace) { + /* Fill the buffer completely and process it */ + MEMCPY_BCOPY(&context->buffer[usedspace], data, freespace); + context->bitcount += freespace << 3; + len -= freespace; + data += freespace; + SHA256_Transform(context, (sha2_word32*)context->buffer); + } else { + /* The buffer is not yet full */ + MEMCPY_BCOPY(&context->buffer[usedspace], data, len); + context->bitcount += len << 3; + /* Clean up: */ + usedspace = freespace = 0; + return; + } + } + while (len >= SHA256_BLOCK_LENGTH) { + /* Process as many complete blocks as we can */ + SHA256_Transform(context, (sha2_word32*)data); + context->bitcount += SHA256_BLOCK_LENGTH << 3; + len -= SHA256_BLOCK_LENGTH; + data += SHA256_BLOCK_LENGTH; + } + if (len > 0) { + /* There's left-overs, so save 'em */ + MEMCPY_BCOPY(context->buffer, data, len); + context->bitcount += len << 3; + } + /* Clean up: */ + usedspace = freespace = 0; +} + +void SHA256_Final(sha2_byte digest[], SHA256_CTX* context) { + sha2_word32 *d = (sha2_word32*)digest; + unsigned int usedspace; + + /* Sanity check: */ + assert(context != (SHA256_CTX*)0); + + /* If no digest buffer is passed, we don't bother doing this: */ + if (digest != (sha2_byte*)0) { + usedspace = (context->bitcount >> 3) % SHA256_BLOCK_LENGTH; +#if BYTE_ORDER == LITTLE_ENDIAN + /* Convert FROM host byte order */ + REVERSE64(context->bitcount,context->bitcount); +#endif + if (usedspace > 0) { + /* Begin padding with a 1 bit: */ + context->buffer[usedspace++] = 0x80; + + if (usedspace <= SHA256_SHORT_BLOCK_LENGTH) { + /* Set-up for the last transform: */ + MEMSET_BZERO(&context->buffer[usedspace], SHA256_SHORT_BLOCK_LENGTH - usedspace); + } else { + if (usedspace < SHA256_BLOCK_LENGTH) { + MEMSET_BZERO(&context->buffer[usedspace], SHA256_BLOCK_LENGTH - usedspace); + } + /* Do second-to-last transform: */ + SHA256_Transform(context, (sha2_word32*)context->buffer); + + /* And set-up for the last transform: */ + MEMSET_BZERO(context->buffer, SHA256_SHORT_BLOCK_LENGTH); + } + } else { + /* Set-up for the last transform: */ + MEMSET_BZERO(context->buffer, SHA256_SHORT_BLOCK_LENGTH); + + /* Begin padding with a 1 bit: */ + *context->buffer = 0x80; + } + /* Set the bit count: */ + sha2_word64* ptr = (sha2_word64*)(&context->buffer[SHA256_SHORT_BLOCK_LENGTH]); + *ptr = context->bitcount; + + /* Final transform: */ + SHA256_Transform(context, (sha2_word32*)context->buffer); + +#if BYTE_ORDER == LITTLE_ENDIAN + { + /* Convert TO host byte order */ + int j; + for (j = 0; j < 8; j++) { + REVERSE32(context->state[j],context->state[j]); + *d++ = context->state[j]; + } + } +#else + MEMCPY_BCOPY(d, context->state, SHA256_DIGEST_LENGTH); +#endif + } + + /* Clean up state data: */ + MEMSET_BZERO(context, sizeof(SHA256_CTX)); + usedspace = 0; +} + +char *SHA256_End(SHA256_CTX* context, char buffer[]) { + sha2_byte digest[SHA256_DIGEST_LENGTH], *d = digest; + int i; + + /* Sanity check: */ + assert(context != (SHA256_CTX*)0); + + if (buffer != (char*)0) { + SHA256_Final(digest, context); + + for (i = 0; i < SHA256_DIGEST_LENGTH; i++) { + *buffer++ = sha2_hex_digits[(*d & 0xf0) >> 4]; + *buffer++ = sha2_hex_digits[*d & 0x0f]; + d++; + } + *buffer = (char)0; + } else { + MEMSET_BZERO(context, sizeof(SHA256_CTX)); + } + MEMSET_BZERO(digest, SHA256_DIGEST_LENGTH); + return buffer; +} + 
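Because SHA256_Update buffers partial blocks, a message may be fed in arbitrary pieces and yields the same digest as a single call; a minimal streaming sketch (assuming SHA2_USE_INTTYPES_H, so the byte type is uint8_t):

#define SHA2_USE_INTTYPES_H
#include <stdio.h>
#include <stdint.h>
#include "sha2.h"

int main(void) {
    SHA256_CTX ctx;
    char hex[SHA256_DIGEST_STRING_LENGTH];     /* 2 hex digits per byte + NUL */
    SHA256_Init(&ctx);
    SHA256_Update(&ctx, (const uint8_t *)"hello ", 6);
    SHA256_Update(&ctx, (const uint8_t *)"world", 5);
    SHA256_End(&ctx, hex);                     /* finalizes, renders lowercase hex */
    printf("%s\n", hex);
    return 0;
}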
+char* SHA256_Data(const sha2_byte* data, size_t len, char digest[SHA256_DIGEST_STRING_LENGTH]) { + SHA256_CTX context; + + SHA256_Init(&context); + SHA256_Update(&context, data, len); + return SHA256_End(&context, digest); +} + + +/*** SHA-512: *********************************************************/ +void SHA512_Init(SHA512_CTX* context) { + if (context == (SHA512_CTX*)0) { + return; + } + MEMCPY_BCOPY(context->state, sha512_initial_hash_value, SHA512_DIGEST_LENGTH); + MEMSET_BZERO(context->buffer, SHA512_BLOCK_LENGTH); + context->bitcount[0] = context->bitcount[1] = 0; +} + +#ifdef SHA2_UNROLL_TRANSFORM + +/* Unrolled SHA-512 round macros: */ +#if BYTE_ORDER == LITTLE_ENDIAN + +#define ROUND512_0_TO_15(a,b,c,d,e,f,g,h) \ + REVERSE64(*data++, W512[j]); \ + T1 = (h) + Sigma1_512(e) + Ch((e), (f), (g)) + \ + K512[j] + W512[j]; \ + (d) += T1, \ + (h) = T1 + Sigma0_512(a) + Maj((a), (b), (c)), \ + j++ + + +#else /* BYTE_ORDER == LITTLE_ENDIAN */ + +#define ROUND512_0_TO_15(a,b,c,d,e,f,g,h) \ + T1 = (h) + Sigma1_512(e) + Ch((e), (f), (g)) + \ + K512[j] + (W512[j] = *data++); \ + (d) += T1; \ + (h) = T1 + Sigma0_512(a) + Maj((a), (b), (c)); \ + j++ + +#endif /* BYTE_ORDER == LITTLE_ENDIAN */ + +#define ROUND512(a,b,c,d,e,f,g,h) \ + s0 = W512[(j+1)&0x0f]; \ + s0 = sigma0_512(s0); \ + s1 = W512[(j+14)&0x0f]; \ + s1 = sigma1_512(s1); \ + T1 = (h) + Sigma1_512(e) + Ch((e), (f), (g)) + K512[j] + \ + (W512[j&0x0f] += s1 + W512[(j+9)&0x0f] + s0); \ + (d) += T1; \ + (h) = T1 + Sigma0_512(a) + Maj((a), (b), (c)); \ + j++ + +void SHA512_Transform(SHA512_CTX* context, const sha2_word64* data) { + sha2_word64 a, b, c, d, e, f, g, h, s0, s1; + sha2_word64 T1, *W512 = (sha2_word64*)context->buffer; + int j; + + /* Initialize registers with the prev. intermediate value */ + a = context->state[0]; + b = context->state[1]; + c = context->state[2]; + d = context->state[3]; + e = context->state[4]; + f = context->state[5]; + g = context->state[6]; + h = context->state[7]; + + j = 0; + do { + ROUND512_0_TO_15(a,b,c,d,e,f,g,h); + ROUND512_0_TO_15(h,a,b,c,d,e,f,g); + ROUND512_0_TO_15(g,h,a,b,c,d,e,f); + ROUND512_0_TO_15(f,g,h,a,b,c,d,e); + ROUND512_0_TO_15(e,f,g,h,a,b,c,d); + ROUND512_0_TO_15(d,e,f,g,h,a,b,c); + ROUND512_0_TO_15(c,d,e,f,g,h,a,b); + ROUND512_0_TO_15(b,c,d,e,f,g,h,a); + } while (j < 16); + + /* Now for the remaining rounds up to 79: */ + do { + ROUND512(a,b,c,d,e,f,g,h); + ROUND512(h,a,b,c,d,e,f,g); + ROUND512(g,h,a,b,c,d,e,f); + ROUND512(f,g,h,a,b,c,d,e); + ROUND512(e,f,g,h,a,b,c,d); + ROUND512(d,e,f,g,h,a,b,c); + ROUND512(c,d,e,f,g,h,a,b); + ROUND512(b,c,d,e,f,g,h,a); + } while (j < 80); + + /* Compute the current intermediate hash value */ + context->state[0] += a; + context->state[1] += b; + context->state[2] += c; + context->state[3] += d; + context->state[4] += e; + context->state[5] += f; + context->state[6] += g; + context->state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = 0; +} + +#else /* SHA2_UNROLL_TRANSFORM */ + +void SHA512_Transform(SHA512_CTX* context, const sha2_word64* data) { + sha2_word64 a, b, c, d, e, f, g, h, s0, s1; + sha2_word64 T1, T2, *W512 = (sha2_word64*)context->buffer; + int j; + + /* Initialize registers with the prev. 
intermediate value */ + a = context->state[0]; + b = context->state[1]; + c = context->state[2]; + d = context->state[3]; + e = context->state[4]; + f = context->state[5]; + g = context->state[6]; + h = context->state[7]; + + j = 0; + do { +#if BYTE_ORDER == LITTLE_ENDIAN + /* Convert TO host byte order */ + REVERSE64(*data++, W512[j]); + /* Apply the SHA-512 compression function to update a..h */ + T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + W512[j]; +#else /* BYTE_ORDER == LITTLE_ENDIAN */ + /* Apply the SHA-512 compression function to update a..h with copy */ + T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + (W512[j] = *data++); +#endif /* BYTE_ORDER == LITTLE_ENDIAN */ + T2 = Sigma0_512(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 16); + + do { + /* Part of the message block expansion: */ + s0 = W512[(j+1)&0x0f]; + s0 = sigma0_512(s0); + s1 = W512[(j+14)&0x0f]; + s1 = sigma1_512(s1); + + /* Apply the SHA-512 compression function to update a..h */ + T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + + (W512[j&0x0f] += s1 + W512[(j+9)&0x0f] + s0); + T2 = Sigma0_512(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 80); + + /* Compute the current intermediate hash value */ + context->state[0] += a; + context->state[1] += b; + context->state[2] += c; + context->state[3] += d; + context->state[4] += e; + context->state[5] += f; + context->state[6] += g; + context->state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = T2 = 0; +} + +#endif /* SHA2_UNROLL_TRANSFORM */ + +void SHA512_Update(SHA512_CTX* context, const sha2_byte *data, size_t len) { + unsigned int freespace, usedspace; + + if (len == 0) { + /* Calling with no data is valid - we do nothing */ + return; + } + + /* Sanity check: */ + assert(context != (SHA512_CTX*)0 && data != (sha2_byte*)0); + + usedspace = (context->bitcount[0] >> 3) % SHA512_BLOCK_LENGTH; + if (usedspace > 0) { + /* Calculate how much free space is available in the buffer */ + freespace = SHA512_BLOCK_LENGTH - usedspace; + + if (len >= freespace) { + /* Fill the buffer completely and process it */ + MEMCPY_BCOPY(&context->buffer[usedspace], data, freespace); + ADDINC128(context->bitcount, freespace << 3); + len -= freespace; + data += freespace; + SHA512_Transform(context, (sha2_word64*)context->buffer); + } else { + /* The buffer is not yet full */ + MEMCPY_BCOPY(&context->buffer[usedspace], data, len); + ADDINC128(context->bitcount, len << 3); + /* Clean up: */ + usedspace = freespace = 0; + return; + } + } + while (len >= SHA512_BLOCK_LENGTH) { + /* Process as many complete blocks as we can */ + SHA512_Transform(context, (sha2_word64*)data); + ADDINC128(context->bitcount, SHA512_BLOCK_LENGTH << 3); + len -= SHA512_BLOCK_LENGTH; + data += SHA512_BLOCK_LENGTH; + } + if (len > 0) { + /* There's left-overs, so save 'em */ + MEMCPY_BCOPY(context->buffer, data, len); + ADDINC128(context->bitcount, len << 3); + } + /* Clean up: */ + usedspace = freespace = 0; +} + +void SHA512_Last(SHA512_CTX* context) { + unsigned int usedspace; + + usedspace = (context->bitcount[0] >> 3) % SHA512_BLOCK_LENGTH; +#if BYTE_ORDER == LITTLE_ENDIAN + /* Convert FROM host byte order */ + REVERSE64(context->bitcount[0],context->bitcount[0]); + REVERSE64(context->bitcount[1],context->bitcount[1]); +#endif + if (usedspace > 0) { + /* Begin padding with a 1 bit: */ + context->buffer[usedspace++] = 0x80; + + if 
(usedspace <= SHA512_SHORT_BLOCK_LENGTH) { + /* Set-up for the last transform: */ + MEMSET_BZERO(&context->buffer[usedspace], SHA512_SHORT_BLOCK_LENGTH - usedspace); + } else { + if (usedspace < SHA512_BLOCK_LENGTH) { + MEMSET_BZERO(&context->buffer[usedspace], SHA512_BLOCK_LENGTH - usedspace); + } + /* Do second-to-last transform: */ + SHA512_Transform(context, (sha2_word64*)context->buffer); + + /* And set-up for the last transform: */ + MEMSET_BZERO(context->buffer, SHA512_BLOCK_LENGTH - 2); + } + } else { + /* Prepare for final transform: */ + MEMSET_BZERO(context->buffer, SHA512_SHORT_BLOCK_LENGTH); + + /* Begin padding with a 1 bit: */ + *context->buffer = 0x80; + } + /* Store the length of input data (in bits): */ + sha2_word64 *ptr = (sha2_word64*)(&context->buffer[SHA512_SHORT_BLOCK_LENGTH]); + *ptr = context->bitcount[1]; + ptr = (sha2_word64*)(&context->buffer[SHA512_SHORT_BLOCK_LENGTH+8]); + *ptr = context->bitcount[0]; + + /* Final transform: */ + SHA512_Transform(context, (sha2_word64*)context->buffer); +} + +void SHA512_Final(sha2_byte digest[], SHA512_CTX* context) { + sha2_word64 *d = (sha2_word64*)digest; + + /* Sanity check: */ + assert(context != (SHA512_CTX*)0); + + /* If no digest buffer is passed, we don't bother doing this: */ + if (digest != (sha2_byte*)0) { + SHA512_Last(context); + + /* Save the hash data for output: */ +#if BYTE_ORDER == LITTLE_ENDIAN + { + /* Convert TO host byte order */ + int j; + for (j = 0; j < 8; j++) { + REVERSE64(context->state[j],context->state[j]); + *d++ = context->state[j]; + } + } +#else + MEMCPY_BCOPY(d, context->state, SHA512_DIGEST_LENGTH); +#endif + } + + /* Zero out state data */ + MEMSET_BZERO(context, sizeof(SHA512_CTX)); +} + +char *SHA512_End(SHA512_CTX* context, char buffer[]) { + sha2_byte digest[SHA512_DIGEST_LENGTH], *d = digest; + int i; + + /* Sanity check: */ + assert(context != (SHA512_CTX*)0); + + if (buffer != (char*)0) { + SHA512_Final(digest, context); + + for (i = 0; i < SHA512_DIGEST_LENGTH; i++) { + *buffer++ = sha2_hex_digits[(*d & 0xf0) >> 4]; + *buffer++ = sha2_hex_digits[*d & 0x0f]; + d++; + } + *buffer = (char)0; + } else { + MEMSET_BZERO(context, sizeof(SHA512_CTX)); + } + MEMSET_BZERO(digest, SHA512_DIGEST_LENGTH); + return buffer; +} + +char* SHA512_Data(const sha2_byte* data, size_t len, char digest[SHA512_DIGEST_STRING_LENGTH]) { + SHA512_CTX context; + + SHA512_Init(&context); + SHA512_Update(&context, data, len); + return SHA512_End(&context, digest); +} + + +/*** SHA-384: *********************************************************/ +void SHA384_Init(SHA384_CTX* context) { + if (context == (SHA384_CTX*)0) { + return; + } + MEMCPY_BCOPY(context->state, sha384_initial_hash_value, SHA512_DIGEST_LENGTH); + MEMSET_BZERO(context->buffer, SHA384_BLOCK_LENGTH); + context->bitcount[0] = context->bitcount[1] = 0; +} + +void SHA384_Update(SHA384_CTX* context, const sha2_byte* data, size_t len) { + SHA512_Update((SHA512_CTX*)context, data, len); +} + +void SHA384_Final(sha2_byte digest[], SHA384_CTX* context) { + sha2_word64 *d = (sha2_word64*)digest; + + /* Sanity check: */ + assert(context != (SHA384_CTX*)0); + + /* If no digest buffer is passed, we don't bother doing this: */ + if (digest != (sha2_byte*)0) { + SHA512_Last((SHA512_CTX*)context); + + /* Save the hash data for output: */ +#if BYTE_ORDER == LITTLE_ENDIAN + { + /* Convert TO host byte order */ + int j; + for (j = 0; j < 6; j++) { + REVERSE64(context->state[j],context->state[j]); + *d++ = context->state[j]; + } + } +#else + MEMCPY_BCOPY(d, 
context->state, SHA384_DIGEST_LENGTH); +#endif + } + + /* Zero out state data */ + MEMSET_BZERO(context, sizeof(SHA384_CTX)); +} + +char *SHA384_End(SHA384_CTX* context, char buffer[]) { + sha2_byte digest[SHA384_DIGEST_LENGTH], *d = digest; + int i; + + /* Sanity check: */ + assert(context != (SHA384_CTX*)0); + + if (buffer != (char*)0) { + SHA384_Final(digest, context); + + for (i = 0; i < SHA384_DIGEST_LENGTH; i++) { + *buffer++ = sha2_hex_digits[(*d & 0xf0) >> 4]; + *buffer++ = sha2_hex_digits[*d & 0x0f]; + d++; + } + *buffer = (char)0; + } else { + MEMSET_BZERO(context, sizeof(SHA384_CTX)); + } + MEMSET_BZERO(digest, SHA384_DIGEST_LENGTH); + return buffer; +} + +char* SHA384_Data(const sha2_byte* data, size_t len, char digest[SHA384_DIGEST_STRING_LENGTH]) { + SHA384_CTX context; + + SHA384_Init(&context); + SHA384_Update(&context, data, len); + return SHA384_End(&context, digest); +} + diff --git a/src/sha2.h b/src/sha2.h new file mode 100644 index 000000000..bf759ad45 --- /dev/null +++ b/src/sha2.h @@ -0,0 +1,197 @@ +/* + * FILE: sha2.h + * AUTHOR: Aaron D. Gifford - http://www.aarongifford.com/ + * + * Copyright (c) 2000-2001, Aaron D. Gifford + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTOR(S) ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTOR(S) BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: sha2.h,v 1.1 2001/11/08 00:02:01 adg Exp adg $ + */ + +#ifndef __SHA2_H__ +#define __SHA2_H__ + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * Import u_intXX_t size_t type definitions from system headers. You + * may need to change this, or define these things yourself in this + * file. 
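The *_Data convenience wrappers prototyped in this header hash a whole buffer in one call and return the hex string; a one-shot usage sketch (again assuming SHA2_USE_INTTYPES_H):

#define SHA2_USE_INTTYPES_H
#include <stdio.h>
#include <stdint.h>
#include "sha2.h"

int main(void) {
    char hex[SHA512_DIGEST_STRING_LENGTH];
    printf("%s\n", SHA512_Data((const uint8_t *)"abc", 3, hex));  /* fills hex, returns it */
    return 0;
}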
+ */ +#include <sys/types.h> + +#ifdef SHA2_USE_INTTYPES_H + +#include <inttypes.h> + +#endif /* SHA2_USE_INTTYPES_H */ + + +/*** SHA-256/384/512 Various Length Definitions ***********************/ +#define SHA256_BLOCK_LENGTH 64 +#define SHA256_DIGEST_LENGTH 32 +#define SHA256_DIGEST_STRING_LENGTH (SHA256_DIGEST_LENGTH * 2 + 1) +#define SHA384_BLOCK_LENGTH 128 +#define SHA384_DIGEST_LENGTH 48 +#define SHA384_DIGEST_STRING_LENGTH (SHA384_DIGEST_LENGTH * 2 + 1) +#define SHA512_BLOCK_LENGTH 128 +#define SHA512_DIGEST_LENGTH 64 +#define SHA512_DIGEST_STRING_LENGTH (SHA512_DIGEST_LENGTH * 2 + 1) + + +/*** SHA-256/384/512 Context Structures *******************************/ +/* NOTE: If your architecture does not define either u_intXX_t types or + * uintXX_t (from inttypes.h), you may need to define things by hand + * for your system: + */ +#if 0 +typedef unsigned char u_int8_t; /* 1-byte (8-bits) */ +typedef unsigned int u_int32_t; /* 4-bytes (32-bits) */ +typedef unsigned long long u_int64_t; /* 8-bytes (64-bits) */ +#endif +/* + * Most BSD systems already define u_intXX_t types, as does Linux. + * Some systems, however, like Compaq's Tru64 Unix, instead can use + * uintXX_t types defined by very recent ANSI C standards and included + * in the file: + * + * #include <inttypes.h> + * + * If you choose to use <inttypes.h> then please define: + * + * #define SHA2_USE_INTTYPES_H + * + * Or on the command line during compile: + * + * cc -DSHA2_USE_INTTYPES_H ... + */ +#ifdef SHA2_USE_INTTYPES_H + +typedef struct _SHA256_CTX { + uint32_t state[8]; + uint64_t bitcount; + uint8_t buffer[SHA256_BLOCK_LENGTH]; +} SHA256_CTX; +typedef struct _SHA512_CTX { + uint64_t state[8]; + uint64_t bitcount[2]; + uint8_t buffer[SHA512_BLOCK_LENGTH]; +} SHA512_CTX; + +#else /* SHA2_USE_INTTYPES_H */ + +typedef struct _SHA256_CTX { + u_int32_t state[8]; + u_int64_t bitcount; + u_int8_t buffer[SHA256_BLOCK_LENGTH]; +} SHA256_CTX; +typedef struct _SHA512_CTX { + u_int64_t state[8]; + u_int64_t bitcount[2]; + u_int8_t buffer[SHA512_BLOCK_LENGTH]; +} SHA512_CTX; + +#endif /* SHA2_USE_INTTYPES_H */ + +typedef SHA512_CTX SHA384_CTX; + + +/*** SHA-256/384/512 Function Prototypes ******************************/ +#ifndef NOPROTO +#ifdef SHA2_USE_INTTYPES_H + +void SHA256_Init(SHA256_CTX *); +void SHA256_Update(SHA256_CTX*, const uint8_t*, size_t); +void SHA256_Final(uint8_t[SHA256_DIGEST_LENGTH], SHA256_CTX*); +char* SHA256_End(SHA256_CTX*, char[SHA256_DIGEST_STRING_LENGTH]); +char* SHA256_Data(const uint8_t*, size_t, char[SHA256_DIGEST_STRING_LENGTH]); + +void SHA384_Init(SHA384_CTX*); +void SHA384_Update(SHA384_CTX*, const uint8_t*, size_t); +void SHA384_Final(uint8_t[SHA384_DIGEST_LENGTH], SHA384_CTX*); +char* SHA384_End(SHA384_CTX*, char[SHA384_DIGEST_STRING_LENGTH]); +char* SHA384_Data(const uint8_t*, size_t, char[SHA384_DIGEST_STRING_LENGTH]); + +void SHA512_Init(SHA512_CTX*); +void SHA512_Update(SHA512_CTX*, const uint8_t*, size_t); +void SHA512_Final(uint8_t[SHA512_DIGEST_LENGTH], SHA512_CTX*); +char* SHA512_End(SHA512_CTX*, char[SHA512_DIGEST_STRING_LENGTH]); +char* SHA512_Data(const uint8_t*, size_t, char[SHA512_DIGEST_STRING_LENGTH]); + +#else /* SHA2_USE_INTTYPES_H */ + +void SHA256_Init(SHA256_CTX *); +void SHA256_Update(SHA256_CTX*, const u_int8_t*, size_t); +void SHA256_Final(u_int8_t[SHA256_DIGEST_LENGTH], SHA256_CTX*); +char* SHA256_End(SHA256_CTX*, char[SHA256_DIGEST_STRING_LENGTH]); +char* SHA256_Data(const u_int8_t*, size_t, char[SHA256_DIGEST_STRING_LENGTH]); + +void SHA384_Init(SHA384_CTX*); +void SHA384_Update(SHA384_CTX*, const u_int8_t*, size_t); +void 
SHA384_Final(u_int8_t[SHA384_DIGEST_LENGTH], SHA384_CTX*); +char* SHA384_End(SHA384_CTX*, char[SHA384_DIGEST_STRING_LENGTH]); +char* SHA384_Data(const u_int8_t*, size_t, char[SHA384_DIGEST_STRING_LENGTH]); + +void SHA512_Init(SHA512_CTX*); +void SHA512_Update(SHA512_CTX*, const u_int8_t*, size_t); +void SHA512_Final(u_int8_t[SHA512_DIGEST_LENGTH], SHA512_CTX*); +char* SHA512_End(SHA512_CTX*, char[SHA512_DIGEST_STRING_LENGTH]); +char* SHA512_Data(const u_int8_t*, size_t, char[SHA512_DIGEST_STRING_LENGTH]); + +#endif /* SHA2_USE_INTTYPES_H */ + +#else /* NOPROTO */ + +void SHA256_Init(); +void SHA256_Update(); +void SHA256_Final(); +char* SHA256_End(); +char* SHA256_Data(); + +void SHA384_Init(); +void SHA384_Update(); +void SHA384_Final(); +char* SHA384_End(); +char* SHA384_Data(); + +void SHA512_Init(); +void SHA512_Update(); +void SHA512_Final(); +char* SHA512_End(); +char* SHA512_Data(); + +#endif /* NOPROTO */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __SHA2_H__ */ + diff --git a/src/stats.c b/src/stats.c new file mode 100644 index 000000000..18ad5c088 --- /dev/null +++ b/src/stats.c @@ -0,0 +1,245 @@ +/* + * Copyright 2011-2014 Formal Methods and Tools, University of Twente + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
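stats.c below keeps one sylvan_stats_t per worker thread: a plain __thread variable where ELF TLS is available, with a pthread_key_t fallback otherwise (e.g. on Mach-O). The pattern in miniature (a sketch, not the file's own code):

#include <pthread.h>

#ifdef __ELF__
static __thread long my_counter;                      /* compiler-managed TLS */
#define COUNT() (my_counter++)
#else
static pthread_key_t my_key;                          /* slot created once at init */
#define COUNT() ((*(long *)pthread_getspecific(my_key))++)
#endif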
+ */ + +#include // for errno +#include // memset +#include +#include +#include +#include // for nodes table + +#if SYLVAN_STATS + +#ifdef __ELF__ +__thread sylvan_stats_t sylvan_stats; +#else +pthread_key_t sylvan_stats_key; +#endif + +#ifndef USE_HWLOC +#define USE_HWLOC 0 +#endif + +#if USE_HWLOC +#include +static hwloc_topology_t topo; +#endif + +VOID_TASK_0(sylvan_stats_reset_perthread) +{ +#ifdef __ELF__ + for (int i=0; icpuset, HWLOC_MEMBIND_BIND, 0); +#endif + pthread_setspecific(sylvan_stats_key, sylvan_stats); + } + for (int i=0; icounters[i] = 0; + } + for (int i=0; itimers[i] = 0; + } +#endif +} + +VOID_TASK_IMPL_0(sylvan_stats_init) +{ +#ifndef __ELF__ + pthread_key_create(&sylvan_stats_key, NULL); +#endif +#if USE_HWLOC + hwloc_topology_init(&topo); + hwloc_topology_load(topo); +#endif + TOGETHER(sylvan_stats_reset_perthread); +} + +/** + * Reset all counters (for statistics) + */ +VOID_TASK_IMPL_0(sylvan_stats_reset) +{ + TOGETHER(sylvan_stats_reset_perthread); +} + +#define BLACK "\33[22;30m" +#define GRAY "\33[01;30m" +#define RED "\33[22;31m" +#define LRED "\33[01;31m" +#define GREEN "\33[22;32m" +#define LGREEN "\33[01;32m" +#define BLUE "\33[22;34m" +#define LBLUE "\33[01;34m" +#define BROWN "\33[22;33m" +#define YELLOW "\33[01;33m" +#define CYAN "\33[22;36m" +#define LCYAN "\33[22;36m" +#define MAGENTA "\33[22;35m" +#define LMAGENTA "\33[01;35m" +#define NC "\33[0m" +#define BOLD "\33[1m" +#define ULINE "\33[4m" //underline +#define BLINK "\33[5m" +#define INVERT "\33[7m" + +VOID_TASK_1(sylvan_stats_sum, sylvan_stats_t*, target) +{ +#ifdef __ELF__ + for (int i=0; icounters[i], sylvan_stats.counters[i]); + } + for (int i=0; itimers[i], sylvan_stats.timers[i]); + } +#else + sylvan_stats_t *sylvan_stats = pthread_getspecific(sylvan_stats_key); + if (sylvan_stats != NULL) { + for (int i=0; icounters[i], sylvan_stats->counters[i]); + } + for (int i=0; itimers[i], sylvan_stats->timers[i]); + } + } +#endif +} + +void +sylvan_stats_report(FILE *target, int color) +{ +#if !SYLVAN_STATS + (void)target; + (void)color; + return; +#else + (void)color; + + sylvan_stats_t totals; + memset(&totals, 0, sizeof(sylvan_stats_t)); + + LACE_ME; + TOGETHER(sylvan_stats_sum, &totals); + + // fix timers for MACH +#ifdef __MACH__ + mach_timebase_info_data_t timebase; + mach_timebase_info(&timebase); + uint64_t c = timebase.numer/timebase.denom; + for (int i=0;i +#include + +#ifndef SYLVAN_STATS_H +#define SYLVAN_STATS_H + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +typedef enum { + BDD_ITE, + BDD_AND, + BDD_XOR, + BDD_EXISTS, + BDD_AND_EXISTS, + BDD_RELNEXT, + BDD_RELPREV, + BDD_SATCOUNT, + BDD_COMPOSE, + BDD_RESTRICT, + BDD_CONSTRAIN, + BDD_CLOSURE, + BDD_ISBDD, + BDD_SUPPORT, + BDD_PATHCOUNT, + BDD_ITE_CACHEDPUT, + BDD_AND_CACHEDPUT, + BDD_XOR_CACHEDPUT, + BDD_EXISTS_CACHEDPUT, + BDD_AND_EXISTS_CACHEDPUT, + BDD_RELNEXT_CACHEDPUT, + BDD_RELPREV_CACHEDPUT, + BDD_SATCOUNT_CACHEDPUT, + BDD_COMPOSE_CACHEDPUT, + BDD_RESTRICT_CACHEDPUT, + BDD_CONSTRAIN_CACHEDPUT, + BDD_CLOSURE_CACHEDPUT, + BDD_ISBDD_CACHEDPUT, + BDD_SUPPORT_CACHEDPUT, + BDD_PATHCOUNT_CACHEDPUT, + BDD_ITE_CACHED, + BDD_AND_CACHED, + BDD_XOR_CACHED, + BDD_EXISTS_CACHED, + BDD_AND_EXISTS_CACHED, + BDD_RELNEXT_CACHED, + BDD_RELPREV_CACHED, + BDD_SATCOUNT_CACHED, + BDD_COMPOSE_CACHED, + BDD_RESTRICT_CACHED, + BDD_CONSTRAIN_CACHED, + BDD_CLOSURE_CACHED, + BDD_ISBDD_CACHED, + BDD_SUPPORT_CACHED, + BDD_PATHCOUNT_CACHED, + BDD_NODES_CREATED, + BDD_NODES_REUSED, + + LDD_UNION, + LDD_MINUS, + LDD_INTERSECT, + LDD_RELPROD, + 
LDD_RELPREV, + LDD_PROJECT, + LDD_JOIN, + LDD_MATCH, + LDD_SATCOUNT, + LDD_SATCOUNTL, + LDD_ZIP, + LDD_RELPROD_UNION, + LDD_PROJECT_MINUS, + LDD_UNION_CACHEDPUT, + LDD_MINUS_CACHEDPUT, + LDD_INTERSECT_CACHEDPUT, + LDD_RELPROD_CACHEDPUT, + LDD_RELPREV_CACHEDPUT, + LDD_PROJECT_CACHEDPUT, + LDD_JOIN_CACHEDPUT, + LDD_MATCH_CACHEDPUT, + LDD_SATCOUNT_CACHEDPUT, + LDD_SATCOUNTL_CACHEDPUT, + LDD_ZIP_CACHEDPUT, + LDD_RELPROD_UNION_CACHEDPUT, + LDD_PROJECT_MINUS_CACHEDPUT, + LDD_UNION_CACHED, + LDD_MINUS_CACHED, + LDD_INTERSECT_CACHED, + LDD_RELPROD_CACHED, + LDD_RELPREV_CACHED, + LDD_PROJECT_CACHED, + LDD_JOIN_CACHED, + LDD_MATCH_CACHED, + LDD_SATCOUNT_CACHED, + LDD_SATCOUNTL_CACHED, + LDD_ZIP_CACHED, + LDD_RELPROD_UNION_CACHED, + LDD_PROJECT_MINUS_CACHED, + LDD_NODES_CREATED, + LDD_NODES_REUSED, + + LLMSSET_LOOKUP, + + SYLVAN_GC_COUNT, + SYLVAN_COUNTER_COUNTER +} Sylvan_Counters; + +typedef enum +{ + SYLVAN_GC, + SYLVAN_TIMER_COUNTER +} Sylvan_Timers; + +/** + * Initialize stats system (done by sylvan_init_package) + */ +#define sylvan_stats_init() CALL(sylvan_stats_init) +VOID_TASK_DECL_0(sylvan_stats_init) + +/** + * Reset all counters (for statistics) + */ +#define sylvan_stats_reset() CALL(sylvan_stats_reset) +VOID_TASK_DECL_0(sylvan_stats_reset) + +/** + * Write statistic report to file (stdout, stderr, etc) + */ +void sylvan_stats_report(FILE* target, int color); + +#if SYLVAN_STATS + +/* Infrastructure for internal markings */ +typedef struct +{ + uint64_t counters[SYLVAN_COUNTER_COUNTER]; + uint64_t timers[SYLVAN_TIMER_COUNTER]; + uint64_t timers_startstop[SYLVAN_TIMER_COUNTER]; +} sylvan_stats_t; + +#ifdef __MACH__ +#include +#define getabstime() mach_absolute_time() +#else +#include +static uint64_t +getabstime() +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + uint64_t t = ts.tv_sec; + t *= 1000000000UL; + t += ts.tv_nsec; + return t; +} +#endif + +#ifdef __ELF__ +extern __thread sylvan_stats_t sylvan_stats; +#else +#include +extern pthread_key_t sylvan_stats_key; +#endif + +static inline void +sylvan_stats_count(size_t counter) +{ +#ifdef __ELF__ + sylvan_stats.counters[counter]++; +#else + sylvan_stats_t *sylvan_stats = (sylvan_stats_t*)pthread_getspecific(sylvan_stats_key); + sylvan_stats->counters[counter]++; +#endif +} + +static inline void +sylvan_stats_add(size_t counter, size_t amount) +{ +#ifdef __ELF__ + sylvan_stats.counters[counter]+=amount; +#else + sylvan_stats_t *sylvan_stats = (sylvan_stats_t*)pthread_getspecific(sylvan_stats_key); + sylvan_stats->counters[counter]+=amount; +#endif +} + +static inline void +sylvan_timer_start(size_t timer) +{ + uint64_t t = getabstime(); + +#ifdef __ELF__ + sylvan_stats.timers_startstop[timer] = t; +#else + sylvan_stats_t *sylvan_stats = (sylvan_stats_t*)pthread_getspecific(sylvan_stats_key); + sylvan_stats->timers_startstop[timer] = t; +#endif +} + +static inline void +sylvan_timer_stop(size_t timer) +{ + uint64_t t = getabstime(); + +#ifdef __ELF__ + sylvan_stats.timers[timer] += (t - sylvan_stats.timers_startstop[timer]); +#else + sylvan_stats_t *sylvan_stats = (sylvan_stats_t*)pthread_getspecific(sylvan_stats_key); + sylvan_stats->timers[timer] += (t - sylvan_stats->timers_startstop[timer]); +#endif +} + +#else + +static inline void +sylvan_stats_count(size_t counter) +{ + (void)counter; +} + +static inline void +sylvan_stats_add(size_t counter, size_t amount) +{ + (void)counter; + (void)amount; +} + +static inline void +sylvan_timer_start(size_t timer) +{ + (void)timer; +} + +static inline void 
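/* Usage sketch (editorially added, illustrative): a timed phase brackets
 * its work with
 *   sylvan_timer_start(SYLVAN_GC);
 *   ...collect garbage...
 *   sylvan_timer_stop(SYLVAN_GC);
 * In this !SYLVAN_STATS branch both calls compile to no-ops. */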
+sylvan_timer_stop(size_t timer) +{ + (void)timer; +} + +#endif + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif diff --git a/src/sylvan.h b/src/sylvan.h new file mode 100644 index 000000000..6fe09f9d6 --- /dev/null +++ b/src/sylvan.h @@ -0,0 +1,182 @@ +/* + * Copyright 2011-2015 Formal Methods and Tools, University of Twente + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Sylvan: parallel BDD/ListDD package. + * + * This is a multi-core implementation of BDDs with complement edges. + * + * This package requires parallel the work-stealing framework Lace. + * Lace must be initialized before initializing Sylvan + * + * This package uses explicit referencing. + * Use sylvan_ref and sylvan_deref to manage external references. + * + * Garbage collection requires all workers to cooperate. Garbage collection is either initiated + * by the user (calling sylvan_gc) or when the nodes table is full. All Sylvan operations + * check whether they need to cooperate on garbage collection. Garbage collection cannot occur + * otherwise. This means that it is perfectly fine to do this: + * BDD a = sylvan_ref(sylvan_and(b, c)); + * since it is not possible that garbage collection occurs between the two calls. + * + * To temporarily disable garbage collection, use sylvan_gc_disable() and sylvan_gc_enable(). + */ + +#include + +#include +#include // for FILE +#include +#include // for definitions + +#include +#include +#include + +#ifndef SYLVAN_H +#define SYLVAN_H + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#ifndef SYLVAN_SIZE_FIBONACCI +#define SYLVAN_SIZE_FIBONACCI 0 +#endif + +// For now, only support 64-bit systems +typedef char __sylvan_check_size_t_is_8_bytes[(sizeof(uint64_t) == sizeof(size_t))?1:-1]; + +/** + * Initialize the Sylvan parallel decision diagrams package. + * + * After initialization, call sylvan_init_bdd and/or sylvan_init_ldd if you want to use + * the BDD and/or LDD functionality. + * + * BDDs and LDDs share a common node table and operations cache. + * + * The node table is resizable. + * The table is resized automatically when >50% of the table is filled during garbage collection. + * This behavior can be customized by overriding the gc hook. + * + * Memory usage: + * Every node requires 24 bytes memory. (16 bytes data + 8 bytes overhead) + * Every operation cache entry requires 36 bytes memory. (32 bytes data + 4 bytes overhead) + * + * Reasonable defaults: datasize of 1L<<26 (2048 MB), cachesize of 1L<<25 (1152 MB) + */ +void sylvan_init_package(size_t initial_tablesize, size_t max_tablesize, size_t initial_cachesize, size_t max_cachesize); + +/** + * Frees all Sylvan data (also calls the quit() functions of BDD/MDD parts) + */ +void sylvan_quit(); + +/** + * Return number of occupied buckets in nodes table and total number of buckets. + */ +VOID_TASK_DECL_2(sylvan_table_usage, size_t*, size_t*); +#define sylvan_table_usage(filled, total) (CALL(sylvan_table_usage, filled, total)) + +/** + * Perform garbage collection. 
+ * + * Garbage collection is performed in a new Lace frame, interrupting all ongoing work + * until garbage collection is completed. + * + * Garbage collection procedure: + * 1) The operation cache is cleared and the hash table is reset. + * 2) All live nodes are marked (to be rehashed). This is done by the "mark" callbacks. + * 3) The "hook" callback is called. + * By default, this doubles the hash table size when it is >50% full. + * 4) All live nodes are rehashed into the hash table. + * + * The behavior of garbage collection can be customized by adding "mark" callbacks and + * replacing the "hook" callback. + */ +VOID_TASK_DECL_0(sylvan_gc); +#define sylvan_gc() (CALL(sylvan_gc)) + +/** + * Enable or disable garbage collection. + * + * This affects both automatic and manual garbage collection, i.e., + * calling sylvan_gc() while garbage collection is disabled does not have any effect. + */ +void sylvan_gc_enable(); +void sylvan_gc_disable(); + +/** + * Add a "mark" callback to the list of callbacks. + * + * These are called during garbage collection to recursively mark nodes. + * + * Default "mark" functions that mark external references (via sylvan_ref) and internal + * references (inside operations) are added by sylvan_init_bdd/sylvan_init_bdd. + * + * Functions are called in order. + * level 10: marking functions of Sylvan (external/internal references) + * level 20: call the hook function (for resizing) + * level 30: rehashing + */ +LACE_TYPEDEF_CB(void, gc_mark_cb); +void sylvan_gc_add_mark(int order, gc_mark_cb callback); + +/** + * Set "hook" callback. There can be only one. + * + * The hook is called after the "mark" phase and before the "rehash" phase. + * This allows users to perform certain actions, such as resizing the nodes table + * and the operation cache. Also, dynamic resizing could be performed then. + */ +LACE_TYPEDEF_CB(void, gc_hook_cb); +void sylvan_gc_set_hook(gc_hook_cb new_hook); + +/** + * One of the hooks for resizing behavior. + * Default if SYLVAN_AGGRESSIVE_RESIZE is set. + * Always double size on gc() until maximum reached. + */ +VOID_TASK_DECL_0(sylvan_gc_aggressive_resize); + +/** + * One of the hooks for resizing behavior. + * Default if SYLVAN_AGGRESSIVE_RESIZE is not set. + * Double size on gc() whenever >50% is used. + */ +VOID_TASK_DECL_0(sylvan_gc_default_hook); + +/** + * Set "notify on dead" callback for the nodes table. + * See also documentation in llmsset.h + */ +#define sylvan_set_ondead(cb, ctx) llmsset_set_ondead(nodes, cb, ctx) + +/** + * Global variables (number of workers, nodes table) + */ + +extern llmsset_t nodes; + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#include +#include +#include + +#endif diff --git a/src/sylvan_bdd.c b/src/sylvan_bdd.c new file mode 100644 index 000000000..74936a62d --- /dev/null +++ b/src/sylvan_bdd.c @@ -0,0 +1,2820 @@ +/* + * Copyright 2011-2014 Formal Methods and Tools, University of Twente + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
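Putting the interface above together: a minimal program initializes Lace first, then the package, then the BDD layer. A sketch; lace_init/lace_startup follow the pattern of the examples shipped with Sylvan, and the table/cache sizes here are illustrative:

#include <sylvan.h>

int main(void) {
    lace_init(0, 0);                 /* autodetect worker count, default deque size */
    lace_startup(0, NULL, NULL);     /* default stack size, no bootstrap task */
    sylvan_init_package(1LL<<22, 1LL<<26, 1LL<<22, 1LL<<26);
    sylvan_init_bdd(1);              /* granularity 1: consult cache at every level */

    LACE_ME;                         /* bind this thread to a Lace worker */
    BDD a = sylvan_ithvar(1);
    BDD b = sylvan_ithvar(2);
    BDD f = sylvan_ref(sylvan_and(a, b));
    /* ... */
    sylvan_deref(f);
    sylvan_quit();
    lace_exit();
    return 0;
}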
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/** + * Complement handling macros + */ +#define BDD_HASMARK(s) (s&sylvan_complement?1:0) +#define BDD_TOGGLEMARK(s) (s^sylvan_complement) +#define BDD_STRIPMARK(s) (s&~sylvan_complement) +#define BDD_TRANSFERMARK(from, to) (to ^ (from & sylvan_complement)) +// Equal under mark +#define BDD_EQUALM(a, b) ((((a)^(b))&(~sylvan_complement))==0) + +/** + * BDD node structure + */ +typedef struct __attribute__((packed)) bddnode { + uint64_t a, b; +} * bddnode_t; // 16 bytes + +#define GETNODE(bdd) ((bddnode_t)llmsset_index_to_ptr(nodes, bdd&0x000000ffffffffff)) + +static inline int __attribute__((unused)) +bddnode_getcomp(bddnode_t n) +{ + return n->a & 0x8000000000000000 ? 1 : 0; +} + +static inline uint64_t +bddnode_getlow(bddnode_t n) +{ + return n->b & 0x000000ffffffffff; // 40 bits +} + +static inline uint64_t +bddnode_gethigh(bddnode_t n) +{ + return n->a & 0x800000ffffffffff; // 40 bits plus high bit of first +} + +static inline uint32_t +bddnode_getvariable(bddnode_t n) +{ + return (uint32_t)(n->b >> 40); +} + +static inline int +bddnode_getmark(bddnode_t n) +{ + return n->a & 0x2000000000000000 ? 1 : 0; +} + +static inline void +bddnode_setmark(bddnode_t n, int mark) +{ + if (mark) n->a |= 0x2000000000000000; + else n->a &= 0xdfffffffffffffff; +} + +static inline void +bddnode_makenode(bddnode_t n, uint32_t var, uint64_t low, uint64_t high) +{ + n->a = high; + n->b = ((uint64_t)var)<<40 | low; +} + +/** + * Implementation of garbage collection. + */ + +/* Recursively mark BDD nodes as 'in use' */ +VOID_TASK_IMPL_1(sylvan_gc_mark_rec, BDD, bdd) +{ + if (bdd == sylvan_false || bdd == sylvan_true) return; + + if (llmsset_mark(nodes, bdd&0x000000ffffffffff)) { + bddnode_t n = GETNODE(bdd); + SPAWN(sylvan_gc_mark_rec, bddnode_getlow(n)); + CALL(sylvan_gc_mark_rec, bddnode_gethigh(n)); + SYNC(sylvan_gc_mark_rec); + } +} + +/** + * External references + */ + +refs_table_t bdd_refs; +refs_table_t bdd_protected; +static int bdd_protected_created = 0; + +BDD +sylvan_ref(BDD a) +{ + if (a == sylvan_false || a == sylvan_true) return a; + refs_up(&bdd_refs, BDD_STRIPMARK(a)); + return a; +} + +void +sylvan_deref(BDD a) +{ + if (a == sylvan_false || a == sylvan_true) return; + refs_down(&bdd_refs, BDD_STRIPMARK(a)); +} + +void +sylvan_protect(BDD *a) +{ + if (!bdd_protected_created) { + // In C++, sometimes sylvan_protect is called before Sylvan is initialized. Just create a table. 
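/* Illustrative case (editorially added sketch): in C++, a static object's
 * constructor may run before main() and hence before sylvan_init_bdd():
 *   struct Keeper { BDD b; Keeper() { sylvan_protect(&b); } };
 *   static Keeper k;
 * Creating the table lazily here keeps that pattern safe. */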
+ protect_create(&bdd_protected, 4096); + bdd_protected_created = 1; + } + protect_up(&bdd_protected, (size_t)a); +} + +void +sylvan_unprotect(BDD *a) +{ + if (bdd_protected.refs_table != NULL) protect_down(&bdd_protected, (size_t)a); +} + +size_t +sylvan_count_refs() +{ + return refs_count(&bdd_refs); +} + +size_t +sylvan_count_protected() +{ + return protect_count(&bdd_protected); +} + +/* Called during garbage collection */ +VOID_TASK_0(sylvan_gc_mark_external_refs) +{ + // iterate through refs hash table, mark all found + size_t count=0; + uint64_t *it = refs_iter(&bdd_refs, 0, bdd_refs.refs_size); + while (it != NULL) { + BDD to_mark = refs_next(&bdd_refs, &it, bdd_refs.refs_size); + SPAWN(sylvan_gc_mark_rec, to_mark); + count++; + } + while (count--) { + SYNC(sylvan_gc_mark_rec); + } +} + +VOID_TASK_0(sylvan_gc_mark_protected) +{ + // iterate through refs hash table, mark all found + size_t count=0; + uint64_t *it = protect_iter(&bdd_protected, 0, bdd_protected.refs_size); + while (it != NULL) { + BDD *to_mark = (BDD*)protect_next(&bdd_protected, &it, bdd_protected.refs_size); + SPAWN(sylvan_gc_mark_rec, *to_mark); + count++; + } + while (count--) { + SYNC(sylvan_gc_mark_rec); + } +} + +/* Infrastructure for internal markings */ +DECLARE_THREAD_LOCAL(bdd_refs_key, bdd_refs_internal_t); + +VOID_TASK_0(bdd_refs_mark_task) +{ + LOCALIZE_THREAD_LOCAL(bdd_refs_key, bdd_refs_internal_t); + size_t i, j=0; + for (i=0; ir_count; i++) { + if (j >= 40) { + while (j--) SYNC(sylvan_gc_mark_rec); + j=0; + } + SPAWN(sylvan_gc_mark_rec, bdd_refs_key->results[i]); + j++; + } + for (i=0; is_count; i++) { + Task *t = bdd_refs_key->spawns[i]; + if (!TASK_IS_STOLEN(t)) break; + if (TASK_IS_COMPLETED(t)) { + if (j >= 40) { + while (j--) SYNC(sylvan_gc_mark_rec); + j=0; + } + SPAWN(sylvan_gc_mark_rec, *(BDD*)TASK_RESULT(t)); + j++; + } + } + while (j--) SYNC(sylvan_gc_mark_rec); +} + +VOID_TASK_0(bdd_refs_mark) +{ + TOGETHER(bdd_refs_mark_task); +} + +VOID_TASK_0(bdd_refs_init_task) +{ + bdd_refs_internal_t s = (bdd_refs_internal_t)malloc(sizeof(struct bdd_refs_internal)); + s->r_size = 128; + s->r_count = 0; + s->s_size = 128; + s->s_count = 0; + s->results = (BDD*)malloc(sizeof(BDD) * 128); + s->spawns = (Task**)malloc(sizeof(Task*) * 128); + SET_THREAD_LOCAL(bdd_refs_key, s); +} + +VOID_TASK_0(bdd_refs_init) +{ + INIT_THREAD_LOCAL(bdd_refs_key); + TOGETHER(bdd_refs_init_task); + sylvan_gc_add_mark(10, TASK(bdd_refs_mark)); +} + +/** + * Initialize and quit functions + */ + +static int granularity = 1; // default + +static void +sylvan_quit_bdd() +{ + refs_free(&bdd_refs); + if (bdd_protected_created) { + protect_free(&bdd_protected); + bdd_protected_created = 0; + } +} + +void +sylvan_init_bdd(int _granularity) +{ + sylvan_register_quit(sylvan_quit_bdd); + sylvan_gc_add_mark(10, TASK(sylvan_gc_mark_external_refs)); + sylvan_gc_add_mark(10, TASK(sylvan_gc_mark_protected)); + + granularity = _granularity; + + // Sanity check + if (sizeof(struct bddnode) != 16) { + fprintf(stderr, "Invalid size of bdd nodes: %ld\n", sizeof(struct bddnode)); + exit(1); + } + + refs_create(&bdd_refs, 1024); + if (!bdd_protected_created) { + protect_create(&bdd_protected, 4096); + bdd_protected_created = 1; + } + + LACE_ME; + CALL(bdd_refs_init); +} + +/** + * Core BDD operations + */ + +BDD +sylvan_makenode(BDDVAR level, BDD low, BDD high) +{ + if (low == high) return low; + + // Normalization to keep canonicity + // low will have no mark + + struct bddnode n; + int mark; + + if (BDD_HASMARK(low)) { + mark = 1; + low = 
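/* Canonical form (editorially added comment): the low edge never carries
 * the complement mark. If it does, toggle the mark on both children and
 * complement the resulting node instead: (x ? h : l) == ~(x ? ~h : ~l). */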
BDD_TOGGLEMARK(low); + high = BDD_TOGGLEMARK(high); + } else { + mark = 0; + } + + bddnode_makenode(&n, level, low, high); + + BDD result; + int created; + uint64_t index = llmsset_lookup(nodes, n.a, n.b, &created); + if (index == 0) { + LACE_ME; + + bdd_refs_push(low); + bdd_refs_push(high); + sylvan_gc(); + bdd_refs_pop(2); + + index = llmsset_lookup(nodes, n.a, n.b, &created); + if (index == 0) { + fprintf(stderr, "BDD Unique table full, %zu of %zu buckets filled!\n", llmsset_count_marked(nodes), llmsset_get_size(nodes)); + exit(1); + } + } + + if (created) sylvan_stats_count(BDD_NODES_CREATED); + else sylvan_stats_count(BDD_NODES_REUSED); + + result = index; + return mark ? result | sylvan_complement : result; +} + +BDD +sylvan_ithvar(BDDVAR level) +{ + return sylvan_makenode(level, sylvan_false, sylvan_true); +} + +BDDVAR +sylvan_var(BDD bdd) +{ + return bddnode_getvariable(GETNODE(bdd)); +} + +static BDD +node_low(BDD bdd, bddnode_t node) +{ + return BDD_TRANSFERMARK(bdd, bddnode_getlow(node)); +} + +static BDD +node_high(BDD bdd, bddnode_t node) +{ + return BDD_TRANSFERMARK(bdd, bddnode_gethigh(node)); +} + +BDD +sylvan_low(BDD bdd) +{ + if (sylvan_isconst(bdd)) return bdd; + return node_low(bdd, GETNODE(bdd)); +} + +BDD +sylvan_high(BDD bdd) +{ + if (sylvan_isconst(bdd)) return bdd; + return node_high(bdd, GETNODE(bdd)); +} + +/** + * Implementation of unary, binary and if-then-else operators. + */ +TASK_IMPL_3(BDD, sylvan_and, BDD, a, BDD, b, BDDVAR, prev_level) +{ + /* Terminal cases */ + if (a == sylvan_true) return b; + if (b == sylvan_true) return a; + if (a == sylvan_false) return sylvan_false; + if (b == sylvan_false) return sylvan_false; + if (a == b) return a; + if (a == BDD_TOGGLEMARK(b)) return sylvan_false; + + sylvan_gc_test(); + + sylvan_stats_count(BDD_AND); + + /* Improve for caching */ + if (BDD_STRIPMARK(a) > BDD_STRIPMARK(b)) { + BDD t = b; + b = a; + a = t; + } + + bddnode_t na = GETNODE(a); + bddnode_t nb = GETNODE(b); + + BDDVAR va = bddnode_getvariable(na); + BDDVAR vb = bddnode_getvariable(nb); + BDDVAR level = va < vb ? va : vb; + + int cachenow = granularity < 2 || prev_level == 0 ? 
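+        /* (Editorial note, not part of the original source.) Operation
+         * caching is throttled by 'granularity': a result is cached only
+         * when the recursion crosses from one block of 'granularity' levels
+         * into another. With granularity 4, descending from level 2 (block
+         * 0, levels 0-3) to level 5 (block 1, levels 4-7) caches the result;
+         * descending from level 2 to level 3 does not. */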
1 : prev_level / granularity != level / granularity; + if (cachenow) { + BDD result; + if (cache_get3(CACHE_BDD_AND, a, b, sylvan_false, &result)) { + sylvan_stats_count(BDD_AND_CACHED); + return result; + } + } + + // Get cofactors + BDD aLow = a, aHigh = a; + BDD bLow = b, bHigh = b; + if (level == va) { + aLow = node_low(a, na); + aHigh = node_high(a, na); + } + if (level == vb) { + bLow = node_low(b, nb); + bHigh = node_high(b, nb); + } + + // Recursive computation + BDD low=sylvan_invalid, high=sylvan_invalid, result; + + int n=0; + + if (aHigh == sylvan_true) { + high = bHigh; + } else if (aHigh == sylvan_false || bHigh == sylvan_false) { + high = sylvan_false; + } else if (bHigh == sylvan_true) { + high = aHigh; + } else { + bdd_refs_spawn(SPAWN(sylvan_and, aHigh, bHigh, level)); + n=1; + } + + if (aLow == sylvan_true) { + low = bLow; + } else if (aLow == sylvan_false || bLow == sylvan_false) { + low = sylvan_false; + } else if (bLow == sylvan_true) { + low = aLow; + } else { + low = CALL(sylvan_and, aLow, bLow, level); + } + + if (n) { + bdd_refs_push(low); + high = bdd_refs_sync(SYNC(sylvan_and)); + bdd_refs_pop(1); + } + + result = sylvan_makenode(level, low, high); + + if (cachenow) { + if (cache_put3(CACHE_BDD_AND, a, b, sylvan_false, result)) sylvan_stats_count(BDD_AND_CACHEDPUT); + } + + return result; +} + +TASK_IMPL_3(BDD, sylvan_xor, BDD, a, BDD, b, BDDVAR, prev_level) +{ + /* Terminal cases */ + if (a == sylvan_false) return b; + if (b == sylvan_false) return a; + if (a == sylvan_true) return sylvan_not(b); + if (b == sylvan_true) return sylvan_not(a); + if (a == b) return sylvan_false; + if (a == sylvan_not(b)) return sylvan_true; + + sylvan_gc_test(); + + sylvan_stats_count(BDD_XOR); + + /* Improve for caching */ + if (BDD_STRIPMARK(a) > BDD_STRIPMARK(b)) { + BDD t = b; + b = a; + a = t; + } + + // XOR(~A,B) => XOR(A,~B) + if (BDD_HASMARK(a)) { + a = BDD_STRIPMARK(a); + b = sylvan_not(b); + } + + bddnode_t na = GETNODE(a); + bddnode_t nb = GETNODE(b); + + BDDVAR va = bddnode_getvariable(na); + BDDVAR vb = bddnode_getvariable(nb); + BDDVAR level = va < vb ? va : vb; + + int cachenow = granularity < 2 || prev_level == 0 ? 
1 : prev_level / granularity != level / granularity; + if (cachenow) { + BDD result; + if (cache_get3(CACHE_BDD_XOR, a, b, sylvan_false, &result)) { + sylvan_stats_count(BDD_XOR_CACHED); + return result; + } + } + + // Get cofactors + BDD aLow = a, aHigh = a; + BDD bLow = b, bHigh = b; + if (level == va) { + aLow = node_low(a, na); + aHigh = node_high(a, na); + } + if (level == vb) { + bLow = node_low(b, nb); + bHigh = node_high(b, nb); + } + + // Recursive computation + BDD low, high, result; + + bdd_refs_spawn(SPAWN(sylvan_xor, aHigh, bHigh, level)); + low = CALL(sylvan_xor, aLow, bLow, level); + bdd_refs_push(low); + high = bdd_refs_sync(SYNC(sylvan_xor)); + bdd_refs_pop(1); + + result = sylvan_makenode(level, low, high); + + if (cachenow) { + if (cache_put3(CACHE_BDD_XOR, a, b, sylvan_false, result)) sylvan_stats_count(BDD_XOR_CACHEDPUT); + } + + return result; +} + + +TASK_IMPL_4(BDD, sylvan_ite, BDD, a, BDD, b, BDD, c, BDDVAR, prev_level) +{ + /* Terminal cases */ + if (a == sylvan_true) return b; + if (a == sylvan_false) return c; + if (a == b) b = sylvan_true; + if (a == sylvan_not(b)) b = sylvan_false; + if (a == c) c = sylvan_false; + if (a == sylvan_not(c)) c = sylvan_true; + if (b == c) return b; + if (b == sylvan_true && c == sylvan_false) return a; + if (b == sylvan_false && c == sylvan_true) return sylvan_not(a); + + /* Cases that reduce to AND and XOR */ + + // ITE(A,B,0) => AND(A,B) + if (c == sylvan_false) return CALL(sylvan_and, a, b, prev_level); + + // ITE(A,1,C) => ~AND(~A,~C) + if (b == sylvan_true) return sylvan_not(CALL(sylvan_and, sylvan_not(a), sylvan_not(c), prev_level)); + + // ITE(A,0,C) => AND(~A,C) + if (b == sylvan_false) return CALL(sylvan_and, sylvan_not(a), c, prev_level); + + // ITE(A,B,1) => ~AND(A,~B) + if (c == sylvan_true) return sylvan_not(CALL(sylvan_and, a, sylvan_not(b), prev_level)); + + // ITE(A,B,~B) => XOR(A,~B) + if (b == sylvan_not(c)) return CALL(sylvan_xor, a, c, 0); + + /* At this point, there are no more terminals */ + + /* Canonical for optimal cache use */ + + // ITE(~A,B,C) => ITE(A,C,B) + if (BDD_HASMARK(a)) { + a = BDD_STRIPMARK(a); + BDD t = c; + c = b; + b = t; + } + + // ITE(A,~B,C) => ~ITE(A,B,~C) + int mark = 0; + if (BDD_HASMARK(b)) { + b = sylvan_not(b); + c = sylvan_not(c); + mark = 1; + } + + bddnode_t na = GETNODE(a); + bddnode_t nb = GETNODE(b); + bddnode_t nc = GETNODE(c); + + BDDVAR va = bddnode_getvariable(na); + BDDVAR vb = bddnode_getvariable(nb); + BDDVAR vc = bddnode_getvariable(nc); + + // Get lowest level + BDDVAR level = vb < vc ? vb : vc; + + // Fast case + if (va < level && node_low(a, na) == sylvan_false && node_high(a, na) == sylvan_true) { + BDD result = sylvan_makenode(va, c, b); + return mark ? sylvan_not(result) : result; + } + + if (va < level) level = va; + + sylvan_gc_test(); + + sylvan_stats_count(BDD_ITE); + + int cachenow = granularity < 2 || prev_level == 0 ? 1 : prev_level / granularity != level / granularity; + if (cachenow) { + BDD result; + if (cache_get3(CACHE_BDD_ITE, a, b, c, &result)) { + sylvan_stats_count(BDD_ITE_CACHED); + return mark ? 
sylvan_not(result) : result; + } + } + + // Get cofactors + BDD aLow = a, aHigh = a; + BDD bLow = b, bHigh = b; + BDD cLow = c, cHigh = c; + if (level == va) { + aLow = node_low(a, na); + aHigh = node_high(a, na); + } + if (level == vb) { + bLow = node_low(b, nb); + bHigh = node_high(b, nb); + } + if (level == vc) { + cLow = node_low(c, nc); + cHigh = node_high(c, nc); + } + + // Recursive computation + BDD low=sylvan_invalid, high=sylvan_invalid, result; + + int n=0; + + if (aHigh == sylvan_true) { + high = bHigh; + } else if (aHigh == sylvan_false) { + high = cHigh; + } else { + bdd_refs_spawn(SPAWN(sylvan_ite, aHigh, bHigh, cHigh, level)); + n=1; + } + + if (aLow == sylvan_true) { + low = bLow; + } else if (aLow == sylvan_false) { + low = cLow; + } else { + low = CALL(sylvan_ite, aLow, bLow, cLow, level); + } + + if (n) { + bdd_refs_push(low); + high = bdd_refs_sync(SYNC(sylvan_ite)); + bdd_refs_pop(1); + } + + result = sylvan_makenode(level, low, high); + + if (cachenow) { + if (cache_put3(CACHE_BDD_ITE, a, b, c, result)) sylvan_stats_count(BDD_ITE_CACHEDPUT); + } + + return mark ? sylvan_not(result) : result; +} + +/** + * Calculate constrain a @ c + */ +TASK_IMPL_3(BDD, sylvan_constrain, BDD, a, BDD, b, BDDVAR, prev_level) +{ + /* Trivial cases */ + if (b == sylvan_true) return a; + if (b == sylvan_false) return sylvan_false; + if (sylvan_isconst(a)) return a; + if (a == b) return sylvan_true; + if (a == sylvan_not(b)) return sylvan_false; + + /* Perhaps execute garbage collection */ + sylvan_gc_test(); + + /* Count operation */ + sylvan_stats_count(BDD_CONSTRAIN); + + // a != constant and b != constant + bddnode_t na = GETNODE(a); + bddnode_t nb = GETNODE(b); + + BDDVAR va = bddnode_getvariable(na); + BDDVAR vb = bddnode_getvariable(nb); + BDDVAR level = va < vb ? va : vb; + + // CONSULT CACHE + + int cachenow = granularity < 2 || prev_level == 0 ? 
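+    /* (Editorial note, not part of the original source.) Constrain computes
+     * the generalized cofactor a@b: the result agrees with a on every
+     * assignment satisfying b, i.e. (a@b) AND b == a AND b, and is free to
+     * take any value outside b if that yields a smaller BDD. */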
1 : prev_level / granularity != level / granularity; + if (cachenow) { + BDD result; + if (cache_get3(CACHE_BDD_CONSTRAIN, a, b, 0, &result)) { + sylvan_stats_count(BDD_CONSTRAIN_CACHED); + return result; + } + } + + // DETERMINE TOP BDDVAR AND COFACTORS + + BDD aLow, aHigh, bLow, bHigh; + + if (level == va) { + aLow = node_low(a, na); + aHigh = node_high(a, na); + } else { + aLow = aHigh = a; + } + + if (level == vb) { + bLow = node_low(b, nb); + bHigh = node_high(b, nb); + } else { + bLow = bHigh = b; + } + + BDD result; + + BDD low=sylvan_invalid, high=sylvan_invalid; + if (bLow == sylvan_false) return CALL(sylvan_constrain, aHigh, bHigh, level); + if (bLow == sylvan_true) { + if (bHigh == sylvan_false) return aLow; + if (bHigh == sylvan_true) { + result = sylvan_makenode(level, aLow, bHigh); + } else { + high = CALL(sylvan_constrain, aHigh, bHigh, level); + result = sylvan_makenode(level, aLow, high); + } + } else { + if (bHigh == sylvan_false) return CALL(sylvan_constrain, aLow, bLow, level); + if (bHigh == sylvan_true) { + low = CALL(sylvan_constrain, aLow, bLow, level); + result = sylvan_makenode(level, low, bHigh); + } else { + bdd_refs_spawn(SPAWN(sylvan_constrain, aLow, bLow, level)); + high = CALL(sylvan_constrain, aHigh, bHigh, level); + bdd_refs_push(high); + low = bdd_refs_sync(SYNC(sylvan_constrain)); + bdd_refs_pop(1); + result = sylvan_makenode(level, low, high); + } + } + + if (cachenow) { + if (cache_put3(CACHE_BDD_CONSTRAIN, a, b, 0, result)) sylvan_stats_count(BDD_CONSTRAIN_CACHEDPUT); + } + + return result; +} + +/** + * Calculate restrict a @ b + */ +TASK_IMPL_3(BDD, sylvan_restrict, BDD, a, BDD, b, BDDVAR, prev_level) +{ + /* Trivial cases */ + if (b == sylvan_true) return a; + if (b == sylvan_false) return sylvan_false; + if (sylvan_isconst(a)) return a; + if (a == b) return sylvan_true; + if (a == sylvan_not(b)) return sylvan_false; + + /* Perhaps execute garbage collection */ + sylvan_gc_test(); + + /* Count operation */ + sylvan_stats_count(BDD_RESTRICT); + + // a != constant and b != constant + bddnode_t na = GETNODE(a); + bddnode_t nb = GETNODE(b); + + BDDVAR va = bddnode_getvariable(na); + BDDVAR vb = bddnode_getvariable(nb); + BDDVAR level = va < vb ? va : vb; + + /* Consult cache */ + int cachenow = granularity < 2 || prev_level == 0 ? 1 : prev_level / granularity != level / granularity; + if (cachenow) { + BDD result; + if (cache_get3(CACHE_BDD_RESTRICT, a, b, 0, &result)) { + sylvan_stats_count(BDD_RESTRICT_CACHED); + return result; + } + } + + BDD result; + + if (vb < va) { + BDD c = CALL(sylvan_ite, node_low(b,nb), sylvan_true, node_high(b,nb), 0); + bdd_refs_push(c); + result = CALL(sylvan_restrict, a, c, level); + bdd_refs_pop(1); + } else { + BDD aLow=node_low(a,na),aHigh=node_high(a,na),bLow=b,bHigh=b; + if (va == vb) { + bLow = node_low(b,nb); + bHigh = node_high(b,nb); + } + if (bLow == sylvan_false) { + result = CALL(sylvan_restrict, aHigh, bHigh, level); + } else if (bHigh == sylvan_false) { + result = CALL(sylvan_restrict, aLow, bLow, level); + } else { + bdd_refs_spawn(SPAWN(sylvan_restrict, aLow, bLow, level)); + BDD high = CALL(sylvan_restrict, aHigh, bHigh, level); + bdd_refs_push(high); + BDD low = bdd_refs_sync(SYNC(sylvan_restrict)); + bdd_refs_pop(1); + result = sylvan_makenode(level, low, high); + } + } + + if (cachenow) { + if (cache_put3(CACHE_BDD_RESTRICT, a, b, 0, result)) sylvan_stats_count(BDD_RESTRICT_CACHEDPUT); + } + + return result; +} + +/** + * Calculates \exists variables . 
a + */ +TASK_IMPL_3(BDD, sylvan_exists, BDD, a, BDD, variables, BDDVAR, prev_level) +{ + /* Terminal cases */ + if (a == sylvan_true) return sylvan_true; + if (a == sylvan_false) return sylvan_false; + if (sylvan_set_isempty(variables)) return a; + + // a != constant + bddnode_t na = GETNODE(a); + BDDVAR level = bddnode_getvariable(na); + + bddnode_t nv = GETNODE(variables); + BDDVAR vv = bddnode_getvariable(nv); + while (vv < level) { + variables = node_high(variables, nv); + if (sylvan_set_isempty(variables)) return a; + nv = GETNODE(variables); + vv = bddnode_getvariable(nv); + } + + sylvan_gc_test(); + + sylvan_stats_count(BDD_EXISTS); + + int cachenow = granularity < 2 || prev_level == 0 ? 1 : prev_level / granularity != level / granularity; + if (cachenow) { + BDD result; + if (cache_get3(CACHE_BDD_EXISTS, a, variables, 0, &result)) { + sylvan_stats_count(BDD_EXISTS_CACHED); + return result; + } + } + + // Get cofactors + BDD aLow = node_low(a, na); + BDD aHigh = node_high(a, na); + + BDD result; + + if (vv == level) { + // level is in variable set, perform abstraction + if (aLow == sylvan_true || aHigh == sylvan_true || aLow == sylvan_not(aHigh)) { + result = sylvan_true; + } else { + BDD _v = sylvan_set_next(variables); + BDD low = CALL(sylvan_exists, aLow, _v, level); + if (low == sylvan_true) { + result = sylvan_true; + } else { + bdd_refs_push(low); + BDD high = CALL(sylvan_exists, aHigh, _v, level); + if (high == sylvan_true) { + result = sylvan_true; + bdd_refs_pop(1); + } else if (low == sylvan_false && high == sylvan_false) { + result = sylvan_false; + bdd_refs_pop(1); + } else { + bdd_refs_push(high); + result = sylvan_or(low, high); + bdd_refs_pop(2); + } + } + } + } else { + // level is not in variable set + BDD low, high; + bdd_refs_spawn(SPAWN(sylvan_exists, aHigh, variables, level)); + low = CALL(sylvan_exists, aLow, variables, level); + bdd_refs_push(low); + high = bdd_refs_sync(SYNC(sylvan_exists)); + bdd_refs_pop(1); + result = sylvan_makenode(level, low, high); + } + + if (cachenow) { + if (cache_put3(CACHE_BDD_EXISTS, a, variables, 0, result)) sylvan_stats_count(BDD_EXISTS_CACHEDPUT); + } + + return result; +} + +/** + * Calculate exists(a AND b, v) + */ +TASK_IMPL_4(BDD, sylvan_and_exists, BDD, a, BDD, b, BDDSET, v, BDDVAR, prev_level) +{ + /* Terminal cases */ + if (a == sylvan_false) return sylvan_false; + if (b == sylvan_false) return sylvan_false; + if (a == sylvan_not(b)) return sylvan_false; + if (a == sylvan_true && b == sylvan_true) return sylvan_true; + + /* Cases that reduce to "exists" and "and" */ + if (a == sylvan_true) return CALL(sylvan_exists, b, v, 0); + if (b == sylvan_true) return CALL(sylvan_exists, a, v, 0); + if (a == b) return CALL(sylvan_exists, a, v, 0); + if (sylvan_set_isempty(v)) return sylvan_and(a, b); + + /* At this point, a and b are proper nodes, and v is non-empty */ + + /* Improve for caching */ + if (BDD_STRIPMARK(a) > BDD_STRIPMARK(b)) { + BDD t = b; + b = a; + a = t; + } + + /* Maybe perform garbage collection */ + sylvan_gc_test(); + + sylvan_stats_count(BDD_AND_EXISTS); + + // a != constant + bddnode_t na = GETNODE(a); + bddnode_t nb = GETNODE(b); + bddnode_t nv = GETNODE(v); + + BDDVAR va = bddnode_getvariable(na); + BDDVAR vb = bddnode_getvariable(nb); + BDDVAR vv = bddnode_getvariable(nv); + BDDVAR level = va < vb ? 
va : vb; + + /* Skip levels in v that are not in a and b */ + while (vv < level) { + v = node_high(v, nv); // get next variable in conjunction + if (sylvan_set_isempty(v)) return sylvan_and(a, b); + nv = GETNODE(v); + vv = bddnode_getvariable(nv); + } + + BDD result; + + int cachenow = granularity < 2 || prev_level == 0 ? 1 : prev_level / granularity != level / granularity; + if (cachenow) { + if (cache_get3(CACHE_BDD_AND_EXISTS, a, b, v, &result)) { + sylvan_stats_count(BDD_AND_EXISTS_CACHED); + return result; + } + } + + // Get cofactors + BDD aLow, aHigh, bLow, bHigh; + if (level == va) { + aLow = node_low(a, na); + aHigh = node_high(a, na); + } else { + aLow = a; + aHigh = a; + } + if (level == vb) { + bLow = node_low(b, nb); + bHigh = node_high(b, nb); + } else { + bLow = b; + bHigh = b; + } + + if (level == vv) { + // level is in variable set, perform abstraction + BDD _v = node_high(v, nv); + BDD low = CALL(sylvan_and_exists, aLow, bLow, _v, level); + if (low == sylvan_true || low == aHigh || low == bHigh) { + result = low; + } else { + bdd_refs_push(low); + BDD high; + if (low == sylvan_not(aHigh)) { + high = CALL(sylvan_exists, bHigh, _v, 0); + } else if (low == sylvan_not(bHigh)) { + high = CALL(sylvan_exists, aHigh, _v, 0); + } else { + high = CALL(sylvan_and_exists, aHigh, bHigh, _v, level); + } + if (high == sylvan_true) { + result = sylvan_true; + bdd_refs_pop(1); + } else if (high == sylvan_false) { + result = low; + bdd_refs_pop(1); + } else if (low == sylvan_false) { + result = high; + bdd_refs_pop(1); + } else { + bdd_refs_push(high); + result = sylvan_or(low, high); + bdd_refs_pop(2); + } + } + } else { + // level is not in variable set + bdd_refs_spawn(SPAWN(sylvan_and_exists, aHigh, bHigh, v, level)); + BDD low = CALL(sylvan_and_exists, aLow, bLow, v, level); + bdd_refs_push(low); + BDD high = bdd_refs_sync(SYNC(sylvan_and_exists)); + bdd_refs_pop(1); + result = sylvan_makenode(level, low, high); + } + + if (cachenow) { + if (cache_put3(CACHE_BDD_AND_EXISTS, a, b, v, result)) sylvan_stats_count(BDD_AND_EXISTS_CACHEDPUT); + } + + return result; +} + + +TASK_IMPL_4(BDD, sylvan_relnext, BDD, a, BDD, b, BDDSET, vars, BDDVAR, prev_level) +{ + /* Compute R(s) = \exists x: A(x) \and B(x,s) with support(result) = s, support(A) = s, support(B) = s+t + * if vars == sylvan_false, then every level is in s or t + * any other levels (outside s,t) in B are ignored / existentially quantified + */ + + /* Terminals */ + if (a == sylvan_true && b == sylvan_true) return sylvan_true; + if (a == sylvan_false) return sylvan_false; + if (b == sylvan_false) return sylvan_false; + if (sylvan_set_isempty(vars)) return a; + + /* Perhaps execute garbage collection */ + sylvan_gc_test(); + + /* Count operation */ + sylvan_stats_count(BDD_RELNEXT); + + /* Determine top level */ + bddnode_t na = sylvan_isconst(a) ? 0 : GETNODE(a); + bddnode_t nb = sylvan_isconst(b) ? 0 : GETNODE(b); + + BDDVAR va = na ? bddnode_getvariable(na) : 0xffffffff; + BDDVAR vb = nb ? bddnode_getvariable(nb) : 0xffffffff; + BDDVAR level = va < vb ? 
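+    /* (Editorial note, not part of the original source.) Transition
+     * relations use interleaved variable numbering: an even variable 2k
+     * encodes current-state bit k and the odd variable 2k+1 the matching
+     * next-state bit. That is why the code below pairs levels via
+     * s = level & ~1, t = s + 1 and tests vars against both level and
+     * level^1. */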
va : vb; + + /* Skip vars */ + int is_s_or_t = 0; + bddnode_t nv = 0; + if (vars == sylvan_false) { + is_s_or_t = 1; + } else { + nv = GETNODE(vars); + for (;;) { + /* check if level is s/t */ + BDDVAR vv = bddnode_getvariable(nv); + if (level == vv || (level^1) == vv) { + is_s_or_t = 1; + break; + } + /* check if level < s/t */ + if (level < vv) break; + vars = node_high(vars, nv); // get next in vars + if (sylvan_set_isempty(vars)) return a; + nv = GETNODE(vars); + } + } + + /* Consult cache */ + int cachenow = granularity < 2 || prev_level == 0 ? 1 : prev_level / granularity != level / granularity; + if (cachenow) { + BDD result; + if (cache_get3(CACHE_BDD_RELNEXT, a, b, vars, &result)) { + sylvan_stats_count(BDD_RELNEXT_CACHED); + return result; + } + } + + BDD result; + + if (is_s_or_t) { + /* Get s and t */ + BDDVAR s = level & (~1); + BDDVAR t = s+1; + + BDD a0, a1, b0, b1; + if (na && va == s) { + a0 = node_low(a, na); + a1 = node_high(a, na); + } else { + a0 = a1 = a; + } + if (nb && vb == s) { + b0 = node_low(b, nb); + b1 = node_high(b, nb); + } else { + b0 = b1 = b; + } + + BDD b00, b01, b10, b11; + if (!sylvan_isconst(b0)) { + bddnode_t nb0 = GETNODE(b0); + if (bddnode_getvariable(nb0) == t) { + b00 = node_low(b0, nb0); + b01 = node_high(b0, nb0); + } else { + b00 = b01 = b0; + } + } else { + b00 = b01 = b0; + } + if (!sylvan_isconst(b1)) { + bddnode_t nb1 = GETNODE(b1); + if (bddnode_getvariable(nb1) == t) { + b10 = node_low(b1, nb1); + b11 = node_high(b1, nb1); + } else { + b10 = b11 = b1; + } + } else { + b10 = b11 = b1; + } + + BDD _vars = vars == sylvan_false ? sylvan_false : node_high(vars, nv); + + bdd_refs_spawn(SPAWN(sylvan_relnext, a0, b00, _vars, level)); + bdd_refs_spawn(SPAWN(sylvan_relnext, a1, b10, _vars, level)); + bdd_refs_spawn(SPAWN(sylvan_relnext, a0, b01, _vars, level)); + bdd_refs_spawn(SPAWN(sylvan_relnext, a1, b11, _vars, level)); + + BDD f = bdd_refs_sync(SYNC(sylvan_relnext)); bdd_refs_push(f); + BDD e = bdd_refs_sync(SYNC(sylvan_relnext)); bdd_refs_push(e); + BDD d = bdd_refs_sync(SYNC(sylvan_relnext)); bdd_refs_push(d); + BDD c = bdd_refs_sync(SYNC(sylvan_relnext)); bdd_refs_push(c); + + bdd_refs_spawn(SPAWN(sylvan_ite, c, sylvan_true, d, 0)); /* a0 b00 \or a1 b01 */ + bdd_refs_spawn(SPAWN(sylvan_ite, e, sylvan_true, f, 0)); /* a0 b01 \or a1 b11 */ + + /* R1 */ d = bdd_refs_sync(SYNC(sylvan_ite)); bdd_refs_push(d); + /* R0 */ c = bdd_refs_sync(SYNC(sylvan_ite)); // not necessary: bdd_refs_push(c); + + bdd_refs_pop(5); + result = sylvan_makenode(s, c, d); + } else { + /* Variable not in vars! 
Take a, quantify b */ + BDD a0, a1, b0, b1; + if (na && va == level) { + a0 = node_low(a, na); + a1 = node_high(a, na); + } else { + a0 = a1 = a; + } + if (nb && vb == level) { + b0 = node_low(b, nb); + b1 = node_high(b, nb); + } else { + b0 = b1 = b; + } + + if (b0 != b1) { + if (a0 == a1) { + /* Quantify "b" variables */ + bdd_refs_spawn(SPAWN(sylvan_relnext, a0, b0, vars, level)); + bdd_refs_spawn(SPAWN(sylvan_relnext, a1, b1, vars, level)); + + BDD r1 = bdd_refs_sync(SYNC(sylvan_relnext)); + bdd_refs_push(r1); + BDD r0 = bdd_refs_sync(SYNC(sylvan_relnext)); + bdd_refs_push(r0); + result = sylvan_or(r0, r1); + bdd_refs_pop(2); + } else { + /* Quantify "b" variables, but keep "a" variables */ + bdd_refs_spawn(SPAWN(sylvan_relnext, a0, b0, vars, level)); + bdd_refs_spawn(SPAWN(sylvan_relnext, a0, b1, vars, level)); + bdd_refs_spawn(SPAWN(sylvan_relnext, a1, b0, vars, level)); + bdd_refs_spawn(SPAWN(sylvan_relnext, a1, b1, vars, level)); + + BDD r11 = bdd_refs_sync(SYNC(sylvan_relnext)); + bdd_refs_push(r11); + BDD r10 = bdd_refs_sync(SYNC(sylvan_relnext)); + bdd_refs_push(r10); + BDD r01 = bdd_refs_sync(SYNC(sylvan_relnext)); + bdd_refs_push(r01); + BDD r00 = bdd_refs_sync(SYNC(sylvan_relnext)); + bdd_refs_push(r00); + + bdd_refs_spawn(SPAWN(sylvan_ite, r00, sylvan_true, r01, 0)); + bdd_refs_spawn(SPAWN(sylvan_ite, r10, sylvan_true, r11, 0)); + + BDD r1 = bdd_refs_sync(SYNC(sylvan_ite)); + bdd_refs_push(r1); + BDD r0 = bdd_refs_sync(SYNC(sylvan_ite)); + bdd_refs_pop(5); + + result = sylvan_makenode(level, r0, r1); + } + } else { + /* Keep "a" variables */ + bdd_refs_spawn(SPAWN(sylvan_relnext, a0, b0, vars, level)); + bdd_refs_spawn(SPAWN(sylvan_relnext, a1, b1, vars, level)); + + BDD r1 = bdd_refs_sync(SYNC(sylvan_relnext)); + bdd_refs_push(r1); + BDD r0 = bdd_refs_sync(SYNC(sylvan_relnext)); + bdd_refs_pop(1); + result = sylvan_makenode(level, r0, r1); + } + } + + if (cachenow) { + if (cache_put3(CACHE_BDD_RELNEXT, a, b, vars, result)) sylvan_stats_count(BDD_RELNEXT_CACHEDPUT); + } + + return result; +} + +TASK_IMPL_4(BDD, sylvan_relprev, BDD, a, BDD, b, BDDSET, vars, BDDVAR, prev_level) +{ + /* Compute \exists x: A(s,x) \and B(x,t) + * if vars == sylvan_false, then every level is in s or t + * any other levels (outside s,t) in A are ignored / existentially quantified + */ + + /* Terminals */ + if (a == sylvan_true && b == sylvan_true) return sylvan_true; + if (a == sylvan_false) return sylvan_false; + if (b == sylvan_false) return sylvan_false; + if (sylvan_set_isempty(vars)) return b; + + /* Perhaps execute garbage collection */ + sylvan_gc_test(); + + /* Count operation */ + sylvan_stats_count(BDD_RELPREV); + + /* Determine top level */ + bddnode_t na = sylvan_isconst(a) ? 0 : GETNODE(a); + bddnode_t nb = sylvan_isconst(b) ? 0 : GETNODE(b); + + BDDVAR va = na ? bddnode_getvariable(na) : 0xffffffff; + BDDVAR vb = nb ? bddnode_getvariable(nb) : 0xffffffff; + BDDVAR level = va < vb ? va : vb; + + /* Skip vars */ + int is_s_or_t = 0; + bddnode_t nv = 0; + if (vars == sylvan_false) { + is_s_or_t = 1; + } else { + nv = GETNODE(vars); + for (;;) { + /* check if level is s/t */ + BDDVAR vv = bddnode_getvariable(nv); + if (level == vv || (level^1) == vv) { + is_s_or_t = 1; + break; + } + /* check if level < s/t */ + if (level < vv) break; + vars = node_high(vars, nv); // get next in vars + if (sylvan_set_isempty(vars)) return b; + nv = GETNODE(vars); + } + } + + /* Consult cache */ + int cachenow = granularity < 2 || prev_level == 0 ? 
1 : prev_level / granularity != level / granularity; + if (cachenow) { + BDD result; + if (cache_get3(CACHE_BDD_RELPREV, a, b, vars, &result)) { + sylvan_stats_count(BDD_RELPREV_CACHED); + return result; + } + } + + BDD result; + + if (is_s_or_t) { + /* Get s and t */ + BDDVAR s = level & (~1); + BDDVAR t = s+1; + + BDD a0, a1, b0, b1; + if (na && va == s) { + a0 = node_low(a, na); + a1 = node_high(a, na); + } else { + a0 = a1 = a; + } + if (nb && vb == s) { + b0 = node_low(b, nb); + b1 = node_high(b, nb); + } else { + b0 = b1 = b; + } + + BDD a00, a01, a10, a11; + if (!sylvan_isconst(a0)) { + bddnode_t na0 = GETNODE(a0); + if (bddnode_getvariable(na0) == t) { + a00 = node_low(a0, na0); + a01 = node_high(a0, na0); + } else { + a00 = a01 = a0; + } + } else { + a00 = a01 = a0; + } + if (!sylvan_isconst(a1)) { + bddnode_t na1 = GETNODE(a1); + if (bddnode_getvariable(na1) == t) { + a10 = node_low(a1, na1); + a11 = node_high(a1, na1); + } else { + a10 = a11 = a1; + } + } else { + a10 = a11 = a1; + } + + BDD b00, b01, b10, b11; + if (!sylvan_isconst(b0)) { + bddnode_t nb0 = GETNODE(b0); + if (bddnode_getvariable(nb0) == t) { + b00 = node_low(b0, nb0); + b01 = node_high(b0, nb0); + } else { + b00 = b01 = b0; + } + } else { + b00 = b01 = b0; + } + if (!sylvan_isconst(b1)) { + bddnode_t nb1 = GETNODE(b1); + if (bddnode_getvariable(nb1) == t) { + b10 = node_low(b1, nb1); + b11 = node_high(b1, nb1); + } else { + b10 = b11 = b1; + } + } else { + b10 = b11 = b1; + } + + BDD _vars; + if (vars != sylvan_false) { + _vars = node_high(vars, nv); + if (sylvan_set_var(_vars) == t) _vars = sylvan_set_next(_vars); + } else { + _vars = sylvan_false; + } + + if (b00 == b01) { + bdd_refs_spawn(SPAWN(sylvan_relprev, a00, b0, _vars, level)); + bdd_refs_spawn(SPAWN(sylvan_relprev, a10, b0, _vars, level)); + } else { + bdd_refs_spawn(SPAWN(sylvan_relprev, a00, b00, _vars, level)); + bdd_refs_spawn(SPAWN(sylvan_relprev, a00, b01, _vars, level)); + bdd_refs_spawn(SPAWN(sylvan_relprev, a10, b00, _vars, level)); + bdd_refs_spawn(SPAWN(sylvan_relprev, a10, b01, _vars, level)); + } + + if (b10 == b11) { + bdd_refs_spawn(SPAWN(sylvan_relprev, a01, b1, _vars, level)); + bdd_refs_spawn(SPAWN(sylvan_relprev, a11, b1, _vars, level)); + } else { + bdd_refs_spawn(SPAWN(sylvan_relprev, a01, b10, _vars, level)); + bdd_refs_spawn(SPAWN(sylvan_relprev, a01, b11, _vars, level)); + bdd_refs_spawn(SPAWN(sylvan_relprev, a11, b10, _vars, level)); + bdd_refs_spawn(SPAWN(sylvan_relprev, a11, b11, _vars, level)); + } + + BDD r00, r01, r10, r11; + + if (b10 == b11) { + r11 = bdd_refs_push(bdd_refs_sync(SYNC(sylvan_relprev))); + r01 = bdd_refs_push(bdd_refs_sync(SYNC(sylvan_relprev))); + } else { + BDD r111 = bdd_refs_push(bdd_refs_sync(SYNC(sylvan_relprev))); + BDD r110 = bdd_refs_push(bdd_refs_sync(SYNC(sylvan_relprev))); + r11 = sylvan_makenode(t, r110, r111); + bdd_refs_pop(2); + bdd_refs_push(r11); + BDD r011 = bdd_refs_push(bdd_refs_sync(SYNC(sylvan_relprev))); + BDD r010 = bdd_refs_push(bdd_refs_sync(SYNC(sylvan_relprev))); + r01 = sylvan_makenode(t, r010, r011); + bdd_refs_pop(2); + bdd_refs_push(r01); + } + + if (b00 == b01) { + r10 = bdd_refs_push(bdd_refs_sync(SYNC(sylvan_relprev))); + r00 = bdd_refs_push(bdd_refs_sync(SYNC(sylvan_relprev))); + } else { + BDD r101 = bdd_refs_push(bdd_refs_sync(SYNC(sylvan_relprev))); + BDD r100 = bdd_refs_push(bdd_refs_sync(SYNC(sylvan_relprev))); + r10 = sylvan_makenode(t, r100, r101); + bdd_refs_pop(2); + bdd_refs_push(r10); + BDD r001 = bdd_refs_push(bdd_refs_sync(SYNC(sylvan_relprev))); + BDD 
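+            /* (Editorial note, not part of the original source.) Lace tasks
+             * must be SYNCed in reverse SPAWN order (the work deque is LIFO),
+             * which is why the b1-related results above were collected before
+             * the b0-related results gathered here. */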
r000 = bdd_refs_push(bdd_refs_sync(SYNC(sylvan_relprev)));
+            r00 = sylvan_makenode(t, r000, r001);
+            bdd_refs_pop(2);
+            bdd_refs_push(r00);
+        }
+
+        bdd_refs_spawn(SPAWN(sylvan_and, sylvan_not(r00), sylvan_not(r01), 0));
+        bdd_refs_spawn(SPAWN(sylvan_and, sylvan_not(r10), sylvan_not(r11), 0));
+
+        BDD r1 = sylvan_not(bdd_refs_push(bdd_refs_sync(SYNC(sylvan_and))));
+        BDD r0 = sylvan_not(bdd_refs_sync(SYNC(sylvan_and)));
+        bdd_refs_pop(5);
+        result = sylvan_makenode(s, r0, r1);
+    } else {
+        BDD a0, a1, b0, b1;
+        if (na && va == level) {
+            a0 = node_low(a, na);
+            a1 = node_high(a, na);
+        } else {
+            a0 = a1 = a;
+        }
+        if (nb && vb == level) {
+            b0 = node_low(b, nb);
+            b1 = node_high(b, nb);
+        } else {
+            b0 = b1 = b;
+        }
+
+        if (a0 != a1) {
+            if (b0 == b1) {
+                /* Quantify "a" variables */
+                bdd_refs_spawn(SPAWN(sylvan_relprev, a0, b0, vars, level));
+                bdd_refs_spawn(SPAWN(sylvan_relprev, a1, b1, vars, level));
+
+                BDD r1 = bdd_refs_sync(SYNC(sylvan_relprev));
+                bdd_refs_push(r1);
+                BDD r0 = bdd_refs_sync(SYNC(sylvan_relprev));
+                bdd_refs_push(r0);
+                result = CALL(sylvan_ite, r0, sylvan_true, r1, 0);
+                bdd_refs_pop(2);
+
+            } else {
+                /* Quantify "a" variables, but keep "b" variables */
+                bdd_refs_spawn(SPAWN(sylvan_relnext, a0, b0, vars, level));
+                bdd_refs_spawn(SPAWN(sylvan_relnext, a1, b0, vars, level));
+                bdd_refs_spawn(SPAWN(sylvan_relnext, a0, b1, vars, level));
+                bdd_refs_spawn(SPAWN(sylvan_relnext, a1, b1, vars, level));
+
+                BDD r11 = bdd_refs_sync(SYNC(sylvan_relnext));
+                bdd_refs_push(r11);
+                BDD r01 = bdd_refs_sync(SYNC(sylvan_relnext));
+                bdd_refs_push(r01);
+                BDD r10 = bdd_refs_sync(SYNC(sylvan_relnext));
+                bdd_refs_push(r10);
+                BDD r00 = bdd_refs_sync(SYNC(sylvan_relnext));
+                bdd_refs_push(r00);
+
+                bdd_refs_spawn(SPAWN(sylvan_ite, r00, sylvan_true, r10, 0));
+                bdd_refs_spawn(SPAWN(sylvan_ite, r01, sylvan_true, r11, 0));
+
+                BDD r1 = bdd_refs_sync(SYNC(sylvan_ite));
+                bdd_refs_push(r1);
+                BDD r0 = bdd_refs_sync(SYNC(sylvan_ite));
+                bdd_refs_pop(5);
+
+                result = sylvan_makenode(level, r0, r1);
+            }
+        } else {
+            bdd_refs_spawn(SPAWN(sylvan_relprev, a0, b0, vars, level));
+            bdd_refs_spawn(SPAWN(sylvan_relprev, a1, b1, vars, level));
+
+            BDD r1 = bdd_refs_sync(SYNC(sylvan_relprev));
+            bdd_refs_push(r1);
+            BDD r0 = bdd_refs_sync(SYNC(sylvan_relprev));
+            bdd_refs_pop(1);
+            result = sylvan_makenode(level, r0, r1);
+        }
+    }
+
+    if (cachenow) {
+        if (cache_put3(CACHE_BDD_RELPREV, a, b, vars, result)) sylvan_stats_count(BDD_RELPREV_CACHEDPUT);
+    }
+
+    return result;
+}
+
+/**
+ * Computes the transitive closure by traversing the BDD recursively.
+ * See Y. Matsunaga, P. C. McGeer, R. K. Brayton
+ * On Computing the Transitive Closure of a State Transition Relation
+ * 30th ACM Design Automation Conference, 1993.
+ */
+TASK_IMPL_2(BDD, sylvan_closure, BDD, a, BDDVAR, prev_level)
+{
+    /* Terminals */
+    if (a == sylvan_true) return a;
+    if (a == sylvan_false) return a;
+
+    /* Perhaps execute garbage collection */
+    sylvan_gc_test();
+
+    /* Count operation */
+    sylvan_stats_count(BDD_CLOSURE);
+
+    /* Determine top level */
+    bddnode_t n = GETNODE(a);
+    BDDVAR level = bddnode_getvariable(n);
+
+    /* Consult cache */
+    int cachenow = granularity < 2 || prev_level == 0 ?
1 : prev_level / granularity != level / granularity; + if (cachenow) { + BDD result; + if (cache_get3(CACHE_BDD_CLOSURE, a, 0, 0, &result)) { + sylvan_stats_count(BDD_CLOSURE_CACHED); + return result; + } + } + + BDDVAR s = level & (~1); + BDDVAR t = s+1; + + BDD a0, a1; + if (level == s) { + a0 = node_low(a, n); + a1 = node_high(a, n); + } else { + a0 = a1 = a; + } + + BDD a00, a01, a10, a11; + if (!sylvan_isconst(a0)) { + bddnode_t na0 = GETNODE(a0); + if (bddnode_getvariable(na0) == t) { + a00 = node_low(a0, na0); + a01 = node_high(a0, na0); + } else { + a00 = a01 = a0; + } + } else { + a00 = a01 = a0; + } + if (!sylvan_isconst(a1)) { + bddnode_t na1 = GETNODE(a1); + if (bddnode_getvariable(na1) == t) { + a10 = node_low(a1, na1); + a11 = node_high(a1, na1); + } else { + a10 = a11 = a1; + } + } else { + a10 = a11 = a1; + } + + BDD u1 = CALL(sylvan_closure, a11, level); + bdd_refs_push(u1); + /* u3 = */ bdd_refs_spawn(SPAWN(sylvan_relprev, a01, u1, sylvan_false, level)); + BDD u2 = CALL(sylvan_relprev, u1, a10, sylvan_false, level); + bdd_refs_push(u2); + BDD e = CALL(sylvan_relprev, a01, u2, sylvan_false, level); + bdd_refs_push(e); + e = CALL(sylvan_ite, a00, sylvan_true, e, level); + bdd_refs_pop(1); + bdd_refs_push(e); + e = CALL(sylvan_closure, e, level); + bdd_refs_pop(1); + bdd_refs_push(e); + BDD g = CALL(sylvan_relprev, u2, e, sylvan_false, level); + bdd_refs_push(g); + BDD u3 = bdd_refs_sync(SYNC(sylvan_relprev)); + bdd_refs_push(u3); + BDD f = CALL(sylvan_relprev, e, u3, sylvan_false, level); + bdd_refs_push(f); + BDD h = CALL(sylvan_relprev, u2, f, sylvan_false, level); + bdd_refs_push(h); + h = CALL(sylvan_ite, u1, sylvan_true, h, level); + bdd_refs_pop(1); + bdd_refs_push(h); + + BDD r0, r1; + /* R0 */ r0 = sylvan_makenode(t, e, f); + bdd_refs_pop(7); + bdd_refs_push(r0); + /* R1 */ r1 = sylvan_makenode(t, g, h); + bdd_refs_pop(1); + BDD result = sylvan_makenode(s, r0, r1); + + if (cachenow) { + if (cache_put3(CACHE_BDD_CLOSURE, a, 0, 0, result)) sylvan_stats_count(BDD_CLOSURE_CACHEDPUT); + } + + return result; +} + + +/** + * Function composition + */ +TASK_IMPL_3(BDD, sylvan_compose, BDD, a, BDDMAP, map, BDDVAR, prev_level) +{ + /* Trivial cases */ + if (a == sylvan_false || a == sylvan_true) return a; + if (sylvan_map_isempty(map)) return a; + + /* Perhaps execute garbage collection */ + sylvan_gc_test(); + + /* Count operation */ + sylvan_stats_count(BDD_COMPOSE); + + /* Determine top level */ + bddnode_t n = GETNODE(a); + BDDVAR level = bddnode_getvariable(n); + + /* Skip map */ + bddnode_t map_node = GETNODE(map); + BDDVAR map_var = bddnode_getvariable(map_node); + while (map_var < level) { + map = node_low(map, map_node); + if (sylvan_map_isempty(map)) return a; + map_node = GETNODE(map); + map_var = bddnode_getvariable(map_node); + } + + /* Consult cache */ + int cachenow = granularity < 2 || prev_level == 0 ? 1 : prev_level / granularity != level / granularity; + if (cachenow) { + BDD result; + if (cache_get3(CACHE_BDD_COMPOSE, a, map, 0, &result)) { + sylvan_stats_count(BDD_COMPOSE_CACHED); + return result; + } + } + + /* Recursively calculate low and high */ + bdd_refs_spawn(SPAWN(sylvan_compose, node_low(a, n), map, level)); + BDD high = CALL(sylvan_compose, node_high(a, n), map, level); + bdd_refs_push(high); + BDD low = bdd_refs_sync(SYNC(sylvan_compose)); + bdd_refs_push(low); + + /* Calculate result */ + BDD root = map_var == level ? 
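+    /* (Editorial note, not part of the original source.) If the current
+     * variable is a key in the map, its substituted function becomes the
+     * branching condition; otherwise the variable itself (ithvar) is used.
+     * The ITE below then reassembles the Shannon expansion with the
+     * substitution applied: compose(a) = ITE(root, compose(high), compose(low)). */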
node_high(map, map_node) : sylvan_ithvar(level);
+    bdd_refs_push(root);
+    BDD result = CALL(sylvan_ite, root, high, low, 0);
+    bdd_refs_pop(3);
+
+    if (cachenow) {
+        if (cache_put3(CACHE_BDD_COMPOSE, a, map, 0, result)) sylvan_stats_count(BDD_COMPOSE_CACHEDPUT);
+    }
+
+    return result;
+}
+
+/**
+ * Count number of nodes in BDD
+ */
+uint64_t sylvan_nodecount_do_1(BDD a)
+{
+    if (sylvan_isconst(a)) return 0;
+    bddnode_t na = GETNODE(a);
+    if (bddnode_getmark(na)) return 0;
+    bddnode_setmark(na, 1);
+    uint64_t result = 1;
+    result += sylvan_nodecount_do_1(bddnode_getlow(na));
+    result += sylvan_nodecount_do_1(bddnode_gethigh(na));
+    return result;
+}
+
+void sylvan_nodecount_do_2(BDD a)
+{
+    if (sylvan_isconst(a)) return;
+    bddnode_t na = GETNODE(a);
+    if (!bddnode_getmark(na)) return;
+    bddnode_setmark(na, 0);
+    sylvan_nodecount_do_2(bddnode_getlow(na));
+    sylvan_nodecount_do_2(bddnode_gethigh(na));
+}
+
+size_t sylvan_nodecount(BDD a)
+{
+    uint32_t result = sylvan_nodecount_do_1(a);
+    sylvan_nodecount_do_2(a);
+    return result;
+}
+
+/**
+ * Calculate the number of distinct paths to True.
+ */
+TASK_IMPL_2(double, sylvan_pathcount, BDD, bdd, BDDVAR, prev_level)
+{
+    /* Trivial cases */
+    if (bdd == sylvan_false) return 0.0;
+    if (bdd == sylvan_true) return 1.0;
+
+    /* Perhaps execute garbage collection */
+    sylvan_gc_test();
+
+    sylvan_stats_count(BDD_PATHCOUNT);
+
+    BDD level = sylvan_var(bdd);
+
+    /* Consult cache */
+    int cachenow = granularity < 2 || prev_level == 0 ? 1 : prev_level / granularity != level / granularity;
+    if (cachenow) {
+        double result;
+        if (cache_get3(CACHE_BDD_PATHCOUNT, bdd, 0, 0, (uint64_t*)&result)) {
+            sylvan_stats_count(BDD_PATHCOUNT_CACHED);
+            return result;
+        }
+    }
+
+    SPAWN(sylvan_pathcount, sylvan_low(bdd), level);
+    SPAWN(sylvan_pathcount, sylvan_high(bdd), level);
+    double res1 = SYNC(sylvan_pathcount);
+    res1 += SYNC(sylvan_pathcount);
+
+    if (cachenow) {
+        if (cache_put3(CACHE_BDD_PATHCOUNT, bdd, 0, 0, *(uint64_t*)&res1)) sylvan_stats_count(BDD_PATHCOUNT_CACHEDPUT);
+    }
+
+    return res1;
+}
+
+/**
+ * Calculate the number of satisfying variable assignments according to <variables>.
+ */
+TASK_IMPL_3(double, sylvan_satcount, BDD, bdd, BDDSET, variables, BDDVAR, prev_level)
+{
+    /* Trivial cases */
+    if (bdd == sylvan_false) return 0.0;
+    if (bdd == sylvan_true) return powl(2.0L, sylvan_set_count(variables));
+
+    /* Perhaps execute garbage collection */
+    sylvan_gc_test();
+
+    sylvan_stats_count(BDD_SATCOUNT);
+
+    /* Count variables before var(bdd) */
+    size_t skipped = 0;
+    BDDVAR var = sylvan_var(bdd);
+    bddnode_t set_node = GETNODE(variables);
+    BDDVAR set_var = bddnode_getvariable(set_node);
+    while (var != set_var) {
+        skipped++;
+        variables = node_high(variables, set_node);
+        // if this assertion fails, then variables is not the support of <bdd>
+        assert(!sylvan_set_isempty(variables));
+        set_node = GETNODE(variables);
+        set_var = bddnode_getvariable(set_node);
+    }
+
+    union {
+        double d;
+        uint64_t s;
+    } hack;
+
+    /* Consult cache */
+    int cachenow = granularity < 2 || prev_level == 0 ?
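+    /* (Editorial note, not part of the original source.) Variables in the
+     * set that lie above var(bdd) are unconstrained: each one doubles the
+     * count, hence the factor powl(2, skipped) applied to both the cached
+     * and the freshly computed result. Example: counting the BDD for x1
+     * over the set {x0, x1} gives 2^1 * 1 = 2 satisfying assignments. */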
1 : prev_level / granularity != var / granularity; + if (cachenow) { + if (cache_get3(CACHE_BDD_SATCOUNT, bdd, variables, 0, &hack.s)) { + sylvan_stats_count(BDD_SATCOUNT_CACHED); + return hack.d * powl(2.0L, skipped); + } + } + + SPAWN(sylvan_satcount, sylvan_high(bdd), node_high(variables, set_node), var); + double low = CALL(sylvan_satcount, sylvan_low(bdd), node_high(variables, set_node), var); + double result = low + SYNC(sylvan_satcount); + + if (cachenow) { + hack.d = result; + if (cache_put3(CACHE_BDD_SATCOUNT, bdd, variables, 0, hack.s)) sylvan_stats_count(BDD_SATCOUNT_CACHEDPUT); + } + + return result * powl(2.0L, skipped); +} + +int +sylvan_sat_one(BDD bdd, BDDSET vars, uint8_t *str) +{ + if (bdd == sylvan_false) return 0; + if (str == NULL) return 0; + if (sylvan_set_isempty(vars)) return 1; + + for (;;) { + bddnode_t n_vars = GETNODE(vars); + if (bdd == sylvan_true) { + *str = 0; + } else { + bddnode_t n_bdd = GETNODE(bdd); + if (bddnode_getvariable(n_bdd) != bddnode_getvariable(n_vars)) { + *str = 0; + } else { + if (node_low(bdd, n_bdd) == sylvan_false) { + // take high edge + *str = 1; + bdd = node_high(bdd, n_bdd); + } else { + // take low edge + *str = 0; + bdd = node_low(bdd, n_bdd); + } + } + } + vars = node_high(vars, n_vars); + if (sylvan_set_isempty(vars)) break; + str++; + } + + return 1; +} + +BDD +sylvan_sat_one_bdd(BDD bdd) +{ + if (bdd == sylvan_false) return sylvan_false; + if (bdd == sylvan_true) return sylvan_true; + + bddnode_t node = GETNODE(bdd); + BDD low = node_low(bdd, node); + BDD high = node_high(bdd, node); + + BDD m; + + BDD result; + if (low == sylvan_false) { + m = sylvan_sat_one_bdd(high); + result = sylvan_makenode(bddnode_getvariable(node), sylvan_false, m); + } else if (high == sylvan_false) { + m = sylvan_sat_one_bdd(low); + result = sylvan_makenode(bddnode_getvariable(node), m, sylvan_false); + } else { + if (rand() & 0x2000) { + m = sylvan_sat_one_bdd(low); + result = sylvan_makenode(bddnode_getvariable(node), m, sylvan_false); + } else { + m = sylvan_sat_one_bdd(high); + result = sylvan_makenode(bddnode_getvariable(node), sylvan_false, m); + } + } + + return result; +} + +BDD +sylvan_cube(BDDSET vars, uint8_t *cube) +{ + if (sylvan_set_isempty(vars)) return sylvan_true; + + bddnode_t n = GETNODE(vars); + BDDVAR v = bddnode_getvariable(n); + vars = node_high(vars, n); + + BDD result = sylvan_cube(vars, cube+1); + if (*cube == 0) { + result = sylvan_makenode(v, result, sylvan_false); + } else if (*cube == 1) { + result = sylvan_makenode(v, sylvan_false, result); + } + + return result; +} + +TASK_IMPL_3(BDD, sylvan_union_cube, BDD, bdd, BDDSET, vars, uint8_t *, cube) +{ + /* Terminal cases */ + if (bdd == sylvan_true) return sylvan_true; + if (bdd == sylvan_false) return sylvan_cube(vars, cube); + if (sylvan_set_isempty(vars)) return sylvan_true; + + bddnode_t nv = GETNODE(vars); + + for (;;) { + if (*cube == 0 || *cube == 1) break; + // *cube should be 2 + cube++; + vars = node_high(vars, nv); + if (sylvan_set_isempty(vars)) return sylvan_true; + nv = GETNODE(vars); + } + + sylvan_gc_test(); + + // missing: SV_CNT_OP + + bddnode_t n = GETNODE(bdd); + BDD result = bdd; + BDDVAR v = bddnode_getvariable(nv); + BDDVAR n_level = bddnode_getvariable(n); + + if (v < n_level) { + vars = node_high(vars, nv); + if (*cube == 0) { + result = sylvan_union_cube(bdd, vars, cube+1); + result = sylvan_makenode(v, result, bdd); + } else /* *cube == 1 */ { + result = sylvan_union_cube(bdd, vars, cube+1); + result = sylvan_makenode(v, bdd, result); + } + } else 
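+    /* (Editorial note, not part of the original source.) sylvan_union_cube
+     * adds a single cube over <vars> (cube values: 0 = negative literal,
+     * 1 = positive literal, 2 = don't care) by rebuilding only the nodes
+     * along that cube, instead of a generic OR with a freshly built cube BDD. */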
if (v > n_level) { + BDD high = node_high(bdd, n); + BDD low = node_low(bdd, n); + SPAWN(sylvan_union_cube, high, vars, cube); + BDD new_low = sylvan_union_cube(low, vars, cube); + bdd_refs_push(new_low); + BDD new_high = SYNC(sylvan_union_cube); + bdd_refs_pop(1); + if (new_low != low || new_high != high) { + result = sylvan_makenode(n_level, new_low, new_high); + } + } else /* v == n_level */ { + vars = node_high(vars, nv); + BDD high = node_high(bdd, n); + BDD low = node_low(bdd, n); + if (*cube == 0) { + BDD new_low = sylvan_union_cube(low, vars, cube+1); + if (new_low != low) { + result = sylvan_makenode(n_level, new_low, high); + } + } else /* *cube == 1 */ { + BDD new_high = sylvan_union_cube(high, vars, cube+1); + if (new_high != high) { + result = sylvan_makenode(n_level, low, new_high); + } + } + } + + return result; +} + +struct bdd_path +{ + struct bdd_path *prev; + BDDVAR var; + int8_t val; // 0=false, 1=true, 2=both +}; + +VOID_TASK_5(sylvan_enum_do, BDD, bdd, BDDSET, vars, enum_cb, cb, void*, context, struct bdd_path*, path) +{ + if (bdd == sylvan_false) return; + + if (sylvan_set_isempty(vars)) { + /* bdd should now be true */ + assert(bdd == sylvan_true); + /* compute length of path */ + int i=0; + struct bdd_path *pp; + for (pp = path; pp != NULL; pp = pp->prev) i++; + /* if length is 0 (enum called with empty vars??), return */ + if (i == 0) return; + /* fill cube and vars with trace */ + uint8_t cube[i]; + BDDVAR vars[i]; + int j=0; + for (pp = path; pp != NULL; pp = pp->prev) { + cube[i-j-1] = pp->val; + vars[i-j-1] = pp->var; + j++; + } + /* call callback */ + WRAP(cb, context, vars, cube, i); + return; + } + + BDDVAR var = sylvan_var(vars); + vars = sylvan_set_next(vars); + BDDVAR bdd_var = sylvan_var(bdd); + + /* assert var <= bdd_var */ + if (bdd == sylvan_true || var < bdd_var) { + struct bdd_path pp0 = (struct bdd_path){path, var, 0}; + CALL(sylvan_enum_do, bdd, vars, cb, context, &pp0); + struct bdd_path pp1 = (struct bdd_path){path, var, 1}; + CALL(sylvan_enum_do, bdd, vars, cb, context, &pp1); + } else if (var == bdd_var) { + struct bdd_path pp0 = (struct bdd_path){path, var, 0}; + CALL(sylvan_enum_do, sylvan_low(bdd), vars, cb, context, &pp0); + struct bdd_path pp1 = (struct bdd_path){path, var, 1}; + CALL(sylvan_enum_do, sylvan_high(bdd), vars, cb, context, &pp1); + } else { + printf("var %u not expected (expecting %u)!\n", bdd_var, var); + assert(var <= bdd_var); + } +} + +VOID_TASK_5(sylvan_enum_par_do, BDD, bdd, BDDSET, vars, enum_cb, cb, void*, context, struct bdd_path*, path) +{ + if (bdd == sylvan_false) return; + + if (sylvan_set_isempty(vars)) { + /* bdd should now be true */ + assert(bdd == sylvan_true); + /* compute length of path */ + int i=0; + struct bdd_path *pp; + for (pp = path; pp != NULL; pp = pp->prev) i++; + /* if length is 0 (enum called with empty vars??), return */ + if (i == 0) return; + /* fill cube and vars with trace */ + uint8_t cube[i]; + BDDVAR vars[i]; + int j=0; + for (pp = path; pp != NULL; pp = pp->prev) { + cube[i-j-1] = pp->val; + vars[i-j-1] = pp->var; + j++; + } + /* call callback */ + WRAP(cb, context, vars, cube, i); + return; + } + + BDD var = sylvan_var(vars); + vars = sylvan_set_next(vars); + BDD bdd_var = sylvan_var(bdd); + + /* assert var <= bdd_var */ + if (var < bdd_var) { + struct bdd_path pp1 = (struct bdd_path){path, var, 1}; + SPAWN(sylvan_enum_par_do, bdd, vars, cb, context, &pp1); + struct bdd_path pp0 = (struct bdd_path){path, var, 0}; + CALL(sylvan_enum_par_do, bdd, vars, cb, context, &pp0); + 
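+        /* (Editorial note, not part of the original source.) This is the
+         * Lace fork-join pattern used throughout the file: SPAWN one branch
+         * so an idle worker may steal it, CALL the other branch directly on
+         * this worker, then SYNC to join the spawned task before combining
+         * the results. */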
SYNC(sylvan_enum_par_do);
+    } else if (var == bdd_var) {
+        struct bdd_path pp1 = (struct bdd_path){path, var, 1};
+        SPAWN(sylvan_enum_par_do, sylvan_high(bdd), vars, cb, context, &pp1);
+        struct bdd_path pp0 = (struct bdd_path){path, var, 0};
+        CALL(sylvan_enum_par_do, sylvan_low(bdd), vars, cb, context, &pp0);
+        SYNC(sylvan_enum_par_do);
+    } else {
+        assert(var <= bdd_var);
+    }
+}
+
+VOID_TASK_IMPL_4(sylvan_enum, BDD, bdd, BDDSET, vars, enum_cb, cb, void*, context)
+{
+    CALL(sylvan_enum_do, bdd, vars, cb, context, 0);
+}
+
+VOID_TASK_IMPL_4(sylvan_enum_par, BDD, bdd, BDDSET, vars, enum_cb, cb, void*, context)
+{
+    CALL(sylvan_enum_par_do, bdd, vars, cb, context, 0);
+}
+
+TASK_5(BDD, sylvan_collect_do, BDD, bdd, BDDSET, vars, sylvan_collect_cb, cb, void*, context, struct bdd_path*, path)
+{
+    if (bdd == sylvan_false) return sylvan_false;
+
+    if (sylvan_set_isempty(vars)) {
+        /* compute length of path */
+        int i=0;
+        struct bdd_path *pp;
+        for (pp = path; pp != NULL; pp = pp->prev) i++;
+        /* if length is 0 (enum called with empty vars??), return */
+        if (i == 0) return WRAP(cb, context, NULL);
+        /* fill cube and vars with trace */
+        uint8_t cube[i];
+        int j=0;
+        for (pp = path; pp != NULL; pp = pp->prev) {
+            cube[i-j-1] = pp->val;
+            j++;
+        }
+        /* call callback */
+        return WRAP(cb, context, cube);
+    } else {
+        BDD var = sylvan_var(vars);
+        vars = sylvan_set_next(vars);
+        BDD bdd_var = sylvan_var(bdd);
+
+        /* if this assertion fails, then <bdd> has variables not in <vars> */
+        assert(var <= bdd_var);
+
+        struct bdd_path pp1 = (struct bdd_path){path, var, 1};
+        struct bdd_path pp0 = (struct bdd_path){path, var, 0};
+        if (var < bdd_var) {
+            bdd_refs_spawn(SPAWN(sylvan_collect_do, bdd, vars, cb, context, &pp1));
+            BDD low = bdd_refs_push(CALL(sylvan_collect_do, bdd, vars, cb, context, &pp0));
+            BDD high = bdd_refs_push(bdd_refs_sync(SYNC(sylvan_collect_do)));
+            BDD res = sylvan_or(low, high);
+            bdd_refs_pop(2);
+            return res;
+        } else if (var == bdd_var) {
+            bdd_refs_spawn(SPAWN(sylvan_collect_do, sylvan_high(bdd), vars, cb, context, &pp1));
+            BDD low = bdd_refs_push(CALL(sylvan_collect_do, sylvan_low(bdd), vars, cb, context, &pp0));
+            BDD high = bdd_refs_push(bdd_refs_sync(SYNC(sylvan_collect_do)));
+            BDD res = sylvan_or(low, high);
+            bdd_refs_pop(2);
+            return res;
+        } else {
+            return sylvan_invalid; // unreachable
+        }
+    }
+}
+
+TASK_IMPL_4(BDD, sylvan_collect, BDD, bdd, BDDSET, vars, sylvan_collect_cb, cb, void*, context)
+{
+    return CALL(sylvan_collect_do, bdd, vars, cb, context, 0);
+}
+
+/**
+ * IMPLEMENTATION OF BDDSET
+ */
+
+int
+sylvan_set_in(BDDSET set, BDDVAR level)
+{
+    while (!sylvan_set_isempty(set)) {
+        bddnode_t n = GETNODE(set);
+        BDDVAR n_level = bddnode_getvariable(n);
+        if (n_level == level) return 1;
+        if (n_level > level) return 0; // BDDs are ordered
+        set = node_high(set, n);
+    }
+
+    return 0;
+}
+
+size_t
+sylvan_set_count(BDDSET set)
+{
+    size_t result = 0;
+    for (;!sylvan_set_isempty(set);set = sylvan_set_next(set)) result++;
+    return result;
+}
+
+void
+sylvan_set_toarray(BDDSET set, BDDVAR *arr)
+{
+    size_t i = 0;
+    while (!sylvan_set_isempty(set)) {
+        bddnode_t n = GETNODE(set);
+        arr[i++] = bddnode_getvariable(n);
+        set = node_high(set, n);
+    }
+}
+
+TASK_IMPL_2(BDDSET, sylvan_set_fromarray, BDDVAR*, arr, size_t, length)
+{
+    if (length == 0) return sylvan_set_empty();
+    BDDSET sub = sylvan_set_fromarray(arr+1, length-1);
+    bdd_refs_push(sub);
+    BDDSET result = sylvan_set_add(sub, *arr);
+    bdd_refs_pop(1);
+    return result;
+}
+
+void
+sylvan_test_isset(BDDSET set)
+{
+    while (set !=
sylvan_false) { + assert(set != sylvan_true); + assert(llmsset_is_marked(nodes, set)); + bddnode_t n = GETNODE(set); + assert(node_low(set, n) == sylvan_true); + set = node_high(set, n); + } +} + +/** + * IMPLEMENTATION OF BDDMAP + */ + +BDDMAP +sylvan_map_add(BDDMAP map, BDDVAR key, BDD value) +{ + if (sylvan_map_isempty(map)) return sylvan_makenode(key, sylvan_map_empty(), value); + + bddnode_t n = GETNODE(map); + BDDVAR key_m = bddnode_getvariable(n); + + if (key_m < key) { + // add recursively and rebuild tree + BDDMAP low = sylvan_map_add(node_low(map, n), key, value); + BDDMAP result = sylvan_makenode(key_m, low, node_high(map, n)); + return result; + } else if (key_m > key) { + return sylvan_makenode(key, map, value); + } else { + // replace old + return sylvan_makenode(key, node_low(map, n), value); + } +} + +BDDMAP +sylvan_map_addall(BDDMAP map_1, BDDMAP map_2) +{ + // one of the maps is empty + if (sylvan_map_isempty(map_1)) return map_2; + if (sylvan_map_isempty(map_2)) return map_1; + + bddnode_t n_1 = GETNODE(map_1); + BDDVAR key_1 = bddnode_getvariable(n_1); + + bddnode_t n_2 = GETNODE(map_2); + BDDVAR key_2 = bddnode_getvariable(n_2); + + BDDMAP result; + if (key_1 < key_2) { + // key_1, recurse on n_1->low, map_2 + BDDMAP low = sylvan_map_addall(node_low(map_1, n_1), map_2); + result = sylvan_makenode(key_1, low, node_high(map_1, n_1)); + } else if (key_1 > key_2) { + // key_2, recurse on map_1, n_2->low + BDDMAP low = sylvan_map_addall(map_1, node_low(map_2, n_2)); + result = sylvan_makenode(key_2, low, node_high(map_2, n_2)); + } else { + // equal: key_2, recurse on n_1->low, n_2->low + BDDMAP low = sylvan_map_addall(node_low(map_1, n_1), node_low(map_2, n_2)); + result = sylvan_makenode(key_2, low, node_high(map_2, n_2)); + } + return result; +} + +BDDMAP +sylvan_map_remove(BDDMAP map, BDDVAR key) +{ + if (sylvan_map_isempty(map)) return map; + + bddnode_t n = GETNODE(map); + BDDVAR key_m = bddnode_getvariable(n); + + if (key_m < key) { + BDDMAP low = sylvan_map_remove(node_low(map, n), key); + BDDMAP result = sylvan_makenode(key_m, low, node_high(map, n)); + return result; + } else if (key_m > key) { + return map; + } else { + return node_low(map, n); + } +} + +BDDMAP +sylvan_map_removeall(BDDMAP map, BDDSET toremove) +{ + if (sylvan_map_isempty(map)) return map; + if (sylvan_set_isempty(toremove)) return map; + + bddnode_t n_1 = GETNODE(map); + BDDVAR key_1 = bddnode_getvariable(n_1); + + bddnode_t n_2 = GETNODE(toremove); + BDDVAR key_2 = bddnode_getvariable(n_2); + + if (key_1 < key_2) { + BDDMAP low = sylvan_map_removeall(node_low(map, n_1), toremove); + BDDMAP result = sylvan_makenode(key_1, low, node_high(map, n_1)); + return result; + } else if (key_1 > key_2) { + return sylvan_map_removeall(map, node_high(toremove, n_2)); + } else { + return sylvan_map_removeall(node_low(map, n_1), node_high(toremove, n_2)); + } +} + +int +sylvan_map_in(BDDMAP map, BDDVAR key) +{ + while (!sylvan_map_isempty(map)) { + bddnode_t n = GETNODE(map); + BDDVAR n_level = bddnode_getvariable(n); + if (n_level == key) return 1; + if (n_level > key) return 0; // BDDs are ordered + map = node_low(map, n); + } + + return 0; +} + +size_t +sylvan_map_count(BDDMAP map) +{ + size_t r=0; + while (!sylvan_map_isempty(map)) { r++; map=sylvan_map_next(map); } + return r; +} + +BDDMAP +sylvan_set_to_map(BDDSET set, BDD value) +{ + if (sylvan_set_isempty(set)) return sylvan_map_empty(); + bddnode_t set_n = GETNODE(set); + BDD sub = sylvan_set_to_map(node_high(set, set_n), value); + BDD result = 
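+    /* (Editorial note, not part of the original source.) A BDDMAP is a node
+     * chain ordered by variable: each node's variable is a key, the high
+     * edge holds the BDD substituted for that key, and the low edge points
+     * to the rest of the map, which is why the map operations above walk
+     * low edges. */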
sylvan_makenode(bddnode_getvariable(set_n), sub, value);
+    return result;
+}
+
+/**
+ * Determine the support of a BDD (all variables used in the BDD)
+ */
+TASK_IMPL_1(BDD, sylvan_support, BDD, bdd)
+{
+    if (bdd == sylvan_true || bdd == sylvan_false) return sylvan_set_empty(); // return empty set
+
+    sylvan_gc_test();
+
+    sylvan_stats_count(BDD_SUPPORT);
+
+    BDD result;
+    if (cache_get3(CACHE_BDD_SUPPORT, bdd, 0, 0, &result)) {
+        sylvan_stats_count(BDD_SUPPORT_CACHED);
+        return result;
+    }
+
+    bddnode_t n = GETNODE(bdd);
+    BDD high, low, set;
+
+    /* compute recursively */
+    bdd_refs_spawn(SPAWN(sylvan_support, bddnode_getlow(n)));
+    high = bdd_refs_push(CALL(sylvan_support, bddnode_gethigh(n)));
+    low = bdd_refs_push(bdd_refs_sync(SYNC(sylvan_support)));
+
+    /* take intersection of support of low and support of high */
+    set = sylvan_and(low, high);
+    bdd_refs_pop(2);
+
+    /* add current level to set */
+    result = sylvan_makenode(bddnode_getvariable(n), sylvan_false, set);
+
+    if (cache_put3(CACHE_BDD_SUPPORT, bdd, 0, 0, result)) sylvan_stats_count(BDD_SUPPORT_CACHEDPUT);
+    return result;
+}
+
+static void
+sylvan_unmark_rec(bddnode_t node)
+{
+    if (bddnode_getmark(node)) {
+        bddnode_setmark(node, 0);
+        if (!sylvan_isconst(bddnode_getlow(node))) sylvan_unmark_rec(GETNODE(bddnode_getlow(node)));
+        if (!sylvan_isconst(bddnode_gethigh(node))) sylvan_unmark_rec(GETNODE(bddnode_gethigh(node)));
+    }
+}
+
+/**
+ * fprint, print
+ */
+void
+sylvan_fprint(FILE *f, BDD bdd)
+{
+    sylvan_serialize_reset();
+    size_t v = sylvan_serialize_add(bdd);
+    fprintf(f, "%s%zu,", bdd&sylvan_complement?"!":"", v);
+    sylvan_serialize_totext(f);
+}
+
+void
+sylvan_print(BDD bdd)
+{
+    sylvan_fprint(stdout, bdd);
+}
+
+/**
+ * Output to .DOT files
+ */
+
+/***
+ * We keep a set [level -> [node]] using AVLset
+ */
+struct level_to_nodeset {
+    BDDVAR level;
+    avl_node_t *set;
+};
+
+AVL(level_to_nodeset, struct level_to_nodeset)
+{
+    if (left->level > right->level) return 1;
+    if (right->level > left->level) return -1;
+    return 0;
+}
+
+AVL(nodeset, BDD)
+{
+    if (*left > *right) return 1;
+    if (*right > *left) return -1;
+    return 0;
+}
+
+/* returns 1 if inserted, 0 if already existed */
+static int __attribute__((noinline))
+sylvan_dothelper_register(avl_node_t **set, BDD bdd)
+{
+    struct level_to_nodeset s, *ss;
+    s.level = sylvan_var(bdd);
+    ss = level_to_nodeset_search(*set, &s);
+    if (ss == NULL) {
+        s.set = NULL;
+        ss = level_to_nodeset_put(set, &s, NULL);
+    }
+    assert(ss != NULL);
+    return nodeset_insert(&ss->set, &bdd);
+}
+
+static void
+sylvan_fprintdot_rec(FILE *out, BDD bdd, avl_node_t **levels)
+{
+    bdd = BDD_STRIPMARK(bdd);
+    if (bdd == sylvan_false) return;
+    if (!sylvan_dothelper_register(levels, bdd)) return;
+
+    BDD low = sylvan_low(bdd);
+    BDD high = sylvan_high(bdd);
+    fprintf(out, "\"%" PRIx64 "\" [label=\"%d\"];\n", bdd, sylvan_var(bdd));
+    fprintf(out, "\"%" PRIx64 "\" -> \"%" PRIx64 "\" [style=dashed];\n", bdd, low);
+    fprintf(out, "\"%" PRIx64 "\" -> \"%" PRIx64 "\" [style=solid dir=both arrowtail=%s];\n", bdd, BDD_STRIPMARK(high), BDD_HASMARK(high) ?
"dot" : "none"); + sylvan_fprintdot_rec(out, low, levels); + sylvan_fprintdot_rec(out, high, levels); +} + +void +sylvan_fprintdot(FILE *out, BDD bdd) +{ + fprintf(out, "digraph \"DD\" {\n"); + fprintf(out, "graph [dpi = 300];\n"); + fprintf(out, "center = true;\n"); + fprintf(out, "edge [dir = forward];\n"); + fprintf(out, "0 [label=\"0\", style=filled, shape=box, height=0.3, width=0.3];\n"); + fprintf(out, "root [style=invis];\n"); + fprintf(out, "root -> \"%" PRIx64 "\" [style=solid dir=both arrowtail=%s];\n", BDD_STRIPMARK(bdd), BDD_HASMARK(bdd) ? "dot" : "none"); + + avl_node_t *levels = NULL; + sylvan_fprintdot_rec(out, bdd, &levels); + + if (levels != NULL) { + size_t levels_count = avl_count(levels); + struct level_to_nodeset *arr = level_to_nodeset_toarray(levels); + size_t i; + for (i=0;i \"%" PRIx64 "\" [style=dashed];\n", bdd, low); + fprintf(out, "\"%" PRIx64 "\" -> \"%" PRIx64 "\" [style=solid];\n", bdd, high); + sylvan_fprintdot_nc_rec(out, low, levels); + sylvan_fprintdot_nc_rec(out, high, levels); +} + +void +sylvan_fprintdot_nc(FILE *out, BDD bdd) +{ + fprintf(out, "digraph \"DD\" {\n"); + fprintf(out, "graph [dpi = 300];\n"); + fprintf(out, "center = true;\n"); + fprintf(out, "edge [dir = forward];\n"); + fprintf(out, "\"%" PRIx64 "\" [shape=box, label=\"F\", style=filled, shape=box, height=0.3, width=0.3];\n", sylvan_false); + fprintf(out, "\"%" PRIx64 "\" [shape=box, label=\"T\", style=filled, shape=box, height=0.3, width=0.3];\n", sylvan_true); + fprintf(out, "root [style=invis];\n"); + fprintf(out, "root -> \"%" PRIx64 "\" [style=solid];\n", bdd); + + avl_node_t *levels = NULL; + sylvan_fprintdot_nc_rec(out, bdd, &levels); + + if (levels != NULL) { + size_t levels_count = avl_count(levels); + struct level_to_nodeset *arr = level_to_nodeset_toarray(levels); + size_t i; + for (i=0;ibdd > right->bdd) return 1; + if (left->bdd < right->bdd) return -1; + return 0; +} + +// Define a AVL tree type with prefix 'sylvan_ser_reversed' holding +// nodes of struct sylvan_ser with the following compare() function... 
+AVL(sylvan_ser_reversed, struct sylvan_ser) +{ + if (left->assigned > right->assigned) return 1; + if (left->assigned < right->assigned) return -1; + return 0; +} + +// Initially, both sets are empty +static avl_node_t *sylvan_ser_set = NULL; +static avl_node_t *sylvan_ser_reversed_set = NULL; + +// Start counting (assigning numbers to BDDs) at 1 +static size_t sylvan_ser_counter = 1; +static size_t sylvan_ser_done = 0; + +// Given a BDD, assign unique numbers to all nodes +static size_t +sylvan_serialize_assign_rec(BDD bdd) +{ + if (sylvan_isnode(bdd)) { + bddnode_t n = GETNODE(bdd); + + struct sylvan_ser s, *ss; + s.bdd = BDD_STRIPMARK(bdd); + ss = sylvan_ser_search(sylvan_ser_set, &s); + if (ss == NULL) { + // assign dummy value + s.assigned = 0; + ss = sylvan_ser_put(&sylvan_ser_set, &s, NULL); + + // first assign recursively + sylvan_serialize_assign_rec(bddnode_getlow(n)); + sylvan_serialize_assign_rec(bddnode_gethigh(n)); + + // assign real value + ss->assigned = sylvan_ser_counter++; + + // put a copy in the reversed table + sylvan_ser_reversed_insert(&sylvan_ser_reversed_set, ss); + } + + return ss->assigned; + } + + return BDD_STRIPMARK(bdd); +} + +size_t +sylvan_serialize_add(BDD bdd) +{ + return BDD_TRANSFERMARK(bdd, sylvan_serialize_assign_rec(bdd)); +} + +void +sylvan_serialize_reset() +{ + sylvan_ser_free(&sylvan_ser_set); + sylvan_ser_free(&sylvan_ser_reversed_set); + sylvan_ser_counter = 1; + sylvan_ser_done = 0; +} + +size_t +sylvan_serialize_get(BDD bdd) +{ + if (!sylvan_isnode(bdd)) return bdd; + struct sylvan_ser s, *ss; + s.bdd = BDD_STRIPMARK(bdd); + ss = sylvan_ser_search(sylvan_ser_set, &s); + assert(ss != NULL); + return BDD_TRANSFERMARK(bdd, ss->assigned); +} + +BDD +sylvan_serialize_get_reversed(size_t value) +{ + if (!sylvan_isnode(value)) return value; + struct sylvan_ser s, *ss; + s.assigned = BDD_STRIPMARK(value); + ss = sylvan_ser_reversed_search(sylvan_ser_reversed_set, &s); + assert(ss != NULL); + return BDD_TRANSFERMARK(value, ss->bdd); +} + +void +sylvan_serialize_totext(FILE *out) +{ + fprintf(out, "["); + avl_iter_t *it = sylvan_ser_reversed_iter(sylvan_ser_reversed_set); + struct sylvan_ser *s; + + while ((s=sylvan_ser_reversed_iter_next(it))) { + BDD bdd = s->bdd; + bddnode_t n = GETNODE(bdd); + fprintf(out, "(%zu,%u,%zu,%zu,%u),", s->assigned, + bddnode_getvariable(n), + (size_t)bddnode_getlow(n), + (size_t)BDD_STRIPMARK(bddnode_gethigh(n)), + BDD_HASMARK(bddnode_gethigh(n)) ? 
1 : 0);
+    }
+
+    sylvan_ser_reversed_iter_free(it);
+    fprintf(out, "]");
+}
+
+void
+sylvan_serialize_tofile(FILE *out)
+{
+    size_t count = avl_count(sylvan_ser_reversed_set);
+    assert(count >= sylvan_ser_done);
+    assert(count == sylvan_ser_counter-1);
+    count -= sylvan_ser_done;
+    fwrite(&count, sizeof(size_t), 1, out);
+
+    struct sylvan_ser *s;
+    avl_iter_t *it = sylvan_ser_reversed_iter(sylvan_ser_reversed_set);
+
+    /* Skip already written entries */
+    size_t index = 0;
+    while (index < sylvan_ser_done && (s=sylvan_ser_reversed_iter_next(it))) {
+        index++;
+        assert(s->assigned == index);
+    }
+
+    while ((s=sylvan_ser_reversed_iter_next(it))) {
+        index++;
+        assert(s->assigned == index);
+
+        bddnode_t n = GETNODE(s->bdd);
+
+        struct bddnode node;
+        bddnode_makenode(&node, bddnode_getvariable(n), sylvan_serialize_get(bddnode_getlow(n)), sylvan_serialize_get(bddnode_gethigh(n)));
+
+        fwrite(&node, sizeof(struct bddnode), 1, out);
+    }
+
+    sylvan_ser_done = sylvan_ser_counter-1;
+    sylvan_ser_reversed_iter_free(it);
+}
+
+void
+sylvan_serialize_fromfile(FILE *in)
+{
+    size_t count, i;
+    if (fread(&count, sizeof(size_t), 1, in) != 1) {
+        // TODO FIXME return error
+        printf("sylvan_serialize_fromfile: file format error, giving up\n");
+        exit(-1);
+    }
+
+    for (i=1; i<=count; i++) {
+        struct bddnode node;
+        if (fread(&node, sizeof(struct bddnode), 1, in) != 1) {
+            // TODO FIXME return error
+            printf("sylvan_serialize_fromfile: file format error, giving up\n");
+            exit(-1);
+        }
+
+        BDD low = sylvan_serialize_get_reversed(bddnode_getlow(&node));
+        BDD high = sylvan_serialize_get_reversed(bddnode_gethigh(&node));
+
+        struct sylvan_ser s;
+        s.bdd = sylvan_makenode(bddnode_getvariable(&node), low, high);
+        s.assigned = ++sylvan_ser_done; // counter starts at 0 but assigned numbers are 1-based
+
+        sylvan_ser_insert(&sylvan_ser_set, &s);
+        sylvan_ser_reversed_insert(&sylvan_ser_reversed_set, &s);
+    }
+}
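The save/load round trip these functions support can be sketched as follows (a minimal sketch: error handling is omitted, and the file name is arbitrary):

// Hypothetical sketch: write one BDD to disk and read it back.
void serialization_example(BDD f)
{
    FILE *out = fopen("bdd.bin", "w");
    sylvan_serialize_reset();
    size_t key = sylvan_serialize_add(f);   // assign numbers to all nodes of f
    fwrite(&key, sizeof(size_t), 1, out);   // remember which number is the root
    sylvan_serialize_tofile(out);
    fclose(out);

    FILE *in = fopen("bdd.bin", "r");
    size_t key2 = 0;
    if (fread(&key2, sizeof(size_t), 1, in) == 1) {
        sylvan_serialize_fromfile(in);
        BDD g = sylvan_serialize_get_reversed(key2); // g equals f
        (void)g;
    }
    fclose(in);
}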
+/**
+ * Generate SHA2 structural hashes.
+ * Hashes are independent of location.
+ * Mainly useful for debugging purposes.
+ */
+static void
+sylvan_sha2_rec(BDD bdd, SHA256_CTX *ctx)
+{
+    if (bdd == sylvan_true || bdd == sylvan_false) {
+        SHA256_Update(ctx, (void*)&bdd, sizeof(BDD));
+        return;
+    }
+
+    bddnode_t node = GETNODE(bdd);
+    if (bddnode_getmark(node) == 0) {
+        bddnode_setmark(node, 1);
+        uint32_t level = bddnode_getvariable(node);
+        if (BDD_STRIPMARK(bddnode_gethigh(node))) level |= 0x80000000;
+        SHA256_Update(ctx, (void*)&level, sizeof(uint32_t));
+        sylvan_sha2_rec(bddnode_gethigh(node), ctx);
+        sylvan_sha2_rec(bddnode_getlow(node), ctx);
+    }
+}
+
+void
+sylvan_printsha(BDD bdd)
+{
+    sylvan_fprintsha(stdout, bdd);
+}
+
+void
+sylvan_fprintsha(FILE *f, BDD bdd)
+{
+    char buf[80];
+    sylvan_getsha(bdd, buf);
+    fprintf(f, "%s", buf);
+}
+
+void
+sylvan_getsha(BDD bdd, char *target)
+{
+    SHA256_CTX ctx;
+    SHA256_Init(&ctx);
+    sylvan_sha2_rec(bdd, &ctx);
+    if (bdd != sylvan_true && bdd != sylvan_false) sylvan_unmark_rec(GETNODE(bdd));
+    SHA256_End(&ctx, target);
+}
+
+/**
+ * Debug tool to check that a BDD is properly ordered.
+ * Also checks that every BDD node is marked 'in-use' in the hash table.
+ */
+TASK_2(int, sylvan_test_isbdd_rec, BDD, bdd, BDDVAR, parent_var)
+{
+    if (bdd == sylvan_true || bdd == sylvan_false) return 1;
+    assert(llmsset_is_marked(nodes, BDD_STRIPMARK(bdd)));
+
+    sylvan_stats_count(BDD_ISBDD);
+
+    uint64_t result;
+    if (cache_get3(CACHE_BDD_ISBDD, bdd, 0, 0, &result)) {
+        sylvan_stats_count(BDD_ISBDD_CACHED);
+        return result;
+    }
+
+    bddnode_t n = GETNODE(bdd);
+    BDDVAR var = bddnode_getvariable(n);
+    if (var <= parent_var) {
+        result = 0;
+    } else {
+        SPAWN(sylvan_test_isbdd_rec, node_low(bdd, n), var);
+        result = (uint64_t)CALL(sylvan_test_isbdd_rec, node_high(bdd, n), var);
+        if (!SYNC(sylvan_test_isbdd_rec)) result = 0;
+    }
+
+    if (cache_put3(CACHE_BDD_ISBDD, bdd, 0, 0, result)) sylvan_stats_count(BDD_ISBDD_CACHEDPUT);
+    return result;
+}
+
+TASK_IMPL_1(int, sylvan_test_isbdd, BDD, bdd)
+{
+    if (bdd == sylvan_true) return 1;
+    if (bdd == sylvan_false) return 1;
+
+    assert(llmsset_is_marked(nodes, BDD_STRIPMARK(bdd)));
+
+    bddnode_t n = GETNODE(bdd);
+    BDDVAR var = bddnode_getvariable(n);
+    SPAWN(sylvan_test_isbdd_rec, node_low(bdd, n), var);
+    int result = CALL(sylvan_test_isbdd_rec, node_high(bdd, n), var);
+    if (!SYNC(sylvan_test_isbdd_rec)) result = 0;
+    return result;
+}
diff --git a/src/sylvan_bdd.h b/src/sylvan_bdd.h
new file mode 100644
index 000000000..6cd4ef584
--- /dev/null
+++ b/src/sylvan_bdd.h
@@ -0,0 +1,423 @@
+/*
+ * Copyright 2011-2015 Formal Methods and Tools, University of Twente
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Do not include this file directly. Instead, include sylvan.h */
+
+#include
+
+#ifndef SYLVAN_BDD_H
+#define SYLVAN_BDD_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+typedef uint64_t BDD;    // low 40 bits used for index, highest bit for complement, rest 0
+// BDDSET uses the BDD node hash table. A BDDSET is an ordered BDD.
+typedef uint64_t BDDSET; // encodes a set of variables (e.g. for exists etc.)
+// BDDMAP also uses the BDD node hash table. A BDDMAP is *not* an ordered BDD.
+typedef uint64_t BDDMAP; // encodes a function of variable->BDD (e.g. for substitute)
+typedef uint32_t BDDVAR; // low 24 bits only
+
+#define sylvan_complement ((uint64_t)0x8000000000000000)
+#define sylvan_false      ((BDD)0x0000000000000000)
+#define sylvan_true       (sylvan_false|sylvan_complement)
+#define sylvan_invalid    ((BDD)0x7fffffffffffffff)
+
+#define sylvan_isconst(bdd) (bdd == sylvan_true || bdd == sylvan_false)
+#define sylvan_isnode(bdd)  (bdd != sylvan_true && bdd != sylvan_false)
+
+/**
+ * Initialize BDD functionality.
+ *
+ * Granularity (BDD only) determines usage of the operation cache. The smallest value is 1: use the operation cache always.
+ * Higher values mean that the cache is used less often. Variables are grouped such that
+ * the cache is used when going to the next group, i.e., with granularity=3, variables [0,1,2] are in the
+ * first group, [3,4,5] in the next, etc. Then no caching occurs between 0->1, 1->2, 0->2. Caching occurs
+ * on 0->3, 1->4, 2->3, etc.
+ *
+ * A reasonable default is a granularity of 4-16, strongly depending on the structure of the BDDs.
+ */
+void sylvan_init_bdd(int granularity);
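A typical initialization sequence, sketched under the assumption of the Lace API used by this tree (lace_init(workers, dqsize), lace_startup(stacksize, cb, arg)); the table and cache sizes below are arbitrary illustrations:

// Hypothetical sketch: bring up Lace, the Sylvan tables, and the BDD layer.
#include <sylvan.h>

int main()
{
    lace_init(0, 0);             // auto-detect worker count, default deque size
    lace_startup(0, NULL, NULL); // default stack size, current thread becomes a worker
    sylvan_init_package(1LL<<22, 1LL<<26, 1LL<<22, 1LL<<26);
    sylvan_init_bdd(4);          // group variables per 4 levels for the operation cache
    LACE_ME;
    BDD x = sylvan_ithvar(0);
    (void)x;
    sylvan_quit();
    return 0;
}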
+
+/* Create a BDD representing just <var> or the negation of <var> */
+BDD sylvan_ithvar(BDDVAR var);
+static inline BDD sylvan_nithvar(BDD var) { return sylvan_ithvar(var) ^ sylvan_complement; }
+
+/* Retrieve the <var> of the BDD node */
+BDDVAR sylvan_var(BDD bdd);
+
+/* Follow the <low> and <high> edges */
+BDD sylvan_low(BDD bdd);
+BDD sylvan_high(BDD bdd);
+
+/* Add or remove external reference to BDD */
+BDD sylvan_ref(BDD a);
+void sylvan_deref(BDD a);
+
+/* For use in custom mark functions */
+VOID_TASK_DECL_1(sylvan_gc_mark_rec, BDD);
+#define sylvan_gc_mark_rec(mdd) CALL(sylvan_gc_mark_rec, mdd)
+
+/* Return the number of external references */
+size_t sylvan_count_refs();
+
+/* Add or remove BDD pointers to protect (indirect external references) */
+void sylvan_protect(BDD* ptr);
+void sylvan_unprotect(BDD* ptr);
+
+/* Return the number of protected BDD pointers */
+size_t sylvan_count_protected();
+
+/* Mark BDD for "notify on dead" */
+#define sylvan_notify_ondead(bdd) llmsset_notify_ondead(nodes, bdd&~sylvan_complement)
+
+/* Unary, binary and if-then-else operations */
+#define sylvan_not(a) (((BDD)a)^sylvan_complement)
+TASK_DECL_4(BDD, sylvan_ite, BDD, BDD, BDD, BDDVAR);
+#define sylvan_ite(a,b,c) (CALL(sylvan_ite,a,b,c,0))
+TASK_DECL_3(BDD, sylvan_and, BDD, BDD, BDDVAR);
+#define sylvan_and(a,b) (CALL(sylvan_and,a,b,0))
+TASK_DECL_3(BDD, sylvan_xor, BDD, BDD, BDDVAR);
+#define sylvan_xor(a,b) (CALL(sylvan_xor,a,b,0))
+/* Do not use nested calls for the xor/equiv parameter b! */
+#define sylvan_equiv(a,b) sylvan_not(sylvan_xor(a,b))
+#define sylvan_or(a,b) sylvan_not(sylvan_and(sylvan_not(a),sylvan_not(b)))
+#define sylvan_nand(a,b) sylvan_not(sylvan_and(a,b))
+#define sylvan_nor(a,b) sylvan_not(sylvan_or(a,b))
+#define sylvan_imp(a,b) sylvan_not(sylvan_and(a,sylvan_not(b)))
+#define sylvan_invimp(a,b) sylvan_not(sylvan_and(sylvan_not(a),b))
+#define sylvan_biimp sylvan_equiv
+#define sylvan_diff(a,b) sylvan_and(a,sylvan_not(b))
+#define sylvan_less(a,b) sylvan_and(sylvan_not(a),b)
+
+/* Existential and Universal quantifiers */
+TASK_DECL_3(BDD, sylvan_exists, BDD, BDD, BDDVAR);
+#define sylvan_exists(a, vars) (CALL(sylvan_exists, a, vars, 0))
+#define sylvan_forall(a, vars) (sylvan_not(CALL(sylvan_exists, sylvan_not(a), vars, 0)))
+
+/**
+ * Compute \exists v: A(...) \and B(...)
+ * Parameter vars is the cube (conjunction) of all v variables.
+ */
+TASK_DECL_4(BDD, sylvan_and_exists, BDD, BDD, BDDSET, BDDVAR);
+#define sylvan_and_exists(a,b,vars) CALL(sylvan_and_exists,a,b,vars,0)
+
+/**
+ * Compute R(s,t) = \exists x: A(s,x) \and B(x,t)
+ *      or R(s)   = \exists x: A(s,x) \and B(x)
+ * Assumes s,t are interleaved with s even and t odd (s+1).
+ * Parameter vars is the cube of all s and/or t variables.
+ * Other variables in A are "ignored" (existential quantification)
+ * Other variables in B are kept
+ * Alternatively, vars=false means all variables are in vars
+ *
+ * Use this function to concatenate two relations <s,x> --> <x,t>,
+ * or to take the 'previous' of a set <x> --> S
+ */
+TASK_DECL_4(BDD, sylvan_relprev, BDD, BDD, BDDSET, BDDVAR);
+#define sylvan_relprev(a,b,vars) CALL(sylvan_relprev,a,b,vars,0)
+
+/**
+ * Compute R(s) = \exists x: A(x) \and B(x,s)
+ * with support(result) = s, support(A) = s, support(B) = s+t
+ * Assumes s,t are interleaved with s even and t odd (s+1).
+ * Parameter vars is the cube of all s and/or t variables.
+ * Other variables in A are kept
+ * Other variables in B are "ignored" (existential quantification)
+ * Alternatively, vars=false means all variables are in vars
+ *
+ * Use this function to take the 'next' of a set S --> <t>
+ */
+TASK_DECL_4(BDD, sylvan_relnext, BDD, BDD, BDDSET, BDDVAR);
+#define sylvan_relnext(a,b,vars) CALL(sylvan_relnext,a,b,vars,0)
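With the interleaved-variable convention above, one image step is a single call. A sketch (assumptions: even variables encode the source state, odd variables the target state, and vars is the cube of both):

// Hypothetical sketch: successors of 'states' under relation 'trans'.
BDD image_step(BDD states, BDD trans, BDDSET vars)
{
    LACE_ME;
    return sylvan_relnext(states, trans, vars);
}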
+
+/**
+ * Computes the transitive closure by traversing the BDD recursively.
+ * See Y. Matsunaga, P. C. McGeer, R. K. Brayton
+ *     On Computing the Transitive Closure of a State Transition Relation
+ *     30th ACM Design Automation Conference, 1993.
+ *
+ * The input BDD must be a transition relation that only has levels of s,t
+ * with s,t interleaved with s even and t odd, i.e.
+ * s level 0,2,4 matches with t level 1,3,5 and so forth.
+ */
+TASK_DECL_2(BDD, sylvan_closure, BDD, BDDVAR);
+#define sylvan_closure(a) CALL(sylvan_closure,a,0)
+
+/**
+ * Calculate a@b (a constrain b), such that (b -> a@b) = (b -> a)
+ * Special cases:
+ *   - a@0 = 0
+ *   - a@1 = a
+ *   - 0@b = 0
+ *   - 1@b = 1
+ *   - a@a = 1
+ *   - a@not(a) = 0
+ */
+TASK_DECL_3(BDD, sylvan_constrain, BDD, BDD, BDDVAR);
+#define sylvan_constrain(f,c) (CALL(sylvan_constrain, (f), (c), 0))
+
+TASK_DECL_3(BDD, sylvan_restrict, BDD, BDD, BDDVAR);
+#define sylvan_restrict(f,c) (CALL(sylvan_restrict, (f), (c), 0))
+
+TASK_DECL_3(BDD, sylvan_compose, BDD, BDDMAP, BDDVAR);
+#define sylvan_compose(f,m) (CALL(sylvan_compose, (f), (m), 0))
+
+/**
+ * Calculate the support of a BDD.
+ * A variable v is in the support of a Boolean function f iff f[v<-0] != f[v<-1]
+ * It is also the set of all variables in the BDD nodes of the BDD.
+ */
+TASK_DECL_1(BDD, sylvan_support, BDD);
+#define sylvan_support(bdd) (CALL(sylvan_support, bdd))
+
+/**
+ * A set of BDD variables is a cube (conjunction) of variables in their positive form.
+ * Note 2015-06-10: This used to be a union (disjunction) of variables in their positive form.
+ */
+// empty bddset
+#define sylvan_set_empty() sylvan_true
+#define sylvan_set_isempty(set) (set == sylvan_true)
+// add variables to the bddset
+#define sylvan_set_add(set, var) sylvan_and(set, sylvan_ithvar(var))
+#define sylvan_set_addall(set, set_to_add) sylvan_and(set, set_to_add)
+// remove variables from the bddset
+#define sylvan_set_remove(set, var) sylvan_exists(set, var)
+#define sylvan_set_removeall(set, set_to_remove) sylvan_exists(set, set_to_remove)
+// iterate through all variables
+#define sylvan_set_var(set) (sylvan_var(set))
+#define sylvan_set_next(set) (sylvan_high(set))
+int sylvan_set_in(BDDSET set, BDDVAR var);
+size_t sylvan_set_count(BDDSET set);
+void sylvan_set_toarray(BDDSET set, BDDVAR *arr);
+// variables in arr should be ordered
+TASK_DECL_2(BDDSET, sylvan_set_fromarray, BDDVAR*, size_t);
+#define sylvan_set_fromarray(arr, length) ( CALL(sylvan_set_fromarray, arr, length) )
+void sylvan_test_isset(BDDSET set);
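A small sketch of building a variable cube and walking it with the macros above (the variable numbers are arbitrary):

// Hypothetical sketch: build the cube {1,3,5} and print its variables.
void bddset_example()
{
    LACE_ME;
    BDDSET vars = sylvan_set_empty();
    vars = sylvan_set_add(vars, 5);
    vars = sylvan_set_add(vars, 3);
    vars = sylvan_set_add(vars, 1);
    for (BDDSET it = vars; !sylvan_set_isempty(it); it = sylvan_set_next(it))
        printf("%u\n", sylvan_set_var(it));
}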
+
+/**
+ * BDDMAP maps BDDVAR-->BDD, implemented using BDD nodes.
+ * Based on disjunction of variables, but with high edges to BDDs instead of True terminals.
+ */
+// empty bddmap
+static inline BDDMAP sylvan_map_empty() { return sylvan_false; }
+static inline int sylvan_map_isempty(BDDMAP map) { return map == sylvan_false ? 1 : 0; }
+// add key-value pairs to the bddmap
+BDDMAP sylvan_map_add(BDDMAP map, BDDVAR key, BDD value);
+BDDMAP sylvan_map_addall(BDDMAP map_1, BDDMAP map_2);
+// remove key-value pairs from the bddmap
+BDDMAP sylvan_map_remove(BDDMAP map, BDDVAR key);
+BDDMAP sylvan_map_removeall(BDDMAP map, BDDSET toremove);
+// iterate through all pairs
+static inline BDDVAR sylvan_map_key(BDDMAP map) { return sylvan_var(map); }
+static inline BDD sylvan_map_value(BDDMAP map) { return sylvan_high(map); }
+static inline BDDMAP sylvan_map_next(BDDMAP map) { return sylvan_low(map); }
+// is a key in the map
+int sylvan_map_in(BDDMAP map, BDDVAR key);
+// count the number of keys
+size_t sylvan_map_count(BDDMAP map);
+// convert a BDDSET (cube of variables) to a map, with all variables pointing to the value
+BDDMAP sylvan_set_to_map(BDDSET set, BDD value);
+
+/**
+ * Node creation primitive.
+ * Careful: does not check ordering!
+ * Preferably only use when debugging!
+ */
+BDD sylvan_makenode(BDDVAR level, BDD low, BDD high);
+
+/**
+ * Write a DOT representation of a BDD
+ */
+void sylvan_printdot(BDD bdd);
+void sylvan_fprintdot(FILE *out, BDD bdd);
+
+/**
+ * Write a DOT representation of a BDD.
+ * This variant does not print complement edges.
+ */
+void sylvan_printdot_nc(BDD bdd);
+void sylvan_fprintdot_nc(FILE *out, BDD bdd);
+
+void sylvan_print(BDD bdd);
+void sylvan_fprint(FILE *f, BDD bdd);
+
+void sylvan_printsha(BDD bdd);
+void sylvan_fprintsha(FILE *f, BDD bdd);
+void sylvan_getsha(BDD bdd, char *target); // target must be at least 65 bytes...
+
+/**
+ * Calculate the number of satisfying variable assignments.
+ * The set of variables must be >= the support of the BDD.
+ */
+
+TASK_DECL_3(double, sylvan_satcount, BDD, BDDSET, BDDVAR);
+#define sylvan_satcount(bdd, variables) CALL(sylvan_satcount, bdd, variables, 0)
+
+/**
+ * Create a BDD cube representing the conjunction of variables in their positive or negative
+ * form depending on whether cube[idx] equals 0 (negative), 1 (positive) or 2 (any).
+ * CHANGED 2014/09/19: vars is now a BDDSET (ordered!)
+ */
+BDD sylvan_cube(BDDSET variables, uint8_t *cube);
+TASK_DECL_3(BDD, sylvan_union_cube, BDD, BDDSET, uint8_t*);
+#define sylvan_union_cube(bdd, variables, cube) CALL(sylvan_union_cube, bdd, variables, cube)
+
+/**
+ * Pick one satisfying variable assignment randomly for which <bdd> is true.
+ * The set <variables> must include all variables in the support of <bdd>.
+ *
+ * The function will set the values of str, such that
+ * str[index], where index is the index in the set <variables>, is set to
+ * 0 when the variable is negative, 1 when positive, or 2 when it could be either.
+ *
+ * This implies that str[i] will be set in the variable ordering as in <variables>.
+ *
+ * Returns 1 when successful, or 0 when no assignment is found (i.e. bdd==sylvan_false).
+ */
+int sylvan_sat_one(BDD bdd, BDDSET variables, uint8_t* str);
+
+/**
+ * Pick one satisfying variable assignment randomly from the given <bdd>.
+ * Functionally equivalent to performing sylvan_cube on the result of sylvan_sat_one.
+ * For the result: sylvan_and(res, bdd) = res.
+ */
+BDD sylvan_sat_one_bdd(BDD bdd);
+#define sylvan_pick_cube sylvan_sat_one_bdd
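A sketch of extracting a single satisfying assignment (assuming the set contains exactly three variables; the buffer length must match the number of variables in the set):

// Hypothetical sketch: read one satisfying assignment of f over a 3-variable set.
void sat_one_example(BDD f, BDDSET vars /* cube of exactly 3 variables */)
{
    uint8_t str[3];
    if (sylvan_sat_one(f, vars, str)) {
        for (int i = 0; i < 3; i++)
            printf("position %d = %s\n", i, str[i] == 2 ? "either" : (str[i] ? "1" : "0"));
    }
}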
+
+/**
+ * Enumerate all satisfying variable assignments from the given <bdd> using variables <vars>.
+ * Calls <cb> with four parameters: a user-supplied context, the array of BDD variables in <vars>,
+ * the cube (an array of values 0 and 1 for each variable in <vars>) and the length of the two arrays.
+ */
+LACE_TYPEDEF_CB(void, enum_cb, void*, BDDVAR*, uint8_t*, int);
+VOID_TASK_DECL_4(sylvan_enum, BDD, BDDSET, enum_cb, void*);
+#define sylvan_enum(bdd, vars, cb, context) CALL(sylvan_enum, bdd, vars, cb, context)
+VOID_TASK_DECL_4(sylvan_enum_par, BDD, BDDSET, enum_cb, void*);
+#define sylvan_enum_par(bdd, vars, cb, context) CALL(sylvan_enum_par, bdd, vars, cb, context)
+
+/**
+ * Enumerate all satisfiable variable assignments of the given <bdd> using variables <vars>.
+ * Calls <cb> with two parameters: a user-supplied context and the cube (an array of
+ * values 0 and 1 for each variable in <vars>).
+ * The BDDs that <cb> returns are pairwise merged (using or) and returned.
+ */
+LACE_TYPEDEF_CB(BDD, sylvan_collect_cb, void*, uint8_t*);
+TASK_DECL_4(BDD, sylvan_collect, BDD, BDDSET, sylvan_collect_cb, void*);
+#define sylvan_collect(bdd, vars, cb, context) CALL(sylvan_collect, bdd, vars, cb, context)
+
+/**
+ * Compute the number of distinct paths to sylvan_true in the BDD
+ */
+TASK_DECL_2(double, sylvan_pathcount, BDD, BDDVAR);
+#define sylvan_pathcount(bdd) (CALL(sylvan_pathcount, bdd, 0))
+
+/**
+ * Compute the number of BDD nodes in the BDD
+ */
+size_t sylvan_nodecount(BDD a);
+
+/**
+ * SAVING:
+ * use sylvan_serialize_add on every BDD you want to store
+ * use sylvan_serialize_get to retrieve the key of every stored BDD
+ * use sylvan_serialize_tofile
+ *
+ * LOADING:
+ * use sylvan_serialize_fromfile (implies sylvan_serialize_reset)
+ * use sylvan_serialize_get_reversed for every key
+ *
+ * MISC:
+ * use sylvan_serialize_reset to free all allocated structures
+ * use sylvan_serialize_totext to write a textual list of tuples of all BDDs.
+ *    format: [(<key>,<var>,<low>,<high>,<complement-high>),...]
+ *
+ * for the old sylvan_print functions, use sylvan_serialize_totext
+ */
+size_t sylvan_serialize_add(BDD bdd);
+size_t sylvan_serialize_get(BDD bdd);
+BDD sylvan_serialize_get_reversed(size_t value);
+void sylvan_serialize_reset();
+void sylvan_serialize_totext(FILE *out);
+void sylvan_serialize_tofile(FILE *out);
+void sylvan_serialize_fromfile(FILE *in);
+
+/**
+ * For debugging:
+ * if (part of) the BDD is not 'marked' in the nodes table, the assertion fails
+ * if the BDD is not ordered, returns 0
+ * if nicely ordered, returns 1
+ */
+TASK_DECL_1(int, sylvan_test_isbdd, BDD);
+#define sylvan_test_isbdd(bdd) CALL(sylvan_test_isbdd, bdd)
+
+/* Infrastructure for internal markings */
+typedef struct bdd_refs_internal
+{
+    size_t r_size, r_count;
+    size_t s_size, s_count;
+    BDD *results;
+    Task **spawns;
+} *bdd_refs_internal_t;
+
+extern DECLARE_THREAD_LOCAL(bdd_refs_key, bdd_refs_internal_t);
+
+static inline BDD
+bdd_refs_push(BDD bdd)
+{
+    LOCALIZE_THREAD_LOCAL(bdd_refs_key, bdd_refs_internal_t);
+    if (bdd_refs_key->r_count >= bdd_refs_key->r_size) {
+        bdd_refs_key->r_size *= 2;
+        bdd_refs_key->results = (BDD*)realloc(bdd_refs_key->results, sizeof(BDD) * bdd_refs_key->r_size);
+    }
+    bdd_refs_key->results[bdd_refs_key->r_count++] = bdd;
+    return bdd;
+}
+
+static inline void
+bdd_refs_pop(int amount)
+{
+    LOCALIZE_THREAD_LOCAL(bdd_refs_key, bdd_refs_internal_t);
+    bdd_refs_key->r_count-=amount;
+}
+
+static inline void
+bdd_refs_spawn(Task *t)
+{
+    LOCALIZE_THREAD_LOCAL(bdd_refs_key, bdd_refs_internal_t);
+    if (bdd_refs_key->s_count >= bdd_refs_key->s_size) {
+        bdd_refs_key->s_size *= 2;
+        bdd_refs_key->spawns = (Task**)realloc(bdd_refs_key->spawns, sizeof(Task*) * bdd_refs_key->s_size);
+    }
+    bdd_refs_key->spawns[bdd_refs_key->s_count++] = t;
+}
+
+static inline BDD
+bdd_refs_sync(BDD result)
+{
+    LOCALIZE_THREAD_LOCAL(bdd_refs_key, bdd_refs_internal_t);
+    bdd_refs_key->s_count--;
+    return result;
+}
+
+#include "sylvan_bdd_storm.h"
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
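The bdd_refs_* helpers keep intermediate results visible to the garbage collector during parallel recursion. The pattern used throughout sylvan_bdd.c can be sketched as a custom task like the following (a hypothetical operation, not part of the library; it merely recreates a BDD node by node):

// Hypothetical sketch of the spawn/push/sync/pop protection pattern.
TASK_1(BDD, my_copy, BDD, bdd)
{
    if (sylvan_isconst(bdd)) return bdd;
    sylvan_gc_test();
    BDDVAR var = sylvan_var(bdd);
    bdd_refs_spawn(SPAWN(my_copy, sylvan_low(bdd)));       // low cofactor in parallel
    BDD high = bdd_refs_push(CALL(my_copy, sylvan_high(bdd)));
    BDD low = bdd_refs_push(bdd_refs_sync(SYNC(my_copy))); // both now gc-protected
    BDD result = sylvan_makenode(var, low, high);
    bdd_refs_pop(2);
    return result;
}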
diff --git a/src/sylvan_bdd_storm.h b/src/sylvan_bdd_storm.h
new file mode 100644
index 000000000..5806fba5d
--- /dev/null
+++ b/src/sylvan_bdd_storm.h
@@ -0,0 +1,3 @@
+#define bdd_isnegated(dd) ((dd & sylvan_complement) ? 1 : 0)
+#define bdd_regular(dd) (dd & ~sylvan_complement)
+#define bdd_isterminal(dd) (dd == sylvan_false || dd == sylvan_true)
\ No newline at end of file
diff --git a/src/sylvan_cache.c b/src/sylvan_cache.c
new file mode 100644
index 000000000..7bfb72afd
--- /dev/null
+++ b/src/sylvan_cache.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2011-2015 Formal Methods and Tools, University of Twente
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <errno.h>    // for errno
+#include <stdio.h>    // for fprintf
+#include <stdint.h>   // for uint32_t etc
+#include <stdlib.h>   // for exit
+#include <string.h>   // for strerror
+#include <sys/mman.h> // for mmap
+
+#include
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef compiler_barrier
+#define compiler_barrier() { asm volatile("" ::: "memory"); }
+#endif
+
+#ifndef cas
+#define cas(ptr, old, new) (__sync_bool_compare_and_swap((ptr),(old),(new)))
+#endif
+
+/**
+ * This cache is designed to store a,b,c->res, with a,b,c,res 64-bit integers.
+ *
+ * Each cache bucket takes 32 bytes, 2 per cache line.
+ * Each cache status bucket takes 4 bytes, 16 per cache line.
+ * Therefore, a cache of size 2^N takes 36*(2^N) bytes.
+ */
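The 36 bytes per entry are one 32-byte bucket plus one 4-byte status word; a quick way to sanity-check a planned cache size (the size below is an arbitrary illustration):

// Hypothetical sketch: memory needed for a 2^26-entry operation cache.
#include <stdio.h>
#include <stdint.h>

int main()
{
    size_t n = (size_t)1 << 26;
    size_t bytes = n * (32 + 4); // 32-byte entry + 4-byte status per bucket
    printf("cache of %zu entries needs %zu MiB\n", n, bytes >> 20);
    return 0;
}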
+
+struct __attribute__((packed)) cache_entry {
+    uint64_t a;
+    uint64_t b;
+    uint64_t c;
+    uint64_t res;
+};
+
+static size_t cache_size; // power of 2
+static size_t cache_max;  // power of 2
+#if CACHE_MASK
+static size_t cache_mask; // cache_size-1
+#endif
+static cache_entry_t cache_table;
+static uint32_t* cache_status;
+
+static uint64_t next_opid;
+
+uint64_t
+cache_next_opid()
+{
+    return __sync_fetch_and_add(&next_opid, 1LL<<40);
+}
+
+// status: 0x80000000 - bitlock
+//         0x7fff0000 - hash (part of the 64-bit hash not used to position)
+//         0x0000ffff - tag (every put increases tag field)
+
+/* Rotating 64-bit FNV-1a hash */
+static uint64_t
+cache_hash(uint64_t a, uint64_t b, uint64_t c)
+{
+    const uint64_t prime = 1099511628211;
+    uint64_t hash = 14695981039346656037LLU;
+    hash = (hash ^ (a>>32));
+    hash = (hash ^ a) * prime;
+    hash = (hash ^ b) * prime;
+    hash = (hash ^ c) * prime;
+    return hash;
+}
+
+int
+cache_get(uint64_t a, uint64_t b, uint64_t c, uint64_t *res)
+{
+    const uint64_t hash = cache_hash(a, b, c);
+#if CACHE_MASK
+    volatile uint32_t *s_bucket = cache_status + (hash & cache_mask);
+    cache_entry_t bucket = cache_table + (hash & cache_mask);
+#else
+    volatile uint32_t *s_bucket = cache_status + (hash % cache_size);
+    cache_entry_t bucket = cache_table + (hash % cache_size);
+#endif
+    const uint32_t s = *s_bucket;
+    compiler_barrier();
+    // abort if locked
+    if (s & 0x80000000) return 0;
+    // abort if different hash
+    if ((s ^ (hash>>32)) & 0x7fff0000) return 0;
+    // abort if key different
+    if (bucket->a != a || bucket->b != b || bucket->c != c) return 0;
+    *res = bucket->res;
+    compiler_barrier();
+    // abort if status field changed after compiler_barrier()
+    return *s_bucket == s ? 1 : 0;
+}
+
+int
+cache_put(uint64_t a, uint64_t b, uint64_t c, uint64_t res)
+{
+    const uint64_t hash = cache_hash(a, b, c);
+#if CACHE_MASK
+    volatile uint32_t *s_bucket = cache_status + (hash & cache_mask);
+    cache_entry_t bucket = cache_table + (hash & cache_mask);
+#else
+    volatile uint32_t *s_bucket = cache_status + (hash % cache_size);
+    cache_entry_t bucket = cache_table + (hash % cache_size);
+#endif
+    const uint32_t s = *s_bucket;
+    // abort if locked
+    if (s & 0x80000000) return 0;
+    // abort if hash identical -> no: in iscasmc this occasionally causes timeouts?!
+    const uint32_t hash_mask = (hash>>32) & 0x7fff0000;
+    // if ((s & 0x7fff0000) == hash_mask) return 0;
+    // use cas to claim bucket
+    const uint32_t new_s = ((s+1) & 0x0000ffff) | hash_mask;
+    if (!cas(s_bucket, s, new_s | 0x80000000)) return 0;
+    // cas successful: write data
+    bucket->a = a;
+    bucket->b = b;
+    bucket->c = c;
+    bucket->res = res;
+    compiler_barrier();
+    // after compiler_barrier(), unlock status field
+    *s_bucket = new_s;
+    return 1;
+}
+
+void
+cache_create(size_t _cache_size, size_t _max_size)
+{
+#if CACHE_MASK
+    // Cache size must be a power of 2
+    if (__builtin_popcountll(_cache_size) != 1 || __builtin_popcountll(_max_size) != 1) {
+        fprintf(stderr, "cache_create: Table size must be a power of 2!\n");
+        exit(1);
+    }
+#endif
+
+    cache_size = _cache_size;
+    cache_max = _max_size;
+#if CACHE_MASK
+    cache_mask = cache_size - 1;
+#endif
+
+    if (cache_size > cache_max) {
+        fprintf(stderr, "cache_create: Table size must be <= max size!\n");
+        exit(1);
+    }
+
+    cache_table = (cache_entry_t)mmap(0, cache_max * sizeof(struct cache_entry), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    cache_status = (uint32_t*)mmap(0, cache_max * sizeof(uint32_t), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+    if (cache_table == (cache_entry_t)-1 || cache_status == (uint32_t*)-1) {
+        fprintf(stderr, "cache_create: Unable to allocate memory: %s!\n", strerror(errno));
+        exit(1);
+    }
+
+    next_opid = 512LL << 40;
+}
+
+void
+cache_free()
+{
+    munmap(cache_table, cache_max * sizeof(struct cache_entry));
+    munmap(cache_status, cache_max * sizeof(uint32_t));
+}
+
+void
+cache_clear()
+{
+    // a bit silly, but this works just fine, and does not require writing 0 everywhere...
+    cache_free();
+    cache_create(cache_size, cache_max);
+}
+
+void
+cache_setsize(size_t size)
+{
+    // easy solution
+    cache_free();
+    cache_create(size, cache_max);
+}
+
+size_t
+cache_getsize()
+{
+    return cache_size;
+}
+
+size_t
+cache_getused()
+{
+    size_t result = 0;
+    for (size_t i=0;i<cache_size;i++) if (cache_status[i]) result++;
+    return result;
+}
+
+size_t
+cache_getmaxsize()
+{
+    return cache_max;
+}
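Putting cache_next_opid and the get/put primitives together, a custom operation caches itself roughly like this (a sketch; the operation, its identifier, and its "computation" are hypothetical):

// Hypothetical sketch of the caching pattern for a custom unary operation.
static uint64_t CACHE_MY_OP; // initialize once with: CACHE_MY_OP = cache_next_opid();

TASK_1(BDD, my_op, BDD, dd)
{
    if (sylvan_isconst(dd)) return dd;
    BDD result;
    if (cache_get3(CACHE_MY_OP, dd, 0, 0, &result)) return result; // cache hit
    result = sylvan_not(dd);                 // stand-in for the real recursive work
    cache_put3(CACHE_MY_OP, dd, 0, 0, result); // a rejected put is harmless
    return result;
}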
diff --git a/src/sylvan_cache.h b/src/sylvan_cache.h
new file mode 100644
--- /dev/null
+++ b/src/sylvan_cache.h
+/*
+ * Copyright 2011-2015 Formal Methods and Tools, University of Twente
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h> // for uint32_t etc
+
+#include
+
+#ifndef CACHE_H
+#define CACHE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef CACHE_MASK
+#define CACHE_MASK 1
+#endif
+
+/**
+ * Operation cache
+ *
+ * Use cache_next_opid() at initialization time to generate unique "operation identifiers".
+ * You should store these operation identifiers as static globals in your implementation .C/.CPP file.
+ *
+ * For custom operations, just use the following functions:
+ * - cache_get3/cache_put3 for any operation with 1 BDD and 2 other values (that can be BDDs)
+ *   int success = cache_get3(opid, dd1, value2, value3, &result);
+ *   int success = cache_put3(opid, dd1, value2, value3, result);
+ * - cache_get4/cache_put4 for any operation with 4 BDDs
+ *   int success = cache_get4(opid, dd1, dd2, dd3, dd4, &result);
+ *   int success = cache_put4(opid, dd1, dd2, dd3, dd4, result);
+ *
+ * Notes:
+ * - The "result" is any 64-bit value
+ * - Use "0" for unused parameters
+ */
+
+typedef struct cache_entry *cache_entry_t;
+
+/**
+ * Primitives for cache get/put
+ */
+int cache_get(uint64_t a, uint64_t b, uint64_t c, uint64_t *res);
+int cache_put(uint64_t a, uint64_t b, uint64_t c, uint64_t res);
+
+/**
+ * Helper function to get the next 'operation id' (during initialization of modules)
+ */
+uint64_t cache_next_opid();
+
+/**
+ * dd must be MTBDD, d2/d3 can be anything
+ */
+static inline int __attribute__((unused))
+cache_get3(uint64_t opid, uint64_t dd, uint64_t d2, uint64_t d3, uint64_t *res)
+{
+    return cache_get(dd | opid, d2, d3, res);
+}
+
+/**
+ * dd/dd2/dd3/dd4 must be MTBDDs
+ */
+static inline int __attribute__((unused))
+cache_get4(uint64_t opid, uint64_t dd, uint64_t dd2, uint64_t dd3, uint64_t dd4, uint64_t *res)
+{
+    uint64_t p2 = dd2 | ((dd4 & 0x00000000000fffff) << 40); // 20 bits and complement bit
+    if (dd4 & 0x8000000000000000) p2 |= 0x4000000000000000;
+    uint64_t p3 = dd3 | ((dd4 & 0x000000fffff00000) << 20); // 20 bits
+
+    return cache_get3(opid, dd, p2, p3, res);
+}
+
+/**
+ * dd must be MTBDD, d2/d3 can be anything
+ */
+static inline int __attribute__((unused))
+cache_put3(uint64_t opid, uint64_t dd, uint64_t d2, uint64_t d3, uint64_t res)
+{
+    return cache_put(dd | opid, d2, d3, res);
+}
+
+/**
+ * dd/dd2/dd3/dd4 must be MTBDDs
+ */
+static inline int __attribute__((unused))
+cache_put4(uint64_t opid, uint64_t dd, uint64_t dd2, uint64_t dd3, uint64_t dd4, uint64_t res)
+{
+    uint64_t p2 = dd2 | ((dd4 & 0x00000000000fffff) << 40); // 20 bits and complement bit
+    if (dd4 & 0x8000000000000000) p2 |= 0x4000000000000000;
+    uint64_t p3 = dd3 | ((dd4 & 0x000000fffff00000) << 20); // 20 bits
+
+    return cache_put3(opid, dd, p2, p3, res);
+}
+
+/**
+ * Functions for Sylvan for cache management
+ */
+
+void cache_create(size_t _cache_size, size_t _max_size);
+
+void cache_free();
+
+void cache_clear();
+
+void cache_setsize(size_t size);
+
+size_t cache_getused();
+
+size_t cache_getsize();
+
+size_t cache_getmaxsize();
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/src/sylvan_common.c b/src/sylvan_common.c
new file mode 100644
index 000000000..aa0374a7b
--- /dev/null
+++ b/src/sylvan_common.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright 2011-2015 Formal Methods and Tools, University of Twente
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include
+#include
+
+#ifndef cas
+#define cas(ptr, old, new) (__sync_bool_compare_and_swap((ptr),(old),(new)))
+#endif
+
+/**
+ * Static global variables
+ */
+
+llmsset_t nodes;
+
+/**
+ * Retrieve nodes
+ */
+
+llmsset_t
+__sylvan_get_internal_data()
+{
+    return nodes;
+}
+
+/**
+ * Calculate table usage (in parallel)
+ */
+VOID_TASK_IMPL_2(sylvan_table_usage, size_t*, filled, size_t*, total)
+{
+    size_t tot = llmsset_get_size(nodes);
+    if (filled != NULL) *filled = llmsset_count_marked(nodes);
+    if (total != NULL) *total = tot;
+}
+
+/**
+ * Implementation of garbage collection
+ */
+static int gc_enabled = 1;
+static volatile int gc; // variable used in cas switch to ensure only one gc at a time
+
+struct reg_gc_mark_entry
+{
+    struct reg_gc_mark_entry *next;
+    gc_mark_cb cb;
+    int order;
+};
+
+static struct reg_gc_mark_entry *gc_mark_register = NULL;
+
+void
+sylvan_gc_add_mark(int order, gc_mark_cb cb)
+{
+    struct reg_gc_mark_entry *e = (struct reg_gc_mark_entry*)malloc(sizeof(struct reg_gc_mark_entry));
+    e->cb = cb;
+    e->order = order;
+    if (gc_mark_register == NULL || gc_mark_register->order>order) {
+        e->next = gc_mark_register;
+        gc_mark_register = e;
+        return;
+    }
+    struct reg_gc_mark_entry *f = gc_mark_register;
+    for (;;) {
+        if (f->next == NULL) {
+            e->next = NULL;
+            f->next = e;
+            return;
+        }
+        if (f->next->order > order) {
+            e->next = f->next;
+            f->next = e;
+            return;
+        }
+        f = f->next;
+    }
+}
+
+static gc_hook_cb gc_hook;
+
+void
+sylvan_gc_set_hook(gc_hook_cb new_hook)
+{
+    gc_hook = new_hook;
+}
+
+void
+sylvan_gc_enable()
+{
+    gc_enabled = 1;
+}
+
+void
+sylvan_gc_disable()
+{
+    gc_enabled = 0;
+}
+
+/* Mark hook for cache */
+VOID_TASK_0(sylvan_gc_mark_cache)
+{
+    /* We simply clear the cache.
+     * Alternatively, we could implement for example some strategy
+     * where part of the cache is cleared and part is marked
+     */
+    cache_clear();
+}
+
+/* Default hook */
+
+size_t
+next_size(size_t n)
+{
+#if SYLVAN_SIZE_FIBONACCI
+    size_t f1=1, f2=1;
+    for (;;) {
+        f2 += f1;
+        if (f2 > n) return f2;
+        f1 += f2;
+        if (f1 > n) return f1;
+    }
+#else
+    return n*2;
+#endif
+}
+
+VOID_TASK_IMPL_0(sylvan_gc_aggressive_resize)
+{
+    /**
+     * Always resize when gc called
+     */
+    size_t max_size = llmsset_get_max_size(nodes);
+    size_t size = llmsset_get_size(nodes);
+    if (size < max_size) {
+        size_t new_size = next_size(size);
+        if (new_size > max_size) new_size = max_size;
+        llmsset_set_size(nodes, new_size);
+        size_t cache_size = cache_getsize();
+        size_t cache_max = cache_getmaxsize();
+        if (cache_size < cache_max) {
+            new_size = next_size(cache_size);
+            if (new_size > cache_max) new_size = cache_max;
+            cache_setsize(new_size);
+        }
+    }
+}
+
+VOID_TASK_IMPL_0(sylvan_gc_default_hook)
+{
+    /**
+     * Default behavior:
+     * if we can resize the nodes set, and if we use more than 50%, then increase size
+     */
+    size_t max_size = llmsset_get_max_size(nodes);
+    size_t size = llmsset_get_size(nodes);
+    if (size < max_size) {
+        size_t marked = llmsset_count_marked(nodes);
+        if (marked*2 > size) {
+            size_t new_size = next_size(size);
+            if (new_size > max_size) new_size = max_size;
+            llmsset_set_size(nodes, new_size);
+            size_t cache_size = cache_getsize();
+            size_t cache_max = cache_getmaxsize();
+            if (cache_size < cache_max) {
+                new_size = next_size(cache_size);
+                if (new_size > cache_max) new_size = cache_max;
+                cache_setsize(new_size);
+            }
+        }
+    }
+}
+
+VOID_TASK_0(sylvan_gc_call_hook)
+{
+    // call hook function (resizing, reordering, etc)
+    WRAP(gc_hook);
+}
+
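The resize policy is pluggable: sylvan_gc_set_hook replaces the default hook. A sketch of a custom hook that logs usage before delegating to the aggressive strategy (the hook name and log format are hypothetical; sylvan_table_usage is the task implemented above):

// Hypothetical sketch: a gc hook that logs table usage, then resizes aggressively.
VOID_TASK_0(my_gc_hook)
{
    size_t filled, total;
    CALL(sylvan_table_usage, &filled, &total);
    fprintf(stderr, "gc: %zu/%zu buckets in use\n", filled, total);
    CALL(sylvan_gc_aggressive_resize);
}
// during initialization: sylvan_gc_set_hook(TASK(my_gc_hook));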
+VOID_TASK_0(sylvan_gc_rehash) +{ + // rehash marked nodes + llmsset_rehash(nodes); +} + +VOID_TASK_0(sylvan_gc_destroy_unmarked) +{ + llmsset_destroy_unmarked(nodes); +} + +VOID_TASK_0(sylvan_gc_go) +{ + sylvan_stats_count(SYLVAN_GC_COUNT); + sylvan_timer_start(SYLVAN_GC); + + // clear hash array + llmsset_clear(nodes); + + // call mark functions, hook and rehash + struct reg_gc_mark_entry *e = gc_mark_register; + while (e != NULL) { + WRAP(e->cb); + e = e->next; + } + + sylvan_timer_stop(SYLVAN_GC); +} + +/* Perform garbage collection */ +VOID_TASK_IMPL_0(sylvan_gc) +{ + if (!gc_enabled) return; + if (cas(&gc, 0, 1)) { + NEWFRAME(sylvan_gc_go); + gc = 0; + } else { + /* wait for new frame to appear */ + while (*(Task* volatile*)&(lace_newframe.t) == 0) {} + lace_yield(__lace_worker, __lace_dq_head); + } +} + +/** + * Package init and quit functions + */ +void +sylvan_init_package(size_t tablesize, size_t maxsize, size_t cachesize, size_t max_cachesize) +{ + if (tablesize > maxsize) tablesize = maxsize; + if (cachesize > max_cachesize) cachesize = max_cachesize; + + if (maxsize > 0x000003ffffffffff) { + fprintf(stderr, "sylvan_init_package error: tablesize must be <= 42 bits!\n"); + exit(1); + } + + nodes = llmsset_create(tablesize, maxsize); + cache_create(cachesize, max_cachesize); + + gc = 0; +#if SYLVAN_AGGRESSIVE_RESIZE + gc_hook = TASK(sylvan_gc_aggressive_resize); +#else + gc_hook = TASK(sylvan_gc_default_hook); +#endif + sylvan_gc_add_mark(10, TASK(sylvan_gc_mark_cache)); + sylvan_gc_add_mark(19, TASK(sylvan_gc_destroy_unmarked)); + sylvan_gc_add_mark(20, TASK(sylvan_gc_call_hook)); + sylvan_gc_add_mark(30, TASK(sylvan_gc_rehash)); + + LACE_ME; + sylvan_stats_init(); +} + +struct reg_quit_entry +{ + struct reg_quit_entry *next; + quit_cb cb; +}; + +static struct reg_quit_entry *quit_register = NULL; + +void +sylvan_register_quit(quit_cb cb) +{ + struct reg_quit_entry *e = (struct reg_quit_entry*)malloc(sizeof(struct reg_quit_entry)); + e->next = quit_register; + e->cb = cb; + quit_register = e; +} + +void +sylvan_quit() +{ + while (quit_register != NULL) { + struct reg_quit_entry *e = quit_register; + quit_register = e->next; + e->cb(); + free(e); + } + + while (gc_mark_register != NULL) { + struct reg_gc_mark_entry *e = gc_mark_register; + gc_mark_register = e->next; + free(e); + } + + cache_free(); + llmsset_free(nodes); +} diff --git a/src/sylvan_common.h b/src/sylvan_common.h new file mode 100644 index 000000000..7f512a904 --- /dev/null +++ b/src/sylvan_common.h @@ -0,0 +1,85 @@ +/* + * Copyright 2011-2015 Formal Methods and Tools, University of Twente + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef SYLVAN_COMMON_H
+#define SYLVAN_COMMON_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/* Garbage collection test task */
+#define sylvan_gc_test() YIELD_NEWFRAME()
+
+// BDD operations
+#define CACHE_BDD_ITE        (0LL<<40)
+#define CACHE_BDD_AND        (1LL<<40)
+#define CACHE_BDD_XOR        (2LL<<40)
+#define CACHE_BDD_EXISTS     (3LL<<40)
+#define CACHE_BDD_AND_EXISTS (4LL<<40)
+#define CACHE_BDD_RELNEXT    (5LL<<40)
+#define CACHE_BDD_RELPREV    (6LL<<40)
+#define CACHE_BDD_SATCOUNT   (7LL<<40)
+#define CACHE_BDD_COMPOSE    (8LL<<40)
+#define CACHE_BDD_RESTRICT   (9LL<<40)
+#define CACHE_BDD_CONSTRAIN  (10LL<<40)
+#define CACHE_BDD_CLOSURE    (11LL<<40)
+#define CACHE_BDD_ISBDD      (12LL<<40)
+#define CACHE_BDD_SUPPORT    (13LL<<40)
+#define CACHE_BDD_PATHCOUNT  (14LL<<40)
+
+// MDD operations
+#define CACHE_MDD_RELPROD    (20LL<<40)
+#define CACHE_MDD_MINUS      (21LL<<40)
+#define CACHE_MDD_UNION      (22LL<<40)
+#define CACHE_MDD_INTERSECT  (23LL<<40)
+#define CACHE_MDD_PROJECT    (24LL<<40)
+#define CACHE_MDD_JOIN       (25LL<<40)
+#define CACHE_MDD_MATCH      (26LL<<40)
+#define CACHE_MDD_RELPREV    (27LL<<40)
+#define CACHE_MDD_SATCOUNT   (28LL<<40)
+#define CACHE_MDD_SATCOUNTL1 (29LL<<40)
+#define CACHE_MDD_SATCOUNTL2 (30LL<<40)
+
+// MTBDD operations
+#define CACHE_MTBDD_APPLY          (40LL<<40)
+#define CACHE_MTBDD_UAPPLY         (41LL<<40)
+#define CACHE_MTBDD_ABSTRACT       (42LL<<40)
+#define CACHE_MTBDD_ITE            (43LL<<40)
+#define CACHE_MTBDD_AND_EXISTS     (44LL<<40)
+#define CACHE_MTBDD_SUPPORT        (45LL<<40)
+#define CACHE_MTBDD_COMPOSE        (46LL<<40)
+#define CACHE_MTBDD_EQUAL_NORM     (47LL<<40)
+#define CACHE_MTBDD_EQUAL_NORM_REL (48LL<<40)
+#define CACHE_MTBDD_MINIMUM        (49LL<<40)
+#define CACHE_MTBDD_MAXIMUM        (50LL<<40)
+#define CACHE_MTBDD_LEQ            (51LL<<40)
+#define CACHE_MTBDD_LESS           (52LL<<40)
+#define CACHE_MTBDD_GEQ            (53LL<<40)
+#define CACHE_MTBDD_GREATER        (54LL<<40)
+#define CACHE_MTBDD_NONZERO_COUNT  (55LL<<40)
+
+/**
+ * Registration of quit functions
+ */
+typedef void (*quit_cb)();
+void sylvan_register_quit(quit_cb cb);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/src/sylvan_config.h b/src/sylvan_config.h
new file mode 100644
index 000000000..cb234227d
--- /dev/null
+++ b/src/sylvan_config.h
@@ -0,0 +1,30 @@
+/* Operation cache: use bitmasks for the modulo computation (size must be a power of 2!) */
+#ifndef CACHE_MASK
+#define CACHE_MASK 1
+#endif
+
+/* Nodes table: use bitmasks for the modulo computation (size must be a power of 2!) */
+#ifndef LLMSSET_MASK
+#define LLMSSET_MASK 1
+#endif
+
+/**
+ * Use the Fibonacci sequence as the resizing strategy.
+ * This MAY result in more conservative memory consumption, but is not
+ * great for performance.
+ * By default, powers of 2 should be used.
+ * If you set this, then set CACHE_MASK and LLMSSET_MASK to 0.
+ */
+#ifndef SYLVAN_SIZE_FIBONACCI
+#define SYLVAN_SIZE_FIBONACCI 0
+#endif
+
+/* Enable/disable counters and timers */
+#ifndef SYLVAN_STATS
+#define SYLVAN_STATS 0
+#endif
+
+/* Aggressive or conservative resizing strategy */
+#ifndef SYLVAN_AGGRESSIVE_RESIZE
+#define SYLVAN_AGGRESSIVE_RESIZE 1
+#endif
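All of these are plain preprocessor switches, so they can be overridden from the build system; a sketch of the Fibonacci configuration (which, per the comment above, requires both masks off):

/* Hypothetical sketch: compile-time overrides, e.g. via -D flags or a prelude
   header. Fibonacci sizing is incompatible with the power-of-2 masking. */
#define SYLVAN_SIZE_FIBONACCI 1
#define CACHE_MASK 0
#define LLMSSET_MASK 0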
diff --git a/src/sylvan_gmp.c b/src/sylvan_gmp.c
new file mode 100644
index 000000000..0437b1be8
--- /dev/null
+++ b/src/sylvan_gmp.c
@@ -0,0 +1,595 @@
+/*
+ * Copyright 2011-2015 Formal Methods and Tools, University of Twente
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+
+/**
+ * helper function for hash
+ */
+#ifndef rotl64
+static inline uint64_t
+rotl64(uint64_t x, int8_t r)
+{
+    return ((x<<r) | (x>>(64-r)));
+}
+#endif
+
+static uint64_t
+gmp_hash(const uint64_t v, const uint64_t seed)
+{
+    /* Hash the mpq in pointer v
+     * A simpler way would be to hash the result of mpq_get_d.
+     * We just hash on the contents of the memory */
+
+    mpq_ptr x = (mpq_ptr)(size_t)v;
+
+    const uint64_t prime = 1099511628211;
+    uint64_t hash = seed;
+    mp_limb_t *limbs;
+
+    // hash "numerator" limbs
+    limbs = x[0]._mp_num._mp_d;
+    for (int i=0; i<abs(x[0]._mp_num._mp_size); i++) hash = (hash ^ limbs[i]) * prime;
+
+    // hash "denominator" limbs
+    limbs = x[0]._mp_den._mp_d;
+    for (int i=0; i<abs(x[0]._mp_den._mp_size); i++) hash = (hash ^ limbs[i]) * prime;
+
+    return hash ^ (hash >> 32);
+}
+
+static int
+gmp_equals(const uint64_t left, const uint64_t right)
+{
+    /* This function is called by the unique table when comparing a new
+       leaf with an existing leaf */
+    mpq_ptr x = (mpq_ptr)(size_t)left;
+    mpq_ptr y = (mpq_ptr)(size_t)right;
+
+    /* Just compare x and y */
+    return mpq_equal(x, y) ? 1 : 0;
+}
+
+static void
+gmp_create(uint64_t *val)
+{
+    /* This function is called by the unique table when a leaf does not yet exist.
+       We make a copy, which will be stored in the hash table. */
+    mpq_ptr x = (mpq_ptr)malloc(sizeof(__mpq_struct));
+    mpq_init(x);
+    mpq_set(x, *(mpq_ptr*)val);
+    *(mpq_ptr*)val = x;
+}
+
+static void
+gmp_destroy(uint64_t val)
+{
+    /* This function is called by the unique table
+       when a leaf is removed during garbage collection.
+     */
+    mpq_clear((mpq_ptr)val);
+    free((void*)val);
+}
+
+static uint32_t gmp_type;
+static uint64_t CACHE_GMP_AND_EXISTS;
+
+/**
+ * Initialize gmp custom leaves
+ */
+void
+gmp_init()
+{
+    /* Register custom leaf 3 */
+    gmp_type = mtbdd_register_custom_leaf(gmp_hash, gmp_equals, gmp_create, gmp_destroy);
+    CACHE_GMP_AND_EXISTS = cache_next_opid();
+}
+
+/**
+ * Create GMP mpq leaf
+ */
+MTBDD
+mtbdd_gmp(mpq_t val)
+{
+    mpq_canonicalize(val);
+    return mtbdd_makeleaf(gmp_type, (size_t)val);
+}
+
+/**
+ * Operation "plus" for two mpq MTBDDs
+ * Interpret partial function as "0"
+ */
+TASK_IMPL_2(MTBDD, gmp_op_plus, MTBDD*, pa, MTBDD*, pb)
+{
+    MTBDD a = *pa, b = *pb;
+
+    /* Check for partial functions */
+    if (a == mtbdd_false) return b;
+    if (b == mtbdd_false) return a;
+
+    /* If both leaves, compute plus */
+    if (mtbdd_isleaf(a) && mtbdd_isleaf(b)) {
+        mpq_ptr ma = (mpq_ptr)mtbdd_getvalue(a);
+        mpq_ptr mb = (mpq_ptr)mtbdd_getvalue(b);
+
+        mpq_t mres;
+        mpq_init(mres);
+        mpq_add(mres, ma, mb);
+        MTBDD res = mtbdd_gmp(mres);
+        mpq_clear(mres);
+        return res;
+    }
+
+    /* Commutative, so swap a,b for better cache performance */
+    if (a < b) {
+        *pa = b;
+        *pb = a;
+    }
+
+    return mtbdd_invalid;
+}
+
+/**
+ * Operation "minus" for two mpq MTBDDs
+ * Interpret partial function as "0"
+ */
+TASK_IMPL_2(MTBDD, gmp_op_minus, MTBDD*, pa, MTBDD*, pb)
+{
+    MTBDD a = *pa, b = *pb;
+
+    /* Check for partial functions */
+    if (a == mtbdd_false) return gmp_neg(b);
+    if (b == mtbdd_false) return a;
+
+    /* If both leaves, compute minus */
+    if (mtbdd_isleaf(a) && mtbdd_isleaf(b)) {
+        mpq_ptr ma = (mpq_ptr)mtbdd_getvalue(a);
+        mpq_ptr mb = (mpq_ptr)mtbdd_getvalue(b);
+
+        mpq_t mres;
+        mpq_init(mres);
+        mpq_sub(mres, ma, mb);
+        MTBDD res = mtbdd_gmp(mres);
+        mpq_clear(mres);
+        return res;
+    }
+
+    return mtbdd_invalid;
+}
+
+/**
+ * Operation "times" for two mpq MTBDDs.
+ * One of the parameters can be a BDD, then it is interpreted as a filter.
+ * For partial functions, the domain is the intersection
+ */
+TASK_IMPL_2(MTBDD, gmp_op_times, MTBDD*, pa, MTBDD*, pb)
+{
+    MTBDD a = *pa, b = *pb;
+
+    /* Check for partial functions and for Boolean (filter) */
+    if (a == mtbdd_false || b == mtbdd_false) return mtbdd_false;
+
+    /* If one of Boolean, interpret as filter */
+    if (a == mtbdd_true) return b;
+    if (b == mtbdd_true) return a;
+
+    /* Handle multiplication of leaves */
+    if (mtbdd_isleaf(a) && mtbdd_isleaf(b)) {
+        mpq_ptr ma = (mpq_ptr)mtbdd_getvalue(a);
+        mpq_ptr mb = (mpq_ptr)mtbdd_getvalue(b);
+
+        // compute result
+        mpq_t mres;
+        mpq_init(mres);
+        mpq_mul(mres, ma, mb);
+        MTBDD res = mtbdd_gmp(mres);
+        mpq_clear(mres);
+        return res;
+    }
+
+    /* Commutative, so make "a" the lowest for better cache performance */
+    if (a < b) {
+        *pa = b;
+        *pb = a;
+    }
+
+    return mtbdd_invalid;
+}
+
+/**
+ * Operation "divide" for two mpq MTBDDs.
+ * For partial functions, the domain is the intersection
+ */
+TASK_IMPL_2(MTBDD, gmp_op_divide, MTBDD*, pa, MTBDD*, pb)
+{
+    MTBDD a = *pa, b = *pb;
+
+    /* Check for partial functions */
+    if (a == mtbdd_false || b == mtbdd_false) return mtbdd_false;
+
+    /* Handle division of leaves */
+    if (mtbdd_isleaf(a) && mtbdd_isleaf(b)) {
+        mpq_ptr ma = (mpq_ptr)mtbdd_getvalue(a);
+        mpq_ptr mb = (mpq_ptr)mtbdd_getvalue(b);
+
+        // compute result
+        mpq_t mres;
+        mpq_init(mres);
+        mpq_div(mres, ma, mb);
+        MTBDD res = mtbdd_gmp(mres);
+        mpq_clear(mres);
+        return res;
+    }
+
+    return mtbdd_invalid;
+}
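Creating leaves and applying these operators is straightforward; a sketch (assuming gmp_init was called after the MTBDD layer was initialized; reference-counting of intermediate results is omitted):

// Hypothetical sketch: build two constant mpq leaves and add them.
void gmp_example()
{
    LACE_ME;
    mpq_t q1, q2;
    mpq_init(q1); mpq_set_ui(q1, 1, 3); // 1/3
    mpq_init(q2); mpq_set_ui(q2, 1, 6); // 1/6
    MTBDD a = mtbdd_gmp(q1);
    MTBDD b = mtbdd_gmp(q2);
    MTBDD sum = gmp_plus(a, b);         // leaf with value 1/2
    mpq_clear(q1); mpq_clear(q2);
    (void)sum;
}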
+
+/**
+ * Operation "min" for two mpq MTBDDs.
+ */
+TASK_IMPL_2(MTBDD, gmp_op_min, MTBDD*, pa, MTBDD*, pb)
+{
+    MTBDD a = *pa, b = *pb;
+
+    /* Handle partial functions */
+    if (a == mtbdd_false) return b;
+    if (b == mtbdd_false) return a;
+
+    /* Handle trivial case */
+    if (a == b) return a;
+
+    /* Compute result for leaves */
+    if (mtbdd_isleaf(a) && mtbdd_isleaf(b)) {
+        mpq_ptr ma = (mpq_ptr)mtbdd_getvalue(a);
+        mpq_ptr mb = (mpq_ptr)mtbdd_getvalue(b);
+        int cmp = mpq_cmp(ma, mb);
+        return cmp < 0 ? a : b;
+    }
+
+    /* For cache performance */
+    if (a < b) {
+        *pa = b;
+        *pb = a;
+    }
+
+    return mtbdd_invalid;
+}
+
+/**
+ * Operation "max" for two mpq MTBDDs.
+ */
+TASK_IMPL_2(MTBDD, gmp_op_max, MTBDD*, pa, MTBDD*, pb)
+{
+    MTBDD a = *pa, b = *pb;
+
+    /* Handle partial functions */
+    if (a == mtbdd_false) return b;
+    if (b == mtbdd_false) return a;
+
+    /* Handle trivial case */
+    if (a == b) return a;
+
+    /* Compute result for leaves */
+    if (mtbdd_isleaf(a) && mtbdd_isleaf(b)) {
+        mpq_ptr ma = (mpq_ptr)mtbdd_getvalue(a);
+        mpq_ptr mb = (mpq_ptr)mtbdd_getvalue(b);
+        int cmp = mpq_cmp(ma, mb);
+        return cmp > 0 ? a : b;
+    }
+
+    /* For cache performance */
+    if (a < b) {
+        *pa = b;
+        *pb = a;
+    }
+
+    return mtbdd_invalid;
+}
+
+/**
+ * Operation "neg" for one mpq MTBDD
+ */
+TASK_IMPL_2(MTBDD, gmp_op_neg, MTBDD, dd, size_t, p)
+{
+    /* Handle partial functions */
+    if (dd == mtbdd_false) return mtbdd_false;
+
+    /* Compute result for leaf */
+    if (mtbdd_isleaf(dd)) {
+        mpq_ptr m = (mpq_ptr)mtbdd_getvalue(dd);
+
+        mpq_t mres;
+        mpq_init(mres);
+        mpq_neg(mres, m);
+        MTBDD res = mtbdd_gmp(mres);
+        mpq_clear(mres);
+        return res;
+    }
+
+    return mtbdd_invalid;
+    (void)p;
+}
+
+/**
+ * Operation "abs" for one mpq MTBDD
+ */
+TASK_IMPL_2(MTBDD, gmp_op_abs, MTBDD, dd, size_t, p)
+{
+    /* Handle partial functions */
+    if (dd == mtbdd_false) return mtbdd_false;
+
+    /* Compute result for leaf */
+    if (mtbdd_isleaf(dd)) {
+        mpq_ptr m = (mpq_ptr)mtbdd_getvalue(dd);
+
+        mpq_t mres;
+        mpq_init(mres);
+        mpq_abs(mres, m);
+        MTBDD res = mtbdd_gmp(mres);
+        mpq_clear(mres);
+        return res;
+    }
+
+    return mtbdd_invalid;
+    (void)p;
+}
+
+/**
+ * The abstraction operators are called in either of two ways:
+ * - with k=0, then just calculate "a op b"
+ * - with k<>0, then just calculate "a := a op a", k times
+ */
+
+TASK_IMPL_3(MTBDD, gmp_abstract_op_plus, MTBDD, a, MTBDD, b, int, k)
+{
+    if (k==0) {
+        return mtbdd_apply(a, b, TASK(gmp_op_plus));
+    } else {
+        MTBDD res = a;
+        for (int i=0; i<k; i++) res = mtbdd_apply(res, res, TASK(gmp_op_plus));
+        return res;
+    }
+}
+
+TASK_IMPL_3(MTBDD, gmp_abstract_op_times, MTBDD, a, MTBDD, b, int, k)
+{
+    if (k==0) {
+        return mtbdd_apply(a, b, TASK(gmp_op_times));
+    } else {
+        MTBDD res = a;
+        for (int i=0; i<k; i++) res = mtbdd_apply(res, res, TASK(gmp_op_times));
+        return res;
+    }
+}
+
+TASK_IMPL_3(MTBDD, gmp_abstract_op_min, MTBDD, a, MTBDD, b, int, k)
+{
+    if (k==0) return mtbdd_apply(a, b, TASK(gmp_op_min));
+    else return a; // min(a, a) = a
+}
+
+TASK_IMPL_3(MTBDD, gmp_abstract_op_max, MTBDD, a, MTBDD, b, int, k)
+{
+    if (k==0) return mtbdd_apply(a, b, TASK(gmp_op_max));
+    else return a; // max(a, a) = a
+}
+
+/**
+ * Convert to Boolean MTBDD, terminals >= value (double) to True, or False otherwise.
+ */
+TASK_2(MTBDD, gmp_op_threshold_d, MTBDD, a, size_t, svalue)
+{
+    /* Handle partial function */
+    if (a == mtbdd_false) return mtbdd_false;
+
+    /* Compute result */
+    if (mtbdd_isleaf(a)) {
+        double value = *(double*)&svalue;
+        mpq_ptr ma = (mpq_ptr)mtbdd_getvalue(a);
+        return mpq_get_d(ma) >= value ? mtbdd_true : mtbdd_false;
+    }
+
+    return mtbdd_invalid;
+}
+
+/**
+ * Convert to Boolean MTBDD, terminals > value (double) to True, or False otherwise.
+ */
+TASK_2(MTBDD, gmp_op_strict_threshold_d, MTBDD, a, size_t, svalue)
+{
+    /* Handle partial function */
+    if (a == mtbdd_false) return mtbdd_false;
+
+    /* Compute result */
+    if (mtbdd_isleaf(a)) {
+        double value = *(double*)&svalue;
+        mpq_ptr ma = (mpq_ptr)mtbdd_getvalue(a);
+        return mpq_get_d(ma) > value ? mtbdd_true : mtbdd_false;
+    }
+
+    return mtbdd_invalid;
+}
+
+TASK_IMPL_2(MTBDD, gmp_threshold_d, MTBDD, dd, double, d)
+{
+    return mtbdd_uapply(dd, TASK(gmp_op_threshold_d), *(size_t*)&d);
+}
+
+TASK_IMPL_2(MTBDD, gmp_strict_threshold_d, MTBDD, dd, double, d)
+{
+    return mtbdd_uapply(dd, TASK(gmp_op_strict_threshold_d), *(size_t*)&d);
+}
+
+/**
+ * Operation "threshold" for mpq MTBDDs.
+ * The second parameter must be an mpq leaf.
+ */
+TASK_IMPL_2(MTBDD, gmp_op_threshold, MTBDD*, pa, MTBDD*, pb)
+{
+    MTBDD a = *pa, b = *pb;
+
+    /* Check for partial functions */
+    if (a == mtbdd_false) return mtbdd_false;
+
+    /* Handle comparison of leaves */
+    if (mtbdd_isleaf(a)) {
+        mpq_ptr ma = (mpq_ptr)mtbdd_getvalue(a);
+        mpq_ptr mb = (mpq_ptr)mtbdd_getvalue(b);
+        int cmp = mpq_cmp(ma, mb);
+        return cmp >= 0 ? mtbdd_true : mtbdd_false;
+    }
+
+    return mtbdd_invalid;
+}
+
+/**
+ * Operation "strict threshold" for mpq MTBDDs.
+ * The second parameter must be an mpq leaf.
+ */
+TASK_IMPL_2(MTBDD, gmp_op_strict_threshold, MTBDD*, pa, MTBDD*, pb)
+{
+    MTBDD a = *pa, b = *pb;
+
+    /* Check for partial functions */
+    if (a == mtbdd_false) return mtbdd_false;
+
+    /* Handle comparison of leaves */
+    if (mtbdd_isleaf(a)) {
+        mpq_ptr ma = (mpq_ptr)mtbdd_getvalue(a);
+        mpq_ptr mb = (mpq_ptr)mtbdd_getvalue(b);
+        int cmp = mpq_cmp(ma, mb);
+        return cmp > 0 ? mtbdd_true : mtbdd_false;
+    }
+
+    return mtbdd_invalid;
+}
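A sketch of turning an mpq-valued MTBDD into a Boolean MTBDD of states whose value meets a bound (gc-protection of intermediates omitted):

// Hypothetical sketch: states with value >= 1/2 (mpq bound) and > 0.5 (double bound).
MTBDD states_above(MTBDD values)
{
    LACE_ME;
    mpq_t half;
    mpq_init(half);
    mpq_set_ui(half, 1, 2);
    MTBDD bound = mtbdd_gmp(half);
    mpq_clear(half);
    MTBDD geq = gmp_threshold(values, bound);        // terminals >= 1/2 -> true
    MTBDD gt  = gmp_strict_threshold_d(values, 0.5); // terminals >  0.5 -> true
    (void)gt;
    return geq;
}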
+
+/**
+ * Multiply <a> and <b>, and abstract the variables in <vars> using summation.
+ * This is similar to the "and_exists" operation in BDDs.
+ */
+TASK_IMPL_3(MTBDD, gmp_and_exists, MTBDD, a, MTBDD, b, MTBDD, v)
+{
+    /* Check terminal cases */
+
+    /* If v == true, then <vars> is an empty set */
+    if (v == mtbdd_true) return mtbdd_apply(a, b, TASK(gmp_op_times));
+
+    /* Try the times operator on a and b */
+    MTBDD result = CALL(gmp_op_times, &a, &b);
+    if (result != mtbdd_invalid) {
+        /* Times operator successful, store reference (for garbage collection) */
+        mtbdd_refs_push(result);
+        /* ... and perform abstraction */
+        result = mtbdd_abstract(result, v, TASK(gmp_abstract_op_plus));
+        mtbdd_refs_pop(1);
+        /* Note that the operation cache is used in mtbdd_abstract */
+        return result;
+    }
+
+    /* Maybe perform garbage collection */
+    sylvan_gc_test();
+
+    /* Check cache. Note that we do this now, since the times operator might swap a and b (commutative) */
+    if (cache_get3(CACHE_GMP_AND_EXISTS, a, b, v, &result)) return result;
+
+    /* Now, v is not a constant, and either a or b is not a constant */
+
+    /* Get top variable */
+    int la = mtbdd_isleaf(a);
+    int lb = mtbdd_isleaf(b);
+    mtbddnode_t na = la ? 0 : GETNODE(a);
+    mtbddnode_t nb = lb ? 0 : GETNODE(b);
+    uint32_t va = la ? 0xffffffff : mtbddnode_getvariable(na);
+    uint32_t vb = lb ? 0xffffffff : mtbddnode_getvariable(nb);
+    uint32_t var = va < vb ? va : vb;
+
+    mtbddnode_t nv = GETNODE(v);
+    uint32_t vv = mtbddnode_getvariable(nv);
+
+    if (vv < var) {
+        /* Recursive, then abstract result */
+        result = CALL(gmp_and_exists, a, b, node_gethigh(v, nv));
+        mtbdd_refs_push(result);
+        result = mtbdd_apply(result, result, TASK(gmp_op_plus));
+        mtbdd_refs_pop(1);
+    } else {
+        /* Get cofactors */
+        MTBDD alow, ahigh, blow, bhigh;
+        alow  = (!la && va == var) ? node_getlow(a, na) : a;
+        ahigh = (!la && va == var) ? node_gethigh(a, na) : a;
+        blow  = (!lb && vb == var) ? node_getlow(b, nb) : b;
+        bhigh = (!lb && vb == var) ? node_gethigh(b, nb) : b;
+
+        if (vv == var) {
+            /* Recursive, then abstract result */
+            mtbdd_refs_spawn(SPAWN(gmp_and_exists, ahigh, bhigh, node_gethigh(v, nv)));
+            MTBDD low = mtbdd_refs_push(CALL(gmp_and_exists, alow, blow, node_gethigh(v, nv)));
+            MTBDD high = mtbdd_refs_push(mtbdd_refs_sync(SYNC(gmp_and_exists)));
+            result = CALL(mtbdd_apply, low, high, TASK(gmp_op_plus));
+            mtbdd_refs_pop(2);
+        } else /* vv > var */ {
+            /* Recursive, then create node */
+            mtbdd_refs_spawn(SPAWN(gmp_and_exists, ahigh, bhigh, v));
+            MTBDD low = mtbdd_refs_push(CALL(gmp_and_exists, alow, blow, v));
+            MTBDD high = mtbdd_refs_sync(SYNC(gmp_and_exists));
+            mtbdd_refs_pop(1);
+            result = mtbdd_makenode(var, low, high);
+        }
+    }
+
+    /* Store in cache */
+    cache_put3(CACHE_GMP_AND_EXISTS, a, b, v, result);
+    return result;
+}
diff --git a/src/sylvan_gmp.h b/src/sylvan_gmp.h
new file mode 100644
index 000000000..fbf6bc2ad
--- /dev/null
+++ b/src/sylvan_gmp.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2011-2015 Formal Methods and Tools, University of Twente
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This is an implementation of GMP mpq custom leaves of MTBDDs
+ */
+
+#ifndef SYLVAN_GMP_H
+#define SYLVAN_GMP_H
+
+#include
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * Initialize GMP custom leaves
+ */
+void gmp_init();
+
+/**
+ * Create MPQ leaf
+ */
+MTBDD mtbdd_gmp(mpq_t val);
+
+/**
+ * Operation "plus" for two mpq MTBDDs
+ */
+TASK_DECL_2(MTBDD, gmp_op_plus, MTBDD*, MTBDD*);
+TASK_DECL_3(MTBDD, gmp_abstract_op_plus, MTBDD, MTBDD, int);
+
+/**
+ * Operation "minus" for two mpq MTBDDs
+ */
+TASK_DECL_2(MTBDD, gmp_op_minus, MTBDD*, MTBDD*);
+
+/**
+ * Operation "times" for two mpq MTBDDs
+ */
+TASK_DECL_2(MTBDD, gmp_op_times, MTBDD*, MTBDD*);
+TASK_DECL_3(MTBDD, gmp_abstract_op_times, MTBDD, MTBDD, int);
+
+/**
+ * Operation "divide" for two mpq MTBDDs
+ */
+TASK_DECL_2(MTBDD, gmp_op_divide, MTBDD*, MTBDD*);
+
+/**
+ * Operation "min" for two mpq MTBDDs
+ */
+TASK_DECL_2(MTBDD, gmp_op_min, MTBDD*, MTBDD*);
+TASK_DECL_3(MTBDD, gmp_abstract_op_min, MTBDD, MTBDD, int);
+
+/**
+ * Operation "max" for two mpq MTBDDs
+ */
+TASK_DECL_2(MTBDD, gmp_op_max, MTBDD*, MTBDD*);
+TASK_DECL_3(MTBDD, gmp_abstract_op_max, MTBDD, MTBDD, int);
+
+/**
+ * Operation "negate" for one mpq MTBDD
+ */
+TASK_DECL_2(MTBDD, gmp_op_neg, MTBDD, size_t);
+
+/**
+ * Operation "abs" for one mpq MTBDD
+ */
+TASK_DECL_2(MTBDD, gmp_op_abs, MTBDD, size_t);
+
+/**
+ * Compute a + b
+ */
+#define gmp_plus(a, b) mtbdd_apply(a, b, TASK(gmp_op_plus))
+
+/**
+ * Compute a - b
+ */
+#define gmp_minus(a, b) mtbdd_apply(a, b, TASK(gmp_op_minus))
+
+/**
+ * Compute a * b
+ */
+#define gmp_times(a, b) mtbdd_apply(a, b, TASK(gmp_op_times))
+
+/**
+ * Compute a / b
+ */
+#define gmp_divide(a, b) mtbdd_apply(a, b, TASK(gmp_op_divide))
+
+/**
+ * Compute min(a, b)
+ */
+#define gmp_min(a, b) mtbdd_apply(a, b, TASK(gmp_op_min))
TASK(gmp_op_max)) + +/** + * Compute -a + */ +#define gmp_neg(a) mtbdd_uapply(a, TASK(gmp_op_neg), 0); + +/** + * Compute abs(a) + */ +#define gmp_abs(a) mtbdd_uapply(a, TASK(gmp_op_abs), 0); + +/** + * Abstract the variables in from by taking the sum of all values + */ +#define gmp_abstract_plus(dd, v) mtbdd_abstract(dd, v, TASK(gmp_abstract_op_plus)) + +/** + * Abstract the variables in from by taking the product of all values + */ +#define gmp_abstract_times(dd, v) mtbdd_abstract(dd, v, TASK(gmp_abstract_op_times)) + +/** + * Abstract the variables in from by taking the minimum of all values + */ +#define gmp_abstract_min(dd, v) mtbdd_abstract(dd, v, TASK(gmp_abstract_op_min)) + +/** + * Abstract the variables in from by taking the maximum of all values + */ +#define gmp_abstract_max(dd, v) mtbdd_abstract(dd, v, TASK(gmp_abstract_op_max)) + +/** + * Multiply and , and abstract variables using summation. + * This is similar to the "and_exists" operation in BDDs. + */ +TASK_DECL_3(MTBDD, gmp_and_exists, MTBDD, MTBDD, MTBDD); +#define gmp_and_exists(a, b, vars) CALL(gmp_and_exists, a, b, vars) + +/** + * Convert to a Boolean MTBDD, translate terminals >= value to 1 and to 0 otherwise; + * Parameter
is the MTBDD to convert; parameter is an GMP mpq leaf + */ +TASK_DECL_2(MTBDD, gmp_op_threshold, MTBDD*, MTBDD*); +#define gmp_threshold(dd, value) mtbdd_apply(dd, value, TASK(gmp_op_threshold)); + +/** + * Convert to a Boolean MTBDD, translate terminals > value to 1 and to 0 otherwise; + * Parameter
is the MTBDD to convert; parameter is an GMP mpq leaf + */ +TASK_DECL_2(MTBDD, gmp_op_strict_threshold, MTBDD*, MTBDD*); +#define gmp_strict_threshold(dd, value) mtbdd_apply(dd, value, TASK(gmp_op_strict_threshold)); + +/** + * Convert to a Boolean MTBDD, translate terminals >= value to 1 and to 0 otherwise; + */ +TASK_DECL_2(MTBDD, gmp_threshold_d, MTBDD, double); +#define gmp_threshold_d(dd, value) CALL(gmp_threshold_d, dd, value) + +/** + * Convert to a Boolean MTBDD, translate terminals > value to 1 and to 0 otherwise; + */ +TASK_DECL_2(MTBDD, gmp_strict_threshold_d, MTBDD, double); +#define gmp_strict_threshold_d(dd, value) CALL(gmp_strict_threshold_d, dd, value) + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif diff --git a/src/sylvan_ldd.c b/src/sylvan_ldd.c new file mode 100644 index 000000000..814b7e61c --- /dev/null +++ b/src/sylvan_ldd.c @@ -0,0 +1,2560 @@ +/* + * Copyright 2011-2014 Formal Methods and Tools, University of Twente + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/** + * MDD node structure + */ +typedef struct __attribute__((packed)) mddnode { + uint64_t a, b; +} * mddnode_t; // 16 bytes + +// RmRR RRRR RRRR VVVV | VVVV DcDD DDDD DDDD (little endian - in memory) +// VVVV RRRR RRRR RRRm | DDDD DDDD DDDc VVVV (big endian) + +// Ensure our mddnode is 16 bytes +typedef char __lddmc_check_mddnode_t_is_16_bytes[(sizeof(struct mddnode)==16) ? 1 : -1]; + +static inline uint32_t __attribute__((unused)) +mddnode_getvalue(mddnode_t n) +{ + return *(uint32_t*)((uint8_t*)n+6); +} + +static inline uint8_t __attribute__((unused)) +mddnode_getmark(mddnode_t n) +{ + return n->a & 1; +} + +static inline uint8_t __attribute__((unused)) +mddnode_getcopy(mddnode_t n) +{ + return n->b & 0x10000 ? 1 : 0; +} + +static inline uint64_t __attribute__((unused)) +mddnode_getright(mddnode_t n) +{ + return (n->a & 0x0000ffffffffffff) >> 1; +} + +static inline uint64_t __attribute__((unused)) +mddnode_getdown(mddnode_t n) +{ + return n->b >> 17; +} + +static inline void __attribute__((unused)) +mddnode_setvalue(mddnode_t n, uint32_t value) +{ + *(uint32_t*)((uint8_t*)n+6) = value; +} + +static inline void __attribute__((unused)) +mddnode_setmark(mddnode_t n, uint8_t mark) +{ + n->a = (n->a & 0xfffffffffffffffe) | (mark ? 
1 : 0); +} + +static inline void __attribute__((unused)) +mddnode_setright(mddnode_t n, uint64_t right) +{ + n->a = (n->a & 0xffff000000000001) | (right << 1); +} + +static inline void __attribute__((unused)) +mddnode_setdown(mddnode_t n, uint64_t down) +{ + n->b = (n->b & 0x000000000001ffff) | (down << 16); +} + +static inline void __attribute__((unused)) +mddnode_make(mddnode_t n, uint32_t value, uint64_t right, uint64_t down) +{ + n->a = right << 1; + n->b = down << 17; + *(uint32_t*)((uint8_t*)n+6) = value; +} + +static inline void __attribute__((unused)) +mddnode_makecopy(mddnode_t n, uint64_t right, uint64_t down) +{ + n->a = right << 1; + n->b = ((down << 1) | 1) << 16; +} + +#define GETNODE(mdd) ((mddnode_t)llmsset_index_to_ptr(nodes, mdd)) + +/** + * Implementation of garbage collection + */ + +/* Recursively mark MDD nodes as 'in use' */ +VOID_TASK_IMPL_1(lddmc_gc_mark_rec, MDD, mdd) +{ + if (mdd <= lddmc_true) return; + + if (llmsset_mark(nodes, mdd)) { + mddnode_t n = GETNODE(mdd); + SPAWN(lddmc_gc_mark_rec, mddnode_getright(n)); + CALL(lddmc_gc_mark_rec, mddnode_getdown(n)); + SYNC(lddmc_gc_mark_rec); + } +} + +/** + * External references + */ + +refs_table_t mdd_refs; + +MDD +lddmc_ref(MDD a) +{ + if (a == lddmc_true || a == lddmc_false) return a; + refs_up(&mdd_refs, a); + return a; +} + +void +lddmc_deref(MDD a) +{ + if (a == lddmc_true || a == lddmc_false) return; + refs_down(&mdd_refs, a); +} + +size_t +lddmc_count_refs() +{ + return refs_count(&mdd_refs); +} + +/* Called during garbage collection */ +VOID_TASK_0(lddmc_gc_mark_external_refs) +{ + // iterate through refs hash table, mark all found + size_t count=0; + uint64_t *it = refs_iter(&mdd_refs, 0, mdd_refs.refs_size); + while (it != NULL) { + SPAWN(lddmc_gc_mark_rec, refs_next(&mdd_refs, &it, mdd_refs.refs_size)); + count++; + } + while (count--) { + SYNC(lddmc_gc_mark_rec); + } +} + +/* Infrastructure for internal markings */ +DECLARE_THREAD_LOCAL(lddmc_refs_key, lddmc_refs_internal_t); + +VOID_TASK_0(lddmc_refs_mark_task) +{ + LOCALIZE_THREAD_LOCAL(lddmc_refs_key, lddmc_refs_internal_t); + size_t i, j=0; + for (i=0; ir_count; i++) { + if (j >= 40) { + while (j--) SYNC(lddmc_gc_mark_rec); + j=0; + } + SPAWN(lddmc_gc_mark_rec, lddmc_refs_key->results[i]); + j++; + } + for (i=0; is_count; i++) { + Task *t = lddmc_refs_key->spawns[i]; + if (!TASK_IS_STOLEN(t)) break; + if (TASK_IS_COMPLETED(t)) { + if (j >= 40) { + while (j--) SYNC(lddmc_gc_mark_rec); + j=0; + } + SPAWN(lddmc_gc_mark_rec, *(BDD*)TASK_RESULT(t)); + j++; + } + } + while (j--) SYNC(lddmc_gc_mark_rec); +} + +VOID_TASK_0(lddmc_refs_mark) +{ + TOGETHER(lddmc_refs_mark_task); +} + +VOID_TASK_0(lddmc_refs_init_task) +{ + lddmc_refs_internal_t s = (lddmc_refs_internal_t)malloc(sizeof(struct lddmc_refs_internal)); + s->r_size = 128; + s->r_count = 0; + s->s_size = 128; + s->s_count = 0; + s->results = (BDD*)malloc(sizeof(BDD) * 128); + s->spawns = (Task**)malloc(sizeof(Task*) * 128); + SET_THREAD_LOCAL(lddmc_refs_key, s); +} + +VOID_TASK_0(lddmc_refs_init) +{ + INIT_THREAD_LOCAL(lddmc_refs_key); + TOGETHER(lddmc_refs_init_task); + sylvan_gc_add_mark(10, TASK(lddmc_refs_mark)); +} + +/** + * Initialize and quit functions + */ + +static void +lddmc_quit() +{ + refs_free(&mdd_refs); +} + +void +sylvan_init_ldd() +{ + sylvan_register_quit(lddmc_quit); + sylvan_gc_add_mark(10, TASK(lddmc_gc_mark_external_refs)); + + // Sanity check + if (sizeof(struct mddnode) != 16) { + fprintf(stderr, "Invalid size of mdd nodes: %ld\n", sizeof(struct mddnode)); + exit(1); + } + + 
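+ /* + * Usage sketch (illustrative; the table and cache sizes below are placeholders, + * see the examples directory for the canonical sequence): the LDD module is + * initialized after Lace and the main package, e.g. + * + * lace_init(0, 0); // auto-detect number of workers + * lace_startup(0, NULL, NULL); + * sylvan_init_package(1LL<<24, 1LL<<27, 1LL<<24, 1LL<<27); + * sylvan_init_ldd(); // registers the GC marking callbacks above + */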
refs_create(&mdd_refs, 1024); + + LACE_ME; + CALL(lddmc_refs_init); +} + +/** + * Primitives + */ + +MDD +lddmc_makenode(uint32_t value, MDD ifeq, MDD ifneq) +{ + if (ifeq == lddmc_false) return ifneq; + + // sanity check: ifneq must be false, or a node with a higher value + assert(ifneq != lddmc_true); + if (ifneq != lddmc_false) assert(value < mddnode_getvalue(GETNODE(ifneq))); + + struct mddnode n; + mddnode_make(&n, value, ifneq, ifeq); + + int created; + uint64_t index = llmsset_lookup(nodes, n.a, n.b, &created); + if (index == 0) { + lddmc_refs_push(ifeq); + lddmc_refs_push(ifneq); + LACE_ME; + sylvan_gc(); + lddmc_refs_pop(2); + + index = llmsset_lookup(nodes, n.a, n.b, &created); + if (index == 0) { + fprintf(stderr, "MDD Unique table full, %zu of %zu buckets filled!\n", llmsset_count_marked(nodes), llmsset_get_size(nodes)); + exit(1); + } + } + + if (created) sylvan_stats_count(LDD_NODES_CREATED); + else sylvan_stats_count(LDD_NODES_REUSED); + + return (MDD)index; +} + +MDD +lddmc_make_copynode(MDD ifeq, MDD ifneq) +{ + struct mddnode n; + mddnode_makecopy(&n, ifneq, ifeq); + + int created; + uint64_t index = llmsset_lookup(nodes, n.a, n.b, &created); + if (index == 0) { + lddmc_refs_push(ifeq); + lddmc_refs_push(ifneq); + LACE_ME; + sylvan_gc(); + lddmc_refs_pop(2); + + index = llmsset_lookup(nodes, n.a, n.b, &created); + if (index == 0) { + fprintf(stderr, "MDD Unique table full, %zu of %zu buckets filled!\n", llmsset_count_marked(nodes), llmsset_get_size(nodes)); + exit(1); + } + } + + if (created) sylvan_stats_count(LDD_NODES_CREATED); + else sylvan_stats_count(LDD_NODES_REUSED); + + return (MDD)index; +} + +MDD +lddmc_extendnode(MDD mdd, uint32_t value, MDD ifeq) +{ + if (mdd <= lddmc_true) return lddmc_makenode(value, ifeq, mdd); + + mddnode_t n = GETNODE(mdd); + if (mddnode_getcopy(n)) return lddmc_make_copynode(mddnode_getdown(n), lddmc_extendnode(mddnode_getright(n), value, ifeq)); + uint32_t n_value = mddnode_getvalue(n); + if (n_value < value) return lddmc_makenode(n_value, mddnode_getdown(n), lddmc_extendnode(mddnode_getright(n), value, ifeq)); + if (n_value == value) return lddmc_makenode(value, ifeq, mddnode_getright(n)); + /* (n_value > value) */ return lddmc_makenode(value, ifeq, mdd); +} + +uint32_t +lddmc_getvalue(MDD mdd) +{ + return mddnode_getvalue(GETNODE(mdd)); +} + +MDD +lddmc_getdown(MDD mdd) +{ + return mddnode_getdown(GETNODE(mdd)); +} + +MDD +lddmc_getright(MDD mdd) +{ + return mddnode_getright(GETNODE(mdd)); +} + +MDD +lddmc_follow(MDD mdd, uint32_t value) +{ + for (;;) { + if (mdd <= lddmc_true) return mdd; + const mddnode_t n = GETNODE(mdd); + if (!mddnode_getcopy(n)) { + const uint32_t v = mddnode_getvalue(n); + if (v == value) return mddnode_getdown(n); + if (v > value) return lddmc_false; + } + mdd = mddnode_getright(n); + } +} + +int +lddmc_iscopy(MDD mdd) +{ + if (mdd <= lddmc_true) return 0; + + mddnode_t n = GETNODE(mdd); + return mddnode_getcopy(n) ? 
1 : 0; +} + +MDD +lddmc_followcopy(MDD mdd) +{ + if (mdd <= lddmc_true) return lddmc_false; + + mddnode_t n = GETNODE(mdd); + if (mddnode_getcopy(n)) return mddnode_getdown(n); + else return lddmc_false; +} + +/** + * MDD operations + */ +static inline int +match_ldds(MDD *one, MDD *two) +{ + MDD m1 = *one, m2 = *two; + if (m1 == lddmc_false || m2 == lddmc_false) return 0; + mddnode_t n1 = GETNODE(m1), n2 = GETNODE(m2); + uint32_t v1 = mddnode_getvalue(n1), v2 = mddnode_getvalue(n2); + while (v1 != v2) { + if (v1 < v2) { + m1 = mddnode_getright(n1); + if (m1 == lddmc_false) return 0; + n1 = GETNODE(m1); + v1 = mddnode_getvalue(n1); + } else if (v1 > v2) { + m2 = mddnode_getright(n2); + if (m2 == lddmc_false) return 0; + n2 = GETNODE(m2); + v2 = mddnode_getvalue(n2); + } + } + *one = m1; + *two = m2; + return 1; +} + +TASK_IMPL_2(MDD, lddmc_union, MDD, a, MDD, b) +{ + /* Terminal cases */ + if (a == b) return a; + if (a == lddmc_false) return b; + if (b == lddmc_false) return a; + assert(a != lddmc_true && b != lddmc_true); // expecting same length + + /* Test gc */ + sylvan_gc_test(); + + sylvan_stats_count(LDD_UNION); + + /* Improve cache behavior */ + if (a < b) { MDD tmp=b; b=a; a=tmp; } + + /* Access cache */ + MDD result; + if (cache_get3(CACHE_MDD_UNION, a, b, 0, &result)) { + sylvan_stats_count(LDD_UNION_CACHED); + return result; + } + + /* Get nodes */ + mddnode_t na = GETNODE(a); + mddnode_t nb = GETNODE(b); + + const int na_copy = mddnode_getcopy(na) ? 1 : 0; + const int nb_copy = mddnode_getcopy(nb) ? 1 : 0; + const uint32_t na_value = mddnode_getvalue(na); + const uint32_t nb_value = mddnode_getvalue(nb); + + /* Perform recursive calculation */ + if (na_copy && nb_copy) { + lddmc_refs_spawn(SPAWN(lddmc_union, mddnode_getdown(na), mddnode_getdown(nb))); + MDD right = CALL(lddmc_union, mddnode_getright(na), mddnode_getright(nb)); + lddmc_refs_push(right); + MDD down = lddmc_refs_sync(SYNC(lddmc_union)); + lddmc_refs_pop(1); + result = lddmc_make_copynode(down, right); + } else if (na_copy) { + MDD right = CALL(lddmc_union, mddnode_getright(na), b); + result = lddmc_make_copynode(mddnode_getdown(na), right); + } else if (nb_copy) { + MDD right = CALL(lddmc_union, a, mddnode_getright(nb)); + result = lddmc_make_copynode(mddnode_getdown(nb), right); + } else if (na_value < nb_value) { + MDD right = CALL(lddmc_union, mddnode_getright(na), b); + result = lddmc_makenode(na_value, mddnode_getdown(na), right); + } else if (na_value == nb_value) { + lddmc_refs_spawn(SPAWN(lddmc_union, mddnode_getdown(na), mddnode_getdown(nb))); + MDD right = CALL(lddmc_union, mddnode_getright(na), mddnode_getright(nb)); + lddmc_refs_push(right); + MDD down = lddmc_refs_sync(SYNC(lddmc_union)); + lddmc_refs_pop(1); + result = lddmc_makenode(na_value, down, right); + } else /* na_value > nb_value */ { + MDD right = CALL(lddmc_union, a, mddnode_getright(nb)); + result = lddmc_makenode(nb_value, mddnode_getdown(nb), right); + } + + /* Write to cache */ + if (cache_put3(CACHE_MDD_UNION, a, b, 0, result)) sylvan_stats_count(LDD_UNION_CACHEDPUT); + + return result; +} + +TASK_IMPL_2(MDD, lddmc_minus, MDD, a, MDD, b) +{ + /* Terminal cases */ + if (a == b) return lddmc_false; + if (a == lddmc_false) return lddmc_false; + if (b == lddmc_false) return a; + assert(b != lddmc_true); + assert(a != lddmc_true); // Universe is unknown!! // Possibly depth issue? 
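+ /* + * Worked example (illustration): LDDs encode sets of same-length vectors, + * with values ordered along the 'right' edges. For + * a = {<0,0>, <0,1>, <1,0>} and b = {<0,1>}: + * lddmc_minus(a, b) = {<0,0>, <1,0>} + * The recursion below walks both value lists in order: values occurring only + * in a are kept, equal values recurse downwards, and values occurring only + * in b are skipped. + */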
+ + /* Test gc */ + sylvan_gc_test(); + + sylvan_stats_count(LDD_MINUS); + + /* Access cache */ + MDD result; + if (cache_get3(CACHE_MDD_MINUS, a, b, 0, &result)) { + sylvan_stats_count(LDD_MINUS_CACHED); + return result; + } + + /* Get nodes */ + mddnode_t na = GETNODE(a); + mddnode_t nb = GETNODE(b); + uint32_t na_value = mddnode_getvalue(na); + uint32_t nb_value = mddnode_getvalue(nb); + + /* Perform recursive calculation */ + if (na_value < nb_value) { + MDD right = CALL(lddmc_minus, mddnode_getright(na), b); + result = lddmc_makenode(na_value, mddnode_getdown(na), right); + } else if (na_value == nb_value) { + lddmc_refs_spawn(SPAWN(lddmc_minus, mddnode_getright(na), mddnode_getright(nb))); + MDD down = CALL(lddmc_minus, mddnode_getdown(na), mddnode_getdown(nb)); + lddmc_refs_push(down); + MDD right = lddmc_refs_sync(SYNC(lddmc_minus)); + lddmc_refs_pop(1); + result = lddmc_makenode(na_value, down, right); + } else /* na_value > nb_value */ { + result = CALL(lddmc_minus, a, mddnode_getright(nb)); + } + + /* Write to cache */ + if (cache_put3(CACHE_MDD_MINUS, a, b, 0, result)) sylvan_stats_count(LDD_MINUS_CACHEDPUT); + + return result; +} + +/* result: a plus b; res2: b minus a */ +TASK_IMPL_3(MDD, lddmc_zip, MDD, a, MDD, b, MDD*, res2) +{ + /* Terminal cases */ + if (a == b) { + *res2 = lddmc_false; + return a; + } + if (a == lddmc_false) { + *res2 = b; + return b; + } + if (b == lddmc_false) { + *res2 = lddmc_false; + return a; + } + + assert(a != lddmc_true && b != lddmc_true); // expecting same length + + /* Test gc */ + sylvan_gc_test(); + + /* Maybe not the ideal way */ + sylvan_stats_count(LDD_ZIP); + + /* Access cache */ + MDD result; + if (cache_get3(CACHE_MDD_UNION, a, b, 0, &result) && + cache_get3(CACHE_MDD_MINUS, b, a, 0, res2)) { + sylvan_stats_count(LDD_ZIP); + return result; + } + + /* Get nodes */ + mddnode_t na = GETNODE(a); + mddnode_t nb = GETNODE(b); + uint32_t na_value = mddnode_getvalue(na); + uint32_t nb_value = mddnode_getvalue(nb); + + /* Perform recursive calculation */ + if (na_value < nb_value) { + MDD right = CALL(lddmc_zip, mddnode_getright(na), b, res2); + result = lddmc_makenode(na_value, mddnode_getdown(na), right); + } else if (na_value == nb_value) { + MDD down2, right2; + lddmc_refs_spawn(SPAWN(lddmc_zip, mddnode_getdown(na), mddnode_getdown(nb), &down2)); + MDD right = CALL(lddmc_zip, mddnode_getright(na), mddnode_getright(nb), &right2); + lddmc_refs_push(right); + lddmc_refs_push(right2); + MDD down = lddmc_refs_sync(SYNC(lddmc_zip)); + lddmc_refs_pop(2); + result = lddmc_makenode(na_value, down, right); + *res2 = lddmc_makenode(na_value, down2, right2); + } else /* na_value > nb_value */ { + MDD right2; + MDD right = CALL(lddmc_zip, a, mddnode_getright(nb), &right2); + result = lddmc_makenode(nb_value, mddnode_getdown(nb), right); + *res2 = lddmc_makenode(nb_value, mddnode_getdown(nb), right2); + } + + /* Write to cache */ + int c1 = cache_put3(CACHE_MDD_UNION, a, b, 0, result); + int c2 = cache_put3(CACHE_MDD_MINUS, b, a, 0, *res2); + if (c1 && c2) sylvan_stats_count(LDD_ZIP_CACHEDPUT); + + return result; +} + +TASK_IMPL_2(MDD, lddmc_intersect, MDD, a, MDD, b) +{ + /* Terminal cases */ + if (a == b) return a; + if (a == lddmc_false || b == lddmc_false) return lddmc_false; + assert(a != lddmc_true && b != lddmc_true); + + /* Test gc */ + sylvan_gc_test(); + + sylvan_stats_count(LDD_INTERSECT); + + /* Get nodes */ + mddnode_t na = GETNODE(a); + mddnode_t nb = GETNODE(b); + uint32_t na_value = mddnode_getvalue(na); + uint32_t nb_value = 
mddnode_getvalue(nb); + + /* Skip nodes if possible */ + while (na_value != nb_value) { + if (na_value < nb_value) { + a = mddnode_getright(na); + if (a == lddmc_false) return lddmc_false; + na = GETNODE(a); + na_value = mddnode_getvalue(na); + } + if (nb_value < na_value) { + b = mddnode_getright(nb); + if (b == lddmc_false) return lddmc_false; + nb = GETNODE(b); + nb_value = mddnode_getvalue(nb); + } + } + + /* Access cache */ + MDD result; + if (cache_get3(CACHE_MDD_INTERSECT, a, b, 0, &result)) { + sylvan_stats_count(LDD_INTERSECT_CACHED); + return result; + } + + /* Perform recursive calculation */ + lddmc_refs_spawn(SPAWN(lddmc_intersect, mddnode_getright(na), mddnode_getright(nb))); + MDD down = CALL(lddmc_intersect, mddnode_getdown(na), mddnode_getdown(nb)); + lddmc_refs_push(down); + MDD right = lddmc_refs_sync(SYNC(lddmc_intersect)); + lddmc_refs_pop(1); + result = lddmc_makenode(na_value, down, right); + + /* Write to cache */ + if (cache_put3(CACHE_MDD_INTERSECT, a, b, 0, result)) sylvan_stats_count(LDD_INTERSECT_CACHEDPUT); + + return result; +} + +// proj: -1 (rest 0), 0 (no match), 1 (match) +TASK_IMPL_3(MDD, lddmc_match, MDD, a, MDD, b, MDD, proj) +{ + if (a == b) return a; + if (a == lddmc_false || b == lddmc_false) return lddmc_false; + + mddnode_t p_node = GETNODE(proj); + uint32_t p_val = mddnode_getvalue(p_node); + if (p_val == (uint32_t)-1) return a; + + assert(a != lddmc_true); + if (p_val == 1) assert(b != lddmc_true); + + /* Test gc */ + sylvan_gc_test(); + + /* Skip nodes if possible */ + if (p_val == 1) { + if (!match_ldds(&a, &b)) return lddmc_false; + } + + sylvan_stats_count(LDD_MATCH); + + /* Access cache */ + MDD result; + if (cache_get3(CACHE_MDD_MATCH, a, b, proj, &result)) { + sylvan_stats_count(LDD_MATCH_CACHED); + return result; + } + + /* Perform recursive calculation */ + mddnode_t na = GETNODE(a); + MDD down; + if (p_val == 1) { + mddnode_t nb = GETNODE(b); + /* right = */ lddmc_refs_spawn(SPAWN(lddmc_match, mddnode_getright(na), mddnode_getright(nb), proj)); + down = CALL(lddmc_match, mddnode_getdown(na), mddnode_getdown(nb), mddnode_getdown(p_node)); + } else { + /* right = */ lddmc_refs_spawn(SPAWN(lddmc_match, mddnode_getright(na), b, proj)); + down = CALL(lddmc_match, mddnode_getdown(na), b, mddnode_getdown(p_node)); + } + lddmc_refs_push(down); + MDD right = lddmc_refs_sync(SYNC(lddmc_match)); + lddmc_refs_pop(1); + result = lddmc_makenode(mddnode_getvalue(na), down, right); + + /* Write to cache */ + if (cache_put3(CACHE_MDD_MATCH, a, b, proj, result)) sylvan_stats_count(LDD_MATCH_CACHEDPUT); + + return result; +} + +TASK_4(MDD, lddmc_relprod_help, uint32_t, val, MDD, set, MDD, rel, MDD, proj) +{ + return lddmc_makenode(val, CALL(lddmc_relprod, set, rel, proj), lddmc_false); +} + +// meta: -1 (end; rest not in rel), 0 (not in rel), 1 (read), 2 (write), 3 (only-read), 4 (only-write) +TASK_IMPL_3(MDD, lddmc_relprod, MDD, set, MDD, rel, MDD, meta) +{ + if (set == lddmc_false) return lddmc_false; + if (rel == lddmc_false) return lddmc_false; + + mddnode_t n_meta = GETNODE(meta); + uint32_t m_val = mddnode_getvalue(n_meta); + if (m_val == (uint32_t)-1) return set; + if (m_val != 0) assert(set != lddmc_true && rel != lddmc_true); + + /* Skip nodes if possible */ + if (!mddnode_getcopy(GETNODE(rel))) { + if (m_val == 1 || m_val == 3) { + if (!match_ldds(&set, &rel)) return lddmc_false; + } + } + + /* Test gc */ + sylvan_gc_test(); + + sylvan_stats_count(LDD_RELPROD); + + /* Access cache */ + MDD result; + if (cache_get3(CACHE_MDD_RELPROD, set, rel, 
meta, &result)) { + sylvan_stats_count(LDD_RELPROD_CACHED); + return result; + } + + mddnode_t n_set = GETNODE(set); + mddnode_t n_rel = GETNODE(rel); + + /* Recursive operations */ + if (m_val == 0) { // not in rel + lddmc_refs_spawn(SPAWN(lddmc_relprod, mddnode_getright(n_set), rel, meta)); + MDD down = CALL(lddmc_relprod, mddnode_getdown(n_set), rel, mddnode_getdown(n_meta)); + lddmc_refs_push(down); + MDD right = lddmc_refs_sync(SYNC(lddmc_relprod)); + lddmc_refs_pop(1); + result = lddmc_makenode(mddnode_getvalue(n_set), down, right); + } else if (m_val == 1) { // read + // read layer: if not copy, then set&rel are already matched + lddmc_refs_spawn(SPAWN(lddmc_relprod, set, mddnode_getright(n_rel), meta)); // spawn next read in list + + // for this read, either it is copy ('for all') or it is normal match + if (mddnode_getcopy(n_rel)) { + // spawn for every value to copy (set) + int count = 0; + for (;;) { + // stay same level of set (for write) + lddmc_refs_spawn(SPAWN(lddmc_relprod, set, mddnode_getdown(n_rel), mddnode_getdown(n_meta))); + count++; + set = mddnode_getright(n_set); + if (set == lddmc_false) break; + n_set = GETNODE(set); + } + + // sync+union (one by one) + result = lddmc_false; + while (count--) { + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprod)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } + } else { + // stay same level of set (for write) + result = CALL(lddmc_relprod, set, mddnode_getdown(n_rel), mddnode_getdown(n_meta)); + } + + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprod)); // sync next read in list + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } else if (m_val == 3) { // only-read + if (mddnode_getcopy(n_rel)) { + // copy on read ('for any value') + // result = union(result_with_copy, result_without_copy) + lddmc_refs_spawn(SPAWN(lddmc_relprod, set, mddnode_getright(n_rel), meta)); // spawn without_copy + + // spawn for every value to copy (set) + int count = 0; + for (;;) { + lddmc_refs_spawn(SPAWN(lddmc_relprod_help, mddnode_getvalue(n_set), mddnode_getdown(n_set), mddnode_getdown(n_rel), mddnode_getdown(n_meta))); + count++; + set = mddnode_getright(n_set); + if (set == lddmc_false) break; + n_set = GETNODE(set); + } + + // sync+union (one by one) + result = lddmc_false; + while (count--) { + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprod_help)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } + + // add result from without_copy + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprod)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } else { + // only-read, without copy + lddmc_refs_spawn(SPAWN(lddmc_relprod, mddnode_getright(n_set), mddnode_getright(n_rel), meta)); + MDD down = CALL(lddmc_relprod, mddnode_getdown(n_set), mddnode_getdown(n_rel), mddnode_getdown(n_meta)); + lddmc_refs_push(down); + MDD right = lddmc_refs_sync(SYNC(lddmc_relprod)); + lddmc_refs_pop(1); + result = lddmc_makenode(mddnode_getvalue(n_set), down, right); + } + } else if (m_val == 2 || m_val == 4) { // write, only-write + if (m_val == 4) { + // only-write, so we need to include 'for all variables' + lddmc_refs_spawn(SPAWN(lddmc_relprod, mddnode_getright(n_set), rel, meta)); // next in set + } + + // spawn for every value to write (rel) + int count = 0; + for (;;) 
{ + uint32_t value; + if (mddnode_getcopy(n_rel)) value = mddnode_getvalue(n_set); + else value = mddnode_getvalue(n_rel); + lddmc_refs_spawn(SPAWN(lddmc_relprod_help, value, mddnode_getdown(n_set), mddnode_getdown(n_rel), mddnode_getdown(n_meta))); + count++; + rel = mddnode_getright(n_rel); + if (rel == lddmc_false) break; + n_rel = GETNODE(rel); + } + + // sync+union (one by one) + result = lddmc_false; + while (count--) { + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprod_help)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } + + if (m_val == 4) { + // sync+union with other variables + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprod)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } + } + + /* Write to cache */ + if (cache_put3(CACHE_MDD_RELPROD, set, rel, meta, result)) sylvan_stats_count(LDD_RELPROD_CACHEDPUT); + + return result; +} + +TASK_5(MDD, lddmc_relprod_union_help, uint32_t, val, MDD, set, MDD, rel, MDD, proj, MDD, un) +{ + return lddmc_makenode(val, CALL(lddmc_relprod_union, set, rel, proj, un), lddmc_false); +} + +// meta: -1 (end; rest not in rel), 0 (not in rel), 1 (read), 2 (write), 3 (only-read), 4 (only-write) +TASK_IMPL_4(MDD, lddmc_relprod_union, MDD, set, MDD, rel, MDD, meta, MDD, un) +{ + if (set == lddmc_false) return un; + if (rel == lddmc_false) return un; + if (un == lddmc_false) return CALL(lddmc_relprod, set, rel, meta); + + mddnode_t n_meta = GETNODE(meta); + uint32_t m_val = mddnode_getvalue(n_meta); + if (m_val == (uint32_t)-1) return CALL(lddmc_union, set, un); + + // check depths (this triggers on logic error) + if (m_val != 0) assert(set != lddmc_true && rel != lddmc_true && un != lddmc_true); + + /* Skip nodes if possible */ + if (!mddnode_getcopy(GETNODE(rel))) { + if (m_val == 1 || m_val == 3) { + if (!match_ldds(&set, &rel)) return un; + } + } + + mddnode_t n_set = GETNODE(set); + mddnode_t n_rel = GETNODE(rel); + mddnode_t n_un = GETNODE(un); + + // in some cases, we know un.value < result.value + if (m_val == 0 || m_val == 3) { + // if m_val == 0, no read/write, then un.value < set.value? + // if m_val == 3, only read (write same), then un.value < set.value? 
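+ /* + * In other words: when nothing is written at this level, every result vector + * starts with a value that also occurs in set (or, for a non-copy write, with + * the value written by rel). Any un value strictly below that can never be + * produced by the product, so such an un node is kept as-is and only its + * right sibling needs the recursive call. + */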
+ uint32_t set_value = mddnode_getvalue(n_set); + uint32_t un_value = mddnode_getvalue(n_un); + if (un_value < set_value) { + MDD right = CALL(lddmc_relprod_union, set, rel, meta, mddnode_getright(n_un)); + if (right == mddnode_getright(n_un)) return un; + else return lddmc_makenode(mddnode_getvalue(n_un), mddnode_getdown(n_un), right); + } + } else if (m_val == 2 || m_val == 4) { + // if we write, then we only know for certain that un.value < result.value if + // the root of rel is not a copy node + if (!mddnode_getcopy(n_rel)) { + uint32_t rel_value = mddnode_getvalue(n_rel); + uint32_t un_value = mddnode_getvalue(n_un); + if (un_value < rel_value) { + MDD right = CALL(lddmc_relprod_union, set, rel, meta, mddnode_getright(n_un)); + if (right == mddnode_getright(n_un)) return un; + else return lddmc_makenode(mddnode_getvalue(n_un), mddnode_getdown(n_un), right); + } + } + } + + /* Test gc */ + sylvan_gc_test(); + + sylvan_stats_count(LDD_RELPROD_UNION); + + /* Access cache */ + MDD result; + if (cache_get4(CACHE_MDD_RELPROD, set, rel, meta, un, &result)) { + sylvan_stats_count(LDD_RELPROD_UNION_CACHED); + return result; + } + + /* Recursive operations */ + if (m_val == 0) { // not in rel + uint32_t set_value = mddnode_getvalue(n_set); + uint32_t un_value = mddnode_getvalue(n_un); + // set_value > un_value already checked above + if (set_value < un_value) { + lddmc_refs_spawn(SPAWN(lddmc_relprod_union, mddnode_getright(n_set), rel, meta, un)); + // going down, we don't need _union, since un does not contain this subtree + MDD down = CALL(lddmc_relprod, mddnode_getdown(n_set), rel, mddnode_getdown(n_meta)); + lddmc_refs_push(down); + MDD right = lddmc_refs_sync(SYNC(lddmc_relprod_union)); + lddmc_refs_pop(1); + if (down == lddmc_false) result = right; + else result = lddmc_makenode(mddnode_getvalue(n_set), down, right); + } else /* set_value == un_value */ { + lddmc_refs_spawn(SPAWN(lddmc_relprod_union, mddnode_getright(n_set), rel, meta, mddnode_getright(n_un))); + MDD down = CALL(lddmc_relprod_union, mddnode_getdown(n_set), rel, mddnode_getdown(n_meta), mddnode_getdown(n_un)); + lddmc_refs_push(down); + MDD right = lddmc_refs_sync(SYNC(lddmc_relprod_union)); + lddmc_refs_pop(1); + if (right == mddnode_getright(n_un) && down == mddnode_getdown(n_un)) result = un; + else result = lddmc_makenode(mddnode_getvalue(n_set), down, right); + } + } else if (m_val == 1) { // read + // read layer: if not copy, then set&rel are already matched + lddmc_refs_spawn(SPAWN(lddmc_relprod_union, set, mddnode_getright(n_rel), meta, un)); // spawn next read in list + + // for this read, either it is copy ('for all') or it is normal match + if (mddnode_getcopy(n_rel)) { + // spawn for every value in set (copy = for all) + int count = 0; + for (;;) { + // stay same level of set and un (for write) + lddmc_refs_spawn(SPAWN(lddmc_relprod_union, set, mddnode_getdown(n_rel), mddnode_getdown(n_meta), un)); + count++; + set = mddnode_getright(n_set); + if (set == lddmc_false) break; + n_set = GETNODE(set); + } + + // sync+union (one by one) + result = lddmc_false; + while (count--) { + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprod_union)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } + } else { + // stay same level of set and un (for write) + result = CALL(lddmc_relprod_union, set, mddnode_getdown(n_rel), mddnode_getdown(n_meta), un); + } + + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprod_union)); // sync 
next read in list + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } else if (m_val == 3) { // only-read + // un < set already checked above + if (mddnode_getcopy(n_rel)) { + // copy on read ('for any value') + // result = union(result_with_copy, result_without_copy) + lddmc_refs_spawn(SPAWN(lddmc_relprod_union, set, mddnode_getright(n_rel), meta, un)); // spawn without_copy + + // spawn for every value to copy (set) + int count = 0; + result = lddmc_false; + for (;;) { + uint32_t set_value = mddnode_getvalue(n_set); + uint32_t un_value = mddnode_getvalue(n_un); + if (un_value < set_value) { + // this is a bit tricky + // the result of this will simply be "un_value, mddnode_getdown(n_un), false" which is intended + lddmc_refs_spawn(SPAWN(lddmc_relprod_union_help, un_value, lddmc_false, lddmc_false, mddnode_getdown(n_meta), mddnode_getdown(n_un))); + count++; + un = mddnode_getright(n_un); + if (un == lddmc_false) { + result = CALL(lddmc_relprod, set, rel, meta); + break; + } + n_un = GETNODE(un); + } else if (un_value > set_value) { + // tricky again. the result of this is a normal relprod + lddmc_refs_spawn(SPAWN(lddmc_relprod_union_help, set_value, mddnode_getdown(n_set), mddnode_getdown(n_rel), mddnode_getdown(n_meta), lddmc_false)); + count++; + set = mddnode_getright(n_set); + if (set == lddmc_false) { + result = un; + break; + } + n_set = GETNODE(set); + } else /* un_value == set_value */ { + lddmc_refs_spawn(SPAWN(lddmc_relprod_union_help, set_value, mddnode_getdown(n_set), mddnode_getdown(n_rel), mddnode_getdown(n_meta), mddnode_getdown(n_un))); + count++; + set = mddnode_getright(n_set); + un = mddnode_getright(n_un); + if (set == lddmc_false) { + result = un; + break; + } else if (un == lddmc_false) { + result = CALL(lddmc_relprod, set, rel, meta); + break; + } + n_set = GETNODE(set); + n_un = GETNODE(un); + } + } + + // sync+union (one by one) + while (count--) { + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprod_union_help)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } + + // add result from without_copy + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprod_union)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } else { + // only-read, not a copy node + uint32_t set_value = mddnode_getvalue(n_set); + uint32_t un_value = mddnode_getvalue(n_un); + + // already did un_value < set_value + if (un_value > set_value) { + lddmc_refs_spawn(SPAWN(lddmc_relprod_union, mddnode_getright(n_set), mddnode_getright(n_rel), meta, un)); + MDD down = CALL(lddmc_relprod, mddnode_getdown(n_set), mddnode_getdown(n_rel), mddnode_getdown(n_meta)); + lddmc_refs_push(down); + MDD right = lddmc_refs_sync(SYNC(lddmc_relprod_union)); + lddmc_refs_pop(1); + result = lddmc_makenode(mddnode_getvalue(n_set), down, right); + } else /* un_value == set_value */ { + lddmc_refs_spawn(SPAWN(lddmc_relprod_union, mddnode_getright(n_set), mddnode_getright(n_rel), meta, mddnode_getright(n_un))); + MDD down = CALL(lddmc_relprod_union, mddnode_getdown(n_set), mddnode_getdown(n_rel), mddnode_getdown(n_meta), mddnode_getdown(n_un)); + lddmc_refs_push(down); + MDD right = lddmc_refs_sync(SYNC(lddmc_relprod_union)); + lddmc_refs_pop(1); + result = lddmc_makenode(mddnode_getvalue(n_set), down, right); + } + } + } else if (m_val == 2 || m_val == 4) { // write, only-write + if (m_val == 4) { + // only-write, so we need to 
include 'for all variables' + lddmc_refs_spawn(SPAWN(lddmc_relprod_union, mddnode_getright(n_set), rel, meta, un)); // next in set + } + + // spawn for every value to write (rel) + int count = 0; + for (;;) { + uint32_t value; + if (mddnode_getcopy(n_rel)) value = mddnode_getvalue(n_set); + else value = mddnode_getvalue(n_rel); + uint32_t un_value = mddnode_getvalue(n_un); + if (un_value < value) { + // the result of this will simply be "un_value, mddnode_getdown(n_un), false" which is intended + lddmc_refs_spawn(SPAWN(lddmc_relprod_union_help, un_value, lddmc_false, lddmc_false, mddnode_getdown(n_meta), mddnode_getdown(n_un))); + count++; + un = mddnode_getright(n_un); + if (un == lddmc_false) { + result = CALL(lddmc_relprod, set, rel, meta); + break; + } + n_un = GETNODE(un); + } else if (un_value > value) { + lddmc_refs_spawn(SPAWN(lddmc_relprod_union_help, value, mddnode_getdown(n_set), mddnode_getdown(n_rel), mddnode_getdown(n_meta), lddmc_false)); + count++; + rel = mddnode_getright(n_rel); + if (rel == lddmc_false) { + result = un; + break; + } + n_rel = GETNODE(rel); + } else /* un_value == value */ { + lddmc_refs_spawn(SPAWN(lddmc_relprod_union_help, value, mddnode_getdown(n_set), mddnode_getdown(n_rel), mddnode_getdown(n_meta), mddnode_getdown(n_un))); + count++; + rel = mddnode_getright(n_rel); + un = mddnode_getright(n_un); + if (rel == lddmc_false) { + result = un; + break; + } else if (un == lddmc_false) { + result = CALL(lddmc_relprod, set, rel, meta); + break; + } + n_rel = GETNODE(rel); + n_un = GETNODE(un); + } + } + + // sync+union (one by one) + while (count--) { + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprod_union_help)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } + + if (m_val == 4) { + // sync+union with other variables + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprod_union)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } + } + + /* Write to cache */ + if (cache_put4(CACHE_MDD_RELPROD, set, rel, meta, un, result)) sylvan_stats_count(LDD_RELPROD_UNION_CACHEDPUT); + + return result; +} + +TASK_5(MDD, lddmc_relprev_help, uint32_t, val, MDD, set, MDD, rel, MDD, proj, MDD, uni) +{ + return lddmc_makenode(val, CALL(lddmc_relprev, set, rel, proj, uni), lddmc_false); +} + +/** + * Calculate all predecessors of a in uni according to rel[meta] + * meta follows the same semantics as relprod + * i.e. 
0 (not in rel), 1 (read), 2 (write), 3 (only-read), 4 (only-write), -1 (end; rest=0) + */ +TASK_IMPL_4(MDD, lddmc_relprev, MDD, set, MDD, rel, MDD, meta, MDD, uni) +{ + if (set == lddmc_false) return lddmc_false; + if (rel == lddmc_false) return lddmc_false; + if (uni == lddmc_false) return lddmc_false; + + mddnode_t n_meta = GETNODE(meta); + uint32_t m_val = mddnode_getvalue(n_meta); + if (m_val == (uint32_t)-1) { + if (set == uni) return set; + else return lddmc_intersect(set, uni); + } + + if (m_val != 0) assert(set != lddmc_true && rel != lddmc_true && uni != lddmc_true); + + /* Skip nodes if possible */ + if (m_val == 0) { + // not in rel: match set and uni ('intersect') + if (!match_ldds(&set, &uni)) return lddmc_false; + } else if (mddnode_getcopy(GETNODE(rel))) { + // read+copy: no matching (pre is everything in uni) + // write+copy: no matching (match after split: set and uni) + // only-read+copy: match set and uni + // only-write+copy: no matching (match after split: set and uni) + if (m_val == 3) { + if (!match_ldds(&set, &uni)) return lddmc_false; + } + } else if (m_val == 1) { + // read: match uni and rel + if (!match_ldds(&uni, &rel)) return lddmc_false; + } else if (m_val == 2) { + // write: match set and rel + if (!match_ldds(&set, &rel)) return lddmc_false; + } else if (m_val == 3) { + // only-read: match uni and set and rel + mddnode_t n_set = GETNODE(set); + mddnode_t n_rel = GETNODE(rel); + mddnode_t n_uni = GETNODE(uni); + uint32_t n_set_value = mddnode_getvalue(n_set); + uint32_t n_rel_value = mddnode_getvalue(n_rel); + uint32_t n_uni_value = mddnode_getvalue(n_uni); + while (n_uni_value != n_rel_value || n_rel_value != n_set_value) { + if (n_uni_value < n_rel_value || n_uni_value < n_set_value) { + uni = mddnode_getright(n_uni); + if (uni == lddmc_false) return lddmc_false; + n_uni = GETNODE(uni); + n_uni_value = mddnode_getvalue(n_uni); + } + if (n_set_value < n_rel_value || n_set_value < n_uni_value) { + set = mddnode_getright(n_set); + if (set == lddmc_false) return lddmc_false; + n_set = GETNODE(set); + n_set_value = mddnode_getvalue(n_set); + } + if (n_rel_value < n_set_value || n_rel_value < n_uni_value) { + rel = mddnode_getright(n_rel); + if (rel == lddmc_false) return lddmc_false; + n_rel = GETNODE(rel); + n_rel_value = mddnode_getvalue(n_rel); + } + } + } else if (m_val == 4) { + // only-write: match set and rel (then use whole universe) + if (!match_ldds(&set, &rel)) return lddmc_false; + } + + /* Test gc */ + sylvan_gc_test(); + + sylvan_stats_count(LDD_RELPREV); + + /* Access cache */ + MDD result; + if (cache_get4(CACHE_MDD_RELPREV, set, rel, meta, uni, &result)) { + sylvan_stats_count(LDD_RELPREV_CACHED); + return result; + } + + mddnode_t n_set = GETNODE(set); + mddnode_t n_rel = GETNODE(rel); + mddnode_t n_uni = GETNODE(uni); + + /* Recursive operations */ + if (m_val == 0) { // not in rel + // m_val == 0 : not in rel (intersection set and universe) + lddmc_refs_spawn(SPAWN(lddmc_relprev, mddnode_getright(n_set), rel, meta, mddnode_getright(n_uni))); + MDD down = CALL(lddmc_relprev, mddnode_getdown(n_set), rel, mddnode_getdown(n_meta), mddnode_getdown(n_uni)); + lddmc_refs_push(down); + MDD right = lddmc_refs_sync(SYNC(lddmc_relprev)); + lddmc_refs_pop(1); + result = lddmc_makenode(mddnode_getvalue(n_set), down, right); + } else if (m_val == 1) { // read level + // result value is in case of copy: everything in uni! + // result value is in case of not-copy: match uni and rel! 
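+ /* + * Worked example (illustration): for rel = {<1,2>} with meta = [1,2,-1] + * (read 1, then write 2) and universe uni = {<1>, <2>}: + * lddmc_relprev({<2>}, rel, meta, uni) = {<1>} + * i.e. the states in uni from which the given set is reachable in one step. + */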
+ lddmc_refs_spawn(SPAWN(lddmc_relprev, set, mddnode_getright(n_rel), meta, uni)); // next in rel + if (mddnode_getcopy(n_rel)) { + // result is everything in uni + // spawn for every value to have been read (uni) + int count = 0; + for (;;) { + lddmc_refs_spawn(SPAWN(lddmc_relprev_help, mddnode_getvalue(n_uni), set, mddnode_getdown(n_rel), mddnode_getdown(n_meta), uni)); + count++; + uni = mddnode_getright(n_uni); + if (uni == lddmc_false) break; + n_uni = GETNODE(uni); + } + + // sync+union (one by one) + result = lddmc_false; + while (count--) { + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprev_help)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } + } else { + // already matched + MDD down = CALL(lddmc_relprev, set, mddnode_getdown(n_rel), mddnode_getdown(n_meta), uni); + result = lddmc_makenode(mddnode_getvalue(n_uni), down, lddmc_false); + } + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprev)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } else if (m_val == 3) { // only-read level + // result value is in case of copy: match set and uni! (already done first match) + // result value is in case of not-copy: match set and uni and rel! + lddmc_refs_spawn(SPAWN(lddmc_relprev, set, mddnode_getright(n_rel), meta, uni)); // next in rel + if (mddnode_getcopy(n_rel)) { + // spawn for every matching set+uni + int count = 0; + for (;;) { + lddmc_refs_spawn(SPAWN(lddmc_relprev_help, mddnode_getvalue(n_uni), mddnode_getdown(n_set), mddnode_getdown(n_rel), mddnode_getdown(n_meta), mddnode_getdown(n_uni))); + count++; + uni = mddnode_getright(n_uni); + if (!match_ldds(&set, &uni)) break; + n_set = GETNODE(set); + n_uni = GETNODE(uni); + } + + // sync+union (one by one) + result = lddmc_false; + while (count--) { + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprev_help)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } + } else { + // already matched + MDD down = CALL(lddmc_relprev, mddnode_getdown(n_set), mddnode_getdown(n_rel), mddnode_getdown(n_meta), mddnode_getdown(n_uni)); + result = lddmc_makenode(mddnode_getvalue(n_uni), down, lddmc_false); + } + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprev)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } else if (m_val == 2) { // write level + // note: the read level has already matched the uni that was read. + // write+copy: only for the one set equal to uni... 
+ // write: match set and rel (already done) + lddmc_refs_spawn(SPAWN(lddmc_relprev, set, mddnode_getright(n_rel), meta, uni)); + if (mddnode_getcopy(n_rel)) { + MDD down = lddmc_follow(set, mddnode_getvalue(n_uni)); + if (down != lddmc_false) { + result = CALL(lddmc_relprev, down, mddnode_getdown(n_rel), mddnode_getdown(n_meta), mddnode_getdown(n_uni)); + } else { + result = lddmc_false; + } + } else { + result = CALL(lddmc_relprev, mddnode_getdown(n_set), mddnode_getdown(n_rel), mddnode_getdown(n_meta), mddnode_getdown(n_uni)); + } + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprev)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } else if (m_val == 4) { // only-write level + // only-write+copy: match set and uni after spawn + // only-write: match set and rel (already done) + lddmc_refs_spawn(SPAWN(lddmc_relprev, set, mddnode_getright(n_rel), meta, uni)); + if (mddnode_getcopy(n_rel)) { + // spawn for every matching set+uni + int count = 0; + for (;;) { + if (!match_ldds(&set, &uni)) break; + n_set = GETNODE(set); + n_uni = GETNODE(uni); + lddmc_refs_spawn(SPAWN(lddmc_relprev_help, mddnode_getvalue(n_uni), mddnode_getdown(n_set), mddnode_getdown(n_rel), mddnode_getdown(n_meta), mddnode_getdown(n_uni))); + count++; + uni = mddnode_getright(n_uni); + } + + // sync+union (one by one) + result = lddmc_false; + while (count--) { + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprev_help)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } + } else { + // spawn for every value in universe!! + int count = 0; + for (;;) { + lddmc_refs_spawn(SPAWN(lddmc_relprev_help, mddnode_getvalue(n_uni), mddnode_getdown(n_set), mddnode_getdown(n_rel), mddnode_getdown(n_meta), mddnode_getdown(n_uni))); + count++; + uni = mddnode_getright(n_uni); + if (uni == lddmc_false) break; + n_uni = GETNODE(uni); + } + + // sync+union (one by one) + result = lddmc_false; + while (count--) { + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprev_help)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } + } + lddmc_refs_push(result); + MDD result2 = lddmc_refs_sync(SYNC(lddmc_relprev)); + lddmc_refs_push(result2); + result = CALL(lddmc_union, result, result2); + lddmc_refs_pop(2); + } + + /* Write to cache */ + if (cache_put4(CACHE_MDD_RELPREV, set, rel, meta, uni, result)) sylvan_stats_count(LDD_RELPREV_CACHEDPUT); + + return result; +} + +// Same 'proj' as project. 
So: proj: -2 (end; quantify rest), -1 (end; keep rest), 0 (quantify), 1 (keep) +TASK_IMPL_4(MDD, lddmc_join, MDD, a, MDD, b, MDD, a_proj, MDD, b_proj) +{ + if (a == lddmc_false || b == lddmc_false) return lddmc_false; + + /* Test gc */ + sylvan_gc_test(); + + mddnode_t n_a_proj = GETNODE(a_proj); + mddnode_t n_b_proj = GETNODE(b_proj); + uint32_t a_proj_val = mddnode_getvalue(n_a_proj); + uint32_t b_proj_val = mddnode_getvalue(n_b_proj); + + while (a_proj_val == 0 && b_proj_val == 0) { + a_proj = mddnode_getdown(n_a_proj); + b_proj = mddnode_getdown(n_b_proj); + n_a_proj = GETNODE(a_proj); + n_b_proj = GETNODE(b_proj); + a_proj_val = mddnode_getvalue(n_a_proj); + b_proj_val = mddnode_getvalue(n_b_proj); + } + + if (a_proj_val == (uint32_t)-2) return b; // no a left + if (b_proj_val == (uint32_t)-2) return a; // no b left + if (a_proj_val == (uint32_t)-1 && b_proj_val == (uint32_t)-1) return CALL(lddmc_intersect, a, b); + + // At this point, only proj_val {-1, 0, 1}; max one with -1; max one with 0. + const int keep_a = a_proj_val != 0; + const int keep_b = b_proj_val != 0; + + if (keep_a && keep_b) { + // If both 'keep', then match values + if (!match_ldds(&a, &b)) return lddmc_false; + } + + sylvan_stats_count(LDD_JOIN); + + /* Access cache */ + MDD result; + if (cache_get4(CACHE_MDD_JOIN, a, b, a_proj, b_proj, &result)) { + sylvan_stats_count(LDD_JOIN_CACHED); + return result; + } + + /* Perform recursive calculation */ + const mddnode_t na = GETNODE(a); + const mddnode_t nb = GETNODE(b); + uint32_t val; + MDD down; + + // Make copies (for cache) + MDD _a_proj = a_proj, _b_proj = b_proj; + if (keep_a) { + if (keep_b) { + val = mddnode_getvalue(nb); + lddmc_refs_spawn(SPAWN(lddmc_join, mddnode_getright(na), mddnode_getright(nb), a_proj, b_proj)); + if (a_proj_val != (uint32_t)-1) a_proj = mddnode_getdown(n_a_proj); + if (b_proj_val != (uint32_t)-1) b_proj = mddnode_getdown(n_b_proj); + down = CALL(lddmc_join, mddnode_getdown(na), mddnode_getdown(nb), a_proj, b_proj); + } else { + val = mddnode_getvalue(na); + lddmc_refs_spawn(SPAWN(lddmc_join, mddnode_getright(na), b, a_proj, b_proj)); + if (a_proj_val != (uint32_t)-1) a_proj = mddnode_getdown(n_a_proj); + if (b_proj_val != (uint32_t)-1) b_proj = mddnode_getdown(n_b_proj); + down = CALL(lddmc_join, mddnode_getdown(na), b, a_proj, b_proj); + } + } else { + val = mddnode_getvalue(nb); + lddmc_refs_spawn(SPAWN(lddmc_join, a, mddnode_getright(nb), a_proj, b_proj)); + if (a_proj_val != (uint32_t)-1) a_proj = mddnode_getdown(n_a_proj); + if (b_proj_val != (uint32_t)-1) b_proj = mddnode_getdown(n_b_proj); + down = CALL(lddmc_join, a, mddnode_getdown(nb), a_proj, b_proj); + } + + lddmc_refs_push(down); + MDD right = lddmc_refs_sync(SYNC(lddmc_join)); + lddmc_refs_pop(1); + result = lddmc_makenode(val, down, right); + + /* Write to cache */ + if (cache_put4(CACHE_MDD_JOIN, a, b, _a_proj, _b_proj, result)) sylvan_stats_count(LDD_JOIN_CACHEDPUT); + + return result; +} + +// so: proj: -2 (end; quantify rest), -1 (end; keep rest), 0 (quantify), 1 (keep) +TASK_IMPL_2(MDD, lddmc_project, const MDD, mdd, const MDD, proj) +{ + if (mdd == lddmc_false) return lddmc_false; // projection of empty is empty + if (mdd == lddmc_true) return lddmc_true; // projection of universe is universe... + + mddnode_t p_node = GETNODE(proj); + uint32_t p_val = mddnode_getvalue(p_node); + if (p_val == (uint32_t)-1) return mdd; + if (p_val == (uint32_t)-2) return lddmc_true; // because we always end with true. 
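+ /* + * Worked example (illustration): proj is itself an LDD over the values + * -2, -1, 0, 1. For proj = [1, 0, -1] (keep the first level, quantify the + * second, keep the rest) and mdd = {<1,2,3>, <1,4,3>, <2,2,5>}: + * lddmc_project(mdd, proj) = {<1,3>, <2,5>} + * A quantified level is removed by taking the union of all its subtrees. + */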
+ + sylvan_gc_test(); + + sylvan_stats_count(LDD_PROJECT); + + MDD result; + if (cache_get3(CACHE_MDD_PROJECT, mdd, proj, 0, &result)) { + sylvan_stats_count(LDD_PROJECT_CACHED); + return result; + } + + mddnode_t n = GETNODE(mdd); + + if (p_val == 1) { // keep + lddmc_refs_spawn(SPAWN(lddmc_project, mddnode_getright(n), proj)); + MDD down = CALL(lddmc_project, mddnode_getdown(n), mddnode_getdown(p_node)); + lddmc_refs_push(down); + MDD right = lddmc_refs_sync(SYNC(lddmc_project)); + lddmc_refs_pop(1); + result = lddmc_makenode(mddnode_getvalue(n), down, right); + } else { // quantify + if (mddnode_getdown(n) == lddmc_true) { // assume lowest level + result = lddmc_true; + } else { + int count = 0; + MDD p_down = mddnode_getdown(p_node), _mdd=mdd; + while (1) { + lddmc_refs_spawn(SPAWN(lddmc_project, mddnode_getdown(n), p_down)); + count++; + _mdd = mddnode_getright(n); + assert(_mdd != lddmc_true); + if (_mdd == lddmc_false) break; + n = GETNODE(_mdd); + } + result = lddmc_false; + while (count--) { + lddmc_refs_push(result); + MDD down = lddmc_refs_sync(SYNC(lddmc_project)); + lddmc_refs_push(down); + result = CALL(lddmc_union, result, down); + lddmc_refs_pop(2); + } + } + } + + if (cache_put3(CACHE_MDD_PROJECT, mdd, proj, 0, result)) sylvan_stats_count(LDD_PROJECT_CACHEDPUT); + + return result; +} + +// so: proj: -2 (end; quantify rest), -1 (end; keep rest), 0 (quantify), 1 (keep) +TASK_IMPL_3(MDD, lddmc_project_minus, const MDD, mdd, const MDD, proj, MDD, avoid) +{ + // This implementation assumed "avoid" has correct depth + if (avoid == lddmc_true) return lddmc_false; + if (mdd == avoid) return lddmc_false; + if (mdd == lddmc_false) return lddmc_false; // projection of empty is empty + if (mdd == lddmc_true) return lddmc_true; // avoid != lddmc_true + + mddnode_t p_node = GETNODE(proj); + uint32_t p_val = mddnode_getvalue(p_node); + if (p_val == (uint32_t)-1) return lddmc_minus(mdd, avoid); + if (p_val == (uint32_t)-2) return lddmc_true; + + sylvan_gc_test(); + + sylvan_stats_count(LDD_PROJECT_MINUS); + + MDD result; + if (cache_get3(CACHE_MDD_PROJECT, mdd, proj, avoid, &result)) { + sylvan_stats_count(LDD_PROJECT_MINUS_CACHED); + return result; + } + + mddnode_t n = GETNODE(mdd); + + if (p_val == 1) { // keep + // move 'avoid' until it matches + uint32_t val = mddnode_getvalue(n); + MDD a_down = lddmc_false; + while (avoid != lddmc_false) { + mddnode_t a_node = GETNODE(avoid); + uint32_t a_val = mddnode_getvalue(a_node); + if (a_val > val) { + break; + } else if (a_val == val) { + a_down = mddnode_getdown(a_node); + break; + } + avoid = mddnode_getright(a_node); + } + lddmc_refs_spawn(SPAWN(lddmc_project_minus, mddnode_getright(n), proj, avoid)); + MDD down = CALL(lddmc_project_minus, mddnode_getdown(n), mddnode_getdown(p_node), a_down); + lddmc_refs_push(down); + MDD right = lddmc_refs_sync(SYNC(lddmc_project_minus)); + lddmc_refs_pop(1); + result = lddmc_makenode(val, down, right); + } else { // quantify + if (mddnode_getdown(n) == lddmc_true) { // assume lowest level + result = lddmc_true; + } else { + int count = 0; + MDD p_down = mddnode_getdown(p_node), _mdd=mdd; + while (1) { + lddmc_refs_spawn(SPAWN(lddmc_project_minus, mddnode_getdown(n), p_down, avoid)); + count++; + _mdd = mddnode_getright(n); + assert(_mdd != lddmc_true); + if (_mdd == lddmc_false) break; + n = GETNODE(_mdd); + } + result = lddmc_false; + while (count--) { + lddmc_refs_push(result); + MDD down = lddmc_refs_sync(SYNC(lddmc_project_minus)); + lddmc_refs_push(down); + result = CALL(lddmc_union, result, 
down); + lddmc_refs_pop(2); + } + } + } + + if (cache_put3(CACHE_MDD_PROJECT, mdd, proj, avoid, result)) sylvan_stats_count(LDD_PROJECT_MINUS_CACHEDPUT); + + return result; +} + +MDD +lddmc_union_cube(MDD a, uint32_t* values, size_t count) +{ + if (a == lddmc_false) return lddmc_cube(values, count); + if (a == lddmc_true) { + assert(count == 0); + return lddmc_true; + } + assert(count != 0); + + mddnode_t na = GETNODE(a); + uint32_t na_value = mddnode_getvalue(na); + + /* Only create a new node if something actually changed */ + + if (na_value < *values) { + MDD right = lddmc_union_cube(mddnode_getright(na), values, count); + if (right == mddnode_getright(na)) return a; // no actual change + return lddmc_makenode(na_value, mddnode_getdown(na), right); + } else if (na_value == *values) { + MDD down = lddmc_union_cube(mddnode_getdown(na), values+1, count-1); + if (down == mddnode_getdown(na)) return a; // no actual change + return lddmc_makenode(na_value, down, mddnode_getright(na)); + } else /* na_value > *values */ { + return lddmc_makenode(*values, lddmc_cube(values+1, count-1), a); + } +} + +MDD +lddmc_union_cube_copy(MDD a, uint32_t* values, int* copy, size_t count) +{ + if (a == lddmc_false) return lddmc_cube_copy(values, copy, count); + if (a == lddmc_true) { + assert(count == 0); + return lddmc_true; + } + assert(count != 0); + + mddnode_t na = GETNODE(a); + + /* Only create a new node if something actually changed */ + + int na_copy = mddnode_getcopy(na); + if (na_copy && *copy) { + MDD down = lddmc_union_cube_copy(mddnode_getdown(na), values+1, copy+1, count-1); + if (down == mddnode_getdown(na)) return a; // no actual change + return lddmc_make_copynode(down, mddnode_getright(na)); + } else if (na_copy) { + MDD right = lddmc_union_cube_copy(mddnode_getright(na), values, copy, count); + if (right == mddnode_getright(na)) return a; // no actual change + return lddmc_make_copynode(mddnode_getdown(na), right); + } else if (*copy) { + return lddmc_make_copynode(lddmc_cube_copy(values+1, copy+1, count-1), a); + } + + uint32_t na_value = mddnode_getvalue(na); + if (na_value < *values) { + MDD right = lddmc_union_cube_copy(mddnode_getright(na), values, copy, count); + if (right == mddnode_getright(na)) return a; // no actual change + return lddmc_makenode(na_value, mddnode_getdown(na), right); + } else if (na_value == *values) { + MDD down = lddmc_union_cube_copy(mddnode_getdown(na), values+1, copy+1, count-1); + if (down == mddnode_getdown(na)) return a; // no actual change + return lddmc_makenode(na_value, down, mddnode_getright(na)); + } else /* na_value > *values */ { + return lddmc_makenode(*values, lddmc_cube_copy(values+1, copy+1, count-1), a); + } +} + +int +lddmc_member_cube(MDD a, uint32_t* values, size_t count) +{ + while (1) { + if (a == lddmc_false) return 0; + if (a == lddmc_true) return 1; + assert(count > 0); // size mismatch + + a = lddmc_follow(a, *values); + values++; + count--; + } +} + +int +lddmc_member_cube_copy(MDD a, uint32_t* values, int* copy, size_t count) +{ + while (1) { + if (a == lddmc_false) return 0; + if (a == lddmc_true) return 1; + assert(count > 0); // size mismatch + + if (*copy) a = lddmc_followcopy(a); + else a = lddmc_follow(a, *values); + values++; + count--; + } +} + +MDD +lddmc_cube(uint32_t* values, size_t count) +{ + if (count == 0) return lddmc_true; + return lddmc_makenode(*values, lddmc_cube(values+1, count-1), lddmc_false); +} + +MDD +lddmc_cube_copy(uint32_t* values, int* copy, size_t count) +{ + if (count == 0) return lddmc_true; + if 
(*copy) return lddmc_make_copynode(lddmc_cube_copy(values+1, copy+1, count-1), lddmc_false); + else return lddmc_makenode(*values, lddmc_cube_copy(values+1, copy+1, count-1), lddmc_false); +} + +/** + * Count number of nodes for each level + */ + +static void +lddmc_nodecount_levels_mark(MDD mdd, size_t *variables) +{ + if (mdd <= lddmc_true) return; + mddnode_t n = GETNODE(mdd); + if (!mddnode_getmark(n)) { + mddnode_setmark(n, 1); + (*variables) += 1; + lddmc_nodecount_levels_mark(mddnode_getright(n), variables); + lddmc_nodecount_levels_mark(mddnode_getdown(n), variables+1); + } +} + +static void +lddmc_nodecount_levels_unmark(MDD mdd) +{ + if (mdd <= lddmc_true) return; + mddnode_t n = GETNODE(mdd); + if (mddnode_getmark(n)) { + mddnode_setmark(n, 0); + lddmc_nodecount_levels_unmark(mddnode_getright(n)); + lddmc_nodecount_levels_unmark(mddnode_getdown(n)); + } +} + +void +lddmc_nodecount_levels(MDD mdd, size_t *variables) +{ + lddmc_nodecount_levels_mark(mdd, variables); + lddmc_nodecount_levels_unmark(mdd); +} + +/** + * Count number of nodes in MDD + */ + +static size_t +lddmc_nodecount_mark(MDD mdd) +{ + if (mdd <= lddmc_true) return 0; + mddnode_t n = GETNODE(mdd); + if (mddnode_getmark(n)) return 0; + mddnode_setmark(n, 1); + return 1 + lddmc_nodecount_mark(mddnode_getdown(n)) + lddmc_nodecount_mark(mddnode_getright(n)); +} + +static void +lddmc_nodecount_unmark(MDD mdd) +{ + if (mdd <= lddmc_true) return; + mddnode_t n = GETNODE(mdd); + if (mddnode_getmark(n)) { + mddnode_setmark(n, 0); + lddmc_nodecount_unmark(mddnode_getright(n)); + lddmc_nodecount_unmark(mddnode_getdown(n)); + } +} + +size_t +lddmc_nodecount(MDD mdd) +{ + size_t result = lddmc_nodecount_mark(mdd); + lddmc_nodecount_unmark(mdd); + return result; +} + +/** + * CALCULATE NUMBER OF VAR ASSIGNMENTS THAT YIELD TRUE + */ + +TASK_IMPL_1(lddmc_satcount_double_t, lddmc_satcount_cached, MDD, mdd) +{ + if (mdd == lddmc_false) return 0.0; + if (mdd == lddmc_true) return 1.0; + + /* Perhaps execute garbage collection */ + sylvan_gc_test(); + + union { + lddmc_satcount_double_t d; + uint64_t s; + } hack; + + sylvan_stats_count(LDD_SATCOUNT); + + if (cache_get3(CACHE_MDD_SATCOUNT, mdd, 0, 0, &hack.s)) { + sylvan_stats_count(LDD_SATCOUNT_CACHED); + return hack.d; + } + + mddnode_t n = GETNODE(mdd); + + SPAWN(lddmc_satcount_cached, mddnode_getdown(n)); + lddmc_satcount_double_t right = CALL(lddmc_satcount_cached, mddnode_getright(n)); + hack.d = right + SYNC(lddmc_satcount_cached); + + if (cache_put3(CACHE_MDD_SATCOUNT, mdd, 0, 0, hack.s)) sylvan_stats_count(LDD_SATCOUNT_CACHEDPUT); + + return hack.d; +} + +TASK_IMPL_1(long double, lddmc_satcount, MDD, mdd) +{ + if (mdd == lddmc_false) return 0.0; + if (mdd == lddmc_true) return 1.0; + + /* Perhaps execute garbage collection */ + sylvan_gc_test(); + + sylvan_stats_count(LDD_SATCOUNTL); + + union { + long double d; + struct { + uint64_t s1; + uint64_t s2; + } s; + } hack; + + if (cache_get3(CACHE_MDD_SATCOUNTL1, mdd, 0, 0, &hack.s.s1) && + cache_get3(CACHE_MDD_SATCOUNTL2, mdd, 0, 0, &hack.s.s2)) { + sylvan_stats_count(LDD_SATCOUNTL_CACHED); + return hack.d; + } + + mddnode_t n = GETNODE(mdd); + + SPAWN(lddmc_satcount, mddnode_getdown(n)); + long double right = CALL(lddmc_satcount, mddnode_getright(n)); + hack.d = right + SYNC(lddmc_satcount); + + int c1 = cache_put3(CACHE_MDD_SATCOUNTL1, mdd, 0, 0, hack.s.s1); + int c2 = cache_put3(CACHE_MDD_SATCOUNTL2, mdd, 0, 0, hack.s.s2); + if (c1 && c2) sylvan_stats_count(LDD_SATCOUNTL_CACHEDPUT); + + return hack.d; +} + +TASK_IMPL_5(MDD, 
lddmc_collect, MDD, mdd, lddmc_collect_cb, cb, void*, context, uint32_t*, values, size_t, count) +{ + if (mdd == lddmc_false) return lddmc_false; + if (mdd == lddmc_true) { + return WRAP(cb, values, count, context); + } + + mddnode_t n = GETNODE(mdd); + + lddmc_refs_spawn(SPAWN(lddmc_collect, mddnode_getright(n), cb, context, values, count)); + + uint32_t newvalues[count+1]; + if (count > 0) memcpy(newvalues, values, sizeof(uint32_t)*count); + newvalues[count] = mddnode_getvalue(n); + MDD down = CALL(lddmc_collect, mddnode_getdown(n), cb, context, newvalues, count+1); + + if (down == lddmc_false) { + MDD result = lddmc_refs_sync(SYNC(lddmc_collect)); + return result; + } + + lddmc_refs_push(down); + MDD right = lddmc_refs_sync(SYNC(lddmc_collect)); + + if (right == lddmc_false) { + lddmc_refs_pop(1); + return down; + } else { + lddmc_refs_push(right); + MDD result = CALL(lddmc_union, down, right); + lddmc_refs_pop(2); + return result; + } +} + +VOID_TASK_5(_lddmc_sat_all_nopar, MDD, mdd, lddmc_enum_cb, cb, void*, context, uint32_t*, values, size_t, count) +{ + if (mdd == lddmc_false) return; + if (mdd == lddmc_true) { + WRAP(cb, values, count, context); + return; + } + + mddnode_t n = GETNODE(mdd); + values[count] = mddnode_getvalue(n); + CALL(_lddmc_sat_all_nopar, mddnode_getdown(n), cb, context, values, count+1); + CALL(_lddmc_sat_all_nopar, mddnode_getright(n), cb, context, values, count); +} + +VOID_TASK_IMPL_3(lddmc_sat_all_nopar, MDD, mdd, lddmc_enum_cb, cb, void*, context) +{ + // determine depth + size_t count=0; + MDD _mdd = mdd; + while (_mdd > lddmc_true) { + _mdd = mddnode_getdown(GETNODE(_mdd)); + assert(_mdd != lddmc_false); + count++; + } + + uint32_t values[count]; + CALL(_lddmc_sat_all_nopar, mdd, cb, context, values, 0); +} + +VOID_TASK_IMPL_5(lddmc_sat_all_par, MDD, mdd, lddmc_enum_cb, cb, void*, context, uint32_t*, values, size_t, count) +{ + if (mdd == lddmc_false) return; + if (mdd == lddmc_true) { + WRAP(cb, values, count, context); + return; + } + + mddnode_t n = GETNODE(mdd); + + SPAWN(lddmc_sat_all_par, mddnode_getright(n), cb, context, values, count); + + uint32_t newvalues[count+1]; + if (count > 0) memcpy(newvalues, values, sizeof(uint32_t)*count); + newvalues[count] = mddnode_getvalue(n); + CALL(lddmc_sat_all_par, mddnode_getdown(n), cb, context, newvalues, count+1); + + SYNC(lddmc_sat_all_par); +} + +struct lddmc_match_sat_info +{ + MDD mdd; + MDD match; + MDD proj; + size_t count; + uint32_t values[0]; +}; + +// proj: -1 (rest 0), 0 (no match), 1 (match) +VOID_TASK_3(lddmc_match_sat, struct lddmc_match_sat_info *, info, lddmc_enum_cb, cb, void*, context) +{ + MDD a = info->mdd, b = info->match, proj = info->proj; + + if (a == lddmc_false || b == lddmc_false) return; + + if (a == lddmc_true) { + assert(b == lddmc_true); + WRAP(cb, info->values, info->count, context); + return; + } + + mddnode_t p_node = GETNODE(proj); + uint32_t p_val = mddnode_getvalue(p_node); + if (p_val == (uint32_t)-1) { + assert(b == lddmc_true); + CALL(lddmc_sat_all_par, a, cb, context, info->values, info->count); + return; + } + + /* Get nodes */ + mddnode_t na = GETNODE(a); + mddnode_t nb = GETNODE(b); + uint32_t na_value = mddnode_getvalue(na); + uint32_t nb_value = mddnode_getvalue(nb); + + /* Skip nodes if possible */ + if (p_val == 1) { + while (na_value != nb_value) { + if (na_value < nb_value) { + a = mddnode_getright(na); + if (a == lddmc_false) return; + na = GETNODE(a); + na_value = mddnode_getvalue(na); + } + if (nb_value < na_value) { + b = mddnode_getright(nb); + if (b == 
lddmc_false) return; + nb = GETNODE(b); + nb_value = mddnode_getvalue(nb); + } + } + } + + struct lddmc_match_sat_info *ri = (struct lddmc_match_sat_info*)alloca(sizeof(struct lddmc_match_sat_info)+sizeof(uint32_t[info->count])); + struct lddmc_match_sat_info *di = (struct lddmc_match_sat_info*)alloca(sizeof(struct lddmc_match_sat_info)+sizeof(uint32_t[info->count+1])); + + ri->mdd = mddnode_getright(na); + di->mdd = mddnode_getdown(na); + ri->match = b; + di->match = mddnode_getdown(nb); + ri->proj = proj; + di->proj = mddnode_getdown(p_node); + ri->count = info->count; + di->count = info->count+1; + if (ri->count > 0) memcpy(ri->values, info->values, sizeof(uint32_t[info->count])); + if (di->count > 0) memcpy(di->values, info->values, sizeof(uint32_t[info->count])); + di->values[info->count] = na_value; + + SPAWN(lddmc_match_sat, ri, cb, context); + CALL(lddmc_match_sat, di, cb, context); + SYNC(lddmc_match_sat); +} + +VOID_TASK_IMPL_5(lddmc_match_sat_par, MDD, mdd, MDD, match, MDD, proj, lddmc_enum_cb, cb, void*, context) +{ + struct lddmc_match_sat_info i; + i.mdd = mdd; + i.match = match; + i.proj = proj; + i.count = 0; + CALL(lddmc_match_sat, &i, cb, context); +} + +int +lddmc_sat_one(MDD mdd, uint32_t* values, size_t count) +{ + if (mdd == lddmc_false) return 0; + if (mdd == lddmc_true) return 1; + assert(count != 0); + mddnode_t n = GETNODE(mdd); + *values = mddnode_getvalue(n); + return lddmc_sat_one(mddnode_getdown(n), values+1, count-1); +} + +MDD +lddmc_sat_one_mdd(MDD mdd) +{ + if (mdd == lddmc_false) return lddmc_false; + if (mdd == lddmc_true) return lddmc_true; + mddnode_t n = GETNODE(mdd); + MDD down = lddmc_sat_one_mdd(mddnode_getdown(n)); + return lddmc_makenode(mddnode_getvalue(n), down, lddmc_false); +} + +TASK_IMPL_4(MDD, lddmc_compose, MDD, mdd, lddmc_compose_cb, cb, void*, context, int, depth) +{ + if (depth == 0 || mdd == lddmc_false || mdd == lddmc_true) { + return WRAP(cb, mdd, context); + } else { + mddnode_t n = GETNODE(mdd); + lddmc_refs_spawn(SPAWN(lddmc_compose, mddnode_getright(n), cb, context, depth)); + MDD down = lddmc_compose(mddnode_getdown(n), cb, context, depth-1); + lddmc_refs_push(down); + MDD right = lddmc_refs_sync(SYNC(lddmc_compose)); + lddmc_refs_pop(1); + return lddmc_makenode(mddnode_getvalue(n), down, right); + } +} + +VOID_TASK_IMPL_4(lddmc_visit_seq, MDD, mdd, lddmc_visit_callbacks_t*, cbs, size_t, ctx_size, void*, context) +{ + if (WRAP(cbs->lddmc_visit_pre, mdd, context) == 0) return; + + void* context_down = alloca(ctx_size); + void* context_right = alloca(ctx_size); + WRAP(cbs->lddmc_visit_init_context, context_down, context, 1); + WRAP(cbs->lddmc_visit_init_context, context_right, context, 0); + + CALL(lddmc_visit_seq, mddnode_getdown(GETNODE(mdd)), cbs, ctx_size, context_down); + CALL(lddmc_visit_seq, mddnode_getright(GETNODE(mdd)), cbs, ctx_size, context_right); + + WRAP(cbs->lddmc_visit_post, mdd, context); +} + +VOID_TASK_IMPL_4(lddmc_visit_par, MDD, mdd, lddmc_visit_callbacks_t*, cbs, size_t, ctx_size, void*, context) +{ + if (WRAP(cbs->lddmc_visit_pre, mdd, context) == 0) return; + + void* context_down = alloca(ctx_size); + void* context_right = alloca(ctx_size); + WRAP(cbs->lddmc_visit_init_context, context_down, context, 1); + WRAP(cbs->lddmc_visit_init_context, context_right, context, 0); + + SPAWN(lddmc_visit_par, mddnode_getdown(GETNODE(mdd)), cbs, ctx_size, context_down); + CALL(lddmc_visit_par, mddnode_getright(GETNODE(mdd)), cbs, ctx_size, context_right); + SYNC(lddmc_visit_par); + + WRAP(cbs->lddmc_visit_post, mdd, 
context); +} + +/** + * GENERIC MARK/UNMARK METHODS + */ + +static inline int +lddmc_mark(mddnode_t node) +{ + if (mddnode_getmark(node)) return 0; + mddnode_setmark(node, 1); + return 1; +} + +static inline int +lddmc_unmark(mddnode_t node) +{ + if (mddnode_getmark(node)) { + mddnode_setmark(node, 0); + return 1; + } else { + return 0; + } +} + +static void +lddmc_unmark_rec(mddnode_t node) +{ + if (lddmc_unmark(node)) { + MDD node_right = mddnode_getright(node); + if (node_right > lddmc_true) lddmc_unmark_rec(GETNODE(node_right)); + MDD node_down = mddnode_getdown(node); + if (node_down > lddmc_true) lddmc_unmark_rec(GETNODE(node_down)); + } +} + +/************* + * DOT OUTPUT +*************/ + +static void +lddmc_fprintdot_rec(FILE* out, MDD mdd) +{ + // assert(mdd > lddmc_true); + + // check mark + mddnode_t n = GETNODE(mdd); + if (mddnode_getmark(n)) return; + mddnode_setmark(n, 1); + + // print the node + uint32_t val = mddnode_getvalue(n); + fprintf(out, "%" PRIu64 " [shape=record, label=\"", mdd); + if (mddnode_getcopy(n)) fprintf(out, " *"); + else fprintf(out, "<%u> %u", val, val); + MDD right = mddnode_getright(n); + while (right != lddmc_false) { + mddnode_t n2 = GETNODE(right); + uint32_t val2 = mddnode_getvalue(n2); + fprintf(out, "|<%u> %u", val2, val2); + right = mddnode_getright(n2); + // assert(right != lddmc_true); + } + fprintf(out, "\"];\n"); + + // recurse and print the edges + for (;;) { + MDD down = mddnode_getdown(n); + // assert(down != lddmc_false); + if (down > lddmc_true) { + lddmc_fprintdot_rec(out, down); + if (mddnode_getcopy(n)) { + fprintf(out, "%" PRIu64 ":c -> ", mdd); + } else { + fprintf(out, "%" PRIu64 ":%u -> ", mdd, mddnode_getvalue(n)); + } + if (mddnode_getcopy(GETNODE(down))) { + fprintf(out, "%" PRIu64 ":c [style=solid];\n", down); + } else { + fprintf(out, "%" PRIu64 ":%u [style=solid];\n", down, mddnode_getvalue(GETNODE(down))); + } + } + MDD right = mddnode_getright(n); + if (right == lddmc_false) break; + n = GETNODE(right); + } +} + +static void +lddmc_fprintdot_unmark(MDD mdd) +{ + if (mdd <= lddmc_true) return; + mddnode_t n = GETNODE(mdd); + if (mddnode_getmark(n)) { + mddnode_setmark(n, 0); + for (;;) { + lddmc_fprintdot_unmark(mddnode_getdown(n)); + mdd = mddnode_getright(n); + if (mdd == lddmc_false) return; + n = GETNODE(mdd); + } + } +} + +void +lddmc_fprintdot(FILE *out, MDD mdd) +{ + fprintf(out, "digraph \"DD\" {\n"); + fprintf(out, "graph [dpi = 300];\n"); + fprintf(out, "center = true;\n"); + fprintf(out, "edge [dir = forward];\n"); + + // Special case: false + if (mdd == lddmc_false) { + fprintf(out, "0 [shape=record, label=\"False\"];\n"); + fprintf(out, "}\n"); + return; + } + + // Special case: true + if (mdd == lddmc_true) { + fprintf(out, "1 [shape=record, label=\"True\"];\n"); + fprintf(out, "}\n"); + return; + } + + lddmc_fprintdot_rec(out, mdd); + lddmc_fprintdot_unmark(mdd); + + fprintf(out, "}\n"); +} + +void +lddmc_printdot(MDD mdd) +{ + lddmc_fprintdot(stdout, mdd); +} + +/** + * Some debug stuff + */ +void +lddmc_fprint(FILE *f, MDD mdd) +{ + lddmc_serialize_reset(); + size_t v = lddmc_serialize_add(mdd); + fprintf(f, "%zu,", v); + lddmc_serialize_totext(f); +} + +void +lddmc_print(MDD mdd) +{ + lddmc_fprint(stdout, mdd); +} + +/** + * SERIALIZATION + */ + +struct lddmc_ser { + MDD mdd; + size_t assigned; +}; + +// Define a AVL tree type with prefix 'lddmc_ser' holding +// nodes of struct lddmc_ser with the following compare() function... 
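+// (AVL(...) is a code-generating macro: the braced block that follows it
+// becomes the body of the generated compare function, with 'left' and
+// 'right' pointing at the two entries under comparison. It also generates
+// the lddmc_ser_search/put/insert/free/iter helpers used below.)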
+AVL(lddmc_ser, struct lddmc_ser) +{ + if (left->mdd > right->mdd) return 1; + if (left->mdd < right->mdd) return -1; + return 0; +} + +// Define a AVL tree type with prefix 'lddmc_ser_reversed' holding +// nodes of struct lddmc_ser with the following compare() function... +AVL(lddmc_ser_reversed, struct lddmc_ser) +{ + if (left->assigned > right->assigned) return 1; + if (left->assigned < right->assigned) return -1; + return 0; +} + +// Initially, both sets are empty +static avl_node_t *lddmc_ser_set = NULL; +static avl_node_t *lddmc_ser_reversed_set = NULL; + +// Start counting (assigning numbers to MDDs) at 2 +static volatile size_t lddmc_ser_counter = 2; +static size_t lddmc_ser_done = 0; + +// Given a MDD, assign unique numbers to all nodes +static size_t +lddmc_serialize_assign_rec(MDD mdd) +{ + if (mdd <= lddmc_true) return mdd; + + mddnode_t n = GETNODE(mdd); + + struct lddmc_ser s, *ss; + s.mdd = mdd; + ss = lddmc_ser_search(lddmc_ser_set, &s); + if (ss == NULL) { + // assign dummy value + s.assigned = 0; + ss = lddmc_ser_put(&lddmc_ser_set, &s, NULL); + + // first assign recursively + lddmc_serialize_assign_rec(mddnode_getright(n)); + lddmc_serialize_assign_rec(mddnode_getdown(n)); + + // assign real value + ss->assigned = lddmc_ser_counter++; + + // put a copy in the reversed table + lddmc_ser_reversed_insert(&lddmc_ser_reversed_set, ss); + } + + return ss->assigned; +} + +size_t +lddmc_serialize_add(MDD mdd) +{ + return lddmc_serialize_assign_rec(mdd); +} + +void +lddmc_serialize_reset() +{ + lddmc_ser_free(&lddmc_ser_set); + lddmc_ser_free(&lddmc_ser_reversed_set); + lddmc_ser_counter = 2; + lddmc_ser_done = 0; +} + +size_t +lddmc_serialize_get(MDD mdd) +{ + if (mdd <= lddmc_true) return mdd; + struct lddmc_ser s, *ss; + s.mdd = mdd; + ss = lddmc_ser_search(lddmc_ser_set, &s); + assert(ss != NULL); + return ss->assigned; +} + +MDD +lddmc_serialize_get_reversed(size_t value) +{ + if ((MDD)value <= lddmc_true) return (MDD)value; + struct lddmc_ser s, *ss; + s.assigned = value; + ss = lddmc_ser_reversed_search(lddmc_ser_reversed_set, &s); + assert(ss != NULL); + return ss->mdd; +} + +void +lddmc_serialize_totext(FILE *out) +{ + avl_iter_t *it = lddmc_ser_reversed_iter(lddmc_ser_reversed_set); + struct lddmc_ser *s; + + fprintf(out, "["); + while ((s=lddmc_ser_reversed_iter_next(it))) { + MDD mdd = s->mdd; + mddnode_t n = GETNODE(mdd); + fprintf(out, "(%zu,v=%u,d=%zu,r=%zu),", s->assigned, + mddnode_getvalue(n), + lddmc_serialize_get(mddnode_getdown(n)), + lddmc_serialize_get(mddnode_getright(n))); + } + fprintf(out, "]"); + + lddmc_ser_reversed_iter_free(it); +} + +void +lddmc_serialize_tofile(FILE *out) +{ + size_t count = avl_count(lddmc_ser_reversed_set); + assert(count >= lddmc_ser_done); + assert(count == lddmc_ser_counter-2); + count -= lddmc_ser_done; + fwrite(&count, sizeof(size_t), 1, out); + + struct lddmc_ser *s; + avl_iter_t *it = lddmc_ser_reversed_iter(lddmc_ser_reversed_set); + + /* Skip already written entries */ + size_t index = 0; + while (index < lddmc_ser_done && (s=lddmc_ser_reversed_iter_next(it))) { + assert(s->assigned == index+2); + index++; + } + + while ((s=lddmc_ser_reversed_iter_next(it))) { + assert(s->assigned == index+2); + index++; + + mddnode_t n = GETNODE(s->mdd); + + struct mddnode node; + uint64_t right = lddmc_serialize_get(mddnode_getright(n)); + uint64_t down = lddmc_serialize_get(mddnode_getdown(n)); + if (mddnode_getcopy(n)) mddnode_makecopy(&node, right, down); + else mddnode_make(&node, mddnode_getvalue(n), right, down); + + 
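+        /* Indices were assigned in post-order (children before parents), so
+           the serialized index of each child is strictly smaller than that
+           of the current node; the two asserts below check this invariant. */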
assert(right <= index); + assert(down <= index); + + fwrite(&node, sizeof(struct mddnode), 1, out); + } + + lddmc_ser_done = lddmc_ser_counter-2; + lddmc_ser_reversed_iter_free(it); +} + +void +lddmc_serialize_fromfile(FILE *in) +{ + size_t count, i; + if (fread(&count, sizeof(size_t), 1, in) != 1) { + // TODO FIXME return error + printf("sylvan_serialize_fromfile: file format error, giving up\n"); + exit(-1); + } + + for (i=1; i<=count; i++) { + struct mddnode node; + if (fread(&node, sizeof(struct mddnode), 1, in) != 1) { + // TODO FIXME return error + printf("sylvan_serialize_fromfile: file format error, giving up\n"); + exit(-1); + } + + assert(mddnode_getright(&node) <= lddmc_ser_done+1); + assert(mddnode_getdown(&node) <= lddmc_ser_done+1); + + MDD right = lddmc_serialize_get_reversed(mddnode_getright(&node)); + MDD down = lddmc_serialize_get_reversed(mddnode_getdown(&node)); + + struct lddmc_ser s; + if (mddnode_getcopy(&node)) s.mdd = lddmc_make_copynode(down, right); + else s.mdd = lddmc_makenode(mddnode_getvalue(&node), down, right); + s.assigned = lddmc_ser_done+2; // starts at 0 but we want 2-based... + lddmc_ser_done++; + + lddmc_ser_insert(&lddmc_ser_set, &s); + lddmc_ser_reversed_insert(&lddmc_ser_reversed_set, &s); + } +} + +static void +lddmc_sha2_rec(MDD mdd, SHA256_CTX *ctx) +{ + if (mdd <= lddmc_true) { + SHA256_Update(ctx, (void*)&mdd, sizeof(uint64_t)); + return; + } + + mddnode_t node = GETNODE(mdd); + if (lddmc_mark(node)) { + uint32_t val = mddnode_getvalue(node); + SHA256_Update(ctx, (void*)&val, sizeof(uint32_t)); + lddmc_sha2_rec(mddnode_getdown(node), ctx); + lddmc_sha2_rec(mddnode_getright(node), ctx); + } +} + +void +lddmc_printsha(MDD mdd) +{ + lddmc_fprintsha(stdout, mdd); +} + +void +lddmc_fprintsha(FILE *out, MDD mdd) +{ + char buf[80]; + lddmc_getsha(mdd, buf); + fprintf(out, "%s", buf); +} + +void +lddmc_getsha(MDD mdd, char *target) +{ + SHA256_CTX ctx; + SHA256_Init(&ctx); + lddmc_sha2_rec(mdd, &ctx); + if (mdd > lddmc_true) lddmc_unmark_rec(GETNODE(mdd)); + SHA256_End(&ctx, target); +} + +#ifndef NDEBUG +size_t +lddmc_test_ismdd(MDD mdd) +{ + if (mdd == lddmc_true) return 1; + if (mdd == lddmc_false) return 0; + + int first = 1; + size_t depth = 0; + + if (mdd != lddmc_false) { + mddnode_t n = GETNODE(mdd); + if (mddnode_getcopy(n)) { + mdd = mddnode_getright(n); + depth = lddmc_test_ismdd(mddnode_getdown(n)); + assert(depth >= 1); + } + } + + uint32_t value = 0; + while (mdd != lddmc_false) { + assert(llmsset_is_marked(nodes, mdd)); + + mddnode_t n = GETNODE(mdd); + uint32_t next_value = mddnode_getvalue(n); + assert(mddnode_getcopy(n) == 0); + if (first) { + first = 0; + depth = lddmc_test_ismdd(mddnode_getdown(n)); + assert(depth >= 1); + } else { + assert(value < next_value); + assert(depth == lddmc_test_ismdd(mddnode_getdown(n))); + } + + value = next_value; + mdd = mddnode_getright(n); + } + + return 1 + depth; +} +#endif diff --git a/src/sylvan_ldd.h b/src/sylvan_ldd.h new file mode 100644 index 000000000..f104309b2 --- /dev/null +++ b/src/sylvan_ldd.h @@ -0,0 +1,288 @@ +/* + * Copyright 2011-2014 Formal Methods and Tools, University of Twente + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Do not include this file directly. Instead, include sylvan.h */ + +#ifndef SYLVAN_LDD_H +#define SYLVAN_LDD_H + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + + +typedef uint64_t MDD; // Note: low 40 bits only + +#define lddmc_false ((MDD)0) +#define lddmc_true ((MDD)1) + +/* Initialize LDD functionality */ +void sylvan_init_ldd(); + +/* Primitives */ +MDD lddmc_makenode(uint32_t value, MDD ifeq, MDD ifneq); +MDD lddmc_extendnode(MDD mdd, uint32_t value, MDD ifeq); +uint32_t lddmc_getvalue(MDD mdd); +MDD lddmc_getdown(MDD mdd); +MDD lddmc_getright(MDD mdd); +MDD lddmc_follow(MDD mdd, uint32_t value); + +/** + * Copy nodes in relations. + * A copy node represents 'read x, then write x' for every x. + * In a read-write relation, use copy nodes twice, once on read level, once on write level. + * Copy nodes are only supported by relprod, relprev and union. + */ + +/* Primitive for special 'copy node' (for relprod/relprev) */ +MDD lddmc_make_copynode(MDD ifeq, MDD ifneq); +int lddmc_iscopy(MDD mdd); +MDD lddmc_followcopy(MDD mdd); + +/* Add or remove external reference to MDD */ +MDD lddmc_ref(MDD a); +void lddmc_deref(MDD a); + +/* For use in custom mark functions */ +VOID_TASK_DECL_1(lddmc_gc_mark_rec, MDD) +#define lddmc_gc_mark_rec(mdd) CALL(lddmc_gc_mark_rec, mdd) + +/* Return the number of external references */ +size_t lddmc_count_refs(); + +/* Mark MDD for "notify on dead" */ +#define lddmc_notify_ondead(mdd) llmsset_notify_ondead(nodes, mdd) + +/* Sanity check - returns depth of MDD including 'true' terminal or 0 for empty set */ +#ifndef NDEBUG +size_t lddmc_test_ismdd(MDD mdd); +#endif + +/* Operations for model checking */ +TASK_DECL_2(MDD, lddmc_union, MDD, MDD); +#define lddmc_union(a, b) CALL(lddmc_union, a, b) + +TASK_DECL_2(MDD, lddmc_minus, MDD, MDD); +#define lddmc_minus(a, b) CALL(lddmc_minus, a, b) + +TASK_DECL_3(MDD, lddmc_zip, MDD, MDD, MDD*); +#define lddmc_zip(a, b, res) CALL(lddmc_zip, a, b, res) + +TASK_DECL_2(MDD, lddmc_intersect, MDD, MDD); +#define lddmc_intersect(a, b) CALL(lddmc_intersect, a, b) + +TASK_DECL_3(MDD, lddmc_match, MDD, MDD, MDD); +#define lddmc_match(a, b, proj) CALL(lddmc_match, a, b, proj) + +MDD lddmc_union_cube(MDD a, uint32_t* values, size_t count); +int lddmc_member_cube(MDD a, uint32_t* values, size_t count); +MDD lddmc_cube(uint32_t* values, size_t count); + +MDD lddmc_union_cube_copy(MDD a, uint32_t* values, int* copy, size_t count); +int lddmc_member_cube_copy(MDD a, uint32_t* values, int* copy, size_t count); +MDD lddmc_cube_copy(uint32_t* values, int* copy, size_t count); + +TASK_DECL_3(MDD, lddmc_relprod, MDD, MDD, MDD); +#define lddmc_relprod(a, b, proj) CALL(lddmc_relprod, a, b, proj) + +TASK_DECL_4(MDD, lddmc_relprod_union, MDD, MDD, MDD, MDD); +#define lddmc_relprod_union(a, b, meta, un) CALL(lddmc_relprod_union, a, b, meta, un) + +/** + * Calculate all predecessors to a in uni according to rel[proj] + * follows the same semantics as relprod + * i.e. 
0 (not in rel), 1 (read+write), 2 (read), 3 (write), -1 (end; rest=0) + */ +TASK_DECL_4(MDD, lddmc_relprev, MDD, MDD, MDD, MDD); +#define lddmc_relprev(a, rel, proj, uni) CALL(lddmc_relprev, a, rel, proj, uni) + +// so: proj: -2 (end; quantify rest), -1 (end; keep rest), 0 (quantify), 1 (keep) +TASK_DECL_2(MDD, lddmc_project, MDD, MDD); +#define lddmc_project(mdd, proj) CALL(lddmc_project, mdd, proj) + +TASK_DECL_3(MDD, lddmc_project_minus, MDD, MDD, MDD); +#define lddmc_project_minus(mdd, proj, avoid) CALL(lddmc_project_minus, mdd, proj, avoid) + +TASK_DECL_4(MDD, lddmc_join, MDD, MDD, MDD, MDD); +#define lddmc_join(a, b, a_proj, b_proj) CALL(lddmc_join, a, b, a_proj, b_proj) + +/* Write a DOT representation */ +void lddmc_printdot(MDD mdd); +void lddmc_fprintdot(FILE *out, MDD mdd); + +void lddmc_fprint(FILE *out, MDD mdd); +void lddmc_print(MDD mdd); + +void lddmc_printsha(MDD mdd); +void lddmc_fprintsha(FILE *out, MDD mdd); +void lddmc_getsha(MDD mdd, char *target); // at least 65 bytes... + +/** + * Calculate number of satisfying variable assignments. + * The set of variables must be >= the support of the MDD. + * (i.e. all variables in the MDD must be in variables) + * + * The cached version uses the operation cache, but is limited to 64-bit floating point numbers. + */ + +typedef double lddmc_satcount_double_t; +// if this line below gives an error, modify the above typedef until fixed ;) +typedef char __lddmc_check_float_is_8_bytes[(sizeof(lddmc_satcount_double_t) == sizeof(uint64_t))?1:-1]; + +TASK_DECL_1(lddmc_satcount_double_t, lddmc_satcount_cached, MDD); +#define lddmc_satcount_cached(mdd) CALL(lddmc_satcount_cached, mdd) + +TASK_DECL_1(long double, lddmc_satcount, MDD); +#define lddmc_satcount(mdd) CALL(lddmc_satcount, mdd) + +/** + * A callback for enumerating functions like sat_all_par, collect and match + * Example: + * TASK_3(void*, my_function, uint32_t*, values, size_t, count, void*, context) ... + * For collect, use: + * TASK_3(MDD, ...) + */ +LACE_TYPEDEF_CB(void, lddmc_enum_cb, uint32_t*, size_t, void*); +LACE_TYPEDEF_CB(MDD, lddmc_collect_cb, uint32_t*, size_t, void*); + +VOID_TASK_DECL_5(lddmc_sat_all_par, MDD, lddmc_enum_cb, void*, uint32_t*, size_t); +#define lddmc_sat_all_par(mdd, cb, context) CALL(lddmc_sat_all_par, mdd, cb, context, 0, 0) + +VOID_TASK_DECL_3(lddmc_sat_all_nopar, MDD, lddmc_enum_cb, void*); +#define lddmc_sat_all_nopar(mdd, cb, context) CALL(lddmc_sat_all_nopar, mdd, cb, context) + +TASK_DECL_5(MDD, lddmc_collect, MDD, lddmc_collect_cb, void*, uint32_t*, size_t); +#define lddmc_collect(mdd, cb, context) CALL(lddmc_collect, mdd, cb, context, 0, 0) + +VOID_TASK_DECL_5(lddmc_match_sat_par, MDD, MDD, MDD, lddmc_enum_cb, void*); +#define lddmc_match_sat_par(mdd, match, proj, cb, context) CALL(lddmc_match_sat_par, mdd, match, proj, cb, context) + +int lddmc_sat_one(MDD mdd, uint32_t *values, size_t count); +MDD lddmc_sat_one_mdd(MDD mdd); +#define lddmc_pick_cube lddmc_sat_one_mdd + +/** + * Callback functions for visiting nodes. + * lddmc_visit_seq sequentially visits nodes, down first, then right. 
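+ * (lddmc_visit_pre returns nonzero to descend into a node; if it returns 0,
+ *  the node's children and its lddmc_visit_post callback are skipped.)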
+ * lddmc_visit_par visits nodes in parallel (down || right)
+ */
+LACE_TYPEDEF_CB(int, lddmc_visit_pre_cb, MDD, void*); // int pre(MDD, context)
+LACE_TYPEDEF_CB(void, lddmc_visit_post_cb, MDD, void*); // void post(MDD, context)
+LACE_TYPEDEF_CB(void, lddmc_visit_init_context_cb, void*, void*, int); // void init_context(context, parent, is_down)
+
+typedef struct lddmc_visit_node_callbacks {
+    lddmc_visit_pre_cb lddmc_visit_pre;
+    lddmc_visit_post_cb lddmc_visit_post;
+    lddmc_visit_init_context_cb lddmc_visit_init_context;
+} lddmc_visit_callbacks_t;
+
+VOID_TASK_DECL_4(lddmc_visit_par, MDD, lddmc_visit_callbacks_t*, size_t, void*);
+#define lddmc_visit_par(mdd, cbs, ctx_size, context) CALL(lddmc_visit_par, mdd, cbs, ctx_size, context);
+
+VOID_TASK_DECL_4(lddmc_visit_seq, MDD, lddmc_visit_callbacks_t*, size_t, void*);
+#define lddmc_visit_seq(mdd, cbs, ctx_size, context) CALL(lddmc_visit_seq, mdd, cbs, ctx_size, context);
+
+size_t lddmc_nodecount(MDD mdd);
+void lddmc_nodecount_levels(MDD mdd, size_t *variables);
+
+/**
+ * Functional composition.
+ * For every node at depth <depth>, call function cb (MDD -> MDD)
+ * and replace the node by the result of the function.
+ */
+LACE_TYPEDEF_CB(MDD, lddmc_compose_cb, MDD, void*);
+TASK_DECL_4(MDD, lddmc_compose, MDD, lddmc_compose_cb, void*, int);
+#define lddmc_compose(mdd, cb, context, depth) CALL(lddmc_compose, mdd, cb, context, depth)
+
+/**
+ * SAVING:
+ * use lddmc_serialize_add on every MDD you want to store
+ * use lddmc_serialize_get to retrieve the key of every stored MDD
+ * use lddmc_serialize_tofile
+ *
+ * LOADING:
+ * use lddmc_serialize_fromfile (implies lddmc_serialize_reset)
+ * use lddmc_serialize_get_reversed for every key
+ *
+ * MISC:
+ * use lddmc_serialize_reset to free all allocated structures
+ * use lddmc_serialize_totext to write a textual list of tuples of all MDDs.
+ *   format: [(<id>,v=<value>,d=<down>,r=<right>),...]
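+ *
+ * A minimal save/load round trip, for illustration (assumes 'set' is a live
+ * MDD and 'f'/'g' are open FILE* handles; error handling omitted):
+ *
+ *   lddmc_serialize_reset();
+ *   size_t key = lddmc_serialize_add(set);
+ *   lddmc_serialize_tofile(f);
+ *   fwrite(&key, sizeof(size_t), 1, f);   // also store the root's key
+ *
+ *   lddmc_serialize_fromfile(g);
+ *   size_t key2;
+ *   fread(&key2, sizeof(size_t), 1, g);
+ *   MDD set2 = lddmc_serialize_get_reversed(key2);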
+ * + * for the old lddmc_print functions, use lddmc_serialize_totext + */ +size_t lddmc_serialize_add(MDD mdd); +size_t lddmc_serialize_get(MDD mdd); +MDD lddmc_serialize_get_reversed(size_t value); +void lddmc_serialize_reset(); +void lddmc_serialize_totext(FILE *out); +void lddmc_serialize_tofile(FILE *out); +void lddmc_serialize_fromfile(FILE *in); + +/* Infrastructure for internal markings */ +typedef struct lddmc_refs_internal +{ + size_t r_size, r_count; + size_t s_size, s_count; + MDD *results; + Task **spawns; +} *lddmc_refs_internal_t; + +extern DECLARE_THREAD_LOCAL(lddmc_refs_key, lddmc_refs_internal_t); + +static inline MDD +lddmc_refs_push(MDD ldd) +{ + LOCALIZE_THREAD_LOCAL(lddmc_refs_key, lddmc_refs_internal_t); + if (lddmc_refs_key->r_count >= lddmc_refs_key->r_size) { + lddmc_refs_key->r_size *= 2; + lddmc_refs_key->results = (MDD*)realloc(lddmc_refs_key->results, sizeof(MDD) * lddmc_refs_key->r_size); + } + lddmc_refs_key->results[lddmc_refs_key->r_count++] = ldd; + return ldd; +} + +static inline void +lddmc_refs_pop(int amount) +{ + LOCALIZE_THREAD_LOCAL(lddmc_refs_key, lddmc_refs_internal_t); + lddmc_refs_key->r_count-=amount; +} + +static inline void +lddmc_refs_spawn(Task *t) +{ + LOCALIZE_THREAD_LOCAL(lddmc_refs_key, lddmc_refs_internal_t); + if (lddmc_refs_key->s_count >= lddmc_refs_key->s_size) { + lddmc_refs_key->s_size *= 2; + lddmc_refs_key->spawns = (Task**)realloc(lddmc_refs_key->spawns, sizeof(Task*) * lddmc_refs_key->s_size); + } + lddmc_refs_key->spawns[lddmc_refs_key->s_count++] = t; +} + +static inline MDD +lddmc_refs_sync(MDD result) +{ + LOCALIZE_THREAD_LOCAL(lddmc_refs_key, lddmc_refs_internal_t); + lddmc_refs_key->s_count--; + return result; +} + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif diff --git a/src/sylvan_mtbdd.c b/src/sylvan_mtbdd.c new file mode 100644 index 000000000..a4eb1bd62 --- /dev/null +++ b/src/sylvan_mtbdd.c @@ -0,0 +1,2542 @@ +/* + * Copyright 2011-2015 Formal Methods and Tools, University of Twente + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* Primitives */ +int +mtbdd_isleaf(MTBDD bdd) +{ + if (bdd == mtbdd_true || bdd == mtbdd_false) return 1; + return mtbddnode_isleaf(GETNODE(bdd)); +} + +// for nodes +uint32_t +mtbdd_getvar(MTBDD node) +{ + return mtbddnode_getvariable(GETNODE(node)); +} + +MTBDD +mtbdd_getlow(MTBDD mtbdd) +{ + return node_getlow(mtbdd, GETNODE(mtbdd)); +} + +MTBDD +mtbdd_gethigh(MTBDD mtbdd) +{ + return node_gethigh(mtbdd, GETNODE(mtbdd)); +} + +// for leaves +uint32_t +mtbdd_gettype(MTBDD leaf) +{ + return mtbddnode_gettype(GETNODE(leaf)); +} + +uint64_t +mtbdd_getvalue(MTBDD leaf) +{ + return mtbddnode_getvalue(GETNODE(leaf)); +} + +// for leaf type 0 (integer) +int64_t +mtbdd_getint64(MTBDD leaf) +{ + uint64_t value = mtbdd_getvalue(leaf); + return *(int64_t*)&value; +} + +// for leaf type 1 (double) +double +mtbdd_getdouble(MTBDD leaf) +{ + uint64_t value = mtbdd_getvalue(leaf); + return *(double*)&value; +} + +/** + * Implementation of garbage collection + */ + +/* Recursively mark MDD nodes as 'in use' */ +VOID_TASK_IMPL_1(mtbdd_gc_mark_rec, MDD, mtbdd) +{ + if (mtbdd == mtbdd_true) return; + if (mtbdd == mtbdd_false) return; + + if (llmsset_mark(nodes, mtbdd&(~mtbdd_complement))) { + mtbddnode_t n = GETNODE(mtbdd); + if (!mtbddnode_isleaf(n)) { + SPAWN(mtbdd_gc_mark_rec, mtbddnode_getlow(n)); + CALL(mtbdd_gc_mark_rec, mtbddnode_gethigh(n)); + SYNC(mtbdd_gc_mark_rec); + } + } +} + +/** + * External references + */ + +refs_table_t mtbdd_refs; +refs_table_t mtbdd_protected; +static int mtbdd_protected_created = 0; + +MDD +mtbdd_ref(MDD a) +{ + if (a == mtbdd_true || a == mtbdd_false) return a; + refs_up(&mtbdd_refs, a); + return a; +} + +void +mtbdd_deref(MDD a) +{ + if (a == mtbdd_true || a == mtbdd_false) return; + refs_down(&mtbdd_refs, a); +} + +size_t +mtbdd_count_refs() +{ + return refs_count(&mtbdd_refs); +} + +void +mtbdd_protect(MTBDD *a) +{ + if (!mtbdd_protected_created) { + // In C++, sometimes mtbdd_protect is called before Sylvan is initialized. Just create a table. 
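+        // Note: unlike mtbdd_ref, which pins one specific node, protection
+        // registers the address of the variable; at every garbage collection,
+        // whatever MTBDD it currently holds is marked (see
+        // mtbdd_gc_mark_protected below).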
+        protect_create(&mtbdd_protected, 4096);
+        mtbdd_protected_created = 1;
+    }
+    protect_up(&mtbdd_protected, (size_t)a);
+}
+
+void
+mtbdd_unprotect(MTBDD *a)
+{
+    if (mtbdd_protected.refs_table != NULL) protect_down(&mtbdd_protected, (size_t)a);
+}
+
+size_t
+mtbdd_count_protected()
+{
+    return protect_count(&mtbdd_protected);
+}
+
+/* Called during garbage collection */
+VOID_TASK_0(mtbdd_gc_mark_external_refs)
+{
+    // iterate through refs hash table, mark all found
+    size_t count=0;
+    uint64_t *it = refs_iter(&mtbdd_refs, 0, mtbdd_refs.refs_size);
+    while (it != NULL) {
+        SPAWN(mtbdd_gc_mark_rec, refs_next(&mtbdd_refs, &it, mtbdd_refs.refs_size));
+        count++;
+    }
+    while (count--) {
+        SYNC(mtbdd_gc_mark_rec);
+    }
+}
+
+VOID_TASK_0(mtbdd_gc_mark_protected)
+{
+    // iterate through refs hash table, mark all found
+    size_t count=0;
+    uint64_t *it = protect_iter(&mtbdd_protected, 0, mtbdd_protected.refs_size);
+    while (it != NULL) {
+        BDD *to_mark = (BDD*)protect_next(&mtbdd_protected, &it, mtbdd_protected.refs_size);
+        SPAWN(mtbdd_gc_mark_rec, *to_mark);
+        count++;
+    }
+    while (count--) {
+        SYNC(mtbdd_gc_mark_rec);
+    }
+}
+
+/* Infrastructure for internal markings */
+DECLARE_THREAD_LOCAL(mtbdd_refs_key, mtbdd_refs_internal_t);
+
+VOID_TASK_0(mtbdd_refs_mark_task)
+{
+    LOCALIZE_THREAD_LOCAL(mtbdd_refs_key, mtbdd_refs_internal_t);
+    size_t i, j=0;
+    for (i=0; i<mtbdd_refs_key->r_count; i++) {
+        if (j >= 40) {
+            while (j--) SYNC(mtbdd_gc_mark_rec);
+            j=0;
+        }
+        SPAWN(mtbdd_gc_mark_rec, mtbdd_refs_key->results[i]);
+        j++;
+    }
+    for (i=0; i<mtbdd_refs_key->s_count; i++) {
+        Task *t = mtbdd_refs_key->spawns[i];
+        if (!TASK_IS_STOLEN(t)) break;
+        if (TASK_IS_COMPLETED(t)) {
+            if (j >= 40) {
+                while (j--) SYNC(mtbdd_gc_mark_rec);
+                j=0;
+            }
+            SPAWN(mtbdd_gc_mark_rec, *(BDD*)TASK_RESULT(t));
+            j++;
+        }
+    }
+    while (j--) SYNC(mtbdd_gc_mark_rec);
+}
+
+VOID_TASK_0(mtbdd_refs_mark)
+{
+    TOGETHER(mtbdd_refs_mark_task);
+}
+
+VOID_TASK_0(mtbdd_refs_init_task)
+{
+    mtbdd_refs_internal_t s = (mtbdd_refs_internal_t)malloc(sizeof(struct mtbdd_refs_internal));
+    s->r_size = 128;
+    s->r_count = 0;
+    s->s_size = 128;
+    s->s_count = 0;
+    s->results = (BDD*)malloc(sizeof(BDD) * 128);
+    s->spawns = (Task**)malloc(sizeof(Task*) * 128);
+    SET_THREAD_LOCAL(mtbdd_refs_key, s);
+}
+
+VOID_TASK_0(mtbdd_refs_init)
+{
+    INIT_THREAD_LOCAL(mtbdd_refs_key);
+    TOGETHER(mtbdd_refs_init_task);
+    sylvan_gc_add_mark(10, TASK(mtbdd_refs_mark));
+}
+
+/**
+ * Handling of custom leaves "registry"
+ */
+
+typedef struct
+{
+    mtbdd_hash_cb hash_cb;
+    mtbdd_equals_cb equals_cb;
+    mtbdd_create_cb create_cb;
+    mtbdd_destroy_cb destroy_cb;
+} customleaf_t;
+
+static customleaf_t *cl_registry;
+static size_t cl_registry_count;
+
+static void
+_mtbdd_create_cb(uint64_t *a, uint64_t *b)
+{
+    // for leaf
+    if ((*a & 0x4000000000000000) == 0) return; // huh?
+    uint32_t type = *a & 0xffffffff;
+    if (type >= cl_registry_count) return; // not in registry
+    customleaf_t *c = cl_registry + type;
+    if (c->create_cb == NULL) return; // not in registry
+    c->create_cb(b);
+}
+
+static void
+_mtbdd_destroy_cb(uint64_t a, uint64_t b)
+{
+    // for leaf
+    if ((a & 0x4000000000000000) == 0) return; // huh?
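+    // Bit 62 (0x4000000000000000) of the first word marks a leaf; for
+    // leaves, the low 32 bits hold the registered leaf type, which is used
+    // as an index into cl_registry.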
+ uint32_t type = a & 0xffffffff; + if (type >= cl_registry_count) return; // not in registry + customleaf_t *c = cl_registry + type; + if (c->destroy_cb == NULL) return; // not in registry + c->destroy_cb(b); +} + +static uint64_t +_mtbdd_hash_cb(uint64_t a, uint64_t b, uint64_t seed) +{ + // for leaf + if ((a & 0x4000000000000000) == 0) return llmsset_hash(a, b, seed); + uint32_t type = a & 0xffffffff; + if (type >= cl_registry_count) return llmsset_hash(a, b, seed); + customleaf_t *c = cl_registry + type; + if (c->hash_cb == NULL) return llmsset_hash(a, b, seed); + return c->hash_cb(b, seed ^ a); +} + +static int +_mtbdd_equals_cb(uint64_t a, uint64_t b, uint64_t aa, uint64_t bb) +{ + // for leaf + if (a != aa) return 0; + if ((a & 0x4000000000000000) == 0) return b == bb ? 1 : 0; + if ((aa & 0x4000000000000000) == 0) return b == bb ? 1 : 0; + uint32_t type = a & 0xffffffff; + if (type >= cl_registry_count) return b == bb ? 1 : 0; + customleaf_t *c = cl_registry + type; + if (c->equals_cb == NULL) return b == bb ? 1 : 0; + return c->equals_cb(b, bb); +} + +uint32_t +mtbdd_register_custom_leaf(mtbdd_hash_cb hash_cb, mtbdd_equals_cb equals_cb, mtbdd_create_cb create_cb, mtbdd_destroy_cb destroy_cb) +{ + uint32_t type = cl_registry_count; + if (type == 0) type = 3; + if (cl_registry == NULL) { + cl_registry = (customleaf_t *)calloc(sizeof(customleaf_t), (type+1)); + cl_registry_count = type+1; + llmsset_set_custom(nodes, _mtbdd_hash_cb, _mtbdd_equals_cb, _mtbdd_create_cb, _mtbdd_destroy_cb); + } else if (cl_registry_count <= type) { + cl_registry = (customleaf_t *)realloc(cl_registry, sizeof(customleaf_t) * (type+1)); + memset(cl_registry + cl_registry_count, 0, sizeof(customleaf_t) * (type+1-cl_registry_count)); + cl_registry_count = type+1; + } + customleaf_t *c = cl_registry + type; + c->hash_cb = hash_cb; + c->equals_cb = equals_cb; + c->create_cb = create_cb; + c->destroy_cb = destroy_cb; + return type; +} + +/** + * Initialize and quit functions + */ + +static void +mtbdd_quit() +{ + refs_free(&mtbdd_refs); + if (mtbdd_protected_created) { + protect_free(&mtbdd_protected); + mtbdd_protected_created = 0; + } + if (cl_registry != NULL) { + free(cl_registry); + cl_registry = NULL; + cl_registry_count = 0; + } +} + +void +sylvan_init_mtbdd() +{ + sylvan_register_quit(mtbdd_quit); + sylvan_gc_add_mark(10, TASK(mtbdd_gc_mark_external_refs)); + sylvan_gc_add_mark(10, TASK(mtbdd_gc_mark_protected)); + + // Sanity check + if (sizeof(struct mtbddnode) != 16) { + fprintf(stderr, "Invalid size of mtbdd nodes: %ld\n", sizeof(struct mtbddnode)); + exit(1); + } + + refs_create(&mtbdd_refs, 1024); + if (!mtbdd_protected_created) { + protect_create(&mtbdd_protected, 4096); + mtbdd_protected_created = 1; + } + + LACE_ME; + CALL(mtbdd_refs_init); + + cl_registry = NULL; + cl_registry_count = 0; +} + +/** + * Primitives + */ +MTBDD +mtbdd_makeleaf(uint32_t type, uint64_t value) +{ + struct mtbddnode n; + mtbddnode_makeleaf(&n, type, value); + + int custom = type < cl_registry_count && cl_registry[type].hash_cb != NULL ? 1 : 0; + + int created; + uint64_t index = custom ? llmsset_lookupc(nodes, n.a, n.b, &created) : llmsset_lookup(nodes, n.a, n.b, &created); + if (index == 0) { + LACE_ME; + + sylvan_gc(); + + index = custom ? 
llmsset_lookupc(nodes, n.a, n.b, &created) : llmsset_lookup(nodes, n.a, n.b, &created); + if (index == 0) { + fprintf(stderr, "BDD Unique table full, %zu of %zu buckets filled!\n", llmsset_count_marked(nodes), llmsset_get_size(nodes)); + exit(1); + } + } + + return (MTBDD)index; +} + +MTBDD +mtbdd_makenode(uint32_t var, MTBDD low, MTBDD high) +{ + if (low == high) return low; + + // Normalization to keep canonicity + // low will have no mark + + struct mtbddnode n; + int mark, created; + + if (MTBDD_HASMARK(low)) { + mark = 1; + low = MTBDD_TOGGLEMARK(low); + high = MTBDD_TOGGLEMARK(high); + } else { + mark = 0; + } + + mtbddnode_makenode(&n, var, low, high); + + MTBDD result; + uint64_t index = llmsset_lookup(nodes, n.a, n.b, &created); + if (index == 0) { + LACE_ME; + + mtbdd_refs_push(low); + mtbdd_refs_push(high); + sylvan_gc(); + mtbdd_refs_pop(2); + + index = llmsset_lookup(nodes, n.a, n.b, &created); + if (index == 0) { + fprintf(stderr, "BDD Unique table full, %zu of %zu buckets filled!\n", llmsset_count_marked(nodes), llmsset_get_size(nodes)); + exit(1); + } + } + + result = index; + return mark ? result | mtbdd_complement : result; +} + +/* Operations */ + +/** + * Calculate greatest common divisor + * Source: http://lemire.me/blog/archives/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/ + */ +uint32_t +gcd(uint32_t u, uint32_t v) +{ + int shift; + if (u == 0) return v; + if (v == 0) return u; + shift = __builtin_ctz(u | v); + u >>= __builtin_ctz(u); + do { + v >>= __builtin_ctz(v); + if (u > v) { + unsigned int t = v; + v = u; + u = t; + } + v = v - u; + } while (v != 0); + return u << shift; +} + +/** + * Create leaves of unsigned/signed integers and doubles + */ + +MTBDD +mtbdd_int64(int64_t value) +{ + return mtbdd_makeleaf(0, *(uint64_t*)&value); +} + +MTBDD +mtbdd_double(double value) +{ + return mtbdd_makeleaf(1, *(uint64_t*)&value); +} + +MTBDD +mtbdd_fraction(int64_t nom, uint64_t denom) +{ + if (nom == 0) return mtbdd_makeleaf(2, 1); + uint32_t c = gcd(nom < 0 ? -nom : nom, denom); + nom /= c; + denom /= c; + if (nom > 2147483647 || nom < -2147483647 || denom > 4294967295) fprintf(stderr, "mtbdd_fraction: fraction overflow\n"); + return mtbdd_makeleaf(2, (nom<<32)|denom); +} + +/** + * Create the cube of variables in arr. + */ +MTBDD +mtbdd_fromarray(uint32_t* arr, size_t length) +{ + if (length == 0) return mtbdd_true; + else if (length == 1) return mtbdd_makenode(*arr, mtbdd_false, mtbdd_true); + else return mtbdd_makenode(*arr, mtbdd_false, mtbdd_fromarray(arr+1, length-1)); +} + +/** + * Create a MTBDD cube representing the conjunction of variables in their positive or negative + * form depending on whether the cube[idx] equals 0 (negative), 1 (positive) or 2 (any). 
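+ * (For example, with variable cube {1,2,3} and cube values {1,0,2}, the
+ *  result encodes x1 and not-x2, with x3 unconstrained.)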
+ * Use cube[idx]==3 for "s=s'" in interleaved variables (matches with next variable) + * is the cube of variables + */ +MTBDD +mtbdd_cube(MTBDD variables, uint8_t *cube, MTBDD terminal) +{ + if (variables == mtbdd_true) return terminal; + mtbddnode_t n = GETNODE(variables); + + BDD result; + switch (*cube) { + case 0: + result = mtbdd_cube(node_gethigh(variables, n), cube+1, terminal); + result = mtbdd_makenode(mtbddnode_getvariable(n), result, mtbdd_false); + return result; + case 1: + result = mtbdd_cube(node_gethigh(variables, n), cube+1, terminal); + result = mtbdd_makenode(mtbddnode_getvariable(n), mtbdd_false, result); + return result; + case 2: + return mtbdd_cube(node_gethigh(variables, n), cube+1, terminal); + case 3: + { + MTBDD variables2 = node_gethigh(variables, n); + mtbddnode_t n2 = GETNODE(variables2); + uint32_t var2 = mtbddnode_getvariable(n2); + result = mtbdd_cube(node_gethigh(variables2, n2), cube+2, terminal); + BDD low = mtbdd_makenode(var2, result, mtbdd_false); + mtbdd_refs_push(low); + BDD high = mtbdd_makenode(var2, mtbdd_false, result); + mtbdd_refs_pop(1); + result = mtbdd_makenode(mtbddnode_getvariable(n), low, high); + return result; + } + default: + return mtbdd_false; // ? + } +} + +/** + * Same as mtbdd_cube, but also performs "or" with existing MTBDD, + * effectively adding an item to the set + */ +TASK_IMPL_4(MTBDD, mtbdd_union_cube, MTBDD, mtbdd, MTBDD, vars, uint8_t*, cube, MTBDD, terminal) +{ + /* Terminal cases */ + if (mtbdd == terminal) return terminal; + if (mtbdd == mtbdd_false) return mtbdd_cube(vars, cube, terminal); + if (vars == mtbdd_true) return terminal; + + sylvan_gc_test(); + + mtbddnode_t nv = GETNODE(vars); + uint32_t v = mtbddnode_getvariable(nv); + + mtbddnode_t na = GETNODE(mtbdd); + uint32_t va = mtbddnode_getvariable(na); + + if (va < v) { + MTBDD low = node_getlow(mtbdd, na); + MTBDD high = node_gethigh(mtbdd, na); + SPAWN(mtbdd_union_cube, high, vars, cube, terminal); + BDD new_low = mtbdd_union_cube(low, vars, cube, terminal); + mtbdd_refs_push(new_low); + BDD new_high = SYNC(mtbdd_union_cube); + mtbdd_refs_pop(1); + if (new_low != low || new_high != high) return mtbdd_makenode(va, new_low, new_high); + else return mtbdd; + } else if (va == v) { + MTBDD low = node_getlow(mtbdd, na); + MTBDD high = node_gethigh(mtbdd, na); + switch (*cube) { + case 0: + { + MTBDD new_low = mtbdd_union_cube(low, node_gethigh(vars, nv), cube+1, terminal); + if (new_low != low) return mtbdd_makenode(v, new_low, high); + else return mtbdd; + } + case 1: + { + MTBDD new_high = mtbdd_union_cube(high, node_gethigh(vars, nv), cube+1, terminal); + if (new_high != high) return mtbdd_makenode(v, low, new_high); + return mtbdd; + } + case 2: + { + SPAWN(mtbdd_union_cube, high, node_gethigh(vars, nv), cube+1, terminal); + MTBDD new_low = mtbdd_union_cube(low, node_gethigh(vars, nv), cube+1, terminal); + mtbdd_refs_push(new_low); + MTBDD new_high = SYNC(mtbdd_union_cube); + mtbdd_refs_pop(1); + if (new_low != low || new_high != high) return mtbdd_makenode(v, new_low, new_high); + return mtbdd; + } + case 3: + { + return mtbdd_false; // currently not implemented + } + default: + return mtbdd_false; + } + } else /* va > v */ { + switch (*cube) { + case 0: + { + MTBDD new_low = mtbdd_union_cube(mtbdd, node_gethigh(vars, nv), cube+1, terminal); + return mtbdd_makenode(v, new_low, mtbdd_false); + } + case 1: + { + MTBDD new_high = mtbdd_union_cube(mtbdd, node_gethigh(vars, nv), cube+1, terminal); + return mtbdd_makenode(v, mtbdd_false, new_high); + } + case 2: + 
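+            /* don't-care: v does not occur in mtbdd, so create a node for v
+               and continue adding the cube in both of its branches */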
{ + SPAWN(mtbdd_union_cube, mtbdd, node_gethigh(vars, nv), cube+1, terminal); + MTBDD new_low = mtbdd_union_cube(mtbdd, node_gethigh(vars, nv), cube+1, terminal); + mtbdd_refs_push(new_low); + MTBDD new_high = SYNC(mtbdd_union_cube); + mtbdd_refs_pop(1); + return mtbdd_makenode(v, new_low, new_high); + } + case 3: + { + return mtbdd_false; // currently not implemented + } + default: + return mtbdd_false; + } + } +} + +/** + * Apply a binary operation to and . + */ +TASK_IMPL_3(MTBDD, mtbdd_apply, MTBDD, a, MTBDD, b, mtbdd_apply_op, op) +{ + /* Check terminal case */ + MTBDD result = WRAP(op, &a, &b); + if (result != mtbdd_invalid) return result; + + /* Maybe perform garbage collection */ + sylvan_gc_test(); + + /* Check cache */ + if (cache_get3(CACHE_MTBDD_APPLY, a, b, (size_t)op, &result)) return result; + + /* Get top variable */ + int la = mtbdd_isleaf(a); + int lb = mtbdd_isleaf(b); + mtbddnode_t na, nb; + uint32_t va, vb; + if (!la) { + na = GETNODE(a); + va = mtbddnode_getvariable(na); + } else { + na = 0; + va = 0xffffffff; + } + if (!lb) { + nb = GETNODE(b); + vb = mtbddnode_getvariable(nb); + } else { + nb = 0; + vb = 0xffffffff; + } + uint32_t v = va < vb ? va : vb; + + /* Get cofactors */ + MTBDD alow, ahigh, blow, bhigh; + if (!la && va == v) { + alow = node_getlow(a, na); + ahigh = node_gethigh(a, na); + } else { + alow = a; + ahigh = a; + } + if (!lb && vb == v) { + blow = node_getlow(b, nb); + bhigh = node_gethigh(b, nb); + } else { + blow = b; + bhigh = b; + } + + /* Recursive */ + mtbdd_refs_spawn(SPAWN(mtbdd_apply, ahigh, bhigh, op)); + MTBDD low = mtbdd_refs_push(CALL(mtbdd_apply, alow, blow, op)); + MTBDD high = mtbdd_refs_sync(SYNC(mtbdd_apply)); + mtbdd_refs_pop(1); + result = mtbdd_makenode(v, low, high); + + /* Store in cache */ + cache_put3(CACHE_MTBDD_APPLY, a, b, (size_t)op, result); + return result; +} + +/** + * Apply a binary operation to and with parameter
<p>
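+ * (Both opid and p are part of the operation-cache key, so every distinct
+ *  parameterized operator should be given its own unique opid.)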
+ */ +TASK_IMPL_5(MTBDD, mtbdd_applyp, MTBDD, a, MTBDD, b, size_t, p, mtbdd_applyp_op, op, uint64_t, opid) +{ + /* Check terminal case */ + MTBDD result = WRAP(op, &a, &b, p); + if (result != mtbdd_invalid) return result; + + /* Maybe perform garbage collection */ + sylvan_gc_test(); + + /* Check cache */ + if (cache_get3(opid, a, b, p, &result)) return result; + + /* Get top variable */ + int la = mtbdd_isleaf(a); + int lb = mtbdd_isleaf(b); + mtbddnode_t na, nb; + uint32_t va, vb; + if (!la) { + na = GETNODE(a); + va = mtbddnode_getvariable(na); + } else { + na = 0; + va = 0xffffffff; + } + if (!lb) { + nb = GETNODE(b); + vb = mtbddnode_getvariable(nb); + } else { + nb = 0; + vb = 0xffffffff; + } + uint32_t v = va < vb ? va : vb; + + /* Get cofactors */ + MTBDD alow, ahigh, blow, bhigh; + if (!la && va == v) { + alow = node_getlow(a, na); + ahigh = node_gethigh(a, na); + } else { + alow = a; + ahigh = a; + } + if (!lb && vb == v) { + blow = node_getlow(b, nb); + bhigh = node_gethigh(b, nb); + } else { + blow = b; + bhigh = b; + } + + /* Recursive */ + mtbdd_refs_spawn(SPAWN(mtbdd_applyp, ahigh, bhigh, p, op, opid)); + MTBDD low = mtbdd_refs_push(CALL(mtbdd_applyp, alow, blow, p, op, opid)); + MTBDD high = mtbdd_refs_sync(SYNC(mtbdd_applyp)); + mtbdd_refs_pop(1); + result = mtbdd_makenode(v, low, high); + + /* Store in cache */ + cache_put3(opid, a, b, p, result); + return result; +} + +/** + * Apply a unary operation to
<dd>
. + */ +TASK_IMPL_3(MTBDD, mtbdd_uapply, MTBDD, dd, mtbdd_uapply_op, op, size_t, param) +{ + /* Maybe perform garbage collection */ + sylvan_gc_test(); + + /* Check cache */ + MTBDD result; + if (cache_get3(CACHE_MTBDD_UAPPLY, dd, (size_t)op, param, &result)) return result; + + /* Check terminal case */ + result = WRAP(op, dd, param); + if (result != mtbdd_invalid) { + /* Store in cache */ + cache_put3(CACHE_MTBDD_UAPPLY, dd, (size_t)op, param, result); + return result; + } + + /* Get cofactors */ + mtbddnode_t ndd = GETNODE(dd); + MTBDD ddlow = node_getlow(dd, ndd); + MTBDD ddhigh = node_gethigh(dd, ndd); + + /* Recursive */ + mtbdd_refs_spawn(SPAWN(mtbdd_uapply, ddhigh, op, param)); + MTBDD low = mtbdd_refs_push(CALL(mtbdd_uapply, ddlow, op, param)); + MTBDD high = mtbdd_refs_sync(SYNC(mtbdd_uapply)); + mtbdd_refs_pop(1); + result = mtbdd_makenode(mtbddnode_getvariable(ndd), low, high); + + /* Store in cache */ + cache_put3(CACHE_MTBDD_UAPPLY, dd, (size_t)op, param, result); + return result; +} + +TASK_2(MTBDD, mtbdd_uop_times_uint, MTBDD, a, size_t, k) +{ + if (a == mtbdd_false) return mtbdd_false; + if (a == mtbdd_true) return mtbdd_true; + + // a != constant + mtbddnode_t na = GETNODE(a); + + if (mtbddnode_isleaf(na)) { + if (mtbddnode_gettype(na) == 0) { + int64_t v = mtbdd_getint64(a); + return mtbdd_int64(v*k); + } else if (mtbddnode_gettype(na) == 1) { + double d = mtbdd_getdouble(a); + return mtbdd_double(d*k); + } else if (mtbddnode_gettype(na) == 2) { + uint64_t v = mtbddnode_getvalue(na); + int64_t n = (int32_t)(v>>32); + uint32_t d = v; + uint32_t c = gcd(d, (uint32_t)k); + return mtbdd_fraction(n*(k/c), d/c); + } + } + + return mtbdd_invalid; +} + +TASK_2(MTBDD, mtbdd_uop_pow_uint, MTBDD, a, size_t, k) +{ + if (a == mtbdd_false) return mtbdd_false; + if (a == mtbdd_true) return mtbdd_true; + + // a != constant + mtbddnode_t na = GETNODE(a); + + if (mtbddnode_isleaf(na)) { + if (mtbddnode_gettype(na) == 0) { + int64_t v = mtbdd_getint64(a); + return mtbdd_int64(pow(v, k)); + } else if (mtbddnode_gettype(na) == 1) { + double d = mtbdd_getdouble(a); + return mtbdd_double(pow(d, k)); + } else if (mtbddnode_gettype(na) == 2) { + uint64_t v = mtbddnode_getvalue(na); + return mtbdd_fraction(pow((int32_t)(v>>32), k), (uint32_t)v); + } + } + + return mtbdd_invalid; +} + +TASK_IMPL_3(MTBDD, mtbdd_abstract_op_plus, MTBDD, a, MTBDD, b, int, k) +{ + if (k==0) { + return mtbdd_apply(a, b, TASK(mtbdd_op_plus)); + } else { + uint64_t factor = 1ULL< from using the operation + */ +TASK_IMPL_3(MTBDD, mtbdd_abstract, MTBDD, a, MTBDD, v, mtbdd_abstract_op, op) +{ + /* Check terminal case */ + if (a == mtbdd_false) return mtbdd_false; + if (a == mtbdd_true) return mtbdd_true; + if (v == mtbdd_true) return a; + + /* Maybe perform garbage collection */ + sylvan_gc_test(); + + /* a != constant, v != constant */ + mtbddnode_t na = GETNODE(a); + + if (mtbddnode_isleaf(na)) { + /* Count number of variables */ + uint64_t k = 0; + while (v != mtbdd_true) { + k++; + v = node_gethigh(v, GETNODE(v)); + } + + /* Check cache */ + MTBDD result; + if (cache_get3(CACHE_MTBDD_ABSTRACT, a, v | (k << 40), (size_t)op, &result)) return result; + + /* Compute result */ + result = WRAP(op, a, a, k); + + /* Store in cache */ + cache_put3(CACHE_MTBDD_ABSTRACT, a, v | (k << 40), (size_t)op, result); + return result; + } + + /* Possibly skip k variables */ + mtbddnode_t nv = GETNODE(v); + uint32_t var_a = mtbddnode_getvariable(na); + uint32_t var_v = mtbddnode_getvariable(nv); + uint64_t k = 0; + while (var_v < var_a) { + 
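+        /* Variables of v that come before a's top variable do not occur in
+           a; k counts them, and op is later invoked with k so it can
+           compensate for the skipped levels. */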
k++; + v = node_gethigh(v, nv); + if (v == mtbdd_true) break; + nv = GETNODE(v); + var_v = mtbddnode_getvariable(nv); + } + + /* Check cache */ + MTBDD result; + if (cache_get3(CACHE_MTBDD_ABSTRACT, a, v | (k << 40), (size_t)op, &result)) return result; + + /* Recursive */ + if (v == mtbdd_true) { + result = a; + } else if (var_a < var_v) { + mtbdd_refs_spawn(SPAWN(mtbdd_abstract, node_gethigh(a, na), v, op)); + MTBDD low = mtbdd_refs_push(CALL(mtbdd_abstract, node_getlow(a, na), v, op)); + MTBDD high = mtbdd_refs_sync(SYNC(mtbdd_abstract)); + mtbdd_refs_pop(1); + result = mtbdd_makenode(var_a, low, high); + } else /* var_a == var_v */ { + mtbdd_refs_spawn(SPAWN(mtbdd_abstract, node_gethigh(a, na), node_gethigh(v, nv), op)); + MTBDD low = mtbdd_refs_push(CALL(mtbdd_abstract, node_getlow(a, na), node_gethigh(v, nv), op)); + MTBDD high = mtbdd_refs_push(mtbdd_refs_sync(SYNC(mtbdd_abstract))); + result = WRAP(op, low, high, 0); + mtbdd_refs_pop(2); + } + + if (k) { + mtbdd_refs_push(result); + result = WRAP(op, result, result, k); + mtbdd_refs_pop(1); + } + + /* Store in cache */ + cache_put3(CACHE_MTBDD_ABSTRACT, a, v | (k << 40), (size_t)op, result); + return result; +} + +/** + * Binary operation Plus (for MTBDDs of same type) + * Only for MTBDDs where either all leaves are Boolean, or Integer, or Double. + * For Integer/Double MTBDDs, mtbdd_false is interpreted as "0" or "0.0". + */ +TASK_IMPL_2(MTBDD, mtbdd_op_plus, MTBDD*, pa, MTBDD*, pb) +{ + MTBDD a = *pa, b = *pb; + if (a == mtbdd_false) return b; + if (b == mtbdd_false) return a; + + // Handle Boolean MTBDDs: interpret as Or + if (a == mtbdd_true) return mtbdd_true; + if (b == mtbdd_true) return mtbdd_true; + + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + + if (mtbddnode_isleaf(na) && mtbddnode_isleaf(nb)) { + uint64_t val_a = mtbddnode_getvalue(na); + uint64_t val_b = mtbddnode_getvalue(nb); + if (mtbddnode_gettype(na) == 0 && mtbddnode_gettype(nb) == 0) { + // both integer + return mtbdd_int64(*(int64_t*)(&val_a) + *(int64_t*)(&val_b)); + } else if (mtbddnode_gettype(na) == 1 && mtbddnode_gettype(nb) == 1) { + // both double + return mtbdd_double(*(double*)(&val_a) + *(double*)(&val_b)); + } else if (mtbddnode_gettype(na) == 2 && mtbddnode_gettype(nb) == 2) { + // both fraction + int64_t nom_a = (int32_t)(val_a>>32); + int64_t nom_b = (int32_t)(val_b>>32); + uint64_t denom_a = val_a&0xffffffff; + uint64_t denom_b = val_b&0xffffffff; + // common cases + if (nom_a == 0) return b; + if (nom_b == 0) return a; + // equalize denominators + uint32_t c = gcd(denom_a, denom_b); + nom_a *= denom_b/c; + nom_b *= denom_a/c; + denom_a *= denom_b/c; + // add + return mtbdd_fraction(nom_a + nom_b, denom_a); + } + } + + if (a < b) { + *pa = b; + *pb = a; + } + + return mtbdd_invalid; +} + +/** + * Binary operation Minus (for MTBDDs of same type) + * Only for MTBDDs where either all leaves are Boolean, or Integer, or Double. + * For Integer/Double MTBDDs, mtbdd_false is interpreted as "0" or "0.0". 
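+ * (Typically invoked through mtbdd_apply, e.g.
+ *  mtbdd_apply(a, b, TASK(mtbdd_op_minus)), following the same pattern as
+ *  mtbdd_op_plus in mtbdd_abstract_op_plus above.)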
+ */ +TASK_IMPL_2(MTBDD, mtbdd_op_minus, MTBDD*, pa, MTBDD*, pb) +{ + MTBDD a = *pa, b = *pb; + if (a == mtbdd_false) return mtbdd_negate(b); + if (b == mtbdd_false) return a; + + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + + if (mtbddnode_isleaf(na) && mtbddnode_isleaf(nb)) { + uint64_t val_a = mtbddnode_getvalue(na); + uint64_t val_b = mtbddnode_getvalue(nb); + if (mtbddnode_gettype(na) == 0 && mtbddnode_gettype(nb) == 0) { + // both integer + return mtbdd_int64(*(int64_t*)(&val_a) - *(int64_t*)(&val_b)); + } else if (mtbddnode_gettype(na) == 1 && mtbddnode_gettype(nb) == 1) { + // both double + return mtbdd_double(*(double*)(&val_a) - *(double*)(&val_b)); + } else if (mtbddnode_gettype(na) == 2 && mtbddnode_gettype(nb) == 2) { + // both fraction + int64_t nom_a = (int32_t)(val_a>>32); + int64_t nom_b = (int32_t)(val_b>>32); + uint64_t denom_a = val_a&0xffffffff; + uint64_t denom_b = val_b&0xffffffff; + // common cases + if (nom_b == 0) return a; + // equalize denominators + uint32_t c = gcd(denom_a, denom_b); + nom_a *= denom_b/c; + nom_b *= denom_a/c; + denom_a *= denom_b/c; + // subtract + return mtbdd_fraction(nom_a - nom_b, denom_a); + } + } + + return mtbdd_invalid; +} + +/** + * Binary operation Times (for MTBDDs of same type) + * Only for MTBDDs where either all leaves are Boolean, or Integer, or Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is mtbdd_false (i.e. not defined). + */ +TASK_IMPL_2(MTBDD, mtbdd_op_times, MTBDD*, pa, MTBDD*, pb) +{ + MTBDD a = *pa, b = *pb; + if (a == mtbdd_false || b == mtbdd_false) return mtbdd_false; + + // Handle Boolean MTBDDs: interpret as And + if (a == mtbdd_true) return b; + if (b == mtbdd_true) return a; + + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + + if (mtbddnode_isleaf(na) && mtbddnode_isleaf(nb)) { + uint64_t val_a = mtbddnode_getvalue(na); + uint64_t val_b = mtbddnode_getvalue(nb); + if (mtbddnode_gettype(na) == 0 && mtbddnode_gettype(nb) == 0) { + // both integer + return mtbdd_int64(*(int64_t*)(&val_a) * *(int64_t*)(&val_b)); + } else if (mtbddnode_gettype(na) == 1 && mtbddnode_gettype(nb) == 1) { + // both double + return mtbdd_double(*(double*)(&val_a) * *(double*)(&val_b)); + } else if (mtbddnode_gettype(na) == 2 && mtbddnode_gettype(nb) == 2) { + // both fraction + int64_t nom_a = (int32_t)(val_a>>32); + int64_t nom_b = (int32_t)(val_b>>32); + uint64_t denom_a = val_a&0xffffffff; + uint64_t denom_b = val_b&0xffffffff; + // multiply! + uint32_t c = gcd(nom_b < 0 ? -nom_b : nom_b, denom_a); + uint32_t d = gcd(nom_a < 0 ? -nom_a : nom_a, denom_b); + nom_a /= d; + denom_a /= c; + nom_a *= (nom_b/c); + denom_a *= (denom_b/d); + return mtbdd_fraction(nom_a, denom_a); + } + } + + if (a < b) { + *pa = b; + *pb = a; + } + + return mtbdd_invalid; +} + +/** + * Binary operation Minimum (for MTBDDs of same type) + * Only for MTBDDs where either all leaves are Boolean, or Integer, or Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is the other operand. 
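+ * (mtbdd_false here acts as "undefined" for partial functions: a defined
+ *  leaf always takes precedence over an undefined operand.)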
+ */ +TASK_IMPL_2(MTBDD, mtbdd_op_min, MTBDD*, pa, MTBDD*, pb) +{ + MTBDD a = *pa, b = *pb; + if (a == mtbdd_true) return b; + if (b == mtbdd_true) return a; + if (a == b) return a; + + // Special case where "false" indicates a partial function + if (a == mtbdd_false && b != mtbdd_false && mtbddnode_isleaf(GETNODE(b))) return b; + if (b == mtbdd_false && a != mtbdd_false && mtbddnode_isleaf(GETNODE(a))) return a; + + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + + if (mtbddnode_isleaf(na) && mtbddnode_isleaf(nb)) { + uint64_t val_a = mtbddnode_getvalue(na); + uint64_t val_b = mtbddnode_getvalue(nb); + if (mtbddnode_gettype(na) == 0 && mtbddnode_gettype(nb) == 0) { + // both integer + int64_t va = *(int64_t*)(&val_a); + int64_t vb = *(int64_t*)(&val_b); + return va < vb ? a : b; + } else if (mtbddnode_gettype(na) == 1 && mtbddnode_gettype(nb) == 1) { + // both double + double va = *(double*)&val_a; + double vb = *(double*)&val_b; + return va < vb ? a : b; + } else if (mtbddnode_gettype(na) == 2 && mtbddnode_gettype(nb) == 2) { + // both fraction + int64_t nom_a = (int32_t)(val_a>>32); + int64_t nom_b = (int32_t)(val_b>>32); + uint64_t denom_a = val_a&0xffffffff; + uint64_t denom_b = val_b&0xffffffff; + // equalize denominators + uint32_t c = gcd(denom_a, denom_b); + nom_a *= denom_b/c; + nom_b *= denom_a/c; + // compute lowest + return nom_a < nom_b ? a : b; + } + } + + if (a < b) { + *pa = b; + *pb = a; + } + + return mtbdd_invalid; +} + +/** + * Binary operation Maximum (for MTBDDs of same type) + * Only for MTBDDs where either all leaves are Boolean, or Integer, or Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is the other operand. + */ +TASK_IMPL_2(MTBDD, mtbdd_op_max, MTBDD*, pa, MTBDD*, pb) +{ + MTBDD a = *pa, b = *pb; + if (a == mtbdd_true) return a; + if (b == mtbdd_true) return b; + if (a == mtbdd_false) return b; + if (b == mtbdd_false) return a; + if (a == b) return a; + + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + + if (mtbddnode_isleaf(na) && mtbddnode_isleaf(nb)) { + uint64_t val_a = mtbddnode_getvalue(na); + uint64_t val_b = mtbddnode_getvalue(nb); + if (mtbddnode_gettype(na) == 0 && mtbddnode_gettype(nb) == 0) { + // both integer + int64_t va = *(int64_t*)(&val_a); + int64_t vb = *(int64_t*)(&val_b); + return va > vb ? a : b; + } else if (mtbddnode_gettype(na) == 1 && mtbddnode_gettype(nb) == 1) { + // both double + double vval_a = *(double*)&val_a; + double vval_b = *(double*)&val_b; + return vval_a > vval_b ? a : b; + } else if (mtbddnode_gettype(na) == 2 && mtbddnode_gettype(nb) == 2) { + // both fraction + int64_t nom_a = (int32_t)(val_a>>32); + int64_t nom_b = (int32_t)(val_b>>32); + uint64_t denom_a = val_a&0xffffffff; + uint64_t denom_b = val_b&0xffffffff; + // equalize denominators + uint32_t c = gcd(denom_a, denom_b); + nom_a *= denom_b/c; + nom_b *= denom_a/c; + // compute highest + return nom_a > nom_b ? a : b; + } + } + + if (a < b) { + *pa = b; + *pb = a; + } + + return mtbdd_invalid; +} + +TASK_IMPL_2(MTBDD, mtbdd_op_negate, MTBDD, a, size_t, k) +{ + // if a is false, then it is a partial function. Keep partial! 
+ if (a == mtbdd_false) return mtbdd_false; + + // a != constant + mtbddnode_t na = GETNODE(a); + + if (mtbddnode_isleaf(na)) { + if (mtbddnode_gettype(na) == 0) { + int64_t v = mtbdd_getint64(a); + return mtbdd_int64(-v); + } else if (mtbddnode_gettype(na) == 1) { + double d = mtbdd_getdouble(a); + return mtbdd_double(-d); + } else if (mtbddnode_gettype(na) == 2) { + uint64_t v = mtbddnode_getvalue(na); + return mtbdd_fraction(-(int32_t)(v>>32), (uint32_t)v); + } + } + + return mtbdd_invalid; + (void)k; // unused variable +} + +/** + * Compute IF THEN ELSE . + * must be a Boolean MTBDD (or standard BDD). + */ +TASK_IMPL_3(MTBDD, mtbdd_ite, MTBDD, f, MTBDD, g, MTBDD, h) +{ + /* Terminal cases */ + if (f == mtbdd_true) return g; + if (f == mtbdd_false) return h; + if (g == h) return g; + if (g == mtbdd_true && h == mtbdd_false) return f; + if (h == mtbdd_true && g == mtbdd_false) return MTBDD_TOGGLEMARK(f); + + // If all MTBDD's are Boolean, then there could be further optimizations (see sylvan_bdd.c) + + /* Maybe perform garbage collection */ + sylvan_gc_test(); + + /* Check cache */ + MTBDD result; + if (cache_get3(CACHE_MTBDD_ITE, f, g, h, &result)) return result; + + /* Get top variable */ + int lg = mtbdd_isleaf(g); + int lh = mtbdd_isleaf(h); + mtbddnode_t nf = GETNODE(f); + mtbddnode_t ng = lg ? 0 : GETNODE(g); + mtbddnode_t nh = lh ? 0 : GETNODE(h); + uint32_t vf = mtbddnode_getvariable(nf); + uint32_t vg = lg ? 0 : mtbddnode_getvariable(ng); + uint32_t vh = lh ? 0 : mtbddnode_getvariable(nh); + uint32_t v = vf; + if (!lg && vg < v) v = vg; + if (!lh && vh < v) v = vh; + + /* Get cofactors */ + MTBDD flow, fhigh, glow, ghigh, hlow, hhigh; + flow = (vf == v) ? node_getlow(f, nf) : f; + fhigh = (vf == v) ? node_gethigh(f, nf) : f; + glow = (!lg && vg == v) ? node_getlow(g, ng) : g; + ghigh = (!lg && vg == v) ? node_gethigh(g, ng) : g; + hlow = (!lh && vh == v) ? node_getlow(h, nh) : h; + hhigh = (!lh && vh == v) ? node_gethigh(h, nh) : h; + + /* Recursive calls */ + mtbdd_refs_spawn(SPAWN(mtbdd_ite, fhigh, ghigh, hhigh)); + MTBDD low = mtbdd_refs_push(CALL(mtbdd_ite, flow, glow, hlow)); + MTBDD high = mtbdd_refs_sync(SYNC(mtbdd_ite)); + mtbdd_refs_pop(1); + result = mtbdd_makenode(v, low, high); + + /* Store in cache */ + cache_put3(CACHE_MTBDD_ITE, f, g, h, result); + return result; +} + +/** + * Monad that converts double/fraction to a Boolean MTBDD, translate terminals >= value to 1 and to 0 otherwise; + */ +TASK_IMPL_2(MTBDD, mtbdd_op_threshold_double, MTBDD, a, size_t, svalue) +{ + /* We only expect "double" terminals, or false */ + if (a == mtbdd_false) return mtbdd_false; + if (a == mtbdd_true) return mtbdd_invalid; + + // a != constant + mtbddnode_t na = GETNODE(a); + + if (mtbddnode_isleaf(na)) { + double value = *(double*)&svalue; + if (mtbddnode_gettype(na) == 1) { + return mtbdd_getdouble(a) >= value ? mtbdd_true : mtbdd_false; + } else if (mtbddnode_gettype(na) == 2) { + double d = (double)mtbdd_getnumer(a); + d /= mtbdd_getdenom(a); + return d >= value ? 
mtbdd_true : mtbdd_false; + } + } + + return mtbdd_invalid; +} + +/** + * Monad that converts double/fraction to a Boolean BDD, translate terminals > value to 1 and to 0 otherwise; + */ +TASK_IMPL_2(MTBDD, mtbdd_op_strict_threshold_double, MTBDD, a, size_t, svalue) +{ + /* We only expect "double" terminals, or false */ + if (a == mtbdd_false) return mtbdd_false; + if (a == mtbdd_true) return mtbdd_invalid; + + // a != constant + mtbddnode_t na = GETNODE(a); + + if (mtbddnode_isleaf(na)) { + double value = *(double*)&svalue; + if (mtbddnode_gettype(na) == 1) { + return mtbdd_getdouble(a) > value ? mtbdd_true : mtbdd_false; + } else if (mtbddnode_gettype(na) == 2) { + double d = (double)mtbdd_getnumer(a); + d /= mtbdd_getdenom(a); + return d > value ? mtbdd_true : mtbdd_false; + } + } + + return mtbdd_invalid; +} + +TASK_IMPL_2(MTBDD, mtbdd_threshold_double, MTBDD, dd, double, d) +{ + return mtbdd_uapply(dd, TASK(mtbdd_op_threshold_double), *(size_t*)&d); +} + +TASK_IMPL_2(MTBDD, mtbdd_strict_threshold_double, MTBDD, dd, double, d) +{ + return mtbdd_uapply(dd, TASK(mtbdd_op_strict_threshold_double), *(size_t*)&d); +} + +/** + * Compare two Double MTBDDs, returns Boolean True if they are equal within some value epsilon + */ +TASK_4(MTBDD, mtbdd_equal_norm_d2, MTBDD, a, MTBDD, b, size_t, svalue, int*, shortcircuit) +{ + /* Check short circuit */ + if (*shortcircuit) return mtbdd_false; + + /* Check terminal case */ + if (a == b) return mtbdd_true; + if (a == mtbdd_false) return mtbdd_false; + if (b == mtbdd_false) return mtbdd_false; + + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + int la = mtbddnode_isleaf(na); + int lb = mtbddnode_isleaf(nb); + + if (la && lb) { + // assume Double MTBDD + double va = mtbdd_getdouble(a); + double vb = mtbdd_getdouble(b); + va -= vb; + if (va < 0) va = -va; + return (va < *(double*)&svalue) ? mtbdd_true : mtbdd_false; + } + + if (b < a) { + MTBDD t = a; + a = b; + b = t; + } + + /* Maybe perform garbage collection */ + sylvan_gc_test(); + + /* Check cache */ + MTBDD result; + if (cache_get3(CACHE_MTBDD_EQUAL_NORM, a, b, svalue, &result)) return result; + + /* Get top variable */ + uint32_t va = la ? 0xffffffff : mtbddnode_getvariable(na); + uint32_t vb = lb ? 0xffffffff : mtbddnode_getvariable(nb); + uint32_t var = va < vb ? va : vb; + + /* Get cofactors */ + MTBDD alow, ahigh, blow, bhigh; + alow = va == var ? node_getlow(a, na) : a; + ahigh = va == var ? node_gethigh(a, na) : a; + blow = vb == var ? node_getlow(b, nb) : b; + bhigh = vb == var ? node_gethigh(b, nb) : b; + + SPAWN(mtbdd_equal_norm_d2, ahigh, bhigh, svalue, shortcircuit); + result = CALL(mtbdd_equal_norm_d2, alow, blow, svalue, shortcircuit); + if (result == mtbdd_false) *shortcircuit = 1; + if (result != SYNC(mtbdd_equal_norm_d2)) result = mtbdd_false; + if (result == mtbdd_false) *shortcircuit = 1; + + /* Store in cache */ + cache_put3(CACHE_MTBDD_EQUAL_NORM, a, b, svalue, result); + return result; +} + +TASK_IMPL_3(MTBDD, mtbdd_equal_norm_d, MTBDD, a, MTBDD, b, double, d) +{ + /* the implementation checks shortcircuit in every task and if the two + MTBDDs are not equal module epsilon, then the computation tree quickly aborts */ + int shortcircuit = 0; + return CALL(mtbdd_equal_norm_d2, a, b, *(size_t*)&d, &shortcircuit); +} + +/** + * Compare two Double MTBDDs, returns Boolean True if they are equal within some value epsilon + * This version computes the relative difference vs the value in a. 
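+ *
+ * Intended use is via the mtbdd_equal_norm_rel_d wrapper below, which
+ * type-puns the double epsilon through the size_t task argument (a sketch):
+ *
+ *   MTBDD eq = mtbdd_equal_norm_rel_d(a, b, 1e-9);
+ *   // eq == mtbdd_true iff |(a(s)-b(s))/a(s)| < 1e-9 for all common assignments s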
+ */ +TASK_4(MTBDD, mtbdd_equal_norm_rel_d2, MTBDD, a, MTBDD, b, size_t, svalue, int*, shortcircuit) +{ + /* Check short circuit */ + if (*shortcircuit) return mtbdd_false; + + /* Check terminal case */ + if (a == b) return mtbdd_true; + if (a == mtbdd_false) return mtbdd_false; + if (b == mtbdd_false) return mtbdd_false; + + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + int la = mtbddnode_isleaf(na); + int lb = mtbddnode_isleaf(nb); + + if (la && lb) { + // assume Double MTBDD + double va = mtbdd_getdouble(a); + double vb = mtbdd_getdouble(b); + if (va == 0) return mtbdd_false; + va = (va - vb) / va; + if (va < 0) va = -va; + return (va < *(double*)&svalue) ? mtbdd_true : mtbdd_false; + } + + /* Maybe perform garbage collection */ + sylvan_gc_test(); + + /* Check cache */ + MTBDD result; + if (cache_get3(CACHE_MTBDD_EQUAL_NORM_REL, a, b, svalue, &result)) return result; + + /* Get top variable */ + uint32_t va = la ? 0xffffffff : mtbddnode_getvariable(na); + uint32_t vb = lb ? 0xffffffff : mtbddnode_getvariable(nb); + uint32_t var = va < vb ? va : vb; + + /* Get cofactors */ + MTBDD alow, ahigh, blow, bhigh; + alow = va == var ? node_getlow(a, na) : a; + ahigh = va == var ? node_gethigh(a, na) : a; + blow = vb == var ? node_getlow(b, nb) : b; + bhigh = vb == var ? node_gethigh(b, nb) : b; + + SPAWN(mtbdd_equal_norm_rel_d2, ahigh, bhigh, svalue, shortcircuit); + result = CALL(mtbdd_equal_norm_rel_d2, alow, blow, svalue, shortcircuit); + if (result == mtbdd_false) *shortcircuit = 1; + if (result != SYNC(mtbdd_equal_norm_rel_d2)) result = mtbdd_false; + if (result == mtbdd_false) *shortcircuit = 1; + + /* Store in cache */ + cache_put3(CACHE_MTBDD_EQUAL_NORM_REL, a, b, svalue, result); + return result; +} + +TASK_IMPL_3(MTBDD, mtbdd_equal_norm_rel_d, MTBDD, a, MTBDD, b, double, d) +{ + /* the implementation checks shortcircuit in every task and if the two + MTBDDs are not equal module epsilon, then the computation tree quickly aborts */ + int shortcircuit = 0; + return CALL(mtbdd_equal_norm_rel_d2, a, b, *(size_t*)&d, &shortcircuit); +} + +/** + * For two MTBDDs a, b, return mtbdd_true if all common assignments a(s) <= b(s), mtbdd_false otherwise. + * For domains not in a / b, assume True. + */ +TASK_3(MTBDD, mtbdd_leq_rec, MTBDD, a, MTBDD, b, int*, shortcircuit) +{ + /* Check short circuit */ + if (*shortcircuit) return mtbdd_false; + + /* Check terminal case */ + if (a == b) return mtbdd_true; + + /* For partial functions, just return true */ + if (a == mtbdd_false) return mtbdd_true; + if (b == mtbdd_false) return mtbdd_true; + + /* Maybe perform garbage collection */ + sylvan_gc_test(); + + /* Check cache */ + MTBDD result; + if (cache_get3(CACHE_MTBDD_LEQ, a, b, 0, &result)) return result; + + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + int la = mtbddnode_isleaf(na); + int lb = mtbddnode_isleaf(nb); + + if (la && lb) { + uint64_t va = mtbddnode_getvalue(na); + uint64_t vb = mtbddnode_getvalue(nb); + + if (mtbddnode_gettype(na) == 0 && mtbddnode_gettype(nb) == 0) { + // type 0 = integer + result = *(int64_t*)(&va) <= *(int64_t*)(&vb) ? mtbdd_true : mtbdd_false; + } else if (mtbddnode_gettype(na) == 1 && mtbddnode_gettype(nb) == 1) { + // type 1 = double + double vva = *(double*)&va; + double vvb = *(double*)&vb; + result = vva <= vvb ? 
mtbdd_true : mtbdd_false; + } else if (mtbddnode_gettype(na) == 2 && mtbddnode_gettype(nb) == 2) { + // type 2 = fraction + int64_t nom_a = (int32_t)(va>>32); + int64_t nom_b = (int32_t)(vb>>32); + uint64_t da = va&0xffffffff; + uint64_t db = vb&0xffffffff; + // equalize denominators + uint32_t c = gcd(da, db); + nom_a *= db/c; + nom_b *= da/c; + result = nom_a <= nom_b ? mtbdd_true : mtbdd_false; + } + } else { + /* Get top variable */ + uint32_t va = la ? 0xffffffff : mtbddnode_getvariable(na); + uint32_t vb = lb ? 0xffffffff : mtbddnode_getvariable(nb); + uint32_t var = va < vb ? va : vb; + + /* Get cofactors */ + MTBDD alow, ahigh, blow, bhigh; + alow = va == var ? node_getlow(a, na) : a; + ahigh = va == var ? node_gethigh(a, na) : a; + blow = vb == var ? node_getlow(b, nb) : b; + bhigh = vb == var ? node_gethigh(b, nb) : b; + + SPAWN(mtbdd_leq_rec, ahigh, bhigh, shortcircuit); + result = CALL(mtbdd_leq_rec, alow, blow, shortcircuit); + if (result != SYNC(mtbdd_leq_rec)) result = mtbdd_false; + } + + if (result == mtbdd_false) *shortcircuit = 1; + + /* Store in cache */ + cache_put3(CACHE_MTBDD_LEQ, a, b, 0, result); + return result; +} + +TASK_IMPL_2(MTBDD, mtbdd_leq, MTBDD, a, MTBDD, b) +{ + /* the implementation checks shortcircuit in every task and if the two + MTBDDs are not equal module epsilon, then the computation tree quickly aborts */ + int shortcircuit = 0; + return CALL(mtbdd_leq_rec, a, b, &shortcircuit); +} + +/** + * For two MTBDDs a, b, return mtbdd_true if all common assignments a(s) < b(s), mtbdd_false otherwise. + * For domains not in a / b, assume True. + */ +TASK_3(MTBDD, mtbdd_less_rec, MTBDD, a, MTBDD, b, int*, shortcircuit) +{ + /* Check short circuit */ + if (*shortcircuit) return mtbdd_false; + + /* Check terminal case */ + if (a == b) return mtbdd_false; + + /* For partial functions, just return true */ + if (a == mtbdd_false) return mtbdd_true; + if (b == mtbdd_false) return mtbdd_true; + + /* Maybe perform garbage collection */ + sylvan_gc_test(); + + /* Check cache */ + MTBDD result; + if (cache_get3(CACHE_MTBDD_LESS, a, b, 0, &result)) return result; + + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + int la = mtbddnode_isleaf(na); + int lb = mtbddnode_isleaf(nb); + + if (la && lb) { + uint64_t va = mtbddnode_getvalue(na); + uint64_t vb = mtbddnode_getvalue(nb); + + if (mtbddnode_gettype(na) == 0 && mtbddnode_gettype(nb) == 0) { + // type 0 = integer + result = *(int64_t*)(&va) < *(int64_t*)(&vb) ? mtbdd_true : mtbdd_false; + } else if (mtbddnode_gettype(na) == 1 && mtbddnode_gettype(nb) == 1) { + // type 1 = double + double vva = *(double*)&va; + double vvb = *(double*)&vb; + result = vva < vvb ? mtbdd_true : mtbdd_false; + } else if (mtbddnode_gettype(na) == 2 && mtbddnode_gettype(nb) == 2) { + // type 2 = fraction + int64_t nom_a = (int32_t)(va>>32); + int64_t nom_b = (int32_t)(vb>>32); + uint64_t da = va&0xffffffff; + uint64_t db = vb&0xffffffff; + // equalize denominators + uint32_t c = gcd(da, db); + nom_a *= db/c; + nom_b *= da/c; + result = nom_a < nom_b ? mtbdd_true : mtbdd_false; + } + } else { + /* Get top variable */ + uint32_t va = la ? 0xffffffff : mtbddnode_getvariable(na); + uint32_t vb = lb ? 0xffffffff : mtbddnode_getvariable(nb); + uint32_t var = va < vb ? va : vb; + + /* Get cofactors */ + MTBDD alow, ahigh, blow, bhigh; + alow = va == var ? node_getlow(a, na) : a; + ahigh = va == var ? node_gethigh(a, na) : a; + blow = vb == var ? node_getlow(b, nb) : b; + bhigh = vb == var ? 
node_gethigh(b, nb) : b; + + SPAWN(mtbdd_less_rec, ahigh, bhigh, shortcircuit); + result = CALL(mtbdd_less_rec, alow, blow, shortcircuit); + if (result != SYNC(mtbdd_less_rec)) result = mtbdd_false; + } + + if (result == mtbdd_false) *shortcircuit = 1; + + /* Store in cache */ + cache_put3(CACHE_MTBDD_LESS, a, b, 0, result); + return result; +} + +TASK_IMPL_2(MTBDD, mtbdd_less, MTBDD, a, MTBDD, b) +{ + /* the implementation checks shortcircuit in every task and if the two + MTBDDs are not equal module epsilon, then the computation tree quickly aborts */ + int shortcircuit = 0; + return CALL(mtbdd_less_rec, a, b, &shortcircuit); +} + +/** + * For two MTBDDs a, b, return mtbdd_true if all common assignments a(s) >= b(s), mtbdd_false otherwise. + * For domains not in a / b, assume True. + */ +TASK_3(MTBDD, mtbdd_geq_rec, MTBDD, a, MTBDD, b, int*, shortcircuit) +{ + /* Check short circuit */ + if (*shortcircuit) return mtbdd_false; + + /* Check terminal case */ + if (a == b) return mtbdd_true; + + /* For partial functions, just return true */ + if (a == mtbdd_false) return mtbdd_true; + if (b == mtbdd_false) return mtbdd_true; + + /* Maybe perform garbage collection */ + sylvan_gc_test(); + + /* Check cache */ + MTBDD result; + if (cache_get3(CACHE_MTBDD_GEQ, a, b, 0, &result)) return result; + + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + int la = mtbddnode_isleaf(na); + int lb = mtbddnode_isleaf(nb); + + if (la && lb) { + uint64_t va = mtbddnode_getvalue(na); + uint64_t vb = mtbddnode_getvalue(nb); + + if (mtbddnode_gettype(na) == 0 && mtbddnode_gettype(nb) == 0) { + // type 0 = integer + result = *(int64_t*)(&va) >= *(int64_t*)(&vb) ? mtbdd_true : mtbdd_false; + } else if (mtbddnode_gettype(na) == 1 && mtbddnode_gettype(nb) == 1) { + // type 1 = double + double vva = *(double*)&va; + double vvb = *(double*)&vb; + result = vva >= vvb ? mtbdd_true : mtbdd_false; + } else if (mtbddnode_gettype(na) == 2 && mtbddnode_gettype(nb) == 2) { + // type 2 = fraction + int64_t nom_a = (int32_t)(va>>32); + int64_t nom_b = (int32_t)(vb>>32); + uint64_t da = va&0xffffffff; + uint64_t db = vb&0xffffffff; + // equalize denominators + uint32_t c = gcd(da, db); + nom_a *= db/c; + nom_b *= da/c; + result = nom_a >= nom_b ? mtbdd_true : mtbdd_false; + } + } else { + /* Get top variable */ + uint32_t va = la ? 0xffffffff : mtbddnode_getvariable(na); + uint32_t vb = lb ? 0xffffffff : mtbddnode_getvariable(nb); + uint32_t var = va < vb ? va : vb; + + /* Get cofactors */ + MTBDD alow, ahigh, blow, bhigh; + alow = va == var ? node_getlow(a, na) : a; + ahigh = va == var ? node_gethigh(a, na) : a; + blow = vb == var ? node_getlow(b, nb) : b; + bhigh = vb == var ? node_gethigh(b, nb) : b; + + SPAWN(mtbdd_geq_rec, ahigh, bhigh, shortcircuit); + result = CALL(mtbdd_geq_rec, alow, blow, shortcircuit); + if (result != SYNC(mtbdd_geq_rec)) result = mtbdd_false; + } + + if (result == mtbdd_false) *shortcircuit = 1; + + /* Store in cache */ + cache_put3(CACHE_MTBDD_GEQ, a, b, 0, result); + return result; +} + +TASK_IMPL_2(MTBDD, mtbdd_geq, MTBDD, a, MTBDD, b) +{ + /* the implementation checks shortcircuit in every task and if the two + MTBDDs are not equal module epsilon, then the computation tree quickly aborts */ + int shortcircuit = 0; + return CALL(mtbdd_geq_rec, a, b, &shortcircuit); +} + +/** + * For two MTBDDs a, b, return mtbdd_true if all common assignments a(s) > b(s), mtbdd_false otherwise. + * For domains not in a / b, assume True. 
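+ *
+ * For instance, on constant MTBDDs (a minimal sketch):
+ *
+ *   MTBDD p = mtbdd_int64(3), q = mtbdd_int64(5);
+ *   MTBDD r1 = mtbdd_greater(p, q);   // mtbdd_false: 3 > 5 holds nowhere
+ *   MTBDD r2 = mtbdd_less(p, q);      // mtbdd_true:  3 < 5 holds everywhere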
+ */ +TASK_3(MTBDD, mtbdd_greater_rec, MTBDD, a, MTBDD, b, int*, shortcircuit) +{ + /* Check short circuit */ + if (*shortcircuit) return mtbdd_false; + + /* Check terminal case */ + if (a == b) return mtbdd_false; + + /* For partial functions, just return true */ + if (a == mtbdd_false) return mtbdd_true; + if (b == mtbdd_false) return mtbdd_true; + + /* Maybe perform garbage collection */ + sylvan_gc_test(); + + /* Check cache */ + MTBDD result; + if (cache_get3(CACHE_MTBDD_GREATER, a, b, 0, &result)) return result; + + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + int la = mtbddnode_isleaf(na); + int lb = mtbddnode_isleaf(nb); + + if (la && lb) { + uint64_t va = mtbddnode_getvalue(na); + uint64_t vb = mtbddnode_getvalue(nb); + + if (mtbddnode_gettype(na) == 0 && mtbddnode_gettype(nb) == 0) { + // type 0 = integer + result = *(int64_t*)(&va) > *(int64_t*)(&vb) ? mtbdd_true : mtbdd_false; + } else if (mtbddnode_gettype(na) == 1 && mtbddnode_gettype(nb) == 1) { + // type 1 = double + double vva = *(double*)&va; + double vvb = *(double*)&vb; + result = vva > vvb ? mtbdd_true : mtbdd_false; + } else if (mtbddnode_gettype(na) == 2 && mtbddnode_gettype(nb) == 2) { + // type 2 = fraction + int64_t nom_a = (int32_t)(va>>32); + int64_t nom_b = (int32_t)(vb>>32); + uint64_t da = va&0xffffffff; + uint64_t db = vb&0xffffffff; + // equalize denominators + uint32_t c = gcd(da, db); + nom_a *= db/c; + nom_b *= da/c; + result = nom_a > nom_b ? mtbdd_true : mtbdd_false; + } + } else { + /* Get top variable */ + uint32_t va = la ? 0xffffffff : mtbddnode_getvariable(na); + uint32_t vb = lb ? 0xffffffff : mtbddnode_getvariable(nb); + uint32_t var = va < vb ? va : vb; + + /* Get cofactors */ + MTBDD alow, ahigh, blow, bhigh; + alow = va == var ? node_getlow(a, na) : a; + ahigh = va == var ? node_gethigh(a, na) : a; + blow = vb == var ? node_getlow(b, nb) : b; + bhigh = vb == var ? node_gethigh(b, nb) : b; + + SPAWN(mtbdd_greater_rec, ahigh, bhigh, shortcircuit); + result = CALL(mtbdd_greater_rec, alow, blow, shortcircuit); + if (result != SYNC(mtbdd_greater_rec)) result = mtbdd_false; + } + + if (result == mtbdd_false) *shortcircuit = 1; + + /* Store in cache */ + cache_put3(CACHE_MTBDD_GREATER, a, b, 0, result); + return result; +} + +TASK_IMPL_2(MTBDD, mtbdd_greater, MTBDD, a, MTBDD, b) +{ + /* the implementation checks shortcircuit in every task and if the two + MTBDDs are not equal module epsilon, then the computation tree quickly aborts */ + int shortcircuit = 0; + return CALL(mtbdd_greater_rec, a, b, &shortcircuit); +} + +/** + * Multiply and , and abstract variables using summation. + * This is similar to the "and_exists" operation in BDDs. + */ +TASK_IMPL_3(MTBDD, mtbdd_and_exists, MTBDD, a, MTBDD, b, MTBDD, v) +{ + /* Check terminal case */ + if (v == mtbdd_true) return mtbdd_apply(a, b, TASK(mtbdd_op_times)); + MTBDD result = CALL(mtbdd_op_times, &a, &b); + if (result != mtbdd_invalid) { + mtbdd_refs_push(result); + result = mtbdd_abstract(result, v, TASK(mtbdd_abstract_op_plus)); + mtbdd_refs_pop(1); + return result; + } + + /* Maybe perform garbage collection */ + sylvan_gc_test(); + + /* Check cache */ + if (cache_get3(CACHE_MTBDD_AND_EXISTS, a, b, v, &result)) return result; + + /* Now, v is not a constant, and either a or b is not a constant */ + + /* Get top variable */ + int la = mtbdd_isleaf(a); + int lb = mtbdd_isleaf(b); + mtbddnode_t na = la ? 0 : GETNODE(a); + mtbddnode_t nb = lb ? 0 : GETNODE(b); + uint32_t va = la ? 
0xffffffff : mtbddnode_getvariable(na); + uint32_t vb = lb ? 0xffffffff : mtbddnode_getvariable(nb); + uint32_t var = va < vb ? va : vb; + + mtbddnode_t nv = GETNODE(v); + uint32_t vv = mtbddnode_getvariable(nv); + + if (vv < var) { + /* Recursive, then abstract result */ + result = CALL(mtbdd_and_exists, a, b, node_gethigh(v, nv)); + mtbdd_refs_push(result); + result = mtbdd_apply(result, result, TASK(mtbdd_op_plus)); + mtbdd_refs_pop(1); + } else { + /* Get cofactors */ + MTBDD alow, ahigh, blow, bhigh; + alow = (!la && va == var) ? node_getlow(a, na) : a; + ahigh = (!la && va == var) ? node_gethigh(a, na) : a; + blow = (!lb && vb == var) ? node_getlow(b, nb) : b; + bhigh = (!lb && vb == var) ? node_gethigh(b, nb) : b; + + if (vv == var) { + /* Recursive, then abstract result */ + mtbdd_refs_spawn(SPAWN(mtbdd_and_exists, ahigh, bhigh, node_gethigh(v, nv))); + MTBDD low = mtbdd_refs_push(CALL(mtbdd_and_exists, alow, blow, node_gethigh(v, nv))); + MTBDD high = mtbdd_refs_push(mtbdd_refs_sync(SYNC(mtbdd_and_exists))); + result = CALL(mtbdd_apply, low, high, TASK(mtbdd_op_plus)); + mtbdd_refs_pop(2); + } else /* vv > v */ { + /* Recursive, then create node */ + mtbdd_refs_spawn(SPAWN(mtbdd_and_exists, ahigh, bhigh, v)); + MTBDD low = mtbdd_refs_push(CALL(mtbdd_and_exists, alow, blow, v)); + MTBDD high = mtbdd_refs_sync(SYNC(mtbdd_and_exists)); + mtbdd_refs_pop(1); + result = mtbdd_makenode(var, low, high); + } + } + + /* Store in cache */ + cache_put3(CACHE_MTBDD_AND_EXISTS, a, b, v, result); + return result; +} + +/** + * Calculate the support of a MTBDD, i.e. the cube of all variables that appear in the MTBDD nodes. + */ +TASK_IMPL_1(MTBDD, mtbdd_support, MTBDD, dd) +{ + /* Terminal case */ + if (mtbdd_isleaf(dd)) return mtbdd_true; + + /* Maybe perform garbage collection */ + sylvan_gc_test(); + + /* Check cache */ + MTBDD result; + if (cache_get3(CACHE_MTBDD_SUPPORT, dd, 0, 0, &result)) return result; + + /* Recursive calls */ + mtbddnode_t n = GETNODE(dd); + mtbdd_refs_spawn(SPAWN(mtbdd_support, node_getlow(dd, n))); + MTBDD high = mtbdd_refs_push(CALL(mtbdd_support, node_gethigh(dd, n))); + MTBDD low = mtbdd_refs_push(mtbdd_refs_sync(SYNC(mtbdd_support))); + + /* Compute result */ + result = mtbdd_makenode(mtbddnode_getvariable(n), mtbdd_false, mtbdd_times(low, high)); + mtbdd_refs_pop(2); + + /* Write to cache */ + cache_put3(CACHE_MTBDD_SUPPORT, dd, 0, 0, result); + return result; +} + +/** + * Function composition, for each node with variable which has a pair in , + * replace the node by the result of mtbdd_ite(, , ). + * Each in must be a Boolean MTBDD. + */ +TASK_IMPL_2(MTBDD, mtbdd_compose, MTBDD, a, MTBDDMAP, map) +{ + /* Terminal case */ + if (mtbdd_isleaf(a) || mtbdd_map_isempty(map)) return a; + + /* Determine top level */ + mtbddnode_t n = GETNODE(a); + uint32_t v = mtbddnode_getvariable(n); + + /* Find in map */ + while (mtbdd_map_key(map) < v) { + map = mtbdd_map_next(map); + if (mtbdd_map_isempty(map)) return a; + } + + /* Perhaps execute garbage collection */ + sylvan_gc_test(); + + /* Check cache */ + MTBDD result; + if (cache_get3(CACHE_MTBDD_COMPOSE, a, map, 0, &result)) return result; + + /* Recursive calls */ + mtbdd_refs_spawn(SPAWN(mtbdd_compose, node_getlow(a, n), map)); + MTBDD high = mtbdd_refs_push(CALL(mtbdd_compose, node_gethigh(a, n), map)); + MTBDD low = mtbdd_refs_push(mtbdd_refs_sync(SYNC(mtbdd_compose))); + + /* Calculate result */ + MTBDD r = mtbdd_map_key(map) == v ? 
mtbdd_map_value(map) : mtbdd_makenode(v, mtbdd_false, mtbdd_true); + mtbdd_refs_push(r); + result = CALL(mtbdd_ite, r, high, low); + mtbdd_refs_pop(3); + + /* Store in cache */ + cache_put3(CACHE_MTBDD_COMPOSE, a, map, 0, result); + return result; +} + +/** + * Compute minimum leaf in the MTBDD (for Integer, Double, Rational MTBDDs) + */ +TASK_IMPL_1(MTBDD, mtbdd_minimum, MTBDD, a) +{ + /* Check terminal case */ + if (a == mtbdd_false) return mtbdd_false; + mtbddnode_t na = GETNODE(a); + if (mtbddnode_isleaf(na)) return a; + + /* Maybe perform garbage collection */ + sylvan_gc_test(); + + /* Check cache */ + MTBDD result; + if (cache_get3(CACHE_MTBDD_MINIMUM, a, 0, 0, &result)) return result; + + /* Call recursive */ + SPAWN(mtbdd_minimum, node_getlow(a, na)); + MTBDD high = CALL(mtbdd_minimum, node_gethigh(a, na)); + MTBDD low = SYNC(mtbdd_minimum); + + /* Determine lowest */ + mtbddnode_t nl = GETNODE(low); + mtbddnode_t nh = GETNODE(high); + + if (mtbddnode_gettype(nl) == 0 && mtbddnode_gettype(nh) == 0) { + result = mtbdd_getint64(low) < mtbdd_getint64(high) ? low : high; + } else if (mtbddnode_gettype(nl) == 1 && mtbddnode_gettype(nh) == 1) { + result = mtbdd_getdouble(low) < mtbdd_getdouble(high) ? low : high; + } else if (mtbddnode_gettype(nl) == 2 && mtbddnode_gettype(nh) == 2) { + // type 2 = fraction + int64_t nom_l = mtbdd_getnumer(low); + int64_t nom_h = mtbdd_getnumer(high); + uint64_t denom_l = mtbdd_getdenom(low); + uint64_t denom_h = mtbdd_getdenom(high); + // equalize denominators + uint32_t c = gcd(denom_l, denom_h); + nom_l *= denom_h/c; + nom_h *= denom_l/c; + result = nom_l < nom_h ? low : high; + } + + /* Store in cache */ + cache_put3(CACHE_MTBDD_MINIMUM, a, 0, 0, result); + return result; +} + +/** + * Compute maximum leaf in the MTBDD (for Integer, Double, Rational MTBDDs) + */ +TASK_IMPL_1(MTBDD, mtbdd_maximum, MTBDD, a) +{ + /* Check terminal case */ + if (a == mtbdd_false) return mtbdd_false; + mtbddnode_t na = GETNODE(a); + if (mtbddnode_isleaf(na)) return a; + + /* Maybe perform garbage collection */ + sylvan_gc_test(); + + /* Check cache */ + MTBDD result; + if (cache_get3(CACHE_MTBDD_MAXIMUM, a, 0, 0, &result)) return result; + + /* Call recursive */ + SPAWN(mtbdd_maximum, node_getlow(a, na)); + MTBDD high = CALL(mtbdd_maximum, node_gethigh(a, na)); + MTBDD low = SYNC(mtbdd_maximum); + + /* Determine highest */ + mtbddnode_t nl = GETNODE(low); + mtbddnode_t nh = GETNODE(high); + + if (mtbddnode_gettype(nl) == 0 && mtbddnode_gettype(nh) == 0) { + result = mtbdd_getint64(low) > mtbdd_getint64(high) ? low : high; + } else if (mtbddnode_gettype(nl) == 1 && mtbddnode_gettype(nh) == 1) { + result = mtbdd_getdouble(low) > mtbdd_getdouble(high) ? low : high; + } else if (mtbddnode_gettype(nl) == 2 && mtbddnode_gettype(nh) == 2) { + // type 2 = fraction + int64_t nom_l = mtbdd_getnumer(low); + int64_t nom_h = mtbdd_getnumer(high); + uint64_t denom_l = mtbdd_getdenom(low); + uint64_t denom_h = mtbdd_getdenom(high); + // equalize denominators + uint32_t c = gcd(denom_l, denom_h); + nom_l *= denom_h/c; + nom_h *= denom_l/c; + result = nom_l > nom_h ? low : high; + } + + /* Store in cache */ + cache_put3(CACHE_MTBDD_MAXIMUM, a, 0, 0, result); + return result; +} + +/** + * Calculate the number of satisfying variable assignments according to . 
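+ * (i.e. counting over a domain of <nvars> Boolean variables)
+ * Each path to a non-false leaf contributes 2^(number of skipped variables)
+ * assignments. A minimal sketch:
+ *
+ *   MTBDD x1 = mtbdd_makenode(1, mtbdd_false, mtbdd_true);
+ *   MTBDD f = mtbdd_makenode(0, mtbdd_false, x1);   // x0 AND x1
+ *   double n = mtbdd_satcount(f, 2);   // 1.0: one of the 2^2 assignments satisfies f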
+ */ +TASK_IMPL_2(double, mtbdd_satcount, MTBDD, dd, size_t, nvars) +{ + /* Trivial cases */ + if (dd == mtbdd_false) return 0.0; + if (mtbdd_isleaf(dd)) return powl(2.0L, nvars); + + /* Perhaps execute garbage collection */ + sylvan_gc_test(); + + union { + double d; + uint64_t s; + } hack; + + /* Consult cache */ + if (cache_get3(CACHE_BDD_SATCOUNT, dd, 0, nvars, &hack.s)) { + sylvan_stats_count(BDD_SATCOUNT_CACHED); + return hack.d; + } + + SPAWN(mtbdd_satcount, mtbdd_gethigh(dd), nvars-1); + double low = CALL(mtbdd_satcount, mtbdd_getlow(dd), nvars-1); + hack.d = low + SYNC(mtbdd_satcount); + + cache_put3(CACHE_BDD_SATCOUNT, dd, 0, nvars, hack.s); + return hack.d; +} + +MTBDD +mtbdd_enum_first(MTBDD dd, MTBDD variables, uint8_t *arr, mtbdd_enum_filter_cb filter_cb) +{ + if (dd == mtbdd_false) { + // the leaf dd is skipped + return mtbdd_false; + } else if (mtbdd_isleaf(dd)) { + // a leaf for which the filter returns 0 is skipped + if (filter_cb != NULL && filter_cb(dd) == 0) return mtbdd_false; + // ok, we have a leaf that is not skipped, go for it! + while (variables != mtbdd_true) { + *arr++ = 2; + variables = mtbdd_gethigh(variables); + } + return dd; + } else { + // if variables == true, then dd must be a leaf. But then this line is unreachable. + // if this assertion fails, then is not the support of
. + assert(variables != mtbdd_true); + + // get next variable from + uint32_t v = mtbdd_getvar(variables); + variables = mtbdd_gethigh(variables); + + // check if MTBDD is on this variable + mtbddnode_t n = GETNODE(dd); + if (mtbddnode_getvariable(n) != v) { + *arr = 2; + return mtbdd_enum_first(dd, variables, arr+1, filter_cb); + } + + // first maybe follow low + MTBDD res = mtbdd_enum_first(node_getlow(dd, n), variables, arr+1, filter_cb); + if (res != mtbdd_false) { + *arr = 0; + return res; + } + + // if not low, try following high + res = mtbdd_enum_first(node_gethigh(dd, n), variables, arr+1, filter_cb); + if (res != mtbdd_false) { + *arr = 1; + return res; + } + + // we've tried low and high, return false + return mtbdd_false; + } +} + +MTBDD +mtbdd_enum_next(MTBDD dd, MTBDD variables, uint8_t *arr, mtbdd_enum_filter_cb filter_cb) +{ + if (mtbdd_isleaf(dd)) { + // we find the leaf in 'enum_next', then we've seen it before... + return mtbdd_false; + } else { + // if variables == true, then dd must be a leaf. But then this line is unreachable. + // if this assertion fails, then is not the support of
. + assert(variables != mtbdd_true); + + variables = mtbdd_gethigh(variables); + + if (*arr == 0) { + // previous was low + mtbddnode_t n = GETNODE(dd); + MTBDD res = mtbdd_enum_next(node_getlow(dd, n), variables, arr+1, filter_cb); + if (res != mtbdd_false) { + return res; + } else { + // try to find new in high branch + res = mtbdd_enum_first(node_gethigh(dd, n), variables, arr+1, filter_cb); + if (res != mtbdd_false) { + *arr = 1; + return res; + } else { + return mtbdd_false; + } + } + } else if (*arr == 1) { + // previous was high + mtbddnode_t n = GETNODE(dd); + return mtbdd_enum_next(node_gethigh(dd, n), variables, arr+1, filter_cb); + } else { + // previous was either + return mtbdd_enum_next(dd, variables, arr+1, filter_cb); + } + } +} + +/** + * Helper function for recursive unmarking + */ +static void +mtbdd_unmark_rec(MTBDD mtbdd) +{ + mtbddnode_t n = GETNODE(mtbdd); + if (!mtbddnode_getmark(n)) return; + mtbddnode_setmark(n, 0); + if (mtbddnode_isleaf(n)) return; + mtbdd_unmark_rec(mtbddnode_getlow(n)); + mtbdd_unmark_rec(mtbddnode_gethigh(n)); +} + +/** + * Count number of leaves in MTBDD + */ + +static size_t +mtbdd_leafcount_mark(MTBDD mtbdd) +{ + if (mtbdd == mtbdd_true) return 0; // do not count true/false leaf + if (mtbdd == mtbdd_false) return 0; // do not count true/false leaf + mtbddnode_t n = GETNODE(mtbdd); + if (mtbddnode_getmark(n)) return 0; + mtbddnode_setmark(n, 1); + if (mtbddnode_isleaf(n)) return 1; // count leaf as 1 + return mtbdd_leafcount_mark(mtbddnode_getlow(n)) + mtbdd_leafcount_mark(mtbddnode_gethigh(n)); +} + +size_t +mtbdd_leafcount(MTBDD mtbdd) +{ + size_t result = mtbdd_leafcount_mark(mtbdd); + mtbdd_unmark_rec(mtbdd); + return result; +} + +/** + * Count number of nodes in MTBDD + */ + +static size_t +mtbdd_nodecount_mark(MTBDD mtbdd) +{ + if (mtbdd == mtbdd_true) return 0; // do not count true/false leaf + if (mtbdd == mtbdd_false) return 0; // do not count true/false leaf + mtbddnode_t n = GETNODE(mtbdd); + if (mtbddnode_getmark(n)) return 0; + mtbddnode_setmark(n, 1); + if (mtbddnode_isleaf(n)) return 1; // count leaf as 1 + return 1 + mtbdd_nodecount_mark(mtbddnode_getlow(n)) + mtbdd_nodecount_mark(mtbddnode_gethigh(n)); +} + +size_t +mtbdd_nodecount(MTBDD mtbdd) +{ + size_t result = mtbdd_nodecount_mark(mtbdd); + mtbdd_unmark_rec(mtbdd); + return result; +} + +TASK_2(int, mtbdd_test_isvalid_rec, MTBDD, dd, uint32_t, parent_var) +{ + // check if True/False leaf + if (dd == mtbdd_true || dd == mtbdd_false) return 1; + + // check if index is in array + uint64_t index = dd & (~mtbdd_complement); + assert(index > 1 && index < nodes->table_size); + if (index <= 1 || index >= nodes->table_size) return 0; + + // check if marked + int marked = llmsset_is_marked(nodes, index); + assert(marked); + if (marked == 0) return 0; + + // check if leaf + mtbddnode_t n = GETNODE(dd); + if (mtbddnode_isleaf(n)) return 1; // we're fine + + // check variable order + uint32_t var = mtbddnode_getvariable(n); + assert(var > parent_var); + if (var <= parent_var) return 0; + + // check cache + uint64_t result; + if (cache_get3(CACHE_BDD_ISBDD, dd, 0, 0, &result)) { + return result; + } + + // check recursively + SPAWN(mtbdd_test_isvalid_rec, node_getlow(dd, n), var); + result = (uint64_t)CALL(mtbdd_test_isvalid_rec, node_gethigh(dd, n), var); + if (!SYNC(mtbdd_test_isvalid_rec)) result = 0; + + // put in cache and return result + cache_put3(CACHE_BDD_ISBDD, dd, 0, 0, result); + return result; +} + +TASK_IMPL_1(int, mtbdd_test_isvalid, MTBDD, dd) +{ + // check if 
True/False leaf + if (dd == mtbdd_true || dd == mtbdd_false) return 1; + + // check if index is in array + uint64_t index = dd & (~mtbdd_complement); + assert(index > 1 && index < nodes->table_size); + if (index <= 1 || index >= nodes->table_size) return 0; + + // check if marked + int marked = llmsset_is_marked(nodes, index); + assert(marked); + if (marked == 0) return 0; + + // check if leaf + mtbddnode_t n = GETNODE(dd); + if (mtbddnode_isleaf(n)) return 1; // we're fine + + // check recursively + uint32_t var = mtbddnode_getvariable(n); + SPAWN(mtbdd_test_isvalid_rec, node_getlow(dd, n), var); + int result = CALL(mtbdd_test_isvalid_rec, node_gethigh(dd, n), var); + if (!SYNC(mtbdd_test_isvalid_rec)) result = 0; + return result; +} + +/** + * Export to .dot file + */ + +static void +mtbdd_fprintdot_rec(FILE *out, MTBDD mtbdd, print_terminal_label_cb cb) +{ + mtbddnode_t n = GETNODE(mtbdd); // also works for mtbdd_false + if (mtbddnode_getmark(n)) return; + mtbddnode_setmark(n, 1); + + if (mtbdd == mtbdd_true || mtbdd == mtbdd_false) { + fprintf(out, "0 [shape=box, style=filled, label=\"F\"];\n"); + } else if (mtbddnode_isleaf(n)) { + uint32_t type = mtbddnode_gettype(n); + uint64_t value = mtbddnode_getvalue(n); + fprintf(out, "%" PRIu64 " [shape=box, style=filled, label=\"", MTBDD_STRIPMARK(mtbdd)); + switch (type) { + case 0: + fprintf(out, "%" PRIu64, value); + break; + case 1: + fprintf(out, "%f", *(double*)&value); + break; + case 2: + fprintf(out, "%u/%u", (uint32_t)(value>>32), (uint32_t)value); + break; + default: + cb(out, type, value); + break; + } + fprintf(out, "\"];\n"); + } else { + fprintf(out, "%" PRIu64 " [label=\"%" PRIu32 "\"];\n", + MTBDD_STRIPMARK(mtbdd), mtbddnode_getvariable(n)); + + mtbdd_fprintdot_rec(out, mtbddnode_getlow(n), cb); + mtbdd_fprintdot_rec(out, mtbddnode_gethigh(n), cb); + + fprintf(out, "%" PRIu64 " -> %" PRIu64 " [style=dashed];\n", + MTBDD_STRIPMARK(mtbdd), mtbddnode_getlow(n)); + fprintf(out, "%" PRIu64 " -> %" PRIu64 " [style=solid dir=both arrowtail=%s];\n", + MTBDD_STRIPMARK(mtbdd), MTBDD_STRIPMARK(mtbddnode_gethigh(n)), + mtbddnode_getcomp(n) ? "dot" : "none"); + } +} + +void +mtbdd_fprintdot(FILE *out, MTBDD mtbdd, print_terminal_label_cb cb) +{ + fprintf(out, "digraph \"DD\" {\n"); + fprintf(out, "graph [dpi = 300];\n"); + fprintf(out, "center = true;\n"); + fprintf(out, "edge [dir = forward];\n"); + fprintf(out, "root [style=invis];\n"); + fprintf(out, "root -> %" PRIu64 " [style=solid dir=both arrowtail=%s];\n", + MTBDD_STRIPMARK(mtbdd), MTBDD_HASMARK(mtbdd) ? "dot" : "none"); + + mtbdd_fprintdot_rec(out, mtbdd, cb); + mtbdd_unmark_rec(mtbdd); + + fprintf(out, "}\n"); +} + +/** + * Return 1 if the map contains the key, 0 otherwise. + */ +int +mtbdd_map_contains(MTBDDMAP map, uint32_t key) +{ + while (!mtbdd_map_isempty(map)) { + mtbddnode_t n = GETNODE(map); + uint32_t k = mtbddnode_getvariable(n); + if (k == key) return 1; + if (k > key) return 0; + map = node_getlow(map, n); + } + + return 0; +} + +/** + * Retrieve the number of keys in the map. + */ +size_t +mtbdd_map_count(MTBDDMAP map) +{ + size_t r = 0; + + while (!mtbdd_map_isempty(map)) { + r++; + map = mtbdd_map_next(map); + } + + return r; +} + +/** + * Add the pair to the map, overwrites if key already in map. 
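+ * (the pair being <key, value>). A minimal sketch of building and querying a map:
+ *
+ *   MTBDDMAP map = mtbdd_map_empty();
+ *   map = mtbdd_map_add(map, 0, mtbdd_true);         // key 0 -> True
+ *   map = mtbdd_map_add(map, 2, mtbdd_double(1.0));  // key 2 -> a Double terminal
+ *   // now mtbdd_map_contains(map, 2) == 1 and mtbdd_map_count(map) == 2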
+ */ +MTBDDMAP +mtbdd_map_add(MTBDDMAP map, uint32_t key, MTBDD value) +{ + if (mtbdd_map_isempty(map)) return mtbdd_makenode(key, mtbdd_map_empty(), value); + + mtbddnode_t n = GETNODE(map); + uint32_t k = mtbddnode_getvariable(n); + + if (k < key) { + // add recursively and rebuild tree + MTBDDMAP low = mtbdd_map_add(node_getlow(map, n), key, value); + return mtbdd_makenode(k, low, node_gethigh(map, n)); + } else if (k > key) { + return mtbdd_makenode(key, map, value); + } else { + // replace old + return mtbdd_makenode(key, node_getlow(map, n), value); + } +} + +/** + * Add all values from map2 to map1, overwrites if key already in map1. + */ +MTBDDMAP +mtbdd_map_addall(MTBDDMAP map1, MTBDDMAP map2) +{ + if (mtbdd_map_isempty(map1)) return map2; + if (mtbdd_map_isempty(map2)) return map1; + + mtbddnode_t n1 = GETNODE(map1); + mtbddnode_t n2 = GETNODE(map2); + uint32_t k1 = mtbddnode_getvariable(n1); + uint32_t k2 = mtbddnode_getvariable(n2); + + MTBDDMAP result; + if (k1 < k2) { + MTBDDMAP low = mtbdd_map_addall(node_getlow(map1, n1), map2); + result = mtbdd_makenode(k1, low, node_gethigh(map1, n1)); + } else if (k1 > k2) { + MTBDDMAP low = mtbdd_map_addall(map1, node_getlow(map2, n2)); + result = mtbdd_makenode(k2, low, node_gethigh(map2, n2)); + } else { + MTBDDMAP low = mtbdd_map_addall(node_getlow(map1, n1), node_getlow(map2, n2)); + result = mtbdd_makenode(k2, low, node_gethigh(map2, n2)); + } + + return result; +} + +/** + * Remove the key from the map and return the result + */ +MTBDDMAP +mtbdd_map_remove(MTBDDMAP map, uint32_t key) +{ + if (mtbdd_map_isempty(map)) return map; + + mtbddnode_t n = GETNODE(map); + uint32_t k = mtbddnode_getvariable(n); + + if (k < key) { + MTBDDMAP low = mtbdd_map_remove(node_getlow(map, n), key); + return mtbdd_makenode(k, low, node_gethigh(map, n)); + } else if (k > key) { + return map; + } else { + return node_getlow(map, n); + } +} + +/** + * Remove all keys in the cube from the map and return the result + */ +MTBDDMAP +mtbdd_map_removeall(MTBDDMAP map, MTBDD variables) +{ + if (mtbdd_map_isempty(map)) return map; + if (variables == mtbdd_true) return map; + + mtbddnode_t n1 = GETNODE(map); + mtbddnode_t n2 = GETNODE(variables); + uint32_t k1 = mtbddnode_getvariable(n1); + uint32_t k2 = mtbddnode_getvariable(n2); + + if (k1 < k2) { + MTBDDMAP low = mtbdd_map_removeall(node_getlow(map, n1), variables); + return mtbdd_makenode(k1, low, node_gethigh(map, n1)); + } else if (k1 > k2) { + return mtbdd_map_removeall(map, node_gethigh(variables, n2)); + } else { + return mtbdd_map_removeall(node_getlow(map, n1), node_gethigh(variables, n2)); + } +} + +#include "sylvan_mtbdd_storm.c" diff --git a/src/sylvan_mtbdd.h b/src/sylvan_mtbdd.h new file mode 100644 index 000000000..1a7de3f57 --- /dev/null +++ b/src/sylvan_mtbdd.h @@ -0,0 +1,608 @@ +/* + * Copyright 2011-2015 Formal Methods and Tools, University of Twente + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This is an implementation of Multi-Terminal Binary Decision Diagrams. 
+ * They encode functions on Boolean variables to any domain.
+ *
+ * Three domains are supported by default: Boolean, Integer and Real.
+ * Boolean MTBDDs are identical to BDDs (as supported by the bdd subpackage).
+ * Integer MTBDDs are encoded using "int64_t" terminals.
+ * Real MTBDDs are encoded using "double" terminals.
+ *
+ * Labels of Boolean variables of MTBDD nodes are 24-bit integers.
+ *
+ * Custom terminals are supported.
+ *
+ * Terminal type "0" is the Integer type, type "1" is the Real type.
+ * Type "2" is the Fraction type, consisting of two 32-bit integers (numerator and denominator).
+ * For non-Boolean MTBDDs, mtbdd_false is used for partial functions, i.e. mtbdd_false
+ * indicates that the function is not defined for a certain input.
+ */
+
+/* Do not include this file directly. Instead, include sylvan.h */
+
+#ifndef SYLVAN_MTBDD_H
+#define SYLVAN_MTBDD_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * An MTBDD is a 64-bit value. The low 40 bits are an index into the unique table.
+ * The highest 1 bit is the complement edge, indicating negation.
+ * For Boolean MTBDDs, this means "not X", for Integer and Real MTBDDs, this means "-X".
+ */
+typedef uint64_t MTBDD;
+typedef MTBDD MTBDDMAP;
+
+/**
+ * mtbdd_true is only used in Boolean MTBDDs. mtbdd_false has multiple roles (see above).
+ */
+#define mtbdd_complement ((MTBDD)0x8000000000000000LL)
+#define mtbdd_false ((MTBDD)0)
+#define mtbdd_true (mtbdd_false|mtbdd_complement)
+#define mtbdd_invalid ((MTBDD)0xffffffffffffffffLL)
+
+/**
+ * Initialize MTBDD functionality.
+ * This initializes internal and external referencing data structures,
+ * and registers them in the garbage collection framework.
+ */
+void sylvan_init_mtbdd();
+
+/**
+ * Create a MTBDD terminal of type <type> and value <value>.
+ * For custom types, the value could be a pointer to some external struct.
+ */
+MTBDD mtbdd_makeleaf(uint32_t type, uint64_t value);
+
+/**
+ * Create an internal MTBDD node of Boolean variable <var>, with low edge <low> and high edge <high>.
+ * <var> is a 24-bit integer.
+ */
+MTBDD mtbdd_makenode(uint32_t var, MTBDD low, MTBDD high);
+
+/**
+ * Returns 1 if the MTBDD is a terminal, or 0 otherwise.
+ */
+int mtbdd_isleaf(MTBDD mtbdd);
+#define mtbdd_isnode(mtbdd) (mtbdd_isleaf(mtbdd) ? 0 : 1)
+
+/**
+ * For MTBDD terminals, returns <type> and <value>.
+ */
+uint32_t mtbdd_gettype(MTBDD terminal);
+uint64_t mtbdd_getvalue(MTBDD terminal);
+
+/**
+ * For internal MTBDD nodes, returns <var>, <low> and <high>.
+ */
+uint32_t mtbdd_getvar(MTBDD node);
+MTBDD mtbdd_getlow(MTBDD node);
+MTBDD mtbdd_gethigh(MTBDD node);
+
+/**
+ * Compute the complement of the MTBDD.
+ * For Boolean MTBDDs, this means "not X".
+ */
+#define mtbdd_hascomp(dd) ((dd & mtbdd_complement) ? 1 : 0)
+#define mtbdd_comp(dd) (dd ^ mtbdd_complement)
+#define mtbdd_not(dd) (dd ^ mtbdd_complement)
+
+/**
+ * Create terminals representing int64_t (type 0), double (type 1), or fraction (type 2) values.
+ */
+MTBDD mtbdd_int64(int64_t value);
+MTBDD mtbdd_double(double value);
+MTBDD mtbdd_fraction(int64_t numer, uint64_t denom);
+
+/**
+ * Get the value of a terminal (for Integer, Real and Fraction terminals, types 0, 1 and 2).
+ */
+int64_t mtbdd_getint64(MTBDD terminal);
+double mtbdd_getdouble(MTBDD terminal);
+#define mtbdd_getnumer(terminal) ((int32_t)(mtbdd_getvalue(terminal)>>32))
+#define mtbdd_getdenom(terminal) ((uint32_t)(mtbdd_getvalue(terminal)&0xffffffff))
+
+/**
+ * Create the conjunction of variables in arr.
+ * I.e. arr[0] \and arr[1] \and ...
\and arr[length-1] + */ +MTBDD mtbdd_fromarray(uint32_t* arr, size_t length); + +/** + * Create a MTBDD cube representing the conjunction of variables in their positive or negative + * form depending on whether the cube[idx] equals 0 (negative), 1 (positive) or 2 (any). + * Use cube[idx]==3 for "s=s'" in interleaved variables (matches with next variable) + * is the cube of variables (var1 \and var2 \and ... \and varn) + */ +MTBDD mtbdd_cube(MTBDD variables, uint8_t *cube, MTBDD terminal); + +/** + * Same as mtbdd_cube, but extends with the assignment \to . + * If already assigns a value to the cube, the new value is taken. + * Does not support cube[idx]==3. + */ +#define mtbdd_union_cube(mtbdd, variables, cube, terminal) CALL(mtbdd_union_cube, mtbdd, variables, cube, terminal) +TASK_DECL_4(BDD, mtbdd_union_cube, MTBDD, MTBDD, uint8_t*, MTBDD); + +/** + * Count the number of satisfying assignments (minterms) leading to a non-false leaf + */ +TASK_DECL_2(double, mtbdd_satcount, MTBDD, size_t); +#define mtbdd_satcount(dd, nvars) CALL(mtbdd_satcount, dd, nvars) + +/** + * Count the number of MTBDD leaves (excluding mtbdd_false and mtbdd_true) in the MTBDD + */ +size_t mtbdd_leafcount(MTBDD mtbdd); + +/** + * Count the number of MTBDD nodes and terminals (excluding mtbdd_false and mtbdd_true) in a MTBDD + */ +size_t mtbdd_nodecount(MTBDD mtbdd); + +/** + * Callback function types for binary ("dyadic") and unary ("monadic") operations. + * The callback function returns either the MTBDD that is the result of applying op to the MTBDDs, + * or mtbdd_invalid if op cannot be applied. + * The binary function may swap the two parameters (if commutative) to improve caching. + * The unary function is allowed an extra parameter (be careful of caching) + */ +LACE_TYPEDEF_CB(MTBDD, mtbdd_apply_op, MTBDD*, MTBDD*); +LACE_TYPEDEF_CB(MTBDD, mtbdd_applyp_op, MTBDD*, MTBDD*, size_t); +LACE_TYPEDEF_CB(MTBDD, mtbdd_uapply_op, MTBDD, size_t); + +/** + * Apply a binary operation to and . + * Callback is consulted before the cache, thus the application to terminals is not cached. + */ +TASK_DECL_3(MTBDD, mtbdd_apply, MTBDD, MTBDD, mtbdd_apply_op); +#define mtbdd_apply(a, b, op) CALL(mtbdd_apply, a, b, op) + +/** + * Apply a binary operation with id to and with parameter
<p>.
+ * Callback is consulted before the cache, thus the application to terminals is not cached. + */ +TASK_DECL_5(MTBDD, mtbdd_applyp, MTBDD, MTBDD, size_t, mtbdd_applyp_op, uint64_t); +#define mtbdd_applyp(a, b, p, op, opid) CALL(mtbdd_applyp, a, b, p, op, opid) + +/** + * Apply a unary operation to
<dd>
. + * Callback is consulted after the cache, thus the application to a terminal is cached. + */ +TASK_DECL_3(MTBDD, mtbdd_uapply, MTBDD, mtbdd_uapply_op, size_t); +#define mtbdd_uapply(dd, op, param) CALL(mtbdd_uapply, dd, op, param) + +/** + * Callback function types for abstraction. + * MTBDD mtbdd_abstract_op(MTBDD a, MTBDD b, int k). + * The function is either called with k==0 (apply to two arguments) or k>0 (k skipped BDD variables) + * k == 0 => res := apply op to a and b + * k > 0 => res := apply op to op(a, a, k-1) and op(a, a, k-1) + */ +LACE_TYPEDEF_CB(MTBDD, mtbdd_abstract_op, MTBDD, MTBDD, int); + +/** + * Abstract the variables in from using the binary operation . + */ +TASK_DECL_3(MTBDD, mtbdd_abstract, MTBDD, MTBDD, mtbdd_abstract_op); +#define mtbdd_abstract(a, v, op) CALL(mtbdd_abstract, a, v, op) + +/** + * Unary operation Negate. + * Supported domains: Integer, Real, Fraction + */ +TASK_DECL_2(MTBDD, mtbdd_op_negate, MTBDD, size_t); + +/** + * Binary operation Plus (for MTBDDs of same type) + * Only for MTBDDs where either all leaves are Boolean, or Integer, or Double. + * For Integer/Double MTBDDs, mtbdd_false is interpreted as "0" or "0.0". + */ +TASK_DECL_2(MTBDD, mtbdd_op_plus, MTBDD*, MTBDD*); +TASK_DECL_3(MTBDD, mtbdd_abstract_op_plus, MTBDD, MTBDD, int); + +/** + * Binary operation Minus (for MTBDDs of same type) + * Only for MTBDDs where either all leaves are Boolean, or Integer, or Double. + * For Integer/Double MTBDDs, mtbdd_false is interpreted as "0" or "0.0". + */ +TASK_DECL_2(MTBDD, mtbdd_op_minus, MTBDD*, MTBDD*); + +/** + * Binary operation Times (for MTBDDs of same type) + * Only for MTBDDs where either all leaves are Boolean, or Integer, or Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is mtbdd_false (i.e. not defined). + */ +TASK_DECL_2(MTBDD, mtbdd_op_times, MTBDD*, MTBDD*); +TASK_DECL_3(MTBDD, mtbdd_abstract_op_times, MTBDD, MTBDD, int); + +/** + * Binary operation Minimum (for MTBDDs of same type) + * Only for MTBDDs where either all leaves are Boolean, or Integer, or Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is the other operand. + */ +TASK_DECL_2(MTBDD, mtbdd_op_min, MTBDD*, MTBDD*); +TASK_DECL_3(MTBDD, mtbdd_abstract_op_min, MTBDD, MTBDD, int); + +/** + * Binary operation Maximum (for MTBDDs of same type) + * Only for MTBDDs where either all leaves are Boolean, or Integer, or Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is the other operand. 
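+ *
+ * A user-defined binary operation follows the same TASK pattern as the
+ * built-in operations above (a sketch; my_op_avg is a hypothetical name):
+ *
+ *   TASK_IMPL_2(MTBDD, my_op_avg, MTBDD*, pa, MTBDD*, pb)
+ *   {
+ *       MTBDD a = *pa, b = *pb;
+ *       if (a == mtbdd_false) return b;   // treat "undefined" as the other operand
+ *       if (b == mtbdd_false) return a;
+ *       if (mtbdd_isleaf(a) && mtbdd_isleaf(b))
+ *           return mtbdd_double((mtbdd_getdouble(a) + mtbdd_getdouble(b)) / 2.0);
+ *       return mtbdd_invalid;             // let mtbdd_apply recurse on internal nodes
+ *   }
+ *
+ *   // usage: MTBDD avg = mtbdd_apply(a, b, TASK(my_op_avg));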
+ */ +TASK_DECL_2(MTBDD, mtbdd_op_max, MTBDD*, MTBDD*); +TASK_DECL_3(MTBDD, mtbdd_abstract_op_max, MTBDD, MTBDD, int); + +/** + * Compute -a + */ +#define mtbdd_negate(a) mtbdd_uapply(a, TASK(mtbdd_op_negate), 0) + +/** + * Compute a + b + */ +#define mtbdd_plus(a, b) mtbdd_apply(a, b, TASK(mtbdd_op_plus)) + +/** + * Compute a - b + */ +#define mtbdd_minus(a, b) mtbdd_apply(a, b, TASK(mtbdd_op_minus)) + +/** + * Compute a * b + */ +#define mtbdd_times(a, b) mtbdd_apply(a, b, TASK(mtbdd_op_times)) + +/** + * Compute min(a, b) + */ +#define mtbdd_min(a, b) mtbdd_apply(a, b, TASK(mtbdd_op_min)) + +/** + * Compute max(a, b) + */ +#define mtbdd_max(a, b) mtbdd_apply(a, b, TASK(mtbdd_op_max)) + +/** + * Abstract the variables in from by taking the sum of all values + */ +#define mtbdd_abstract_plus(dd, v) mtbdd_abstract(dd, v, TASK(mtbdd_abstract_op_plus)) + +/** + * Abstract the variables in from by taking the product of all values + */ +#define mtbdd_abstract_times(dd, v) mtbdd_abstract(dd, v, TASK(mtbdd_abstract_op_times)) + +/** + * Abstract the variables in from by taking the minimum of all values + */ +#define mtbdd_abstract_min(dd, v) mtbdd_abstract(dd, v, TASK(mtbdd_abstract_op_min)) + +/** + * Abstract the variables in from by taking the maximum of all values + */ +#define mtbdd_abstract_max(dd, v) mtbdd_abstract(dd, v, TASK(mtbdd_abstract_op_max)) + +/** + * Compute IF THEN ELSE . + * must be a Boolean MTBDD (or standard BDD). + */ +TASK_DECL_3(MTBDD, mtbdd_ite, MTBDD, MTBDD, MTBDD); +#define mtbdd_ite(f, g, h) CALL(mtbdd_ite, f, g, h); + +/** + * Multiply and , and abstract variables using summation. + * This is similar to the "and_exists" operation in BDDs. + */ +TASK_DECL_3(MTBDD, mtbdd_and_exists, MTBDD, MTBDD, MTBDD); +#define mtbdd_and_exists(a, b, vars) CALL(mtbdd_and_exists, a, b, vars) + +/** + * Monad that converts double to a Boolean MTBDD, translate terminals >= value to 1 and to 0 otherwise; + */ +TASK_DECL_2(MTBDD, mtbdd_op_threshold_double, MTBDD, size_t) + +/** + * Monad that converts double to a Boolean MTBDD, translate terminals > value to 1 and to 0 otherwise; + */ +TASK_DECL_2(MTBDD, mtbdd_op_strict_threshold_double, MTBDD, size_t) + +/** + * Convert double to a Boolean MTBDD, translate terminals >= value to 1 and to 0 otherwise; + */ +TASK_DECL_2(MTBDD, mtbdd_threshold_double, MTBDD, double); +#define mtbdd_threshold_double(dd, value) CALL(mtbdd_threshold_double, dd, value) + +/** + * Convert double to a Boolean MTBDD, translate terminals > value to 1 and to 0 otherwise; + */ +TASK_DECL_2(MTBDD, mtbdd_strict_threshold_double, MTBDD, double); +#define mtbdd_strict_threshold_double(dd, value) CALL(mtbdd_strict_threshold_double, dd, value) + +/** + * For two Double MTBDDs, calculate whether they are equal module some value epsilon + * i.e. abs(a-b) < e + */ +TASK_DECL_3(MTBDD, mtbdd_equal_norm_d, MTBDD, MTBDD, double); +#define mtbdd_equal_norm_d(a, b, epsilon) CALL(mtbdd_equal_norm_d, a, b, epsilon) + +/** + * For two Double MTBDDs, calculate whether they are equal modulo some value epsilon + * This version computes the relative difference vs the value in a. + * i.e. abs((a-b)/a) < e + */ +TASK_DECL_3(MTBDD, mtbdd_equal_norm_rel_d, MTBDD, MTBDD, double); +#define mtbdd_equal_norm_rel_d(a, b, epsilon) CALL(mtbdd_equal_norm_rel_d, a, b, epsilon) + +/** + * For two MTBDDs a, b, return mtbdd_true if all common assignments a(s) <= b(s), mtbdd_false otherwise. + * For domains not in a / b, assume True. 
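+ *
+ * For instance (a minimal sketch on constant Double MTBDDs):
+ *
+ *   MTBDD lo = mtbdd_double(1.0), hi = mtbdd_double(2.0);
+ *   mtbdd_leq(lo, hi);   // mtbdd_true:  1.0 <= 2.0
+ *   mtbdd_leq(hi, lo);   // mtbdd_false: 2.0 <= 1.0 does not hold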
+ */ +TASK_DECL_2(MTBDD, mtbdd_leq, MTBDD, MTBDD); +#define mtbdd_leq(a, b) CALL(mtbdd_leq, a, b) + +/** + * For two MTBDDs a, b, return mtbdd_true if all common assignments a(s) < b(s), mtbdd_false otherwise. + * For domains not in a / b, assume True. + */ +TASK_DECL_2(MTBDD, mtbdd_less, MTBDD, MTBDD); +#define mtbdd_less(a, b) CALL(mtbdd_less, a, b) + +/** + * For two MTBDDs a, b, return mtbdd_true if all common assignments a(s) >= b(s), mtbdd_false otherwise. + * For domains not in a / b, assume True. + */ +TASK_DECL_2(MTBDD, mtbdd_geq, MTBDD, MTBDD); +#define mtbdd_geq(a, b) CALL(mtbdd_geq, a, b) + +/** + * For two MTBDDs a, b, return mtbdd_true if all common assignments a(s) > b(s), mtbdd_false otherwise. + * For domains not in a / b, assume True. + */ +TASK_DECL_2(MTBDD, mtbdd_greater, MTBDD, MTBDD); +#define mtbdd_greater(a, b) CALL(mtbdd_greater, a, b) + +/** + * Calculate the support of a MTBDD, i.e. the cube of all variables that appear in the MTBDD nodes. + */ +TASK_DECL_1(MTBDD, mtbdd_support, MTBDD); +#define mtbdd_support(dd) CALL(mtbdd_support, dd) + +/** + * Function composition, for each node with variable which has a pair in , + * replace the node by the result of mtbdd_ite(, , ). + * Each in must be a Boolean MTBDD. + */ +TASK_DECL_2(MTBDD, mtbdd_compose, MTBDD, MTBDDMAP); +#define mtbdd_compose(dd, map) CALL(mtbdd_compose, dd, map) + +/** + * Compute minimal leaf in the MTBDD (for Integer, Double, Rational MTBDDs) + */ +TASK_DECL_1(MTBDD, mtbdd_minimum, MTBDD); +#define mtbdd_minimum(dd) CALL(mtbdd_minimum, dd) + +/** + * Compute maximal leaf in the MTBDD (for Integer, Double, Rational MTBDDs) + */ +TASK_DECL_1(MTBDD, mtbdd_maximum, MTBDD); +#define mtbdd_maximum(dd) CALL(mtbdd_maximum, dd) + +/** + * Given a MTBDD
<dd> and a cube of variables expected in <variables>
,
 + * mtbdd_enum_first and mtbdd_enum_next enumerate the unique paths in <dd>
that lead to a non-False leaf. + * + * The function returns the leaf (or mtbdd_false if no new path is found) and encodes the path + * in the supplied array : 0 for a low edge, 1 for a high edge, and 2 if the variable is skipped. + * + * The supplied array must be large enough for all variables in . + * + * Usage: + * MTBDD leaf = mtbdd_enum_first(dd, variables, arr, NULL); + * while (leaf != mtbdd_false) { + * .... // do something with arr/leaf + * leaf = mtbdd_enum_next(dd, variables, arr, NULL); + * } + * + * The callback is an optional function that returns 0 when the given terminal node should be skipped. + */ +typedef int (*mtbdd_enum_filter_cb)(MTBDD); +MTBDD mtbdd_enum_first(MTBDD dd, MTBDD variables, uint8_t *arr, mtbdd_enum_filter_cb filter_cb); +MTBDD mtbdd_enum_next(MTBDD dd, MTBDD variables, uint8_t *arr, mtbdd_enum_filter_cb filter_cb); + +/** + * For debugging. + * Tests if all nodes in the MTBDD are correctly ``marked'' in the nodes table. + * Tests if variables in the internal nodes appear in-order. + * In Debug mode, this will cause assertion failures instead of returning 0. + * Returns 1 if all is fine, or 0 otherwise. + */ +TASK_DECL_1(int, mtbdd_test_isvalid, MTBDD); +#define mtbdd_test_isvalid(mtbdd) CALL(mtbdd_test_isvalid, mtbdd) + +/** + * Write a DOT representation of a MTBDD + * The callback function is required for custom terminals. + */ +typedef void (*print_terminal_label_cb)(FILE *out, uint32_t type, uint64_t value); +void mtbdd_fprintdot(FILE *out, MTBDD mtbdd, print_terminal_label_cb cb); +#define mtbdd_printdot(mtbdd, cb) mtbdd_fprintdot(stdout, mtbdd, cb) + +/** + * MTBDDMAP, maps uint32_t variables to MTBDDs. + * A MTBDDMAP node has variable level, low edge going to the next MTBDDMAP, high edge to the mapped MTBDD + */ +#define mtbdd_map_empty() mtbdd_false +#define mtbdd_map_isempty(map) (map == mtbdd_false ? 1 : 0) +#define mtbdd_map_key(map) mtbdd_getvar(map) +#define mtbdd_map_value(map) mtbdd_gethigh(map) +#define mtbdd_map_next(map) mtbdd_getlow(map) + +/** + * Return 1 if the map contains the key, 0 otherwise. + */ +int mtbdd_map_contains(MTBDDMAP map, uint32_t key); + +/** + * Retrieve the number of keys in the map. + */ +size_t mtbdd_map_count(MTBDDMAP map); + +/** + * Add the pair to the map, overwrites if key already in map. + */ +MTBDDMAP mtbdd_map_add(MTBDDMAP map, uint32_t key, MTBDD value); + +/** + * Add all values from map2 to map1, overwrites if key already in map1. + */ +MTBDDMAP mtbdd_map_addall(MTBDDMAP map1, MTBDDMAP map2); + +/** + * Remove the key from the map and return the result + */ +MTBDDMAP mtbdd_map_remove(MTBDDMAP map, uint32_t key); + +/** + * Remove all keys in the cube from the map and return the result + */ +MTBDDMAP mtbdd_map_removeall(MTBDDMAP map, MTBDD variables); + +/** + * Custom node types + * Overrides standard hash/equality/notify_on_dead behavior + * hash(value, seed) return hash version + * equals(value1, value2) return 1 if equal, 0 if not equal + * create(&value) replace value by new value for object allocation + * destroy(value) + * NOTE: equals(value1, value2) must imply: hash(value1, seed) == hash(value2,seed) + * NOTE: new value of create must imply: equals(old, new) + */ +typedef uint64_t (*mtbdd_hash_cb)(uint64_t, uint64_t); +typedef int (*mtbdd_equals_cb)(uint64_t, uint64_t); +typedef void (*mtbdd_create_cb)(uint64_t*); +typedef void (*mtbdd_destroy_cb)(uint64_t); + +/** + * Registry callback handlers for . 
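+ * (that is, for one custom terminal type; a sketch follows, where my_hash,
+ * my_equals, my_create and my_destroy are hypothetical callbacks matching
+ * the typedefs above)
+ *
+ *   uint32_t my_type = mtbdd_register_custom_leaf(my_hash, my_equals, my_create, my_destroy);
+ *   MTBDD leaf = mtbdd_makeleaf(my_type, (uint64_t)(uintptr_t)my_object);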
+ */ +uint32_t mtbdd_register_custom_leaf(mtbdd_hash_cb hash_cb, mtbdd_equals_cb equals_cb, mtbdd_create_cb create_cb, mtbdd_destroy_cb destroy_cb); + +/** + * Garbage collection. + * Sylvan supplies two default methods to handle references to nodes, but the user + * is encouraged to implement custom handling. Simply add a handler using sylvan_gc_add_mark + * and let the handler call mtbdd_gc_mark_rec for every MTBDD that should be saved + * during garbage collection. + */ + +/** + * Call mtbdd_gc_mark_rec for every mtbdd you want to keep in your custom mark functions. + */ +VOID_TASK_DECL_1(mtbdd_gc_mark_rec, MTBDD); +#define mtbdd_gc_mark_rec(mtbdd) CALL(mtbdd_gc_mark_rec, mtbdd) + +/** + * Default external referencing. During garbage collection, MTBDDs marked with mtbdd_ref will + * be kept in the forest. + * It is recommended to prefer mtbdd_protect and mtbdd_unprotect. + */ +MTBDD mtbdd_ref(MTBDD a); +void mtbdd_deref(MTBDD a); +size_t mtbdd_count_refs(); + +/** + * Default external pointer referencing. During garbage collection, the pointers are followed and the MTBDDs + * that they refer to are kept in the forest. + */ +void mtbdd_protect(MTBDD* ptr); +void mtbdd_unprotect(MTBDD* ptr); +size_t mtbdd_count_protected(); + +/** + * If sylvan_set_ondead is set to a callback, then this macro marks MTBDDs (terminals). + * When they are dead after the mark phase in garbage collection, the callback is called for marked MTBDDs. + * The ondead callback can either perform cleanup or resurrect dead terminals. + */ +#define mtbdd_notify_ondead(dd) llmsset_notify_ondead(nodes, dd&~mtbdd_complement) + +/** + * Infrastructure for internal references (per-thread, e.g. during MTBDD operations). + * Use mtbdd_refs_push and mtbdd_refs_pop to put MTBDDs on a thread-local reference stack. + * Use mtbdd_refs_spawn and mtbdd_refs_sync around SPAWN and SYNC operations when the result + * of the spawned Task is a MTBDD that must be kept during garbage collection.
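+ * + * Illustrative sketch (not part of the original header) of the intended pattern inside a task that computes an MTBDD; mtbdd_some_op is a placeholder for any Lace task returning an MTBDD: + * + * mtbdd_refs_spawn(SPAWN(mtbdd_some_op, high_a, high_b)); + * MTBDD low = mtbdd_refs_push(CALL(mtbdd_some_op, low_a, low_b)); + * MTBDD high = mtbdd_refs_sync(SYNC(mtbdd_some_op)); + * mtbdd_refs_pop(1); + * MTBDD result = mtbdd_makenode(var, low, high);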
+ */ +typedef struct mtbdd_refs_internal +{ + size_t r_size, r_count; + size_t s_size, s_count; + MTBDD *results; + Task **spawns; +} *mtbdd_refs_internal_t; + +extern DECLARE_THREAD_LOCAL(mtbdd_refs_key, mtbdd_refs_internal_t); + +static inline MTBDD +mtbdd_refs_push(MTBDD mtbdd) +{ + LOCALIZE_THREAD_LOCAL(mtbdd_refs_key, mtbdd_refs_internal_t); + if (mtbdd_refs_key->r_count >= mtbdd_refs_key->r_size) { + mtbdd_refs_key->r_size *= 2; + mtbdd_refs_key->results = (MTBDD*)realloc(mtbdd_refs_key->results, sizeof(MTBDD) * mtbdd_refs_key->r_size); + } + mtbdd_refs_key->results[mtbdd_refs_key->r_count++] = mtbdd; + return mtbdd; +} + +static inline void +mtbdd_refs_pop(int amount) +{ + LOCALIZE_THREAD_LOCAL(mtbdd_refs_key, mtbdd_refs_internal_t); + mtbdd_refs_key->r_count-=amount; +} + +static inline void +mtbdd_refs_spawn(Task *t) +{ + LOCALIZE_THREAD_LOCAL(mtbdd_refs_key, mtbdd_refs_internal_t); + if (mtbdd_refs_key->s_count >= mtbdd_refs_key->s_size) { + mtbdd_refs_key->s_size *= 2; + mtbdd_refs_key->spawns = (Task**)realloc(mtbdd_refs_key->spawns, sizeof(Task*) * mtbdd_refs_key->s_size); + } + mtbdd_refs_key->spawns[mtbdd_refs_key->s_count++] = t; +} + +static inline MTBDD +mtbdd_refs_sync(MTBDD result) +{ + LOCALIZE_THREAD_LOCAL(mtbdd_refs_key, mtbdd_refs_internal_t); + mtbdd_refs_key->s_count--; + return result; +} + +#include "sylvan_mtbdd_storm.h" + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif diff --git a/src/sylvan_mtbdd_int.h b/src/sylvan_mtbdd_int.h new file mode 100644 index 000000000..940250b9a --- /dev/null +++ b/src/sylvan_mtbdd_int.h @@ -0,0 +1,128 @@ +/* + * Copyright 2011-2015 Formal Methods and Tools, University of Twente + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Internals for MTBDDs + */ + +#ifndef SYLVAN_MTBDD_INT_H +#define SYLVAN_MTBDD_INT_H + +/** + * MTBDD node structure + */ +typedef struct __attribute__((packed)) mtbddnode { + uint64_t a, b; +} * mtbddnode_t; // 16 bytes + +#define GETNODE(mtbdd) ((mtbddnode_t)llmsset_index_to_ptr(nodes, mtbdd&0x000000ffffffffff)) + +/** + * Complement handling macros + */ +#define MTBDD_HASMARK(s) (s&mtbdd_complement?1:0) +#define MTBDD_TOGGLEMARK(s) (s^mtbdd_complement) +#define MTBDD_STRIPMARK(s) (s&~mtbdd_complement) +#define MTBDD_TRANSFERMARK(from, to) (to ^ (from & mtbdd_complement)) +// Equal under mark +#define MTBDD_EQUALM(a, b) ((((a)^(b))&(~mtbdd_complement))==0) + +// Leaf: a = L=1, M, type; b = value +// Node: a = L=0, C, M, high; b = variable, low +// Only complement edge on "high" + +static inline int +mtbddnode_isleaf(mtbddnode_t n) +{ + return n->a & 0x4000000000000000 ? 1 : 0; +} + +static inline uint32_t +mtbddnode_gettype(mtbddnode_t n) +{ + return n->a & 0x00000000ffffffff; +} + +static inline uint64_t +mtbddnode_getvalue(mtbddnode_t n) +{ + return n->b; +} + +static inline int +mtbddnode_getcomp(mtbddnode_t n) +{ + return n->a & 0x8000000000000000 ? 
1 : 0; +} + +static inline uint64_t +mtbddnode_getlow(mtbddnode_t n) +{ + return n->b & 0x000000ffffffffff; // 40 bits +} + +static inline uint64_t +mtbddnode_gethigh(mtbddnode_t n) +{ + return n->a & 0x800000ffffffffff; // 40 bits plus high bit of first +} + +static inline uint32_t +mtbddnode_getvariable(mtbddnode_t n) +{ + return (uint32_t)(n->b >> 40); +} + +static inline int +mtbddnode_getmark(mtbddnode_t n) +{ + return n->a & 0x2000000000000000 ? 1 : 0; +} + +static inline void +mtbddnode_setmark(mtbddnode_t n, int mark) +{ + if (mark) n->a |= 0x2000000000000000; + else n->a &= 0xdfffffffffffffff; +} + +static inline void +mtbddnode_makeleaf(mtbddnode_t n, uint32_t type, uint64_t value) +{ + n->a = 0x4000000000000000 | (uint64_t)type; + n->b = value; +} + +static inline void +mtbddnode_makenode(mtbddnode_t n, uint32_t var, uint64_t low, uint64_t high) +{ + n->a = high; + n->b = ((uint64_t)var)<<40 | low; +} + +static MTBDD +node_getlow(MTBDD mtbdd, mtbddnode_t node) +{ + return MTBDD_TRANSFERMARK(mtbdd, mtbddnode_getlow(node)); +} + +static MTBDD +node_gethigh(MTBDD mtbdd, mtbddnode_t node) +{ + return MTBDD_TRANSFERMARK(mtbdd, mtbddnode_gethigh(node)); +} + +#endif diff --git a/src/sylvan_mtbdd_storm.c b/src/sylvan_mtbdd_storm.c new file mode 100644 index 000000000..dab4860c0 --- /dev/null +++ b/src/sylvan_mtbdd_storm.c @@ -0,0 +1,514 @@ +/** + * Generate SHA2 structural hashes. + * Hashes are independent of location. + * Mainly useful for debugging purposes. + */ +static void +mtbdd_sha2_rec(MTBDD mtbdd, SHA256_CTX *ctx) +{ + if (mtbdd == sylvan_true || mtbdd == sylvan_false) { + SHA256_Update(ctx, (void*)&mtbdd, sizeof(MTBDD)); + return; + } + + mtbddnode_t node = GETNODE(mtbdd); + if (mtbddnode_isleaf(node)) { + uint64_t val = mtbddnode_getvalue(node); + SHA256_Update(ctx, (void*)&val, sizeof(uint64_t)); + } else if (mtbddnode_getmark(node) == 0) { + mtbddnode_setmark(node, 1); + uint32_t level = mtbddnode_getvariable(node); + if (MTBDD_STRIPMARK(mtbddnode_gethigh(node))) level |= 0x80000000; + SHA256_Update(ctx, (void*)&level, sizeof(uint32_t)); + mtbdd_sha2_rec(mtbddnode_gethigh(node), ctx); + mtbdd_sha2_rec(mtbddnode_getlow(node), ctx); + } +} + +void +mtbdd_getsha(MTBDD mtbdd, char *target) +{ + SHA256_CTX ctx; + SHA256_Init(&ctx); + mtbdd_sha2_rec(mtbdd, &ctx); + if (mtbdd != sylvan_true && mtbdd != sylvan_false) mtbdd_unmark_rec(mtbdd); + SHA256_End(&ctx, target); +} + +/** + * Binary operation Divide (for MTBDDs of same type). + * Only for MTBDDs where all leaves are Integer, Double, or Fraction. + * If either operand is mtbdd_false (not defined), + * then the result is mtbdd_false (i.e. not defined). + */ +TASK_IMPL_2(MTBDD, mtbdd_op_divide, MTBDD*, pa, MTBDD*, pb) +{ + MTBDD a = *pa, b = *pb; + if (a == mtbdd_false || b == mtbdd_false) return mtbdd_false; + + // Do not handle Boolean MTBDDs...
+ + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + + if (mtbddnode_isleaf(na) && mtbddnode_isleaf(nb)) { + uint64_t val_a = mtbddnode_getvalue(na); + uint64_t val_b = mtbddnode_getvalue(nb); + if (mtbddnode_gettype(na) == 0 && mtbddnode_gettype(nb) == 0) { + int64_t va = *(int64_t*)(&val_a); + int64_t vb = *(int64_t*)(&val_b); + + // note: division by zero is not handled here + if (va == 0) return a; + else { + MTBDD result; + if (vb == 1) result = a; + else result = mtbdd_int64(va/vb); + return result; + } + } else if (mtbddnode_gettype(na) == 1 && mtbddnode_gettype(nb) == 1) { + // both double + double vval_a = *(double*)&val_a; + double vval_b = *(double*)&val_b; + if (vval_a == 0.0) return a; + else { + MTBDD result; + if (vval_b == 1.0) result = a; + else result = mtbdd_double(vval_a / vval_b); + return result; + } + } + else if (mtbddnode_gettype(na) == 2 && mtbddnode_gettype(nb) == 2) { + // both fraction + uint64_t nom_a = val_a>>32; + uint64_t nom_b = val_b>>32; + uint64_t denom_a = val_a&0xffffffff; + uint64_t denom_b = val_b&0xffffffff; + // divide: a/b = (nom_a * denom_b) / (denom_a * nom_b), normalized via gcd + uint32_t c = gcd(denom_b, denom_a); + uint32_t d = gcd(nom_a, nom_b); + nom_a /= d; + denom_a /= c; + nom_a *= (denom_b/c); + denom_a *= (nom_b/d); + // compute result + MTBDD result = mtbdd_fraction(nom_a, denom_a); + return result; + } + } + + return mtbdd_invalid; +} + +/** + * Binary operation Equals (for MTBDDs of same type). + * Only for MTBDDs where either all leaves are Boolean, or Integer, or Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is mtbdd_false (i.e. not defined). + */ +TASK_IMPL_2(MTBDD, mtbdd_op_equals, MTBDD*, pa, MTBDD*, pb) +{ + MTBDD a = *pa, b = *pb; + if (a == mtbdd_false && b == mtbdd_false) return mtbdd_true; + if (a == mtbdd_true && b == mtbdd_true) return mtbdd_true; + + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + + if (mtbddnode_isleaf(na) && mtbddnode_isleaf(nb)) { + uint64_t val_a = mtbddnode_getvalue(na); + uint64_t val_b = mtbddnode_getvalue(nb); + if (mtbddnode_gettype(na) == 0 && mtbddnode_gettype(nb) == 0) { + int64_t va = *(int64_t*)(&val_a); + int64_t vb = *(int64_t*)(&val_b); + if (va == vb) return mtbdd_true; + return mtbdd_false; + } else if (mtbddnode_gettype(na) == 1 && mtbddnode_gettype(nb) == 1) { + // both double + double vval_a = *(double*)&val_a; + double vval_b = *(double*)&val_b; + if (vval_a == vval_b) return mtbdd_true; + return mtbdd_false; + } else if (mtbddnode_gettype(na) == 2 && mtbddnode_gettype(nb) == 2) { + // both fraction + uint64_t nom_a = val_a>>32; + uint64_t nom_b = val_b>>32; + uint64_t denom_a = val_a&0xffffffff; + uint64_t denom_b = val_b&0xffffffff; + if (nom_a == nom_b && denom_a == denom_b) return mtbdd_true; + return mtbdd_false; + } + } + + if (a < b) { + *pa = b; + *pb = a; + } + + return mtbdd_invalid; +} + +/** + * Binary operation Less (for MTBDDs of same type). + * Only for MTBDDs where either all leaves are Boolean, or Integer, or Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is mtbdd_false (i.e. not defined).
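+ * + * Illustrative sketch (not in the original source): the comparison operators are normally used through the wrappers declared in sylvan_mtbdd_storm.h, e.g. (assuming Double MTBDDs a and b): + * + * MTBDD a_lt_b = mtbdd_less_as_bdd(a, b); // Boolean MTBDD: true where a < b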
+ */ +TASK_IMPL_2(MTBDD, mtbdd_op_less, MTBDD*, pa, MTBDD*, pb) +{ + MTBDD a = *pa, b = *pb; + if (a == mtbdd_false && b == mtbdd_false) return mtbdd_true; + if (a == mtbdd_true && b == mtbdd_true) return mtbdd_true; + + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + + if (mtbddnode_isleaf(na) && mtbddnode_isleaf(nb)) { + uint64_t val_a = mtbddnode_getvalue(na); + uint64_t val_b = mtbddnode_getvalue(nb); + if (mtbddnode_gettype(na) == 0 && mtbddnode_gettype(nb) == 0) { + int64_t va = *(int64_t*)(&val_a); + int64_t vb = *(int64_t*)(&val_b); + if (va < vb) return mtbdd_true; + return mtbdd_false; + } else if (mtbddnode_gettype(na) == 1 && mtbddnode_gettype(nb) == 1) { + // both double + double vval_a = *(double*)&val_a; + double vval_b = *(double*)&val_b; + if (vval_a < vval_b) return mtbdd_true; + return mtbdd_false; + } else if (mtbddnode_gettype(na) == 2 && mtbddnode_gettype(nb) == 2) { + // both fraction + uint64_t nom_a = val_a>>32; + uint64_t nom_b = val_b>>32; + uint64_t denom_a = val_a&0xffffffff; + uint64_t denom_b = val_b&0xffffffff; + return nom_a * denom_b < nom_b * denom_a ? mtbdd_true : mtbdd_false; + } + } + + return mtbdd_invalid; +} + +/** + * Binary operation Less or Equal (for MTBDDs of same type). + * Only for MTBDDs where either all leaves are Boolean, or Integer, or Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is mtbdd_false (i.e. not defined). + */ +TASK_IMPL_2(MTBDD, mtbdd_op_less_or_equal, MTBDD*, pa, MTBDD*, pb) +{ + MTBDD a = *pa, b = *pb; + if (a == mtbdd_false && b == mtbdd_false) return mtbdd_true; + if (a == mtbdd_true && b == mtbdd_true) return mtbdd_true; + + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + + if (mtbddnode_isleaf(na) && mtbddnode_isleaf(nb)) { + uint64_t val_a = mtbddnode_getvalue(na); + uint64_t val_b = mtbddnode_getvalue(nb); + if (mtbddnode_gettype(na) == 0 && mtbddnode_gettype(nb) == 0) { + int64_t va = *(int64_t*)(&val_a); + int64_t vb = *(int64_t*)(&val_b); + return va <= vb ? mtbdd_true : mtbdd_false; + } else if (mtbddnode_gettype(na) == 1 && mtbddnode_gettype(nb) == 1) { + // both double + double vval_a = *(double*)&val_a; + double vval_b = *(double*)&val_b; + if (vval_a <= vval_b) return mtbdd_true; + return mtbdd_false; + } else if (mtbddnode_gettype(na) == 2 && mtbddnode_gettype(nb) == 2) { + // both fraction + uint64_t nom_a = val_a>>32; + uint64_t nom_b = val_b>>32; + uint64_t denom_a = val_a&0xffffffff; + uint64_t denom_b = val_b&0xffffffff; + nom_a *= denom_b; + nom_b *= denom_a; + return nom_a <= nom_b ? mtbdd_true : mtbdd_false; + } + } + + return mtbdd_invalid; +} + +/** + * Binary operation Pow (for MTBDDs of same type). + * Only for MTBDDs where all leaves are Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is mtbdd_false (i.e. not defined).
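+ * + * Illustrative sketch (not in the original source), assuming Double MTBDDs a and b and the mtbdd_pow wrapper declared in sylvan_mtbdd_storm.h: + * + * MTBDD p = mtbdd_pow(a, b); // leafwise pow(a, b)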
+ */ +TASK_IMPL_2(MTBDD, mtbdd_op_pow, MTBDD*, pa, MTBDD*, pb) +{ + MTBDD a = *pa, b = *pb; + + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + + if (mtbddnode_isleaf(na) && mtbddnode_isleaf(nb)) { + uint64_t val_a = mtbddnode_getvalue(na); + uint64_t val_b = mtbddnode_getvalue(nb); + if (mtbddnode_gettype(na) == 0 && mtbddnode_gettype(nb) == 0) { + assert(0); + } else if (mtbddnode_gettype(na) == 1 && mtbddnode_gettype(nb) == 1) { + // both double + double vval_a = *(double*)&val_a; + double vval_b = *(double*)&val_b; + return mtbdd_double(pow(vval_a, vval_b)); + } else if (mtbddnode_gettype(na) == 2 && mtbddnode_gettype(nb) == 2) { + assert(0); + } + } + + return mtbdd_invalid; +} + +/** + * Binary operation Mod (for MTBDDs of same type) + * Only for MTBDDs where either all leaves are Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is mtbdd_false (i.e. not defined). + */ +TASK_IMPL_2(MTBDD, mtbdd_op_mod, MTBDD*, pa, MTBDD*, pb) +{ + MTBDD a = *pa, b = *pb; + + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + + if (mtbddnode_isleaf(na) && mtbddnode_isleaf(nb)) { + uint64_t val_a = mtbddnode_getvalue(na); + uint64_t val_b = mtbddnode_getvalue(nb); + if (mtbddnode_gettype(na) == 0 && mtbddnode_gettype(nb) == 0) { + assert(0); + } else if (mtbddnode_gettype(na) == 1 && mtbddnode_gettype(nb) == 1) { + // both double + double vval_a = *(double*)&val_a; + double vval_b = *(double*)&val_b; + return mtbdd_double(fmod(vval_a, vval_b)); + } else if (mtbddnode_gettype(na) == 2 && mtbddnode_gettype(nb) == 2) { + assert(0); + } + } + + return mtbdd_invalid; +} + +/** + * Binary operation Log (for MTBDDs of same type) + * Only for MTBDDs where either all leaves are Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is mtbdd_false (i.e. not defined). + */ +TASK_IMPL_2(MTBDD, mtbdd_op_logxy, MTBDD*, pa, MTBDD*, pb) +{ + MTBDD a = *pa, b = *pb; + + mtbddnode_t na = GETNODE(a); + mtbddnode_t nb = GETNODE(b); + + if (mtbddnode_isleaf(na) && mtbddnode_isleaf(nb)) { + uint64_t val_a = mtbddnode_getvalue(na); + uint64_t val_b = mtbddnode_getvalue(nb); + if (mtbddnode_gettype(na) == 0 && mtbddnode_gettype(nb) == 0) { + assert(0); + } else if (mtbddnode_gettype(na) == 1 && mtbddnode_gettype(nb) == 1) { + // both double + double vval_a = *(double*)&val_a; + double vval_b = *(double*)&val_b; + return mtbdd_double(log(vval_a) / log(vval_b)); + } else if (mtbddnode_gettype(na) == 2 && mtbddnode_gettype(nb) == 2) { + assert(0); + } + } + + return mtbdd_invalid; +} + +TASK_IMPL_2(MTBDD, mtbdd_op_not_zero, MTBDD, a, size_t, v) +{ + /* We only expect "double" terminals, or false */ + if (a == mtbdd_false) return mtbdd_false; + if (a == mtbdd_true) return mtbdd_true; + + // a != constant + mtbddnode_t na = GETNODE(a); + + if (mtbddnode_isleaf(na)) { + if (mtbddnode_gettype(na) == 0) { + return mtbdd_getint64(a) != 0 ? mtbdd_true : mtbdd_false; + } else if (mtbddnode_gettype(na) == 1) { + return mtbdd_getdouble(a) != 0.0 ? mtbdd_true : mtbdd_false; + } else if (mtbddnode_gettype(na) == 2) { + return mtbdd_getnumer(a) != 0 ? mtbdd_true : mtbdd_false; + } + } + + // Ugly hack to get rid of the error "unused variable v" (because there is no version of uapply without a parameter). 
(void)v; + + return mtbdd_invalid; +} + +TASK_IMPL_1(MTBDD, mtbdd_not_zero, MTBDD, dd) +{ + return mtbdd_uapply(dd, TASK(mtbdd_op_not_zero), 0); +} + +TASK_IMPL_2(MTBDD, mtbdd_op_floor, MTBDD, a, size_t, v) +{ + /* We expect Integer, Double or Fraction terminals, or false */ + if (a == mtbdd_false) return mtbdd_false; + if (a == mtbdd_true) return mtbdd_true; + + // a != constant + mtbddnode_t na = GETNODE(a); + + if (mtbddnode_isleaf(na)) { + if (mtbddnode_gettype(na) == 0) { + return a; + } else if (mtbddnode_gettype(na) == 1) { + MTBDD result = mtbdd_double(floor(mtbdd_getdouble(a))); + return result; + } else if (mtbddnode_gettype(na) == 2) { + MTBDD result = mtbdd_fraction(mtbdd_getnumer(a) / mtbdd_getdenom(a), 1); // truncates toward zero + return result; + } + } + + // Ugly hack to get rid of the error "unused variable v" (because there is no version of uapply without a parameter). + (void)v; + + return mtbdd_invalid; +} + +TASK_IMPL_1(MTBDD, mtbdd_floor, MTBDD, dd) +{ + return mtbdd_uapply(dd, TASK(mtbdd_op_floor), 0); +} + +TASK_IMPL_2(MTBDD, mtbdd_op_ceil, MTBDD, a, size_t, v) +{ + /* We expect Integer, Double or Fraction terminals, or false */ + if (a == mtbdd_false) return mtbdd_false; + if (a == mtbdd_true) return mtbdd_true; + + // a != constant + mtbddnode_t na = GETNODE(a); + + if (mtbddnode_isleaf(na)) { + if (mtbddnode_gettype(na) == 0) { + return a; + } else if (mtbddnode_gettype(na) == 1) { + MTBDD result = mtbdd_double(ceil(mtbdd_getdouble(a))); + return result; + } else if (mtbddnode_gettype(na) == 2) { + // round up instead of always adding 1, so exact multiples stay unchanged + MTBDD result = mtbdd_fraction((mtbdd_getnumer(a) + (int64_t)mtbdd_getdenom(a) - 1) / (int64_t)mtbdd_getdenom(a), 1); + return result; + } + } + + // Ugly hack to get rid of the error "unused variable v" (because there is no version of uapply without a parameter). + (void)v; + + return mtbdd_invalid; +} + +TASK_IMPL_1(MTBDD, mtbdd_ceil, MTBDD, dd) +{ + return mtbdd_uapply(dd, TASK(mtbdd_op_ceil), 0); +} + +TASK_IMPL_2(MTBDD, mtbdd_op_bool_to_double, MTBDD, a, size_t, v) +{ + /* We only expect Boolean terminals */ + if (a == mtbdd_false) return mtbdd_double(0); + if (a == mtbdd_true) return mtbdd_double(1.0); + + // Ugly hack to get rid of the error "unused variable v" (because there is no version of uapply without a parameter). + (void)v; + + return mtbdd_invalid; +} + +TASK_IMPL_1(MTBDD, mtbdd_bool_to_double, MTBDD, dd) +{ + return mtbdd_uapply(dd, TASK(mtbdd_op_bool_to_double), 0); +} + +TASK_IMPL_2(MTBDD, mtbdd_op_bool_to_int64, MTBDD, a, size_t, v) +{ + /* We only expect Boolean terminals */ + if (a == mtbdd_false) return mtbdd_int64(0); + if (a == mtbdd_true) return mtbdd_int64(1); + + // Ugly hack to get rid of the error "unused variable v" (because there is no version of uapply without a parameter). + (void)v; + + return mtbdd_invalid; +} + +TASK_IMPL_1(MTBDD, mtbdd_bool_to_int64, MTBDD, dd) +{ + return mtbdd_uapply(dd, TASK(mtbdd_op_bool_to_int64), 0); +} + +/** + * Calculate the number of satisfying variable assignments, using the given number of variables <nvars>. + */ +TASK_IMPL_2(double, mtbdd_non_zero_count, MTBDD, dd, size_t, nvars) +{ + /* Trivial cases */ + if (dd == mtbdd_false) return 0.0; + + mtbddnode_t na = GETNODE(dd); + + if (mtbdd_isleaf(dd)) { + if (mtbddnode_gettype(na) == 0) { + return mtbdd_getint64(dd) != 0 ? powl(2.0L, nvars) : 0.0; + } else if (mtbddnode_gettype(na) == 1) { + return mtbdd_getdouble(dd) != 0 ? powl(2.0L, nvars) : 0.0; + } else if (mtbddnode_gettype(na) == 2) { + return mtbdd_getnumer(dd) != 0 ?
powl(2.0L, nvars) : 0.0; + } + } + + /* Perhaps execute garbage collection */ + sylvan_gc_test(); + + union { + double d; + uint64_t s; + } hack; + + /* Consult cache */ + if (cache_get3(CACHE_MTBDD_NONZERO_COUNT, dd, 0, nvars, &hack.s)) { + sylvan_stats_count(CACHE_MTBDD_NONZERO_COUNT); + return hack.d; + } + + SPAWN(mtbdd_non_zero_count, mtbdd_gethigh(dd), nvars-1); + double low = CALL(mtbdd_non_zero_count, mtbdd_getlow(dd), nvars-1); + hack.d = low + SYNC(mtbdd_non_zero_count); + + cache_put3(CACHE_MTBDD_NONZERO_COUNT, dd, 0, nvars, hack.s); + return hack.d; +} + +int mtbdd_iszero(MTBDD dd) { + if (mtbdd_gettype(dd) == 0) { + return mtbdd_getint64(dd) == 0; + } else if (mtbdd_gettype(dd) == 1) { + return mtbdd_getdouble(dd) == 0; + } else if (mtbdd_gettype(dd) == 2) { + return mtbdd_getnumer(dd) == 0; + } + return 0; +} + +int mtbdd_isnonzero(MTBDD dd) { + return mtbdd_iszero(dd) ? 0 : 1; +} \ No newline at end of file diff --git a/src/sylvan_mtbdd_storm.h b/src/sylvan_mtbdd_storm.h new file mode 100644 index 000000000..38fa6668b --- /dev/null +++ b/src/sylvan_mtbdd_storm.h @@ -0,0 +1,111 @@ +void mtbdd_getsha(MTBDD mtbdd, char *target); // target must be at least 65 bytes... + +/** + * Binary operation Divide (for MTBDDs of same type). + * Only for MTBDDs where all leaves are Integer, Double, or Fraction. + * If either operand is mtbdd_false (not defined), + * then the result is mtbdd_false (i.e. not defined). + */ +TASK_DECL_2(MTBDD, mtbdd_op_divide, MTBDD*, MTBDD*); +#define mtbdd_divide(a, b) mtbdd_apply(a, b, TASK(mtbdd_op_divide)) + +/** + * Binary operation Equals (for MTBDDs of same type). + * Only for MTBDDs where either all leaves are Boolean, or Integer, or Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is mtbdd_false (i.e. not defined). + */ +TASK_DECL_2(MTBDD, mtbdd_op_equals, MTBDD*, MTBDD*); +#define mtbdd_equals(a, b) mtbdd_apply(a, b, TASK(mtbdd_op_equals)) + +/** + * Binary operation Less (for MTBDDs of same type). + * Only for MTBDDs where either all leaves are Boolean, or Integer, or Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is mtbdd_false (i.e. not defined). + */ +TASK_DECL_2(MTBDD, mtbdd_op_less, MTBDD*, MTBDD*); +#define mtbdd_less_as_bdd(a, b) mtbdd_apply(a, b, TASK(mtbdd_op_less)) + +/** + * Binary operation Less or Equal (for MTBDDs of same type). + * Only for MTBDDs where either all leaves are Boolean, or Integer, or Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is mtbdd_false (i.e. not defined). + */ +TASK_DECL_2(MTBDD, mtbdd_op_less_or_equal, MTBDD*, MTBDD*); +#define mtbdd_less_or_equal_as_bdd(a, b) mtbdd_apply(a, b, TASK(mtbdd_op_less_or_equal)) + +/** + * Binary operation Pow (for MTBDDs of same type). + * Only for MTBDDs where all leaves are Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is the other operand. + */ +TASK_DECL_2(MTBDD, mtbdd_op_pow, MTBDD*, MTBDD*); +#define mtbdd_pow(a, b) mtbdd_apply(a, b, TASK(mtbdd_op_pow)) + +/** + * Binary operation Mod (for MTBDDs of same type). + * Only for MTBDDs where all leaves are Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is the other operand.
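+ * + * Illustrative sketch (not in the original header), assuming Double MTBDDs a and b and an initialized Sylvan/Lace environment: + * + * MTBDD q = mtbdd_divide(a, b); + * MTBDD r = mtbdd_mod(a, b);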
+ */ +TASK_DECL_2(MTBDD, mtbdd_op_mod, MTBDD*, MTBDD*); +#define mtbdd_mod(a, b) mtbdd_apply(a, b, TASK(mtbdd_op_mod)) + +/** + * Binary operation Log (for MTBDDs of same type). + * Only for MTBDDs where all leaves are Double. + * For Integer/Double MTBDD, if either operand is mtbdd_false (not defined), + * then the result is the other operand. + */ +TASK_DECL_2(MTBDD, mtbdd_op_logxy, MTBDD*, MTBDD*); +#define mtbdd_logxy(a, b) mtbdd_apply(a, b, TASK(mtbdd_op_logxy)) + +/** + * Monad that converts an MTBDD to a Boolean MTBDD, translating terminals != 0 to true and 0 to false. + */ +TASK_DECL_2(MTBDD, mtbdd_op_not_zero, MTBDD, size_t) +TASK_DECL_1(MTBDD, mtbdd_not_zero, MTBDD) +#define mtbdd_not_zero(dd) CALL(mtbdd_not_zero, dd) + +/** + * Monad that floors all Double and Fraction values. + */ +TASK_DECL_2(MTBDD, mtbdd_op_floor, MTBDD, size_t) +TASK_DECL_1(MTBDD, mtbdd_floor, MTBDD) +#define mtbdd_floor(dd) CALL(mtbdd_floor, dd) + +/** + * Monad that ceils all Double and Fraction values. + */ +TASK_DECL_2(MTBDD, mtbdd_op_ceil, MTBDD, size_t) +TASK_DECL_1(MTBDD, mtbdd_ceil, MTBDD) +#define mtbdd_ceil(dd) CALL(mtbdd_ceil, dd) + +/** + * Monad that converts a Boolean MTBDD to a Double MTBDD, translating true to 1.0 and false to 0.0. + */ +TASK_DECL_2(MTBDD, mtbdd_op_bool_to_double, MTBDD, size_t) +TASK_DECL_1(MTBDD, mtbdd_bool_to_double, MTBDD) +#define mtbdd_bool_to_double(dd) CALL(mtbdd_bool_to_double, dd) + +/** + * Monad that converts a Boolean MTBDD to an int64 MTBDD, translating true to 1 and false to 0. + */ +TASK_DECL_2(MTBDD, mtbdd_op_bool_to_int64, MTBDD, size_t) +TASK_DECL_1(MTBDD, mtbdd_bool_to_int64, MTBDD) +#define mtbdd_bool_to_int64(dd) CALL(mtbdd_bool_to_int64, dd) + +/** + * Count the number of assignments (minterms) leading to a non-zero leaf. + */ +TASK_DECL_2(double, mtbdd_non_zero_count, MTBDD, size_t); +#define mtbdd_non_zero_count(dd, nvars) CALL(mtbdd_non_zero_count, dd, nvars) + +// Checks whether the given MTBDD represents a zero leaf. +int mtbdd_iszero(MTBDD); +int mtbdd_isnonzero(MTBDD); + +#define mtbdd_regular(dd) (dd & ~mtbdd_complement) diff --git a/src/sylvan_obj.cpp b/src/sylvan_obj.cpp new file mode 100644 index 000000000..a573c7a49 --- /dev/null +++ b/src/sylvan_obj.cpp @@ -0,0 +1,1039 @@ +/* + * Copyright 2011-2015 Formal Methods and Tools, University of Twente + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include <sylvan_obj.hpp> + +using namespace sylvan; + +/*** + * Implementation of class Bdd + */ + +int +Bdd::operator==(const Bdd& other) const +{ + return bdd == other.bdd; +} + +int +Bdd::operator!=(const Bdd& other) const +{ + return bdd != other.bdd; +} + +Bdd +Bdd::operator=(const Bdd& right) +{ + bdd = right.bdd; + return *this; +} + +int +Bdd::operator<=(const Bdd& other) const +{ + // TODO: better implementation, since we are not interested in the BDD result + LACE_ME; + BDD r = sylvan_ite(this->bdd, sylvan_not(other.bdd), sylvan_false); + return r == sylvan_false; +} + +int +Bdd::operator>=(const Bdd& other) const +{ + // TODO: better implementation, since we are not interested in the BDD result + return other <= *this; +} + +int +Bdd::operator<(const Bdd& other) const +{ + return bdd != other.bdd && *this <= other; +} + +int +Bdd::operator>(const Bdd& other) const +{ + return bdd != other.bdd && *this >= other; +} + +Bdd +Bdd::operator!() const +{ + return Bdd(sylvan_not(bdd)); +} + +Bdd +Bdd::operator~() const +{ + return Bdd(sylvan_not(bdd)); +} + +Bdd +Bdd::operator*(const Bdd& other) const +{ + LACE_ME; + return Bdd(sylvan_and(bdd, other.bdd)); +} + +Bdd +Bdd::operator*=(const Bdd& other) +{ + LACE_ME; + bdd = sylvan_and(bdd, other.bdd); + return *this; +} + +Bdd +Bdd::operator&(const Bdd& other) const +{ + LACE_ME; + return Bdd(sylvan_and(bdd, other.bdd)); +} + +Bdd +Bdd::operator&=(const Bdd& other) +{ + LACE_ME; + bdd = sylvan_and(bdd, other.bdd); + return *this; +} + +Bdd +Bdd::operator+(const Bdd& other) const +{ + LACE_ME; + return Bdd(sylvan_or(bdd, other.bdd)); +} + +Bdd +Bdd::operator+=(const Bdd& other) +{ + LACE_ME; + bdd = sylvan_or(bdd, other.bdd); + return *this; +} + +Bdd +Bdd::operator|(const Bdd& other) const +{ + LACE_ME; + return Bdd(sylvan_or(bdd, other.bdd)); +} + +Bdd +Bdd::operator|=(const Bdd& other) +{ + LACE_ME; + bdd = sylvan_or(bdd, other.bdd); + return *this; +} + +Bdd +Bdd::operator^(const Bdd& other) const +{ + LACE_ME; + return Bdd(sylvan_xor(bdd, other.bdd)); +} + +Bdd +Bdd::operator^=(const Bdd& other) +{ + LACE_ME; + bdd = sylvan_xor(bdd, other.bdd); + return *this; +} + +Bdd +Bdd::operator-(const Bdd& other) const +{ + LACE_ME; + return Bdd(sylvan_and(bdd, sylvan_not(other.bdd))); +} + +Bdd +Bdd::operator-=(const Bdd& other) +{ + LACE_ME; + bdd = sylvan_and(bdd, sylvan_not(other.bdd)); + return *this; +} + +Bdd +Bdd::AndAbstract(const Bdd &g, const BddSet &cube) const +{ + LACE_ME; + return sylvan_and_exists(bdd, g.bdd, cube.set.bdd); +} + +Bdd +Bdd::ExistAbstract(const BddSet &cube) const +{ + LACE_ME; + return sylvan_exists(bdd, cube.set.bdd); +} + +Bdd +Bdd::UnivAbstract(const BddSet &cube) const +{ + LACE_ME; + return sylvan_forall(bdd, cube.set.bdd); +} + +Bdd +Bdd::Ite(const Bdd &g, const Bdd &h) const +{ + LACE_ME; + return sylvan_ite(bdd, g.bdd, h.bdd); +} + +Bdd +Bdd::And(const Bdd &g) const +{ + LACE_ME; + return sylvan_and(bdd, g.bdd); +} + +Bdd +Bdd::Or(const Bdd &g) const +{ + LACE_ME; + return sylvan_or(bdd, g.bdd); +} + +Bdd +Bdd::Nand(const Bdd &g) const +{ + LACE_ME; + return sylvan_nand(bdd, g.bdd); +} + +Bdd +Bdd::Nor(const Bdd &g) const +{ + LACE_ME; + return sylvan_nor(bdd, g.bdd); +} + +Bdd +Bdd::Xor(const Bdd &g) const +{ + LACE_ME; + return sylvan_xor(bdd, g.bdd); +} + +Bdd +Bdd::Xnor(const Bdd &g) const +{ + LACE_ME; + return sylvan_equiv(bdd, g.bdd); +} + +int +Bdd::Leq(const Bdd &g) const +{ + // TODO: better implementation, since we are not interested in the BDD result + LACE_ME; + BDD r = sylvan_ite(bdd,
sylvan_not(g.bdd), sylvan_false); + return r == sylvan_false; +} + +Bdd +Bdd::RelPrev(const Bdd& relation, const BddSet& cube) const +{ + LACE_ME; + return sylvan_relprev(relation.bdd, bdd, cube.set.bdd); +} + +Bdd +Bdd::RelNext(const Bdd &relation, const BddSet &cube) const +{ + LACE_ME; + return sylvan_relnext(bdd, relation.bdd, cube.set.bdd); +} + +Bdd +Bdd::Closure() const +{ + LACE_ME; + return sylvan_closure(bdd); +} + +Bdd +Bdd::Constrain(const Bdd &c) const +{ + LACE_ME; + return sylvan_constrain(bdd, c.bdd); +} + +Bdd +Bdd::Restrict(const Bdd &c) const +{ + LACE_ME; + return sylvan_restrict(bdd, c.bdd); +} + +Bdd +Bdd::Compose(const BddMap &m) const +{ + LACE_ME; + return sylvan_compose(bdd, m.bdd); +} + +Bdd +Bdd::Permute(const std::vector& from, const std::vector& to) const +{ + LACE_ME; + + /* Create a map */ + BddMap map; + for (int i=from.size()-1; i>=0; i--) { + map.put(from[i], Bdd::bddVar(to[i])); + } + + return sylvan_compose(bdd, map.bdd); +} + +Bdd +Bdd::Support() const +{ + LACE_ME; + return sylvan_support(bdd); +} + +BDD +Bdd::GetBDD() const +{ + return bdd; +} + +void +Bdd::PrintDot(FILE *out) const +{ + sylvan_fprintdot(out, bdd); +} + +void +Bdd::GetShaHash(char *string) const +{ + sylvan_getsha(bdd, string); +} + +std::string +Bdd::GetShaHash() const +{ + char buf[65]; + sylvan_getsha(bdd, buf); + return std::string(buf); +} + +double +Bdd::SatCount(const BddSet &variables) const +{ + LACE_ME; + return sylvan_satcount(bdd, variables.set.bdd); +} + +double +Bdd::SatCount(size_t nvars) const +{ + LACE_ME; + // Note: the mtbdd_satcount can be called without initializing the MTBDD module. + return mtbdd_satcount(bdd, nvars); +} + +void +Bdd::PickOneCube(const BddSet &variables, uint8_t *values) const +{ + LACE_ME; + sylvan_sat_one(bdd, variables.set.bdd, values); +} + +std::vector +Bdd::PickOneCube(const BddSet &variables) const +{ + std::vector result = std::vector(); + + BDD bdd = this->bdd; + BDD vars = variables.set.bdd; + + if (bdd == sylvan_false) return result; + + for (; !sylvan_set_isempty(vars); vars = sylvan_set_next(vars)) { + uint32_t var = sylvan_set_var(vars); + if (bdd == sylvan_true) { + // pick 0 + result.push_back(false); + } else { + if (sylvan_var(bdd) != var) { + // pick 0 + result.push_back(false); + } else { + if (sylvan_low(bdd) == sylvan_false) { + // pick 1 + result.push_back(true); + bdd = sylvan_high(bdd); + } else { + // pick 0 + result.push_back(false); + bdd = sylvan_low(bdd); + } + } + } + } + + return result; +} + +Bdd +Bdd::PickOneCube() const +{ + LACE_ME; + return Bdd(sylvan_sat_one_bdd(bdd)); +} + +Bdd +Bdd::UnionCube(const BddSet &variables, uint8_t *values) const +{ + LACE_ME; + return sylvan_union_cube(bdd, variables.set.bdd, values); +} + +Bdd +Bdd::UnionCube(const BddSet &variables, std::vector values) const +{ + LACE_ME; + uint8_t *data = values.data(); + return sylvan_union_cube(bdd, variables.set.bdd, data); +} + +/** + * @brief Generate a cube representing a set of variables + */ +Bdd +Bdd::VectorCube(const std::vector variables) +{ + Bdd result = Bdd::bddOne(); + for (int i=variables.size()-1; i>=0; i--) { + result *= variables[i]; + } + return result; +} + +/** + * @brief Generate a cube representing a set of variables + */ +Bdd +Bdd::VariablesCube(std::vector variables) +{ + BDD result = sylvan_true; + for (int i=variables.size()-1; i>=0; i--) { + result = sylvan_makenode(variables[i], sylvan_false, result); + } + return result; +} + +size_t +Bdd::NodeCount() const +{ + return sylvan_nodecount(bdd); +} + +Bdd 
+Bdd::bddOne() +{ + return sylvan_true; +} + +Bdd +Bdd::bddZero() +{ + return sylvan_false; +} + +Bdd +Bdd::bddVar(uint32_t index) +{ + LACE_ME; + return sylvan_ithvar(index); +} + +Bdd +Bdd::bddCube(const BddSet &variables, uint8_t *values) +{ + LACE_ME; + return sylvan_cube(variables.set.bdd, values); +} + +Bdd +Bdd::bddCube(const BddSet &variables, std::vector values) +{ + LACE_ME; + uint8_t *data = values.data(); + return sylvan_cube(variables.set.bdd, data); +} + +int +Bdd::isConstant() const +{ + return bdd == sylvan_true || bdd == sylvan_false; +} + +int +Bdd::isTerminal() const +{ + return bdd == sylvan_true || bdd == sylvan_false; +} + +int +Bdd::isOne() const +{ + return bdd == sylvan_true; +} + +int +Bdd::isZero() const +{ + return bdd == sylvan_false; +} + +uint32_t +Bdd::TopVar() const +{ + return sylvan_var(bdd); +} + +Bdd +Bdd::Then() const +{ + return Bdd(sylvan_high(bdd)); +} + +Bdd +Bdd::Else() const +{ + return Bdd(sylvan_low(bdd)); +} + +/*** + * Implementation of class BddMap + */ + +BddMap::BddMap(uint32_t key_variable, const Bdd value) +{ + bdd = sylvan_map_add(sylvan_map_empty(), key_variable, value.bdd); +} + + +BddMap +BddMap::operator+(const Bdd& other) const +{ + return BddMap(sylvan_map_addall(bdd, other.bdd)); +} + +BddMap +BddMap::operator+=(const Bdd& other) +{ + bdd = sylvan_map_addall(bdd, other.bdd); + return *this; +} + +BddMap +BddMap::operator-(const Bdd& other) const +{ + return BddMap(sylvan_map_removeall(bdd, other.bdd)); +} + +BddMap +BddMap::operator-=(const Bdd& other) +{ + bdd = sylvan_map_removeall(bdd, other.bdd); + return *this; +} + +void +BddMap::put(uint32_t key, Bdd value) +{ + bdd = sylvan_map_add(bdd, key, value.bdd); +} + +void +BddMap::removeKey(uint32_t key) +{ + bdd = sylvan_map_remove(bdd, key); +} + +size_t +BddMap::size() const +{ + return sylvan_map_count(bdd); +} + +int +BddMap::isEmpty() const +{ + return sylvan_map_isempty(bdd); +} + + +/*** + * Implementation of class Mtbdd + */ + +Mtbdd +Mtbdd::int64Terminal(int64_t value) +{ + return mtbdd_int64(value); +} + +Mtbdd +Mtbdd::doubleTerminal(double value) +{ + return mtbdd_double(value); +} + +Mtbdd +Mtbdd::fractionTerminal(int64_t nominator, uint64_t denominator) +{ + return mtbdd_fraction(nominator, denominator); +} + +Mtbdd +Mtbdd::terminal(uint32_t type, uint64_t value) +{ + return mtbdd_makeleaf(type, value); +} + +Mtbdd +Mtbdd::mtbddVar(uint32_t variable) +{ + return mtbdd_makenode(variable, mtbdd_false, mtbdd_true); +} + +Mtbdd +Mtbdd::mtbddOne() +{ + return mtbdd_true; +} + +Mtbdd +Mtbdd::mtbddZero() +{ + return mtbdd_false; +} + +Mtbdd +Mtbdd::mtbddCube(const BddSet &variables, uint8_t *values, const Mtbdd &terminal) +{ + LACE_ME; + return mtbdd_cube(variables.set.bdd, values, terminal.mtbdd); +} + +Mtbdd +Mtbdd::mtbddCube(const BddSet &variables, std::vector values, const Mtbdd &terminal) +{ + LACE_ME; + uint8_t *data = values.data(); + return mtbdd_cube(variables.set.bdd, data, terminal.mtbdd); +} + +int +Mtbdd::isTerminal() const +{ + return mtbdd_isleaf(mtbdd); +} + +int +Mtbdd::isLeaf() const +{ + return mtbdd_isleaf(mtbdd); +} + +int +Mtbdd::isOne() const +{ + return mtbdd == mtbdd_true; +} + +int +Mtbdd::isZero() const +{ + return mtbdd == mtbdd_false; +} + +uint32_t +Mtbdd::TopVar() const +{ + return mtbdd_getvar(mtbdd); +} + +Mtbdd +Mtbdd::Then() const +{ + return mtbdd_isnode(mtbdd) ? mtbdd_gethigh(mtbdd) : mtbdd; +} + +Mtbdd +Mtbdd::Else() const +{ + return mtbdd_isnode(mtbdd) ? 
mtbdd_getlow(mtbdd) : mtbdd; +} + +Mtbdd +Mtbdd::Negate() const +{ + LACE_ME; + return mtbdd_negate(mtbdd); +} + +Mtbdd +Mtbdd::Apply(const Mtbdd &other, mtbdd_apply_op op) const +{ + LACE_ME; + return mtbdd_apply(mtbdd, other.mtbdd, op); +} + +Mtbdd +Mtbdd::UApply(mtbdd_uapply_op op, size_t param) const +{ + LACE_ME; + return mtbdd_uapply(mtbdd, op, param); +} + +Mtbdd +Mtbdd::Abstract(const BddSet &variables, mtbdd_abstract_op op) const +{ + LACE_ME; + return mtbdd_abstract(mtbdd, variables.set.bdd, op); +} + +Mtbdd +Mtbdd::Ite(const Mtbdd &g, const Mtbdd &h) const +{ + LACE_ME; + return mtbdd_ite(mtbdd, g.mtbdd, h.mtbdd); +} + +Mtbdd +Mtbdd::Plus(const Mtbdd &other) const +{ + LACE_ME; + return mtbdd_plus(mtbdd, other.mtbdd); +} + +Mtbdd +Mtbdd::Times(const Mtbdd &other) const +{ + LACE_ME; + return mtbdd_times(mtbdd, other.mtbdd); +} + +Mtbdd +Mtbdd::Min(const Mtbdd &other) const +{ + LACE_ME; + return mtbdd_min(mtbdd, other.mtbdd); +} + +Mtbdd +Mtbdd::Max(const Mtbdd &other) const +{ + LACE_ME; + return mtbdd_max(mtbdd, other.mtbdd); +} + +Mtbdd +Mtbdd::AbstractPlus(const BddSet &variables) const +{ + LACE_ME; + return mtbdd_abstract_plus(mtbdd, variables.set.bdd); +} + +Mtbdd +Mtbdd::AbstractTimes(const BddSet &variables) const +{ + LACE_ME; + return mtbdd_abstract_times(mtbdd, variables.set.bdd); +} + +Mtbdd +Mtbdd::AbstractMin(const BddSet &variables) const +{ + LACE_ME; + return mtbdd_abstract_min(mtbdd, variables.set.bdd); +} + +Mtbdd +Mtbdd::AbstractMax(const BddSet &variables) const +{ + LACE_ME; + return mtbdd_abstract_max(mtbdd, variables.set.bdd); +} + +Mtbdd +Mtbdd::AndExists(const Mtbdd &other, const BddSet &variables) const +{ + LACE_ME; + return mtbdd_and_exists(mtbdd, other.mtbdd, variables.set.bdd); +} + +int +Mtbdd::operator==(const Mtbdd& other) const +{ + return mtbdd == other.mtbdd; +} + +int +Mtbdd::operator!=(const Mtbdd& other) const +{ + return mtbdd != other.mtbdd; +} + +Mtbdd +Mtbdd::operator=(const Mtbdd& right) +{ + mtbdd = right.mtbdd; + return *this; +} + +Mtbdd +Mtbdd::operator!() const +{ + return mtbdd_not(mtbdd); +} + +Mtbdd +Mtbdd::operator~() const +{ + return mtbdd_not(mtbdd); +} + +Mtbdd +Mtbdd::operator*(const Mtbdd& other) const +{ + LACE_ME; + return mtbdd_times(mtbdd, other.mtbdd); +} + +Mtbdd +Mtbdd::operator*=(const Mtbdd& other) +{ + LACE_ME; + mtbdd = mtbdd_times(mtbdd, other.mtbdd); + return *this; +} + +Mtbdd +Mtbdd::operator+(const Mtbdd& other) const +{ + LACE_ME; + return mtbdd_plus(mtbdd, other.mtbdd); +} + +Mtbdd +Mtbdd::operator+=(const Mtbdd& other) +{ + LACE_ME; + mtbdd = mtbdd_plus(mtbdd, other.mtbdd); + return *this; +} + +Mtbdd +Mtbdd::operator-(const Mtbdd& other) const +{ + LACE_ME; + return mtbdd_minus(mtbdd, other.mtbdd); +} + +Mtbdd +Mtbdd::operator-=(const Mtbdd& other) +{ + LACE_ME; + mtbdd = mtbdd_minus(mtbdd, other.mtbdd); + return *this; +} + +Mtbdd +Mtbdd::MtbddThreshold(double value) const +{ + LACE_ME; + return mtbdd_threshold_double(mtbdd, value); +} + +Mtbdd +Mtbdd::MtbddStrictThreshold(double value) const +{ + LACE_ME; + return mtbdd_strict_threshold_double(mtbdd, value); +} + +Bdd +Mtbdd::BddThreshold(double value) const +{ + LACE_ME; + return mtbdd_threshold_double(mtbdd, value); +} + +Bdd +Mtbdd::BddStrictThreshold(double value) const +{ + LACE_ME; + return mtbdd_strict_threshold_double(mtbdd, value); +} + +Mtbdd +Mtbdd::Support() const +{ + LACE_ME; + return mtbdd_support(mtbdd); +} + +MTBDD +Mtbdd::GetMTBDD() const +{ + return mtbdd; +} + +Mtbdd +Mtbdd::Compose(MtbddMap &m) const +{ + LACE_ME; + return 
mtbdd_compose(mtbdd, m.mtbdd); +} + +Mtbdd +Mtbdd::Permute(const std::vector& from, const std::vector& to) const +{ + LACE_ME; + + /* Create a map */ + MtbddMap map; + for (int i=from.size()-1; i>=0; i--) { + map.put(from[i], Bdd::bddVar(to[i])); + } + + return mtbdd_compose(mtbdd, map.mtbdd); +} + +double +Mtbdd::SatCount(size_t nvars) const +{ + LACE_ME; + return mtbdd_satcount(mtbdd, nvars); +} + +double +Mtbdd::SatCount(const BddSet &variables) const +{ + return SatCount(sylvan_set_count(variables.set.bdd)); +} + +size_t +Mtbdd::NodeCount() const +{ + LACE_ME; + return mtbdd_nodecount(mtbdd); +} + + +/*** + * Implementation of class MtbddMap + */ + +MtbddMap::MtbddMap(uint32_t key_variable, Mtbdd value) +{ + mtbdd = mtbdd_map_add(mtbdd_map_empty(), key_variable, value.mtbdd); +} + +MtbddMap +MtbddMap::operator+(const Mtbdd& other) const +{ + return MtbddMap(mtbdd_map_addall(mtbdd, other.mtbdd)); +} + +MtbddMap +MtbddMap::operator+=(const Mtbdd& other) +{ + mtbdd = mtbdd_map_addall(mtbdd, other.mtbdd); + return *this; +} + +MtbddMap +MtbddMap::operator-(const Mtbdd& other) const +{ + return MtbddMap(mtbdd_map_removeall(mtbdd, other.mtbdd)); +} + +MtbddMap +MtbddMap::operator-=(const Mtbdd& other) +{ + mtbdd = mtbdd_map_removeall(mtbdd, other.mtbdd); + return *this; +} + +void +MtbddMap::put(uint32_t key, Mtbdd value) +{ + mtbdd = mtbdd_map_add(mtbdd, key, value.mtbdd); +} + +void +MtbddMap::removeKey(uint32_t key) +{ + mtbdd = mtbdd_map_remove(mtbdd, key); +} + +size_t +MtbddMap::size() +{ + return mtbdd_map_count(mtbdd); +} + +int +MtbddMap::isEmpty() +{ + return mtbdd_map_isempty(mtbdd); +} + + +/*** + * Implementation of class Sylvan + */ + +void +Sylvan::initPackage(size_t initialTableSize, size_t maxTableSize, size_t initialCacheSize, size_t maxCacheSize) +{ + sylvan_init_package(initialTableSize, maxTableSize, initialCacheSize, maxCacheSize); +} + +void +Sylvan::initBdd(int granularity) +{ + sylvan_init_bdd(granularity); +} + +void +Sylvan::initMtbdd() +{ + sylvan_init_mtbdd(); +} + +void +Sylvan::quitPackage() +{ + sylvan_quit(); +} + +#include "sylvan_obj_storm.cpp" diff --git a/src/sylvan_obj.hpp b/src/sylvan_obj.hpp new file mode 100644 index 000000000..e7f77f6ca --- /dev/null +++ b/src/sylvan_obj.hpp @@ -0,0 +1,855 @@ +/* + * Copyright 2011-2015 Formal Methods and Tools, University of Twente + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef SYLVAN_OBJ_H +#define SYLVAN_OBJ_H + +#include <string> +#include <vector> + +#include <sylvan.h> +#include <sylvan_mtbdd.h> + +namespace sylvan { + +class BddSet; +class BddMap; +class Mtbdd; + +class Bdd { + friend class Sylvan; + friend class BddSet; + friend class BddMap; + friend class Mtbdd; + +public: + Bdd() { bdd = sylvan_false; sylvan_protect(&bdd); } + Bdd(const BDD from) : bdd(from) { sylvan_protect(&bdd); } + Bdd(const Bdd &from) : bdd(from.bdd) { sylvan_protect(&bdd); } + Bdd(const uint32_t var) { bdd = sylvan_ithvar(var); sylvan_protect(&bdd); } + ~Bdd() { sylvan_unprotect(&bdd); } + + /** + * @brief Creates a Bdd representing just the variable index in its positive form + * The variable index must satisfy 0<=index<=2^23 (we use 24 bits internally) + */ + static Bdd bddVar(uint32_t index); + + /** + * @brief Returns the Bdd representing "True" + */ + static Bdd bddOne(); + + /** + * @brief Returns the Bdd representing "False" + */ + static Bdd bddZero(); + + /** + * @brief Returns the Bdd representing a cube of variables, according to the given values. + * @param variables the variables that will be in the cube in their positive or negative form + * @param values a character array describing how the variables will appear in the result + * The length of the array must be equal to the number of variables in the cube. + * For every ith char in the array, if it is 0, the corresponding variable will appear in its negative form, + * if it is 1, it will appear in its positive form, and if it is 2, it will appear as "any", thus it will + * be skipped. + */ + static Bdd bddCube(const BddSet &variables, unsigned char *values); + + /** + * @brief Returns the Bdd representing a cube of variables, according to the given values. + * @param variables the variables that will be in the cube in their positive or negative form + * @param values a character array describing how the variables will appear in the result + * The length of the array must be equal to the number of variables in the cube. + * For every ith char in the array, if it is 0, the corresponding variable will appear in its negative form, + * if it is 1, it will appear in its positive form, and if it is 2, it will appear as "any", thus it will + * be skipped.
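+ * + * Illustrative sketch (not in the original header): a cube over variables 0 and 1, with variable 0 positive and variable 1 skipped: + * + * BddSet vars; + * vars.add(1); vars.add(0); + * uint8_t values[2] = {1, 2}; + * Bdd cube = Bdd::bddCube(vars, values);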
+ */ + static Bdd bddCube(const BddSet &variables, std::vector values); + + int operator==(const Bdd& other) const; + int operator!=(const Bdd& other) const; + Bdd operator=(const Bdd& right); + int operator<=(const Bdd& other) const; + int operator>=(const Bdd& other) const; + int operator<(const Bdd& other) const; + int operator>(const Bdd& other) const; + Bdd operator!() const; + Bdd operator~() const; + Bdd operator*(const Bdd& other) const; + Bdd operator*=(const Bdd& other); + Bdd operator&(const Bdd& other) const; + Bdd operator&=(const Bdd& other); + Bdd operator+(const Bdd& other) const; + Bdd operator+=(const Bdd& other); + Bdd operator|(const Bdd& other) const; + Bdd operator|=(const Bdd& other); + Bdd operator^(const Bdd& other) const; + Bdd operator^=(const Bdd& other); + Bdd operator-(const Bdd& other) const; + Bdd operator-=(const Bdd& other); + + /** + * @brief Returns non-zero if this Bdd is bddOne() or bddZero() + */ + int isConstant() const; + + /** + * @brief Returns non-zero if this Bdd is bddOne() or bddZero() + */ + int isTerminal() const; + + /** + * @brief Returns non-zero if this Bdd is bddOne() + */ + int isOne() const; + + /** + * @brief Returns non-zero if this Bdd is bddZero() + */ + int isZero() const; + + /** + * @brief Returns the top variable index of this Bdd (the variable in the root node) + */ + uint32_t TopVar() const; + + /** + * @brief Follows the high edge ("then") of the root node of this Bdd + */ + Bdd Then() const; + + /** + * @brief Follows the low edge ("else") of the root node of this Bdd + */ + Bdd Else() const; + + /** + * @brief Computes \exists cube: f \and g + */ + Bdd AndAbstract(const Bdd& g, const BddSet& cube) const; + + /** + * @brief Computes \exists cube: f + */ + Bdd ExistAbstract(const BddSet& cube) const; + + /** + * @brief Computes \forall cube: f + */ + Bdd UnivAbstract(const BddSet& cube) const; + + /** + * @brief Computes if f then g else h + */ + Bdd Ite(const Bdd& g, const Bdd& h) const; + + /** + * @brief Computes f \and g + */ + Bdd And(const Bdd& g) const; + + /** + * @brief Computes f \or g + */ + Bdd Or(const Bdd& g) const; + + /** + * @brief Computes \not (f \and g) + */ + Bdd Nand(const Bdd& g) const; + + /** + * @brief Computes \not (f \or g) + */ + Bdd Nor(const Bdd& g) const; + + /** + * @brief Computes f \xor g + */ + Bdd Xor(const Bdd& g) const; + + /** + * @brief Computes \not (f \xor g), i.e. f \equiv g + */ + Bdd Xnor(const Bdd& g) const; + + /** + * @brief Returns whether all elements in f are also in g + */ + int Leq(const Bdd& g) const; + + /** + * @brief Computes the reverse application of a transition relation to this set. + * @param relation the transition relation to apply + * @param cube the variables that are in the transition relation + * This function assumes that s,t are interleaved with s even and t odd (s+1). + * Other variables in the relation are ignored (by existential quantification) + * Set cube to "false" (illegal cube) to assume all encountered variables are in s,t + * + * Use this function to concatenate two relations --> --> + * or to take the 'previous' of a set --> S + */ + Bdd RelPrev(const Bdd& relation, const BddSet& cube) const; + + /** + * @brief Computes the application of a transition relation to this set. + * @param relation the transition relation to apply + * @param cube the variables that are in the transition relation + * This function assumes that s,t are interleaved with s even and t odd (s+1). 
+ * Other variables in the relation are ignored (by existential quantification) + * Set cube to "false" (illegal cube) to assume all encountered variables are in s,t + * + * Use this function to take the 'next' of a set S --> + */ + Bdd RelNext(const Bdd& relation, const BddSet& cube) const; + + /** + * @brief Computes the transitive closure by traversing the BDD recursively. + * See Y. Matsunaga, P. C. McGeer, R. K. Brayton + * On Computing the Transitive Closre of a State Transition Relation + * 30th ACM Design Automation Conference, 1993. + */ + Bdd Closure() const; + + /** + * @brief Computes the constrain f @ c + */ + Bdd Constrain(const Bdd &c) const; + + /** + * @brief Computes the BDD restrict according to Coudert and Madre's algorithm (ICCAD90). + */ + Bdd Restrict(const Bdd &c) const; + + /** + * @brief Functional composition. Whenever a variable v in the map m is found in the BDD, + * it is substituted by the associated function. + * You can also use this function to implement variable reordering. + */ + Bdd Compose(const BddMap &m) const; + + /** + * @brief Substitute all variables in the array from by the corresponding variables in to. + */ + Bdd Permute(const std::vector& from, const std::vector& to) const; + + /** + * @brief Computes the support of a Bdd. + */ + Bdd Support() const; + + /** + * @brief Gets the BDD of this Bdd (for C functions) + */ + BDD GetBDD() const; + + /** + * @brief Writes .dot file of this Bdd. Not thread-safe! + */ + void PrintDot(FILE *out) const; + + /** + * @brief Gets a SHA2 hash that describes the structure of this Bdd. + * @param string a character array of at least 65 characters (includes zero-termination) + * This hash is 64 characters long and is independent of the memory locations of BDD nodes. + */ + void GetShaHash(char *string) const; + + std::string GetShaHash() const; + + /** + * @brief Computes the number of satisfying variable assignments, using variables in cube. + */ + double SatCount(const BddSet &cube) const; + + /** + * @brief Compute the number of satisfying variable assignments, using the given number of variables. + */ + double SatCount(const size_t nvars) const; + + /** + * @brief Gets one satisfying assignment according to the variables. + * @param variables The set of variables to be assigned, must include the support of the Bdd. + */ + void PickOneCube(const BddSet &variables, uint8_t *string) const; + + /** + * @brief Gets one satisfying assignment according to the variables. + * @param variables The set of variables to be assigned, must include the support of the Bdd. + * Returns an empty vector when either this Bdd equals bddZero() or the cube is empty. + */ + std::vector PickOneCube(const BddSet &variables) const; + + /** + * @brief Gets a cube that satisfies this Bdd. + */ + Bdd PickOneCube() const; + + /** + * @brief Faster version of: *this + Sylvan::bddCube(variables, values); + */ + Bdd UnionCube(const BddSet &variables, uint8_t *values) const; + + /** + * @brief Faster version of: *this + Sylvan::bddCube(variables, values); + */ + Bdd UnionCube(const BddSet &variables, std::vector values) const; + + /** + * @brief Generate a cube representing a set of variables + */ + static Bdd VectorCube(const std::vector variables); + + /** + * @brief Generate a cube representing a set of variables + * @param variables An sorted set of variable indices + */ + static Bdd VariablesCube(const std::vector variables); + + /** + * @brief Gets the number of nodes in this Bdd. Not thread-safe! 
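+ * + * Illustrative sketch (not in the original header): forward reachability with RelNext, assuming a transition relation trans over the interleaved variables in vars and an initial state set initial: + * + * Bdd reached = initial, frontier = initial; + * while (frontier != Bdd::bddZero()) { + * frontier = frontier.RelNext(trans, vars) - reached; + * reached += frontier; + * }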
+ */ + size_t NodeCount() const; + +#include "sylvan_obj_bdd_storm.hpp" + +private: + BDD bdd; +}; + +class BddSet +{ + friend class Bdd; + friend class Mtbdd; + Bdd set; + +public: + /** + * @brief Create a new empty set. + */ + BddSet() : set(Bdd::bddOne()) {} + + /** + * @brief Wrap the BDD cube in a set. + */ + BddSet(const Bdd &other) : set(other) {} + + /** + * @brief Create a copy of the set . + */ + BddSet(const BddSet &other) : set(other.set) {} + + /** + * @brief Add the variable to this set. + */ + void add(uint32_t variable) { + set *= Bdd::bddVar(variable); + } + + /** + * @brief Add all variables in the set to this set. + */ + void add(BddSet &other) { + set *= other.set; + } + + /** + * @brief Remove the variable from this set. + */ + void remove(uint32_t variable) { + set = set.ExistAbstract(Bdd::bddVar(variable)); + } + + /** + * @brief Remove all variables in the set from this set. + */ + void remove(BddSet &other) { + set = set.ExistAbstract(other.set); + } + + /** + * @brief Retrieve the head of the set. (The first variable.) + */ + uint32_t TopVar() const { + return set.TopVar(); + } + + /** + * @brief Retrieve the tail of the set. (The set containing all but the first variables.) + */ + BddSet Next() const { + Bdd then = set.Then(); + return BddSet(then); + } + + /** + * @brief Return true if this set is empty, or false otherwise. + */ + bool isEmpty() const { + return set.isOne(); + } + + /** + * @brief Return true if this set contains the variable , or false otherwise. + */ + bool contains(uint32_t variable) const { + if (isEmpty()) return false; + else if (TopVar() == variable) return true; + else return Next().contains(variable); + } + + /** + * @brief Return the number of variables in this set. + */ + size_t size() const { + if (isEmpty()) return 0; + else return 1 + Next().size(); + } + + /** + * @brief Create a set containing the variables in . + * It is advised to have the variables in in ascending order. + */ + static BddSet fromArray(BDDVAR *arr, size_t length) { + BddSet set; + for (size_t i = 0; i < length; i++) { + set.add(arr[length-i-1]); + } + return set; + } + + /** + * @brief Create a set containing the variables in . + * It is advised to have the variables in in ascending order. + */ + static BddSet fromVector(const std::vector variables) { + BddSet set; + for (int i=variables.size()-1; i>=0; i--) { + set.set *= variables[i]; + } + return set; + } + + /** + * @brief Create a set containing the variables in . + * It is advised to have the variables in in ascending order. + */ + static BddSet fromVector(const std::vector variables) { + BddSet set; + for (int i=variables.size()-1; i>=0; i--) { + set.add(variables[i]); + } + return set; + } + + /** + * @brief Write all variables in this set to . + * @param arr An array of at least size this.size(). + */ + void toArray(BDDVAR *arr) const { + if (!isEmpty()) { + *arr = TopVar(); + Next().toArray(arr+1); + } + } + + /** + * @brief Return the vector of all variables in this set. 
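+ * + * Illustrative sketch (not in the original header): building a set of variables and reading it back: + * + * BddSet set = BddSet::fromVector(std::vector<uint32_t>{2, 4, 6}); + * size_t n = set.size(); // 3 + * std::vector<uint32_t> back = set.toVector(); // {2, 4, 6}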
+ */ + std::vector<uint32_t> toVector() const { + std::vector<uint32_t> result; + Bdd x = set; + while (!x.isOne()) { + result.push_back(x.TopVar()); + x = x.Then(); + } + return result; + } +}; + +class BddMap +{ + friend class Bdd; + BDD bdd; + BddMap(const BDD from) : bdd(from) { sylvan_protect(&bdd); } + BddMap(const Bdd &from) : bdd(from.bdd) { sylvan_protect(&bdd); } +public: + BddMap() : bdd(sylvan_map_empty()) { sylvan_protect(&bdd); } + ~BddMap() { sylvan_unprotect(&bdd); } + + BddMap(uint32_t key_variable, const Bdd value); + + BddMap operator+(const Bdd& other) const; + BddMap operator+=(const Bdd& other); + BddMap operator-(const Bdd& other) const; + BddMap operator-=(const Bdd& other); + + /** + * @brief Adds a key-value pair to the map + */ + void put(uint32_t key, Bdd value); + + /** + * @brief Removes a key-value pair from the map + */ + void removeKey(uint32_t key); + + /** + * @brief Returns the number of key-value pairs in this map + */ + size_t size() const; + + /** + * @brief Returns non-zero when this map is empty + */ + int isEmpty() const; +}; + +class MtbddMap; + +class Mtbdd { + friend class Sylvan; + friend class MtbddMap; + +public: + Mtbdd() { mtbdd = sylvan_false; mtbdd_protect(&mtbdd); } + Mtbdd(const MTBDD from) : mtbdd(from) { mtbdd_protect(&mtbdd); } + Mtbdd(const Mtbdd &from) : mtbdd(from.mtbdd) { mtbdd_protect(&mtbdd); } + Mtbdd(const Bdd &from) : mtbdd(from.bdd) { mtbdd_protect(&mtbdd); } + ~Mtbdd() { mtbdd_unprotect(&mtbdd); } + + /** + * @brief Creates a Mtbdd leaf representing the int64 value <value> + */ + static Mtbdd int64Terminal(int64_t value); + + /** + * @brief Creates a Mtbdd leaf representing the floating-point value <value> + */ + static Mtbdd doubleTerminal(double value); + + /** + * @brief Creates a Mtbdd leaf representing the fraction value <nominator>/<denominator> + * Internally, Sylvan uses 32-bit values and reports overflows to stderr. + */ + static Mtbdd fractionTerminal(int64_t nominator, uint64_t denominator); + + /** + * @brief Creates a Mtbdd leaf of type <type> holding value <value> + * This is useful for custom Mtbdd types. + */ + static Mtbdd terminal(uint32_t type, uint64_t value); + + /** + * @brief Creates a Boolean Mtbdd representing just the variable index in its positive form + * The variable index must satisfy 0<=index<=2^23 (Sylvan uses 24 bits internally) + */ + static Mtbdd mtbddVar(uint32_t variable); + + /** + * @brief Returns the Boolean Mtbdd representing "True" + */ + static Mtbdd mtbddOne(); + + /** + * @brief Returns the Boolean Mtbdd representing "False" + */ + static Mtbdd mtbddZero(); + + /** + * @brief Returns the Mtbdd representing a cube of variables, according to the given values. + * @param variables the variables that will be in the cube in their positive or negative form + * @param values a character array describing how the variables will appear in the result + * @param terminal the leaf of the cube + * The length of the array must be equal to the number of variables in the cube. + * For every ith char in the array, if it is 0, the corresponding variable will appear in its negative form, + * if it is 1, it will appear in its positive form, and if it is 2, it will appear as "any", thus it will + * be skipped. + */ + static Mtbdd mtbddCube(const BddSet &variables, unsigned char *values, const Mtbdd &terminal); + + /** + * @brief Returns the Mtbdd representing a cube of variables, according to the given values.
+
+class MtbddMap;
+
+class Mtbdd {
+    friend class Sylvan;
+    friend class MtbddMap;
+
+public:
+    Mtbdd() { mtbdd = sylvan_false; mtbdd_protect(&mtbdd); }
+    Mtbdd(const MTBDD from) : mtbdd(from) { mtbdd_protect(&mtbdd); }
+    Mtbdd(const Mtbdd &from) : mtbdd(from.mtbdd) { mtbdd_protect(&mtbdd); }
+    Mtbdd(const Bdd &from) : mtbdd(from.bdd) { mtbdd_protect(&mtbdd); }
+    ~Mtbdd() { mtbdd_unprotect(&mtbdd); }
+
+    /**
+     * @brief Creates a Mtbdd leaf representing the int64 value <value>
+     */
+    static Mtbdd int64Terminal(int64_t value);
+
+    /**
+     * @brief Creates a Mtbdd leaf representing the floating-point value <value>
+     */
+    static Mtbdd doubleTerminal(double value);
+
+    /**
+     * @brief Creates a Mtbdd leaf representing the fraction value <nominator>/<denominator>
+     * Internally, Sylvan uses 32-bit values and reports overflows to stderr.
+     */
+    static Mtbdd fractionTerminal(int64_t nominator, uint64_t denominator);
+
+    /**
+     * @brief Creates a Mtbdd leaf of type <type> holding value <value>
+     * This is useful for custom Mtbdd types.
+     */
+    static Mtbdd terminal(uint32_t type, uint64_t value);
+
+    /**
+     * @brief Creates a Boolean Mtbdd representing just the variable index in its positive form
+     * The variable index must be 0<=index<=2^23 (Sylvan uses 24 bits internally)
+     */
+    static Mtbdd mtbddVar(uint32_t variable);
+
+    /**
+     * @brief Returns the Boolean Mtbdd representing "True"
+     */
+    static Mtbdd mtbddOne();
+
+    /**
+     * @brief Returns the Boolean Mtbdd representing "False"
+     */
+    static Mtbdd mtbddZero();
+
+    /**
+     * @brief Returns the Mtbdd representing a cube of variables, according to the given values.
+     * @param variables the variables that will be in the cube in their positive or negative form
+     * @param values a character array describing how the variables will appear in the result
+     * @param terminal the leaf of the cube
+     * The length of values must be equal to the number of variables in the cube.
+     * For every ith char in values, if it is 0, the corresponding variable will appear in its negative form,
+     * if it is 1, it will appear in its positive form, and if it is 2, it will appear as "any", thus it will
+     * be skipped.
+     */
+    static Mtbdd mtbddCube(const BddSet &variables, unsigned char *values, const Mtbdd &terminal);
+
+    /**
+     * @brief Returns the Mtbdd representing a cube of variables, according to the given values.
+     * @param variables the variables that will be in the cube in their positive or negative form
+     * @param values a character array describing how the variables will appear in the result
+     * @param terminal the leaf of the cube
+     * The length of values must be equal to the number of variables in the cube.
+     * For every ith char in values, if it is 0, the corresponding variable will appear in its negative form,
+     * if it is 1, it will appear in its positive form, and if it is 2, it will appear as "any", thus it will
+     * be skipped.
+     */
+    static Mtbdd mtbddCube(const BddSet &variables, std::vector<uint8_t> values, const Mtbdd &terminal);
+
+    int operator==(const Mtbdd& other) const;
+    int operator!=(const Mtbdd& other) const;
+    Mtbdd operator=(const Mtbdd& right);
+    Mtbdd operator!() const;
+    Mtbdd operator~() const;
+    Mtbdd operator*(const Mtbdd& other) const;
+    Mtbdd operator*=(const Mtbdd& other);
+    Mtbdd operator+(const Mtbdd& other) const;
+    Mtbdd operator+=(const Mtbdd& other);
+    Mtbdd operator-(const Mtbdd& other) const;
+    Mtbdd operator-=(const Mtbdd& other);
+
+    // not implemented (compared to Bdd): <=, >=, <, >, &, &=, |, |=, ^, ^=
+
+    /**
+     * @brief Returns non-zero if this Mtbdd is a leaf
+     */
+    int isTerminal() const;
+
+    /**
+     * @brief Returns non-zero if this Mtbdd is a leaf
+     */
+    int isLeaf() const;
+
+    /**
+     * @brief Returns non-zero if this Mtbdd is mtbddOne()
+     */
+    int isOne() const;
+
+    /**
+     * @brief Returns non-zero if this Mtbdd is mtbddZero()
+     */
+    int isZero() const;
+
+    /**
+     * @brief Returns the top variable index of this Mtbdd (the variable in the root node)
+     */
+    uint32_t TopVar() const;
+
+    /**
+     * @brief Follows the high edge ("then") of the root node of this Mtbdd
+     */
+    Mtbdd Then() const;
+
+    /**
+     * @brief Follows the low edge ("else") of the root node of this Mtbdd
+     */
+    Mtbdd Else() const;
+
+    /**
+     * @brief Returns the negation of the MTBDD (every terminal negated)
+     * Do not use this for Boolean MTBDDs, only for Integer/Double/Fraction MTBDDs.
+     */
+    Mtbdd Negate() const;
+
+    /**
+     * @brief Applies the binary operation <op>
+     */
+    Mtbdd Apply(const Mtbdd &other, mtbdd_apply_op op) const;
+
+    /**
+     * @brief Applies the unary operation <op> with parameter <param>
+     */
+    Mtbdd UApply(mtbdd_uapply_op op, size_t param) const;
+
+    /**
+     * @brief Computes the abstraction on variables <variables> using operator <op>.
+     * See also: AbstractPlus, AbstractTimes, AbstractMin, AbstractMax
+     */
+    Mtbdd Abstract(const BddSet &variables, mtbdd_abstract_op op) const;
+
+    /**
+     * @brief Computes if f then g else h
+     * This Mtbdd must be a Boolean Mtbdd
+     */
+    Mtbdd Ite(const Mtbdd &g, const Mtbdd &h) const;
+
+    /**
+     * @brief Computes f + g
+     */
+    Mtbdd Plus(const Mtbdd &other) const;
+
+    /**
+     * @brief Computes f * g
+     */
+    Mtbdd Times(const Mtbdd &other) const;
+
+    /**
+     * @brief Computes min(f, g)
+     */
+    Mtbdd Min(const Mtbdd &other) const;
+
+    /**
+     * @brief Computes max(f, g)
+     */
+    Mtbdd Max(const Mtbdd &other) const;
+
+    /**
+     * @brief Computes abstraction by summation (existential quantification)
+     */
+    Mtbdd AbstractPlus(const BddSet &variables) const;
+
+    /**
+     * @brief Computes abstraction by multiplication (universal quantification)
+     */
+    Mtbdd AbstractTimes(const BddSet &variables) const;
+
+    /**
+     * @brief Computes abstraction by minimum
+     */
+    Mtbdd AbstractMin(const BddSet &variables) const;
+
+    /**
+     * @brief Computes abstraction by maximum
+     */
+    Mtbdd AbstractMax(const BddSet &variables) const;
+
+    /**
+     * @brief Computes abstraction by summation of f * g
+     */
+    Mtbdd AndExists(const Mtbdd &other, const BddSet &variables) const;
+
+    /**
+     * @brief Convert floating-point/fraction Mtbdd to a Boolean Mtbdd, leaf >= value ? true : false
+     */
+    Mtbdd MtbddThreshold(double value) const;
+
+    /**
+     * @brief Convert floating-point/fraction Mtbdd to a Boolean Mtbdd, leaf > value ? true : false
+     */
+    Mtbdd MtbddStrictThreshold(double value) const;
+
+    /**
+     * @brief Convert floating-point/fraction Mtbdd to a Boolean Mtbdd, leaf >= value ? true : false
+     * Same as MtbddThreshold (Bdd = Boolean Mtbdd)
+     */
+    Bdd BddThreshold(double value) const;
+
+    /**
+     * @brief Convert floating-point/fraction Mtbdd to a Boolean Mtbdd, leaf > value ? true : false
+     * Same as MtbddStrictThreshold (Bdd = Boolean Mtbdd)
+     */
+    Bdd BddStrictThreshold(double value) const;
+
+    /**
+     * @brief Computes the support of a Mtbdd.
+     */
+    Mtbdd Support() const;
+
+    /**
+     * @brief Gets the MTBDD of this Mtbdd (for C functions)
+     */
+    MTBDD GetMTBDD() const;
+
+    /**
+     * @brief Functional composition. Whenever a variable v in the map m is found in the MTBDD,
+     * it is substituted by the associated function (which should be a Boolean MTBDD).
+     * You can also use this function to implement variable reordering.
+     */
+    Mtbdd Compose(MtbddMap &m) const;
+
+    /**
+     * @brief Substitute all variables in the array from by the corresponding variables in to.
+     */
+    Mtbdd Permute(const std::vector<uint32_t>& from, const std::vector<uint32_t>& to) const;
+
+    /**
+     * @brief Compute the number of satisfying variable assignments, using variables in cube.
+     */
+    double SatCount(const BddSet &variables) const;
+
+    /**
+     * @brief Compute the number of satisfying variable assignments, using the given number of variables.
+     */
+    double SatCount(const size_t nvars) const;
+
+    /**
+     * @brief Gets the number of nodes in this Mtbdd. Not thread-safe!
+     */
+    size_t NodeCount() const;
+
+#include "sylvan_obj_mtbdd_storm.hpp"
+
+private:
+    MTBDD mtbdd;
+};
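(Editorial sketch, not part of the original commit: the expected leaf values in the comments follow from the documented semantics of Ite, Plus and AbstractPlus; initialization as in the test programs at the end of this diff is assumed.)

    #include "sylvan_obj.hpp"

    void mtbdd_sketch() {
        using namespace sylvan;
        Mtbdd x1 = Mtbdd::mtbddVar(1);                 // Boolean Mtbdd for variable 1
        Mtbdd f = x1.Ite(Mtbdd::doubleTerminal(1.0),   // f = 1.0 where x1 holds,
                         Mtbdd::doubleTerminal(0.5));  //     0.5 otherwise
        Mtbdd g = f.Plus(Mtbdd::doubleTerminal(2.0));  // pointwise: 3.0 / 2.5
        BddSet vars;
        vars.add(1);
        Mtbdd s = g.AbstractPlus(vars);                // sum over x1: the leaf 5.5
        (void)s;
    }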
+
+class MtbddMap
+{
+    friend class Mtbdd;
+    MTBDD mtbdd;
+    MtbddMap(MTBDD from) : mtbdd(from) { mtbdd_protect(&mtbdd); }
+    MtbddMap(Mtbdd &from) : mtbdd(from.mtbdd) { mtbdd_protect(&mtbdd); }
+public:
+    MtbddMap() : mtbdd(mtbdd_map_empty()) { mtbdd_protect(&mtbdd); }
+    ~MtbddMap() { mtbdd_unprotect(&mtbdd); }
+
+    MtbddMap(uint32_t key_variable, Mtbdd value);
+
+    MtbddMap operator+(const Mtbdd& other) const;
+    MtbddMap operator+=(const Mtbdd& other);
+    MtbddMap operator-(const Mtbdd& other) const;
+    MtbddMap operator-=(const Mtbdd& other);
+
+    /**
+     * @brief Adds a key-value pair to the map
+     */
+    void put(uint32_t key, Mtbdd value);
+
+    /**
+     * @brief Removes a key-value pair from the map
+     */
+    void removeKey(uint32_t key);
+
+    /**
+     * @brief Returns the number of key-value pairs in this map
+     */
+    size_t size();
+
+    /**
+     * @brief Returns non-zero when this map is empty
+     */
+    int isEmpty();
+};
+
+class Sylvan {
+public:
+    /**
+     * @brief Initializes the Sylvan framework; call this only once in your program.
+     * @param initialTableSize The initial size of the nodes table. Must be a power of two.
+     * @param maxTableSize The maximum size of the nodes table. Must be a power of two.
+     * @param initialCacheSize The initial size of the operation cache. Must be a power of two.
+     * @param maxCacheSize The maximum size of the operation cache. Must be a power of two.
+     */
+    static void initPackage(size_t initialTableSize, size_t maxTableSize, size_t initialCacheSize, size_t maxCacheSize);
+
+    /**
+     * @brief Initializes the BDD module of the Sylvan framework.
+     * @param granularity determines operation cache behavior; for higher values (2+) it will use the operation cache less often.
+     * Values of 3-7 may result in better performance, since occasionally not using the operation cache is fine in practice.
+     * A granularity of 1 means that every BDD operation will be cached at every variable level.
+     */
+    static void initBdd(int granularity);
+
+    /**
+     * @brief Initializes the MTBDD module of the Sylvan framework.
+     */
+    static void initMtbdd();
+
+    /**
+     * @brief Frees all memory in use by Sylvan.
+     * Warning: if you have any Bdd objects which are not bddZero() or bddOne() after this, your program may crash!
+     */
+    static void quitPackage();
+};
+
+}
+
+#endif
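(Editorial sketch, not part of the original commit: end-to-end initialization combining the Sylvan class above with the Lace calls used by the test programs at the end of this diff; the table and cache sizes are the ones used in test/test_basic.c.)

    #include "sylvan.h"
    #include "sylvan_obj.hpp"

    int main() {
        lace_init(1, 0);               // 1 worker, default task deque size
        lace_startup(0, NULL, NULL);   // default worker stack size, no main task

        sylvan::Sylvan::initPackage(1LL<<20, 1LL<<20, 1LL<<16, 1LL<<16);
        sylvan::Sylvan::initBdd(1);    // granularity 1: cache at every variable level
        sylvan::Sylvan::initMtbdd();

        // ... build and query (Mt)Bdd objects here ...

        sylvan::Sylvan::quitPackage(); // only bddZero()/bddOne() objects may survive this
        lace_exit();
        return 0;
    }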
diff --git a/src/sylvan_obj_bdd_storm.hpp b/src/sylvan_obj_bdd_storm.hpp
new file mode 100644
index 000000000..393ce988c
--- /dev/null
+++ b/src/sylvan_obj_bdd_storm.hpp
@@ -0,0 +1,3 @@
+    Mtbdd toDoubleMtbdd() const;
+    Mtbdd toInt64Mtbdd() const;
+    Mtbdd Ite(Mtbdd const& thenDd, Mtbdd const& elseDd) const;
diff --git a/src/sylvan_obj_mtbdd_storm.hpp b/src/sylvan_obj_mtbdd_storm.hpp
new file mode 100644
index 000000000..26e5ea4be
--- /dev/null
+++ b/src/sylvan_obj_mtbdd_storm.hpp
@@ -0,0 +1,51 @@
+    /**
+     * @brief Computes f - g
+     */
+    Mtbdd Minus(const Mtbdd &other) const;
+
+    /**
+     * @brief Computes f / g
+     */
+    Mtbdd Divide(const Mtbdd &other) const;
+
+    Bdd NotZero() const;
+
+    Bdd Equals(const Mtbdd& other) const;
+
+    Bdd Less(const Mtbdd& other) const;
+
+    Bdd LessOrEqual(const Mtbdd& other) const;
+
+    Mtbdd Minimum() const;
+
+    Mtbdd Maximum() const;
+
+    bool EqualNorm(const Mtbdd& other, double epsilon) const;
+
+    bool EqualNormRel(const Mtbdd& other, double epsilon) const;
+
+    Mtbdd Floor() const;
+
+    Mtbdd Ceil() const;
+
+    Mtbdd Pow(const Mtbdd& other) const;
+
+    Mtbdd Mod(const Mtbdd& other) const;
+
+    Mtbdd Logxy(const Mtbdd& other) const;
+
+    size_t CountLeaves() const;
+
+    /**
+     * @brief Compute the number of non-zero variable assignments, using variables in cube.
+     */
+    double NonZeroCount(size_t variableCount) const;
+
+    bool isValid() const;
+
+    /**
+     * @brief Writes a .dot file of this Mtbdd. Not thread-safe!
+     */
+    void PrintDot(FILE *out) const;
+
+    std::string GetShaHash() const;
diff --git a/src/sylvan_obj_storm.cpp b/src/sylvan_obj_storm.cpp
new file mode 100644
index 000000000..27706dcdd
--- /dev/null
+++ b/src/sylvan_obj_storm.cpp
@@ -0,0 +1,141 @@
+Mtbdd
+Bdd::toDoubleMtbdd() const {
+    LACE_ME;
+    return mtbdd_bool_to_double(bdd);
+}
+
+Mtbdd
+Bdd::toInt64Mtbdd() const {
+    LACE_ME;
+    return mtbdd_bool_to_int64(bdd);
+}
+
+Mtbdd
+Bdd::Ite(Mtbdd const& thenDd, Mtbdd const& elseDd) const {
+    LACE_ME;
+    return mtbdd_ite(bdd, thenDd.GetMTBDD(), elseDd.GetMTBDD());
+}
+
+Mtbdd
+Mtbdd::Minus(const Mtbdd &other) const
+{
+    LACE_ME;
+    return mtbdd_minus(mtbdd, other.mtbdd);
+}
+
+Mtbdd
+Mtbdd::Divide(const Mtbdd &other) const
+{
+    LACE_ME;
+    return mtbdd_divide(mtbdd, other.mtbdd);
+}
+
+Bdd
+Mtbdd::NotZero() const
+{
+    LACE_ME;
+    return mtbdd_not_zero(mtbdd);
+}
+
+Bdd
+Mtbdd::Equals(const Mtbdd& other) const {
+    LACE_ME;
+    return mtbdd_equals(mtbdd, other.mtbdd);
+}
+
+Bdd
+Mtbdd::Less(const Mtbdd& other) const {
+    LACE_ME;
+    return mtbdd_less_as_bdd(mtbdd, other.mtbdd);
+}
+
+Bdd
+Mtbdd::LessOrEqual(const Mtbdd& other) const {
+    LACE_ME;
+    return mtbdd_less_or_equal_as_bdd(mtbdd, other.mtbdd);
+}
+
+bool
+Mtbdd::EqualNorm(const Mtbdd& other, double epsilon) const {
+    LACE_ME;
+    return mtbdd_equal_norm_d(mtbdd, other.mtbdd, epsilon);
+}
+
+bool
+Mtbdd::EqualNormRel(const Mtbdd& other, double epsilon) const {
+    LACE_ME;
+    return mtbdd_equal_norm_rel_d(mtbdd, other.mtbdd, epsilon);
+}
+
+Mtbdd
+Mtbdd::Floor() const {
+    LACE_ME;
+    return mtbdd_floor(mtbdd);
+}
+
+Mtbdd
+Mtbdd::Ceil() const {
+    LACE_ME;
+    return mtbdd_ceil(mtbdd);
+}
+
+Mtbdd
+Mtbdd::Pow(const Mtbdd& other) const {
+    LACE_ME;
+    return mtbdd_pow(mtbdd, other.mtbdd);
+}
+
+Mtbdd
+Mtbdd::Mod(const Mtbdd& other) const {
+    LACE_ME;
+    return mtbdd_mod(mtbdd, other.mtbdd);
+}
+
+Mtbdd
+Mtbdd::Logxy(const Mtbdd& other) const {
+    LACE_ME;
+    return mtbdd_logxy(mtbdd, other.mtbdd);
+}
+
+size_t
+Mtbdd::CountLeaves() const {
+    LACE_ME;
+    return mtbdd_leafcount(mtbdd);
+}
+
+double
+Mtbdd::NonZeroCount(size_t variableCount) const {
+    LACE_ME;
+    return mtbdd_non_zero_count(mtbdd, variableCount);
+}
+
+bool
+Mtbdd::isValid() const {
+    LACE_ME;
+    return mtbdd_test_isvalid(mtbdd) == 1;
+}
+
+Mtbdd
+Mtbdd::Minimum() const {
+    LACE_ME;
+    return mtbdd_minimum(mtbdd);
+}
+
+Mtbdd
+Mtbdd::Maximum() const {
+    LACE_ME;
+    return mtbdd_maximum(mtbdd);
+}
+
+void
+Mtbdd::PrintDot(FILE *out) const {
+    mtbdd_fprintdot(out, mtbdd, NULL);
+}
+
+std::string
+Mtbdd::GetShaHash() const {
+    char buf[65];
+    mtbdd_getsha(mtbdd, buf);
+    return std::string(buf);
+}
+
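(Editorial sketch, not part of the original commit: how the storm-specific extensions above compose. toDoubleMtbdd maps true to 1.0 and false to 0.0, so pointwise division followed by NotZero round-trips back to the original Bdd.)

    #include "sylvan_obj.hpp"

    void storm_ext_sketch() {
        using namespace sylvan;
        Bdd b = Bdd::bddVar(0);
        Mtbdd m = b.toDoubleMtbdd();                     // 1.0 where b holds, else 0.0
        Mtbdd h = m.Divide(Mtbdd::doubleTerminal(2.0));  // pointwise: 0.5 / 0.0
        Bdd nz = h.NotZero();                            // back to a Bdd; equals b
        (void)nz;
    }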
diff --git a/src/tls.h b/src/tls.h
new file mode 100644
index 000000000..80fdfe7e5
--- /dev/null
+++ b/src/tls.h
@@ -0,0 +1,35 @@
+/*
+ * Written by Josh Dybnis and released to the public domain, as explained at
+ * http://creativecommons.org/licenses/publicdomain
+ *
+ * A platform independent wrapper around thread-local storage. On platforms that don't support
+ * __thread variables (e.g. Mac OS X), we have to use the pthreads library for thread-local storage.
+ */
+#include <assert.h>
+
+#ifndef TLS_H
+#define TLS_H
+
+#ifdef __ELF__ // use gcc thread-local storage (i.e. __thread variables)
+#define DECLARE_THREAD_LOCAL(name, type) __thread type name
+#define INIT_THREAD_LOCAL(name)
+#define SET_THREAD_LOCAL(name, value) name = value
+#define LOCALIZE_THREAD_LOCAL(name, type)
+
+#else//!__ELF__
+
+#include <pthread.h>
+
+#define DECLARE_THREAD_LOCAL(name, type) pthread_key_t name##_KEY
+
+#define INIT_THREAD_LOCAL(name) \
+    do { \
+        if (pthread_key_create(&name##_KEY, NULL) != 0) { assert(0); } \
+    } while (0)
+
+#define SET_THREAD_LOCAL(name, value) pthread_setspecific(name##_KEY, (void *)(size_t)value);
+
+#define LOCALIZE_THREAD_LOCAL(name, type) type name = (type)(size_t)pthread_getspecific(name##_KEY)
+
+#endif//__ELF__
+#endif//TLS_H
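(Editorial sketch, not part of the original commit: the tls.h macros above compile to a plain __thread variable on ELF platforms and to a pthread key elsewhere; worker_id is a hypothetical name.)

    #include "tls.h"

    DECLARE_THREAD_LOCAL(worker_id, int);

    void tls_setup(void) {             /* call once, before any thread uses the value */
        INIT_THREAD_LOCAL(worker_id);  /* no-op on ELF; pthread_key_create otherwise */
    }

    void tls_demo(int id) {
        SET_THREAD_LOCAL(worker_id, id);
        LOCALIZE_THREAD_LOCAL(worker_id, int);  /* no-op on ELF; pthread_getspecific otherwise */
        (void)worker_id;               /* this thread's value on either platform */
    }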
diff --git a/test/.gitignore b/test/.gitignore
new file mode 100644
index 000000000..e04430786
--- /dev/null
+++ b/test/.gitignore
@@ -0,0 +1,5 @@
+test
+cmake_install.cmake
+CMakeFiles
+*.o
+.libs
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 000000000..a331904e8
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 2.6)
+project(sylvan C CXX)
+enable_testing()
+
+add_executable(sylvan_test main.c)
+target_link_libraries(sylvan_test sylvan)
+
+add_executable(test_basic test_basic.c)
+target_link_libraries(test_basic sylvan)
+
+add_executable(test_cxx test_cxx.cpp)
+target_link_libraries(test_cxx sylvan stdc++)
+
+add_test(test_cxx test_cxx)
+add_test(test_basic test_basic)
diff --git a/test/main.c b/test/main.c
new file mode 100644
index 000000000..941f60d2c
--- /dev/null
+++ b/test/main.c
@@ -0,0 +1,350 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include "test_assert.h"
+
+#include "llmsset.h"
+#include "sylvan.h"
+
+#define BLACK "\33[22;30m"
+#define GRAY "\33[01;30m"
+#define RED "\33[22;31m"
+#define LRED "\33[01;31m"
+#define GREEN "\33[22;32m"
+#define LGREEN "\33[01;32m"
+#define BLUE "\33[22;34m"
+#define LBLUE "\33[01;34m"
+#define BROWN "\33[22;33m"
+#define YELLOW "\33[01;33m"
+#define CYAN "\33[22;36m"
+#define LCYAN "\33[22;36m"
+#define MAGENTA "\33[22;35m"
+#define LMAGENTA "\33[01;35m"
+#define NC "\33[0m"
+#define BOLD "\33[1m"
+#define ULINE "\33[4m" //underline
+#define BLINK "\33[5m"
+#define INVERT "\33[7m"
+
+__thread uint64_t seed = 1;
+
+uint64_t
+xorshift_rand(void)
+{
+    uint64_t x = seed;
+    if (seed == 0) seed = rand();
+    x ^= x >> 12;
+    x ^= x << 25;
+    x ^= x >> 27;
+    seed = x;
+    return x * 2685821657736338717LL;
+}
+
+double
+uniform_deviate(uint64_t seed)
+{
+    return seed * (1.0 / (0xffffffffffffffffL + 1.0));
+}
+
+int
+rng(int low, int high)
+{
+    return low + uniform_deviate(xorshift_rand()) * (high-low);
+}
+
+static inline BDD
+make_random(int i, int j)
+{
+    if (i == j) return rng(0, 2) ? sylvan_true : sylvan_false;
+
+    BDD yes = make_random(i+1, j);
+    BDD no = make_random(i+1, j);
+    BDD result = sylvan_invalid;
+
+    switch(rng(0, 4)) {
+    case 0:
+        result = no;
+        sylvan_deref(yes);
+        break;
+    case 1:
+        result = yes;
+        sylvan_deref(no);
+        break;
+    case 2:
+        result = sylvan_ref(sylvan_makenode(i, yes, no));
+        sylvan_deref(no);
+        sylvan_deref(yes);
+        break;
+    case 3:
+    default:
+        result = sylvan_ref(sylvan_makenode(i, no, yes));
+        sylvan_deref(no);
+        sylvan_deref(yes);
+        break;
+    }
+
+    return result;
+}
+
+/** GC testing */
+VOID_TASK_2(gctest_fill, int, levels, int, width)
+{
+    if (levels > 1) {
+        int i;
+        for (i=0; i,<2,2,3,5,4,3>,<2,2,3,5,4,2>,<2,3,3,5,4,3>,<2,3,4,4,4,3>}
+    MDD proj = lddmc_cube((uint32_t[]){1,1,-2},3);
+    b = lddmc_cube((uint32_t[]){1,2}, 2);
+    b = lddmc_union_cube(b, (uint32_t[]){2,2}, 2);
+    b = lddmc_union_cube(b, (uint32_t[]){2,3}, 2);
+    test_assert(lddmc_project(a, proj)==b);
+    test_assert(lddmc_project_minus(a, proj, lddmc_false)==b);
+    test_assert(lddmc_project_minus(a, proj, b)==lddmc_false);
+
+    // Test relprod
+
+    a = lddmc_cube((uint32_t[]){1},1);
+    b = lddmc_cube((uint32_t[]){1,2},2);
+    proj = lddmc_cube((uint32_t[]){1,2,-1}, 3);
+    test_assert(lddmc_cube((uint32_t[]){2},1) == lddmc_relprod(a, b, proj));
+    test_assert(lddmc_cube((uint32_t[]){3},1) == lddmc_relprod(a, lddmc_cube((uint32_t[]){1,3},2), proj));
+    a = lddmc_union_cube(a, (uint32_t[]){2},1);
+    test_assert(lddmc_satcount(a) == 2);
+    test_assert(lddmc_cube((uint32_t[]){2},1) == lddmc_relprod(a, b, proj));
+    b = lddmc_union_cube(b, (uint32_t[]){2,2},2);
+    test_assert(lddmc_cube((uint32_t[]){2},1) == lddmc_relprod(a, b, proj));
+    b = lddmc_union_cube(b, (uint32_t[]){2,3},2);
+    test_assert(lddmc_satcount(lddmc_relprod(a, b, proj)) == 2);
+    test_assert(lddmc_union(lddmc_cube((uint32_t[]){2},1),lddmc_cube((uint32_t[]){3},1)) == lddmc_relprod(a, b, proj));
+
+    // Test relprev
+    MDD universe = lddmc_union(lddmc_cube((uint32_t[]){1},1), lddmc_cube((uint32_t[]){2},1));
+    a = lddmc_cube((uint32_t[]){2},1);
+    b = lddmc_cube((uint32_t[]){1,2},2);
+    test_assert(lddmc_cube((uint32_t[]){1},1) == lddmc_relprev(a, b, proj, universe));
+    test_assert(lddmc_cube((uint32_t[]){1},1) == lddmc_relprev(a, b, proj, lddmc_cube((uint32_t[]){1},1)));
+    a = lddmc_cube((uint32_t[]){1},1);
+    MDD next = lddmc_relprod(a, b, proj);
+    test_assert(lddmc_relprev(next, b, proj, a) == a);
+
+    // Random tests
+
+    MDD rnd1, rnd2;
+
+    int i;
+    for (i=0; i<200; i++) {
+        int depth = rng(1, 20);
+        rnd1 = CALL(random_ldd, depth, rng(0, 30));
+        rnd2 = CALL(random_ldd, depth, rng(0, 30));
+        test_assert(rnd1 != lddmc_true);
+        test_assert(rnd2 != lddmc_true);
+        test_assert(lddmc_intersect(rnd1,rnd2) == lddmc_intersect(rnd2,rnd1));
+        test_assert(lddmc_union(rnd1,rnd2) == lddmc_union(rnd2,rnd1));
+        MDD tmp = lddmc_union(lddmc_minus(rnd1, rnd2), lddmc_minus(rnd2, rnd1));
+        test_assert(lddmc_intersect(tmp, lddmc_intersect(rnd1, rnd2)) == lddmc_false);
+        test_assert(lddmc_union(tmp, lddmc_intersect(rnd1, rnd2)) == lddmc_union(rnd1, rnd2));
+        test_assert(lddmc_minus(rnd1,rnd2) == lddmc_minus(rnd1, lddmc_intersect(rnd1,rnd2)));
+    }
+
+    // Test file stuff
+    for (i=0; i<10; i++) {
+        FILE *f = fopen("__lddmc_test_bdd", "w+");
+        int N = 20;
+        MDD rnd[N];
+        size_t a[N];
+        char sha[N][65];
+        int j;
+        for (j=0;j 1) sscanf(argv[1], "%d", &threads);
+
+    if (runtests(threads)) exit(1);
+    printf(NC);
+    exit(0);
+}
diff --git a/test/test_assert.h b/test/test_assert.h
new file mode 100644
index 000000000..8cd18501a
--- /dev/null
+++ b/test/test_assert.h
@@ -0,0 +1,13 @@
+#ifndef test_assert
+#define test_assert(expr) do { \
+    if (!(expr)) \
+    { \
+        fprintf(stderr, \
+                "file %s: line %d (%s): precondition `%s' failed.\n", \
+                __FILE__, \
+                __LINE__, \
+                __PRETTY_FUNCTION__, \
+                #expr); \
+        return 1; \
+    } } while(0)
+#endif
diff --git a/test/test_basic.c b/test/test_basic.c
new file mode 100644
index 000000000..f6cffcc15
--- /dev/null
+++ b/test/test_basic.c
@@ -0,0 +1,331 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "llmsset.h"
+#include "sylvan.h"
+#include "test_assert.h"
+
+__thread uint64_t seed = 1;
+
+uint64_t
+xorshift_rand(void)
+{
+    uint64_t x = seed;
+    if (seed == 0) seed = rand();
+    x ^= x >> 12;
+    x ^= x << 25;
+    x ^= x >> 27;
+    seed = x;
+    return x * 2685821657736338717LL;
+}
+
+double
+uniform_deviate(uint64_t seed)
+{
+    return seed * (1.0 / (0xffffffffffffffffL + 1.0));
+}
+
+int
+rng(int low, int high)
+{
+    return low + uniform_deviate(xorshift_rand()) * (high-low);
+}
+
+static inline BDD
+make_random(int i, int j)
+{
+    if (i == j) return rng(0, 2) ? sylvan_true : sylvan_false;
+
+    BDD yes = make_random(i+1, j);
+    BDD no = make_random(i+1, j);
+    BDD result = sylvan_invalid;
+
+    switch(rng(0, 4)) {
+    case 0:
+        result = no;
+        sylvan_deref(yes);
+        break;
+    case 1:
+        result = yes;
+        sylvan_deref(no);
+        break;
+    case 2:
+        result = sylvan_ref(sylvan_makenode(i, yes, no));
+        sylvan_deref(no);
+        sylvan_deref(yes);
+        break;
+    case 3:
+    default:
+        result = sylvan_ref(sylvan_makenode(i, no, yes));
+        sylvan_deref(no);
+        sylvan_deref(yes);
+        break;
+    }
+
+    return result;
+}
+
+int testEqual(BDD a, BDD b)
+{
+    if (a == b) return 1;
+
+    if (a == sylvan_invalid) {
+        fprintf(stderr, "a is invalid!\n");
+        return 0;
+    }
+
+    if (b == sylvan_invalid) {
+        fprintf(stderr, "b is invalid!\n");
+        return 0;
+    }
+
+    fprintf(stderr, "a and b are not equal!\n");
+
+    sylvan_fprint(stderr, a);fprintf(stderr, "\n");
+    sylvan_fprint(stderr, b);fprintf(stderr, "\n");
+
+    return 0;
+}
+
+int
+test_bdd()
+{
+    test_assert(sylvan_makenode(sylvan_ithvar(1), sylvan_true, sylvan_true) == sylvan_not(sylvan_makenode(sylvan_ithvar(1), sylvan_false, sylvan_false)));
+    test_assert(sylvan_makenode(sylvan_ithvar(1), sylvan_false, sylvan_true) == sylvan_not(sylvan_makenode(sylvan_ithvar(1), sylvan_true, sylvan_false)));
+    test_assert(sylvan_makenode(sylvan_ithvar(1), sylvan_true, sylvan_false) == sylvan_not(sylvan_makenode(sylvan_ithvar(1), sylvan_false, sylvan_true)));
+    test_assert(sylvan_makenode(sylvan_ithvar(1), sylvan_false, sylvan_false) == sylvan_not(sylvan_makenode(sylvan_ithvar(1), sylvan_true, sylvan_true)));
+
+    return 0;
+}
+
+int
+test_cube()
+{
+    LACE_ME;
+    BDDSET vars = sylvan_set_fromarray(((BDDVAR[]){1,2,3,4,6,8}), 6);
+
+    uint8_t cube[6], check[6];
+    int i, j;
+    for (i=0;i<6;i++) cube[i] = rng(0,3);
+    BDD bdd = sylvan_cube(vars, cube);
+
+    sylvan_sat_one(bdd, vars, check);
+    for (i=0; i<6;i++) test_assert(cube[i] == check[i] || (cube[i] == 2 && check[i] == 0));
+
+    BDD picked = sylvan_pick_cube(bdd);
+    test_assert(testEqual(sylvan_and(picked, bdd), picked));
+
+    BDD t1 = sylvan_cube(vars, ((uint8_t[]){1,1,2,2,0,0}));
+    BDD t2 = sylvan_cube(vars, ((uint8_t[]){1,1,1,0,0,2}));
+    test_assert(testEqual(sylvan_union_cube(t1, vars, ((uint8_t[]){1,1,1,0,0,2})), sylvan_or(t1, t2)));
+    t2 = sylvan_cube(vars, ((uint8_t[]){2,2,2,1,1,0}));
+    test_assert(testEqual(sylvan_union_cube(t1, vars, ((uint8_t[]){2,2,2,1,1,0})), sylvan_or(t1, t2)));
+    t2 = sylvan_cube(vars, ((uint8_t[]){1,1,1,0,0,0}));
+    test_assert(testEqual(sylvan_union_cube(t1, vars, ((uint8_t[]){1,1,1,0,0,0})), sylvan_or(t1, t2)));
+
+    bdd = make_random(1, 16);
+    for (j=0;j<10;j++) {
+        for (i=0;i<6;i++) cube[i] = rng(0,3);
+        BDD c = sylvan_cube(vars, cube);
+        test_assert(sylvan_union_cube(bdd, vars, cube) == sylvan_or(bdd, c));
+    }
+
+    for (i=0;i<10;i++) {
+        picked = sylvan_pick_cube(bdd);
+        test_assert(testEqual(sylvan_and(picked, bdd), picked));
+    }
+    return 0;
+}
+
+static int
+test_operators()
+{
+    // We need to test: xor, and, or, nand, nor, imp, biimp, invimp, diff, less
+    LACE_ME;
+
+    //int i;
+    BDD a = sylvan_ithvar(1);
+    BDD b = sylvan_ithvar(2);
+    BDD one = make_random(1, 12);
+    BDD two = make_random(6, 24);
+
+    // Test or
+    test_assert(testEqual(sylvan_or(a, b), sylvan_makenode(1, b, sylvan_true)));
+    test_assert(testEqual(sylvan_or(a, b), sylvan_or(b, a)));
+    test_assert(testEqual(sylvan_or(one, two), sylvan_or(two, one)));
+
+    // Test and
+    test_assert(testEqual(sylvan_and(a, b), sylvan_makenode(1, sylvan_false, b)));
+    test_assert(testEqual(sylvan_and(a, b), sylvan_and(b, a)));
+    test_assert(testEqual(sylvan_and(one, two), sylvan_and(two, one)));
+
+    // Test xor
+    test_assert(testEqual(sylvan_xor(a, b), sylvan_makenode(1, b, sylvan_not(b))));
+    test_assert(testEqual(sylvan_xor(a, b), sylvan_xor(a, b)));
+    test_assert(testEqual(sylvan_xor(a, b), sylvan_xor(b, a)));
+    test_assert(testEqual(sylvan_xor(one, two), sylvan_xor(two, one)));
+    test_assert(testEqual(sylvan_xor(a, b), sylvan_ite(a, sylvan_not(b), b)));
+
+    // Test diff
+    test_assert(testEqual(sylvan_diff(a, b), sylvan_diff(a, b)));
+    test_assert(testEqual(sylvan_diff(a, b), sylvan_diff(a, sylvan_and(a, b))));
+    test_assert(testEqual(sylvan_diff(a, b), sylvan_and(a, sylvan_not(b))));
+    test_assert(testEqual(sylvan_diff(a, b), sylvan_ite(b, sylvan_false, a)));
+    test_assert(testEqual(sylvan_diff(one, two), sylvan_diff(one, two)));
+    test_assert(testEqual(sylvan_diff(one, two), sylvan_diff(one, sylvan_and(one, two))));
+    test_assert(testEqual(sylvan_diff(one, two), sylvan_and(one, sylvan_not(two))));
+    test_assert(testEqual(sylvan_diff(one, two), sylvan_ite(two, sylvan_false, one)));
+
+    // Test biimp
+    test_assert(testEqual(sylvan_biimp(a, b), sylvan_makenode(1, sylvan_not(b), b)));
+    test_assert(testEqual(sylvan_biimp(a, b), sylvan_biimp(b, a)));
+    test_assert(testEqual(sylvan_biimp(one, two), sylvan_biimp(two, one)));
+
+    // Test nand / and
+    test_assert(testEqual(sylvan_not(sylvan_and(a, b)), sylvan_nand(b, a)));
+    test_assert(testEqual(sylvan_not(sylvan_and(one, two)), sylvan_nand(two, one)));
+
+    // Test nor / or
+    test_assert(testEqual(sylvan_not(sylvan_or(a, b)), sylvan_nor(b, a)));
+    test_assert(testEqual(sylvan_not(sylvan_or(one, two)), sylvan_nor(two, one)));
+
+    // Test xor / biimp
+    test_assert(testEqual(sylvan_xor(a, b), sylvan_not(sylvan_biimp(b, a))));
+    test_assert(testEqual(sylvan_xor(one, two), sylvan_not(sylvan_biimp(two, one))));
+
+    // Test imp
+    test_assert(testEqual(sylvan_imp(a, b), sylvan_ite(a, b, sylvan_true)));
+    test_assert(testEqual(sylvan_imp(one, two), sylvan_ite(one, two, sylvan_true)));
+    test_assert(testEqual(sylvan_imp(one, two), sylvan_not(sylvan_diff(one, two))));
+    test_assert(testEqual(sylvan_invimp(one, two), sylvan_not(sylvan_less(one, two))));
+    test_assert(testEqual(sylvan_imp(a, b), sylvan_invimp(b, a)));
+    test_assert(testEqual(sylvan_imp(one, two), sylvan_invimp(two, one)));
+
+    return 0;
+}
+
+int
+test_relprod()
+{
+    LACE_ME;
+
+    BDDVAR vars[] = {0,2,4};
+    BDDVAR all_vars[] = {0,1,2,3,4,5};
+
+    BDDSET vars_set = sylvan_set_fromarray(vars, 3);
+    BDDSET all_vars_set = sylvan_set_fromarray(all_vars, 6);
+
+    BDD s, t, next, prev;
+    BDD zeroes, ones;
+
+    // transition relation: 000 --> 111 and !000 --> 000
+    t = sylvan_false;
+    t = sylvan_union_cube(t, all_vars_set, ((uint8_t[]){0,1,0,1,0,1}));
+    t = sylvan_union_cube(t, all_vars_set, ((uint8_t[]){1,0,2,0,2,0}));
+    t = sylvan_union_cube(t, all_vars_set, ((uint8_t[]){2,0,1,0,2,0}));
+    t = sylvan_union_cube(t, all_vars_set, ((uint8_t[]){2,0,2,0,1,0}));
+
+    s = sylvan_cube(vars_set, (uint8_t[]){0,0,1});
+    zeroes = sylvan_cube(vars_set, (uint8_t[]){0,0,0});
+    ones = sylvan_cube(vars_set, (uint8_t[]){1,1,1});
+
+    next = sylvan_relnext(s, t, all_vars_set);
+    prev = sylvan_relprev(t, next, all_vars_set);
+    test_assert(next == zeroes);
+    test_assert(prev == sylvan_not(zeroes));
+
+    next = sylvan_relnext(next, t, all_vars_set);
+    prev = sylvan_relprev(t, next, all_vars_set);
+    test_assert(next == ones);
+    test_assert(prev == zeroes);
+
+    t = sylvan_cube(all_vars_set, (uint8_t[]){0,0,0,0,0,1});
+    test_assert(sylvan_relprev(t, s, all_vars_set) == zeroes);
+    test_assert(sylvan_relprev(t, sylvan_not(s), all_vars_set) == sylvan_false);
+    test_assert(sylvan_relnext(s, t, all_vars_set) == sylvan_false);
+    test_assert(sylvan_relnext(zeroes, t, all_vars_set) == s);
+
+    t = sylvan_cube(all_vars_set, (uint8_t[]){0,0,0,0,0,2});
+    test_assert(sylvan_relprev(t, s, all_vars_set) == zeroes);
+    test_assert(sylvan_relprev(t, zeroes, all_vars_set) == zeroes);
+    test_assert(sylvan_relnext(sylvan_not(zeroes), t, all_vars_set) == sylvan_false);
+
+    return 0;
+}
+
+int
+test_compose()
+{
+    LACE_ME;
+
+    BDD a = sylvan_ithvar(1);
+    BDD b = sylvan_ithvar(2);
+
+    BDD a_or_b = sylvan_or(a, b);
+
+    BDD one = make_random(3, 16);
+    BDD two = make_random(8, 24);
+
+    BDDMAP map = sylvan_map_empty();
+
+    map = sylvan_map_add(map, 1, one);
+    map = sylvan_map_add(map, 2, two);
+
+    test_assert(sylvan_map_key(map) == 1);
+    test_assert(sylvan_map_value(map) == one);
+    test_assert(sylvan_map_key(sylvan_map_next(map)) == 2);
+    test_assert(sylvan_map_value(sylvan_map_next(map)) == two);
+
+    test_assert(testEqual(one, sylvan_compose(a, map)));
+    test_assert(testEqual(two, sylvan_compose(b, map)));
+
+    test_assert(testEqual(sylvan_or(one, two), sylvan_compose(a_or_b, map)));
+
+    map = sylvan_map_add(map, 2, one);
+    test_assert(testEqual(sylvan_compose(a_or_b, map), one));
+
+    map = sylvan_map_add(map, 1, two);
+    test_assert(testEqual(sylvan_or(one, two), sylvan_compose(a_or_b, map)));
+
+    test_assert(testEqual(sylvan_and(one, two), sylvan_compose(sylvan_and(a, b), map)));
+    return 0;
+}
+
+int runtests()
+{
+    // we are not testing garbage collection
+    sylvan_gc_disable();
+
+    if (test_bdd()) return 1;
+    for (int j=0;j<10;j++) if (test_cube()) return 1;
+    for (int j=0;j<10;j++) if (test_relprod()) return 1;
+    for (int j=0;j<10;j++) if (test_compose()) return 1;
+    for (int j=0;j<10;j++) if (test_operators()) return 1;
+    return 0;
+}
+
+int main()
+{
+    // Standard Lace initialization with 1 worker
+    lace_init(1, 0);
+    lace_startup(0, NULL, NULL);
+
+    // Simple Sylvan initialization, also initialize BDD support
+    sylvan_init_package(1LL<<20, 1LL<<20, 1LL<<16, 1LL<<16);
+    sylvan_init_bdd(1);
+
+    int res = runtests();
+
+    sylvan_quit();
+    lace_exit();
+
+    return res;
+}
diff --git a/test/test_cxx.cpp b/test/test_cxx.cpp
new file mode 100644
index 000000000..c1d984b3f
--- /dev/null
+++ b/test/test_cxx.cpp
@@ -0,0 +1,51 @@
+/**
+ * Just a small test file to ensure that Sylvan can compile in C++
+ */
+
+#include
+#include
+#include
+
+#include "test_assert.h"
+
+using namespace sylvan;
+
+int runtest()
+{
+    Bdd one = Bdd::bddOne();
+    Bdd zero = Bdd::bddZero();
+
+    test_assert(one != zero);
+    test_assert(one == !zero);
+
+    Bdd v1 = Bdd::bddVar(1);
+    Bdd v2 = Bdd::bddVar(2);
+
+    Bdd t = v1 + v2;
+
+    BddMap map;
+    map.put(2, t);
+
+    test_assert(v2.Compose(map) == (v1 + v2));
+    test_assert((t * v2) == v2);
+
+    return 0;
+}
+
+int main()
+{
+    // Standard Lace initialization with 1 worker
+    lace_init(1, 0);
+    lace_startup(0, NULL, NULL);
+
+    // Simple Sylvan initialization, also initialize BDD support
+    sylvan_init_package(1LL<<16, 1LL<<16, 1LL<<16, 1LL<<16);
+    sylvan_init_bdd(1);
+
+    int res = runtest();
+
+    sylvan_quit();
+    lace_exit();
+
+    return res;
+}