initial commit

2025-03-09 15:40:10 +00:00 · 2019-09-07 14:03:22 +04:00 · 2019-09-07 14:03:22 +04:00 · c2da007f40
commit c2da007f40
1610 changed files with 398047 additions and 0 deletions
--- a/tdactor/benchmark/CMakeLists.txt
+++ b/tdactor/benchmark/CMakeLists.txt
@ -0,0 +1,19 @@
+cmake_minimum_required(VERSION 3.0.2 FATAL_ERROR)
+
+set(BENCHMARK_SOURCE  # Inputs of the benchmark executable; the third_party headers are listed alongside the sources
+  benchmark.cpp
+  third_party/mp-queue.c
+
+  third_party/FAAArrayQueue.h
+  third_party/LCRQueue.h
+  third_party/LazyIndexArrayQueue.h
+  third_party/MoodyCamelQueue.h
+)
+add_executable(benchmark ${BENCHMARK_SOURCE})
+target_include_directories(benchmark PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)  # Adds this directory to the include path when building
+target_link_libraries(benchmark PRIVATE tdactor)
+
+if (MSVC)
+  set_property(SOURCE benchmark.cpp APPEND_STRING PROPERTY COMPILE_FLAGS " /wd4457 /wd4316")  # For benchmark.cpp only: silence MSVC C4457 (declaration hides function parameter) and C4316 (heap object may not be aligned)
+endif()
+
--- a/tdactor/benchmark/benchmark.cpp
+++ b/tdactor/benchmark/benchmark.cpp
--- a/tdactor/benchmark/third_party/FAAArrayQueue.h
+++ b/tdactor/benchmark/third_party/FAAArrayQueue.h
@ -0,0 +1,371 @@
+/******************************************************************************
+ * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Concurrency Freaks nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ******************************************************************************
+ */
+
+#ifndef _FAA_ARRAY_QUEUE_HP_H_
+#define _FAA_ARRAY_QUEUE_HP_H_
+
+#include "HazardPointers.h"
+
+#include <atomic>
+#include <stdexcept>
+
+namespace ConcurrencyFreaks {
+/**
+ * <h1> Fetch-And-Add Array Queue </h1>
+ *
+ * Each node has one array but we don't search for a vacant entry. Instead, we
+ * use FAA to obtain an index in the array, for enqueueing or dequeuing.
+ *
+ * There are some similarities between this queue and the basic queue in YMC:
+ * http://chaoran.me/assets/pdf/wfq-ppopp16.pdf
+ * but it's not the same because the queue in listing 1 is obstruction-free, while
+ * our algorithm is lock-free.
+ * In FAAArrayQueue eventually a new node will be inserted (using Michael-Scott's
+ * algorithm) and it will have an item pre-filled in the first position, which means
+ * that at most, after BUFFER_SIZE steps, one item will be enqueued (and it can then
+ * be dequeued). This kind of progress is lock-free.
+ *
+ * Each entry in the array may contain one of three possible values:
+ * - A valid item that has been enqueued;
+ * - nullptr, which means no item has yet been enqueued in that position;
+ * - taken, a special value that means there was an item but it has been dequeued;
+ *
+ * Enqueue algorithm: FAA + CAS(null,item)
+ * Dequeue algorithm: FAA + CAS(item,taken)
+ * Consistency: Linearizable
+ * enqueue() progress: lock-free
+ * dequeue() progress: lock-free
+ * Memory Reclamation: Hazard Pointers (lock-free)
+ * Uncontended enqueue: 1 FAA + 1 CAS + 1 HP
+ * Uncontended dequeue: 1 FAA + 1 CAS + 1 HP
+ *
+ *
+ * <p>
+ * Lock-Free Linked List as described in Maged Michael and Michael Scott's paper:
+ * {@link http://www.cs.rochester.edu/~scott/papers/1996_PODC_queues.pdf}
+ * <a href="http://www.cs.rochester.edu/~scott/papers/1996_PODC_queues.pdf">
+ * Simple, Fast, and Practical Non-Blocking and Blocking Concurrent Queue Algorithms</a>
+ * <p>
+ * The paper on Hazard Pointers is named "Hazard Pointers: Safe Memory
+ * Reclamation for Lock-Free objects" and it is available here:
+ * http://web.cecs.pdx.edu/~walpole/class/cs510/papers/11.pdf
+ *
+ * @author Pedro Ramalhete
+ * @author Andreia Correia
+ */
+template <typename T>
+class FAAArrayQueue {
+  static const long BUFFER_SIZE = 1024;  // Number of item slots in every node's array
+
+ private:
+  struct Node {
+    std::atomic<int> deqidx;  // Next slot index handed to a dequeuer (claimed with fetch_add)
+    std::atomic<T*> items[BUFFER_SIZE];  // Each slot holds nullptr, an enqueued item, or the `taken` marker
+    std::atomic<int> enqidx;  // Next slot index handed to an enqueuer (claimed with fetch_add)
+    std::atomic<Node*> next;  // Following node in the list; nullptr while this node is the last one
+
+    // Start with the first entry pre-filled and enqidx at 1
+    Node(T* item) : deqidx{0}, enqidx{1}, next{nullptr} {
+      items[0].store(item, std::memory_order_relaxed);
+      for (long i = 1; i < BUFFER_SIZE; i++) {
+        items[i].store(nullptr, std::memory_order_relaxed);
+      }
+    }
+
+    bool casNext(Node* cmp, Node* val) {  // CAS on next: cmp -> val, true when the swap happened
+      return next.compare_exchange_strong(cmp, val);
+    }
+  };
+
+  bool casTail(Node* cmp, Node* val) {  // CAS on tail: cmp -> val, true when the swap happened
+    return tail.compare_exchange_strong(cmp, val);
+  }
+
+  bool casHead(Node* cmp, Node* val) {  // CAS on head: cmp -> val, true when the swap happened
+    return head.compare_exchange_strong(cmp, val);
+  }
+
+  // Pointers to head and tail of the list
+  alignas(128) std::atomic<Node*> head;  // 128-byte alignment, presumably to keep head and tail on separate cache lines
+  alignas(128) std::atomic<Node*> tail;
+
+  static const int MAX_THREADS = 128;  // Default value of the constructor's maxThreads argument
+  const int maxThreads;  // Forwarded to the HazardPointers member below
+
+  T* taken = (T*)new int();  // Unique address used as the "slot already dequeued" marker; really an int, only stored and compared here
+
+  // We need just one hazard pointer
+  HazardPointers<Node> hp{1, maxThreads};
+  const int kHpTail = 0;  // enqueue() and dequeue() both use hazard-pointer slot 0
+  const int kHpHead = 0;
+
+ public:
+  FAAArrayQueue(int maxThreads = MAX_THREADS) : maxThreads{maxThreads} {
+    Node* sentinelNode = new Node(nullptr);
+    sentinelNode->enqidx.store(0, std::memory_order_relaxed);  // Node() sets enqidx to 1 for a pre-filled slot; the sentinel holds no item
+    head.store(sentinelNode, std::memory_order_relaxed);
+    tail.store(sentinelNode, std::memory_order_relaxed);
+  }
+
+  ~FAAArrayQueue() {
+    while (dequeue(0) != nullptr)  // Runs as thread id 0; returned items are not deleted here
+      ;                  // Drain the queue
+    delete head.load();  // Delete the last node
+    delete (int*)taken;  // Was allocated as an int in the member initializer
+  }
+
+  std::string className() {  // Human-readable name of this queue implementation
+    return "FAAArrayQueue";
+  }
+
+  void enqueue(T* item, const int tid) {  // Appends item; tid is the caller's index into the hazard-pointer table
+    while (true) {
+      Node* ltail = hp.protect(kHpTail, tail, tid);  // Publish the tail node as hazardous before dereferencing it
+      const int idx = ltail->enqidx.fetch_add(1);  // Claim a slot index in this node
+      if (idx > BUFFER_SIZE - 1) {  // This node is full
+        if (ltail != tail.load())
+          continue;  // Tail has already moved on; retry on the new tail
+        Node* lnext = ltail->next.load();
+        if (lnext == nullptr) {
+          Node* newNode = new Node(item);  // New node carries item pre-filled in slot 0
+          if (ltail->casNext(nullptr, newNode)) {  // Linking the node is what enqueues the item
+            casTail(ltail, newNode);  // Result ignored; another enqueuer may advance tail through the else branch below
+            hp.clear(tid);
+            return;
+          }
+          delete newNode;  // Another thread appended first; discard ours and retry
+        } else {
+          casTail(ltail, lnext);  // Help advance a lagging tail
+        }
+        continue;
+      }
+      T* itemnull = nullptr;
+      if (ltail->items[idx].compare_exchange_strong(itemnull, item)) {  // Fails when a dequeuer has already stored `taken` in this slot
+        hp.clear(tid);
+        return;
+      }
+    }
+  }
+
+  T* dequeue(const int tid) {  // Returns the oldest item, or nullptr when the queue looks empty
+    while (true) {
+      Node* lhead = hp.protect(kHpHead, head, tid);  // Publish the head node as hazardous before dereferencing it
+      if (lhead->deqidx.load() >= lhead->enqidx.load() && lhead->next.load() == nullptr)
+        break;  // No claimed-but-unread slot and no successor node: report empty
+      const int idx = lhead->deqidx.fetch_add(1);  // Claim a slot index in this node
+      if (idx > BUFFER_SIZE - 1) {  // This node has been drained, check if there is another one
+        Node* lnext = lhead->next.load();
+        if (lnext == nullptr)
+          break;  // No more nodes in the queue
+        if (casHead(lhead, lnext))
+          hp.retire(lhead, tid);  // Only the thread that swung head retires the old node
+        continue;
+      }
+      T* item = lhead->items[idx].exchange(taken);  // Mark the slot consumed whatever it held
+      if (item == nullptr)
+        continue;  // Slot was still empty; the enqueuer that claimed it will fail its CAS and retry elsewhere
+      hp.clear(tid);
+      return item;
+    }
+    hp.clear(tid);
+    return nullptr;
+  }
+};
+/**
+ * <h1> Lazy Index Array Queue </h1>
+ *
+ * Same as Linear Array Queue but with lazy indexes for both enqueuers and dequeuers.
+ *
+ * This is a lock-free queue where each node contains an array of items.
+ * Each entry in the array may contain one of three possible values:
+ * - A valid item that has been enqueued;
+ * - nullptr, which means no item has yet been enqueued in that position;
+ * - taken, a special value that means there was an item but it has been dequeued;
+ * The enqueue() searches for the first nullptr entry in the array and tries
+ * to CAS from nullptr to its item.
+ * The dequeue() searches for the first valid item in the array and tries to
+ * CAS from item to "taken".
+ *
+ * Enqueue algorithm: Linear array search starting at lazy index with CAS(nullptr,item)
+ * Dequeue algorithm: Linear array search starting at lazy index with CAS(item,taken)
+ * Consistency: Linearizable
+ * enqueue() progress: lock-free
+ * dequeue() progress: lock-free
+ * Memory Reclamation: Hazard Pointers (lock-free)
+ * Uncontended enqueue: 1 CAS + 1 HP
+ * Uncontended dequeue: 1 CAS + 1 HP
+ *
+ *
+ * <p>
+ * Lock-Free Linked List as described in Maged Michael and Michael Scott's paper:
+ * {@link http://www.cs.rochester.edu/~scott/papers/1996_PODC_queues.pdf}
+ * <a href="http://www.cs.rochester.edu/~scott/papers/1996_PODC_queues.pdf">
+ * Simple, Fast, and Practical Non-Blocking and Blocking Concurrent Queue Algorithms</a>
+ * <p>
+ * The paper on Hazard Pointers is named "Hazard Pointers: Safe Memory
+ * Reclamation for Lock-Free objects" and it is available here:
+ * http://web.cecs.pdx.edu/~walpole/class/cs510/papers/11.pdf
+ *
+ * @author Pedro Ramalhete
+ * @author Andreia Correia
+ */
+template <typename T>
+class LazyIndexArrayQueue {
+  static const long BUFFER_SIZE = 1024;  // Number of item slots in every node's array
+
+ private:
+  struct Node {
+    std::atomic<int> deqidx;  // Lazy hint: index where dequeue() starts scanning
+    std::atomic<T*> items[BUFFER_SIZE];  // Each slot holds nullptr, an enqueued item, or the `taken` marker
+    std::atomic<int> enqidx;  // Lazy hint: index where enqueue() starts scanning
+    std::atomic<Node*> next;  // Following node in the list; nullptr while this node is the last one
+
+    Node(T* item) : deqidx{0}, enqidx{0}, next{nullptr} {  // Slot 0 receives item (nullptr for the sentinel), all other slots start empty
+      items[0].store(item, std::memory_order_relaxed);
+      for (int i = 1; i < BUFFER_SIZE; i++) {
+        items[i].store(nullptr, std::memory_order_relaxed);
+      }
+    }
+
+    bool casNext(Node* cmp, Node* val) {  // CAS on next: cmp -> val, true when the swap happened
+      return next.compare_exchange_strong(cmp, val);
+    }
+  };
+
+  bool casTail(Node* cmp, Node* val) {  // CAS on tail: cmp -> val, true when the swap happened
+    return tail.compare_exchange_strong(cmp, val);
+  }
+
+  bool casHead(Node* cmp, Node* val) {  // CAS on head: cmp -> val, true when the swap happened
+    return head.compare_exchange_strong(cmp, val);
+  }
+
+  // Pointers to head and tail of the list
+  alignas(128) std::atomic<Node*> head;  // 128-byte alignment, presumably to keep head and tail on separate cache lines
+  alignas(128) std::atomic<Node*> tail;
+
+  static const int MAX_THREADS = 128;  // Default value of the constructor's maxThreads argument
+  const int maxThreads;  // Forwarded to the HazardPointers member below
+
+  T* taken = (T*)new int();  // Unique address used as the "slot already dequeued" marker; really an int, only stored and compared here
+
+  // We need just one hazard pointer
+  HazardPointers<Node> hp{1, maxThreads};
+  const int kHpTail = 0;  // enqueue() and dequeue() both use hazard-pointer slot 0
+  const int kHpHead = 0;
+
+ public:
+  LazyIndexArrayQueue(int maxThreads = MAX_THREADS) : maxThreads{maxThreads} {
+    Node* sentinelNode = new Node(nullptr);  // Empty first node shared by head and tail
+    head.store(sentinelNode, std::memory_order_relaxed);
+    tail.store(sentinelNode, std::memory_order_relaxed);
+  }
+
+  ~LazyIndexArrayQueue() {
+    while (dequeue(0) != nullptr)  // Runs as thread id 0; returned items are not deleted here
+      ;                  // Drain the queue
+    delete head.load();  // Delete the last node
+    delete (int*)taken;  // Was allocated as an int in the member initializer
+  }
+
+  std::string className() {  // Human-readable name of this queue implementation
+    return "LazyIndexArrayQueue";
+  }
+
+  void enqueue(T* item, const int tid) {  // Appends item; tid is the caller's index into the hazard-pointer table
+    while (true) {
+      Node* ltail = hp.protect(kHpTail, tail, tid);  // Publish the tail node as hazardous before dereferencing it
+      if (ltail->items[BUFFER_SIZE - 1].load() != nullptr) {  // This node is full
+        if (ltail != tail.load())
+          continue;  // Tail has already moved on; retry on the new tail
+        Node* lnext = ltail->next.load();
+        if (lnext == nullptr) {
+          Node* newNode = new Node(item);  // New node carries item pre-filled in slot 0
+          if (ltail->casNext(nullptr, newNode)) {  // Linking the node is what enqueues the item
+            casTail(ltail, newNode);  // Result ignored; another enqueuer may advance tail through the else branch below
+            hp.clear(tid);
+            return;
+          }
+          delete newNode;  // Another thread appended first; discard ours and retry
+        } else {
+          casTail(ltail, lnext);  // Help advance a lagging tail
+        }
+        continue;
+      }
+      // Find the first null entry in items[] and try to CAS from null to item
+      for (int i = ltail->enqidx.load(); i < BUFFER_SIZE; i++) {
+        if (ltail->items[i].load() != nullptr)
+          continue;  // Slot already used; the hint was stale
+        T* itemnull = nullptr;
+        if (ltail->items[i].compare_exchange_strong(itemnull, item)) {
+          ltail->enqidx.store(i + 1, std::memory_order_release);  // Move the hint past the slot just filled
+          hp.clear(tid);
+          return;
+        }
+        if (ltail != tail.load())
+          break;  // Tail changed while scanning; restart from the outer loop
+      }
+    }
+  }
+
+  T* dequeue(const int tid) {  // Returns the oldest item, or nullptr when the queue looks empty
+    while (true) {
+      Node* lhead = hp.protect(kHpHead, head, tid);  // Publish the head node as hazardous before dereferencing it
+      if (lhead->items[BUFFER_SIZE - 1].load() == taken) {  // This node has been drained, check if there is another one
+        Node* lnext = lhead->next.load();
+        if (lnext == nullptr) {  // No more nodes in the queue
+          hp.clear(tid);
+          return nullptr;
+        }
+        if (casHead(lhead, lnext))
+          hp.retire(lhead, tid);  // Only the thread that swung head retires the old node
+        continue;
+      }
+      // Find the first non taken entry in items[] and try to CAS from item to taken
+      for (int i = lhead->deqidx.load(); i < BUFFER_SIZE; i++) {
+        T* item = lhead->items[i].load();
+        if (item == nullptr) {
+          hp.clear(tid);
+          return nullptr;  // This node is empty
+        }
+        if (item == taken)
+          continue;  // Already consumed by another dequeuer; the hint was stale
+        if (lhead->items[i].compare_exchange_strong(item, taken)) {
+          lhead->deqidx.store(i + 1, std::memory_order_release);  // Move the hint past the slot just consumed
+          hp.clear(tid);
+          return item;
+        }
+        if (lhead != head.load())
+          break;  // Head changed while scanning; restart from the outer loop
+      }
+    }
+  }
+};
+}  // namespace ConcurrencyFreaks
+
+#endif /* _FAA_ARRAY_QUEUE_HP_H_ */
--- a/tdactor/benchmark/third_party/HazardPointers.h
+++ b/tdactor/benchmark/third_party/HazardPointers.h
@ -0,0 +1,152 @@
+/******************************************************************************
+ * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Concurrency Freaks nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ******************************************************************************
+ */
+
+#ifndef _HAZARD_POINTERS_H_
+#define _HAZARD_POINTERS_H_
+
+#include <atomic>
+#include <vector>
+#include <iostream>
+
+namespace ConcurrencyFreaks {
+/**
+ * Hazard-pointer table plus per-thread retired lists used to defer deletion of T objects
+ * until no thread has them published as hazardous.
+ *
+ * Threads are identified by a caller-supplied index tid. Storage is always allocated for
+ * HP_MAX_THREADS threads with HP_SLOTS_PER_THREAD slots each, and no method validates its
+ * indices, so callers must guarantee
+ *   0 <= tid < maxThreads <= HP_MAX_THREADS   and   0 <= index < maxHPs <= HP_SLOTS_PER_THREAD.
+ *
+ * The object owns the slot arrays and every pointer still sitting in a retired list, so it
+ * is not copyable.
+ */
+template <typename T>
+class HazardPointers {
+ private:
+  static const int HP_MAX_THREADS = 128;
+  static const int HP_MAX_HPS = 4;  // This is named 'K' in the HP paper
+  static const int CLPAD = 128 / sizeof(std::atomic<T*>);  // Number of atomic pointers that fit in 128 bytes
+  static const int HP_SLOTS_PER_THREAD = CLPAD * 2;           // Slots actually allocated for each thread (256 bytes)
+  static const int HP_THRESHOLD_R = 0;                         // This is named 'R' in the HP paper
+  static const int MAX_RETIRED = HP_MAX_THREADS * HP_MAX_HPS;  // Maximum number of retired objects per thread
+
+  const int maxHPs;      // Slots per thread that clear() resets and retire() scans
+  const int maxThreads;  // Threads whose slots retire() scans
+
+  std::atomic<T*>* hp[HP_MAX_THREADS];
+  // It's not nice that we have a lot of empty vectors, but we need padding to avoid false sharing
+  std::vector<T*> retiredList[HP_MAX_THREADS * CLPAD];
+
+ public:
+  HazardPointers(int maxHPs = HP_MAX_HPS, int maxThreads = HP_MAX_THREADS) : maxHPs{maxHPs}, maxThreads{maxThreads} {
+    for (int ithread = 0; ithread < HP_MAX_THREADS; ithread++) {
+      // Each thread gets its own 256-byte array so that threads do not share cache lines
+      hp[ithread] = new std::atomic<T*>[HP_SLOTS_PER_THREAD];
+      // Initialize every allocated slot, not only the first HP_MAX_HPS: a default-constructed
+      // std::atomic holds an indeterminate value, and retire() reads maxHPs slots per thread
+      for (int ihp = 0; ihp < HP_SLOTS_PER_THREAD; ihp++) {
+        hp[ithread][ihp].store(nullptr, std::memory_order_relaxed);
+      }
+    }
+  }
+
+  // Copying would make two objects delete the same slot arrays and retired objects
+  HazardPointers(const HazardPointers&) = delete;
+  HazardPointers& operator=(const HazardPointers&) = delete;
+
+  ~HazardPointers() {
+    for (int ithread = 0; ithread < HP_MAX_THREADS; ithread++) {
+      delete[] hp[ithread];
+      // Clear the current retired nodes
+      for (T* retiredObj : retiredList[ithread * CLPAD]) {
+        delete retiredObj;
+      }
+    }
+  }
+
+  /**
+     * Resets all maxHPs hazard pointers of thread tid to nullptr.
+     * Progress Condition: wait-free bounded (by maxHPs)
+     */
+  void clear(const int tid) {
+    for (int ihp = 0; ihp < maxHPs; ihp++) {
+      hp[tid][ihp].store(nullptr, std::memory_order_release);
+    }
+  }
+
+  /**
+     * Resets hazard pointer ihp of thread tid to nullptr.
+     * Progress Condition: wait-free population oblivious
+     */
+  void clearOne(int ihp, const int tid) {
+    hp[tid][ihp].store(nullptr, std::memory_order_release);
+  }
+
+  /**
+     * Publishes the current value of atom in slot index of thread tid and returns it.
+     * The value is re-read after every publication and the loop ends only once the value
+     * read equals the value last published. A nullptr read on the first iteration is
+     * returned without touching the slot.
+     * Progress Condition: lock-free
+     */
+  T* protect(int index, const std::atomic<T*>& atom, const int tid) {
+    T* n = nullptr;
+    T* ret;
+    while ((ret = atom.load()) != n) {
+      hp[tid][index].store(ret);
+      n = ret;
+    }
+    return ret;
+  }
+
+  /**
+     * Publishes ptr in slot index of thread tid without re-validating it; the caller has to
+     * check afterwards that ptr is still reachable.
+     * This returns the same value that is passed as ptr, which is sometimes useful
+     * Progress Condition: wait-free population oblivious
+     */
+  T* protectPtr(int index, T* ptr, const int tid) {
+    hp[tid][index].store(ptr);
+    return ptr;
+  }
+
+  /**
+     * Same as protectPtr() but the store uses memory_order_release.
+     * This returns the same value that is passed as ptr, which is sometimes useful
+     * Progress Condition: wait-free bounded (by the number of threads squared)
+     */
+  T* protectRelease(int index, T* ptr, const int tid) {
+    hp[tid][index].store(ptr, std::memory_order_release);
+    return ptr;
+  }
+
+  /**
+     * Adds ptr to the retired list of thread tid, then deletes every object in that list
+     * that no thread (0 .. maxThreads-1) currently has published in any of its maxHPs slots.
+     * Objects that are still published stay in the list until a later retire() by the same
+     * tid or until the destructor runs.
+     * Progress Condition: wait-free bounded (by the number of threads squared)
+     */
+  void retire(T* ptr, const int tid) {
+    std::vector<T*>& retired = retiredList[tid * CLPAD];
+    retired.push_back(ptr);
+    // With HP_THRESHOLD_R == 0 the scan below runs on every call
+    if (retired.size() < static_cast<std::size_t>(HP_THRESHOLD_R))
+      return;
+    for (std::size_t iret = 0; iret < retired.size();) {
+      T* obj = retired[iret];
+      bool canDelete = true;
+      // The loop variable is deliberately not called tid, so it cannot hide the parameter
+      for (int ithread = 0; ithread < maxThreads && canDelete; ithread++) {
+        for (int ihp = maxHPs - 1; ihp >= 0; ihp--) {
+          if (hp[ithread][ihp].load() == obj) {
+            canDelete = false;
+            break;
+          }
+        }
+      }
+      if (canDelete) {
+        // erase() shifts the next candidate into position iret, so do not advance
+        retired.erase(retired.begin() + iret);
+        delete obj;
+        continue;
+      }
+      iret++;
+    }
+  }
+};
+}  // namespace ConcurrencyFreaks
+
+#endif /* _HAZARD_POINTERS_H_ */
--- a/tdactor/benchmark/third_party/LCRQueue.h
+++ b/tdactor/benchmark/third_party/LCRQueue.h
@ -0,0 +1,313 @@
+/******************************************************************************
+ * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Concurrency Freaks nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ******************************************************************************
+ */
+
+#ifndef _LCRQ_QUEUE_HP_H_
+#define _LCRQ_QUEUE_HP_H_
+
+#include <atomic>
+
+// CAS2 macro: 16-byte compare-and-swap built on x86-64 "lock cmpxchg16b"; the expression is non-zero iff the swap happened
+
+#define __CAS2(ptr, o1, o2, n1, n2)                                     \
+  ({                                                                    \
+    char __ret;                                                         \
+    __typeof__(o2) __junk;                                              \
+    __typeof__(*(ptr)) __old1 = (o1);                                   \
+    __typeof__(o2) __old2 = (o2);                                       \
+    __typeof__(*(ptr)) __new1 = (n1);                                   \
+    __typeof__(o2) __new2 = (n2);                                       \
+    asm volatile("lock cmpxchg16b %2;setz %1"                           \
+                 : "=d"(__junk), "=a"(__ret), "+m"(*ptr)                \
+                 : "b"(__new1), "c"(__new2), "a"(__old1), "d"(__old2)); \
+    __ret;                                                              \
+  })  // Expected pair {o1,o2} goes in rax/rdx, replacement pair {n1,n2} in rbx/rcx; setz turns the zero flag into the result
+
+#define CAS2(ptr, o1, o2, n1, n2) __CAS2(ptr, o1, o2, n1, n2)  // Public spelling; forwards its arguments unchanged
+
+#define BIT_TEST_AND_SET(ptr, b)                                                    \
+  ({                                                                                \
+    char __ret;                                                                     \
+    asm volatile("lock btsq $63, %0; setnc %1" : "+m"(*ptr), "=a"(__ret) : : "cc"); \
+    __ret;                                                                          \
+  })  // Atomically sets bit 63 of *ptr; result is 1 iff the bit was clear before. NOTE: b is ignored, the bit index is hard-coded to 63
+
+/**
+ * <h1> LCRQ Queue </h1>
+ *
+ * This is LCRQ by Adam Morrison and Yehuda Afek
+ * http://www.cs.tau.ac.il/~mad/publications/ppopp2013-x86queues.pdf
+ *
+ * This implementation does NOT obey the C++ memory model rules AND it is x86 specific.
+ * No guarantees are given on the correctness or consistency of the results if you use this queue.
+ *
+ * Bugs fixed:
+ * tt was not initialized in dequeue();
+ *
+ * <p>
+ * enqueue algorithm: MS enqueue + LCRQ with re-usage
+ * dequeue algorithm: MS dequeue + LCRQ with re-usage
+ * Consistency: Linearizable
+ * enqueue() progress: lock-free
+ * dequeue() progress: lock-free
+ * Memory Reclamation: Hazard Pointers (lock-free)
+ *
+ * <p>
+ * The paper on Hazard Pointers is named "Hazard Pointers: Safe Memory
+ * Reclamation for Lock-Free objects" and it is available here:
+ * http://web.cecs.pdx.edu/~walpole/class/cs510/papers/11.pdf
+ *
+ * @author Pedro Ramalhete
+ * @author Andreia Correia
+ */
+namespace ConcurrencyFreaks {
+template <typename T>
+class LCRQueue {
+ private:
+  static const int RING_POW = 10;  // log2 of the number of cells in one ring
+  static const uint64_t RING_SIZE = 1ull << RING_POW;  // Cells per ring (1024)
+
+  struct Cell {
+    std::atomic<T*> val;  // Stored item, or nullptr when the cell is empty
+    std::atomic<uint64_t> idx;  // Ticket index of the cell; bit 63 is the "unsafe" flag (see node_unsafe / set_unsafe)
+    uint64_t pad[14];  // Filler; val and idx are updated together by CAS2 as one 16-byte unit
+  } __attribute__((aligned(128)));
+
+  struct Node {
+    std::atomic<int64_t> head __attribute__((aligned(128)));  // Next ticket handed to a dequeuer
+    std::atomic<int64_t> tail __attribute__((aligned(128)));  // Next ticket handed to an enqueuer; bit 63 marks the ring closed (see crq_is_closed)
+    std::atomic<Node*> next __attribute__((aligned(128)));  // Following ring in the list, nullptr while this is the last one
+    Cell array[RING_SIZE];
+
+    Node() {  // Empty ring: cell i starts with idx == i and no value
+      for (unsigned i = 0; i < RING_SIZE; i++) {
+        array[i].val.store(nullptr, std::memory_order_relaxed);
+        array[i].idx.store(i, std::memory_order_relaxed);
+      }
+      head.store(0, std::memory_order_relaxed);
+      tail.store(0, std::memory_order_relaxed);
+      next.store(nullptr, std::memory_order_relaxed);
+    }
+  };
+
+  alignas(128) std::atomic<Node*> head;  // Ring that dequeue() reads from
+  alignas(128) std::atomic<Node*> tail;  // Ring that enqueue() writes to
+
+  static const int MAX_THREADS = 128;  // Default value of the constructor's maxThreads argument
+  const int maxThreads;  // Forwarded to the HazardPointers member below
+
+  HazardPointers<Node> hp{1, maxThreads};  // One hazard pointer per thread
+  const int kHpTail = 0;  // enqueue() and dequeue() both use hazard-pointer slot 0
+  const int kHpHead = 0;
+
+  /*
+     * Private methods
+     */
+  int is_empty(T* v) {  // Non-zero when v is nullptr
+    return (v == nullptr);
+  }
+
+  uint64_t node_index(uint64_t i) {  // Cell index with the unsafe bit (bit 63) cleared
+    return (i & ~(1ull << 63));
+  }
+
+  uint64_t set_unsafe(uint64_t i) {  // Cell index with the unsafe bit (bit 63) set
+    return (i | (1ull << 63));
+  }
+
+  uint64_t node_unsafe(uint64_t i) {  // Only the unsafe bit of a cell index (non-zero when set)
+    return (i & (1ull << 63));
+  }
+
+  inline uint64_t tail_index(uint64_t t) {  // Tail ticket with the closed bit (bit 63) cleared
+    return (t & ~(1ull << 63));
+  }
+
+  int crq_is_closed(uint64_t t) {  // Non-zero when the closed bit (bit 63) of a tail value is set
+    return (t & (1ull << 63)) != 0;
+  }
+
+  void fixState(Node* lhead) {  // If head has run past tail, move tail up to head
+    while (1) {
+      uint64_t t = lhead->tail.fetch_add(0);  // fetch_add(0) is used here as an atomic read
+      uint64_t h = lhead->head.fetch_add(0);
+      // TODO: is it ok or not to cast "t" to int64_t ?
+      if (lhead->tail.load() != (int64_t)t)
+        continue;  // Tail moved between the two reads; sample again
+      if (h > t) {
+        int64_t tmp = t;
+        if (lhead->tail.compare_exchange_strong(tmp, h))
+          break;
+        continue;  // Tail changed under us; sample again
+      }
+      break;
+    }
+  }
+
+  int close_crq(Node* rq, const uint64_t tailticket, const int tries) {  // Tries to set the closed bit of rq->tail; non-zero on success
+    if (tries < 10) {
+      int64_t tmp = tailticket + 1;  // Only succeeds while nobody else has taken a ticket since ours
+      return rq->tail.compare_exchange_strong(tmp, (tailticket + 1) | (1ull << 63));
+    } else {
+      return BIT_TEST_AND_SET(&rq->tail, 63);  // After 10 attempts set the bit unconditionally
+    }
+  }
+
+ public:
+  LCRQueue(int maxThreads = MAX_THREADS) : maxThreads{maxThreads} {
+    // Shared object init
+    Node* sentinel = new Node;
+    head.store(sentinel, std::memory_order_relaxed);
+    tail.store(sentinel, std::memory_order_relaxed);
+  }
+
+  ~LCRQueue() {
+    while (dequeue(0) != nullptr)  // Runs as thread id 0; returned items are not deleted here
+      ;                  // Drain the queue
+    delete head.load();  // Delete the last node
+  }
+
+  std::string className() {  // Human-readable name of this queue implementation
+    return "LCRQueue";
+  }
+
+  void enqueue(T* item, const int tid) {  // Appends item; tid is the caller's index into the hazard-pointer table
+    int try_close = 0;  // Close attempts so far, passed to close_crq()
+    while (true) {
+      Node* ltail = hp.protectPtr(kHpTail, tail.load(), tid);
+      if (ltail != tail.load())
+        continue;  // Tail changed after publishing the hazard pointer; start over
+      Node* lnext = ltail->next.load();
+      if (lnext != nullptr) {  // Help advance the tail
+        tail.compare_exchange_strong(ltail, lnext);
+        continue;
+      }
+
+      uint64_t tailticket = ltail->tail.fetch_add(1);  // Claim the next tail ticket of this ring
+      if (crq_is_closed(tailticket)) {  // Ring is closed: put the item into a fresh ring and append that
+        Node* newNode = new Node();
+        // Solo enqueue (superfluous?)
+        newNode->tail.store(1, std::memory_order_relaxed);
+        newNode->array[0].val.store(item, std::memory_order_relaxed);
+        newNode->array[0].idx.store(0, std::memory_order_relaxed);
+        Node* nullnode = nullptr;
+        if (ltail->next.compare_exchange_strong(nullnode, newNode)) {  // Insert new ring
+          tail.compare_exchange_strong(ltail, newNode);                // Advance the tail
+          hp.clear(tid);
+          return;
+        }
+        delete newNode;  // Another thread appended first; discard ours and retry
+        continue;
+      }
+      Cell* cell = &ltail->array[tailticket & (RING_SIZE - 1)];  // Ticket modulo ring size selects the cell
+      uint64_t idx = cell->idx.load();
+      if (cell->val.load() == nullptr) {
+        if (node_index(idx) <= tailticket) {  // Cell has not been advanced past our ticket
+          // TODO: is the missing cast before "t" ok or not to add?
+          if ((!node_unsafe(idx) || ltail->head.load() < (int64_t)tailticket)) {
+            if (CAS2((void**)cell, nullptr, idx, item, tailticket)) {  // (nullptr, idx) -> (item, tailticket) in one 16-byte swap
+              hp.clear(tid);
+              return;
+            }
+          }
+        }
+      }
+      if (((int64_t)(tailticket - ltail->head.load()) >= (int64_t)RING_SIZE) &&  // Tail is a full ring ahead of head: try to close the ring
+          close_crq(ltail, tailticket, ++try_close))
+        continue;
+    }
+  }
+
+  T* dequeue(const int tid) {  // Returns the oldest item, or nullptr when the queue looks empty
+    while (true) {
+      Node* lhead = hp.protectPtr(kHpHead, head.load(), tid);
+      if (lhead != head.load())
+        continue;  // Head changed after publishing the hazard pointer; start over
+      uint64_t headticket = lhead->head.fetch_add(1);  // Claim the next head ticket of this ring
+      Cell* cell = &lhead->array[headticket & (RING_SIZE - 1)];  // Ticket modulo ring size selects the cell
+
+      int r = 0;  // Spins spent waiting for an enqueuer to fill the cell
+      uint64_t tt = 0;  // Cached tail value of this ring, refreshed every 1024 spins
+
+      while (true) {
+        uint64_t cell_idx = cell->idx.load();
+        uint64_t unsafe = node_unsafe(cell_idx);
+        uint64_t idx = node_index(cell_idx);
+        T* val = cell->val.load();
+
+        if (idx > headticket)
+          break;  // Cell already belongs to a later ticket; give up on this ticket
+
+        if (val != nullptr) {
+          if (idx == headticket) {
+            if (CAS2((void**)cell, val, cell_idx, nullptr, unsafe | (headticket + RING_SIZE))) {  // Take the value and move the cell one lap ahead, keeping its unsafe bit
+              hp.clear(tid);
+              return val;
+            }
+          } else {
+            if (CAS2((void**)cell, val, cell_idx, val, set_unsafe(idx)))  // Value belongs to an older ticket: flag the cell unsafe and leave it
+              break;
+          }
+        } else {
+          if ((r & ((1ull << 10) - 1)) == 0)
+            tt = lhead->tail.load();  // Re-read the tail only once per 1024 spins
+          // Optimization: try to bail quickly if queue is closed.
+          int crq_closed = crq_is_closed(tt);
+          uint64_t t = tail_index(tt);
+          if (unsafe) {  // Nothing to do, move along
+            if (CAS2((void**)cell, val, cell_idx, val, unsafe | (headticket + RING_SIZE)))
+              break;
+          } else if (t < headticket + 1 || r > 200000 || crq_closed) {  // Tail not past this ticket, waited too long, or ring closed
+            if (CAS2((void**)cell, val, idx, val, headticket + RING_SIZE)) {  // Move the empty cell one lap ahead so the enqueuer holding this ticket cannot fill it
+              if (r > 200000 && tt > RING_SIZE)
+                BIT_TEST_AND_SET(&lhead->tail, 63);  // Close the ring after spinning too long
+              break;
+            }
+          } else {
+            ++r;
+          }
+        }
+      }
+
+      if (tail_index(lhead->tail.load()) <= headticket + 1) {  // No enqueue ticket beyond ours: the ring looks drained
+        fixState(lhead);
+        // try to return empty
+        Node* lnext = lhead->next.load();
+        if (lnext == nullptr) {
+          hp.clear(tid);
+          return nullptr;  // Queue is empty
+        }
+        if (tail_index(lhead->tail) <= headticket + 1) {  // Still drained after fixState(): move on to the next ring
+          if (head.compare_exchange_strong(lhead, lnext))
+            hp.retire(lhead, tid);  // Only the thread that swung head retires the old ring
+        }
+      }
+    }
+  }
+};
+}  // namespace ConcurrencyFreaks
+#endif /* _LCRQ_QUEUE_HP_H_ */
--- a/tdactor/benchmark/third_party/LazyIndexArrayQueue.h
+++ b/tdactor/benchmark/third_party/LazyIndexArrayQueue.h
--- a/tdactor/benchmark/third_party/MoodyCamelQueue.h
+++ b/tdactor/benchmark/third_party/MoodyCamelQueue.h
--- a/tdactor/benchmark/third_party/mp-queue.c
+++ b/tdactor/benchmark/third_party/mp-queue.c
@ -0,0 +1,684 @@
+/*
+    This file is part of KittenDB-Engine Library.
+
+    KittenDB-Engine Library is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Lesser General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    KittenDB-Engine Library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public License
+    along with KittenDB-Engine Library.  If not, see <http://www.gnu.org/licenses/>.
+
+    Copyright 2014-2016 Telegraph Inc
+              2014-2016 Nikolai Durov
+              2014      Andrey Lopatin
+*/
+
+/* Dummy definition: everything below is compiled only when TG_LCR_QUEUE is defined,
+   so this keeps the translation unit from being empty otherwise. */
+char disable_linker_warning_about_empty_file_mp_queue_cpp;
+
+#ifdef TG_LCR_QUEUE
+#include <assert.h>
+#include <errno.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/futex.h>
+#include <sys/syscall.h>
+
+#include "mp-queue.h"
+
+/* Replace the standard assert() by a macro that merely evaluates its argument:
+   the expression is still computed (some call sites in this file put calls with
+   side effects inside assert), but a false result is never reported. */
+#undef assert
+#ifndef assert
+#define assert(x) x
+#endif
+
+/* Allocation statistics, updated with atomic adds throughout this file:
+   mpq_blocks_allocated        - malloc'ed and not yet freed blocks of MPQ_BLOCK_SIZE nodes
+   mpq_blocks_allocated_max    - high-water mark of mpq_blocks_allocated (best effort)
+   mpq_blocks_allocations      - number of alloc_mpq_block() calls
+   mpq_blocks_true_allocations - number of those calls that had to malloc() a new block
+   mpq_blocks_wasted           - retired blocks handed to a garbage queue and not yet reclaimed
+   mpq_blocks_prepared         - unused blocks handed to a prepared queue and not yet taken */
+volatile int mpq_blocks_allocated, mpq_blocks_allocated_max, mpq_blocks_allocations, mpq_blocks_true_allocations,
+    mpq_blocks_wasted, mpq_blocks_prepared;
+/* same as mpq_blocks_allocated / mpq_blocks_allocated_max, for blocks of MPQ_SMALL_BLOCK_SIZE nodes */
+volatile int mpq_small_blocks_allocated, mpq_small_blocks_allocated_max;
+
+/* id of the calling thread as assigned by get_this_thread_id(); 0 = not assigned yet */
+__thread int mpq_this_thread_id;
+/* the calling thread's row of mqb_hazard_ptr, set when its id is assigned */
+__thread void **thread_hazard_pointers;
+/* number of thread ids handed out so far */
+volatile int mpq_threads;
+
+/* global recycling queues: "garbage" holds retired blocks that could not be freed because a
+   hazard pointer still referred to them, "prepared" holds initialised blocks that ended up unused */
+struct mp_queue MqGarbageBlocks, MqPreparedBlocks;
+struct mp_queue MqGarbageSmallBlocks, MqPreparedSmallBlocks;
+
+/* Compiler-only barrier: an empty asm statement with a "memory" clobber, so the
+   compiler may not cache or reorder memory accesses across it. Emits no instruction. */
+static inline void barrier(void) {
+  asm volatile("" : : : "memory");
+}
+/* Issues an mfence instruction (x86) and, through the "memory" clobber, also acts
+   as a compiler barrier. */
+static inline void mfence(void) {
+  asm volatile("mfence" : : : "memory");
+}
+
+/* hazard pointers: one row of THREAD_HPTRS slots per thread, indexed by thread id.
+   The queue operations in this file use slots 0..2 of the caller's row;
+   get_ptr_multithread_copy() uses slot COMMON_HAZARD_PTR_NUM. */
+
+void *mqb_hazard_ptr[MAX_MPQ_THREADS][THREAD_HPTRS] __attribute__((aligned(64)));
+
+/* Tells who currently publishes ptr in hazard slots a..b (inclusive).
+   Result is a bit set: bit 0 (value 1) - the calling thread's own row holds ptr,
+                        bit 1 (value 2) - some other thread's row holds ptr.
+   Rows 1..mpq_threads are inspected; the scan of the other rows stops at the first hit. */
+int is_hazard_ptr(void *ptr, int a, int b) {
+  int slot, tid;
+  int own_hit = 0;
+  barrier();
+  int thread_count = mpq_threads;
+  int self = mpq_this_thread_id;
+  barrier();
+  /* the caller's own row first */
+  slot = a;
+  while (slot <= b && !own_hit) {
+    own_hit = (mqb_hazard_ptr[self][slot] == ptr);
+    slot++;
+  }
+  /* then every other registered thread */
+  for (tid = 1; tid <= thread_count; tid++) {
+    if (tid != self) {
+      for (slot = a; slot <= b; slot++) {
+        if (mqb_hazard_ptr[tid][slot] == ptr) {
+          barrier();
+          return own_hit + 2;
+        }
+      }
+    }
+  }
+  barrier();
+  return own_hit;
+}
+
+/* Resets thread-id bookkeeping: the global count of ids handed out and the calling
+   thread's own id. The thread-local ids of other threads are not touched. */
+void clear_thread_ids(void) {
+  mpq_threads = 0;
+  mpq_this_thread_id = 0;
+}
+/* Returns the calling thread's id, assigning the next free one on first use.
+   Ids start at 1 and must stay below MAX_MPQ_THREADS because they index
+   mqb_hazard_ptr; the process is aborted when the limit is exceeded. */
+int get_this_thread_id(void) {
+  int i = mpq_this_thread_id;
+  if (i) {
+    return i;
+  }
+  i = __sync_fetch_and_add(&mpq_threads, 1) + 1;
+  /* checked explicitly: assert() in this file only evaluates its argument, and an
+     out-of-range id would index past the end of mqb_hazard_ptr */
+  if (i <= 0 || i >= MAX_MPQ_THREADS) {
+    fprintf(stderr, "mp-queue: thread id %d out of range (limit %d)\n", i, MAX_MPQ_THREADS - 1);
+    abort();
+  }
+  thread_hazard_pointers = mqb_hazard_ptr[i];
+  return mpq_this_thread_id = i;
+}
+
+/* custom semaphore implementation using futexes */
+
+/* Adds one unit to the semaphore and, if some thread has registered itself as
+   waiting, wakes one sleeper on the futex. Always returns 0. */
+int mp_sem_post(mp_sem_t *sem) {
+  __sync_fetch_and_add(&sem->value, 1);
+  if (sem->waiting <= 0) {
+    /* nobody is asleep: skip the system call */
+    return 0;
+  }
+  syscall(__NR_futex, &sem->value, FUTEX_WAKE, 1, NULL, 0, 0);
+  return 0;
+}
+
+/* Takes one unit from the semaphore, sleeping on the futex while none is available.
+   Always returns 0. */
+int mp_sem_wait(mp_sem_t *sem) {
+  // v: last observed value; q: number of consecutive re-reads done while v was negative
+  int v = sem->value, q = 0;
+  while (1) {
+    if (v > 0) {
+      // try to take a unit; the returned old value tells whether we really got one
+      v = __sync_fetch_and_add(&sem->value, -1);
+      if (v > 0) {
+        return 0;
+      }
+      // somebody else was faster: undo our decrement and go on with the resulting value
+      v = __sync_add_and_fetch(&sem->value, 1);
+    } else {
+      // presumably a negative value means another thread is between a failed decrement
+      // and its undo, so re-read up to 10 times before going to sleep
+      if (v < 0 && q++ < 10) {
+        barrier();
+        v = sem->value;
+        continue;
+      }
+      // register as a waiter so that mp_sem_post() issues a wake-up, then sleep;
+      // v is passed to FUTEX_WAIT as the value expected in sem->value
+      __sync_fetch_and_add(&sem->waiting, 1);
+      syscall(__NR_futex, &sem->value, FUTEX_WAIT, v, NULL, 0, 0);
+      __sync_fetch_and_add(&sem->waiting, -1);
+      v = sem->value;
+      q = 0;
+    }
+  }
+}
+
+/* Non-blocking variant of mp_sem_wait(): returns 0 if a unit was taken,
+   -1 if none was available. */
+int mp_sem_trywait(mp_sem_t *sem) {
+  if (sem->value <= 0) {
+    return -1;
+  }
+  if (__sync_fetch_and_add(&sem->value, -1) > 0) {
+    return 0;
+  }
+  /* the value dropped to zero before our decrement landed: give the unit back */
+  __sync_fetch_and_add(&sem->value, 1);
+  return -1;
+}
+
+/* functions for one mp_queue_block */
+
+// Returns a block ready to be linked into a queue (mqb_magic == MQ_BLOCK_USED_MAGIC,
+// head/tail/next cleared).
+//   first_val       - stored in node 0; if non-zero, mqb_tail is set to 1, i.e. the
+//                     block starts out already holding that one element
+//   allow_recursion - if non-zero, first try to recycle a block from the garbage queue
+//                     (only when no hazard pointer in slots 0..2 refers to it) and then
+//                     from the prepared queue, before falling back to malloc()
+//   is_small        - currently ignored: it is overwritten with 0 on entry, so every
+//                     block is created with MPQ_BLOCK_SIZE nodes
+// NOTE(review): assert() in this file only evaluates its argument, so a failed malloc()
+// is not detected here.
+// may invoke mpq_pop()/mpq_push() if allow_recursion=1
+struct mp_queue_block *alloc_mpq_block(mqn_value_t first_val, int allow_recursion, int is_small) {
+  is_small = 0;
+  struct mp_queue_block *QB = 0;
+  int prepared = 0, align_bytes = 0;
+  long size = (is_small ? MPQ_SMALL_BLOCK_SIZE : MPQ_BLOCK_SIZE);
+  if (allow_recursion) {
+    QB = mpq_pop(is_small ? &MqGarbageSmallBlocks : &MqGarbageBlocks, MPQF_RECURSIVE);
+    if (QB) {
+      if (!is_hazard_ptr(QB, 0, 2)) {
+        // reclaiming garbage
+        assert(QB->mqb_magic == MQ_BLOCK_GARBAGE_MAGIC);
+        __sync_fetch_and_add(&mpq_blocks_wasted, -1);
+        align_bytes = QB->mqb_align_bytes;
+      } else {
+        // still referenced by some hazard pointer: put it back and do not use it
+        mpq_push(is_small ? &MqGarbageSmallBlocks : &MqGarbageBlocks, QB, MPQF_RECURSIVE);
+        QB = 0;
+      }
+    }
+    if (!QB) {
+      QB = mpq_pop(is_small ? &MqPreparedSmallBlocks : &MqPreparedBlocks, MPQF_RECURSIVE);
+      if (QB) {
+        assert(QB->mqb_magic == MQ_BLOCK_PREPARED_MAGIC);
+        prepared = 1;
+        __sync_fetch_and_add(&mpq_blocks_prepared, -1);
+        align_bytes = QB->mqb_align_bytes;
+      }
+    }
+  }
+  if (!QB) {
+    // header + size nodes of two words each, plus slack so that the start can be
+    // rounded up to a multiple of MPQ_BLOCK_ALIGNMENT
+    char *new_block = malloc(offsetof(struct mp_queue_block, mqb_nodes) + size * (2 * sizeof(void *)) +
+                             MPQ_BLOCK_ALIGNMENT - sizeof(void *));
+    assert(new_block);
+    assert(!((long)new_block & (sizeof(void *) - 1)));
+    // distance from the malloc() result to the aligned block; free_mpq_block() subtracts it again
+    align_bytes = -(int)(long)new_block & (MPQ_BLOCK_ALIGNMENT - 1);
+    QB = (struct mp_queue_block *)(new_block + align_bytes);
+
+    __sync_fetch_and_add(&mpq_blocks_true_allocations, 1);
+    if (is_small) {
+      int t = __sync_fetch_and_add(&mpq_small_blocks_allocated, 1);
+      if (t >= mpq_small_blocks_allocated_max) {
+        // single CAS attempt: the maximum is statistics only
+        __sync_bool_compare_and_swap(&mpq_small_blocks_allocated_max, mpq_small_blocks_allocated_max, t + 1);
+      }
+    } else {
+      int t = __sync_fetch_and_add(&mpq_blocks_allocated, 1);
+      if (t >= mpq_blocks_allocated_max) {
+        // single CAS attempt: the maximum is statistics only
+        __sync_bool_compare_and_swap(&mpq_blocks_allocated_max, mpq_blocks_allocated_max, t + 1);
+      }
+    }
+  } else {
+    assert(QB->mqb_size == size);
+  }
+  __sync_fetch_and_add(&mpq_blocks_allocations, 1);
+
+  // clear the header only (everything in front of mqb_nodes), then restore the saved fields
+  memset(QB, 0, offsetof(struct mp_queue_block, mqb_nodes));
+  QB->mqb_align_bytes = align_bytes;
+  QB->mqb_size = size;
+
+  QB->mqb_nodes[0].idx = MQN_SAFE;
+  QB->mqb_nodes[0].val = first_val;
+
+  // a block taken from the prepared queue keeps nodes 1..size-1 as they are
+  if (!prepared) {
+    long i;
+    for (i = 1; i < size; i++) {
+      QB->mqb_nodes[i].idx = MQN_SAFE + i;
+      QB->mqb_nodes[i].val = 0;
+    }
+  }
+
+  if (first_val) {
+    QB->mqb_tail = 1;
+  }
+
+  QB->mqb_magic = MQ_BLOCK_USED_MAGIC;
+  return QB;
+}
+
+/* Returns a block obtained from alloc_mpq_block() to the heap: marks it free,
+   decrements the allocation counter matching its size and releases the
+   underlying malloc() area (which starts mqb_align_bytes before the block). */
+void free_mpq_block(struct mp_queue_block *QB) {
+  assert(QB->mqb_magic == MQ_BLOCK_USED_MAGIC);
+  assert((unsigned)QB->mqb_align_bytes < MPQ_BLOCK_ALIGNMENT && !(QB->mqb_align_bytes & (sizeof(void *) - 1)));
+  QB->mqb_magic = MQ_BLOCK_FREE_MAGIC;
+  int is_small_block = (QB->mqb_size == MPQ_SMALL_BLOCK_SIZE);
+  if (!is_small_block) {
+    assert(QB->mqb_size == MPQ_BLOCK_SIZE);
+  }
+  __sync_fetch_and_add(is_small_block ? &mpq_small_blocks_allocated : &mpq_blocks_allocated, -1);
+  char *malloc_base = (char *)QB - QB->mqb_align_bytes;
+  free(malloc_base);
+}
+
+/* Repairs a block whose head counter has run ahead of its tail counter (poppers
+   increment head even when nothing is there): moves tail up to the observed head.
+   Does nothing when head <= tail in unsigned comparison, which presumably also
+   covers a tail carrying the MQN_SAFE (closed) bit. */
+static inline void mpq_fix_state(struct mp_queue_block *QB) {
+  long h, t;
+  while (1) {
+    barrier();
+    t = QB->mqb_tail;
+    barrier();
+    h = QB->mqb_head;
+    barrier();
+    if ((unsigned long)h <= (unsigned long)t) {
+      break;
+    }
+    // tail moved while we were reading head: the pair (t, h) is stale, read again
+    if (QB->mqb_tail != t) {
+      continue;
+    }
+    // here tail < head ; try to advance tail to head
+    // (or to some value h such that tail < h <= head)
+    if (__sync_bool_compare_and_swap(&QB->mqb_tail, t, h)) {
+      break;
+    }
+  }
+}
+
+/* Pops one value from a single block. Returns the value, or 0 when the block was
+   observed empty (tail not beyond the ticket just taken).
+   Each attempt takes a ticket h from mqb_head and works on node h mod size; a node
+   is updated only by a double-word CAS on its (idx, val) pair. */
+mqn_value_t mpq_block_pop(struct mp_queue_block *QB) {
+  // fprintf (stderr, "%d:mpq_block_pop(%p)\n", mpq_this_thread_id, QB);
+  long size = QB->mqb_size;
+  while (1) {
+    long h = __sync_fetch_and_add(&QB->mqb_head, 1);
+    // fprintf (stderr, "%d:  mpq_block_pop(%ld)\n", mpq_this_thread_id, h);
+    mpq_node_t *node = &QB->mqb_nodes[h & (size - 1)];
+    while (1) {
+      mpq_node_t d, e;  // d: expected node contents, e: replacement
+      barrier();
+      mqn_value_t val = node->val;
+      barrier();
+      long safe_idx = node->idx;
+      barrier();
+      long idx = safe_idx & MQN_IDX_MASK;
+      // the node already belongs to a later round than our ticket: nothing to take here
+      if (idx > h) {
+        break;
+      }
+      d.val = val;
+      d.idx = safe_idx;
+      if (val) {
+        if (idx == h) {
+          // our element: empty the node and move its index one round ahead
+          e.idx = safe_idx + size;
+          e.val = 0;
+          if (__sync_bool_compare_and_swap(&node->pair, d.pair, e.pair)) {
+            // fprintf (stderr, "%d:  mpq_block_pop(%ld) -> %lx\n", mpq_this_thread_id, h, (long) val);
+            return val;
+          }
+        } else {
+          // the node holds an element of an earlier round (idx < h): leave the value in place
+          e.val = val;
+          e.idx = idx;  // clear 'safe' flag
+          if (__sync_bool_compare_and_swap(&node->pair, d.pair, e.pair)) {
+            break;
+          }
+        }
+      } else {
+        // empty node: move its index past our ticket (keeping the 'safe' flag), so that
+        // a push holding ticket h fails its CAS on this node
+        e.idx = (safe_idx & MQN_SAFE) + h + size;
+        e.val = 0;
+        if (__sync_bool_compare_and_swap(&node->pair, d.pair, e.pair)) {
+          break;
+        }
+      }
+      /* somebody changed this element while we were inspecting it, make another loop iteration */
+    }
+    barrier();
+    long t = QB->mqb_tail & MQN_IDX_MASK;
+    barrier();
+    // no ticket beyond ours has been handed to a pusher: report empty
+    if (t <= h + 1) {
+      mpq_fix_state(QB);
+      return 0;
+    }
+    /* now try again with a new value of h */
+  }
+}
+
+/* Tries to append val to a single block.
+   Returns the ticket (>= 0) under which the value was stored, or -1 if the block is
+   closed (MQN_SAFE bit set in mqb_tail) or the push gave up; in the latter case this
+   function closes the block itself. The caller then has to use another block. */
+long mpq_block_push(struct mp_queue_block *QB, mqn_value_t val) {
+  int iterations = 0;
+  long size = QB->mqb_size;
+  // fprintf (stderr, "%d:mpq_block_push(%p)\n", mpq_this_thread_id, QB);
+  while (1) {
+    long t = __sync_fetch_and_add(&QB->mqb_tail, 1);
+    // fprintf (stderr, "%d:  mpq_block_push(%ld)\n", mpq_this_thread_id, t);
+    if (t & MQN_SAFE) {
+      return -1L;  // bad luck
+    }
+    mpq_node_t *node = &QB->mqb_nodes[t & (size - 1)];
+    barrier();
+    mqn_value_t old_val = node->val;
+    barrier();
+    long safe_idx = node->idx;
+    barrier();
+    long idx = safe_idx & MQN_IDX_MASK;
+    // usable only if the node is empty, its index has not passed our ticket, and either
+    // the node is still flagged 'safe' or no popper has taken a ticket beyond t yet
+    if (!old_val && idx <= t && ((safe_idx & MQN_SAFE) || QB->mqb_head <= t)) {
+      mpq_node_t d, e;  // d: expected node contents, e: replacement
+      d.idx = safe_idx;
+      d.val = 0;
+      e.idx = MQN_SAFE + t;
+      e.val = val;
+      if (__sync_bool_compare_and_swap(&node->pair, d.pair, e.pair)) {
+        // fprintf (stderr, "%d:  mpq_block_push(%ld) <- %lx\n", mpq_this_thread_id, t, (long) val);
+        return t;  // pushed OK
+      }
+    }
+    barrier();
+    long h = QB->mqb_head;
+    barrier();
+    // block full (ticket a whole ring ahead of head) or more than 10 failed attempts
+    if (t - h >= size || ++iterations > 10) {
+      __sync_fetch_and_or(&QB->mqb_tail, MQN_SAFE);  // closing queue
+      return -1L;                                    // bad luck
+    }
+  }
+}
+
+/* functions for mp_queue = list of mp_queue_block's */
+
+/* Initialises *MQ as an empty queue consisting of one freshly allocated block and
+   marks it with MQ_MAGIC. The structure is zeroed first, so it must not hold a live
+   queue. As a side effect the global garbage/prepared queues are initialised the
+   first time any queue is set up: the calls below recurse into this function, and
+   the mq_magic tests stop the recursion once a global queue has been set up. */
+void init_mp_queue(struct mp_queue *MQ) {
+  assert(MQ->mq_magic != MQ_MAGIC && MQ->mq_magic != MQ_MAGIC_SEM);
+  memset(MQ, 0, sizeof(struct mp_queue));
+  // allow_recursion = 0: the recycling queues may not exist yet
+  MQ->mq_head = MQ->mq_tail = alloc_mpq_block(0, 0, 1);
+  MQ->mq_magic = MQ_MAGIC;
+
+  if (!MqGarbageBlocks.mq_magic) {
+    init_mp_queue(&MqGarbageBlocks);
+    init_mp_queue(&MqGarbageSmallBlocks);
+  } else if (!MqPreparedBlocks.mq_magic) {
+    init_mp_queue(&MqPreparedBlocks);
+    init_mp_queue(&MqPreparedSmallBlocks);
+  }
+}
+
+/* Initialises *MQ like init_mp_queue() and marks it with MQ_MAGIC_SEM, the magic
+   required by mpq_push_w()/mpq_pop_w()/mpq_pop_nw(). With POSIX semaphores the
+   semaphore is initialised to 0 here; otherwise the custom semaphore is left as
+   zeroed by init_mp_queue(). */
+void init_mp_queue_w(struct mp_queue *MQ) {
+  init_mp_queue(MQ);
+#if MPQ_USE_POSIX_SEMAPHORES
+  sem_init(&MQ->mq_sem, 0, 0);
+#endif
+  MQ->mq_magic = MQ_MAGIC_SEM;
+}
+
+/* Allocates a 64-byte aligned struct mp_queue on the heap and initialises it with
+   init_mp_queue(). Returns NULL if the memory could not be obtained.
+   Release the result with free_mp_queue(). */
+struct mp_queue *alloc_mp_queue(void) {
+  struct mp_queue *MQ = NULL;
+  /* checked explicitly: assert() in this file would not report a failure, and
+     the memset() below must not run on a null pointer */
+  if (posix_memalign((void **)&MQ, 64, sizeof(*MQ)) != 0 || !MQ) {
+    return NULL;
+  }
+  memset(MQ, 0, sizeof(*MQ));
+  init_mp_queue(MQ);
+  return MQ;
+}
+
+/* Allocates a 64-byte aligned struct mp_queue on the heap and initialises it with
+   init_mp_queue_w(), i.e. for use with the semaphore-based *_w operations.
+   Returns NULL if the memory could not be obtained.
+   Release the result with free_mp_queue(). */
+struct mp_queue *alloc_mp_queue_w(void) {
+  struct mp_queue *MQ = NULL;
+  /* checked explicitly: assert() in this file would not report a failure, and
+     the memset() below must not run on a null pointer */
+  if (posix_memalign((void **)&MQ, 64, sizeof(*MQ)) != 0 || !MQ) {
+    return NULL;
+  }
+  memset(MQ, 0, sizeof(*MQ));
+  init_mp_queue_w(MQ);
+  return MQ;
+}
+
+/* Frees every block of the queue and resets *MQ to the uninitialised state
+   (head, tail and magic all zero). Any values still stored in the blocks are dropped.
+   invoke only if sure that nobody else may be using this mp_queue in parallel */
+void clear_mp_queue(struct mp_queue *MQ) {
+  assert(MQ->mq_magic == MQ_MAGIC || MQ->mq_magic == MQ_MAGIC_SEM);
+  assert(MQ->mq_head && MQ->mq_tail);
+  struct mp_queue_block *cur = MQ->mq_head;
+  while (cur) {
+    /* remember the successor before the block is unlinked and released */
+    struct mp_queue_block *following = cur->mqb_next;
+    assert(cur->mqb_next || cur == MQ->mq_tail);
+    cur->mqb_next = 0;
+    free_mpq_block(cur);
+    cur = following;
+  }
+  MQ->mq_tail = 0;
+  MQ->mq_head = 0;
+  MQ->mq_magic = 0;
+}
+
+/* Destroys a queue obtained from alloc_mp_queue()/alloc_mp_queue_w(): frees all of
+   its blocks and then the queue structure itself. Passing NULL is a no-op, as with
+   free(). Same restriction as clear_mp_queue(): nobody else may be using the queue. */
+void free_mp_queue(struct mp_queue *MQ) {
+  if (!MQ) {
+    return;
+  }
+  clear_mp_queue(MQ);
+  free(MQ);
+}
+
+// Pops one value from MQ; returns 0 if the queue was observed empty.
+// flags: MPQF_RECURSIVE - publish the current block in hazard slot 1 instead of slot 0
+//                         (used for calls nested inside another queue operation)
+//        MPQF_STORE_PTR - on return, leave the block the value came from (0 if the
+//                         queue was empty) in hazard slot 2
+// may invoke mpq_push() to discard new empty block
+mqn_value_t mpq_pop(struct mp_queue *MQ, int flags) {
+  void **hptr = &mqb_hazard_ptr[get_this_thread_id()][0];
+  long r = ((flags & MPQF_RECURSIVE) != 0);
+  struct mp_queue_block *QB;
+  mqn_value_t v;
+  while (1) {
+    // publish the head block as hazard pointer, then make sure it still is the head
+    QB = MQ->mq_head;
+    barrier();
+    hptr[r] = QB;
+    barrier();
+    __sync_synchronize();
+    if (MQ->mq_head != QB) {
+      continue;
+    }
+
+    v = mpq_block_pop(QB);
+    if (v) {
+      break;
+    }
+    barrier();
+    // head block empty and no successor: the whole queue is empty
+    if (!QB->mqb_next) {
+      QB = 0;
+      break;
+    }
+    // a successor exists; try this block once more before abandoning it
+    v = mpq_block_pop(QB);
+    if (v) {
+      break;
+    }
+    // unlink the exhausted head block; only the thread whose CAS succeeds disposes of it
+    if (__sync_bool_compare_and_swap(&MQ->mq_head, QB, QB->mqb_next)) {
+      // want to free QB here, but this is complicated if somebody else holds a pointer
+      // (result 0 or 1 = no other thread has QB in its hazard slots 0..2)
+      if (is_hazard_ptr(QB, 0, 2) <= 1) {
+        free_mpq_block(QB);
+      } else {
+        __sync_fetch_and_add(&mpq_blocks_wasted, 1);
+        // ... put QB into some GC queue? ...
+        QB->mqb_magic = MQ_BLOCK_GARBAGE_MAGIC;
+        mpq_push(QB->mqb_size == MPQ_SMALL_BLOCK_SIZE ? &MqGarbageSmallBlocks : &MqGarbageBlocks, QB,
+                 flags & MPQF_RECURSIVE);
+      }
+    }
+  }
+  if (flags & MPQF_STORE_PTR) {
+    hptr[2] = QB;
+  }
+  hptr[r] = 0;
+  return v;
+}
+
+/* 1 = definitely empty (for some serialization), 0 = possibly non-empty;
+   may invoke mpq_push() to discard empty block.
+   Uses hazard slot 0 of the calling thread and clears it before returning.
+   Exhausted head blocks that already have a successor are unlinked on the way. */
+int mpq_is_empty(struct mp_queue *MQ) {
+  void **hptr = &mqb_hazard_ptr[get_this_thread_id()][0];
+  struct mp_queue_block *QB;
+  while (1) {
+    // publish the head block as hazard pointer, then make sure it still is the head
+    QB = MQ->mq_head;
+    barrier();
+    *hptr = QB;
+    barrier();
+    __sync_synchronize();
+    if (MQ->mq_head != QB) {
+      continue;
+    }
+    barrier();
+    long h = QB->mqb_head;
+    barrier();
+    long t = QB->mqb_tail;
+    barrier();
+    // block not closed: it is the last one that matters, compare its counters directly
+    if (!(t & MQN_SAFE)) {
+      *hptr = 0;
+      return t <= h;
+    }
+    // closed block: strip the flag before comparing
+    t &= MQN_IDX_MASK;
+    if (t > h) {
+      *hptr = 0;
+      return 0;
+    }
+    barrier();
+    // closed, drained, and nothing linked behind it
+    if (!QB->mqb_next) {
+      *hptr = 0;
+      return 1;
+    }
+    if (__sync_bool_compare_and_swap(&MQ->mq_head, QB, QB->mqb_next)) {
+      // want to free QB here, but this is complicated if somebody else holds a pointer
+      if (is_hazard_ptr(QB, 0, 2) <= 1) {
+        free_mpq_block(QB);
+      } else {
+        __sync_fetch_and_add(&mpq_blocks_wasted, 1);
+        // ... put QB into some GC queue? ...
+        QB->mqb_magic = MQ_BLOCK_GARBAGE_MAGIC;
+        mpq_push(QB->mqb_size == MPQ_SMALL_BLOCK_SIZE ? &MqGarbageSmallBlocks : &MqGarbageBlocks, QB, 0);
+      }
+    }
+  }
+  // not reached: the loop above is left only through return
+  *hptr = 0;
+  return 0;
+}
+
+/* Appends val to MQ. Returns the ticket reported by mpq_block_push() when the value
+   went into the current tail block, or 0 when a new block carrying val as its first
+   element was linked in.
+   flags: MPQF_RECURSIVE - publish the current block in hazard slot 1 instead of slot 0
+                           and allocate new blocks without recycling (no nested pop/push)
+          MPQF_STORE_PTR - on return, leave the block that received val in hazard slot 2
+   may invoke alloc_mpq_block (which recursively invokes mpq_pop)
+   or mpq_push() (without needing to hold hazard pointer) to deal with blocks */
+long mpq_push(struct mp_queue *MQ, mqn_value_t val, int flags) {
+  void **hptr = mqb_hazard_ptr[get_this_thread_id()];
+  long r = ((flags & MPQF_RECURSIVE) != 0);
+  while (1) {
+    // publish the tail block as hazard pointer, then make sure it still is the tail
+    struct mp_queue_block *QB = MQ->mq_tail;
+    barrier();
+    hptr[r] = QB;
+    barrier();
+    __sync_synchronize();
+    if (MQ->mq_tail != QB) {
+      continue;
+    }
+
+    // mq_tail lags behind: help to advance it and start over
+    if (QB->mqb_next) {
+      __sync_bool_compare_and_swap(&MQ->mq_tail, QB, QB->mqb_next);
+      continue;
+    }
+    long pos = mpq_block_push(QB, val);
+    if (pos >= 0) {
+      if (flags & MPQF_STORE_PTR) {
+        hptr[2] = QB;
+      }
+      hptr[r] = 0;
+      return pos;
+    }
+#define DBG(c)  // fprintf (stderr, "[%d] pushing %lx to %p,%p: %c\n", mpq_this_thread_id, (long) val, MQ, QB, c);
+    DBG('A');
+    /*
+    if (__sync_fetch_and_add (&QB->mqb_next_allocators, 1)) {
+      // somebody else will allocate next block; busy wait instead of spurious alloc/free
+      DBG('B')
+      while (!QB->mqb_next) {
+        barrier ();
+      }
+      DBG('C')
+      continue;
+    }
+    */
+    // the tail block is closed: build a successor that already contains val
+    int is_small = (QB == MQ->mq_head);
+    struct mp_queue_block *NQB;
+    if (!r) {
+      assert(!hptr[1]);
+      NQB = alloc_mpq_block(val, 1, is_small);
+      assert(!hptr[1]);
+    } else {
+      NQB = alloc_mpq_block(val, 0, is_small);
+    }
+    assert(hptr[r] == QB);
+    DBG('D')
+    if (__sync_bool_compare_and_swap(&QB->mqb_next, 0, NQB)) {
+      __sync_bool_compare_and_swap(&MQ->mq_tail, QB, NQB);
+      DBG('E')
+      if (flags & MPQF_STORE_PTR) {
+        hptr[2] = NQB;
+      }
+      hptr[r] = 0;
+      return 0;
+    } else {
+      DBG('F');
+      // another thread linked its block first: keep ours for later reuse and retry.
+      // The prepared queue is chosen by the block's real size (as the garbage path does),
+      // not by the is_small hint: alloc_mpq_block() currently ignores that hint, and a
+      // block filed under the wrong size would never be picked up again.
+      // NOTE(review): this nested push runs with flags 0 even when r == 1, so it works
+      // on hazard slot 0 -- confirm this cannot disturb an enclosing operation.
+      NQB->mqb_magic = MQ_BLOCK_PREPARED_MAGIC;
+      mpq_push(NQB->mqb_size == MPQ_SMALL_BLOCK_SIZE ? &MqPreparedSmallBlocks : &MqPreparedBlocks, NQB, 0);
+      __sync_fetch_and_add(&mpq_blocks_prepared, 1);
+    }
+  }
+#undef DBG
+}
+
+/* Blocking pop for a queue initialised with init_mp_queue_w(): waits on the queue's
+   semaphore until an element has been announced by mpq_push_w(), then pops it.
+   The low bits of flags (MPQF_MAX_ITERATIONS mask) give the number of non-blocking
+   semaphore attempts made before falling back to a blocking wait; the remaining
+   flag bits are passed on to mpq_pop(). */
+mqn_value_t mpq_pop_w(struct mp_queue *MQ, int flags) {
+  assert(MQ->mq_magic == MQ_MAGIC_SEM);
+  int s = -1, iterations = flags & MPQF_MAX_ITERATIONS;
+  /* spin phase */
+  while (iterations-- > 0) {
+#if MPQ_USE_POSIX_SEMAPHORES
+    s = sem_trywait(&MQ->mq_sem);
+#else
+    s = mp_sem_trywait(&MQ->mq_sem);
+#endif
+    if (!s) {
+      break;
+    }
+#if MPQ_USE_POSIX_SEMAPHORES
+    assert(errno == EAGAIN || errno == EINTR);
+#endif
+  }
+  /* blocking phase, entered only if the spin phase did not obtain the semaphore */
+  while (s < 0) {
+#if MPQ_USE_POSIX_SEMAPHORES
+    s = sem_wait(&MQ->mq_sem);
+#else
+    s = mp_sem_wait(&MQ->mq_sem);
+#endif
+    if (!s) {
+      break;
+    }
+#if MPQ_USE_POSIX_SEMAPHORES
+    assert(errno == EAGAIN);
+#endif
+  }
+  /* mpq_pop() returns the value itself, not a pointer to it */
+  mqn_value_t v = mpq_pop(MQ, flags);
+  assert(v);
+  return v;
+}
+
+/* Non-blocking pop for a queue initialised with init_mp_queue_w(): makes up to
+   (flags & MPQF_MAX_ITERATIONS) non-blocking attempts to take the queue's semaphore
+   and returns 0 if none succeeds; otherwise pops and returns one element.
+   The remaining flag bits are passed on to mpq_pop(). */
+mqn_value_t mpq_pop_nw(struct mp_queue *MQ, int flags) {
+  assert(MQ->mq_magic == MQ_MAGIC_SEM);
+  int s = -1, iterations = flags & MPQF_MAX_ITERATIONS;
+  while (iterations-- > 0) {
+#if MPQ_USE_POSIX_SEMAPHORES
+    s = sem_trywait(&MQ->mq_sem);
+#else
+    s = mp_sem_trywait(&MQ->mq_sem);
+#endif
+    if (s >= 0) {
+      break;
+    }
+#if MPQ_USE_POSIX_SEMAPHORES
+    assert(errno == EAGAIN || errno == EINTR);
+#endif
+  }
+  if (s < 0) {
+    return 0;
+  }
+  /* mpq_pop() returns the value itself, not a pointer to it */
+  mqn_value_t v = mpq_pop(MQ, flags);
+  assert(v);
+  return v;
+}
+
+/* Push for a queue initialised with init_mp_queue_w(): appends v with mpq_push() and
+   then posts the queue's semaphore so that one mpq_pop_w()/mpq_pop_nw() caller can
+   proceed. Returns the result of mpq_push(). */
+long mpq_push_w(struct mp_queue *MQ, mqn_value_t v, int flags) {
+  assert(MQ->mq_magic == MQ_MAGIC_SEM);
+  long res = mpq_push(MQ, v, flags);
+  /* the post is performed outside assert() so that it does not depend on how the
+     assert macro is defined */
+#if MPQ_USE_POSIX_SEMAPHORES
+  int post_rc = sem_post(&MQ->mq_sem);
+#else
+  int post_rc = mp_sem_post(&MQ->mq_sem);
+#endif
+  assert(post_rc >= 0);
+  (void)post_rc;
+  return res;
+}
+
+/* Reads the pointer stored at *ptr and calls incref() on it while it is published in
+   the calling thread's hazard slot COMMON_HAZARD_PTR_NUM. The read is repeated until
+   *ptr is unchanged after the hazard pointer has been published. Returns the pointer;
+   the hazard slot is cleared again before returning. */
+void *get_ptr_multithread_copy(void **ptr, void (*incref)(void *ptr)) {
+  void **hazard_slot = &mqb_hazard_ptr[get_this_thread_id()][COMMON_HAZARD_PTR_NUM];
+  assert(*hazard_slot == NULL);
+
+  void *snapshot;
+  do {
+    snapshot = *ptr;
+    barrier();
+    *hazard_slot = snapshot;
+    barrier();
+    mfence();
+    /* loop while the source changed between the read and the publication */
+  } while (snapshot != *ptr);
+
+  incref(snapshot);
+
+  barrier();
+  *hazard_slot = NULL;
+  return snapshot;
+}
+#endif
--- a/tdactor/benchmark/third_party/mp-queue.h
+++ b/tdactor/benchmark/third_party/mp-queue.h
@ -0,0 +1,149 @@
+/*
+    This file is part of KittenDB-Engine Library.
+
+    KittenDB-Engine Library is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Lesser General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    KittenDB-Engine Library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public License
+    along with KittenDB-Engine Library.  If not, see <http://www.gnu.org/licenses/>.
+
+    Copyright 2014 Telegraph Inc
+              2014 Nikolai Durov
+              2014 Andrey Lopatin
+*/
+
+#ifndef __KDB_MP_QUEUE_H__
+#define __KDB_MP_QUEUE_H__
+
+#define MPQ_USE_POSIX_SEMAPHORES 0
+
+#if MPQ_USE_POSIX_SEMAPHORES
+#include <semaphore.h>
+#endif
+
+/* Futex-based counting semaphore used when MPQ_USE_POSIX_SEMAPHORES is 0. */
+typedef struct mp_semaphore {
+  volatile int value;    /* current count; also the word the futex calls operate on */
+  volatile int waiting;  /* number of threads registered as sleeping in mp_sem_wait() */
+} mp_sem_t;
+
+/* number of hazard pointer slots per thread (second dimension of mqb_hazard_ptr) */
+#define THREAD_HPTRS 21
+
+/* number of nodes in a small / regular queue block */
+#define MPQ_SMALL_BLOCK_SIZE 64
+#define MPQ_BLOCK_SIZE 4096  // must be a power of 2
+/* alignment, in bytes, of a queue block obtained from the heap */
+#define MPQ_BLOCK_ALIGNMENT 64
+
+/* DLONG    - integer type twice as wide as long; lets a node's (idx, val) pair be
+              compared-and-swapped as a single value
+   MQN_SAFE - flag bit kept in the most significant bit of a node index or of a
+              block's tail counter (only the sign bit set)
+   The constants are written as (-MAX - 1) because left-shifting a negative value,
+   as in (-1LL << 63), is undefined behaviour in C; value and type are unchanged. */
+#ifdef _LP64
+typedef int int128_t __attribute__((__mode__(TI)));
+#define DLONG int128_t
+// # define DLONG __int128
+#define MQN_SAFE (-0x7fffffffffffffffLL - 1)
+#else
+#define DLONG long long
+#define MQN_SAFE (-0x7fffffffL - 1)
+#endif
+
+/* all remaining bits: the index/counter proper */
+#define MQN_IDX_MASK (~MQN_SAFE)
+
+/* type of the values stored in a queue; the value 0 is used to mean "no value" */
+typedef void *mqn_value_t;
+
+/* One slot of a queue block: an index word (with the MQN_SAFE flag in its top bit)
+   and a value word. The anonymous unions give the value three names and overlay the
+   whole slot with `pair`, so that both words can be updated by one compare-and-swap. */
+typedef struct mp_queue_node {
+  union {
+    struct {
+      long idx;
+      union {
+        long mqn_value;
+        void *mqn_ptr;
+        mqn_value_t val;
+      };
+    };
+    DLONG pair;
+  };
+} mpq_node_t;
+
+/* values of mp_queue_block.mqb_magic, tracking the life cycle of a block */
+#define MQ_BLOCK_USED_MAGIC 0x1ebacaef      /* handed out by alloc_mpq_block() */
+#define MQ_BLOCK_FREE_MAGIC 0x2e4afeda      /* passed to free_mpq_block() */
+#define MQ_BLOCK_GARBAGE_MAGIC 0x3a04dc7d   /* retired, parked in a garbage queue */
+#define MQ_BLOCK_PREPARED_MAGIC 0x4b9b13cd  /* initialised but unused, parked in a prepared queue */
+
+/* values of mp_queue.mq_magic: plain queue / queue used with the semaphore (*_w) operations */
+#define MQ_MAGIC 0x1aed9b43
+#define MQ_MAGIC_SEM 0x1aedcd21
+
+/* One segment of a queue: a ring of nodes with its own head and tail counters.
+   mqb_head, mqb_tail and mqb_nodes are each aligned to 64 bytes. */
+struct mp_queue_block {
+  long mqb_head __attribute__((aligned(64)));  /* next ticket to be handed to a popper */
+  int mqb_magic;                               /* one of the MQ_BLOCK_*_MAGIC values */
+  int mqb_align_bytes;                         /* offset of this block inside its malloc() area */
+  int mqb_size;  // power of 2; one of MPQ_BLOCK_SIZE or MPQ_SMALL_BLOCK_SIZE
+  long mqb_tail __attribute__((aligned(64)));  /* next ticket for a pusher; MQN_SAFE bit set = block closed */
+  struct mp_queue_block *mqb_next;             /* successor block in the queue, 0 if none */
+  int mqb_next_allocators;                     /* referenced only by disabled code in mpq_push() */
+  /* declared with MPQ_BLOCK_SIZE entries; the number actually in use is mqb_size */
+  mpq_node_t mqb_nodes[MPQ_BLOCK_SIZE] __attribute__((aligned(64)));
+};
+
+/* A queue: a singly linked list of blocks. Values are popped from the block at
+   mq_head and pushed into the block at mq_tail. */
+struct mp_queue {
+  struct mp_queue_block *mq_head __attribute__((aligned(64)));
+  int mq_magic; /* 0 = not initialised, else MQ_MAGIC or MQ_MAGIC_SEM */
+  struct mp_queue_block *mq_tail __attribute__((aligned(64)));
+  /* semaphore used by the *_w operations (mpq_push_w, mpq_pop_w, mpq_pop_nw) */
+#if MPQ_USE_POSIX_SEMAPHORES
+  sem_t mq_sem __attribute__((aligned(64)));
+#else
+  mp_sem_t mq_sem __attribute__((aligned(64)));
+#endif
+};
+
+/* block allocation statistics maintained by mp-queue.c */
+extern volatile int mpq_blocks_allocated, mpq_blocks_allocated_max, mpq_blocks_allocations, mpq_blocks_true_allocations,
+    mpq_blocks_wasted, mpq_blocks_prepared;
+extern volatile int mpq_small_blocks_allocated, mpq_small_blocks_allocated_max;
+
+/* number of rows of mqb_hazard_ptr; thread ids must be greater than 0 and less than this */
+#define MAX_MPQ_THREADS 22
+extern __thread int mpq_this_thread_id;          /* id of the calling thread, 0 = none assigned yet */
+extern __thread void **thread_hazard_pointers;   /* the calling thread's row of mqb_hazard_ptr */
+extern volatile int mpq_threads;                 /* number of thread ids handed out so far */
+
+/* reset the thread-id counter and the calling thread's own id */
+void clear_thread_ids(void);
+/* initialize this thread id and return it */
+int get_this_thread_id(void);
+
+/* functions for one mp_queue_block */
+/* alloc: returns a block ready for use; first_val, if non-zero, becomes its first element */
+struct mp_queue_block *alloc_mpq_block(mqn_value_t first_val, int allow_recursion, int is_small);
+void free_mpq_block(struct mp_queue_block *QB);
+
+/* pop returns 0 if the block is empty; push returns a ticket >= 0, or -1 if the block cannot take the value */
+mqn_value_t mpq_block_pop(struct mp_queue_block *QB);
+long mpq_block_push(struct mp_queue_block *QB, mqn_value_t val);
+
+/* functions for mp_queue = list of mp_queue_block's */
+/* the *_w variants prepare the queue for the semaphore-based push_w/pop_w/pop_nw operations */
+void init_mp_queue(struct mp_queue *MQ);
+struct mp_queue *alloc_mp_queue(void);
+struct mp_queue *alloc_mp_queue_w(void);
+void init_mp_queue_w(struct mp_queue *MQ);
+void clear_mp_queue(struct mp_queue *MQ);  // frees all mpq block chain; invoke only if nobody else is using mp-queue
+void free_mp_queue(struct mp_queue *MQ);   // same + invoke free()
+
+// flags for mpq_push / mpq_pop functions
+// MPQF_RECURSIVE: use hazard pointer slot 1 instead of slot 0
+#define MPQF_RECURSIVE 8192
+// MPQF_STORE_PTR: leave the block that was used in hazard pointer slot 2 on return
+#define MPQF_STORE_PTR 4096
+// mask of the low flag bits, read by mpq_pop_w / mpq_pop_nw as the number of
+// non-blocking semaphore attempts
+#define MPQF_MAX_ITERATIONS (MPQF_STORE_PTR - 1)
+
+// mpq_pop returns 0 if the queue is empty; mpq_is_empty returns 1 only if definitely empty
+long mpq_push(struct mp_queue *MQ, mqn_value_t val, int flags);
+mqn_value_t mpq_pop(struct mp_queue *MQ, int flags);
+int mpq_is_empty(struct mp_queue *MQ);
+
+// semaphore-based variants, for queues set up with init_mp_queue_w / alloc_mp_queue_w:
+// push_w posts the semaphore, pop_w waits for it, pop_nw returns 0 instead of waiting
+long mpq_push_w(struct mp_queue *MQ, mqn_value_t val, int flags);
+mqn_value_t mpq_pop_w(struct mp_queue *MQ, int flags);
+mqn_value_t mpq_pop_nw(struct mp_queue *MQ, int flags);
+
+/* custom futex-based semaphore; post and wait return 0, trywait returns 0 on success and -1 otherwise */
+int mp_sem_post(mp_sem_t *sem);
+int mp_sem_wait(mp_sem_t *sem);
+int mp_sem_trywait(mp_sem_t *sem);
+
+/* hazard pointer slot used by get_ptr_multithread_copy() */
+#define COMMON_HAZARD_PTR_NUM 3
+/* checks hazard slots a..b of all threads for ptr; result: +1 if the caller's own row
+   holds it, +2 if another thread's row does */
+int is_hazard_ptr(void *ptr, int a, int b);
+extern void *mqb_hazard_ptr[MAX_MPQ_THREADS][THREAD_HPTRS];
+/* reads *ptr and calls incref() on the result while it is protected by a hazard pointer */
+void *get_ptr_multithread_copy(void **ptr, void (*incref)(void *ptr));
+#endif