From 5cab638034c19e78aef52c8cb6a2b6331cc4e771 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Tue, 29 Oct 2024 14:22:24 -0400 Subject: [PATCH 01/14] implement polite waiting --- .gitignore | 2 + CMakeLists.txt | 4 ++ src/snmalloc/ds/combininglock.h | 66 +++++++++++++++++++++++++++++---- src/snmalloc/pal/pal_consts.h | 5 +++ src/snmalloc/pal/pal_linux.h | 56 +++++++++++++++++++++++++++- src/snmalloc/pal/pal_windows.h | 30 +++++++++++++++ 6 files changed, 153 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 8737c737a..8057a72fa 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,5 @@ CMakeFiles/ *~ *.sw? +# cache dirs +.cache diff --git a/CMakeLists.txt b/CMakeLists.txt index a5aa31814..3941c06af 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -134,6 +134,9 @@ int main() { # this is why we check its existence here CHECK_INCLUDE_FILE_CXX(linux/random.h SNMALLOC_HAS_LINUX_RANDOM_H) +# check if futex.h is available +CHECK_INCLUDE_FILE_CXX(linux/futex.h SNMALLOC_HAS_LINUX_FUTEX_H) + # Provide as function so other projects can reuse # FIXME: This modifies some variables that may or may not be the ones that # provide flags and so is broken by design. It should be removed once Verona @@ -248,6 +251,7 @@ add_as_define(SNMALLOC_TRACING) add_as_define(SNMALLOC_CI_BUILD) add_as_define(SNMALLOC_PLATFORM_HAS_GETENTROPY) add_as_define(SNMALLOC_HAS_LINUX_RANDOM_H) +add_as_define(SNMALLOC_HAS_LINUX_FUTEX_H) if (SNMALLOC_NO_REALLOCARRAY) add_as_define(SNMALLOC_NO_REALLOCARRAY) endif() diff --git a/src/snmalloc/ds/combininglock.h b/src/snmalloc/ds/combininglock.h index 1857713d1..e3ac6e948 100644 --- a/src/snmalloc/ds/combininglock.h +++ b/src/snmalloc/ds/combininglock.h @@ -4,6 +4,7 @@ #include "../pal/pal.h" #include +#include namespace snmalloc { @@ -39,10 +40,14 @@ namespace snmalloc */ class CombiningLockNode { + static constexpr bool always_spin = + !pal_supports; + template friend class CombiningLockNodeTempl; - enum class LockStatus + enum class LockStatus : std:: + conditional_t { // The work for this node has not been completed. WAITING, @@ -53,7 +58,10 @@ namespace snmalloc // The work for this thread has not been completed, and it is the // head of the queue. - HEAD + HEAD, + + // The waiter is currently sleeping. + SLEEPING }; // Status of the queue, set by the thread at the head of the queue, @@ -74,6 +82,49 @@ namespace snmalloc status.store(s, std::memory_order_release); } + static void wake(CombiningLockNode* node, LockStatus message) + { + if constexpr (always_spin) + { + node->set_status(message); + } + else + { + if ( + node->status.exchange(message, std::memory_order_acq_rel) == + LockStatus::SLEEPING) + { + DefaultPal::notify_one_on_address(node->status); + } + } + } + + void wait() + { + if constexpr (always_spin) + { + while (status.load(std::memory_order_acquire) == LockStatus::WAITING) + Aal::pause(); + } + else + { + int remaining = 100; + while (remaining > 0) + { + if (status.load(std::memory_order_acquire) != LockStatus::WAITING) + return; + Aal::pause(); + remaining--; + } + LockStatus expected = LockStatus::WAITING; + if (status.compare_exchange_strong( + expected, LockStatus::SLEEPING, std::memory_order_acq_rel)) + { + DefaultPal::wait_on_address(status, LockStatus::SLEEPING); + } + } + } + SNMALLOC_SLOW_PATH void attach_slow(CombiningLock& lock) { // There is contention for the lock, we need to add our work to the @@ -86,8 +137,7 @@ namespace snmalloc prev->next.store(this, std::memory_order_release); // Wait to for predecessor to complete - while (status.load(std::memory_order_relaxed) == LockStatus::WAITING) - Aal::pause(); + wait(); // Determine if another thread completed our work. if (status.load(std::memory_order_acquire) == LockStatus::DONE) @@ -131,7 +181,7 @@ namespace snmalloc break; // Signal this work was completed and move on to // next item. - curr->set_status(LockStatus::DONE); + wake(curr, LockStatus::DONE); curr = n; } @@ -146,7 +196,7 @@ namespace snmalloc { // Queue was successfully closed. // Notify last element the work was completed. - curr->set_status(LockStatus::DONE); + wake(curr, LockStatus::DONE); lock.release(); return; } @@ -160,13 +210,13 @@ namespace snmalloc // As we had to wait, give the job to the next thread // to carry on performing the work. - n->set_status(LockStatus::HEAD); + wake(n, LockStatus::HEAD); // Notify the thread that we completed its work. // Note that this needs to be before setting curr->status, // as after the status is set the thread may deallocate the // queue node. - curr->set_status(LockStatus::DONE); + wake(curr, LockStatus::DONE); return; } }; diff --git a/src/snmalloc/pal/pal_consts.h b/src/snmalloc/pal/pal_consts.h index 5679c336e..c4c4c25a2 100644 --- a/src/snmalloc/pal/pal_consts.h +++ b/src/snmalloc/pal/pal_consts.h @@ -60,6 +60,11 @@ namespace snmalloc * modify which parts get dumped. */ CoreDump = (1 << 6), + + /** + * This Pal provides a way for parking threads at a specific address. + */ + WaitOnAddress = (1 << 7), }; /** diff --git a/src/snmalloc/pal/pal_linux.h b/src/snmalloc/pal/pal_linux.h index 2ff8add0c..76241a132 100644 --- a/src/snmalloc/pal/pal_linux.h +++ b/src/snmalloc/pal/pal_linux.h @@ -14,6 +14,10 @@ # include # endif +# if defined(SNMALLOC_HAS_LINUX_FUTEX_H) +# include +# endif + extern "C" int puts(const char* str); namespace snmalloc @@ -27,8 +31,12 @@ namespace snmalloc * * We always make sure that linux has entropy support. */ - static constexpr uint64_t pal_features = - PALPOSIX::pal_features | Entropy | CoreDump; + static constexpr uint64_t pal_features = PALPOSIX::pal_features | Entropy | + CoreDump +# ifdef SNMALLOC_HAS_LINUX_FUTEX_H + | WaitOnAddress +# endif + ; static constexpr size_t page_size = Aal::aal_name == PowerPC ? 0x10000 : PALPOSIX::page_size; @@ -232,6 +240,50 @@ namespace snmalloc // its APIs are not exception-free. return dev_urandom(); } + +# ifdef SNMALLOC_HAS_LINUX_FUTEX_H + using WaitingWord = int; + + template + static void wait_on_address(std::atomic& addr, T expected) + { + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + for (;;) + { + if (addr.load(std::memory_order_relaxed) != expected) + break; + + long ret = syscall( + SYS_futex, &addr, FUTEX_WAIT_PRIVATE, expected, nullptr, nullptr, 0); + + if (ret == -EINTR) + continue; + + return; + } + } + + template + static void notify_one_on_address(std::atomic& addr) + { + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + syscall(SYS_futex, &addr, FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0); + } + + template + static void notify_all_on_address(std::atomic& addr) + { + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + syscall( + SYS_futex, &addr, FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0); + } +# endif }; } // namespace snmalloc #endif diff --git a/src/snmalloc/pal/pal_windows.h b/src/snmalloc/pal/pal_windows.h index 2ab0bfc1f..4f3d6b1ab 100644 --- a/src/snmalloc/pal/pal_windows.h +++ b/src/snmalloc/pal/pal_windows.h @@ -20,6 +20,7 @@ # if (NTDDI_VERSION >= NTDDI_WIN10_RS5) && \ (WINVER >= _WIN32_WINNT_WIN10) && !defined(USE_SYSTEMATIC_TESTING) # define PLATFORM_HAS_VIRTUALALLOC2 +# define PLATFORM_HAS_WAITONADDRESS # endif # endif @@ -60,6 +61,9 @@ namespace snmalloc Time # if defined(PLATFORM_HAS_VIRTUALALLOC2) && !defined(USE_SYSTEMATIC_TESTING) | AlignedAllocation +# endif +# if defined(PLATFORM_HAS_WAITONADDRESS) + | WaitOnAddress # endif ; @@ -231,6 +235,32 @@ namespace snmalloc std::chrono::steady_clock::now().time_since_epoch()) .count()); } + +# ifdef PLATFORM_HAS_WAITONADDRESS + using WaitingWord = char; + template + void wait_on_address(std::atomic& addr, T expected) + { + for (;;) + { + if (addr.load(std::memory_order_relaxed) != expected) + break; + + if (::WaitOnAddress(&addr, &expected, sizeof(T), INFINITE)) + break; + } + } + + void notify_one_on_address(std::atomic& addr) + { + ::WakeByAddressSingle(&addr); + } + + void notify_all_on_address(std::atomic& addr) + { + ::WakeByAddressAll(&addr); + } +# endif }; } #endif From ef62bc37ecedd3e6ee59cb926ffe5af61dc54514 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Tue, 29 Oct 2024 17:59:39 -0400 Subject: [PATCH 02/14] Update src/snmalloc/pal/pal_linux.h Co-authored-by: Matthew Parkinson --- src/snmalloc/pal/pal_linux.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/snmalloc/pal/pal_linux.h b/src/snmalloc/pal/pal_linux.h index 76241a132..364864905 100644 --- a/src/snmalloc/pal/pal_linux.h +++ b/src/snmalloc/pal/pal_linux.h @@ -258,7 +258,7 @@ namespace snmalloc long ret = syscall( SYS_futex, &addr, FUTEX_WAIT_PRIVATE, expected, nullptr, nullptr, 0); - if (ret == -EINTR) + if (ret == -1 && errno == EINTR) continue; return; From 0e44b89d192dff4414493748bedfa2db42aae9e6 Mon Sep 17 00:00:00 2001 From: schrodingerzhu Date: Tue, 29 Oct 2024 18:44:42 -0400 Subject: [PATCH 03/14] fix build issues --- src/snmalloc/ds/combininglock.h | 37 ++++++++++++++++++++++++++------- src/snmalloc/pal/pal_windows.h | 12 +++++------ 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/src/snmalloc/ds/combininglock.h b/src/snmalloc/ds/combininglock.h index e3ac6e948..139986d01 100644 --- a/src/snmalloc/ds/combininglock.h +++ b/src/snmalloc/ds/combininglock.h @@ -40,14 +40,33 @@ namespace snmalloc */ class CombiningLockNode { - static constexpr bool always_spin = - !pal_supports; + template + static constexpr bool has_wait_on_address = + pal_supports; + + template + struct WaitWordTypeSelect; + + template + struct WaitWordTypeSelect + { + using type = Pal::WaitingWord; + }; + + template + struct WaitWordTypeSelect + { + using type = int; + }; + + using WaitingWoldType = + typename WaitWordTypeSelect, DefaultPal>:: + type; template friend class CombiningLockNodeTempl; - enum class LockStatus : std:: - conditional_t + enum class LockStatus : WaitingWoldType { // The work for this node has not been completed. WAITING, @@ -82,9 +101,10 @@ namespace snmalloc status.store(s, std::memory_order_release); } + template static void wake(CombiningLockNode* node, LockStatus message) { - if constexpr (always_spin) + if constexpr (!has_wait_on_address) { node->set_status(message); } @@ -94,14 +114,15 @@ namespace snmalloc node->status.exchange(message, std::memory_order_acq_rel) == LockStatus::SLEEPING) { - DefaultPal::notify_one_on_address(node->status); + Pal::notify_one_on_address(node->status); } } } + template void wait() { - if constexpr (always_spin) + if constexpr (!has_wait_on_address) { while (status.load(std::memory_order_acquire) == LockStatus::WAITING) Aal::pause(); @@ -120,7 +141,7 @@ namespace snmalloc if (status.compare_exchange_strong( expected, LockStatus::SLEEPING, std::memory_order_acq_rel)) { - DefaultPal::wait_on_address(status, LockStatus::SLEEPING); + Pal::wait_on_address(status, LockStatus::SLEEPING); } } } diff --git a/src/snmalloc/pal/pal_windows.h b/src/snmalloc/pal/pal_windows.h index 4f3d6b1ab..0b3e22951 100644 --- a/src/snmalloc/pal/pal_windows.h +++ b/src/snmalloc/pal/pal_windows.h @@ -238,8 +238,8 @@ namespace snmalloc # ifdef PLATFORM_HAS_WAITONADDRESS using WaitingWord = char; - template - void wait_on_address(std::atomic& addr, T expected) + template + static void wait_on_address(std::atomic& addr, T expected) { for (;;) { @@ -250,13 +250,13 @@ namespace snmalloc break; } } - - void notify_one_on_address(std::atomic& addr) + template + static void notify_one_on_address(std::atomic& addr) { ::WakeByAddressSingle(&addr); } - - void notify_all_on_address(std::atomic& addr) + template + static void notify_all_on_address(std::atomic& addr) { ::WakeByAddressAll(&addr); } From b999c14ff315383116e60d2e7267a779d4824d4e Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Tue, 29 Oct 2024 18:49:08 -0400 Subject: [PATCH 04/14] fix more build issues --- src/snmalloc/ds/combininglock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/snmalloc/ds/combininglock.h b/src/snmalloc/ds/combininglock.h index 139986d01..0ae318549 100644 --- a/src/snmalloc/ds/combininglock.h +++ b/src/snmalloc/ds/combininglock.h @@ -50,7 +50,7 @@ namespace snmalloc template struct WaitWordTypeSelect { - using type = Pal::WaitingWord; + using type = typename Pal::WaitingWord; }; template From caa709c2f2409e0e8b3fc0c95cd3c2abb69b4082 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Tue, 29 Oct 2024 19:50:05 -0400 Subject: [PATCH 05/14] support _umtx_op for freebsd --- src/snmalloc/pal/pal_freebsd.h | 45 +++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/src/snmalloc/pal/pal_freebsd.h b/src/snmalloc/pal/pal_freebsd.h index 199aef4cf..854c6f130 100644 --- a/src/snmalloc/pal/pal_freebsd.h +++ b/src/snmalloc/pal/pal_freebsd.h @@ -13,6 +13,9 @@ # endif # endif +extern "C" int +_umtx_op(void* obj, int op, unsigned long val, void* uaddr, void* uaddr2); + /** * Direct system-call wrappers so that we can skip libthr interception, which * won't work if malloc is broken. @@ -44,7 +47,7 @@ namespace snmalloc * add new features that they should add any required feature flags. */ static constexpr uint64_t pal_features = - PALBSD_Aligned::pal_features | CoreDump; + PALBSD_Aligned::pal_features | CoreDump | WaitOnAddress; /** * FreeBSD uses atypically small address spaces on its 64 bit RISC machines. @@ -129,6 +132,46 @@ namespace snmalloc p.unsafe_ptr(), ~static_cast(CHERI_PERM_SW_VMEM))); } # endif + + using WaitingWord = unsigned int; + + template + static void wait_on_address(std::atomic& addr, T expected) + { + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + if (a.load(std::memory_order_relaxed) == v) + _umtx_op( + &addr, + UMTX_OP_WAIT_UINT_PRIVATE, + static_cast(v), + nullptr, + nullptr); + } + + template + static void notify_one_on_address(std::atomic& addr) + { + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + _umtx_op(&addr, UMTX_OP_WAKE_PRIVATE, 1, nullptr, nullptr); + } + + template + static void notify_all_on_address(std::atomic& addr) + { + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + _umtx_op( + &addr, + UMTX_OP_WAKE_PRIVATE, + static_cast(INT_MAX), + nullptr, + nullptr); + } }; } // namespace snmalloc #endif From e491fa2ea968f2f1343fc7ff70394715cd35dfb2 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Tue, 29 Oct 2024 19:57:51 -0400 Subject: [PATCH 06/14] unify waiting style --- src/snmalloc/pal/pal_freebsd.h | 13 ++++++++++--- src/snmalloc/pal/pal_linux.h | 13 +++++-------- src/snmalloc/pal/pal_windows.h | 5 +---- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/snmalloc/pal/pal_freebsd.h b/src/snmalloc/pal/pal_freebsd.h index 854c6f130..639df1b1e 100644 --- a/src/snmalloc/pal/pal_freebsd.h +++ b/src/snmalloc/pal/pal_freebsd.h @@ -141,13 +141,20 @@ namespace snmalloc static_assert( sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), "T must be the same size and alignment as WaitingWord"); - if (a.load(std::memory_order_relaxed) == v) - _umtx_op( + int backup = errno; + while (addr.load(std::memory_order_relaxed) == expected) + { + int ret = _umtx_op( &addr, UMTX_OP_WAIT_UINT_PRIVATE, - static_cast(v), + static_cast(expected), nullptr, nullptr); + + if (ret == 0) + break; + } + errno = backup; } template diff --git a/src/snmalloc/pal/pal_linux.h b/src/snmalloc/pal/pal_linux.h index 364864905..0b043de8a 100644 --- a/src/snmalloc/pal/pal_linux.h +++ b/src/snmalloc/pal/pal_linux.h @@ -247,22 +247,19 @@ namespace snmalloc template static void wait_on_address(std::atomic& addr, T expected) { + int backup = errno; static_assert( sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), "T must be the same size and alignment as WaitingWord"); - for (;;) + while (addr.load(std::memory_order_relaxed) == expected) { - if (addr.load(std::memory_order_relaxed) != expected) - break; - long ret = syscall( SYS_futex, &addr, FUTEX_WAIT_PRIVATE, expected, nullptr, nullptr, 0); - if (ret == -1 && errno == EINTR) - continue; - - return; + if (ret == 0) + break; } + errno = backup; } template diff --git a/src/snmalloc/pal/pal_windows.h b/src/snmalloc/pal/pal_windows.h index 0b3e22951..4d94e4f67 100644 --- a/src/snmalloc/pal/pal_windows.h +++ b/src/snmalloc/pal/pal_windows.h @@ -241,11 +241,8 @@ namespace snmalloc template static void wait_on_address(std::atomic& addr, T expected) { - for (;;) + while (addr.load(std::memory_order_relaxed) == expected) { - if (addr.load(std::memory_order_relaxed) != expected) - break; - if (::WaitOnAddress(&addr, &expected, sizeof(T), INFINITE)) break; } From 4f97c0bace38a6458589c7d9c5fa746cb342d8a8 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Tue, 29 Oct 2024 20:04:12 -0400 Subject: [PATCH 07/14] fix --- src/snmalloc/pal/pal_freebsd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/snmalloc/pal/pal_freebsd.h b/src/snmalloc/pal/pal_freebsd.h index 639df1b1e..d7fba0f08 100644 --- a/src/snmalloc/pal/pal_freebsd.h +++ b/src/snmalloc/pal/pal_freebsd.h @@ -13,8 +13,7 @@ # endif # endif -extern "C" int -_umtx_op(void* obj, int op, unsigned long val, void* uaddr, void* uaddr2); +# include /** * Direct system-call wrappers so that we can skip libthr interception, which From 9a5b746fb6d6c21b5ab88ee385f05ab26d7b58ed Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Wed, 30 Oct 2024 09:32:43 -0400 Subject: [PATCH 08/14] address CR Co-authored-by: Matthew Parkinson --- src/snmalloc/ds/combininglock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/snmalloc/ds/combininglock.h b/src/snmalloc/ds/combininglock.h index 0ae318549..d6eaea289 100644 --- a/src/snmalloc/ds/combininglock.h +++ b/src/snmalloc/ds/combininglock.h @@ -59,14 +59,14 @@ namespace snmalloc using type = int; }; - using WaitingWoldType = + using WaitingWordType = typename WaitWordTypeSelect, DefaultPal>:: type; template friend class CombiningLockNodeTempl; - enum class LockStatus : WaitingWoldType + enum class LockStatus : WaitingWordType { // The work for this node has not been completed. WAITING, From 28251c01696def5c113920c0e7c85d9bc2b32f75 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Wed, 30 Oct 2024 15:21:12 -0400 Subject: [PATCH 09/14] support macos --- src/snmalloc/pal/pal_apple.h | 98 +++++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-) diff --git a/src/snmalloc/pal/pal_apple.h b/src/snmalloc/pal/pal_apple.h index f023e195a..9e883ba66 100644 --- a/src/snmalloc/pal/pal_apple.h +++ b/src/snmalloc/pal/pal_apple.h @@ -17,6 +17,25 @@ namespace snmalloc { + extern "C" int os_sync_wait_on_address( + void* addr, uint64_t value, size_t size, uint32_t flags) + __attribute__((weak_import)); + + extern "C" int + os_sync_wake_by_address_any(void* addr, size_t size, uint32_t flags) + __attribute__((weak_import)); + + extern "C" int + os_sync_wake_by_address_all(void* addr, size_t size, uint32_t flags) + __attribute__((weak_import)); + + extern "C" int + __ulock_wait(uint32_t lock_type, void* addr, uint64_t value, uint32_t) + __attribute__((weak_import)); + + extern "C" int __ulock_wake(uint32_t lock_type, void* addr, uint64_t) + __attribute__((weak_import)); + /** * PAL implementation for Apple systems (macOS, iOS, watchOS, tvOS...). */ @@ -28,7 +47,7 @@ namespace snmalloc * The features exported by this PAL. */ static constexpr uint64_t pal_features = - AlignedAllocation | LazyCommit | Entropy | Time; + AlignedAllocation | LazyCommit | Entropy | Time | WaitOnAddress; /* * `page_size` @@ -281,6 +300,83 @@ namespace snmalloc return result; } + + using WaitingWord = uint32_t; + static constexpr uint32_t UL_COMPARE_AND_WAIT = 0x0000'0001; + static constexpr uint32_t ULF_NO_ERRNO = 0x0100'0000; + static constexpr uint32_t ULF_WAKE_ALL = 0x0000'0100; + + template + static void wait_on_address(std::atomic& addr, T expected) + { + int errno_backup = errno; + while (addr.load(std::memory_order_relaxed) == expected) + { + if (os_sync_wait_on_address) + { + if ( + os_sync_wait_on_address( + &addr, static_cast(expected), sizeof(T), 0) != -1) + { + errno = errno_backup; + return; + } + } + else if (__ulock_wait) + { + if ( + __ulock_wait( + UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, + &addr, + static_cast(expected), + 0) != -1) + { + return; + } + } + } + } + + template + static void notify_one_on_address(std::atomic& addr) + { + if (os_sync_wake_by_address_any) + { + os_sync_wake_by_address_any(&addr, sizeof(T), 0); + } + else if (__ulock_wake) + { + // __ulock_wake can get interrupted, so retry until either waking up a + // waiter or failing because there are no waiters (ENOENT). + for (;;) + { + int ret = __ulock_wake(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, &addr, 0); + if (ret >= 0 || ret == -ENOENT) + return; + } + } + } + + template + static void notify_all_on_address(std::atomic& addr) + { + if (os_sync_wake_by_address_all) + { + os_sync_wake_by_address_all(&addr, sizeof(T), 0); + } + else if (__ulock_wake) + { + // __ulock_wake can get interrupted, so retry until either waking up a + // waiter or failing because there are no waiters (ENOENT). + for (;;) + { + int ret = __ulock_wake( + UL_COMPARE_AND_WAIT | ULF_NO_ERRNO | ULF_WAKE_ALL, &addr, 0); + if (ret >= 0 || ret == -ENOENT) + return; + } + } + } }; } // namespace snmalloc #endif From 26f15eb18f20b585b5fee489389cc0e2a725d368 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Wed, 13 Nov 2024 22:31:29 -0500 Subject: [PATCH 10/14] static dispatch os APIs for apple --- src/snmalloc/pal/pal_apple.h | 117 ++++++++++++++++++----------------- 1 file changed, 61 insertions(+), 56 deletions(-) diff --git a/src/snmalloc/pal/pal_apple.h b/src/snmalloc/pal/pal_apple.h index 9e883ba66..e4588b1f8 100644 --- a/src/snmalloc/pal/pal_apple.h +++ b/src/snmalloc/pal/pal_apple.h @@ -15,26 +15,38 @@ # include # include +# if __has_include() && __has_include() +# include +# include +# if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && \ + defined(MAC_OS_X_VERSION_14_4) +# if __MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_14_4 +# define SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS +# endif +# endif +# endif + namespace snmalloc { +# ifdef SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS + // For macos 14.4+, we use os_sync_wait_on_address and friends. It is + // available as a part of stable API, and the usage is more straightforward. extern "C" int os_sync_wait_on_address( - void* addr, uint64_t value, size_t size, uint32_t flags) - __attribute__((weak_import)); + void* addr, uint64_t value, size_t size, uint32_t flags); extern "C" int - os_sync_wake_by_address_any(void* addr, size_t size, uint32_t flags) - __attribute__((weak_import)); + os_sync_wake_by_address_any(void* addr, size_t size, uint32_t flags); extern "C" int - os_sync_wake_by_address_all(void* addr, size_t size, uint32_t flags) - __attribute__((weak_import)); - + os_sync_wake_by_address_all(void* addr, size_t size, uint32_t flags); +# else + // For platforms before macos 14.4, we use __ulock_wait and friends. It is + // available since macos 10.12. extern "C" int - __ulock_wait(uint32_t lock_type, void* addr, uint64_t value, uint32_t) - __attribute__((weak_import)); + __ulock_wait(uint32_t lock_type, void* addr, uint64_t value, uint32_t); - extern "C" int __ulock_wake(uint32_t lock_type, void* addr, uint64_t) - __attribute__((weak_import)); + extern "C" int __ulock_wake(uint32_t lock_type, void* addr, uint64_t); +# endif /** * PAL implementation for Apple systems (macOS, iOS, watchOS, tvOS...). @@ -302,9 +314,11 @@ namespace snmalloc } using WaitingWord = uint32_t; +# ifndef SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS static constexpr uint32_t UL_COMPARE_AND_WAIT = 0x0000'0001; static constexpr uint32_t ULF_NO_ERRNO = 0x0100'0000; static constexpr uint32_t ULF_WAKE_ALL = 0x0000'0100; +# endif template static void wait_on_address(std::atomic& addr, T expected) @@ -312,70 +326,61 @@ namespace snmalloc int errno_backup = errno; while (addr.load(std::memory_order_relaxed) == expected) { - if (os_sync_wait_on_address) +# ifdef SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS + if ( + os_sync_wait_on_address( + &addr, static_cast(expected), sizeof(T), 0) != -1) { - if ( - os_sync_wait_on_address( - &addr, static_cast(expected), sizeof(T), 0) != -1) - { - errno = errno_backup; - return; - } + errno = errno_backup; + return; } - else if (__ulock_wait) +# else + if ( + __ulock_wait( + UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, + &addr, + static_cast(expected), + 0) != -1) { - if ( - __ulock_wait( - UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, - &addr, - static_cast(expected), - 0) != -1) - { - return; - } + return; } +# endif } } template static void notify_one_on_address(std::atomic& addr) { - if (os_sync_wake_by_address_any) +# ifdef SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS + os_sync_wake_by_address_any(&addr, sizeof(T), 0); +# else + // __ulock_wake can get interrupted, so retry until either waking up a + // waiter or failing because there are no waiters (ENOENT). + for (;;) { - os_sync_wake_by_address_any(&addr, sizeof(T), 0); - } - else if (__ulock_wake) - { - // __ulock_wake can get interrupted, so retry until either waking up a - // waiter or failing because there are no waiters (ENOENT). - for (;;) - { - int ret = __ulock_wake(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, &addr, 0); - if (ret >= 0 || ret == -ENOENT) - return; - } + int ret = __ulock_wake(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, &addr, 0); + if (ret >= 0 || ret == -ENOENT) + return; } +# endif } template static void notify_all_on_address(std::atomic& addr) { - if (os_sync_wake_by_address_all) - { - os_sync_wake_by_address_all(&addr, sizeof(T), 0); - } - else if (__ulock_wake) +# ifdef SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS + os_sync_wake_by_address_all(&addr, sizeof(T), 0); +# else + // __ulock_wake can get interrupted, so retry until either waking up a + // waiter or failing because there are no waiters (ENOENT). + for (;;) { - // __ulock_wake can get interrupted, so retry until either waking up a - // waiter or failing because there are no waiters (ENOENT). - for (;;) - { - int ret = __ulock_wake( - UL_COMPARE_AND_WAIT | ULF_NO_ERRNO | ULF_WAKE_ALL, &addr, 0); - if (ret >= 0 || ret == -ENOENT) - return; - } + int ret = __ulock_wake( + UL_COMPARE_AND_WAIT | ULF_NO_ERRNO | ULF_WAKE_ALL, &addr, 0); + if (ret >= 0 || ret == -ENOENT) + return; } +# endif } }; } // namespace snmalloc From 27e344e1c5ba9239fcfefad784e7f60853113c53 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Wed, 13 Nov 2024 22:38:12 -0500 Subject: [PATCH 11/14] make wait_on_address configurable via cmake --- CMakeLists.txt | 8 ++++++++ src/snmalloc/ds/combininglock.h | 11 ++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3941c06af..019feaa48 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ option(SNMALLOC_LINK_ICF "Link with Identical Code Folding" ON) option(SNMALLOC_IPO "Link with IPO/LTO support" OFF) option(SNMALLOC_BENCHMARK_INDIVIDUAL_MITIGATIONS "Build tests and ld_preload for individual mitigations" OFF) option(SNMALLOC_ENABLE_DYNAMIC_LOADING "Build such that snmalloc can be dynamically loaded. This is not required for LD_PRELOAD, and will harm performance if enabled." OFF) +option(SNMALLOC_ENABLE_WAIT_ON_ADDRESS "Use wait on address backoff strategy if it is available" ON) # Options that apply only if we're not building the header-only library cmake_dependent_option(SNMALLOC_RUST_SUPPORT "Build static library for rust" OFF "NOT SNMALLOC_HEADER_ONLY_LIBRARY" OFF) cmake_dependent_option(SNMALLOC_STATIC_LIBRARY "Build static libraries" ON "NOT SNMALLOC_HEADER_ONLY_LIBRARY" OFF) @@ -196,6 +197,13 @@ if(SNMALLOC_USE_CXX17) else() target_compile_features(snmalloc INTERFACE cxx_std_20) endif() + +if(SNMALLOC_ENABLE_WAIT_ON_ADDRESS) + target_compile_definitions(snmalloc INTERFACE SNMALLOC_USE_WAIT_ON_ADDRESS=1) +else() + target_compile_definitions(snmalloc INTERFACE SNMALLOC_USE_WAIT_ON_ADDRESS=0) +endif() + # https://learn.microsoft.com/en-us/cpp/build/reference/zc-cplusplus if(MSVC) target_compile_options(snmalloc INTERFACE "/Zc:__cplusplus") diff --git a/src/snmalloc/ds/combininglock.h b/src/snmalloc/ds/combininglock.h index d6eaea289..e632c0ced 100644 --- a/src/snmalloc/ds/combininglock.h +++ b/src/snmalloc/ds/combininglock.h @@ -41,8 +41,9 @@ namespace snmalloc class CombiningLockNode { template - static constexpr bool has_wait_on_address = - pal_supports; + static constexpr bool use_wait_on_address = + pal_supports && + SNMALLOC_USE_WAIT_ON_ADDRESS; template struct WaitWordTypeSelect; @@ -60,7 +61,7 @@ namespace snmalloc }; using WaitingWordType = - typename WaitWordTypeSelect, DefaultPal>:: + typename WaitWordTypeSelect, DefaultPal>:: type; template @@ -104,7 +105,7 @@ namespace snmalloc template static void wake(CombiningLockNode* node, LockStatus message) { - if constexpr (!has_wait_on_address) + if constexpr (!use_wait_on_address) { node->set_status(message); } @@ -122,7 +123,7 @@ namespace snmalloc template void wait() { - if constexpr (!has_wait_on_address) + if constexpr (!use_wait_on_address) { while (status.load(std::memory_order_acquire) == LockStatus::WAITING) Aal::pause(); From 20e789fbceaaea59741999f5c9115f9c47ce717e Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Wed, 13 Nov 2024 22:46:12 -0500 Subject: [PATCH 12/14] undo extra include --- src/snmalloc/ds/combininglock.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/snmalloc/ds/combininglock.h b/src/snmalloc/ds/combininglock.h index e632c0ced..89a4bc258 100644 --- a/src/snmalloc/ds/combininglock.h +++ b/src/snmalloc/ds/combininglock.h @@ -4,7 +4,6 @@ #include "../pal/pal.h" #include -#include namespace snmalloc { From d69919a9a21411e4f91ad0177ebb2cf60c7c934a Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Wed, 13 Nov 2024 22:50:25 -0500 Subject: [PATCH 13/14] fix macos build --- src/snmalloc/pal/pal_apple.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/snmalloc/pal/pal_apple.h b/src/snmalloc/pal/pal_apple.h index e4588b1f8..f6a7f1a2d 100644 --- a/src/snmalloc/pal/pal_apple.h +++ b/src/snmalloc/pal/pal_apple.h @@ -323,7 +323,7 @@ namespace snmalloc template static void wait_on_address(std::atomic& addr, T expected) { - int errno_backup = errno; + [[maybe_unused]] int errno_backup = errno; while (addr.load(std::memory_order_relaxed) == expected) { # ifdef SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS From ee5d9021e4e638ae4217f6588b61eb84388bcd69 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Thu, 14 Nov 2024 10:11:20 -0500 Subject: [PATCH 14/14] fix clang-tidy build --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8e10f4950..e3339917a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -479,7 +479,7 @@ jobs: git diff --exit-code - name: Run clang-tidy run: | - clang-tidy-15 src/snmalloc/override/malloc.cc -header-filter="`pwd`/*" -warnings-as-errors='*' -export-fixes=tidy.fail -- -std=c++17 -mcx16 -DSNMALLOC_PLATFORM_HAS_GETENTROPY=0 -Isrc + clang-tidy-15 src/snmalloc/override/malloc.cc -header-filter="`pwd`/*" -warnings-as-errors='*' -export-fixes=tidy.fail -- -std=c++17 -mcx16 -DSNMALLOC_USE_WAIT_ON_ADDRESS=1 -DSNMALLOC_PLATFORM_HAS_GETENTROPY=0 -Isrc if [ -f tidy.fail ] ; then cat tidy.fail exit 1