From afea16b8f752b3ccd3f66f9ac3890586d44cde69 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sat, 23 May 2020 21:17:15 +0200 Subject: [PATCH] musl: restore lock skipping for mostly-singlethreaded programs, and related patches The remainder of the patch series proposed by upstream [2] for the locking synchronization issue [1]. [1] https://www.openwall.com/lists/musl/2020/05/22/3 [2] https://www.openwall.com/lists/musl/2020/05/22/10 Signed-off-by: Matthias Schiffer --- ...ist-unlink-in-pthread_exit-after-all.patch | 51 +++++++++++ ...own-size-of-some-libc-struct-members.patch | 25 ++++++ ...pping-for-processes-that-return-to-s.patch | 90 +++++++++++++++++++ 3 files changed, 166 insertions(+) create mode 100644 toolchain/musl/patches/500-0001-reorder-thread-list-unlink-in-pthread_exit-after-all.patch create mode 100644 toolchain/musl/patches/500-0003-cut-down-size-of-some-libc-struct-members.patch create mode 100644 toolchain/musl/patches/500-0004-restore-lock-skipping-for-processes-that-return-to-s.patch diff --git a/toolchain/musl/patches/500-0001-reorder-thread-list-unlink-in-pthread_exit-after-all.patch b/toolchain/musl/patches/500-0001-reorder-thread-list-unlink-in-pthread_exit-after-all.patch new file mode 100644 index 0000000000..d47f2f4108 --- /dev/null +++ b/toolchain/musl/patches/500-0001-reorder-thread-list-unlink-in-pthread_exit-after-all.patch @@ -0,0 +1,51 @@ +From 4d5aa20a94a2d3fae3e69289dc23ecafbd0c16c4 Mon Sep 17 00:00:00 2001 +From: Rich Felker +Date: Fri, 22 May 2020 17:35:14 -0400 +Subject: [PATCH 1/4] reorder thread list unlink in pthread_exit after all + locks + +since the backend for LOCK() skips locking if single-threaded, it's +unsafe to make the process appear single-threaded before the last use +of lock. + +this fixes potential unsynchronized access to a linked list via +__dl_thread_cleanup. +--- + src/thread/pthread_create.c | 19 +++++++++++-------- + 1 file changed, 11 insertions(+), 8 deletions(-) + +--- a/src/thread/pthread_create.c ++++ b/src/thread/pthread_create.c +@@ -90,14 +90,7 @@ _Noreturn void __pthread_exit(void *resu + exit(0); + } + +- /* At this point we are committed to thread termination. Unlink +- * the thread from the list. This change will not be visible +- * until the lock is released, which only happens after SYS_exit +- * has been called, via the exit futex address pointing at the lock. */ +- libc.threads_minus_1--; +- self->next->prev = self->prev; +- self->prev->next = self->next; +- self->prev = self->next = self; ++ /* At this point we are committed to thread termination. */ + + /* Process robust list in userspace to handle non-pshared mutexes + * and the detached thread case where the robust list head will +@@ -121,6 +114,16 @@ _Noreturn void __pthread_exit(void *resu + __do_orphaned_stdio_locks(); + __dl_thread_cleanup(); + ++ /* Last, unlink thread from the list. This change will not be visible ++ * until the lock is released, which only happens after SYS_exit ++ * has been called, via the exit futex address pointing at the lock. ++ * This needs to happen after any possible calls to LOCK() that might ++ * skip locking if libc.threads_minus_1 is zero. */ ++ libc.threads_minus_1--; ++ self->next->prev = self->prev; ++ self->prev->next = self->next; ++ self->prev = self->next = self; ++ + /* This atomic potentially competes with a concurrent pthread_detach + * call; the loser is responsible for freeing thread resources. */ + int state = a_cas(&self->detach_state, DT_JOINABLE, DT_EXITING); diff --git a/toolchain/musl/patches/500-0003-cut-down-size-of-some-libc-struct-members.patch b/toolchain/musl/patches/500-0003-cut-down-size-of-some-libc-struct-members.patch new file mode 100644 index 0000000000..6650434397 --- /dev/null +++ b/toolchain/musl/patches/500-0003-cut-down-size-of-some-libc-struct-members.patch @@ -0,0 +1,25 @@ +From f12888e9eb9eed60cc266b899dcafecb4752964a Mon Sep 17 00:00:00 2001 +From: Rich Felker +Date: Fri, 22 May 2020 17:25:38 -0400 +Subject: [PATCH 3/4] cut down size of some libc struct members + +these are all flags that can be single-byte values. +--- + src/internal/libc.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/src/internal/libc.h ++++ b/src/internal/libc.h +@@ -18,9 +18,9 @@ struct tls_module { + }; + + struct __libc { +- int can_do_threads; +- int threaded; +- int secure; ++ char can_do_threads; ++ char threaded; ++ char secure; + int threads_minus_1; + size_t *auxv; + struct tls_module *tls_head; diff --git a/toolchain/musl/patches/500-0004-restore-lock-skipping-for-processes-that-return-to-s.patch b/toolchain/musl/patches/500-0004-restore-lock-skipping-for-processes-that-return-to-s.patch new file mode 100644 index 0000000000..83a6d0247a --- /dev/null +++ b/toolchain/musl/patches/500-0004-restore-lock-skipping-for-processes-that-return-to-s.patch @@ -0,0 +1,90 @@ +From 8d81ba8c0bc6fe31136cb15c9c82ef4c24965040 Mon Sep 17 00:00:00 2001 +From: Rich Felker +Date: Fri, 22 May 2020 17:45:47 -0400 +Subject: [PATCH 4/4] restore lock-skipping for processes that return to + single-threaded state + +the design used here relies on the barrier provided by the first lock +operation after the process returns to single-threaded state to +synchronize with actions by the last thread that exited. by storing +the intent to change modes in the same object used to detect whether +locking is needed, it's possible to avoid an extra (possibly costly) +memory load after the lock is taken. +--- + src/internal/libc.h | 1 + + src/malloc/malloc.c | 5 ++++- + src/thread/__lock.c | 4 +++- + src/thread/pthread_create.c | 8 ++++---- + 4 files changed, 12 insertions(+), 6 deletions(-) + +--- a/src/internal/libc.h ++++ b/src/internal/libc.h +@@ -21,6 +21,7 @@ struct __libc { + char can_do_threads; + char threaded; + char secure; ++ volatile signed char need_locks; + int threads_minus_1; + size_t *auxv; + struct tls_module *tls_head; +--- a/src/malloc/malloc.c ++++ b/src/malloc/malloc.c +@@ -26,8 +26,11 @@ int __malloc_replaced; + + static inline void lock(volatile int *lk) + { +- if (libc.threaded) ++ int need_locks = libc.need_locks; ++ if (need_locks) { + while(a_swap(lk, 1)) __wait(lk, lk+1, 1, 1); ++ if (need_locks < 0) libc.need_locks = 0; ++ } + } + + static inline void unlock(volatile int *lk) +--- a/src/thread/__lock.c ++++ b/src/thread/__lock.c +@@ -18,9 +18,11 @@ + + void __lock(volatile int *l) + { +- if (!libc.threaded) return; ++ int need_locks = libc.need_locks; ++ if (!need_locks) return; + /* fast path: INT_MIN for the lock, +1 for the congestion */ + int current = a_cas(l, 0, INT_MIN + 1); ++ if (need_locks < 0) libc.need_locks = 0; + if (!current) return; + /* A first spin loop, for medium congestion. */ + for (unsigned i = 0; i < 10; ++i) { +--- a/src/thread/pthread_create.c ++++ b/src/thread/pthread_create.c +@@ -118,8 +118,8 @@ _Noreturn void __pthread_exit(void *resu + * until the lock is released, which only happens after SYS_exit + * has been called, via the exit futex address pointing at the lock. + * This needs to happen after any possible calls to LOCK() that might +- * skip locking if libc.threads_minus_1 is zero. */ +- libc.threads_minus_1--; ++ * skip locking if process appears single-threaded. */ ++ if (!--libc.threads_minus_1) libc.need_locks = -1; + self->next->prev = self->prev; + self->prev->next = self->next; + self->prev = self->next = self; +@@ -339,7 +339,7 @@ int __pthread_create(pthread_t *restrict + ~(1UL<<((SIGCANCEL-1)%(8*sizeof(long)))); + + __tl_lock(); +- libc.threads_minus_1++; ++ if (!libc.threads_minus_1++) libc.need_locks = 1; + ret = __clone((c11 ? start_c11 : start), stack, flags, args, &new->tid, TP_ADJ(new), &__thread_list_lock); + + /* All clone failures translate to EAGAIN. If explicit scheduling +@@ -363,7 +363,7 @@ int __pthread_create(pthread_t *restrict + new->next->prev = new; + new->prev->next = new; + } else { +- libc.threads_minus_1--; ++ if (!--libc.threads_minus_1) libc.need_locks = 0; + } + __tl_unlock(); + __restore_sigs(&set); -- 2.30.2