--- /dev/null
+From e01b5939b38aea5ecbe41670643199825874b26c Mon Sep 17 00:00:00 2001
+From: Rich Felker <dalias@aerifal.cx>
+Date: Thu, 21 May 2020 23:32:45 -0400
+Subject: [PATCH 2/4] don't use libc.threads_minus_1 as relaxed atomic for
+ skipping locks
+
+after all but the last thread exits, the next thread to observe
+libc.threads_minus_1==0 and conclude that it can skip locking fails to
+synchronize with any changes to memory that were made by the
+last-exiting thread. this can produce data races.
+
+on some archs, at least x86, memory synchronization is unlikely to be
+a problem; however, with the inline locks in malloc, skipping the lock
+also eliminated the compiler barrier, and caused code that needed to
+re-check chunk in-use bits after obtaining the lock to reuse a stale
+value, possibly from before the process became single-threaded. this
+in turn produced corruption of the heap state.
+
+some uses of libc.threads_minus_1 remain, especially for allocation of
+new TLS in the dynamic linker; otherwise, it could be removed
+entirely. it's made non-volatile to reflect that the remaining
+accesses are only made under lock on the thread list.
+
+instead of libc.threads_minus_1, libc.threaded is now used for
+skipping locks. the difference is that libc.threaded is permanently
+true once an additional thread has been created. this will produce
+some performance regression in processes that are mostly
+single-threaded but occasionally creating threads. in the future it
+may be possible to bring back the full lock-skipping, but more care
+needs to be taken to produce a safe design.
+---
+ src/internal/libc.h | 2 +-
+ src/malloc/malloc.c | 2 +-
+ src/thread/__lock.c | 2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/src/internal/libc.h
++++ b/src/internal/libc.h
+@@ -21,7 +21,7 @@ struct __libc {
+ int can_do_threads;
+ int threaded;
+ int secure;
+- volatile int threads_minus_1;
++ int threads_minus_1;
+ size_t *auxv;
+ struct tls_module *tls_head;
+ size_t tls_size, tls_align, tls_cnt;
+--- a/src/malloc/malloc.c
++++ b/src/malloc/malloc.c
+@@ -26,7 +26,7 @@ int __malloc_replaced;
+
+ static inline void lock(volatile int *lk)
+ {
+- if (libc.threads_minus_1)
++ if (libc.threaded)
+ while(a_swap(lk, 1)) __wait(lk, lk+1, 1, 1);
+ }
+
+--- a/src/thread/__lock.c
++++ b/src/thread/__lock.c
+@@ -18,7 +18,7 @@
+
+ void __lock(volatile int *l)
+ {
+- if (!libc.threads_minus_1) return;
++ if (!libc.threaded) return;
+ /* fast path: INT_MIN for the lock, +1 for the congestion */
+ int current = a_cas(l, 0, INT_MIN + 1);
+ if (!current) return;