--- /dev/null
+From e4ddd92318e50d8fad1f91fc07c5f6bacb9d6b21 Mon Sep 17 00:00:00 2001
+From: Daniel Golle <daniel@makrotopia.org>
+Date: Mon, 25 May 2015 16:02:49 -0400
+Subject: [PATCH] squashed commits since v1.1.9
+
+967bcbf mark mips crt code as code
+7b75c48 mark mips cancellable syscall code as code
+0e0e494 simplify/shrink relocation processing in dynamic linker stage 1
+09db855 remove processing of DT_JMPREL from dynamic linker stage 1 bootstrap
+9f26ebd fix stack alignment code in mips crt_arch.h
+63caf1d add .text section directive to all crt_arch.h files missing it
+3b0e832 remove outdated and misleading comment in iconv.c
+39b8ce6 in iconv_open, accept "CHAR" and "" as aliases for "UTF-8"
+c648cef fix inconsistency in a_and and a_or argument types on x86[_64]
+390f93e inline llsc atomics when building for sh4a
+c093e2e reprocess libc/ldso RELA relocations in stage 3 of dynamic linking
+43e9f65 fix null pointer dereference in dcngettext under specific conditions
+68630b5 eliminate costly tricks to avoid TLS access for current locale state
+707d7c3 in i386 __set_thread_area, don't assume %gs register is initially zero
+c0f10cf make arm reloc.h CRTJMP macro compatible with thumb
+83340c7 make arm crt_arch.h compatible with thumb code generation
+---
+ arch/aarch64/crt_arch.h | 1 +
+ arch/arm/crt_arch.h | 11 +--
+ arch/arm/reloc.h | 5 ++
+ arch/i386/atomic.h | 8 +--
+ arch/microblaze/crt_arch.h | 1 +
+ arch/mips/crt_arch.h | 5 +-
+ arch/or1k/crt_arch.h | 1 +
+ arch/powerpc/crt_arch.h | 1 +
+ arch/sh/atomic.h | 83 ++++++++++++++++++++++
+ arch/sh/crt_arch.h | 1 +
+ arch/sh/src/atomic.c | 135 ++++++++++++------------------------
+ arch/x32/atomic.h | 8 +--
+ arch/x86_64/atomic.h | 8 +--
+ crt/mips/crt1.s | 2 +
+ crt/mips/crti.s | 2 +
+ src/internal/libc.h | 2 -
+ src/internal/locale_impl.h | 6 +-
+ src/ldso/dlstart.c | 41 +++++------
+ src/ldso/dynlink.c | 2 +-
+ src/locale/dcngettext.c | 2 +-
+ src/locale/iconv.c | 9 +--
+ src/locale/setlocale.c | 7 +-
+ src/locale/uselocale.c | 10 +--
+ src/thread/i386/__set_thread_area.s | 13 ++--
+ src/thread/mips/syscall_cp.s | 3 +
+ src/thread/pthread_create.c | 6 --
+ 26 files changed, 200 insertions(+), 173 deletions(-)
+
+diff --git a/arch/aarch64/crt_arch.h b/arch/aarch64/crt_arch.h
+index 3a4b321..b64fb3d 100644
+--- a/arch/aarch64/crt_arch.h
++++ b/arch/aarch64/crt_arch.h
+@@ -1,4 +1,5 @@
+ __asm__(
++".text \n"
+ ".global " START "\n"
+ ".type " START ",%function\n"
+ START ":\n"
+diff --git a/arch/arm/crt_arch.h b/arch/arm/crt_arch.h
+index d1f9a66..99508b1 100644
+--- a/arch/arm/crt_arch.h
++++ b/arch/arm/crt_arch.h
+@@ -1,15 +1,18 @@
+ __asm__(
++".text \n"
+ ".global " START " \n"
+ ".type " START ",%function \n"
+ START ": \n"
+ " mov fp, #0 \n"
+ " mov lr, #0 \n"
+-" mov a1, sp \n"
+ " ldr a2, 1f \n"
+-"2: add a2, pc, a2 \n"
+-" and sp, sp, #-16 \n"
++" add a2, pc, a2 \n"
++" mov a1, sp \n"
++"2: and ip, a1, #-16 \n"
++" mov sp, ip \n"
+ " bl " START "_c \n"
+ ".weak _DYNAMIC \n"
+ ".hidden _DYNAMIC \n"
+-"1: .word _DYNAMIC-2b-8 \n"
++".align 2 \n"
++"1: .word _DYNAMIC-2b \n"
+ );
+diff --git a/arch/arm/reloc.h b/arch/arm/reloc.h
+index dec0031..e1ef350 100644
+--- a/arch/arm/reloc.h
++++ b/arch/arm/reloc.h
+@@ -28,5 +28,10 @@
+ #define REL_TPOFF R_ARM_TLS_TPOFF32
+ //#define REL_TLSDESC R_ARM_TLS_DESC
+
++#ifdef __thumb__
++#define CRTJMP(pc,sp) __asm__ __volatile__( \
++ "mov sp,%1 ; bx %0" : : "r"(pc), "r"(sp) : "memory" )
++#else
+ #define CRTJMP(pc,sp) __asm__ __volatile__( \
+ "mov sp,%1 ; tst %0,#1 ; moveq pc,%0 ; bx %0" : : "r"(pc), "r"(sp) : "memory" )
++#endif
+diff --git a/arch/i386/atomic.h b/arch/i386/atomic.h
+index 4fe7bde..95fecbd 100644
+--- a/arch/i386/atomic.h
++++ b/arch/i386/atomic.h
+@@ -50,16 +50,16 @@ static inline int a_cas(volatile int *p, int t, int s)
+ return t;
+ }
+
+-static inline void a_or(volatile void *p, int v)
++static inline void a_or(volatile int *p, int v)
+ {
+ __asm__( "lock ; orl %1, %0"
+- : "=m"(*(int *)p) : "r"(v) : "memory" );
++ : "=m"(*p) : "r"(v) : "memory" );
+ }
+
+-static inline void a_and(volatile void *p, int v)
++static inline void a_and(volatile int *p, int v)
+ {
+ __asm__( "lock ; andl %1, %0"
+- : "=m"(*(int *)p) : "r"(v) : "memory" );
++ : "=m"(*p) : "r"(v) : "memory" );
+ }
+
+ static inline int a_swap(volatile int *x, int v)
+diff --git a/arch/microblaze/crt_arch.h b/arch/microblaze/crt_arch.h
+index ada98c8..bca78bf 100644
+--- a/arch/microblaze/crt_arch.h
++++ b/arch/microblaze/crt_arch.h
+@@ -1,4 +1,5 @@
+ __asm__(
++".text \n"
+ ".global " START " \n"
+ ".align 2 \n"
+ START ": \n"
+diff --git a/arch/mips/crt_arch.h b/arch/mips/crt_arch.h
+index 9a60be0..21e139b 100644
+--- a/arch/mips/crt_arch.h
++++ b/arch/mips/crt_arch.h
+@@ -1,6 +1,7 @@
+ __asm__(
+ ".set push\n"
+ ".set noreorder\n"
++".text \n"
+ ".global _" START "\n"
+ ".global " START "\n"
+ ".type _" START ", @function\n"
+@@ -21,8 +22,8 @@ __asm__(
+ " addu $5, $5, $gp \n"
+ " lw $25, 4($ra) \n"
+ " addu $25, $25, $gp \n"
+-" subu $sp, $sp, 16 \n"
++" and $sp, $sp, -8 \n"
+ " jalr $25 \n"
+-" and $sp, $sp, -8 \n"
++" subu $sp, $sp, 16 \n"
+ ".set pop \n"
+ );
+diff --git a/arch/or1k/crt_arch.h b/arch/or1k/crt_arch.h
+index 8441556..9e310ca 100644
+--- a/arch/or1k/crt_arch.h
++++ b/arch/or1k/crt_arch.h
+@@ -1,4 +1,5 @@
+ __asm__(
++".text \n"
+ ".global " START " \n"
+ ".align 4 \n"
+ START ": \n"
+diff --git a/arch/powerpc/crt_arch.h b/arch/powerpc/crt_arch.h
+index ec3cd29..9b65886 100644
+--- a/arch/powerpc/crt_arch.h
++++ b/arch/powerpc/crt_arch.h
+@@ -1,4 +1,5 @@
+ __asm__(
++".text \n"
+ ".global " START " \n"
+ ".type " START ", %function \n"
+ START ": \n"
+diff --git a/arch/sh/atomic.h b/arch/sh/atomic.h
+index a1d22e4..f2e6dac 100644
+--- a/arch/sh/atomic.h
++++ b/arch/sh/atomic.h
+@@ -22,6 +22,88 @@ static inline int a_ctz_64(uint64_t x)
+ return a_ctz_l(y);
+ }
+
++#define LLSC_CLOBBERS "r0", "t", "memory"
++#define LLSC_START(mem) "synco\n" \
++ "0: movli.l @" mem ", r0\n"
++#define LLSC_END(mem) \
++ "1: movco.l r0, @" mem "\n" \
++ " bf 0b\n" \
++ " synco\n"
++
++static inline int __sh_cas_llsc(volatile int *p, int t, int s)
++{
++ int old;
++ __asm__ __volatile__(
++ LLSC_START("%1")
++ " mov r0, %0\n"
++ " cmp/eq %0, %2\n"
++ " bf 1f\n"
++ " mov %3, r0\n"
++ LLSC_END("%1")
++ : "=&r"(old) : "r"(p), "r"(t), "r"(s) : LLSC_CLOBBERS);
++ return old;
++}
++
++static inline int __sh_swap_llsc(volatile int *x, int v)
++{
++ int old;
++ __asm__ __volatile__(
++ LLSC_START("%1")
++ " mov r0, %0\n"
++ " mov %2, r0\n"
++ LLSC_END("%1")
++ : "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
++ return old;
++}
++
++static inline int __sh_fetch_add_llsc(volatile int *x, int v)
++{
++ int old;
++ __asm__ __volatile__(
++ LLSC_START("%1")
++ " mov r0, %0\n"
++ " add %2, r0\n"
++ LLSC_END("%1")
++ : "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
++ return old;
++}
++
++static inline void __sh_store_llsc(volatile int *p, int x)
++{
++ __asm__ __volatile__(
++ " synco\n"
++ " mov.l %1, @%0\n"
++ " synco\n"
++ : : "r"(p), "r"(x) : "memory");
++}
++
++static inline void __sh_and_llsc(volatile int *x, int v)
++{
++ __asm__ __volatile__(
++ LLSC_START("%0")
++ " and %1, r0\n"
++ LLSC_END("%0")
++ : : "r"(x), "r"(v) : LLSC_CLOBBERS);
++}
++
++static inline void __sh_or_llsc(volatile int *x, int v)
++{
++ __asm__ __volatile__(
++ LLSC_START("%0")
++ " or %1, r0\n"
++ LLSC_END("%0")
++ : : "r"(x), "r"(v) : LLSC_CLOBBERS);
++}
++
++#ifdef __SH4A__
++#define a_cas(p,t,s) __sh_cas_llsc(p,t,s)
++#define a_swap(x,v) __sh_swap_llsc(x,v)
++#define a_fetch_add(x,v) __sh_fetch_add_llsc(x, v)
++#define a_store(x,v) __sh_store_llsc(x, v)
++#define a_and(x,v) __sh_and_llsc(x, v)
++#define a_or(x,v) __sh_or_llsc(x, v)
++#else
++
+ int __sh_cas(volatile int *, int, int);
+ int __sh_swap(volatile int *, int);
+ int __sh_fetch_add(volatile int *, int);
+@@ -35,6 +117,7 @@ void __sh_or(volatile int *, int);
+ #define a_store(x,v) __sh_store(x, v)
+ #define a_and(x,v) __sh_and(x, v)
+ #define a_or(x,v) __sh_or(x, v)
++#endif
+
+ static inline void *a_cas_p(volatile void *p, void *t, void *s)
+ {
+diff --git a/arch/sh/crt_arch.h b/arch/sh/crt_arch.h
+index a873ffd..f890710 100644
+--- a/arch/sh/crt_arch.h
++++ b/arch/sh/crt_arch.h
+@@ -1,4 +1,5 @@
+ __asm__(
++".text \n"
+ ".global " START " \n"
+ START ": \n"
+ " mova 1f, r0 \n"
+diff --git a/arch/sh/src/atomic.c b/arch/sh/src/atomic.c
+index 1339567..f8c615f 100644
+--- a/arch/sh/src/atomic.c
++++ b/arch/sh/src/atomic.c
+@@ -1,12 +1,7 @@
+-#include "libc.h"
++#ifndef __SH4A__
+
+-#define LLSC_CLOBBERS "r0", "t", "memory"
+-#define LLSC_START(mem) "synco\n" \
+- "0: movli.l @" mem ", r0\n"
+-#define LLSC_END(mem) \
+- "1: movco.l r0, @" mem "\n" \
+- " bf 0b\n" \
+- " synco\n"
++#include "atomic.h"
++#include "libc.h"
+
+ /* gusa is a hack in the kernel which lets you create a sequence of instructions
+ * which will be restarted if the process is preempted in the middle of the
+@@ -34,114 +29,74 @@
+
+ int __sh_cas(volatile int *p, int t, int s)
+ {
++ if (__hwcap & CPU_HAS_LLSC) return __sh_cas_llsc(p, t, s);
++
+ int old;
+- if (__hwcap & CPU_HAS_LLSC) {
+- __asm__ __volatile__(
+- LLSC_START("%1")
+- " mov r0, %0\n"
+- " cmp/eq %0, %2\n"
+- " bf 1f\n"
+- " mov %3, r0\n"
+- LLSC_END("%1")
+- : "=&r"(old) : "r"(p), "r"(t), "r"(s) : LLSC_CLOBBERS);
+- } else {
+- __asm__ __volatile__(
+- GUSA_START_EVEN("%1", "%0")
+- " cmp/eq %0, %2\n"
+- " bf 1f\n"
+- GUSA_END("%1", "%3")
+- : "=&r"(old) : "r"(p), "r"(t), "r"(s) : GUSA_CLOBBERS, "t");
+- }
++ __asm__ __volatile__(
++ GUSA_START_EVEN("%1", "%0")
++ " cmp/eq %0, %2\n"
++ " bf 1f\n"
++ GUSA_END("%1", "%3")
++ : "=&r"(old) : "r"(p), "r"(t), "r"(s) : GUSA_CLOBBERS, "t");
+ return old;
+ }
+
+ int __sh_swap(volatile int *x, int v)
+ {
++ if (__hwcap & CPU_HAS_LLSC) return __sh_swap_llsc(x, v);
++
+ int old;
+- if (__hwcap & CPU_HAS_LLSC) {
+- __asm__ __volatile__(
+- LLSC_START("%1")
+- " mov r0, %0\n"
+- " mov %2, r0\n"
+- LLSC_END("%1")
+- : "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
+- } else {
+- __asm__ __volatile__(
+- GUSA_START_EVEN("%1", "%0")
+- GUSA_END("%1", "%2")
+- : "=&r"(old) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+- }
++ __asm__ __volatile__(
++ GUSA_START_EVEN("%1", "%0")
++ GUSA_END("%1", "%2")
++ : "=&r"(old) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+ return old;
+ }
+
+ int __sh_fetch_add(volatile int *x, int v)
+ {
++ if (__hwcap & CPU_HAS_LLSC) return __sh_fetch_add_llsc(x, v);
++
+ int old, dummy;
+- if (__hwcap & CPU_HAS_LLSC) {
+- __asm__ __volatile__(
+- LLSC_START("%1")
+- " mov r0, %0\n"
+- " add %2, r0\n"
+- LLSC_END("%1")
+- : "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
+- } else {
+- __asm__ __volatile__(
+- GUSA_START_EVEN("%2", "%0")
+- " mov %0, %1\n"
+- " add %3, %1\n"
+- GUSA_END("%2", "%1")
+- : "=&r"(old), "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+- }
++ __asm__ __volatile__(
++ GUSA_START_EVEN("%2", "%0")
++ " mov %0, %1\n"
++ " add %3, %1\n"
++ GUSA_END("%2", "%1")
++ : "=&r"(old), "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+ return old;
+ }
+
+ void __sh_store(volatile int *p, int x)
+ {
+- if (__hwcap & CPU_HAS_LLSC) {
+- __asm__ __volatile__(
+- " synco\n"
+- " mov.l %1, @%0\n"
+- " synco\n"
+- : : "r"(p), "r"(x) : "memory");
+- } else {
+- __asm__ __volatile__(
+- " mov.l %1, @%0\n"
+- : : "r"(p), "r"(x) : "memory");
+- }
++ if (__hwcap & CPU_HAS_LLSC) { __sh_store_llsc(p, x); return; } /* ISO C: no 'return expr;' in a void function */
++ __asm__ __volatile__(
++ " mov.l %1, @%0\n"
++ : : "r"(p), "r"(x) : "memory");
+ }
+
+ void __sh_and(volatile int *x, int v)
+ {
++ if (__hwcap & CPU_HAS_LLSC) { __sh_and_llsc(x, v); return; } /* ISO C: no 'return expr;' in a void function */
++
+ int dummy;
+- if (__hwcap & CPU_HAS_LLSC) {
+- __asm__ __volatile__(
+- LLSC_START("%0")
+- " and %1, r0\n"
+- LLSC_END("%0")
+- : : "r"(x), "r"(v) : LLSC_CLOBBERS);
+- } else {
+- __asm__ __volatile__(
+- GUSA_START_ODD("%1", "%0")
+- " and %2, %0\n"
+- GUSA_END("%1", "%0")
+- : "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+- }
++ __asm__ __volatile__(
++ GUSA_START_ODD("%1", "%0")
++ " and %2, %0\n"
++ GUSA_END("%1", "%0")
++ : "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+ }
+
+ void __sh_or(volatile int *x, int v)
+ {
++ if (__hwcap & CPU_HAS_LLSC) { __sh_or_llsc(x, v); return; } /* ISO C: no 'return expr;' in a void function */
++
+ int dummy;
+- if (__hwcap & CPU_HAS_LLSC) {
+- __asm__ __volatile__(
+- LLSC_START("%0")
+- " or %1, r0\n"
+- LLSC_END("%0")
+- : : "r"(x), "r"(v) : LLSC_CLOBBERS);
+- } else {
+- __asm__ __volatile__(
+- GUSA_START_ODD("%1", "%0")
+- " or %2, %0\n"
+- GUSA_END("%1", "%0")
+- : "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+- }
++ __asm__ __volatile__(
++ GUSA_START_ODD("%1", "%0")
++ " or %2, %0\n"
++ GUSA_END("%1", "%0")
++ : "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+ }
++
++#endif
+diff --git a/arch/x32/atomic.h b/arch/x32/atomic.h
+index 333098c..b2014cc 100644
+--- a/arch/x32/atomic.h
++++ b/arch/x32/atomic.h
+@@ -47,16 +47,16 @@ static inline int a_cas(volatile int *p, int t, int s)
+ return t;
+ }
+
+-static inline void a_or(volatile void *p, int v)
++static inline void a_or(volatile int *p, int v)
+ {
+ __asm__( "lock ; or %1, %0"
+- : "=m"(*(int *)p) : "r"(v) : "memory" );
++ : "=m"(*p) : "r"(v) : "memory" );
+ }
+
+-static inline void a_and(volatile void *p, int v)
++static inline void a_and(volatile int *p, int v)
+ {
+ __asm__( "lock ; and %1, %0"
+- : "=m"(*(int *)p) : "r"(v) : "memory" );
++ : "=m"(*p) : "r"(v) : "memory" );
+ }
+
+ static inline int a_swap(volatile int *x, int v)
+diff --git a/arch/x86_64/atomic.h b/arch/x86_64/atomic.h
+index 333098c..b2014cc 100644
+--- a/arch/x86_64/atomic.h
++++ b/arch/x86_64/atomic.h
+@@ -47,16 +47,16 @@ static inline int a_cas(volatile int *p, int t, int s)
+ return t;
+ }
+
+-static inline void a_or(volatile void *p, int v)
++static inline void a_or(volatile int *p, int v)
+ {
+ __asm__( "lock ; or %1, %0"
+- : "=m"(*(int *)p) : "r"(v) : "memory" );
++ : "=m"(*p) : "r"(v) : "memory" );
+ }
+
+-static inline void a_and(volatile void *p, int v)
++static inline void a_and(volatile int *p, int v)
+ {
+ __asm__( "lock ; and %1, %0"
+- : "=m"(*(int *)p) : "r"(v) : "memory" );
++ : "=m"(*p) : "r"(v) : "memory" );
+ }
+
+ static inline int a_swap(volatile int *x, int v)
+diff --git a/crt/mips/crt1.s b/crt/mips/crt1.s
+index 093d7d5..794b6f7 100644
+--- a/crt/mips/crt1.s
++++ b/crt/mips/crt1.s
+@@ -4,6 +4,8 @@
+ .weak _fini
+ .global __start
+ .global _start
++.type __start,@function
++.type _start,@function
+ __start:
+ _start:
+ subu $fp, $fp, $fp # Zero the frame pointer.
+diff --git a/crt/mips/crti.s b/crt/mips/crti.s
+index b1593d1..39dee38 100644
+--- a/crt/mips/crti.s
++++ b/crt/mips/crti.s
+@@ -2,6 +2,7 @@
+
+ .section .init
+ .global _init
++.type _init,@function
+ .align 2
+ _init:
+ subu $sp,$sp,32
+@@ -10,6 +11,7 @@ _init:
+
+ .section .fini
+ .global _fini
++.type _fini,@function
+ .align 2
+ _fini:
+ subu $sp,$sp,32
+diff --git a/src/internal/libc.h b/src/internal/libc.h
+index 51ee186..212f0e8 100644
+--- a/src/internal/libc.h
++++ b/src/internal/libc.h
+@@ -23,8 +23,6 @@ struct __libc {
+ volatile int ofl_lock[2];
+ size_t tls_size;
+ size_t page_size;
+- volatile int uselocale_cnt;
+- volatile int bytelocale_cnt_minus_1;
+ struct __locale_struct global_locale;
+ };
+
+diff --git a/src/internal/locale_impl.h b/src/internal/locale_impl.h
+index 9142f0c..5aebbf6 100644
+--- a/src/internal/locale_impl.h
++++ b/src/internal/locale_impl.h
+@@ -20,11 +20,9 @@ const char *__lctrans_cur(const char *);
+ #define LCTRANS(msg, lc, loc) __lctrans(msg, (loc)->cat[(lc)-2])
+ #define LCTRANS_CUR(msg) __lctrans_cur(msg)
+
+-#define CURRENT_LOCALE \
+- (libc.uselocale_cnt ? __pthread_self()->locale : &libc.global_locale)
++#define CURRENT_LOCALE (__pthread_self()->locale)
+
+-#define CURRENT_UTF8 \
+- (libc.bytelocale_cnt_minus_1<0 || __pthread_self()->locale->ctype_utf8)
++#define CURRENT_UTF8 (__pthread_self()->locale->ctype_utf8)
+
+ #undef MB_CUR_MAX
+ #define MB_CUR_MAX (CURRENT_UTF8 ? 4 : 1)
+diff --git a/src/ldso/dlstart.c b/src/ldso/dlstart.c
+index 46f4a5c..5f84465 100644
+--- a/src/ldso/dlstart.c
++++ b/src/ldso/dlstart.c
+@@ -56,31 +56,22 @@ void _dlstart_c(size_t *sp, size_t *dynv)
+ for (i=0; i<local_cnt; i++) got[i] += (size_t)base;
+ }
+
+- /* The use of the reloc_info structure and nested loops is a trick
+- * to work around the fact that we can't necessarily make function
+- * calls yet. Each struct in the array serves like the arguments
+- * to a function call. */
+- struct {
+- void *rel;
+- size_t size;
+- size_t stride;
+- } reloc_info[] = {
+- { base+dyn[DT_JMPREL], dyn[DT_PLTRELSZ], 2+(dyn[DT_PLTREL]==DT_RELA) },
+- { base+dyn[DT_REL], dyn[DT_RELSZ], 2 },
+- { base+dyn[DT_RELA], dyn[DT_RELASZ], 3 },
+- { 0, 0, 0 }
+- };
+-
+- for (i=0; reloc_info[i].stride; i++) {
+- size_t *rel = reloc_info[i].rel;
+- size_t rel_size = reloc_info[i].size;
+- size_t stride = reloc_info[i].stride;
+- for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
+- if (!IS_RELATIVE(rel[1])) continue;
+- size_t *rel_addr = (void *)(base + rel[0]);
+- size_t addend = stride==3 ? rel[2] : *rel_addr;
+- *rel_addr = (size_t)base + addend;
+- }
++ size_t *rel, rel_size;
++
++ rel = (void *)(base+dyn[DT_REL]);
++ rel_size = dyn[DT_RELSZ];
++ for (; rel_size; rel+=2, rel_size-=2*sizeof(size_t)) {
++ if (!IS_RELATIVE(rel[1])) continue;
++ size_t *rel_addr = (void *)(base + rel[0]);
++ *rel_addr += (size_t)base;
++ }
++
++ rel = (void *)(base+dyn[DT_RELA]);
++ rel_size = dyn[DT_RELASZ];
++ for (; rel_size; rel+=3, rel_size-=3*sizeof(size_t)) {
++ if (!IS_RELATIVE(rel[1])) continue;
++ size_t *rel_addr = (void *)(base + rel[0]);
++ *rel_addr = (size_t)base + rel[2];
+ }
+
+ const char *strings = (void *)(base + dyn[DT_STRTAB]);
+diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
+index 7c92ef6..93595a0 100644
+--- a/src/ldso/dynlink.c
++++ b/src/ldso/dynlink.c
+@@ -281,7 +281,7 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
+ }
+
+ int gotplt = (type == REL_GOT || type == REL_PLT);
+- if (dso->rel_update_got && !gotplt) continue;
++ if (dso->rel_update_got && !gotplt && stride==2) continue;
+
+ addend = stride>2 ? rel[2]
+ : gotplt || type==REL_COPY ? 0
+diff --git a/src/locale/dcngettext.c b/src/locale/dcngettext.c
+index 0057cb5..30dd41d 100644
+--- a/src/locale/dcngettext.c
++++ b/src/locale/dcngettext.c
+@@ -132,7 +132,7 @@ char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2,
+ switch (category) {
+ case LC_MESSAGES:
+ locname = loc->messages_name;
+- if (!*locname) goto notrans;
++ if (!locname || !*locname) goto notrans;
+ break;
+ case LC_TIME:
+ case LC_MONETARY:
+diff --git a/src/locale/iconv.c b/src/locale/iconv.c
+index a0b0232..e6121ae 100644
+--- a/src/locale/iconv.c
++++ b/src/locale/iconv.c
+@@ -23,19 +23,13 @@
+ #define BIG5 0340
+ #define EUC_KR 0350
+
+-/* FIXME: these are not implemented yet
+- * EUC: A1-FE A1-FE
+- * GBK: 81-FE 40-7E,80-FE
+- * Big5: A1-FE 40-7E,A1-FE
+- */
+-
+ /* Definitions of charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided entries.
+ * 3. Character table (size determined by field 2). */
+
+ static const unsigned char charmaps[] =
+-"utf8\0\0\310"
++"utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2\0ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+@@ -90,6 +84,7 @@ static int fuzzycmp(const unsigned char *a, const unsigned char *b)
+ static size_t find_charmap(const void *name)
+ {
+ const unsigned char *s;
++ if (!*(char *)name) name=charmaps; /* "utf8" */
+ for (s=charmaps; *s; ) {
+ if (!fuzzycmp(name, s)) {
+ for (; *s; s+=strlen((void *)s)+1);
+diff --git a/src/locale/setlocale.c b/src/locale/setlocale.c
+index 8ea389a..d797f43 100644
+--- a/src/locale/setlocale.c
++++ b/src/locale/setlocale.c
+@@ -55,12 +55,7 @@ char *setlocale(int cat, const char *name)
+ return buf;
+ }
+
+- if (name) {
+- int adj = libc.global_locale.ctype_utf8;
+- __setlocalecat(&libc.global_locale, cat, name);
+- adj -= libc.global_locale.ctype_utf8;
+- if (adj) a_fetch_add(&libc.bytelocale_cnt_minus_1, adj);
+- }
++ if (name) __setlocalecat(&libc.global_locale, cat, name);
+
+ switch (cat) {
+ case LC_CTYPE:
+diff --git a/src/locale/uselocale.c b/src/locale/uselocale.c
+index 5106795..b70a0c1 100644
+--- a/src/locale/uselocale.c
++++ b/src/locale/uselocale.c
+@@ -10,15 +10,7 @@ locale_t __uselocale(locale_t new)
+
+ if (new == LC_GLOBAL_LOCALE) new = global;
+
+- if (new && new != old) {
+- int adj = 0;
+- if (new == global) a_dec(&libc.uselocale_cnt);
+- else if (!new->ctype_utf8) adj++;
+- if (old == global) a_inc(&libc.uselocale_cnt);
+- else if (!old->ctype_utf8) adj--;
+- a_fetch_add(&libc.bytelocale_cnt_minus_1, adj);
+- self->locale = new;
+- }
++ self->locale = new;
+
+ return old == global ? LC_GLOBAL_LOCALE : old;
+ }
+diff --git a/src/thread/i386/__set_thread_area.s b/src/thread/i386/__set_thread_area.s
+index 1d85268..3a558fb 100644
+--- a/src/thread/i386/__set_thread_area.s
++++ b/src/thread/i386/__set_thread_area.s
+@@ -6,10 +6,10 @@ __set_thread_area:
+ push $0x51
+ push $0xfffff
+ push 16(%esp)
+- xor %edx,%edx
+- mov %gs,%dx
+- sub $3,%edx
+- sar $3,%edx
++ call 1f
++1: addl $4f-1b,(%esp)
++ pop %ecx
++ mov (%ecx),%edx
+ push %edx
+ mov %esp,%ebx
+ xor %eax,%eax
+@@ -18,6 +18,7 @@ __set_thread_area:
+ testl %eax,%eax
+ jnz 2f
+ movl (%esp),%edx
++ movl %edx,(%ecx)
+ leal 3(,%edx,8),%edx
+ 3: movw %dx,%gs
+ 1:
+@@ -38,3 +39,7 @@ __set_thread_area:
+ mov $7,%dl
+ inc %al
+ jmp 3b
++
++.data
++ .align 4
++4: .long -1
+diff --git a/src/thread/mips/syscall_cp.s b/src/thread/mips/syscall_cp.s
+index 399289e..8f76d40 100644
+--- a/src/thread/mips/syscall_cp.s
++++ b/src/thread/mips/syscall_cp.s
+@@ -2,10 +2,13 @@
+
+ .global __cp_begin
+ .hidden __cp_begin
++.type __cp_begin,@function
+ .global __cp_end
+ .hidden __cp_end
++.type __cp_end,@function
+ .global __cp_cancel
+ .hidden __cp_cancel
++.type __cp_cancel,@function
+ .hidden __cancel
+ .global __syscall_cp_asm
+ .hidden __syscall_cp_asm
+diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
+index 4eb8b88..de72818 100644
+--- a/src/thread/pthread_create.c
++++ b/src/thread/pthread_create.c
+@@ -67,12 +67,6 @@ _Noreturn void __pthread_exit(void *result)
+ exit(0);
+ }
+
+- if (self->locale != &libc.global_locale) {
+- a_dec(&libc.uselocale_cnt);
+- if (self->locale->ctype_utf8)
+- a_dec(&libc.bytelocale_cnt_minus_1);
+- }
+-
+ /* Process robust list in userspace to handle non-pshared mutexes
+ * and the detached thread case where the robust list head will
+ * be invalid when the kernel would process it. */
+--
+2.4.1
+