From ed95e47f072caaeffdc48a78718cfc08fe89f690 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Sat, 30 Jan 2016 20:15:11 +0000
Subject: [PATCH] musl: update to the latest git version, fixes TLS issues

Signed-off-by: Felix Fietkau <nbd@openwrt.org>

SVN-Revision: 48574
---
 .../000-update-to-git-2016-01-22.patch        |  7636 -------
 .../000-update-to-git-2016-01-30.patch        | 17861 ++++++++++++++++
 .../010-Add-PowerPC-soft-float-support.patch  |     2 +-
 .../patches/030-mips-add-vdso-support.patch   |    80 -
 ...ribute-to-some-function-declarations.patch |    24 +-
 .../patches/200-add_libssp_nonshared.patch    |    18 +-
 toolchain/musl/patches/300-relative.patch     |     2 +-
 7 files changed, 17878 insertions(+), 7745 deletions(-)
 delete mode 100644 toolchain/musl/patches/000-update-to-git-2016-01-22.patch
 create mode 100644 toolchain/musl/patches/000-update-to-git-2016-01-30.patch
 delete mode 100644 toolchain/musl/patches/030-mips-add-vdso-support.patch

diff --git a/toolchain/musl/patches/000-update-to-git-2016-01-22.patch b/toolchain/musl/patches/000-update-to-git-2016-01-22.patch
deleted file mode 100644
index f5fc159ad10..00000000000
--- a/toolchain/musl/patches/000-update-to-git-2016-01-22.patch
+++ /dev/null
@@ -1,7636 +0,0 @@
---- a/.gitignore
-+++ b/.gitignore
-@@ -5,9 +5,6 @@
- *.so.1
- arch/*/bits/alltypes.h
- config.mak
--include/bits
--tools/musl-gcc
--tools/musl-clang
--tools/ld.musl-clang
- lib/musl-gcc.specs
- src/internal/version.h
-+/obj/
---- a/Makefile
-+++ b/Makefile
-@@ -8,6 +8,7 @@
- # Do not make changes here.
- #
- 
-+srcdir = .
- exec_prefix = /usr/local
- bindir = $(exec_prefix)/bin
- 
-@@ -16,31 +17,38 @@ includedir = $(prefix)/include
- libdir = $(prefix)/lib
- syslibdir = /lib
- 
--SRCS = $(sort $(wildcard src/*/*.c arch/$(ARCH)/src/*.c))
--OBJS = $(SRCS:.c=.o)
-+BASE_SRCS = $(sort $(wildcard $(srcdir)/src/*/*.c $(srcdir)/arch/$(ARCH)/src/*.[csS]))
-+BASE_OBJS = $(patsubst $(srcdir)/%,%.o,$(basename $(BASE_SRCS)))
-+ARCH_SRCS = $(wildcard $(srcdir)/src/*/$(ARCH)/*.[csS])
-+ARCH_OBJS = $(patsubst $(srcdir)/%,%.o,$(basename $(ARCH_SRCS)))
-+REPLACED_OBJS = $(sort $(subst /$(ARCH)/,/,$(ARCH_OBJS)))
-+OBJS = $(addprefix obj/, $(filter-out $(REPLACED_OBJS), $(sort $(BASE_OBJS) $(ARCH_OBJS))))
- LOBJS = $(OBJS:.o=.lo)
--GENH = include/bits/alltypes.h
--GENH_INT = src/internal/version.h
--IMPH = src/internal/stdio_impl.h src/internal/pthread_impl.h src/internal/libc.h
-+GENH = obj/include/bits/alltypes.h
-+GENH_INT = obj/src/internal/version.h
-+IMPH = $(addprefix $(srcdir)/, src/internal/stdio_impl.h src/internal/pthread_impl.h src/internal/libc.h)
- 
--LDFLAGS = 
-+LDFLAGS =
-+LDFLAGS_AUTO =
- LIBCC = -lgcc
- CPPFLAGS =
--CFLAGS = -Os -pipe
-+CFLAGS =
-+CFLAGS_AUTO = -Os -pipe
- CFLAGS_C99FSE = -std=c99 -ffreestanding -nostdinc 
- 
- CFLAGS_ALL = $(CFLAGS_C99FSE)
--CFLAGS_ALL += -D_XOPEN_SOURCE=700 -I./arch/$(ARCH) -I./src/internal -I./include
--CFLAGS_ALL += $(CPPFLAGS) $(CFLAGS)
--CFLAGS_ALL_STATIC = $(CFLAGS_ALL)
--CFLAGS_ALL_SHARED = $(CFLAGS_ALL) -fPIC -DSHARED
-+CFLAGS_ALL += -D_XOPEN_SOURCE=700 -I$(srcdir)/arch/$(ARCH) -Iobj/src/internal -I$(srcdir)/src/internal -Iobj/include -I$(srcdir)/include
-+CFLAGS_ALL += $(CPPFLAGS) $(CFLAGS_AUTO) $(CFLAGS)
-+
-+LDFLAGS_ALL = $(LDFLAGS_AUTO) $(LDFLAGS)
- 
- AR      = $(CROSS_COMPILE)ar
- RANLIB  = $(CROSS_COMPILE)ranlib
--INSTALL = ./tools/install.sh
-+INSTALL = $(srcdir)/tools/install.sh
- 
--ARCH_INCLUDES = $(wildcard arch/$(ARCH)/bits/*.h)
--ALL_INCLUDES = $(sort $(wildcard include/*.h include/*/*.h) $(GENH) $(ARCH_INCLUDES:arch/$(ARCH)/%=include/%))
-+ARCH_INCLUDES = $(wildcard $(srcdir)/arch/$(ARCH)/bits/*.h)
-+INCLUDES = $(wildcard $(srcdir)/include/*.h $(srcdir)/include/*/*.h)
-+ALL_INCLUDES = $(sort $(INCLUDES:$(srcdir)/%=%) $(GENH:obj/%=%) $(ARCH_INCLUDES:$(srcdir)/arch/$(ARCH)/%=include/%))
- 
- EMPTY_LIB_NAMES = m rt pthread crypt util xnet resolv dl
- EMPTY_LIBS = $(EMPTY_LIB_NAMES:%=lib/lib%.a)
-@@ -49,7 +57,7 @@ STATIC_LIBS = lib/libc.a
- SHARED_LIBS = lib/libc.so
- TOOL_LIBS = lib/musl-gcc.specs
- ALL_LIBS = $(CRT_LIBS) $(STATIC_LIBS) $(SHARED_LIBS) $(EMPTY_LIBS) $(TOOL_LIBS)
--ALL_TOOLS = tools/musl-gcc
-+ALL_TOOLS = obj/musl-gcc
- 
- WRAPCC_GCC = gcc
- WRAPCC_CLANG = clang
-@@ -58,95 +66,93 @@ LDSO_PATHNAME = $(syslibdir)/ld-musl-$(A
- 
- -include config.mak
- 
-+ifeq ($(ARCH),)
-+$(error Please set ARCH in config.mak before running make.)
-+endif
-+
- all: $(ALL_LIBS) $(ALL_TOOLS)
- 
-+OBJ_DIRS = $(sort $(patsubst %/,%,$(dir $(ALL_LIBS) $(ALL_TOOLS) $(OBJS) $(GENH) $(GENH_INT))) $(addprefix obj/, crt crt/$(ARCH) include))
-+
-+$(ALL_LIBS) $(ALL_TOOLS) $(CRT_LIBS:lib/%=obj/crt/%) $(OBJS) $(LOBJS) $(GENH) $(GENH_INT): | $(OBJ_DIRS)
-+
-+$(OBJ_DIRS):
-+	mkdir -p $@
-+
- install: install-libs install-headers install-tools
- 
- clean:
--	rm -f crt/*.o
--	rm -f $(OBJS)
--	rm -f $(LOBJS)
--	rm -f $(ALL_LIBS) lib/*.[ao] lib/*.so
--	rm -f $(ALL_TOOLS)
--	rm -f $(GENH) $(GENH_INT)
--	rm -f include/bits
-+	rm -rf obj lib
- 
- distclean: clean
- 	rm -f config.mak
- 
--include/bits:
--	@test "$(ARCH)" || { echo "Please set ARCH in config.mak before running make." ; exit 1 ; }
--	ln -sf ../arch/$(ARCH)/bits $@
-+obj/include/bits/alltypes.h: $(srcdir)/arch/$(ARCH)/bits/alltypes.h.in $(srcdir)/include/alltypes.h.in $(srcdir)/tools/mkalltypes.sed
-+	sed -f $(srcdir)/tools/mkalltypes.sed $(srcdir)/arch/$(ARCH)/bits/alltypes.h.in $(srcdir)/include/alltypes.h.in > $@
- 
--include/bits/alltypes.h.in: include/bits
-+obj/src/internal/version.h: $(wildcard $(srcdir)/VERSION $(srcdir)/.git)
-+	printf '#define VERSION "%s"\n' "$$(cd $(srcdir); sh tools/version.sh)" > $@
- 
--include/bits/alltypes.h: include/bits/alltypes.h.in include/alltypes.h.in tools/mkalltypes.sed
--	sed -f tools/mkalltypes.sed include/bits/alltypes.h.in include/alltypes.h.in > $@
-+obj/src/internal/version.o obj/src/internal/version.lo: obj/src/internal/version.h
- 
--src/internal/version.h: $(wildcard VERSION .git)
--	printf '#define VERSION "%s"\n' "$$(sh tools/version.sh)" > $@
-+obj/crt/rcrt1.o obj/src/ldso/dlstart.lo obj/src/ldso/dynlink.lo: $(srcdir)/src/internal/dynlink.h $(srcdir)/arch/$(ARCH)/reloc.h
- 
--src/internal/version.lo: src/internal/version.h
-+obj/crt/crt1.o obj/crt/scrt1.o obj/crt/rcrt1.o obj/src/ldso/dlstart.lo: $(srcdir)/arch/$(ARCH)/crt_arch.h
- 
--crt/rcrt1.o src/ldso/dlstart.lo src/ldso/dynlink.lo: src/internal/dynlink.h arch/$(ARCH)/reloc.h
-+obj/crt/rcrt1.o: $(srcdir)/src/ldso/dlstart.c
- 
--crt/crt1.o crt/Scrt1.o crt/rcrt1.o src/ldso/dlstart.lo: $(wildcard arch/$(ARCH)/crt_arch.h)
-+obj/crt/Scrt1.o obj/crt/rcrt1.o: CFLAGS_ALL += -fPIC
- 
--crt/rcrt1.o: src/ldso/dlstart.c
-+obj/crt/$(ARCH)/crti.o: $(srcdir)/crt/$(ARCH)/crti.s
- 
--crt/Scrt1.o crt/rcrt1.o: CFLAGS += -fPIC
-+obj/crt/$(ARCH)/crtn.o: $(srcdir)/crt/$(ARCH)/crtn.s
- 
--OPTIMIZE_SRCS = $(wildcard $(OPTIMIZE_GLOBS:%=src/%))
--$(OPTIMIZE_SRCS:%.c=%.o) $(OPTIMIZE_SRCS:%.c=%.lo): CFLAGS += -O3
-+OPTIMIZE_SRCS = $(wildcard $(OPTIMIZE_GLOBS:%=$(srcdir)/src/%))
-+$(OPTIMIZE_SRCS:$(srcdir)/%.c=obj/%.o) $(OPTIMIZE_SRCS:$(srcdir)/%.c=obj/%.lo): CFLAGS += -O3
- 
- MEMOPS_SRCS = src/string/memcpy.c src/string/memmove.c src/string/memcmp.c src/string/memset.c
--$(MEMOPS_SRCS:%.c=%.o) $(MEMOPS_SRCS:%.c=%.lo): CFLAGS += $(CFLAGS_MEMOPS)
-+$(MEMOPS_SRCS:%.c=obj/%.o) $(MEMOPS_SRCS:%.c=obj/%.lo): CFLAGS_ALL += $(CFLAGS_MEMOPS)
- 
- NOSSP_SRCS = $(wildcard crt/*.c) \
- 	src/env/__libc_start_main.c src/env/__init_tls.c \
- 	src/thread/__set_thread_area.c src/env/__stack_chk_fail.c \
- 	src/string/memset.c src/string/memcpy.c \
- 	src/ldso/dlstart.c src/ldso/dynlink.c
--$(NOSSP_SRCS:%.c=%.o) $(NOSSP_SRCS:%.c=%.lo): CFLAGS += $(CFLAGS_NOSSP)
-+$(NOSSP_SRCS:%.c=obj/%.o) $(NOSSP_SRCS:%.c=obj/%.lo): CFLAGS_ALL += $(CFLAGS_NOSSP)
-+
-+$(CRT_LIBS:lib/%=obj/crt/%): CFLAGS_ALL += -DCRT
- 
--$(CRT_LIBS:lib/%=crt/%): CFLAGS += -DCRT
-+$(LOBJS): CFLAGS_ALL += -fPIC -DSHARED
- 
--# This incantation ensures that changes to any subarch asm files will
--# force the corresponding object file to be rebuilt, even if the implicit
--# rule below goes indirectly through a .sub file.
--define mkasmdep
--$(dir $(patsubst %/,%,$(dir $(1))))$(notdir $(1:.s=.o)): $(1)
--endef
--$(foreach s,$(wildcard src/*/$(ARCH)*/*.s),$(eval $(call mkasmdep,$(s))))
-+CC_CMD = $(CC) $(CFLAGS_ALL) -c -o $@ $<
- 
- # Choose invocation of assembler to be used
--# $(1) is input file, $(2) is output file, $(3) is assembler flags
- ifeq ($(ADD_CFI),yes)
--	AS_CMD = LC_ALL=C awk -f tools/add-cfi.common.awk -f tools/add-cfi.$(ARCH).awk $< | $(CC) -x assembler -c -o $@ -
-+	AS_CMD = LC_ALL=C awk -f $(srcdir)/tools/add-cfi.common.awk -f $(srcdir)/tools/add-cfi.$(ARCH).awk $< | $(CC) $(CFLAGS_ALL) -x assembler -c -o $@ -
- else
--	AS_CMD = $(CC) -c -o $@ $<
-+	AS_CMD = $(CC_CMD)
- endif
- 
--%.o: $(ARCH)$(ASMSUBARCH)/%.sub
--	$(CC) $(CFLAGS_ALL_STATIC) -c -o $@ $(dir $<)$(shell cat $<)
-+obj/%.o: $(srcdir)/%.s
-+	$(AS_CMD)
- 
--%.o: $(ARCH)/%.s
--	$(AS_CMD) $(CFLAGS_ALL_STATIC)
-+obj/%.o: $(srcdir)/%.S
-+	$(CC_CMD)
- 
--%.o: %.c $(GENH) $(IMPH)
--	$(CC) $(CFLAGS_ALL_STATIC) -c -o $@ $<
-+obj/%.o: $(srcdir)/%.c $(GENH) $(IMPH)
-+	$(CC_CMD)
- 
--%.lo: $(ARCH)$(ASMSUBARCH)/%.sub
--	$(CC) $(CFLAGS_ALL_SHARED) -c -o $@ $(dir $<)$(shell cat $<)
-+obj/%.lo: $(srcdir)/%.s
-+	$(AS_CMD)
- 
--%.lo: $(ARCH)/%.s
--	$(AS_CMD) $(CFLAGS_ALL_SHARED)
-+obj/%.lo: $(srcdir)/%.S
-+	$(CC_CMD)
- 
--%.lo: %.c $(GENH) $(IMPH)
--	$(CC) $(CFLAGS_ALL_SHARED) -c -o $@ $<
-+obj/%.lo: $(srcdir)/%.c $(GENH) $(IMPH)
-+	$(CC_CMD)
- 
- lib/libc.so: $(LOBJS)
--	$(CC) $(CFLAGS_ALL_SHARED) $(LDFLAGS) -nostdlib -shared \
-+	$(CC) $(CFLAGS_ALL) $(LDFLAGS_ALL) -nostdlib -shared \
- 	-Wl,-e,_dlstart -Wl,-Bsymbolic-functions \
- 	-o $@ $(LOBJS) $(LIBCC)
- 
-@@ -159,21 +165,27 @@ $(EMPTY_LIBS):
- 	rm -f $@
- 	$(AR) rc $@
- 
--lib/%.o: crt/%.o
-+lib/%.o: obj/crt/%.o
- 	cp $< $@
- 
--lib/musl-gcc.specs: tools/musl-gcc.specs.sh config.mak
-+lib/crti.o: obj/crt/$(ARCH)/crti.o
-+	cp $< $@
-+
-+lib/crtn.o: obj/crt/$(ARCH)/crtn.o
-+	cp $< $@
-+
-+lib/musl-gcc.specs: $(srcdir)/tools/musl-gcc.specs.sh config.mak
- 	sh $< "$(includedir)" "$(libdir)" "$(LDSO_PATHNAME)" > $@
- 
--tools/musl-gcc: config.mak
-+obj/musl-gcc: config.mak
- 	printf '#!/bin/sh\nexec "$${REALGCC:-$(WRAPCC_GCC)}" "$$@" -specs "%s/musl-gcc.specs"\n' "$(libdir)" > $@
- 	chmod +x $@
- 
--tools/%-clang: tools/%-clang.in config.mak
-+obj/%-clang: $(srcdir)/tools/%-clang.in config.mak
- 	sed -e 's!@CC@!$(WRAPCC_CLANG)!g' -e 's!@PREFIX@!$(prefix)!g' -e 's!@INCDIR@!$(includedir)!g' -e 's!@LIBDIR@!$(libdir)!g' -e 's!@LDSO@!$(LDSO_PATHNAME)!g' $< > $@
- 	chmod +x $@
- 
--$(DESTDIR)$(bindir)/%: tools/%
-+$(DESTDIR)$(bindir)/%: obj/%
- 	$(INSTALL) -D $< $@
- 
- $(DESTDIR)$(libdir)/%.so: lib/%.so
-@@ -182,10 +194,13 @@ $(DESTDIR)$(libdir)/%.so: lib/%.so
- $(DESTDIR)$(libdir)/%: lib/%
- 	$(INSTALL) -D -m 644 $< $@
- 
--$(DESTDIR)$(includedir)/bits/%: arch/$(ARCH)/bits/%
-+$(DESTDIR)$(includedir)/bits/%: $(srcdir)/arch/$(ARCH)/bits/%
-+	$(INSTALL) -D -m 644 $< $@
-+
-+$(DESTDIR)$(includedir)/bits/%: obj/include/bits/%
- 	$(INSTALL) -D -m 644 $< $@
- 
--$(DESTDIR)$(includedir)/%: include/%
-+$(DESTDIR)$(includedir)/%: $(srcdir)/include/%
- 	$(INSTALL) -D -m 644 $< $@
- 
- $(DESTDIR)$(LDSO_PATHNAME): $(DESTDIR)$(libdir)/libc.so
-@@ -195,12 +210,12 @@ install-libs: $(ALL_LIBS:lib/%=$(DESTDIR
- 
- install-headers: $(ALL_INCLUDES:include/%=$(DESTDIR)$(includedir)/%)
- 
--install-tools: $(ALL_TOOLS:tools/%=$(DESTDIR)$(bindir)/%)
-+install-tools: $(ALL_TOOLS:obj/%=$(DESTDIR)$(bindir)/%)
- 
- musl-git-%.tar.gz: .git
--	 git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ $(patsubst musl-git-%.tar.gz,%,$@)
-+	 git --git-dir=$(srcdir)/.git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ $(patsubst musl-git-%.tar.gz,%,$@)
- 
- musl-%.tar.gz: .git
--	 git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ v$(patsubst musl-%.tar.gz,%,$@)
-+	 git --git-dir=$(srcdir)/.git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ v$(patsubst musl-%.tar.gz,%,$@)
- 
- .PHONY: all clean install install-libs install-headers install-tools
---- a/arch/aarch64/atomic.h
-+++ /dev/null
-@@ -1,206 +0,0 @@
--#ifndef _INTERNAL_ATOMIC_H
--#define _INTERNAL_ATOMIC_H
--
--#include <stdint.h>
--
--static inline int a_ctz_64(uint64_t x)
--{
--	__asm__(
--		"	rbit %0, %1\n"
--		"	clz %0, %0\n"
--		: "=r"(x) : "r"(x));
--	return x;
--}
--
--static inline int a_ctz_l(unsigned long x)
--{
--	return a_ctz_64(x);
--}
--
--static inline void a_barrier()
--{
--	__asm__ __volatile__("dmb ish");
--}
--
--static inline void *a_cas_p(volatile void *p, void *t, void *s)
--{
--	void *old;
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"1:	ldxr %0,%3\n"
--		"	cmp %0,%1\n"
--		"	b.ne 1f\n"
--		"	stxr %w0,%2,%3\n"
--		"	cbnz %w0,1b\n"
--		"	mov %0,%1\n"
--		"1:	dmb ish\n"
--		: "=&r"(old)
--		: "r"(t), "r"(s), "Q"(*(long*)p)
--		: "memory", "cc");
--	return old;
--}
--
--static inline int a_cas(volatile int *p, int t, int s)
--{
--	int old;
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"1:	ldxr %w0,%3\n"
--		"	cmp %w0,%w1\n"
--		"	b.ne 1f\n"
--		"	stxr %w0,%w2,%3\n"
--		"	cbnz %w0,1b\n"
--		"	mov %w0,%w1\n"
--		"1:	dmb ish\n"
--		: "=&r"(old)
--		: "r"(t), "r"(s), "Q"(*p)
--		: "memory", "cc");
--	return old;
--}
--
--static inline int a_swap(volatile int *x, int v)
--{
--	int old, tmp;
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"1:	ldxr %w0,%3\n"
--		"	stxr %w1,%w2,%3\n"
--		"	cbnz %w1,1b\n"
--		"	dmb ish\n"
--		: "=&r"(old), "=&r"(tmp)
--		: "r"(v), "Q"(*x)
--		: "memory", "cc" );
--	return old;
--}
--
--static inline int a_fetch_add(volatile int *x, int v)
--{
--	int old, tmp;
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"1:	ldxr %w0,%3\n"
--		"	add %w0,%w0,%w2\n"
--		"	stxr %w1,%w0,%3\n"
--		"	cbnz %w1,1b\n"
--		"	dmb ish\n"
--		: "=&r"(old), "=&r"(tmp)
--		: "r"(v), "Q"(*x)
--		: "memory", "cc" );
--	return old-v;
--}
--
--static inline void a_inc(volatile int *x)
--{
--	int tmp, tmp2;
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"1:	ldxr %w0,%2\n"
--		"	add %w0,%w0,#1\n"
--		"	stxr %w1,%w0,%2\n"
--		"	cbnz %w1,1b\n"
--		"	dmb ish\n"
--		: "=&r"(tmp), "=&r"(tmp2)
--		: "Q"(*x)
--		: "memory", "cc" );
--}
--
--static inline void a_dec(volatile int *x)
--{
--	int tmp, tmp2;
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"1:	ldxr %w0,%2\n"
--		"	sub %w0,%w0,#1\n"
--		"	stxr %w1,%w0,%2\n"
--		"	cbnz %w1,1b\n"
--		"	dmb ish\n"
--		: "=&r"(tmp), "=&r"(tmp2)
--		: "Q"(*x)
--		: "memory", "cc" );
--}
--
--static inline void a_and_64(volatile uint64_t *p, uint64_t v)
--{
--	int tmp, tmp2;
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"1:	ldxr %0,%3\n"
--		"	and %0,%0,%2\n"
--		"	stxr %w1,%0,%3\n"
--		"	cbnz %w1,1b\n"
--		"	dmb ish\n"
--		: "=&r"(tmp), "=&r"(tmp2)
--		: "r"(v), "Q"(*p)
--		: "memory", "cc" );
--}
--
--static inline void a_and(volatile int *p, int v)
--{
--	int tmp, tmp2;
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"1:	ldxr %w0,%3\n"
--		"	and %w0,%w0,%w2\n"
--		"	stxr %w1,%w0,%3\n"
--		"	cbnz %w1,1b\n"
--		"	dmb ish\n"
--		: "=&r"(tmp), "=&r"(tmp2)
--		: "r"(v), "Q"(*p)
--		: "memory", "cc" );
--}
--
--static inline void a_or_64(volatile uint64_t *p, uint64_t v)
--{
--	int tmp, tmp2;
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"1:	ldxr %0,%3\n"
--		"	orr %0,%0,%2\n"
--		"	stxr %w1,%0,%3\n"
--		"	cbnz %w1,1b\n"
--		"	dmb ish\n"
--		: "=&r"(tmp), "=&r"(tmp2)
--		: "r"(v), "Q"(*p)
--		: "memory", "cc" );
--}
--
--static inline void a_or_l(volatile void *p, long v)
--{
--	return a_or_64(p, v);
--}
--
--static inline void a_or(volatile int *p, int v)
--{
--	int tmp, tmp2;
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"1:	ldxr %w0,%3\n"
--		"	orr %w0,%w0,%w2\n"
--		"	stxr %w1,%w0,%3\n"
--		"	cbnz %w1,1b\n"
--		"	dmb ish\n"
--		: "=&r"(tmp), "=&r"(tmp2)
--		: "r"(v), "Q"(*p)
--		: "memory", "cc" );
--}
--
--static inline void a_store(volatile int *p, int x)
--{
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"	str %w1,%0\n"
--		"	dmb ish\n"
--		: "=m"(*p)
--		: "r"(x)
--		: "memory", "cc" );
--}
--
--#define a_spin a_barrier
--
--static inline void a_crash()
--{
--	*(volatile char *)0=0;
--}
--
--
--#endif
---- /dev/null
-+++ b/arch/aarch64/atomic_arch.h
-@@ -0,0 +1,53 @@
-+#define a_ll a_ll
-+static inline int a_ll(volatile int *p)
-+{
-+	int v;
-+	__asm__ __volatile__ ("ldxr %0, %1" : "=r"(v) : "Q"(*p));
-+	return v;
-+}
-+
-+#define a_sc a_sc
-+static inline int a_sc(volatile int *p, int v)
-+{
-+	int r;
-+	__asm__ __volatile__ ("stxr %w0,%1,%2" : "=&r"(r) : "r"(v), "Q"(*p) : "memory");
-+	return !r;
-+}
-+
-+#define a_barrier a_barrier
-+static inline void a_barrier()
-+{
-+	__asm__ __volatile__ ("dmb ish" : : : "memory");
-+}
-+
-+#define a_pre_llsc a_barrier
-+#define a_post_llsc a_barrier
-+
-+#define a_cas_p a_cas_p
-+static inline void *a_cas_p(volatile void *p, void *t, void *s)
-+{
-+	void *old;
-+	__asm__ __volatile__(
-+		"	dmb ish\n"
-+		"1:	ldxr %0,%3\n"
-+		"	cmp %0,%1\n"
-+		"	b.ne 1f\n"
-+		"	stxr %w0,%2,%3\n"
-+		"	cbnz %w0,1b\n"
-+		"	mov %0,%1\n"
-+		"1:	dmb ish\n"
-+		: "=&r"(old)
-+		: "r"(t), "r"(s), "Q"(*(void *volatile *)p)
-+		: "memory", "cc");
-+	return old;
-+}
-+
-+#define a_ctz_64 a_ctz_64
-+static inline int a_ctz_64(uint64_t x)
-+{
-+	__asm__(
-+		"	rbit %0, %1\n"
-+		"	clz %0, %0\n"
-+		: "=r"(x) : "r"(x));
-+	return x;
-+}
---- a/arch/aarch64/pthread_arch.h
-+++ b/arch/aarch64/pthread_arch.h
-@@ -8,4 +8,4 @@ static inline struct pthread *__pthread_
- #define TLS_ABOVE_TP
- #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 16)
- 
--#define CANCEL_REG_IP 33
-+#define MC_PC pc
---- a/arch/arm/atomic.h
-+++ /dev/null
-@@ -1,261 +0,0 @@
--#ifndef _INTERNAL_ATOMIC_H
--#define _INTERNAL_ATOMIC_H
--
--#include <stdint.h>
--
--static inline int a_ctz_l(unsigned long x)
--{
--	static const char debruijn32[32] = {
--		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
--		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
--	};
--	return debruijn32[(x&-x)*0x076be629 >> 27];
--}
--
--static inline int a_ctz_64(uint64_t x)
--{
--	uint32_t y = x;
--	if (!y) {
--		y = x>>32;
--		return 32 + a_ctz_l(y);
--	}
--	return a_ctz_l(y);
--}
--
--#if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ ||  __ARM_ARCH >= 7
--
--static inline void a_barrier()
--{
--	__asm__ __volatile__("dmb ish");
--}
--
--static inline int a_cas(volatile int *p, int t, int s)
--{
--	int old;
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"1:	ldrex %0,%3\n"
--		"	cmp %0,%1\n"
--		"	bne 1f\n"
--		"	strex %0,%2,%3\n"
--		"	cmp %0, #0\n"
--		"	bne 1b\n"
--		"	mov %0, %1\n"
--		"1:	dmb ish\n"
--		: "=&r"(old)
--		: "r"(t), "r"(s), "Q"(*p)
--		: "memory", "cc" );
--	return old;
--}
--
--static inline int a_swap(volatile int *x, int v)
--{
--	int old, tmp;
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"1:	ldrex %0,%3\n"
--		"	strex %1,%2,%3\n"
--		"	cmp %1, #0\n"
--		"	bne 1b\n"
--		"	dmb ish\n"
--		: "=&r"(old), "=&r"(tmp)
--		: "r"(v), "Q"(*x)
--		: "memory", "cc" );
--	return old;
--}
--
--static inline int a_fetch_add(volatile int *x, int v)
--{
--	int old, tmp;
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"1:	ldrex %0,%3\n"
--		"	add %0,%0,%2\n"
--		"	strex %1,%0,%3\n"
--		"	cmp %1, #0\n"
--		"	bne 1b\n"
--		"	dmb ish\n"
--		: "=&r"(old), "=&r"(tmp)
--		: "r"(v), "Q"(*x)
--		: "memory", "cc" );
--	return old-v;
--}
--
--static inline void a_inc(volatile int *x)
--{
--	int tmp, tmp2;
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"1:	ldrex %0,%2\n"
--		"	add %0,%0,#1\n"
--		"	strex %1,%0,%2\n"
--		"	cmp %1, #0\n"
--		"	bne 1b\n"
--		"	dmb ish\n"
--		: "=&r"(tmp), "=&r"(tmp2)
--		: "Q"(*x)
--		: "memory", "cc" );
--}
--
--static inline void a_dec(volatile int *x)
--{
--	int tmp, tmp2;
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"1:	ldrex %0,%2\n"
--		"	sub %0,%0,#1\n"
--		"	strex %1,%0,%2\n"
--		"	cmp %1, #0\n"
--		"	bne 1b\n"
--		"	dmb ish\n"
--		: "=&r"(tmp), "=&r"(tmp2)
--		: "Q"(*x)
--		: "memory", "cc" );
--}
--
--static inline void a_and(volatile int *x, int v)
--{
--	int tmp, tmp2;
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"1:	ldrex %0,%3\n"
--		"	and %0,%0,%2\n"
--		"	strex %1,%0,%3\n"
--		"	cmp %1, #0\n"
--		"	bne 1b\n"
--		"	dmb ish\n"
--		: "=&r"(tmp), "=&r"(tmp2)
--		: "r"(v), "Q"(*x)
--		: "memory", "cc" );
--}
--
--static inline void a_or(volatile int *x, int v)
--{
--	int tmp, tmp2;
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"1:	ldrex %0,%3\n"
--		"	orr %0,%0,%2\n"
--		"	strex %1,%0,%3\n"
--		"	cmp %1, #0\n"
--		"	bne 1b\n"
--		"	dmb ish\n"
--		: "=&r"(tmp), "=&r"(tmp2)
--		: "r"(v), "Q"(*x)
--		: "memory", "cc" );
--}
--
--static inline void a_store(volatile int *p, int x)
--{
--	__asm__ __volatile__(
--		"	dmb ish\n"
--		"	str %1,%0\n"
--		"	dmb ish\n"
--		: "=m"(*p)
--		: "r"(x)
--		: "memory", "cc" );
--}
--
--#else
--
--int __a_cas(int, int, volatile int *) __attribute__((__visibility__("hidden")));
--#define __k_cas __a_cas
--
--static inline void a_barrier()
--{
--	__asm__ __volatile__("bl __a_barrier"
--		: : : "memory", "cc", "ip", "lr" );
--}
--
--static inline int a_cas(volatile int *p, int t, int s)
--{
--	int old;
--	for (;;) {
--		if (!__k_cas(t, s, p))
--			return t;
--		if ((old=*p) != t)
--			return old;
--	}
--}
--
--static inline int a_swap(volatile int *x, int v)
--{
--	int old;
--	do old = *x;
--	while (__k_cas(old, v, x));
--	return old;
--}
--
--static inline int a_fetch_add(volatile int *x, int v)
--{
--	int old;
--	do old = *x;
--	while (__k_cas(old, old+v, x));
--	return old;
--}
--
--static inline void a_inc(volatile int *x)
--{
--	a_fetch_add(x, 1);
--}
--
--static inline void a_dec(volatile int *x)
--{
--	a_fetch_add(x, -1);
--}
--
--static inline void a_store(volatile int *p, int x)
--{
--	a_barrier();
--	*p = x;
--	a_barrier();
--}
--
--static inline void a_and(volatile int *p, int v)
--{
--	int old;
--	do old = *p;
--	while (__k_cas(old, old&v, p));
--}
--
--static inline void a_or(volatile int *p, int v)
--{
--	int old;
--	do old = *p;
--	while (__k_cas(old, old|v, p));
--}
--
--#endif
--
--static inline void *a_cas_p(volatile void *p, void *t, void *s)
--{
--	return (void *)a_cas(p, (int)t, (int)s);
--}
--
--#define a_spin a_barrier
--
--static inline void a_crash()
--{
--	*(volatile char *)0=0;
--}
--
--static inline void a_or_l(volatile void *p, long v)
--{
--	a_or(p, v);
--}
--
--static inline void a_and_64(volatile uint64_t *p, uint64_t v)
--{
--	union { uint64_t v; uint32_t r[2]; } u = { v };
--	a_and((int *)p, u.r[0]);
--	a_and((int *)p+1, u.r[1]);
--}
--
--static inline void a_or_64(volatile uint64_t *p, uint64_t v)
--{
--	union { uint64_t v; uint32_t r[2]; } u = { v };
--	a_or((int *)p, u.r[0]);
--	a_or((int *)p+1, u.r[1]);
--}
--
--#endif
---- /dev/null
-+++ b/arch/arm/atomic_arch.h
-@@ -0,0 +1,64 @@
-+__attribute__((__visibility__("hidden")))
-+extern const void *__arm_atomics[3]; /* gettp, cas, barrier */
-+
-+#if ((__ARM_ARCH_6__ || __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__) && !__thumb__) \
-+ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ ||  __ARM_ARCH >= 7
-+
-+#define a_ll a_ll
-+static inline int a_ll(volatile int *p)
-+{
-+	int v;
-+	__asm__ __volatile__ ("ldrex %0, %1" : "=r"(v) : "Q"(*p));
-+	return v;
-+}
-+
-+#define a_sc a_sc
-+static inline int a_sc(volatile int *p, int v)
-+{
-+	int r;
-+	__asm__ __volatile__ ("strex %0,%1,%2" : "=&r"(r) : "r"(v), "Q"(*p) : "memory");
-+	return !r;
-+}
-+
-+#if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ ||  __ARM_ARCH >= 7
-+
-+#define a_barrier a_barrier
-+static inline void a_barrier()
-+{
-+	__asm__ __volatile__ ("dmb ish" : : : "memory");
-+}
-+
-+#endif
-+
-+#define a_pre_llsc a_barrier
-+#define a_post_llsc a_barrier
-+
-+#else
-+
-+#define a_cas a_cas
-+static inline int a_cas(volatile int *p, int t, int s)
-+{
-+	for (;;) {
-+		register int r0 __asm__("r0") = t;
-+		register int r1 __asm__("r1") = s;
-+		register volatile int *r2 __asm__("r2") = p;
-+		int old;
-+		__asm__ __volatile__ (
-+			"bl __a_cas"
-+			: "+r"(r0) : "r"(r1), "r"(r2)
-+			: "memory", "r3", "lr", "ip", "cc" );
-+		if (!r0) return t;
-+		if ((old=*p)!=t) return old;
-+	}
-+}
-+
-+#endif
-+
-+#ifndef a_barrier
-+#define a_barrier a_barrier
-+static inline void a_barrier()
-+{
-+	__asm__ __volatile__("bl __a_barrier"
-+		: : : "memory", "cc", "ip", "lr" );
-+}
-+#endif
---- a/arch/arm/pthread_arch.h
-+++ b/arch/arm/pthread_arch.h
-@@ -27,4 +27,4 @@ static inline pthread_t __pthread_self()
- #define TLS_ABOVE_TP
- #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 8)
- 
--#define CANCEL_REG_IP 18
-+#define MC_PC arm_pc
---- a/arch/arm/reloc.h
-+++ b/arch/arm/reloc.h
-@@ -6,10 +6,10 @@
- #define ENDIAN_SUFFIX ""
- #endif
- 
--#if __SOFTFP__
--#define FP_SUFFIX ""
--#else
-+#if __ARM_PCS_VFP
- #define FP_SUFFIX "hf"
-+#else
-+#define FP_SUFFIX ""
- #endif
- 
- #define LDSO_ARCH "arm" ENDIAN_SUFFIX FP_SUFFIX
-@@ -28,10 +28,5 @@
- #define REL_TPOFF       R_ARM_TLS_TPOFF32
- //#define REL_TLSDESC     R_ARM_TLS_DESC
- 
--#ifdef __thumb__
- #define CRTJMP(pc,sp) __asm__ __volatile__( \
- 	"mov sp,%1 ; bx %0" : : "r"(pc), "r"(sp) : "memory" )
--#else
--#define CRTJMP(pc,sp) __asm__ __volatile__( \
--	"mov sp,%1 ; tst %0,#1 ; moveq pc,%0 ; bx %0" : : "r"(pc), "r"(sp) : "memory" )
--#endif
---- a/arch/arm/src/__aeabi_atexit.c
-+++ /dev/null
-@@ -1,6 +0,0 @@
--int __cxa_atexit(void (*func)(void *), void *arg, void *dso);
--
--int __aeabi_atexit (void *obj, void (*func) (void *), void *d)
--{
--	return __cxa_atexit (func, obj, d);
--}
---- a/arch/arm/src/__aeabi_memclr.c
-+++ /dev/null
-@@ -1,9 +0,0 @@
--#include <string.h>
--#include "libc.h"
--
--void __aeabi_memclr(void *dest, size_t n)
--{
--	memset(dest, 0, n);
--}
--weak_alias(__aeabi_memclr, __aeabi_memclr4);
--weak_alias(__aeabi_memclr, __aeabi_memclr8);
---- a/arch/arm/src/__aeabi_memcpy.c
-+++ /dev/null
-@@ -1,9 +0,0 @@
--#include <string.h>
--#include "libc.h"
--
--void __aeabi_memcpy(void *restrict dest, const void *restrict src, size_t n)
--{
--	memcpy(dest, src, n);
--}
--weak_alias(__aeabi_memcpy, __aeabi_memcpy4);
--weak_alias(__aeabi_memcpy, __aeabi_memcpy8);
---- a/arch/arm/src/__aeabi_memmove.c
-+++ /dev/null
-@@ -1,9 +0,0 @@
--#include <string.h>
--#include "libc.h"
--
--void __aeabi_memmove(void *dest, const void *src, size_t n)
--{
--	memmove(dest, src, n);
--}
--weak_alias(__aeabi_memmove, __aeabi_memmove4);
--weak_alias(__aeabi_memmove, __aeabi_memmove8);
---- a/arch/arm/src/__aeabi_memset.c
-+++ /dev/null
-@@ -1,9 +0,0 @@
--#include <string.h>
--#include "libc.h"
--
--void __aeabi_memset(void *dest, size_t n, int c)
--{
--	memset(dest, c, n);
--}
--weak_alias(__aeabi_memset, __aeabi_memset4);
--weak_alias(__aeabi_memset, __aeabi_memset8);
---- a/arch/arm/src/__set_thread_area.c
-+++ /dev/null
-@@ -1,49 +0,0 @@
--#include <stdint.h>
--#include <elf.h>
--#include "pthread_impl.h"
--#include "libc.h"
--
--#define HWCAP_TLS (1 << 15)
--
--extern const unsigned char __attribute__((__visibility__("hidden")))
--	__a_barrier_dummy[], __a_barrier_oldkuser[],
--	__a_barrier_v6[], __a_barrier_v7[],
--	__a_cas_dummy[], __a_cas_v6[], __a_cas_v7[],
--	__a_gettp_dummy[];
--
--#define __a_barrier_kuser 0xffff0fa0
--#define __a_cas_kuser 0xffff0fc0
--#define __a_gettp_kuser 0xffff0fe0
--
--extern uintptr_t __attribute__((__visibility__("hidden")))
--	__a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr;
--
--#define SET(op,ver) (__a_##op##_ptr = \
--	(uintptr_t)__a_##op##_##ver - (uintptr_t)__a_##op##_dummy)
--
--int __set_thread_area(void *p)
--{
--#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7
--	if (__hwcap & HWCAP_TLS) {
--		size_t *aux;
--		SET(cas, v7);
--		SET(barrier, v7);
--		for (aux=libc.auxv; *aux; aux+=2) {
--			if (*aux != AT_PLATFORM) continue;
--			const char *s = (void *)aux[1];
--			if (s[0]!='v' || s[1]!='6' || s[2]-'0'<10u) break;
--			SET(cas, v6);
--			SET(barrier, v6);
--			break;
--		}
--	} else {
--		int ver = *(int *)0xffff0ffc;
--		SET(gettp, kuser);
--		SET(cas, kuser);
--		SET(barrier, kuser);
--		if (ver < 2) a_crash();
--		if (ver < 3) SET(barrier, oldkuser);
--	}
--#endif
--	return __syscall(0xf0005, p);
--}
---- a/arch/arm/src/arm/atomics.s
-+++ /dev/null
-@@ -1,116 +0,0 @@
--.text
--
--.global __a_barrier
--.hidden __a_barrier
--.type __a_barrier,%function
--__a_barrier:
--	ldr ip,1f
--	ldr ip,[pc,ip]
--	add pc,pc,ip
--1:	.word __a_barrier_ptr-1b
--.global __a_barrier_dummy
--.hidden __a_barrier_dummy
--__a_barrier_dummy:
--	tst lr,#1
--	moveq pc,lr
--	bx lr
--.global __a_barrier_oldkuser
--.hidden __a_barrier_oldkuser
--__a_barrier_oldkuser:
--	push {r0,r1,r2,r3,ip,lr}
--	mov r1,r0
--	mov r2,sp
--	ldr ip,=0xffff0fc0
--	mov lr,pc
--	mov pc,ip
--	pop {r0,r1,r2,r3,ip,lr}
--	tst lr,#1
--	moveq pc,lr
--	bx lr
--.global __a_barrier_v6
--.hidden __a_barrier_v6
--__a_barrier_v6:
--	mcr p15,0,r0,c7,c10,5
--	bx lr
--.global __a_barrier_v7
--.hidden __a_barrier_v7
--__a_barrier_v7:
--	.word 0xf57ff05b        /* dmb ish */
--	bx lr
--
--.global __a_cas
--.hidden __a_cas
--.type __a_cas,%function
--__a_cas:
--	ldr ip,1f
--	ldr ip,[pc,ip]
--	add pc,pc,ip
--1:	.word __a_cas_ptr-1b
--.global __a_cas_dummy
--.hidden __a_cas_dummy
--__a_cas_dummy:
--	mov r3,r0
--	ldr r0,[r2]
--	subs r0,r3,r0
--	streq r1,[r2]
--	tst lr,#1
--	moveq pc,lr
--	bx lr
--.global __a_cas_v6
--.hidden __a_cas_v6
--__a_cas_v6:
--	mov r3,r0
--	mcr p15,0,r0,c7,c10,5
--1:	.word 0xe1920f9f        /* ldrex r0,[r2] */
--	subs r0,r3,r0
--	.word 0x01820f91        /* strexeq r0,r1,[r2] */
--	teqeq r0,#1
--	beq 1b
--	mcr p15,0,r0,c7,c10,5
--	bx lr
--.global __a_cas_v7
--.hidden __a_cas_v7
--__a_cas_v7:
--	mov r3,r0
--	.word 0xf57ff05b        /* dmb ish */
--1:	.word 0xe1920f9f        /* ldrex r0,[r2] */
--	subs r0,r3,r0
--	.word 0x01820f91        /* strexeq r0,r1,[r2] */
--	teqeq r0,#1
--	beq 1b
--	.word 0xf57ff05b        /* dmb ish */
--	bx lr
--
--.global __aeabi_read_tp
--.type __aeabi_read_tp,%function
--__aeabi_read_tp:
--
--.global __a_gettp
--.hidden __a_gettp
--.type __a_gettp,%function
--__a_gettp:
--	ldr r0,1f
--	ldr r0,[pc,r0]
--	add pc,pc,r0
--1:	.word __a_gettp_ptr-1b
--.global __a_gettp_dummy
--.hidden __a_gettp_dummy
--__a_gettp_dummy:
--	mrc p15,0,r0,c13,c0,3
--	bx lr
--
--.data
--.global __a_barrier_ptr
--.hidden __a_barrier_ptr
--__a_barrier_ptr:
--	.word 0
--
--.global __a_cas_ptr
--.hidden __a_cas_ptr
--__a_cas_ptr:
--	.word 0
--
--.global __a_gettp_ptr
--.hidden __a_gettp_ptr
--__a_gettp_ptr:
--	.word 0
---- a/arch/arm/src/find_exidx.c
-+++ /dev/null
-@@ -1,42 +0,0 @@
--#define _GNU_SOURCE
--#include <link.h>
--#include <stdint.h>
--
--struct find_exidx_data {
--	uintptr_t pc, exidx_start;
--	int exidx_len;
--};
--
--static int find_exidx(struct dl_phdr_info *info, size_t size, void *ptr)
--{
--	struct find_exidx_data *data = ptr;
--	const ElfW(Phdr) *phdr = info->dlpi_phdr;
--	uintptr_t addr, exidx_start = 0;
--	int i, match = 0, exidx_len = 0;
--
--	for (i = info->dlpi_phnum; i > 0; i--, phdr++) {
--		addr = info->dlpi_addr + phdr->p_vaddr;
--		switch (phdr->p_type) {
--		case PT_LOAD:
--			match |= data->pc >= addr && data->pc < addr + phdr->p_memsz;
--			break;
--		case PT_ARM_EXIDX:
--			exidx_start = addr;
--			exidx_len = phdr->p_memsz;
--			break;
--		}
--	}
--	data->exidx_start = exidx_start;
--	data->exidx_len = exidx_len;
--	return match;
--}
--
--uintptr_t __gnu_Unwind_Find_exidx(uintptr_t pc, int *pcount)
--{
--	struct find_exidx_data data;
--	data.pc = pc;
--	if (dl_iterate_phdr(find_exidx, &data) <= 0)
--		return 0;
--	*pcount = data.exidx_len / 8;
--	return data.exidx_start;
--}
---- a/arch/i386/atomic.h
-+++ /dev/null
-@@ -1,110 +0,0 @@
--#ifndef _INTERNAL_ATOMIC_H
--#define _INTERNAL_ATOMIC_H
--
--#include <stdint.h>
--
--static inline int a_ctz_64(uint64_t x)
--{
--	int r;
--	__asm__( "bsf %1,%0 ; jnz 1f ; bsf %2,%0 ; addl $32,%0\n1:"
--		: "=&r"(r) : "r"((unsigned)x), "r"((unsigned)(x>>32)) );
--	return r;
--}
--
--static inline int a_ctz_l(unsigned long x)
--{
--	long r;
--	__asm__( "bsf %1,%0" : "=r"(r) : "r"(x) );
--	return r;
--}
--
--static inline void a_and_64(volatile uint64_t *p, uint64_t v)
--{
--	__asm__( "lock ; andl %1, (%0) ; lock ; andl %2, 4(%0)"
--		: : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" );
--}
--
--static inline void a_or_64(volatile uint64_t *p, uint64_t v)
--{
--	__asm__( "lock ; orl %1, (%0) ; lock ; orl %2, 4(%0)"
--		: : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" );
--}
--
--static inline void a_or_l(volatile void *p, long v)
--{
--	__asm__( "lock ; orl %1, %0"
--		: "=m"(*(long *)p) : "r"(v) : "memory" );
--}
--
--static inline void *a_cas_p(volatile void *p, void *t, void *s)
--{
--	__asm__( "lock ; cmpxchg %3, %1"
--		: "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" );
--	return t;
--}
--
--static inline int a_cas(volatile int *p, int t, int s)
--{
--	__asm__( "lock ; cmpxchg %3, %1"
--		: "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
--	return t;
--}
--
--static inline void a_or(volatile int *p, int v)
--{
--	__asm__( "lock ; orl %1, %0"
--		: "=m"(*p) : "r"(v) : "memory" );
--}
--
--static inline void a_and(volatile int *p, int v)
--{
--	__asm__( "lock ; andl %1, %0"
--		: "=m"(*p) : "r"(v) : "memory" );
--}
--
--static inline int a_swap(volatile int *x, int v)
--{
--	__asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
--	return v;
--}
--
--#define a_xchg a_swap
--
--static inline int a_fetch_add(volatile int *x, int v)
--{
--	__asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
--	return v;
--}
--
--static inline void a_inc(volatile int *x)
--{
--	__asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" );
--}
--
--static inline void a_dec(volatile int *x)
--{
--	__asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" );
--}
--
--static inline void a_store(volatile int *p, int x)
--{
--	__asm__( "movl %1, %0 ; lock ; orl $0,(%%esp)" : "=m"(*p) : "r"(x) : "memory" );
--}
--
--static inline void a_spin()
--{
--	__asm__ __volatile__( "pause" : : : "memory" );
--}
--
--static inline void a_barrier()
--{
--	__asm__ __volatile__( "" : : : "memory" );
--}
--
--static inline void a_crash()
--{
--	__asm__ __volatile__( "hlt" : : : "memory" );
--}
--
--
--#endif
---- /dev/null
-+++ b/arch/i386/atomic_arch.h
-@@ -0,0 +1,109 @@
-+#define a_ctz_64 a_ctz_64
-+static inline int a_ctz_64(uint64_t x)
-+{
-+	int r;
-+	__asm__( "bsf %1,%0 ; jnz 1f ; bsf %2,%0 ; addl $32,%0\n1:"
-+		: "=&r"(r) : "r"((unsigned)x), "r"((unsigned)(x>>32)) );
-+	return r;
-+}
-+
-+#define a_ctz_l a_ctz_l
-+static inline int a_ctz_l(unsigned long x)
-+{
-+	long r;
-+	__asm__( "bsf %1,%0" : "=r"(r) : "r"(x) );
-+	return r;
-+}
-+
-+#define a_and_64 a_and_64
-+static inline void a_and_64(volatile uint64_t *p, uint64_t v)
-+{
-+	__asm__( "lock ; andl %1, (%0) ; lock ; andl %2, 4(%0)"
-+		: : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" );
-+}
-+
-+#define a_or_64 a_or_64
-+static inline void a_or_64(volatile uint64_t *p, uint64_t v)
-+{
-+	__asm__( "lock ; orl %1, (%0) ; lock ; orl %2, 4(%0)"
-+		: : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" );
-+}
-+
-+#define a_or_l a_or_l
-+static inline void a_or_l(volatile void *p, long v)
-+{
-+	__asm__( "lock ; orl %1, %0"
-+		: "=m"(*(long *)p) : "r"(v) : "memory" );
-+}
-+
-+#define a_cas a_cas
-+static inline int a_cas(volatile int *p, int t, int s)
-+{
-+	__asm__( "lock ; cmpxchg %3, %1"
-+		: "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
-+	return t;
-+}
-+
-+#define a_or a_or
-+static inline void a_or(volatile int *p, int v)
-+{
-+	__asm__( "lock ; orl %1, %0"
-+		: "=m"(*p) : "r"(v) : "memory" );
-+}
-+
-+#define a_and a_and
-+static inline void a_and(volatile int *p, int v)
-+{
-+	__asm__( "lock ; andl %1, %0"
-+		: "=m"(*p) : "r"(v) : "memory" );
-+}
-+
-+#define a_swap a_swap
-+static inline int a_swap(volatile int *x, int v)
-+{
-+	__asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
-+	return v;
-+}
-+
-+#define a_fetch_add a_fetch_add
-+static inline int a_fetch_add(volatile int *x, int v)
-+{
-+	__asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
-+	return v;
-+}
-+
-+#define a_inc a_inc
-+static inline void a_inc(volatile int *x)
-+{
-+	__asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" );
-+}
-+
-+#define a_dec a_dec
-+static inline void a_dec(volatile int *x)
-+{
-+	__asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" );
-+}
-+
-+#define a_store a_store
-+static inline void a_store(volatile int *p, int x)
-+{
-+	__asm__( "movl %1, %0 ; lock ; orl $0,(%%esp)" : "=m"(*p) : "r"(x) : "memory" );
-+}
-+
-+#define a_spin a_spin
-+static inline void a_spin()
-+{
-+	__asm__ __volatile__( "pause" : : : "memory" );
-+}
-+
-+#define a_barrier a_barrier
-+static inline void a_barrier()
-+{
-+	__asm__ __volatile__( "" : : : "memory" );
-+}
-+
-+#define a_crash a_crash
-+static inline void a_crash()
-+{
-+	__asm__ __volatile__( "hlt" : : : "memory" );
-+}
---- a/arch/i386/bits/alltypes.h.in
-+++ b/arch/i386/bits/alltypes.h.in
-@@ -26,10 +26,12 @@ TYPEDEF long double float_t;
- TYPEDEF long double double_t;
- #endif
- 
--#ifdef __cplusplus
--TYPEDEF struct { alignas(8) long long __ll; long double __ld; } max_align_t;
--#else
-+#if !defined(__cplusplus)
- TYPEDEF struct { _Alignas(8) long long __ll; long double __ld; } max_align_t;
-+#elif defined(__GNUC__)
-+TYPEDEF struct { __attribute__((__aligned__(8))) long long __ll; long double __ld; } max_align_t;
-+#else
-+TYPEDEF struct { alignas(8) long long __ll; long double __ld; } max_align_t;
- #endif
- 
- TYPEDEF long time_t;
---- a/arch/i386/pthread_arch.h
-+++ b/arch/i386/pthread_arch.h
-@@ -7,4 +7,4 @@ static inline struct pthread *__pthread_
- 
- #define TP_ADJ(p) (p)
- 
--#define CANCEL_REG_IP 14
-+#define MC_PC gregs[REG_EIP]
---- a/arch/microblaze/atomic.h
-+++ /dev/null
-@@ -1,143 +0,0 @@
--#ifndef _INTERNAL_ATOMIC_H
--#define _INTERNAL_ATOMIC_H
--
--#include <stdint.h>
--
--static inline int a_ctz_l(unsigned long x)
--{
--	static const char debruijn32[32] = {
--		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
--		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
--	};
--	return debruijn32[(x&-x)*0x076be629 >> 27];
--}
--
--static inline int a_ctz_64(uint64_t x)
--{
--	uint32_t y = x;
--	if (!y) {
--		y = x>>32;
--		return 32 + a_ctz_l(y);
--	}
--	return a_ctz_l(y);
--}
--
--static inline int a_cas(volatile int *p, int t, int s)
--{
--	register int old, tmp;
--	__asm__ __volatile__ (
--		"	addi %0, r0, 0\n"
--		"1:	lwx %0, %2, r0\n"
--		"	rsubk %1, %0, %3\n"
--		"	bnei %1, 1f\n"
--		"	swx %4, %2, r0\n"
--		"	addic %1, r0, 0\n"
--		"	bnei %1, 1b\n"
--		"1:	"
--		: "=&r"(old), "=&r"(tmp)
--		: "r"(p), "r"(t), "r"(s)
--		: "cc", "memory" );
--	return old;
--}
--
--static inline void *a_cas_p(volatile void *p, void *t, void *s)
--{
--	return (void *)a_cas(p, (int)t, (int)s);
--}
--
--static inline int a_swap(volatile int *x, int v)
--{
--	register int old, tmp;
--	__asm__ __volatile__ (
--		"	addi %0, r0, 0\n"
--		"1:	lwx %0, %2, r0\n"
--		"	swx %3, %2, r0\n"
--		"	addic %1, r0, 0\n"
--		"	bnei %1, 1b\n"
--		"1:	"
--		: "=&r"(old), "=&r"(tmp)
--		: "r"(x), "r"(v)
--		: "cc", "memory" );
--	return old;
--}
--
--static inline int a_fetch_add(volatile int *x, int v)
--{
--	register int new, tmp;
--	__asm__ __volatile__ (
--		"	addi %0, r0, 0\n"
--		"1:	lwx %0, %2, r0\n"
--		"	addk %0, %0, %3\n"
--		"	swx %0, %2, r0\n"
--		"	addic %1, r0, 0\n"
--		"	bnei %1, 1b\n"
--		"1:	"
--		: "=&r"(new), "=&r"(tmp)
--		: "r"(x), "r"(v)
--		: "cc", "memory" );
--	return new-v;
--}
--
--static inline void a_inc(volatile int *x)
--{
--	a_fetch_add(x, 1);
--}
--
--static inline void a_dec(volatile int *x)
--{
--	a_fetch_add(x, -1);
--}
--
--static inline void a_store(volatile int *p, int x)
--{
--	__asm__ __volatile__ (
--		"swi %1, %0"
--		: "=m"(*p) : "r"(x) : "memory" );
--}
--
--#define a_spin a_barrier
--
--static inline void a_barrier()
--{
--	a_cas(&(int){0}, 0, 0);
--}
--
--static inline void a_crash()
--{
--	*(volatile char *)0=0;
--}
--
--static inline void a_and(volatile int *p, int v)
--{
--	int old;
--	do old = *p;
--	while (a_cas(p, old, old&v) != old);
--}
--
--static inline void a_or(volatile int *p, int v)
--{
--	int old;
--	do old = *p;
--	while (a_cas(p, old, old|v) != old);
--}
--
--static inline void a_or_l(volatile void *p, long v)
--{
--	a_or(p, v);
--}
--
--static inline void a_and_64(volatile uint64_t *p, uint64_t v)
--{
--	union { uint64_t v; uint32_t r[2]; } u = { v };
--	a_and((int *)p, u.r[0]);
--	a_and((int *)p+1, u.r[1]);
--}
--
--static inline void a_or_64(volatile uint64_t *p, uint64_t v)
--{
--	union { uint64_t v; uint32_t r[2]; } u = { v };
--	a_or((int *)p, u.r[0]);
--	a_or((int *)p+1, u.r[1]);
--}
--
--#endif
---- /dev/null
-+++ b/arch/microblaze/atomic_arch.h
-@@ -0,0 +1,53 @@
-+#define a_cas a_cas
-+static inline int a_cas(volatile int *p, int t, int s)
-+{
-+	register int old, tmp;
-+	__asm__ __volatile__ (
-+		"	addi %0, r0, 0\n"
-+		"1:	lwx %0, %2, r0\n"
-+		"	rsubk %1, %0, %3\n"
-+		"	bnei %1, 1f\n"
-+		"	swx %4, %2, r0\n"
-+		"	addic %1, r0, 0\n"
-+		"	bnei %1, 1b\n"
-+		"1:	"
-+		: "=&r"(old), "=&r"(tmp)
-+		: "r"(p), "r"(t), "r"(s)
-+		: "cc", "memory" );
-+	return old;
-+}
-+
-+#define a_swap a_swap
-+static inline int a_swap(volatile int *x, int v)
-+{
-+	register int old, tmp;
-+	__asm__ __volatile__ (
-+		"	addi %0, r0, 0\n"
-+		"1:	lwx %0, %2, r0\n"
-+		"	swx %3, %2, r0\n"
-+		"	addic %1, r0, 0\n"
-+		"	bnei %1, 1b\n"
-+		"1:	"
-+		: "=&r"(old), "=&r"(tmp)
-+		: "r"(x), "r"(v)
-+		: "cc", "memory" );
-+	return old;
-+}
-+
-+#define a_fetch_add a_fetch_add
-+static inline int a_fetch_add(volatile int *x, int v)
-+{
-+	register int new, tmp;
-+	__asm__ __volatile__ (
-+		"	addi %0, r0, 0\n"
-+		"1:	lwx %0, %2, r0\n"
-+		"	addk %0, %0, %3\n"
-+		"	swx %0, %2, r0\n"
-+		"	addic %1, r0, 0\n"
-+		"	bnei %1, 1b\n"
-+		"1:	"
-+		: "=&r"(new), "=&r"(tmp)
-+		: "r"(x), "r"(v)
-+		: "cc", "memory" );
-+	return new-v;
-+}
---- a/arch/microblaze/pthread_arch.h
-+++ b/arch/microblaze/pthread_arch.h
-@@ -7,4 +7,4 @@ static inline struct pthread *__pthread_
- 
- #define TP_ADJ(p) (p)
- 
--#define CANCEL_REG_IP 32
-+#define MC_PC regs.pc
---- a/arch/mips/atomic.h
-+++ /dev/null
-@@ -1,205 +0,0 @@
--#ifndef _INTERNAL_ATOMIC_H
--#define _INTERNAL_ATOMIC_H
--
--#include <stdint.h>
--
--static inline int a_ctz_l(unsigned long x)
--{
--	static const char debruijn32[32] = {
--		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
--		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
--	};
--	return debruijn32[(x&-x)*0x076be629 >> 27];
--}
--
--static inline int a_ctz_64(uint64_t x)
--{
--	uint32_t y = x;
--	if (!y) {
--		y = x>>32;
--		return 32 + a_ctz_l(y);
--	}
--	return a_ctz_l(y);
--}
--
--static inline int a_cas(volatile int *p, int t, int s)
--{
--	int dummy;
--	__asm__ __volatile__(
--		".set push\n"
--		".set mips2\n"
--		".set noreorder\n"
--		"	sync\n"
--		"1:	ll %0, %2\n"
--		"	bne %0, %3, 1f\n"
--		"	addu %1, %4, $0\n"
--		"	sc %1, %2\n"
--		"	beq %1, $0, 1b\n"
--		"	nop\n"
--		"	sync\n"
--		"1:	\n"
--		".set pop\n"
--		: "=&r"(t), "=&r"(dummy), "+m"(*p) : "r"(t), "r"(s) : "memory" );
--        return t;
--}
--
--static inline void *a_cas_p(volatile void *p, void *t, void *s)
--{
--	return (void *)a_cas(p, (int)t, (int)s);
--}
--
--static inline int a_swap(volatile int *x, int v)
--{
--	int old, dummy;
--	__asm__ __volatile__(
--		".set push\n"
--		".set mips2\n"
--		".set noreorder\n"
--		"	sync\n"
--		"1:	ll %0, %2\n"
--		"	addu %1, %3, $0\n"
--		"	sc %1, %2\n"
--		"	beq %1, $0, 1b\n"
--		"	nop\n"
--		"	sync\n"
--		".set pop\n"
--		: "=&r"(old), "=&r"(dummy), "+m"(*x) : "r"(v) : "memory" );
--        return old;
--}
--
--static inline int a_fetch_add(volatile int *x, int v)
--{
--	int old, dummy;
--	__asm__ __volatile__(
--		".set push\n"
--		".set mips2\n"
--		".set noreorder\n"
--		"	sync\n"
--		"1:	ll %0, %2\n"
--		"	addu %1, %0, %3\n"
--		"	sc %1, %2\n"
--		"	beq %1, $0, 1b\n"
--		"	nop\n"
--		"	sync\n"
--		".set pop\n"
--		: "=&r"(old), "=&r"(dummy), "+m"(*x) : "r"(v) : "memory" );
--        return old;
--}
--
--static inline void a_inc(volatile int *x)
--{
--	int dummy;
--	__asm__ __volatile__(
--		".set push\n"
--		".set mips2\n"
--		".set noreorder\n"
--		"	sync\n"
--		"1:	ll %0, %1\n"
--		"	addu %0, %0, 1\n"
--		"	sc %0, %1\n"
--		"	beq %0, $0, 1b\n"
--		"	nop\n"
--		"	sync\n"
--		".set pop\n"
--		: "=&r"(dummy), "+m"(*x) : : "memory" );
--}
--
--static inline void a_dec(volatile int *x)
--{
--	int dummy;
--	__asm__ __volatile__(
--		".set push\n"
--		".set mips2\n"
--		".set noreorder\n"
--		"	sync\n"
--		"1:	ll %0, %1\n"
--		"	subu %0, %0, 1\n"
--		"	sc %0, %1\n"
--		"	beq %0, $0, 1b\n"
--		"	nop\n"
--		"	sync\n"
--		".set pop\n"
--		: "=&r"(dummy), "+m"(*x) : : "memory" );
--}
--
--static inline void a_store(volatile int *p, int x)
--{
--	__asm__ __volatile__(
--		".set push\n"
--		".set mips2\n"
--		".set noreorder\n"
--		"	sync\n"
--		"	sw %1, %0\n"
--		"	sync\n"
--		".set pop\n"
--		: "+m"(*p) : "r"(x) : "memory" );
--}
--
--#define a_spin a_barrier
--
--static inline void a_barrier()
--{
--	a_cas(&(int){0}, 0, 0);
--}
--
--static inline void a_crash()
--{
--	*(volatile char *)0=0;
--}
--
--static inline void a_and(volatile int *p, int v)
--{
--	int dummy;
--	__asm__ __volatile__(
--		".set push\n"
--		".set mips2\n"
--		".set noreorder\n"
--		"	sync\n"
--		"1:	ll %0, %1\n"
--		"	and %0, %0, %2\n"
--		"	sc %0, %1\n"
--		"	beq %0, $0, 1b\n"
--		"	nop\n"
--		"	sync\n"
--		".set pop\n"
--		: "=&r"(dummy), "+m"(*p) : "r"(v) : "memory" );
--}
--
--static inline void a_or(volatile int *p, int v)
--{
--	int dummy;
--	__asm__ __volatile__(
--		".set push\n"
--		".set mips2\n"
--		".set noreorder\n"
--		"	sync\n"
--		"1:	ll %0, %1\n"
--		"	or %0, %0, %2\n"
--		"	sc %0, %1\n"
--		"	beq %0, $0, 1b\n"
--		"	nop\n"
--		"	sync\n"
--		".set pop\n"
--		: "=&r"(dummy), "+m"(*p) : "r"(v) : "memory" );
--}
--
--static inline void a_or_l(volatile void *p, long v)
--{
--	a_or(p, v);
--}
--
--static inline void a_and_64(volatile uint64_t *p, uint64_t v)
--{
--	union { uint64_t v; uint32_t r[2]; } u = { v };
--	a_and((int *)p, u.r[0]);
--	a_and((int *)p+1, u.r[1]);
--}
--
--static inline void a_or_64(volatile uint64_t *p, uint64_t v)
--{
--	union { uint64_t v; uint32_t r[2]; } u = { v };
--	a_or((int *)p, u.r[0]);
--	a_or((int *)p+1, u.r[1]);
--}
--
--#endif
---- /dev/null
-+++ b/arch/mips/atomic_arch.h
-@@ -0,0 +1,39 @@
-+#define a_ll a_ll
-+static inline int a_ll(volatile int *p)
-+{
-+	int v;
-+	__asm__ __volatile__ (
-+		".set push ; .set mips2\n\t"
-+		"ll %0, %1"
-+		"\n\t.set pop"
-+		: "=r"(v) : "m"(*p));
-+	return v;
-+}
-+
-+#define a_sc a_sc
-+static inline int a_sc(volatile int *p, int v)
-+{
-+	int r;
-+	__asm__ __volatile__ (
-+		".set push ; .set mips2\n\t"
-+		"sc %0, %1"
-+		"\n\t.set pop"
-+		: "=r"(r), "=m"(*p) : "0"(v) : "memory");
-+	return r;
-+}
-+
-+#define a_barrier a_barrier
-+static inline void a_barrier()
-+{
-+	/* mips2 sync, but using too many directives causes
-+	 * gcc not to inline it, so encode with .long instead. */
-+	__asm__ __volatile__ (".long 0xf" : : : "memory");
-+#if 0
-+	__asm__ __volatile__ (
-+		".set push ; .set mips2 ; sync ; .set pop"
-+		: : : "memory");
-+#endif
-+}
-+
-+#define a_pre_llsc a_barrier
-+#define a_post_llsc a_barrier
---- a/arch/mips/crt_arch.h
-+++ b/arch/mips/crt_arch.h
-@@ -4,13 +4,16 @@ __asm__(
- ".text \n"
- ".global _" START "\n"
- ".global " START "\n"
-+".global " START "_data\n"
- ".type   _" START ", @function\n"
- ".type   " START ", @function\n"
-+".type   " START "_data, @function\n"
- "_" START ":\n"
- "" START ":\n"
- "	bal 1f \n"
- "	 move $fp, $0 \n"
--"2:	.gpword 2b \n"
-+"" START "_data: \n"
-+"	.gpword " START "_data \n"
- "	.gpword " START "_c \n"
- ".weak _DYNAMIC \n"
- ".hidden _DYNAMIC \n"
---- a/arch/mips/pthread_arch.h
-+++ b/arch/mips/pthread_arch.h
-@@ -16,4 +16,4 @@ static inline struct pthread *__pthread_
- 
- #define DTP_OFFSET 0x8000
- 
--#define CANCEL_REG_IP (3-(union {int __i; char __b;}){1}.__b)
-+#define MC_PC pc
---- a/arch/mips/syscall_arch.h
-+++ b/arch/mips/syscall_arch.h
-@@ -3,9 +3,7 @@
- ((union { long long ll; long l[2]; }){ .ll = x }).l[1]
- #define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))
- 
--#ifdef SHARED
- __attribute__((visibility("hidden")))
--#endif
- long (__syscall)(long, ...);
- 
- #define SYSCALL_RLIM_INFINITY (-1UL/2)
---- a/arch/or1k/atomic.h
-+++ /dev/null
-@@ -1,120 +0,0 @@
--#ifndef _INTERNAL_ATOMIC_H
--#define _INTERNAL_ATOMIC_H
--
--#include <stdint.h>
--
--static inline int a_ctz_l(unsigned long x)
--{
--	static const char debruijn32[32] = {
--		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
--		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
--	};
--	return debruijn32[(x&-x)*0x076be629 >> 27];
--}
--
--static inline int a_ctz_64(uint64_t x)
--{
--	uint32_t y = x;
--	if (!y) {
--		y = x>>32;
--		return 32 + a_ctz_l(y);
--	}
--	return a_ctz_l(y);
--}
--
--static inline int a_cas(volatile int *p, int t, int s)
--{
--	__asm__("1:	l.lwa %0, %1\n"
--		"	l.sfeq %0, %2\n"
--		"	l.bnf 1f\n"
--		"	 l.nop\n"
--		"	l.swa %1, %3\n"
--		"	l.bnf 1b\n"
--		"	 l.nop\n"
--		"1:	\n"
--		: "=&r"(t), "+m"(*p) : "r"(t), "r"(s) : "cc", "memory" );
--        return t;
--}
--
--static inline void *a_cas_p(volatile void *p, void *t, void *s)
--{
--	return (void *)a_cas(p, (int)t, (int)s);
--}
--
--static inline int a_swap(volatile int *x, int v)
--{
--	int old;
--	do old = *x;
--	while (a_cas(x, old, v) != old);
--	return old;
--}
--
--static inline int a_fetch_add(volatile int *x, int v)
--{
--	int old;
--	do old = *x;
--	while (a_cas(x, old, old+v) != old);
--	return old;
--}
--
--static inline void a_inc(volatile int *x)
--{
--	a_fetch_add(x, 1);
--}
--
--static inline void a_dec(volatile int *x)
--{
--	a_fetch_add(x, -1);
--}
--
--static inline void a_store(volatile int *p, int x)
--{
--	a_swap(p, x);
--}
--
--#define a_spin a_barrier
--
--static inline void a_barrier()
--{
--	a_cas(&(int){0}, 0, 0);
--}
--
--static inline void a_crash()
--{
--	*(volatile char *)0=0;
--}
--
--static inline void a_and(volatile int *p, int v)
--{
--	int old;
--	do old = *p;
--	while (a_cas(p, old, old&v) != old);
--}
--
--static inline void a_or(volatile int *p, int v)
--{
--	int old;
--	do old = *p;
--	while (a_cas(p, old, old|v) != old);
--}
--
--static inline void a_or_l(volatile void *p, long v)
--{
--	a_or(p, v);
--}
--
--static inline void a_and_64(volatile uint64_t *p, uint64_t v)
--{
--	union { uint64_t v; uint32_t r[2]; } u = { v };
--	a_and((int *)p, u.r[0]);
--	a_and((int *)p+1, u.r[1]);
--}
--
--static inline void a_or_64(volatile uint64_t *p, uint64_t v)
--{
--	union { uint64_t v; uint32_t r[2]; } u = { v };
--	a_or((int *)p, u.r[0]);
--	a_or((int *)p+1, u.r[1]);
--}
--
--#endif
---- /dev/null
-+++ b/arch/or1k/atomic_arch.h
-@@ -0,0 +1,14 @@
-+#define a_cas a_cas
-+static inline int a_cas(volatile int *p, int t, int s)
-+{
-+	__asm__("1:	l.lwa %0, %1\n"
-+		"	l.sfeq %0, %2\n"
-+		"	l.bnf 1f\n"
-+		"	 l.nop\n"
-+		"	l.swa %1, %3\n"
-+		"	l.bnf 1b\n"
-+		"	 l.nop\n"
-+		"1:	\n"
-+		: "=&r"(t), "+m"(*p) : "r"(t), "r"(s) : "cc", "memory" );
-+        return t;
-+}
---- a/arch/or1k/pthread_arch.h
-+++ b/arch/or1k/pthread_arch.h
-@@ -14,5 +14,4 @@ static inline struct pthread *__pthread_
- #define TLS_ABOVE_TP
- #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
- 
--/* word-offset to 'pc' in mcontext_t */
--#define CANCEL_REG_IP 32
-+#define MC_PC regs.pc
---- a/arch/powerpc/atomic.h
-+++ /dev/null
-@@ -1,126 +0,0 @@
--#ifndef _INTERNAL_ATOMIC_H
--#define _INTERNAL_ATOMIC_H
--
--#include <stdint.h>
--#include <endian.h>
--
--static inline int a_ctz_l(unsigned long x)
--{
--	static const char debruijn32[32] = {
--		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
--		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
--	};
--	return debruijn32[(x&-x)*0x076be629 >> 27];
--}
--
--static inline int a_ctz_64(uint64_t x)
--{
--	uint32_t y = x;
--	if (!y) {
--		y = x>>32;
--		return 32 + a_ctz_l(y);
--	}
--	return a_ctz_l(y);
--}
--
--static inline int a_cas(volatile int *p, int t, int s)
--{
--	__asm__("\n"
--		"	sync\n"
--		"1:	lwarx %0, 0, %4\n"
--		"	cmpw %0, %2\n"
--		"	bne 1f\n"
--		"	stwcx. %3, 0, %4\n"
--		"	bne- 1b\n"
--		"	isync\n"
--		"1:	\n"
--		: "=&r"(t), "+m"(*p) : "r"(t), "r"(s), "r"(p) : "cc", "memory" );
--        return t;
--}
--
--static inline void *a_cas_p(volatile void *p, void *t, void *s)
--{
--	return (void *)a_cas(p, (int)t, (int)s);
--}
--
--static inline int a_swap(volatile int *x, int v)
--{
--	int old;
--	do old = *x;
--	while (a_cas(x, old, v) != old);
--	return old;
--}
--
--static inline int a_fetch_add(volatile int *x, int v)
--{
--	int old;
--	do old = *x;
--	while (a_cas(x, old, old+v) != old);
--	return old;
--}
--
--static inline void a_inc(volatile int *x)
--{
--	a_fetch_add(x, 1);
--}
--
--static inline void a_dec(volatile int *x)
--{
--	a_fetch_add(x, -1);
--}
--
--static inline void a_store(volatile int *p, int x)
--{
--	__asm__ __volatile__ ("\n"
--		"	sync\n"
--		"	stw %1, %0\n"
--		"	isync\n"
--		: "=m"(*p) : "r"(x) : "memory" );
--}
--
--#define a_spin a_barrier
--
--static inline void a_barrier()
--{
--	a_cas(&(int){0}, 0, 0);
--}
--
--static inline void a_crash()
--{
--	*(volatile char *)0=0;
--}
--
--static inline void a_and(volatile int *p, int v)
--{
--	int old;
--	do old = *p;
--	while (a_cas(p, old, old&v) != old);
--}
--
--static inline void a_or(volatile int *p, int v)
--{
--	int old;
--	do old = *p;
--	while (a_cas(p, old, old|v) != old);
--}
--
--static inline void a_or_l(volatile void *p, long v)
--{
--	a_or(p, v);
--}
--
--static inline void a_and_64(volatile uint64_t *p, uint64_t v)
--{
--	union { uint64_t v; uint32_t r[2]; } u = { v };
--	a_and((int *)p, u.r[0]);
--	a_and((int *)p+1, u.r[1]);
--}
--
--static inline void a_or_64(volatile uint64_t *p, uint64_t v)
--{
--	union { uint64_t v; uint32_t r[2]; } u = { v };
--	a_or((int *)p, u.r[0]);
--	a_or((int *)p+1, u.r[1]);
--}
--
--#endif
---- /dev/null
-+++ b/arch/powerpc/atomic_arch.h
-@@ -0,0 +1,15 @@
-+#define a_cas a_cas
-+static inline int a_cas(volatile int *p, int t, int s)
-+{
-+	__asm__("\n"
-+		"	sync\n"
-+		"1:	lwarx %0, 0, %4\n"
-+		"	cmpw %0, %2\n"
-+		"	bne 1f\n"
-+		"	stwcx. %3, 0, %4\n"
-+		"	bne- 1b\n"
-+		"	isync\n"
-+		"1:	\n"
-+		: "=&r"(t), "+m"(*p) : "r"(t), "r"(s), "r"(p) : "cc", "memory" );
-+        return t;
-+}
---- a/arch/powerpc/pthread_arch.h
-+++ b/arch/powerpc/pthread_arch.h
-@@ -15,9 +15,8 @@ static inline struct pthread *__pthread_
- 
- #define DTP_OFFSET 0x8000
- 
--// offset of the PC register in mcontext_t, divided by the system wordsize
- // the kernel calls the ip "nip", it's the first saved value after the 32
- // GPRs.
--#define CANCEL_REG_IP 32
-+#define MC_PC gregs[32]
- 
- #define CANARY canary_at_end
---- a/arch/sh/atomic.h
-+++ /dev/null
-@@ -1,168 +0,0 @@
--#ifndef _INTERNAL_ATOMIC_H
--#define _INTERNAL_ATOMIC_H
--
--#include <stdint.h>
--
--static inline int a_ctz_l(unsigned long x)
--{
--	static const char debruijn32[32] = {
--		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
--		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
--	};
--	return debruijn32[(x&-x)*0x076be629 >> 27];
--}
--
--static inline int a_ctz_64(uint64_t x)
--{
--	uint32_t y = x;
--	if (!y) {
--		y = x>>32;
--		return 32 + a_ctz_l(y);
--	}
--	return a_ctz_l(y);
--}
--
--#define LLSC_CLOBBERS "r0", "t", "memory"
--#define LLSC_START(mem) "synco\n"  \
--	"0:	movli.l @" mem ", r0\n"
--#define LLSC_END(mem)              \
--	"1:	movco.l r0, @" mem "\n"    \
--	"	bf 0b\n"                   \
--	"	synco\n"
--
--static inline int __sh_cas_llsc(volatile int *p, int t, int s)
--{
--	int old;
--	__asm__ __volatile__(
--		LLSC_START("%1")
--		"	mov r0, %0\n"
--		"	cmp/eq %0, %2\n"
--		"	bf 1f\n"
--		"	mov %3, r0\n"
--		LLSC_END("%1")
--		: "=&r"(old) : "r"(p), "r"(t), "r"(s) : LLSC_CLOBBERS);
--	return old;
--}
--
--static inline int __sh_swap_llsc(volatile int *x, int v)
--{
--	int old;
--	__asm__ __volatile__(
--		LLSC_START("%1")
--		"	mov r0, %0\n"
--		"	mov %2, r0\n"
--		LLSC_END("%1")
--		: "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
--	return old;
--}
--
--static inline int __sh_fetch_add_llsc(volatile int *x, int v)
--{
--	int old;
--	__asm__ __volatile__(
--		LLSC_START("%1")
--		"	mov r0, %0\n"
--		"	add %2, r0\n"
--		LLSC_END("%1")
--		: "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
--	return old;
--}
--
--static inline void __sh_store_llsc(volatile int *p, int x)
--{
--	__asm__ __volatile__(
--		"	synco\n"
--		"	mov.l %1, @%0\n"
--		"	synco\n"
--		: : "r"(p), "r"(x) : "memory");
--}
--
--static inline void __sh_and_llsc(volatile int *x, int v)
--{
--	__asm__ __volatile__(
--		LLSC_START("%0")
--		"	and %1, r0\n"
--		LLSC_END("%0")
--		: : "r"(x), "r"(v) : LLSC_CLOBBERS);
--}
--
--static inline void __sh_or_llsc(volatile int *x, int v)
--{
--	__asm__ __volatile__(
--		LLSC_START("%0")
--		"	or %1, r0\n"
--		LLSC_END("%0")
--		: : "r"(x), "r"(v) : LLSC_CLOBBERS);
--}
--
--#ifdef __SH4A__
--#define a_cas(p,t,s)     __sh_cas_llsc(p,t,s)
--#define a_swap(x,v)      __sh_swap_llsc(x,v)
--#define a_fetch_add(x,v) __sh_fetch_add_llsc(x, v)
--#define a_store(x,v)     __sh_store_llsc(x, v)
--#define a_and(x,v)       __sh_and_llsc(x, v)
--#define a_or(x,v)        __sh_or_llsc(x, v)
--#else
--
--int  __sh_cas(volatile int *, int, int);
--int  __sh_swap(volatile int *, int);
--int  __sh_fetch_add(volatile int *, int);
--void __sh_store(volatile int *, int);
--void __sh_and(volatile int *, int);
--void __sh_or(volatile int *, int);
--
--#define a_cas(p,t,s)     __sh_cas(p,t,s)
--#define a_swap(x,v)      __sh_swap(x,v)
--#define a_fetch_add(x,v) __sh_fetch_add(x, v)
--#define a_store(x,v)     __sh_store(x, v)
--#define a_and(x,v)       __sh_and(x, v)
--#define a_or(x,v)        __sh_or(x, v)
--#endif
--
--static inline void *a_cas_p(volatile void *p, void *t, void *s)
--{
--	return (void *)a_cas(p, (int)t, (int)s);
--}
--
--static inline void a_inc(volatile int *x)
--{
--	a_fetch_add(x, 1);
--}
--
--static inline void a_dec(volatile int *x)
--{
--	a_fetch_add(x, -1);
--}
--
--#define a_spin a_barrier
--
--static inline void a_barrier()
--{
--	a_cas(&(int){0}, 0, 0);
--}
--
--static inline void a_crash()
--{
--	*(volatile char *)0=0;
--}
--
--static inline void a_or_l(volatile void *p, long v)
--{
--	a_or(p, v);
--}
--
--static inline void a_and_64(volatile uint64_t *p, uint64_t v)
--{
--	union { uint64_t v; uint32_t r[2]; } u = { v };
--	a_and((int *)p,   u.r[0]);
--	a_and((int *)p+1, u.r[1]);
--}
--
--static inline void a_or_64(volatile uint64_t *p, uint64_t v)
--{
--	union { uint64_t v; uint32_t r[2]; } u = { v };
--	a_or((int *)p,   u.r[0]);
--	a_or((int *)p+1, u.r[1]);
--}
--
--#endif
---- /dev/null
-+++ b/arch/sh/atomic_arch.h
-@@ -0,0 +1,46 @@
-+#if defined(__SH4A__)
-+
-+#define a_ll a_ll
-+static inline int a_ll(volatile int *p)
-+{
-+	int v;
-+	__asm__ __volatile__ ("movli.l @%1, %0" : "=z"(v) : "r"(p), "m"(*p));
-+	return v;
-+}
-+
-+#define a_sc a_sc
-+static inline int a_sc(volatile int *p, int v)
-+{
-+	int r;
-+	__asm__ __volatile__ (
-+		"movco.l %2, @%3 ; movt %0"
-+		: "=r"(r), "=m"(*p) : "z"(v), "r"(p) : "memory", "cc");
-+	return r;
-+}
-+
-+#define a_barrier a_barrier
-+static inline void a_barrier()
-+{
-+	__asm__ __volatile__ ("synco" : : "memory");
-+}
-+
-+#define a_pre_llsc a_barrier
-+#define a_post_llsc a_barrier
-+
-+#else
-+
-+#define a_cas a_cas
-+__attribute__((__visibility__("hidden"))) extern const void *__sh_cas_ptr;
-+static inline int a_cas(volatile int *p, int t, int s)
-+{
-+	register int r1 __asm__("r1");
-+	register int r2 __asm__("r2") = t;
-+	register int r3 __asm__("r3") = s;
-+	__asm__ __volatile__ (
-+		"jsr @%4 ; nop"
-+		: "=r"(r1), "+r"(r3) : "z"(p), "r"(r2), "r"(__sh_cas_ptr)
-+		: "memory", "pr", "cc");
-+	return r3;
-+}
-+
-+#endif
---- a/arch/sh/crt_arch.h
-+++ b/arch/sh/crt_arch.h
-@@ -22,7 +22,8 @@ START ": \n"
- "	mov.l 1f, r5 \n"
- "	mov.l 1f+4, r6 \n"
- "	add r0, r5 \n"
--"	bsr __fdpic_fixup \n"
-+"	mov.l 4f, r1 \n"
-+"5:	bsrf r1 \n"
- "	 add r0, r6 \n"
- "	mov r0, r12 \n"
- #endif
-@@ -31,11 +32,16 @@ START ": \n"
- "	mov.l r9, @-r15 \n"
- "	mov.l r8, @-r15 \n"
- "	mov #-16, r0 \n"
--"	bsr " START "_c \n"
-+"	mov.l 2f, r1 \n"
-+"3:	bsrf r1 \n"
- "	 and r0, r15 \n"
- ".align 2 \n"
- "1:	.long __ROFIXUP_LIST__@PCREL \n"
- "	.long __ROFIXUP_END__@PCREL + 4 \n"
-+"2:	.long " START "_c@PCREL - (3b+4-.) \n"
-+#ifndef SHARED
-+"4:	.long __fdpic_fixup@PCREL - (5b+4-.) \n"
-+#endif
- );
- 
- #ifndef SHARED
-@@ -53,13 +59,14 @@ START ": \n"
- "	add r0, r5 \n"
- "	mov r15, r4 \n"
- "	mov #-16, r0 \n"
--"	and r0, r15 \n"
--"	bsr " START "_c \n"
--"	nop \n"
-+"	mov.l 2f, r1 \n"
-+"3:	bsrf r1 \n"
-+"	 and r0, r15 \n"
- ".align 2 \n"
- ".weak _DYNAMIC \n"
- ".hidden _DYNAMIC \n"
- "1:	.long _DYNAMIC-. \n"
-+"2:	.long " START "_c@PCREL - (3b+4-.) \n"
- );
- 
- #endif
---- a/arch/sh/pthread_arch.h
-+++ b/arch/sh/pthread_arch.h
-@@ -8,4 +8,4 @@ static inline struct pthread *__pthread_
- #define TLS_ABOVE_TP
- #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 8)
- 
--#define CANCEL_REG_IP 17
-+#define MC_PC sc_pc
---- a/arch/sh/reloc.h
-+++ b/arch/sh/reloc.h
-@@ -32,6 +32,8 @@
- #define REL_DTPOFF      R_SH_TLS_DTPOFF32
- #define REL_TPOFF       R_SH_TLS_TPOFF32
- 
-+#define DL_NOMMU_SUPPORT 1
-+
- #if __SH_FDPIC__
- #define REL_FUNCDESC    R_SH_FUNCDESC
- #define REL_FUNCDESC_VAL R_SH_FUNCDESC_VALUE
---- a/arch/sh/src/__set_thread_area.c
-+++ /dev/null
-@@ -1,34 +0,0 @@
--#include "pthread_impl.h"
--#include "libc.h"
--#include "sh_atomic.h"
--#include <elf.h>
--
--/* Also perform sh-specific init */
--
--#define CPU_HAS_LLSC 0x0040
--
--__attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model, __sh_nommu;
--
--int __set_thread_area(void *p)
--{
--	size_t *aux;
--	__asm__ __volatile__ ( "ldc %0, gbr" : : "r"(p) : "memory" );
--#ifndef __SH4A__
--	if (__hwcap & CPU_HAS_LLSC) {
--		__sh_atomic_model = SH_A_LLSC;
--		return 0;
--	}
--#if !defined(__SH3__) && !defined(__SH4__)
--	for (aux=libc.auxv; *aux; aux+=2) {
--		if (*aux != AT_PLATFORM) continue;
--		const char *s = (void *)aux[1];
--		if (s[0]!='s' || s[1]!='h' || s[2]!='2' || s[3]-'0'<10u) break;
--		__sh_atomic_model = SH_A_IMASK;
--		__sh_nommu = 1;
--		return 0;
--	}
--#endif
--	/* __sh_atomic_model = SH_A_GUSA; */ /* 0, default */
--#endif
--	return 0;
--}
---- a/arch/sh/src/atomic.c
-+++ /dev/null
-@@ -1,158 +0,0 @@
--#ifndef __SH4A__
--
--#include "sh_atomic.h"
--#include "atomic.h"
--#include "libc.h"
--
--static inline unsigned mask()
--{
--	unsigned sr;
--	__asm__ __volatile__ ( "\n"
--	"	stc sr,r0 \n"
--	"	mov r0,%0 \n"
--	"	or #0xf0,r0 \n"
--	"	ldc r0,sr \n"
--	: "=&r"(sr) : : "memory", "r0" );
--	return sr;
--}
--
--static inline void unmask(unsigned sr)
--{
--	__asm__ __volatile__ ( "ldc %0,sr" : : "r"(sr) : "memory" );
--}
--
--/* gusa is a hack in the kernel which lets you create a sequence of instructions
-- * which will be restarted if the process is preempted in the middle of the
-- * sequence. It will do for implementing atomics on non-smp systems. ABI is:
-- * r0  = address of first instruction after the atomic sequence
-- * r1  = original stack pointer
-- * r15 = -1 * length of atomic sequence in bytes
-- */
--#define GUSA_CLOBBERS   "r0", "r1", "memory"
--#define GUSA_START(mem,old,nop)    \
--	"	.align 2\n"                \
--	"	mova 1f, r0\n"             \
--	nop                            \
--	"	mov r15, r1\n"             \
--	"	mov #(0f-1f), r15\n"       \
--	"0:	mov.l @" mem ", " old "\n"
--/* the target of mova must be 4 byte aligned, so we may need a nop */
--#define GUSA_START_ODD(mem,old)  GUSA_START(mem,old,"")
--#define GUSA_START_EVEN(mem,old) GUSA_START(mem,old,"\tnop\n")
--#define GUSA_END(mem,new)          \
--	"	mov.l " new ", @" mem "\n" \
--	"1:	mov r1, r15\n"
--
--int __sh_cas(volatile int *p, int t, int s)
--{
--	if (__sh_atomic_model == SH_A_LLSC) return __sh_cas_llsc(p, t, s);
--
--	if (__sh_atomic_model == SH_A_IMASK) {
--		unsigned sr = mask();
--		int old = *p;
--		if (old==t) *p = s;
--		unmask(sr);
--		return old;
--	}
--
--	int old;
--	__asm__ __volatile__(
--		GUSA_START_EVEN("%1", "%0")
--		"	cmp/eq %0, %2\n"
--		"	bf 1f\n"
--		GUSA_END("%1", "%3")
--		: "=&r"(old) : "r"(p), "r"(t), "r"(s) : GUSA_CLOBBERS, "t");
--	return old;
--}
--
--int __sh_swap(volatile int *x, int v)
--{
--	if (__sh_atomic_model == SH_A_LLSC) return __sh_swap_llsc(x, v);
--
--	if (__sh_atomic_model == SH_A_IMASK) {
--		unsigned sr = mask();
--		int old = *x;
--		*x = v;
--		unmask(sr);
--		return old;
--	}
--
--	int old;
--	__asm__ __volatile__(
--		GUSA_START_EVEN("%1", "%0")
--		GUSA_END("%1", "%2")
--		: "=&r"(old) : "r"(x), "r"(v) : GUSA_CLOBBERS);
--	return old;
--}
--
--int __sh_fetch_add(volatile int *x, int v)
--{
--	if (__sh_atomic_model == SH_A_LLSC) return __sh_fetch_add_llsc(x, v);
--
--	if (__sh_atomic_model == SH_A_IMASK) {
--		unsigned sr = mask();
--		int old = *x;
--		*x = old + v;
--		unmask(sr);
--		return old;
--	}
--
--	int old, dummy;
--	__asm__ __volatile__(
--		GUSA_START_EVEN("%2", "%0")
--		"	mov %0, %1\n"
--		"	add %3, %1\n"
--		GUSA_END("%2", "%1")
--		: "=&r"(old), "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
--	return old;
--}
--
--void __sh_store(volatile int *p, int x)
--{
--	if (__sh_atomic_model == SH_A_LLSC) return __sh_store_llsc(p, x);
--	__asm__ __volatile__(
--		"	mov.l %1, @%0\n"
--		: : "r"(p), "r"(x) : "memory");
--}
--
--void __sh_and(volatile int *x, int v)
--{
--	if (__sh_atomic_model == SH_A_LLSC) return __sh_and_llsc(x, v);
--
--	if (__sh_atomic_model == SH_A_IMASK) {
--		unsigned sr = mask();
--		int old = *x;
--		*x = old & v;
--		unmask(sr);
--		return;
--	}
--
--	int dummy;
--	__asm__ __volatile__(
--		GUSA_START_ODD("%1", "%0")
--		"	and %2, %0\n"
--		GUSA_END("%1", "%0")
--		: "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
--}
--
--void __sh_or(volatile int *x, int v)
--{
--	if (__sh_atomic_model == SH_A_LLSC) return __sh_or_llsc(x, v);
--
--	if (__sh_atomic_model == SH_A_IMASK) {
--		unsigned sr = mask();
--		int old = *x;
--		*x = old | v;
--		unmask(sr);
--		return;
--	}
--
--	int dummy;
--	__asm__ __volatile__(
--		GUSA_START_ODD("%1", "%0")
--		"	or %2, %0\n"
--		GUSA_END("%1", "%0")
--		: "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
--}
--
--#endif
---- a/arch/sh/src/sh_atomic.h
-+++ /dev/null
-@@ -1,15 +0,0 @@
--#ifndef _SH_ATOMIC_H
--#define _SH_ATOMIC_H
--
--#define SH_A_GUSA 0
--#define SH_A_LLSC 1
--#define SH_A_CAS 2
--#if !defined(__SH3__) && !defined(__SH4__)
--#define SH_A_IMASK 3
--#else
--#define SH_A_IMASK -1LL /* unmatchable by unsigned int */
--#endif
--
--extern __attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model;
--
--#endif
---- a/arch/x32/atomic.h
-+++ /dev/null
-@@ -1,105 +0,0 @@
--#ifndef _INTERNAL_ATOMIC_H
--#define _INTERNAL_ATOMIC_H
--
--#include <stdint.h>
--
--static inline int a_ctz_64(uint64_t x)
--{
--	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
--	return x;
--}
--
--static inline int a_ctz_l(unsigned long x)
--{
--	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
--	return x;
--}
--
--static inline void a_and_64(volatile uint64_t *p, uint64_t v)
--{
--	__asm__( "lock ; and %1, %0"
--			 : "=m"(*p) : "r"(v) : "memory" );
--}
--
--static inline void a_or_64(volatile uint64_t *p, uint64_t v)
--{
--	__asm__( "lock ; or %1, %0"
--			 : "=m"(*p) : "r"(v) : "memory" );
--}
--
--static inline void a_or_l(volatile void *p, long v)
--{
--	__asm__( "lock ; or %1, %0"
--		: "=m"(*(long *)p) : "r"(v) : "memory" );
--}
--
--static inline void *a_cas_p(volatile void *p, void *t, void *s)
--{
--	__asm__( "lock ; cmpxchg %3, %1"
--		: "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" );
--	return t;
--}
--
--static inline int a_cas(volatile int *p, int t, int s)
--{
--	__asm__( "lock ; cmpxchg %3, %1"
--		: "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
--	return t;
--}
--
--static inline void a_or(volatile int *p, int v)
--{
--	__asm__( "lock ; or %1, %0"
--		: "=m"(*p) : "r"(v) : "memory" );
--}
--
--static inline void a_and(volatile int *p, int v)
--{
--	__asm__( "lock ; and %1, %0"
--		: "=m"(*p) : "r"(v) : "memory" );
--}
--
--static inline int a_swap(volatile int *x, int v)
--{
--	__asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
--	return v;
--}
--
--static inline int a_fetch_add(volatile int *x, int v)
--{
--	__asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
--	return v;
--}
--
--static inline void a_inc(volatile int *x)
--{
--	__asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" );
--}
--
--static inline void a_dec(volatile int *x)
--{
--	__asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" );
--}
--
--static inline void a_store(volatile int *p, int x)
--{
--	__asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" );
--}
--
--static inline void a_spin()
--{
--	__asm__ __volatile__( "pause" : : : "memory" );
--}
--
--static inline void a_barrier()
--{
--	__asm__ __volatile__( "" : : : "memory" );
--}
--
--static inline void a_crash()
--{
--	__asm__ __volatile__( "hlt" : : : "memory" );
--}
--
--
--#endif
---- /dev/null
-+++ b/arch/x32/atomic_arch.h
-@@ -0,0 +1,106 @@
-+#define a_ctz_64 a_ctz_64
-+static inline int a_ctz_64(uint64_t x)
-+{
-+	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
-+	return x;
-+}
-+
-+#define a_ctz_l a_ctz_l
-+static inline int a_ctz_l(unsigned long x)
-+{
-+	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
-+	return x;
-+}
-+
-+#define a_and_64 a_and_64
-+static inline void a_and_64(volatile uint64_t *p, uint64_t v)
-+{
-+	__asm__( "lock ; and %1, %0"
-+			 : "=m"(*p) : "r"(v) : "memory" );
-+}
-+
-+#define a_or_64 a_or_64
-+static inline void a_or_64(volatile uint64_t *p, uint64_t v)
-+{
-+	__asm__( "lock ; or %1, %0"
-+			 : "=m"(*p) : "r"(v) : "memory" );
-+}
-+
-+#define a_or_l a_or_l
-+static inline void a_or_l(volatile void *p, long v)
-+{
-+	__asm__( "lock ; or %1, %0"
-+		: "=m"(*(long *)p) : "r"(v) : "memory" );
-+}
-+
-+#define a_cas a_cas
-+static inline int a_cas(volatile int *p, int t, int s)
-+{
-+	__asm__( "lock ; cmpxchg %3, %1"
-+		: "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
-+	return t;
-+}
-+
-+#define a_or a_or
-+static inline void a_or(volatile int *p, int v)
-+{
-+	__asm__( "lock ; or %1, %0"
-+		: "=m"(*p) : "r"(v) : "memory" );
-+}
-+
-+#define a_and a_and
-+static inline void a_and(volatile int *p, int v)
-+{
-+	__asm__( "lock ; and %1, %0"
-+		: "=m"(*p) : "r"(v) : "memory" );
-+}
-+
-+#define a_swap a_swap
-+static inline int a_swap(volatile int *x, int v)
-+{
-+	__asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
-+	return v;
-+}
-+
-+#define a_fetch_add a_fetch_add
-+static inline int a_fetch_add(volatile int *x, int v)
-+{
-+	__asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
-+	return v;
-+}
-+
-+#define a_inc a_inc
-+static inline void a_inc(volatile int *x)
-+{
-+	__asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" );
-+}
-+
-+#define a_dec a_dec
-+static inline void a_dec(volatile int *x)
-+{
-+	__asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" );
-+}
-+
-+#define a_store a_store
-+static inline void a_store(volatile int *p, int x)
-+{
-+	__asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" );
-+}
-+
-+#define a_spin a_spin
-+static inline void a_spin()
-+{
-+	__asm__ __volatile__( "pause" : : : "memory" );
-+}
-+
-+#define a_barrier a_barrier
-+static inline void a_barrier()
-+{
-+	__asm__ __volatile__( "" : : : "memory" );
-+}
-+
-+#define a_crash a_crash
-+static inline void a_crash()
-+{
-+	__asm__ __volatile__( "hlt" : : : "memory" );
-+}
---- a/arch/x32/pthread_arch.h
-+++ b/arch/x32/pthread_arch.h
-@@ -7,6 +7,6 @@ static inline struct pthread *__pthread_
- 
- #define TP_ADJ(p) (p)
- 
--#define CANCEL_REG_IP 32
-+#define MC_PC gregs[REG_RIP]
- 
- #define CANARY canary2
---- a/arch/x32/src/syscall_cp_fixup.c
-+++ b/arch/x32/src/syscall_cp_fixup.c
-@@ -1,8 +1,6 @@
- #include <sys/syscall.h>
- 
--#ifdef SHARED
- __attribute__((__visibility__("hidden")))
--#endif
- long __syscall_cp_internal(volatile void*, long long, long long, long long, long long,
-                              long long, long long, long long);
- 
-@@ -14,9 +12,7 @@ struct __timespec_kernel { long long tv_
- 	ts->tv_nsec = __tsc(X)->tv_nsec; \
- 	(X) = (unsigned long)ts; } } while(0)
- 
--#ifdef SHARED
- __attribute__((__visibility__("hidden")))
--#endif
- long __syscall_cp_asm (volatile void * foo, long long n, long long a1, long long a2, long long a3,
- 	                     long long a4, long long a5, long long a6)
- {
---- a/arch/x86_64/atomic.h
-+++ /dev/null
-@@ -1,105 +0,0 @@
--#ifndef _INTERNAL_ATOMIC_H
--#define _INTERNAL_ATOMIC_H
--
--#include <stdint.h>
--
--static inline int a_ctz_64(uint64_t x)
--{
--	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
--	return x;
--}
--
--static inline int a_ctz_l(unsigned long x)
--{
--	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
--	return x;
--}
--
--static inline void a_and_64(volatile uint64_t *p, uint64_t v)
--{
--	__asm__( "lock ; and %1, %0"
--			 : "=m"(*p) : "r"(v) : "memory" );
--}
--
--static inline void a_or_64(volatile uint64_t *p, uint64_t v)
--{
--	__asm__( "lock ; or %1, %0"
--			 : "=m"(*p) : "r"(v) : "memory" );
--}
--
--static inline void a_or_l(volatile void *p, long v)
--{
--	__asm__( "lock ; or %1, %0"
--		: "=m"(*(long *)p) : "r"(v) : "memory" );
--}
--
--static inline void *a_cas_p(volatile void *p, void *t, void *s)
--{
--	__asm__( "lock ; cmpxchg %3, %1"
--		: "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" );
--	return t;
--}
--
--static inline int a_cas(volatile int *p, int t, int s)
--{
--	__asm__( "lock ; cmpxchg %3, %1"
--		: "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
--	return t;
--}
--
--static inline void a_or(volatile int *p, int v)
--{
--	__asm__( "lock ; or %1, %0"
--		: "=m"(*p) : "r"(v) : "memory" );
--}
--
--static inline void a_and(volatile int *p, int v)
--{
--	__asm__( "lock ; and %1, %0"
--		: "=m"(*p) : "r"(v) : "memory" );
--}
--
--static inline int a_swap(volatile int *x, int v)
--{
--	__asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
--	return v;
--}
--
--static inline int a_fetch_add(volatile int *x, int v)
--{
--	__asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
--	return v;
--}
--
--static inline void a_inc(volatile int *x)
--{
--	__asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" );
--}
--
--static inline void a_dec(volatile int *x)
--{
--	__asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" );
--}
--
--static inline void a_store(volatile int *p, int x)
--{
--	__asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" );
--}
--
--static inline void a_spin()
--{
--	__asm__ __volatile__( "pause" : : : "memory" );
--}
--
--static inline void a_barrier()
--{
--	__asm__ __volatile__( "" : : : "memory" );
--}
--
--static inline void a_crash()
--{
--	__asm__ __volatile__( "hlt" : : : "memory" );
--}
--
--
--#endif
---- /dev/null
-+++ b/arch/x86_64/atomic_arch.h
-@@ -0,0 +1,107 @@
-+#define a_ctz_64 a_ctz_64
-+static inline int a_ctz_64(uint64_t x)
-+{
-+	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
-+	return x;
-+}
-+
-+#define a_and_64 a_and_64
-+static inline void a_and_64(volatile uint64_t *p, uint64_t v)
-+{
-+	__asm__( "lock ; and %1, %0"
-+			 : "=m"(*p) : "r"(v) : "memory" );
-+}
-+
-+#define a_or_64 a_or_64
-+static inline void a_or_64(volatile uint64_t *p, uint64_t v)
-+{
-+	__asm__( "lock ; or %1, %0"
-+			 : "=m"(*p) : "r"(v) : "memory" );
-+}
-+
-+#define a_or_l a_or_l
-+static inline void a_or_l(volatile void *p, long v)
-+{
-+	__asm__( "lock ; or %1, %0"
-+		: "=m"(*(long *)p) : "r"(v) : "memory" );
-+}
-+
-+#define a_cas_p a_cas_p
-+static inline void *a_cas_p(volatile void *p, void *t, void *s)
-+{
-+	__asm__( "lock ; cmpxchg %3, %1"
-+		: "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" );
-+	return t;
-+}
-+
-+#define a_cas a_cas
-+static inline int a_cas(volatile int *p, int t, int s)
-+{
-+	__asm__( "lock ; cmpxchg %3, %1"
-+		: "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
-+	return t;
-+}
-+
-+#define a_or a_or
-+static inline void a_or(volatile int *p, int v)
-+{
-+	__asm__( "lock ; or %1, %0"
-+		: "=m"(*p) : "r"(v) : "memory" );
-+}
-+
-+#define a_and a_and
-+static inline void a_and(volatile int *p, int v)
-+{
-+	__asm__( "lock ; and %1, %0"
-+		: "=m"(*p) : "r"(v) : "memory" );
-+}
-+
-+#define a_swap a_swap
-+static inline int a_swap(volatile int *x, int v)
-+{
-+	__asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
-+	return v;
-+}
-+
-+#define a_fetch_add a_fetch_add
-+static inline int a_fetch_add(volatile int *x, int v)
-+{
-+	__asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
-+	return v;
-+}
-+
-+#define a_inc a_inc
-+static inline void a_inc(volatile int *x)
-+{
-+	__asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" );
-+}
-+
-+#define a_dec a_dec
-+static inline void a_dec(volatile int *x)
-+{
-+	__asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" );
-+}
-+
-+#define a_store a_store
-+static inline void a_store(volatile int *p, int x)
-+{
-+	__asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" );
-+}
-+
-+#define a_spin a_spin
-+static inline void a_spin()
-+{
-+	__asm__ __volatile__( "pause" : : : "memory" );
-+}
-+
-+#define a_barrier a_barrier
-+static inline void a_barrier()
-+{
-+	__asm__ __volatile__( "" : : : "memory" );
-+}
-+
-+#define a_crash a_crash
-+static inline void a_crash()
-+{
-+	__asm__ __volatile__( "hlt" : : : "memory" );
-+}
---- a/arch/x86_64/pthread_arch.h
-+++ b/arch/x86_64/pthread_arch.h
-@@ -7,4 +7,4 @@ static inline struct pthread *__pthread_
- 
- #define TP_ADJ(p) (p)
- 
--#define CANCEL_REG_IP 16
-+#define MC_PC gregs[REG_RIP]
---- a/configure
-+++ b/configure
-@@ -9,6 +9,9 @@ VAR=VALUE.  See below for descriptions o
- 
- Defaults for the options are specified in brackets.
- 
-+Configuration:
-+  --srcdir=DIR            source directory [detected]
-+
- Installation directories:
-   --prefix=PREFIX         main installation prefix [/usr/local/musl]
-   --exec-prefix=EPREFIX   installation prefix for executable files [PREFIX]
-@@ -117,6 +120,7 @@ CFLAGS_TRY=
- LDFLAGS_AUTO=
- LDFLAGS_TRY=
- OPTIMIZE_GLOBS=
-+srcdir=
- prefix=/usr/local/musl
- exec_prefix='$(prefix)'
- bindir='$(exec_prefix)/bin'
-@@ -139,6 +143,7 @@ clang_wrapper=no
- for arg ; do
- case "$arg" in
- --help) usage ;;
-+--srcdir=*) srcdir=${arg#*=} ;;
- --prefix=*) prefix=${arg#*=} ;;
- --exec-prefix=*) exec_prefix=${arg#*=} ;;
- --bindir=*) bindir=${arg#*=} ;;
-@@ -179,11 +184,23 @@ LIBCC=*) LIBCC=${arg#*=} ;;
- esac
- done
- 
--for i in prefix exec_prefix bindir libdir includedir syslibdir ; do
-+for i in srcdir prefix exec_prefix bindir libdir includedir syslibdir ; do
- stripdir $i
- done
- 
- #
-+# Get the source dir for out-of-tree builds
-+#
-+if test -z "$srcdir" ; then
-+srcdir="${0%/configure}"
-+stripdir srcdir
-+fi
-+abs_builddir="$(pwd)" || fail "$0: cannot determine working directory"
-+abs_srcdir="$(cd $srcdir && pwd)" || fail "$0: invalid source directory $srcdir"
-+test "$abs_srcdir" = "$abs_builddir" && srcdir=.
-+test "$srcdir" != "." -a -f Makefile -a ! -h Makefile && fail "$0: Makefile already exists in the working directory"
-+
-+#
- # Get a temp filename we can use
- #
- i=0
-@@ -263,11 +280,11 @@ fi
- fi
- 
- if test "$gcc_wrapper" = yes ; then
--tools="$tools tools/musl-gcc"
-+tools="$tools obj/musl-gcc"
- tool_libs="$tool_libs lib/musl-gcc.specs"
- fi
- if test "$clang_wrapper" = yes ; then
--tools="$tools tools/musl-clang tools/ld.musl-clang"
-+tools="$tools obj/musl-clang obj/ld.musl-clang"
- fi
- 
- #
-@@ -321,7 +338,7 @@ __attribute__((__may_alias__))
- #endif
- x;
- EOF
--if $CC $CFLAGS_C99FSE -I./arch/$ARCH -I./include $CPPFLAGS $CFLAGS \
-+if $CC $CFLAGS_C99FSE -I$srcdir/arch/$ARCH -I$srcdir/include $CPPFLAGS $CFLAGS \
-   -c -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
- printf "no\n"
- else
-@@ -330,6 +347,13 @@ CFLAGS_C99FSE="$CFLAGS_C99FSE -D__may_al
- fi
- 
- #
-+# The GNU toolchain defaults to assuming unmarked files need an
-+# executable stack, potentially exposing vulnerabilities in programs
-+# linked with such object files. Fix this.
-+#
-+tryflag CFLAGS_C99FSE -Wa,--noexecstack
-+
-+#
- # Check for options to disable stack protector, which needs to be
- # disabled for a few early-bootstrap translation units. If not found,
- # this is not an error; we assume the toolchain does not do ssp.
-@@ -430,11 +454,15 @@ tryflag CFLAGS_AUTO -fno-unwind-tables
- tryflag CFLAGS_AUTO -fno-asynchronous-unwind-tables
- 
- #
--# The GNU toolchain defaults to assuming unmarked files need an
--# executable stack, potentially exposing vulnerabilities in programs
--# linked with such object files. Fix this.
-+# Attempt to put each function and each data object in its own
-+# section. This both allows additional size optimizations at link
-+# time and works around a dangerous class of compiler/assembler bugs
-+# whereby relative address expressions are constant-folded by the
-+# assembler even when one or more of the symbols involved is
-+# replaceable. See gas pr 18561 and gcc pr 66609, 68178, etc.
- #
--tryflag CFLAGS_AUTO -Wa,--noexecstack
-+tryflag CFLAGS_AUTO -ffunction-sections
-+tryflag CFLAGS_AUTO -fdata-sections
- 
- #
- # On x86, make sure we don't have incompatible instruction set
-@@ -489,7 +517,7 @@ int foo(void) { }
- int bar(void) { fp = foo; return foo(); }
- EOF
- if $CC $CFLAGS_C99FSE $CPPFLAGS $CFLAGS \
--  -DSHARED -fPIC -I./src/internal -include vis.h \
-+  -DSHARED -fPIC -I$srcdir/src/internal -include vis.h \
-   -nostdlib -shared -Wl,-Bsymbolic-functions \
-   -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
- visibility=yes
-@@ -504,6 +532,16 @@ CFLAGS_AUTO="$CFLAGS_AUTO -include vis.h
- CFLAGS_AUTO="${CFLAGS_AUTO# }"
- fi
- 
-+# Reduce space lost to padding for alignment purposes by sorting data
-+# objects according to their alignment reqirements. This approximates
-+# optimal packing.
-+tryldflag LDFLAGS_AUTO -Wl,--sort-section,alignment
-+tryldflag LDFLAGS_AUTO -Wl,--sort-common
-+
-+# When linking shared library, drop dummy weak definitions that were
-+# replaced by strong definitions from other translation units.
-+tryldflag LDFLAGS_AUTO -Wl,--gc-sections
-+
- # Some patched GCC builds have these defaults messed up...
- tryldflag LDFLAGS_AUTO -Wl,--hash-style=both
- 
-@@ -513,6 +551,11 @@ tryldflag LDFLAGS_AUTO -Wl,--hash-style=
- # runtime library; implementation error is also a possibility.
- tryldflag LDFLAGS_AUTO -Wl,--no-undefined
- 
-+# Avoid exporting symbols from compiler runtime libraries. They
-+# should be hidden anyway, but some toolchains including old gcc
-+# versions built without shared library support and pcc are broken.
-+tryldflag LDFLAGS_AUTO -Wl,--exclude-libs=ALL
-+
- test "$shared" = "no" || {
- # Disable dynamic linking if ld is broken and can't do -Bsymbolic-functions
- LDFLAGS_DUMMY=
-@@ -599,7 +642,7 @@ echo '#include <float.h>' > "$tmpc"
- echo '#if LDBL_MANT_DIG == 53' >> "$tmpc"
- echo 'typedef char ldcheck[9-(int)sizeof(long double)];' >> "$tmpc"
- echo '#endif' >> "$tmpc"
--if $CC $CFLAGS_C99FSE -I./arch/$ARCH -I./include $CPPFLAGS $CFLAGS \
-+if $CC $CFLAGS_C99FSE -I$srcdir/arch/$ARCH -I$srcdir/include $CPPFLAGS $CFLAGS \
-   -c -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
- printf "yes\n"
- else
-@@ -622,6 +665,7 @@ cat << EOF
- ARCH = $ARCH
- SUBARCH = $SUBARCH
- ASMSUBARCH = $ASMSUBARCH
-+srcdir = $srcdir
- prefix = $prefix
- exec_prefix = $exec_prefix
- bindir = $bindir
-@@ -629,12 +673,14 @@ libdir = $libdir
- includedir = $includedir
- syslibdir = $syslibdir
- CC = $CC
--CFLAGS = $CFLAGS_AUTO $CFLAGS
-+CFLAGS = $CFLAGS
-+CFLAGS_AUTO = $CFLAGS_AUTO
- CFLAGS_C99FSE = $CFLAGS_C99FSE
- CFLAGS_MEMOPS = $CFLAGS_MEMOPS
- CFLAGS_NOSSP = $CFLAGS_NOSSP
- CPPFLAGS = $CPPFLAGS
--LDFLAGS = $LDFLAGS_AUTO $LDFLAGS
-+LDFLAGS = $LDFLAGS
-+LDFLAGS_AUTO = $LDFLAGS_AUTO
- CROSS_COMPILE = $CROSS_COMPILE
- LIBCC = $LIBCC
- OPTIMIZE_GLOBS = $OPTIMIZE_GLOBS
-@@ -648,4 +694,6 @@ test "x$cc_family" = xgcc && echo 'WRAPC
- test "x$cc_family" = xclang && echo 'WRAPCC_CLANG = $(CC)'
- exec 1>&3 3>&-
- 
-+test "$srcdir" = "." || ln -sf $srcdir/Makefile .
-+
- printf "done\n"
---- a/crt/arm/crti.s
-+++ b/crt/arm/crti.s
-@@ -1,3 +1,5 @@
-+.syntax unified
-+
- .section .init
- .global _init
- .type _init,%function
---- a/crt/arm/crtn.s
-+++ b/crt/arm/crtn.s
-@@ -1,11 +1,9 @@
-+.syntax unified
-+
- .section .init
- 	pop {r0,lr}
--	tst lr,#1
--	moveq pc,lr
- 	bx lr
- 
- .section .fini
- 	pop {r0,lr}
--	tst lr,#1
--	moveq pc,lr
- 	bx lr
---- a/include/complex.h
-+++ b/include/complex.h
-@@ -116,7 +116,7 @@ long double creall(long double complex);
- 
- #if __STDC_VERSION__ >= 201112L
- #if defined(_Imaginary_I)
--#define __CMPLX(x, y, t) ((t)(x) + _Imaginary_I*(t)(y)))
-+#define __CMPLX(x, y, t) ((t)(x) + _Imaginary_I*(t)(y))
- #elif defined(__clang__)
- #define __CMPLX(x, y, t) (+(_Complex t){ (t)(x), (t)(y) })
- #else
---- a/include/netinet/tcp.h
-+++ b/include/netinet/tcp.h
-@@ -41,7 +41,20 @@
- #define TCP_CLOSING      11
- 
- #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-+#define TCPOPT_EOL              0
-+#define TCPOPT_NOP              1
-+#define TCPOPT_MAXSEG           2
-+#define TCPOPT_WINDOW           3
-+#define TCPOPT_SACK_PERMITTED   4
-+#define TCPOPT_SACK             5
-+#define TCPOPT_TIMESTAMP        8
-+#define TCPOLEN_SACK_PERMITTED  2
-+#define TCPOLEN_WINDOW          3
-+#define TCPOLEN_MAXSEG          4
-+#define TCPOLEN_TIMESTAMP       10
-+
- #define SOL_TCP 6
-+
- #include <sys/types.h>
- #include <sys/socket.h>
- #include <stdint.h>
---- a/src/env/__init_tls.c
-+++ b/src/env/__init_tls.c
-@@ -8,9 +8,6 @@
- #include "atomic.h"
- #include "syscall.h"
- 
--#ifndef SHARED
--static
--#endif
- int __init_tp(void *p)
- {
- 	pthread_t td = p;
-@@ -24,8 +21,6 @@ int __init_tp(void *p)
- 	return 0;
- }
- 
--#ifndef SHARED
--
- static struct builtin_tls {
- 	char c;
- 	struct pthread pt;
-@@ -33,33 +28,40 @@ static struct builtin_tls {
- } builtin_tls[1];
- #define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt)
- 
--struct tls_image {
--	void *image;
--	size_t len, size, align;
--} __static_tls;
--
--#define T __static_tls
-+static struct tls_module main_tls;
- 
- void *__copy_tls(unsigned char *mem)
- {
- 	pthread_t td;
--	if (!T.image) return mem;
--	void **dtv = (void *)mem;
--	dtv[0] = (void *)1;
-+	struct tls_module *p;
-+	size_t i;
-+	void **dtv;
-+
- #ifdef TLS_ABOVE_TP
--	mem += sizeof(void *) * 2;
--	mem += -((uintptr_t)mem + sizeof(struct pthread)) & (T.align-1);
-+	dtv = (void **)(mem + libc.tls_size) - (libc.tls_cnt + 1);
-+
-+	mem += -((uintptr_t)mem + sizeof(struct pthread)) & (libc.tls_align-1);
- 	td = (pthread_t)mem;
- 	mem += sizeof(struct pthread);
-+
-+	for (i=1, p=libc.tls_head; p; i++, p=p->next) {
-+		dtv[i] = mem + p->offset;
-+		memcpy(dtv[i], p->image, p->len);
-+	}
- #else
-+	dtv = (void **)mem;
-+
- 	mem += libc.tls_size - sizeof(struct pthread);
--	mem -= (uintptr_t)mem & (T.align-1);
-+	mem -= (uintptr_t)mem & (libc.tls_align-1);
- 	td = (pthread_t)mem;
--	mem -= T.size;
-+
-+	for (i=1, p=libc.tls_head; p; i++, p=p->next) {
-+		dtv[i] = mem - p->offset;
-+		memcpy(dtv[i], p->image, p->len);
-+	}
- #endif
-+	dtv[0] = (void *)libc.tls_cnt;
- 	td->dtv = td->dtv_copy = dtv;
--	dtv[1] = mem;
--	memcpy(mem, T.image, T.len);
- 	return td;
- }
- 
-@@ -69,7 +71,7 @@ typedef Elf32_Phdr Phdr;
- typedef Elf64_Phdr Phdr;
- #endif
- 
--void __init_tls(size_t *aux)
-+static void static_init_tls(size_t *aux)
- {
- 	unsigned char *p;
- 	size_t n;
-@@ -86,16 +88,24 @@ void __init_tls(size_t *aux)
- 	}
- 
- 	if (tls_phdr) {
--		T.image = (void *)(base + tls_phdr->p_vaddr);
--		T.len = tls_phdr->p_filesz;
--		T.size = tls_phdr->p_memsz;
--		T.align = tls_phdr->p_align;
-+		main_tls.image = (void *)(base + tls_phdr->p_vaddr);
-+		main_tls.len = tls_phdr->p_filesz;
-+		main_tls.size = tls_phdr->p_memsz;
-+		main_tls.align = tls_phdr->p_align;
-+		libc.tls_cnt = 1;
-+		libc.tls_head = &main_tls;
- 	}
- 
--	T.size += (-T.size - (uintptr_t)T.image) & (T.align-1);
--	if (T.align < MIN_TLS_ALIGN) T.align = MIN_TLS_ALIGN;
-+	main_tls.size += (-main_tls.size - (uintptr_t)main_tls.image)
-+		& (main_tls.align-1);
-+	if (main_tls.align < MIN_TLS_ALIGN) main_tls.align = MIN_TLS_ALIGN;
-+#ifndef TLS_ABOVE_TP
-+	main_tls.offset = main_tls.size;
-+#endif
- 
--	libc.tls_size = 2*sizeof(void *)+T.size+T.align+sizeof(struct pthread)
-+	libc.tls_align = main_tls.align;
-+	libc.tls_size = 2*sizeof(void *) + sizeof(struct pthread)
-+		+ main_tls.size + main_tls.align
- 		+ MIN_TLS_ALIGN-1 & -MIN_TLS_ALIGN;
- 
- 	if (libc.tls_size > sizeof builtin_tls) {
-@@ -117,6 +127,5 @@ void __init_tls(size_t *aux)
- 	if (__init_tp(__copy_tls(mem)) < 0)
- 		a_crash();
- }
--#else
--void __init_tls(size_t *auxv) { }
--#endif
-+
-+weak_alias(static_init_tls, __init_tls);
---- a/src/env/__libc_start_main.c
-+++ b/src/env/__libc_start_main.c
-@@ -8,21 +8,17 @@
- 
- void __init_tls(size_t *);
- 
--#ifndef SHARED
--static void dummy() {}
-+static void dummy(void) {}
- weak_alias(dummy, _init);
--extern void (*const __init_array_start)() __attribute__((weak));
--extern void (*const __init_array_end)() __attribute__((weak));
--#endif
-+
-+__attribute__((__weak__, __visibility__("hidden")))
-+extern void (*const __init_array_start)(void), (*const __init_array_end)(void);
- 
- static void dummy1(void *p) {}
- weak_alias(dummy1, __init_ssp);
- 
- #define AUX_CNT 38
- 
--#ifndef SHARED
--static
--#endif
- void __init_libc(char **envp, char *pn)
- {
- 	size_t i, *auxv, aux[AUX_CNT] = { 0 };
-@@ -57,20 +53,22 @@ void __init_libc(char **envp, char *pn)
- 	libc.secure = 1;
- }
- 
--int __libc_start_main(int (*main)(int,char **,char **), int argc, char **argv)
-+static void libc_start_init(void)
- {
--	char **envp = argv+argc+1;
--
--#ifndef SHARED
--	__init_libc(envp, argv[0]);
- 	_init();
- 	uintptr_t a = (uintptr_t)&__init_array_start;
- 	for (; a<(uintptr_t)&__init_array_end; a+=sizeof(void(*)()))
- 		(*(void (**)())a)();
--#else
--	void __libc_start_init(void);
-+}
-+
-+weak_alias(libc_start_init, __libc_start_init);
-+
-+int __libc_start_main(int (*main)(int,char **,char **), int argc, char **argv)
-+{
-+	char **envp = argv+argc+1;
-+
-+	__init_libc(envp, argv[0]);
- 	__libc_start_init();
--#endif
- 
- 	/* Pass control to the application */
- 	exit(main(argc, argv, envp));
---- a/src/env/__reset_tls.c
-+++ b/src/env/__reset_tls.c
-@@ -1,21 +1,16 @@
--#ifndef SHARED
--
- #include <string.h>
- #include "pthread_impl.h"
--
--extern struct tls_image {
--	void *image;
--	size_t len, size, align;
--} __static_tls;
--
--#define T __static_tls
-+#include "libc.h"
- 
- void __reset_tls()
- {
--	if (!T.size) return;
- 	pthread_t self = __pthread_self();
--	memcpy(self->dtv[1], T.image, T.len);
--	memset((char *)self->dtv[1]+T.len, 0, T.size-T.len);
-+	struct tls_module *p;
-+	size_t i, n = (size_t)self->dtv[0];
-+	if (n) for (p=libc.tls_head, i=1; i<=n; i++, p=p->next) {
-+		if (!self->dtv[i]) continue;
-+		memcpy(self->dtv[i], p->image, p->len);
-+		memset((char *)self->dtv[i]+p->len, 0,
-+			p->size - p->len);
-+	}
- }
--
--#endif
---- a/src/env/__stack_chk_fail.c
-+++ b/src/env/__stack_chk_fail.c
-@@ -17,16 +17,7 @@ void __stack_chk_fail(void)
- 	a_crash();
- }
- 
--#ifdef SHARED
--
- __attribute__((__visibility__("hidden")))
--void __stack_chk_fail_local(void)
--{
--	a_crash();
--}
--
--#else
-+void __stack_chk_fail_local(void);
- 
- weak_alias(__stack_chk_fail, __stack_chk_fail_local);
--
--#endif
---- /dev/null
-+++ b/src/exit/arm/__aeabi_atexit.c
-@@ -0,0 +1,6 @@
-+int __cxa_atexit(void (*func)(void *), void *arg, void *dso);
-+
-+int __aeabi_atexit (void *obj, void (*func) (void *), void *d)
-+{
-+	return __cxa_atexit (func, obj, d);
-+}
---- a/src/exit/exit.c
-+++ b/src/exit/exit.c
-@@ -10,25 +10,25 @@ static void dummy()
-  * as a consequence of linking either __toread.c or __towrite.c. */
- weak_alias(dummy, __funcs_on_exit);
- weak_alias(dummy, __stdio_exit);
--
--#ifndef SHARED
- weak_alias(dummy, _fini);
--extern void (*const __fini_array_start)() __attribute__((weak));
--extern void (*const __fini_array_end)() __attribute__((weak));
--#endif
- 
--_Noreturn void exit(int code)
--{
--	__funcs_on_exit();
-+__attribute__((__weak__, __visibility__("hidden")))
-+extern void (*const __fini_array_start)(void), (*const __fini_array_end)(void);
- 
--#ifndef SHARED
-+static void libc_exit_fini(void)
-+{
- 	uintptr_t a = (uintptr_t)&__fini_array_end;
- 	for (; a>(uintptr_t)&__fini_array_start; a-=sizeof(void(*)()))
- 		(*(void (**)())(a-sizeof(void(*)())))();
- 	_fini();
--#endif
-+}
- 
--	__stdio_exit();
-+weak_alias(libc_exit_fini, __libc_exit_fini);
- 
-+_Noreturn void exit(int code)
-+{
-+	__funcs_on_exit();
-+	__libc_exit_fini();
-+	__stdio_exit();
- 	_Exit(code);
- }
---- /dev/null
-+++ b/src/fenv/arm/fenv-hf.S
-@@ -0,0 +1,69 @@
-+#if __ARM_PCS_VFP
-+
-+.syntax unified
-+.fpu vfp
-+
-+.global fegetround
-+.type fegetround,%function
-+fegetround:
-+	fmrx r0, fpscr
-+	and r0, r0, #0xc00000
-+	bx lr
-+
-+.global __fesetround
-+.type __fesetround,%function
-+__fesetround:
-+	fmrx r3, fpscr
-+	bic r3, r3, #0xc00000
-+	orr r3, r3, r0
-+	fmxr fpscr, r3
-+	mov r0, #0
-+	bx lr
-+
-+.global fetestexcept
-+.type fetestexcept,%function
-+fetestexcept:
-+	and r0, r0, #0x1f
-+	fmrx r3, fpscr
-+	and r0, r0, r3
-+	bx lr
-+
-+.global feclearexcept
-+.type feclearexcept,%function
-+feclearexcept:
-+	and r0, r0, #0x1f
-+	fmrx r3, fpscr
-+	bic r3, r3, r0
-+	fmxr fpscr, r3
-+	mov r0, #0
-+	bx lr
-+
-+.global feraiseexcept
-+.type feraiseexcept,%function
-+feraiseexcept:
-+	and r0, r0, #0x1f
-+	fmrx r3, fpscr
-+	orr r3, r3, r0
-+	fmxr fpscr, r3
-+	mov r0, #0
-+	bx lr
-+
-+.global fegetenv
-+.type fegetenv,%function
-+fegetenv:
-+	fmrx r3, fpscr
-+	str r3, [r0]
-+	mov r0, #0
-+	bx lr
-+
-+.global fesetenv
-+.type fesetenv,%function
-+fesetenv:
-+	cmn r0, #1
-+	moveq r3, #0
-+	ldrne r3, [r0]
-+	fmxr fpscr, r3
-+	mov r0, #0
-+	bx lr
-+
-+#endif
---- /dev/null
-+++ b/src/fenv/arm/fenv.c
-@@ -0,0 +1,3 @@
-+#if !__ARM_PCS_VFP
-+#include "../fenv.c"
-+#endif
---- a/src/fenv/armebhf/fenv.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--../armhf/fenv.s
---- a/src/fenv/armhf/fenv.s
-+++ /dev/null
-@@ -1,64 +0,0 @@
--.fpu vfp
--
--.global fegetround
--.type fegetround,%function
--fegetround:
--	mrc p10, 7, r0, cr1, cr0, 0
--	and r0, r0, #0xc00000
--	bx lr
--
--.global __fesetround
--.type __fesetround,%function
--__fesetround:
--	mrc p10, 7, r3, cr1, cr0, 0
--	bic r3, r3, #0xc00000
--	orr r3, r3, r0
--	mcr p10, 7, r3, cr1, cr0, 0
--	mov r0, #0
--	bx lr
--
--.global fetestexcept
--.type fetestexcept,%function
--fetestexcept:
--	and r0, r0, #0x1f
--	mrc p10, 7, r3, cr1, cr0, 0
--	and r0, r0, r3
--	bx lr
--
--.global feclearexcept
--.type feclearexcept,%function
--feclearexcept:
--	and r0, r0, #0x1f
--	mrc p10, 7, r3, cr1, cr0, 0
--	bic r3, r3, r0
--	mcr p10, 7, r3, cr1, cr0, 0
--	mov r0, #0
--	bx lr
--
--.global feraiseexcept
--.type feraiseexcept,%function
--feraiseexcept:
--	and r0, r0, #0x1f
--	mrc p10, 7, r3, cr1, cr0, 0
--	orr r3, r3, r0
--	mcr p10, 7, r3, cr1, cr0, 0
--	mov r0, #0
--	bx lr
--
--.global fegetenv
--.type fegetenv,%function
--fegetenv:
--	mrc p10, 7, r3, cr1, cr0, 0
--	str r3, [r0]
--	mov r0, #0
--	bx lr
--
--.global fesetenv
--.type fesetenv,%function
--fesetenv:
--	cmn r0, #1
--	moveq r3, #0
--	ldrne r3, [r0]
--	mcr p10, 7, r3, cr1, cr0, 0
--	mov r0, #0
--	bx lr
---- a/src/fenv/armhf/fenv.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--fenv.s
---- a/src/fenv/mips-sf/fenv.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--../fenv.c
---- /dev/null
-+++ b/src/fenv/mips/fenv-sf.c
-@@ -0,0 +1,3 @@
-+#ifdef __mips_soft_float
-+#include "../fenv.c"
-+#endif
---- /dev/null
-+++ b/src/fenv/mips/fenv.S
-@@ -0,0 +1,71 @@
-+#ifndef __mips_soft_float
-+
-+.set noreorder
-+
-+.global feclearexcept
-+.type  feclearexcept,@function
-+feclearexcept:
-+	and     $4, $4, 0x7c
-+	cfc1    $5, $31
-+	or      $5, $5, $4
-+	xor     $5, $5, $4
-+	ctc1    $5, $31
-+	jr      $ra
-+	li      $2, 0
-+
-+.global feraiseexcept
-+.type  feraiseexcept,@function
-+feraiseexcept:
-+	and     $4, $4, 0x7c
-+	cfc1    $5, $31
-+	or      $5, $5, $4
-+	ctc1    $5, $31
-+	jr      $ra
-+	li      $2, 0
-+
-+.global fetestexcept
-+.type  fetestexcept,@function
-+fetestexcept:
-+	and     $4, $4, 0x7c
-+	cfc1    $2, $31
-+	jr      $ra
-+	and     $2, $2, $4
-+
-+.global fegetround
-+.type  fegetround,@function
-+fegetround:
-+	cfc1    $2, $31
-+	jr      $ra
-+	andi    $2, $2, 3
-+
-+.global __fesetround
-+.type __fesetround,@function
-+__fesetround:
-+	cfc1    $5, $31
-+	li      $6, -4
-+	and     $5, $5, $6
-+	or      $5, $5, $4
-+	ctc1    $5, $31
-+	jr      $ra
-+	li      $2, 0
-+
-+.global fegetenv
-+.type  fegetenv,@function
-+fegetenv:
-+	cfc1    $5, $31
-+	sw      $5, 0($4)
-+	jr      $ra
-+	li      $2, 0
-+
-+.global fesetenv
-+.type  fesetenv,@function
-+fesetenv:
-+	addiu   $5, $4, 1
-+	beq     $5, $0, 1f
-+	 nop
-+	lw      $5, 0($4)
-+1:	ctc1    $5, $31
-+	jr      $ra
-+	li      $2, 0
-+
-+#endif
---- a/src/fenv/mips/fenv.s
-+++ /dev/null
-@@ -1,67 +0,0 @@
--.set noreorder
--
--.global feclearexcept
--.type  feclearexcept,@function
--feclearexcept:
--	and     $4, $4, 0x7c
--	cfc1    $5, $31
--	or      $5, $5, $4
--	xor     $5, $5, $4
--	ctc1    $5, $31
--	jr      $ra
--	li      $2, 0
--
--.global feraiseexcept
--.type  feraiseexcept,@function
--feraiseexcept:
--	and     $4, $4, 0x7c
--	cfc1    $5, $31
--	or      $5, $5, $4
--	ctc1    $5, $31
--	jr      $ra
--	li      $2, 0
--
--.global fetestexcept
--.type  fetestexcept,@function
--fetestexcept:
--	and     $4, $4, 0x7c
--	cfc1    $2, $31
--	jr      $ra
--	and     $2, $2, $4
--
--.global fegetround
--.type  fegetround,@function
--fegetround:
--	cfc1    $2, $31
--	jr      $ra
--	andi    $2, $2, 3
--
--.global __fesetround
--.type __fesetround,@function
--__fesetround:
--	cfc1    $5, $31
--	li      $6, -4
--	and     $5, $5, $6
--	or      $5, $5, $4
--	ctc1    $5, $31
--	jr      $ra
--	li      $2, 0
--
--.global fegetenv
--.type  fegetenv,@function
--fegetenv:
--	cfc1    $5, $31
--	sw      $5, 0($4)
--	jr      $ra
--	li      $2, 0
--
--.global fesetenv
--.type  fesetenv,@function
--fesetenv:
--	addiu   $5, $4, 1
--	beq     $5, $0, 1f
--	 nop
--	lw      $5, 0($4)
--1:	ctc1    $5, $31
--	jr      $ra
--	li      $2, 0
---- a/src/fenv/mipsel-sf/fenv.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--../fenv.c
---- a/src/fenv/sh-nofpu/fenv.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--../fenv.c
---- /dev/null
-+++ b/src/fenv/sh/fenv-nofpu.c
-@@ -0,0 +1,3 @@
-+#if !__SH_FPU_ANY__ && !__SH4__
-+#include "../fenv.c"
-+#endif
---- /dev/null
-+++ b/src/fenv/sh/fenv.S
-@@ -0,0 +1,78 @@
-+#if __SH_FPU_ANY__ || __SH4__
-+
-+.global fegetround
-+.type   fegetround, @function
-+fegetround:
-+	sts fpscr, r0
-+	rts
-+	 and #3, r0
-+
-+.global __fesetround
-+.type   __fesetround, @function
-+__fesetround:
-+	sts fpscr, r0
-+	or  r4, r0
-+	lds r0, fpscr
-+	rts
-+	 mov #0, r0
-+
-+.global fetestexcept
-+.type   fetestexcept, @function
-+fetestexcept:
-+	sts fpscr, r0
-+	and r4, r0
-+	rts
-+	 and #0x7c, r0
-+
-+.global feclearexcept
-+.type   feclearexcept, @function
-+feclearexcept:
-+	mov r4, r0
-+	and #0x7c, r0
-+	not r0, r4
-+	sts fpscr, r0
-+	and r4, r0
-+	lds r0, fpscr
-+	rts
-+	 mov #0, r0
-+
-+.global feraiseexcept
-+.type   feraiseexcept, @function
-+feraiseexcept:
-+	mov r4, r0
-+	and #0x7c, r0
-+	sts fpscr, r4
-+	or  r4, r0
-+	lds r0, fpscr
-+	rts
-+	 mov #0, r0
-+
-+.global fegetenv
-+.type   fegetenv, @function
-+fegetenv:
-+	sts fpscr, r0
-+	mov.l r0, @r4
-+	rts
-+	 mov #0, r0
-+
-+.global fesetenv
-+.type   fesetenv, @function
-+fesetenv:
-+	mov r4, r0
-+	cmp/eq #-1, r0
-+	bf 1f
-+
-+	! the default environment is complicated by the fact that we need to
-+	! preserve the current precision bit, which we do not know a priori
-+	sts fpscr, r0
-+	mov #8, r1
-+	swap.w r1, r1
-+	bra 2f
-+	 and r1, r0
-+
-+1:	mov.l @r4, r0      ! non-default environment
-+2:	lds r0, fpscr
-+	rts
-+	 mov #0, r0
-+
-+#endif
---- a/src/fenv/sh/fenv.s
-+++ /dev/null
-@@ -1,74 +0,0 @@
--.global fegetround
--.type   fegetround, @function
--fegetround:
--	sts fpscr, r0
--	rts
--	 and #3, r0
--
--.global __fesetround
--.type   __fesetround, @function
--__fesetround:
--	sts fpscr, r0
--	or  r4, r0
--	lds r0, fpscr
--	rts
--	 mov #0, r0
--
--.global fetestexcept
--.type   fetestexcept, @function
--fetestexcept:
--	sts fpscr, r0
--	and r4, r0
--	rts
--	 and #0x7c, r0
--
--.global feclearexcept
--.type   feclearexcept, @function
--feclearexcept:
--	mov r4, r0
--	and #0x7c, r0
--	not r0, r4
--	sts fpscr, r0
--	and r4, r0
--	lds r0, fpscr
--	rts
--	 mov #0, r0
--
--.global feraiseexcept
--.type   feraiseexcept, @function
--feraiseexcept:
--	mov r4, r0
--	and #0x7c, r0
--	sts fpscr, r4
--	or  r4, r0
--	lds r0, fpscr
--	rts
--	 mov #0, r0
--
--.global fegetenv
--.type   fegetenv, @function
--fegetenv:
--	sts fpscr, r0
--	mov.l r0, @r4
--	rts
--	 mov #0, r0
--
--.global fesetenv
--.type   fesetenv, @function
--fesetenv:
--	mov r4, r0
--	cmp/eq #-1, r0
--	bf 1f
--
--	! the default environment is complicated by the fact that we need to
--	! preserve the current precision bit, which we do not know a priori
--	sts fpscr, r0
--	mov #8, r1
--	swap.w r1, r1
--	bra 2f
--	 and r1, r0
--
--1:	mov.l @r4, r0      ! non-default environment
--2:	lds r0, fpscr
--	rts
--	 mov #0, r0
---- a/src/fenv/sheb-nofpu/fenv.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--../fenv.c
---- a/src/internal/arm/syscall.s
-+++ b/src/internal/arm/syscall.s
-@@ -1,3 +1,4 @@
-+.syntax unified
- .global __syscall
- .hidden __syscall
- .type __syscall,%function
-@@ -11,6 +12,4 @@ __syscall:
- 	ldmfd ip,{r3,r4,r5,r6}
- 	svc 0
- 	ldmfd sp!,{r4,r5,r6,r7}
--	tst lr,#1
--	moveq pc,lr
- 	bx lr
---- /dev/null
-+++ b/src/internal/atomic.h
-@@ -0,0 +1,275 @@
-+#ifndef _ATOMIC_H
-+#define _ATOMIC_H
-+
-+#include <stdint.h>
-+
-+#include "atomic_arch.h"
-+
-+#ifdef a_ll
-+
-+#ifndef a_pre_llsc
-+#define a_pre_llsc()
-+#endif
-+
-+#ifndef a_post_llsc
-+#define a_post_llsc()
-+#endif
-+
-+#ifndef a_cas
-+#define a_cas a_cas
-+static inline int a_cas(volatile int *p, int t, int s)
-+{
-+	int old;
-+	a_pre_llsc();
-+	do old = a_ll(p);
-+	while (old==t && !a_sc(p, s));
-+	a_post_llsc();
-+	return old;
-+}
-+#endif
-+
-+#ifndef a_swap
-+#define a_swap a_swap
-+static inline int a_swap(volatile int *p, int v)
-+{
-+	int old;
-+	a_pre_llsc();
-+	do old = a_ll(p);
-+	while (!a_sc(p, v));
-+	a_post_llsc();
-+	return old;
-+}
-+#endif
-+
-+#ifndef a_fetch_add
-+#define a_fetch_add a_fetch_add
-+static inline int a_fetch_add(volatile int *p, int v)
-+{
-+	int old;
-+	a_pre_llsc();
-+	do old = a_ll(p);
-+	while (!a_sc(p, (unsigned)old + v));
-+	a_post_llsc();
-+	return old;
-+}
-+#endif
-+
-+#ifndef a_fetch_and
-+#define a_fetch_and a_fetch_and
-+static inline int a_fetch_and(volatile int *p, int v)
-+{
-+	int old;
-+	a_pre_llsc();
-+	do old = a_ll(p);
-+	while (!a_sc(p, old & v));
-+	a_post_llsc();
-+	return old;
-+}
-+#endif
-+
-+#ifndef a_fetch_or
-+#define a_fetch_or a_fetch_or
-+static inline int a_fetch_or(volatile int *p, int v)
-+{
-+	int old;
-+	a_pre_llsc();
-+	do old = a_ll(p);
-+	while (!a_sc(p, old | v));
-+	a_post_llsc();
-+	return old;
-+}
-+#endif
-+
-+#endif
-+
-+#ifndef a_cas
-+#error missing definition of a_cas
-+#endif
-+
-+#ifndef a_swap
-+#define a_swap a_swap
-+static inline int a_swap(volatile int *p, int v)
-+{
-+	int old;
-+	do old = *p;
-+	while (a_cas(p, old, v) != old);
-+	return old;
-+}
-+#endif
-+
-+#ifndef a_fetch_add
-+#define a_fetch_add a_fetch_add
-+static inline int a_fetch_add(volatile int *p, int v)
-+{
-+	int old;
-+	do old = *p;
-+	while (a_cas(p, old, (unsigned)old+v) != old);
-+	return old;
-+}
-+#endif
-+
-+#ifndef a_fetch_and
-+#define a_fetch_and a_fetch_and
-+static inline int a_fetch_and(volatile int *p, int v)
-+{
-+	int old;
-+	do old = *p;
-+	while (a_cas(p, old, old&v) != old);
-+	return old;
-+}
-+#endif
-+#ifndef a_fetch_or
-+#define a_fetch_or a_fetch_or
-+static inline int a_fetch_or(volatile int *p, int v)
-+{
-+	int old;
-+	do old = *p;
-+	while (a_cas(p, old, old|v) != old);
-+	return old;
-+}
-+#endif
-+
-+#ifndef a_and
-+#define a_and a_and
-+static inline void a_and(volatile int *p, int v)
-+{
-+	a_fetch_and(p, v);
-+}
-+#endif
-+
-+#ifndef a_or
-+#define a_or a_or
-+static inline void a_or(volatile int *p, int v)
-+{
-+	a_fetch_or(p, v);
-+}
-+#endif
-+
-+#ifndef a_inc
-+#define a_inc a_inc
-+static inline void a_inc(volatile int *p)
-+{
-+	a_fetch_add(p, 1);
-+}
-+#endif
-+
-+#ifndef a_dec
-+#define a_dec a_dec
-+static inline void a_dec(volatile int *p)
-+{
-+	a_fetch_add(p, -1);
-+}
-+#endif
-+
-+#ifndef a_store
-+#define a_store a_store
-+static inline void a_store(volatile int *p, int v)
-+{
-+#ifdef a_barrier
-+	a_barrier();
-+	*p = v;
-+	a_barrier();
-+#else
-+	a_swap(p, v);
-+#endif
-+}
-+#endif
-+
-+#ifndef a_barrier
-+#define a_barrier a_barrier
-+static void a_barrier()
-+{
-+	volatile int tmp = 0;
-+	a_cas(&tmp, 0, 0);
-+}
-+#endif
-+
-+#ifndef a_spin
-+#define a_spin a_barrier
-+#endif
-+
-+#ifndef a_and_64
-+#define a_and_64 a_and_64
-+static inline void a_and_64(volatile uint64_t *p, uint64_t v)
-+{
-+	union { uint64_t v; uint32_t r[2]; } u = { v };
-+	if (u.r[0]+1) a_and((int *)p, u.r[0]);
-+	if (u.r[1]+1) a_and((int *)p+1, u.r[1]);
-+}
-+#endif
-+
-+#ifndef a_or_64
-+#define a_or_64 a_or_64
-+static inline void a_or_64(volatile uint64_t *p, uint64_t v)
-+{
-+	union { uint64_t v; uint32_t r[2]; } u = { v };
-+	if (u.r[0]) a_or((int *)p, u.r[0]);
-+	if (u.r[1]) a_or((int *)p+1, u.r[1]);
-+}
-+#endif
-+
-+#ifndef a_cas_p
-+#define a_cas_p a_cas_p
-+static inline void *a_cas_p(volatile void *p, void *t, void *s)
-+{
-+	return (void *)a_cas((volatile int *)p, (int)t, (int)s);
-+}
-+#endif
-+
-+#ifndef a_or_l
-+#define a_or_l a_or_l
-+static inline void a_or_l(volatile void *p, long v)
-+{
-+	if (sizeof(long) == sizeof(int)) a_or(p, v);
-+	else a_or_64(p, v);
-+}
-+#endif
-+
-+#ifndef a_crash
-+#define a_crash a_crash
-+static inline void a_crash()
-+{
-+	*(volatile char *)0=0;
-+}
-+#endif
-+
-+#ifndef a_ctz_64
-+#define a_ctz_64 a_ctz_64
-+static inline int a_ctz_64(uint64_t x)
-+{
-+	static const char debruijn64[64] = {
-+		0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
-+		62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
-+		63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
-+		51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
-+	};
-+	static const char debruijn32[32] = {
-+		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
-+		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
-+	};
-+	if (sizeof(long) < 8) {
-+		uint32_t y = x;
-+		if (!y) {
-+			y = x>>32;
-+			return 32 + debruijn32[(y&-y)*0x076be629 >> 27];
-+		}
-+		return debruijn32[(y&-y)*0x076be629 >> 27];
-+	}
-+	return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58];
-+}
-+#endif
-+
-+#ifndef a_ctz_l
-+#define a_ctz_l a_ctz_l
-+static inline int a_ctz_l(unsigned long x)
-+{
-+	static const char debruijn32[32] = {
-+		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
-+		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
-+	};
-+	if (sizeof(long) == 8) return a_ctz_64(x);
-+	return debruijn32[(x&-x)*0x076be629 >> 27];
-+}
-+#endif
-+
-+#endif
---- a/src/internal/dynlink.h
-+++ b/src/internal/dynlink.h
-@@ -64,6 +64,10 @@ struct fdpic_dummy_loadmap {
- #define DL_FDPIC 0
- #endif
- 
-+#ifndef DL_NOMMU_SUPPORT
-+#define DL_NOMMU_SUPPORT 0
-+#endif
-+
- #if !DL_FDPIC
- #define IS_RELATIVE(x,s) ( \
- 	(R_TYPE(x) == REL_RELATIVE) || \
---- a/src/internal/libc.h
-+++ b/src/internal/libc.h
-@@ -11,13 +11,20 @@ struct __locale_struct {
- 	const struct __locale_map *volatile cat[6];
- };
- 
-+struct tls_module {
-+	struct tls_module *next;
-+	void *image;
-+	size_t len, size, align, offset;
-+};
-+
- struct __libc {
- 	int can_do_threads;
- 	int threaded;
- 	int secure;
- 	volatile int threads_minus_1;
- 	size_t *auxv;
--	size_t tls_size;
-+	struct tls_module *tls_head;
-+	size_t tls_size, tls_align, tls_cnt;
- 	size_t page_size;
- 	struct __locale_struct global_locale;
- };
---- a/src/internal/syscall.h
-+++ b/src/internal/syscall.h
-@@ -17,9 +17,7 @@
- typedef long syscall_arg_t;
- #endif
- 
--#ifdef SHARED
- __attribute__((visibility("hidden")))
--#endif
- long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...),
- 	__syscall_cp(syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t,
- 	             syscall_arg_t, syscall_arg_t, syscall_arg_t);
---- a/src/internal/version.c
-+++ b/src/internal/version.c
-@@ -1,12 +1,9 @@
--#ifdef SHARED
--
- #include "version.h"
- 
- static const char version[] = VERSION;
- 
-+__attribute__((__visibility__("hidden")))
- const char *__libc_get_version()
- {
- 	return version;
- }
--
--#endif
---- a/src/internal/vis.h
-+++ b/src/internal/vis.h
-@@ -4,10 +4,9 @@
-  * override default visibilities to reduce the size and performance costs
-  * of position-independent code. */
- 
--#ifndef CRT
--#ifdef SHARED
-+#if !defined(CRT) && !defined(__ASSEMBLER__)
- 
--/* For shared libc.so, all symbols should be protected, but some toolchains
-+/* Conceptually, all symbols should be protected, but some toolchains
-  * fail to support copy relocations for protected data, so exclude all
-  * exported data symbols. */
- 
-@@ -25,16 +24,4 @@ extern char *optarg, **environ, **__envi
- 
- #pragma GCC visibility push(protected)
- 
--#elif defined(__PIC__)
--
--/* If building static libc.a as position-independent code, try to make
-- * everything hidden except possibly-undefined weak references. */
--
--__attribute__((__visibility__("default")))
--extern void (*const __init_array_start)(), (*const __init_array_end)(),
--	(*const __fini_array_start)(), (*const __fini_array_end)();
--
--#pragma GCC visibility push(hidden)
--
--#endif
- #endif
---- a/src/ldso/arm/dlsym.s
-+++ b/src/ldso/arm/dlsym.s
-@@ -1,3 +1,4 @@
-+.syntax unified
- .text
- .global dlsym
- .hidden __dlsym
---- /dev/null
-+++ b/src/ldso/arm/find_exidx.c
-@@ -0,0 +1,42 @@
-+#define _GNU_SOURCE
-+#include <link.h>
-+#include <stdint.h>
-+
-+struct find_exidx_data {
-+	uintptr_t pc, exidx_start;
-+	int exidx_len;
-+};
-+
-+static int find_exidx(struct dl_phdr_info *info, size_t size, void *ptr)
-+{
-+	struct find_exidx_data *data = ptr;
-+	const ElfW(Phdr) *phdr = info->dlpi_phdr;
-+	uintptr_t addr, exidx_start = 0;
-+	int i, match = 0, exidx_len = 0;
-+
-+	for (i = info->dlpi_phnum; i > 0; i--, phdr++) {
-+		addr = info->dlpi_addr + phdr->p_vaddr;
-+		switch (phdr->p_type) {
-+		case PT_LOAD:
-+			match |= data->pc >= addr && data->pc < addr + phdr->p_memsz;
-+			break;
-+		case PT_ARM_EXIDX:
-+			exidx_start = addr;
-+			exidx_len = phdr->p_memsz;
-+			break;
-+		}
-+	}
-+	data->exidx_start = exidx_start;
-+	data->exidx_len = exidx_len;
-+	return match;
-+}
-+
-+uintptr_t __gnu_Unwind_Find_exidx(uintptr_t pc, int *pcount)
-+{
-+	struct find_exidx_data data;
-+	data.pc = pc;
-+	if (dl_iterate_phdr(find_exidx, &data) <= 0)
-+		return 0;
-+	*pcount = data.exidx_len / 8;
-+	return data.exidx_start;
-+}
---- a/src/ldso/dynlink.c
-+++ b/src/ldso/dynlink.c
-@@ -70,8 +70,8 @@ struct dso {
- 	char kernel_mapped;
- 	struct dso **deps, *needed_by;
- 	char *rpath_orig, *rpath;
--	void *tls_image;
--	size_t tls_len, tls_size, tls_align, tls_id, tls_offset;
-+	struct tls_module tls;
-+	size_t tls_id;
- 	size_t relro_start, relro_end;
- 	void **new_dtv;
- 	unsigned char *new_tls;
-@@ -99,7 +99,9 @@ struct symdef {
- 
- int __init_tp(void *);
- void __init_libc(char **, char *);
-+void *__copy_tls(unsigned char *);
- 
-+__attribute__((__visibility__("hidden")))
- const char *__libc_get_version(void);
- 
- static struct builtin_tls {
-@@ -123,6 +125,7 @@ static int noload;
- static jmp_buf *rtld_fail;
- static pthread_rwlock_t lock;
- static struct debug debug;
-+static struct tls_module *tls_tail;
- static size_t tls_cnt, tls_offset, tls_align = MIN_TLS_ALIGN;
- static size_t static_tls_cnt;
- static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE };
-@@ -131,6 +134,15 @@ static struct fdpic_dummy_loadmap app_du
- 
- struct debug *_dl_debug_addr = &debug;
- 
-+__attribute__((__visibility__("hidden")))
-+void (*const __init_array_start)(void)=0, (*const __fini_array_start)(void)=0;
-+
-+__attribute__((__visibility__("hidden")))
-+extern void (*const __init_array_end)(void), (*const __fini_array_end)(void);
-+
-+weak_alias(__init_array_start, __init_array_end);
-+weak_alias(__fini_array_start, __fini_array_end);
-+
- static int dl_strcmp(const char *l, const char *r)
- {
- 	for (; *l==*r && *l; l++, r++);
-@@ -397,14 +409,14 @@ static void do_relocs(struct dso *dso, s
- 			break;
- #ifdef TLS_ABOVE_TP
- 		case REL_TPOFF:
--			*reloc_addr = tls_val + def.dso->tls_offset + TPOFF_K + addend;
-+			*reloc_addr = tls_val + def.dso->tls.offset + TPOFF_K + addend;
- 			break;
- #else
- 		case REL_TPOFF:
--			*reloc_addr = tls_val - def.dso->tls_offset + addend;
-+			*reloc_addr = tls_val - def.dso->tls.offset + addend;
- 			break;
- 		case REL_TPOFF_NEG:
--			*reloc_addr = def.dso->tls_offset - tls_val + addend;
-+			*reloc_addr = def.dso->tls.offset - tls_val + addend;
- 			break;
- #endif
- 		case REL_TLSDESC:
-@@ -426,10 +438,10 @@ static void do_relocs(struct dso *dso, s
- 			} else {
- 				reloc_addr[0] = (size_t)__tlsdesc_static;
- #ifdef TLS_ABOVE_TP
--				reloc_addr[1] = tls_val + def.dso->tls_offset
-+				reloc_addr[1] = tls_val + def.dso->tls.offset
- 					+ TPOFF_K + addend;
- #else
--				reloc_addr[1] = tls_val - def.dso->tls_offset
-+				reloc_addr[1] = tls_val - def.dso->tls.offset
- 					+ addend;
- #endif
- 			}
-@@ -482,8 +494,14 @@ static void reclaim_gaps(struct dso *dso
- 
- static void *mmap_fixed(void *p, size_t n, int prot, int flags, int fd, off_t off)
- {
--	char *q = mmap(p, n, prot, flags, fd, off);
--	if (q != MAP_FAILED || errno != EINVAL) return q;
-+	static int no_map_fixed;
-+	char *q;
-+	if (!no_map_fixed) {
-+		q = mmap(p, n, prot, flags|MAP_FIXED, fd, off);
-+		if (!DL_NOMMU_SUPPORT || q != MAP_FAILED || errno != EINVAL)
-+			return q;
-+		no_map_fixed = 1;
-+	}
- 	/* Fallbacks for MAP_FIXED failure on NOMMU kernels. */
- 	if (flags & MAP_ANONYMOUS) {
- 		memset(p, 0, n);
-@@ -561,9 +579,9 @@ static void *map_library(int fd, struct
- 			dyn = ph->p_vaddr;
- 		} else if (ph->p_type == PT_TLS) {
- 			tls_image = ph->p_vaddr;
--			dso->tls_align = ph->p_align;
--			dso->tls_len = ph->p_filesz;
--			dso->tls_size = ph->p_memsz;
-+			dso->tls.align = ph->p_align;
-+			dso->tls.len = ph->p_filesz;
-+			dso->tls.size = ph->p_memsz;
- 		} else if (ph->p_type == PT_GNU_RELRO) {
- 			dso->relro_start = ph->p_vaddr & -PAGE_SIZE;
- 			dso->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE;
-@@ -593,7 +611,7 @@ static void *map_library(int fd, struct
- 				((ph->p_flags&PF_W) ? PROT_WRITE: 0) |
- 				((ph->p_flags&PF_X) ? PROT_EXEC : 0));
- 			map = mmap(0, ph->p_memsz + (ph->p_vaddr & PAGE_SIZE-1),
--				prot, (prot&PROT_WRITE) ? MAP_PRIVATE : MAP_SHARED,
-+				prot, MAP_PRIVATE,
- 				fd, ph->p_offset & -PAGE_SIZE);
- 			if (map == MAP_FAILED) {
- 				unmap_library(dso);
-@@ -604,6 +622,19 @@ static void *map_library(int fd, struct
- 			dso->loadmap->segs[i].p_vaddr = ph->p_vaddr;
- 			dso->loadmap->segs[i].p_memsz = ph->p_memsz;
- 			i++;
-+			if (prot & PROT_WRITE) {
-+				size_t brk = (ph->p_vaddr & PAGE_SIZE-1)
-+					+ ph->p_filesz;
-+				size_t pgbrk = brk + PAGE_SIZE-1 & -PAGE_SIZE;
-+				size_t pgend = brk + ph->p_memsz - ph->p_filesz
-+					+ PAGE_SIZE-1 & -PAGE_SIZE;
-+				if (pgend > pgbrk && mmap_fixed(map+pgbrk,
-+					pgend-pgbrk, prot,
-+					MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS,
-+					-1, off_start) == MAP_FAILED)
-+					goto error;
-+				memset(map + brk, 0, pgbrk-brk);
-+			}
- 		}
- 		map = (void *)dso->loadmap->segs[0].addr;
- 		map_len = 0;
-@@ -618,7 +649,11 @@ static void *map_library(int fd, struct
- 	 * the length of the file. This is okay because we will not
- 	 * use the invalid part; we just need to reserve the right
- 	 * amount of virtual address space to map over later. */
--	map = mmap((void *)addr_min, map_len, prot, MAP_PRIVATE, fd, off_start);
-+	map = DL_NOMMU_SUPPORT
-+		? mmap((void *)addr_min, map_len, PROT_READ|PROT_WRITE|PROT_EXEC,
-+			MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
-+		: mmap((void *)addr_min, map_len, prot,
-+			MAP_PRIVATE, fd, off_start);
- 	if (map==MAP_FAILED) goto error;
- 	dso->map = map;
- 	dso->map_len = map_len;
-@@ -643,7 +678,8 @@ static void *map_library(int fd, struct
- 			dso->phentsize = eh->e_phentsize;
- 		}
- 		/* Reuse the existing mapping for the lowest-address LOAD */
--		if ((ph->p_vaddr & -PAGE_SIZE) == addr_min) continue;
-+		if ((ph->p_vaddr & -PAGE_SIZE) == addr_min && !DL_NOMMU_SUPPORT)
-+			continue;
- 		this_min = ph->p_vaddr & -PAGE_SIZE;
- 		this_max = ph->p_vaddr+ph->p_memsz+PAGE_SIZE-1 & -PAGE_SIZE;
- 		off_start = ph->p_offset & -PAGE_SIZE;
-@@ -670,7 +706,7 @@ static void *map_library(int fd, struct
- done_mapping:
- 	dso->base = base;
- 	dso->dynv = laddr(dso, dyn);
--	if (dso->tls_size) dso->tls_image = laddr(dso, tls_image);
-+	if (dso->tls.size) dso->tls.image = laddr(dso, tls_image);
- 	if (!runtime) reclaim_gaps(dso);
- 	free(allocated_buf);
- 	return map;
-@@ -987,8 +1023,8 @@ static struct dso *load_library(const ch
- 	 * extended DTV capable of storing an additional slot for
- 	 * the newly-loaded DSO. */
- 	alloc_size = sizeof *p + strlen(pathname) + 1;
--	if (runtime && temp_dso.tls_image) {
--		size_t per_th = temp_dso.tls_size + temp_dso.tls_align
-+	if (runtime && temp_dso.tls.image) {
-+		size_t per_th = temp_dso.tls.size + temp_dso.tls.align
- 			+ sizeof(void *) * (tls_cnt+3);
- 		n_th = libc.threads_minus_1 + 1;
- 		if (n_th > SSIZE_MAX / per_th) alloc_size = SIZE_MAX;
-@@ -1009,22 +1045,25 @@ static struct dso *load_library(const ch
- 	strcpy(p->name, pathname);
- 	/* Add a shortname only if name arg was not an explicit pathname. */
- 	if (pathname != name) p->shortname = strrchr(p->name, '/')+1;
--	if (p->tls_image) {
-+	if (p->tls.image) {
- 		p->tls_id = ++tls_cnt;
--		tls_align = MAXP2(tls_align, p->tls_align);
-+		tls_align = MAXP2(tls_align, p->tls.align);
- #ifdef TLS_ABOVE_TP
--		p->tls_offset = tls_offset + ( (tls_align-1) &
--			-(tls_offset + (uintptr_t)p->tls_image) );
--		tls_offset += p->tls_size;
-+		p->tls.offset = tls_offset + ( (tls_align-1) &
-+			-(tls_offset + (uintptr_t)p->tls.image) );
-+		tls_offset += p->tls.size;
- #else
--		tls_offset += p->tls_size + p->tls_align - 1;
--		tls_offset -= (tls_offset + (uintptr_t)p->tls_image)
--			& (p->tls_align-1);
--		p->tls_offset = tls_offset;
-+		tls_offset += p->tls.size + p->tls.align - 1;
-+		tls_offset -= (tls_offset + (uintptr_t)p->tls.image)
-+			& (p->tls.align-1);
-+		p->tls.offset = tls_offset;
- #endif
- 		p->new_dtv = (void *)(-sizeof(size_t) &
- 			(uintptr_t)(p->name+strlen(p->name)+sizeof(size_t)));
- 		p->new_tls = (void *)(p->new_dtv + n_th*(tls_cnt+1));
-+		if (tls_tail) tls_tail->next = &p->tls;
-+		else libc.tls_head = &p->tls;
-+		tls_tail = &p->tls;
- 	}
- 
- 	tail->next = p;
-@@ -1151,7 +1190,7 @@ static void kernel_mapped_dso(struct dso
- 	p->kernel_mapped = 1;
- }
- 
--static void do_fini()
-+void __libc_exit_fini()
- {
- 	struct dso *p;
- 	size_t dyn[DYN_CNT];
-@@ -1214,53 +1253,8 @@ static void dl_debug_state(void)
- 
- weak_alias(dl_debug_state, _dl_debug_state);
- 
--void __reset_tls()
-+void __init_tls(size_t *auxv)
- {
--	pthread_t self = __pthread_self();
--	struct dso *p;
--	for (p=head; p; p=p->next) {
--		if (!p->tls_id || !self->dtv[p->tls_id]) continue;
--		memcpy(self->dtv[p->tls_id], p->tls_image, p->tls_len);
--		memset((char *)self->dtv[p->tls_id]+p->tls_len, 0,
--			p->tls_size - p->tls_len);
--		if (p->tls_id == (size_t)self->dtv[0]) break;
--	}
--}
--
--void *__copy_tls(unsigned char *mem)
--{
--	pthread_t td;
--	struct dso *p;
--	void **dtv;
--
--#ifdef TLS_ABOVE_TP
--	dtv = (void **)(mem + libc.tls_size) - (tls_cnt + 1);
--
--	mem += -((uintptr_t)mem + sizeof(struct pthread)) & (tls_align-1);
--	td = (pthread_t)mem;
--	mem += sizeof(struct pthread);
--
--	for (p=head; p; p=p->next) {
--		if (!p->tls_id) continue;
--		dtv[p->tls_id] = mem + p->tls_offset;
--		memcpy(dtv[p->tls_id], p->tls_image, p->tls_len);
--	}
--#else
--	dtv = (void **)mem;
--
--	mem += libc.tls_size - sizeof(struct pthread);
--	mem -= (uintptr_t)mem & (tls_align-1);
--	td = (pthread_t)mem;
--
--	for (p=head; p; p=p->next) {
--		if (!p->tls_id) continue;
--		dtv[p->tls_id] = mem - p->tls_offset;
--		memcpy(dtv[p->tls_id], p->tls_image, p->tls_len);
--	}
--#endif
--	dtv[0] = (void *)tls_cnt;
--	td->dtv = td->dtv_copy = dtv;
--	return td;
- }
- 
- __attribute__((__visibility__("hidden")))
-@@ -1286,7 +1280,7 @@ void *__tls_get_new(size_t *v)
- 	/* Get new DTV space from new DSO if needed */
- 	if (v[0] > (size_t)self->dtv[0]) {
- 		void **newdtv = p->new_dtv +
--			(v[0]+1)*sizeof(void *)*a_fetch_add(&p->new_dtv_idx,1);
-+			(v[0]+1)*a_fetch_add(&p->new_dtv_idx,1);
- 		memcpy(newdtv, self->dtv,
- 			((size_t)self->dtv[0]+1) * sizeof(void *));
- 		newdtv[0] = (void *)v[0];
-@@ -1297,12 +1291,12 @@ void *__tls_get_new(size_t *v)
- 	unsigned char *mem;
- 	for (p=head; ; p=p->next) {
- 		if (!p->tls_id || self->dtv[p->tls_id]) continue;
--		mem = p->new_tls + (p->tls_size + p->tls_align)
-+		mem = p->new_tls + (p->tls.size + p->tls.align)
- 			* a_fetch_add(&p->new_tls_idx,1);
--		mem += ((uintptr_t)p->tls_image - (uintptr_t)mem)
--			& (p->tls_align-1);
-+		mem += ((uintptr_t)p->tls.image - (uintptr_t)mem)
-+			& (p->tls.align-1);
- 		self->dtv[p->tls_id] = mem;
--		memcpy(mem, p->tls_image, p->tls_len);
-+		memcpy(mem, p->tls.image, p->tls.len);
- 		if (p->tls_id == v[0]) break;
- 	}
- 	__restore_sigs(&set);
-@@ -1311,6 +1305,8 @@ void *__tls_get_new(size_t *v)
- 
- static void update_tls_size()
- {
-+	libc.tls_cnt = tls_cnt;
-+	libc.tls_align = tls_align;
- 	libc.tls_size = ALIGN(
- 		(1+tls_cnt) * sizeof(void *) +
- 		tls_offset +
-@@ -1421,6 +1417,7 @@ _Noreturn void __dls3(size_t *sp)
- 	 * use during dynamic linking. If possible it will also serve as the
- 	 * thread pointer at runtime. */
- 	libc.tls_size = sizeof builtin_tls;
-+	libc.tls_align = tls_align;
- 	if (__init_tp(__copy_tls((void *)builtin_tls)) < 0) {
- 		a_crash();
- 	}
-@@ -1448,13 +1445,13 @@ _Noreturn void __dls3(size_t *sp)
- 				interp_off = (size_t)phdr->p_vaddr;
- 			else if (phdr->p_type == PT_TLS) {
- 				tls_image = phdr->p_vaddr;
--				app.tls_len = phdr->p_filesz;
--				app.tls_size = phdr->p_memsz;
--				app.tls_align = phdr->p_align;
-+				app.tls.len = phdr->p_filesz;
-+				app.tls.size = phdr->p_memsz;
-+				app.tls.align = phdr->p_align;
- 			}
- 		}
- 		if (DL_FDPIC) app.loadmap = app_loadmap;
--		if (app.tls_size) app.tls_image = laddr(&app, tls_image);
-+		if (app.tls.size) app.tls.image = laddr(&app, tls_image);
- 		if (interp_off) ldso.name = laddr(&app, interp_off);
- 		if ((aux[0] & (1UL<<AT_EXECFN))
- 		    && strncmp((char *)aux[AT_EXECFN], "/proc/", 6))
-@@ -1523,19 +1520,20 @@ _Noreturn void __dls3(size_t *sp)
- 			dprintf(1, "\t%s (%p)\n", ldso.name, ldso.base);
- 		}
- 	}
--	if (app.tls_size) {
-+	if (app.tls.size) {
-+		libc.tls_head = &app.tls;
- 		app.tls_id = tls_cnt = 1;
- #ifdef TLS_ABOVE_TP
--		app.tls_offset = 0;
--		tls_offset = app.tls_size
--			+ ( -((uintptr_t)app.tls_image + app.tls_size)
--			& (app.tls_align-1) );
-+		app.tls.offset = 0;
-+		tls_offset = app.tls.size
-+			+ ( -((uintptr_t)app.tls.image + app.tls.size)
-+			& (app.tls.align-1) );
- #else
--		tls_offset = app.tls_offset = app.tls_size
--			+ ( -((uintptr_t)app.tls_image + app.tls_size)
--			& (app.tls_align-1) );
-+		tls_offset = app.tls.offset = app.tls.size
-+			+ ( -((uintptr_t)app.tls.image + app.tls.size)
-+			& (app.tls.align-1) );
- #endif
--		tls_align = MAXP2(tls_align, app.tls_align);
-+		tls_align = MAXP2(tls_align, app.tls.align);
- 	}
- 	app.global = 1;
- 	decode_dyn(&app);
-@@ -1635,8 +1633,6 @@ _Noreturn void __dls3(size_t *sp)
- 	debug.state = 0;
- 	_dl_debug_state();
- 
--	__init_libc(envp, argv[0]);
--	atexit(do_fini);
- 	errno = 0;
- 
- 	CRTJMP((void *)aux[AT_ENTRY], argv-1);
-@@ -1646,6 +1642,7 @@ _Noreturn void __dls3(size_t *sp)
- void *dlopen(const char *file, int mode)
- {
- 	struct dso *volatile p, *orig_tail, *next;
-+	struct tls_module *orig_tls_tail;
- 	size_t orig_tls_cnt, orig_tls_offset, orig_tls_align;
- 	size_t i;
- 	int cs;
-@@ -1658,6 +1655,7 @@ void *dlopen(const char *file, int mode)
- 	__inhibit_ptc();
- 
- 	p = 0;
-+	orig_tls_tail = tls_tail;
- 	orig_tls_cnt = tls_cnt;
- 	orig_tls_offset = tls_offset;
- 	orig_tls_align = tls_align;
-@@ -1684,6 +1682,8 @@ void *dlopen(const char *file, int mode)
- 			unmap_library(p);
- 			free(p);
- 		}
-+		if (!orig_tls_tail) libc.tls_head = 0;
-+		tls_tail = orig_tls_tail;
- 		tls_cnt = orig_tls_cnt;
- 		tls_offset = orig_tls_offset;
- 		tls_align = orig_tls_align;
-@@ -1900,7 +1900,7 @@ int dl_iterate_phdr(int(*callback)(struc
- 		info.dlpi_adds      = gencnt;
- 		info.dlpi_subs      = 0;
- 		info.dlpi_tls_modid = current->tls_id;
--		info.dlpi_tls_data  = current->tls_image;
-+		info.dlpi_tls_data  = current->tls.image;
- 
- 		ret = (callback)(&info, sizeof (info), data);
- 
---- a/src/locale/langinfo.c
-+++ b/src/locale/langinfo.c
-@@ -37,23 +37,23 @@ char *__nl_langinfo_l(nl_item item, loca
- 	
- 	switch (cat) {
- 	case LC_NUMERIC:
--		if (idx > 1) return NULL;
-+		if (idx > 1) return "";
- 		str = c_numeric;
- 		break;
- 	case LC_TIME:
--		if (idx > 0x31) return NULL;
-+		if (idx > 0x31) return "";
- 		str = c_time;
- 		break;
- 	case LC_MONETARY:
--		if (idx > 0) return NULL;
-+		if (idx > 0) return "";
- 		str = "";
- 		break;
- 	case LC_MESSAGES:
--		if (idx > 3) return NULL;
-+		if (idx > 3) return "";
- 		str = c_messages;
- 		break;
- 	default:
--		return NULL;
-+		return "";
- 	}
- 
- 	for (; idx; idx--, str++) for (; *str; str++);
---- a/src/malloc/lite_malloc.c
-+++ b/src/malloc/lite_malloc.c
-@@ -8,7 +8,7 @@
- 
- void *__expand_heap(size_t *);
- 
--void *__simple_malloc(size_t n)
-+static void *__simple_malloc(size_t n)
- {
- 	static char *cur, *end;
- 	static volatile int lock[2];
---- a/src/math/__rem_pio2.c
-+++ b/src/math/__rem_pio2.c
-@@ -118,7 +118,7 @@ int __rem_pio2(double x, double *y)
- 	if (ix < 0x413921fb) {  /* |x| ~< 2^20*(pi/2), medium size */
- medium:
- 		/* rint(x/(pi/2)), Assume round-to-nearest. */
--		fn = x*invpio2 + toint - toint;
-+		fn = (double_t)x*invpio2 + toint - toint;
- 		n = (int32_t)fn;
- 		r = x - fn*pio2_1;
- 		w = fn*pio2_1t;  /* 1st round, good to 85 bits */
---- a/src/math/__rem_pio2f.c
-+++ b/src/math/__rem_pio2f.c
-@@ -51,7 +51,7 @@ int __rem_pio2f(float x, double *y)
- 	/* 25+53 bit pi is good enough for medium size */
- 	if (ix < 0x4dc90fdb) {  /* |x| ~< 2^28*(pi/2), medium size */
- 		/* Use a specialized rint() to get fn.  Assume round-to-nearest. */
--		fn = x*invpio2 + toint - toint;
-+		fn = (double_t)x*invpio2 + toint - toint;
- 		n  = (int32_t)fn;
- 		*y = x - fn*pio2_1 - fn*pio2_1t;
- 		return n;
---- /dev/null
-+++ b/src/math/arm/fabs.c
-@@ -0,0 +1,15 @@
-+#include <math.h>
-+
-+#if __ARM_PCS_VFP
-+
-+double fabs(double x)
-+{
-+	__asm__ ("vabs.f64 %P0, %P1" : "=w"(x) : "w"(x));
-+	return x;
-+}
-+
-+#else
-+
-+#include "../fabs.c"
-+
-+#endif
---- /dev/null
-+++ b/src/math/arm/fabsf.c
-@@ -0,0 +1,15 @@
-+#include <math.h>
-+
-+#if __ARM_PCS_VFP
-+
-+float fabsf(float x)
-+{
-+	__asm__ ("vabs.f32 %0, %1" : "=t"(x) : "t"(x));
-+	return x;
-+}
-+
-+#else
-+
-+#include "../fabsf.c"
-+
-+#endif
---- /dev/null
-+++ b/src/math/arm/sqrt.c
-@@ -0,0 +1,15 @@
-+#include <math.h>
-+
-+#if __VFP_FP__ && !__SOFTFP__
-+
-+double sqrt(double x)
-+{
-+	__asm__ ("vsqrt.f64 %P0, %P1" : "=w"(x) : "w"(x));
-+	return x;
-+}
-+
-+#else
-+
-+#include "../sqrt.c"
-+
-+#endif
---- /dev/null
-+++ b/src/math/arm/sqrtf.c
-@@ -0,0 +1,15 @@
-+#include <math.h>
-+
-+#if __VFP_FP__ && !__SOFTFP__
-+
-+float sqrtf(float x)
-+{
-+	__asm__ ("vsqrt.f32 %0, %1" : "=t"(x) : "t"(x));
-+	return x;
-+}
-+
-+#else
-+
-+#include "../sqrtf.c"
-+
-+#endif
---- a/src/math/armebhf/fabs.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--../armhf/fabs.s
---- a/src/math/armebhf/fabsf.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--../armhf/fabsf.s
---- a/src/math/armebhf/sqrt.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--../armhf/sqrt.s
---- a/src/math/armebhf/sqrtf.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--../armhf/sqrtf.s
---- a/src/math/armhf/fabs.s
-+++ /dev/null
-@@ -1,7 +0,0 @@
--.fpu vfp
--.text
--.global fabs
--.type   fabs,%function
--fabs:
--	vabs.f64 d0, d0
--	bx lr
---- a/src/math/armhf/fabs.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--fabs.s
---- a/src/math/armhf/fabsf.s
-+++ /dev/null
-@@ -1,7 +0,0 @@
--.fpu vfp
--.text
--.global fabsf
--.type   fabsf,%function
--fabsf:
--	vabs.f32 s0, s0
--	bx lr
---- a/src/math/armhf/fabsf.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--fabsf.s
---- a/src/math/armhf/sqrt.s
-+++ /dev/null
-@@ -1,7 +0,0 @@
--.fpu vfp
--.text
--.global sqrt
--.type   sqrt,%function
--sqrt:
--	vsqrt.f64 d0, d0
--	bx lr
---- a/src/math/armhf/sqrt.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--sqrt.s
---- a/src/math/armhf/sqrtf.s
-+++ /dev/null
-@@ -1,7 +0,0 @@
--.fpu vfp
--.text
--.global sqrtf
--.type   sqrtf,%function
--sqrtf:
--	vsqrt.f32 s0, s0
--	bx lr
---- a/src/math/armhf/sqrtf.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--sqrtf.s
---- a/src/math/hypot.c
-+++ b/src/math/hypot.c
-@@ -12,10 +12,10 @@ static void sq(double_t *hi, double_t *l
- {
- 	double_t xh, xl, xc;
- 
--	xc = x*SPLIT;
-+	xc = (double_t)x*SPLIT;
- 	xh = x - xc + xc;
- 	xl = x - xh;
--	*hi = x*x;
-+	*hi = (double_t)x*x;
- 	*lo = xh*xh - *hi + 2*xh*xl + xl*xl;
- }
- 
---- a/src/mman/mremap.c
-+++ b/src/mman/mremap.c
-@@ -1,17 +1,31 @@
-+#define _GNU_SOURCE
- #include <unistd.h>
- #include <sys/mman.h>
-+#include <errno.h>
-+#include <stdint.h>
- #include <stdarg.h>
- #include "syscall.h"
- #include "libc.h"
- 
-+static void dummy(void) { }
-+weak_alias(dummy, __vm_wait);
-+
- void *__mremap(void *old_addr, size_t old_len, size_t new_len, int flags, ...)
- {
- 	va_list ap;
--	void *new_addr;
--	
--	va_start(ap, flags);
--	new_addr = va_arg(ap, void *);
--	va_end(ap);
-+	void *new_addr = 0;
-+
-+	if (new_len >= PTRDIFF_MAX) {
-+		errno = ENOMEM;
-+		return MAP_FAILED;
-+	}
-+
-+	if (flags & MREMAP_FIXED) {
-+		__vm_wait();
-+		va_start(ap, flags);
-+		new_addr = va_arg(ap, void *);
-+		va_end(ap);
-+	}
- 
- 	return (void *)syscall(SYS_mremap, old_addr, old_len, new_len, flags, new_addr);
- }
---- a/src/network/getifaddrs.c
-+++ b/src/network/getifaddrs.c
-@@ -162,13 +162,26 @@ static int netlink_msg_to_ifaddr(void *p
- 		for (rta = NLMSG_RTA(h, sizeof(*ifa)); NLMSG_RTAOK(rta, h); rta = RTA_NEXT(rta)) {
- 			switch (rta->rta_type) {
- 			case IFA_ADDRESS:
--				copy_addr(&ifs->ifa.ifa_addr, ifa->ifa_family, &ifs->addr, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index);
-+				/* If ifa_addr is already set we, received an IFA_LOCAL before
-+				 * so treat this as destination address */
-+				if (ifs->ifa.ifa_addr)
-+					copy_addr(&ifs->ifa.ifa_dstaddr, ifa->ifa_family, &ifs->ifu, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index);
-+				else
-+					copy_addr(&ifs->ifa.ifa_addr, ifa->ifa_family, &ifs->addr, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index);
- 				break;
- 			case IFA_BROADCAST:
--				/* For point-to-point links this is peer, but ifa_broadaddr
--				 * and ifa_dstaddr are union, so this works for both.  */
- 				copy_addr(&ifs->ifa.ifa_broadaddr, ifa->ifa_family, &ifs->ifu, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index);
- 				break;
-+			case IFA_LOCAL:
-+				/* If ifa_addr is set and we get IFA_LOCAL, assume we have
-+				 * a point-to-point network. Move address to correct field. */
-+				if (ifs->ifa.ifa_addr) {
-+					ifs->ifu = ifs->addr;
-+					ifs->ifa.ifa_dstaddr = &ifs->ifu.sa;
-+					memset(&ifs->addr, 0, sizeof(ifs->addr));
-+				}
-+				copy_addr(&ifs->ifa.ifa_addr, ifa->ifa_family, &ifs->addr, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index);
-+				break;
- 			case IFA_LABEL:
- 				if (RTA_DATALEN(rta) < sizeof(ifs->name)) {
- 					memcpy(ifs->name, RTA_DATA(rta), RTA_DATALEN(rta));
---- a/src/network/getnameinfo.c
-+++ b/src/network/getnameinfo.c
-@@ -135,13 +135,13 @@ int getnameinfo(const struct sockaddr *r
- 	switch (af) {
- 	case AF_INET:
- 		a = (void *)&((struct sockaddr_in *)sa)->sin_addr;
--		if (sl != sizeof(struct sockaddr_in)) return EAI_FAMILY;
-+		if (sl < sizeof(struct sockaddr_in)) return EAI_FAMILY;
- 		mkptr4(ptr, a);
- 		scopeid = 0;
- 		break;
- 	case AF_INET6:
- 		a = (void *)&((struct sockaddr_in6 *)sa)->sin6_addr;
--		if (sl != sizeof(struct sockaddr_in6)) return EAI_FAMILY;
-+		if (sl < sizeof(struct sockaddr_in6)) return EAI_FAMILY;
- 		if (memcmp(a, "\0\0\0\0\0\0\0\0\0\0\xff\xff", 12))
- 			mkptr6(ptr, a);
- 		else
---- a/src/network/if_nametoindex.c
-+++ b/src/network/if_nametoindex.c
-@@ -10,7 +10,7 @@ unsigned if_nametoindex(const char *name
- 	struct ifreq ifr;
- 	int fd, r;
- 
--	if ((fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0)) < 0) return -1;
-+	if ((fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0)) < 0) return 0;
- 	strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
- 	r = ioctl(fd, SIOCGIFINDEX, &ifr);
- 	__syscall(SYS_close, fd);
---- a/src/network/lookup_name.c
-+++ b/src/network/lookup_name.c
-@@ -9,6 +9,7 @@
- #include <fcntl.h>
- #include <unistd.h>
- #include <pthread.h>
-+#include <errno.h>
- #include "lookup.h"
- #include "stdio_impl.h"
- #include "syscall.h"
-@@ -51,7 +52,14 @@ static int name_from_hosts(struct addres
- 	int cnt = 0;
- 	unsigned char _buf[1032];
- 	FILE _f, *f = __fopen_rb_ca("/etc/hosts", &_f, _buf, sizeof _buf);
--	if (!f) return 0;
-+	if (!f) switch (errno) {
-+	case ENOENT:
-+	case ENOTDIR:
-+	case EACCES:
-+		return 0;
-+	default:
-+		return EAI_SYSTEM;
-+	}
- 	while (fgets(line, sizeof line, f) && cnt < MAXADDRS) {
- 		char *p, *z;
- 
---- a/src/network/lookup_serv.c
-+++ b/src/network/lookup_serv.c
-@@ -4,6 +4,7 @@
- #include <ctype.h>
- #include <string.h>
- #include <fcntl.h>
-+#include <errno.h>
- #include "lookup.h"
- #include "stdio_impl.h"
- 
-@@ -69,7 +70,14 @@ int __lookup_serv(struct service buf[sta
- 
- 	unsigned char _buf[1032];
- 	FILE _f, *f = __fopen_rb_ca("/etc/services", &_f, _buf, sizeof _buf);
--	if (!f) return EAI_SERVICE;
-+	if (!f) switch (errno) {
-+	case ENOENT:
-+	case ENOTDIR:
-+	case EACCES:
-+		return EAI_SERVICE;
-+	default:
-+		return EAI_SYSTEM;
-+	}
- 
- 	while (fgets(line, sizeof line, f) && cnt < MAXSERVS) {
- 		if ((p=strchr(line, '#'))) *p++='\n', *p=0;
---- a/src/network/proto.c
-+++ b/src/network/proto.c
-@@ -9,21 +9,36 @@ static const unsigned char protos[] = {
- 	"\001icmp\0"
- 	"\002igmp\0"
- 	"\003ggp\0"
-+	"\004ipencap\0"
-+	"\005st\0"
- 	"\006tcp\0"
-+	"\008egp\0"
- 	"\014pup\0"
- 	"\021udp\0"
--	"\026idp\0"
-+	"\024hmp\0"
-+	"\026xns-idp\0"
-+	"\033rdp\0"
-+	"\035iso-tp4\0"
-+	"\044xtp\0"
-+	"\045ddp\0"
-+	"\046idpr-cmtp\0"
- 	"\051ipv6\0"
- 	"\053ipv6-route\0"
- 	"\054ipv6-frag\0"
-+	"\055idrp\0"
-+	"\056rsvp\0"
- 	"\057gre\0"
- 	"\062esp\0"
- 	"\063ah\0"
-+	"\071skip\0"
- 	"\072ipv6-icmp\0"
- 	"\073ipv6-nonxt\0"
- 	"\074ipv6-opts\0"
-+	"\111rspf\0"
-+	"\121vmtp\0"
- 	"\131ospf\0"
- 	"\136ipip\0"
-+	"\142encap\0"
- 	"\147pim\0"
- 	"\377raw"
- };
---- a/src/network/res_msend.c
-+++ b/src/network/res_msend.c
-@@ -54,7 +54,15 @@ int __res_msend(int nqueries, const unsi
- 
- 	/* Get nameservers from resolv.conf, fallback to localhost */
- 	f = __fopen_rb_ca("/etc/resolv.conf", &_f, _buf, sizeof _buf);
--	if (f) for (nns=0; nns<3 && fgets(line, sizeof line, f); ) {
-+	if (!f) switch (errno) {
-+	case ENOENT:
-+	case ENOTDIR:
-+	case EACCES:
-+		goto no_resolv_conf;
-+	default:
-+		return -1;
-+	}
-+	for (nns=0; nns<3 && fgets(line, sizeof line, f); ) {
- 		if (!strncmp(line, "options", 7) && isspace(line[7])) {
- 			unsigned long x;
- 			char *p, *z;
-@@ -92,7 +100,8 @@ int __res_msend(int nqueries, const unsi
- 			}
- 		}
- 	}
--	if (f) __fclose_ca(f);
-+	__fclose_ca(f);
-+no_resolv_conf:
- 	if (!nns) {
- 		ns[0].sin.sin_family = AF_INET;
- 		ns[0].sin.sin_port = htons(53);
---- a/src/search/tsearch_avl.c
-+++ b/src/search/tsearch_avl.c
-@@ -77,38 +77,45 @@ static struct node *find(struct node *n,
- 		return find(n->right, k, cmp);
- }
- 
--static struct node *insert(struct node **n, const void *k,
--	int (*cmp)(const void *, const void *), int *new)
-+static struct node *insert(struct node *n, const void *k,
-+	int (*cmp)(const void *, const void *), struct node **found)
- {
--	struct node *r = *n;
-+	struct node *r;
- 	int c;
- 
--	if (!r) {
--		*n = r = malloc(sizeof **n);
--		if (r) {
--			r->key = k;
--			r->left = r->right = 0;
--			r->height = 1;
-+	if (!n) {
-+		n = malloc(sizeof *n);
-+		if (n) {
-+			n->key = k;
-+			n->left = n->right = 0;
-+			n->height = 1;
- 		}
--		*new = 1;
--		return r;
-+		*found = n;
-+		return n;
-+	}
-+	c = cmp(k, n->key);
-+	if (c == 0) {
-+		*found = n;
-+		return 0;
-+	}
-+	r = insert(c < 0 ? n->left : n->right, k, cmp, found);
-+	if (r) {
-+		if (c < 0)
-+			n->left = r;
-+		else
-+			n->right = r;
-+		r = balance(n);
- 	}
--	c = cmp(k, r->key);
--	if (c == 0)
--		return r;
--	if (c < 0)
--		r = insert(&r->left, k, cmp, new);
--	else
--		r = insert(&r->right, k, cmp, new);
--	if (*new)
--		*n = balance(*n);
- 	return r;
- }
- 
--static struct node *movr(struct node *n, struct node *r) {
--	if (!n)
--		return r;
--	n->right = movr(n->right, r);
-+static struct node *remove_rightmost(struct node *n, struct node **rightmost)
-+{
-+	if (!n->right) {
-+		*rightmost = n;
-+		return n->left;
-+	}
-+	n->right = remove_rightmost(n->right, rightmost);
- 	return balance(n);
- }
- 
-@@ -122,7 +129,13 @@ static struct node *remove(struct node *
- 	c = cmp(k, (*n)->key);
- 	if (c == 0) {
- 		struct node *r = *n;
--		*n = movr(r->left, r->right);
-+		if (r->left) {
-+			r->left = remove_rightmost(r->left, n);
-+			(*n)->left = r->left;
-+			(*n)->right = r->right;
-+			*n = balance(*n);
-+		} else
-+			*n = r->right;
- 		free(r);
- 		return parent;
- 	}
-@@ -138,6 +151,8 @@ static struct node *remove(struct node *
- void *tdelete(const void *restrict key, void **restrict rootp,
- 	int(*compar)(const void *, const void *))
- {
-+	if (!rootp)
-+		return 0;
- 	struct node *n = *rootp;
- 	struct node *ret;
- 	/* last argument is arbitrary non-null pointer
-@@ -150,17 +165,21 @@ void *tdelete(const void *restrict key,
- void *tfind(const void *key, void *const *rootp,
- 	int(*compar)(const void *, const void *))
- {
-+	if (!rootp)
-+		return 0;
- 	return find(*rootp, key, compar);
- }
- 
- void *tsearch(const void *key, void **rootp,
- 	int (*compar)(const void *, const void *))
- {
--	int new = 0;
--	struct node *n = *rootp;
-+	struct node *update;
- 	struct node *ret;
--	ret = insert(&n, key, compar, &new);
--	*rootp = n;
-+	if (!rootp)
-+		return 0;
-+	update = insert(*rootp, key, compar, &ret);
-+	if (update)
-+		*rootp = update;
- 	return ret;
- }
- 
---- a/src/setjmp/arm/longjmp.s
-+++ b/src/setjmp/arm/longjmp.s
-@@ -1,3 +1,4 @@
-+.syntax unified
- .global _longjmp
- .global longjmp
- .type _longjmp,%function
-@@ -20,7 +21,11 @@ longjmp:
- 	ldc p2, cr4, [ip], #48
- 2:	tst r1,#0x40
- 	beq 2f
--	.word 0xecbc8b10 /* vldmia ip!, {d8-d15} */
-+	.fpu vfp
-+	vldmia ip!, {d8-d15}
-+	.fpu softvfp
-+	.eabi_attribute 10, 0
-+	.eabi_attribute 27, 0
- 2:	tst r1,#0x200
- 	beq 3f
- 	ldcl p1, cr10, [ip], #8
-@@ -29,9 +34,7 @@ longjmp:
- 	ldcl p1, cr13, [ip], #8
- 	ldcl p1, cr14, [ip], #8
- 	ldcl p1, cr15, [ip], #8
--3:	tst lr,#1
--	moveq pc,lr
--	bx lr
-+3:	bx lr
- 
- .hidden __hwcap
- 1:	.word __hwcap-1b
---- a/src/setjmp/arm/setjmp.s
-+++ b/src/setjmp/arm/setjmp.s
-@@ -1,3 +1,4 @@
-+.syntax unified
- .global __setjmp
- .global _setjmp
- .global setjmp
-@@ -22,7 +23,11 @@ setjmp:
- 	stc p2, cr4, [ip], #48
- 2:	tst r1,#0x40
- 	beq 2f
--	.word 0xecac8b10 /* vstmia ip!, {d8-d15} */
-+	.fpu vfp
-+	vstmia ip!, {d8-d15}
-+	.fpu softvfp
-+	.eabi_attribute 10, 0
-+	.eabi_attribute 27, 0
- 2:	tst r1,#0x200
- 	beq 3f
- 	stcl p1, cr10, [ip], #8
-@@ -31,9 +36,7 @@ setjmp:
- 	stcl p1, cr13, [ip], #8
- 	stcl p1, cr14, [ip], #8
- 	stcl p1, cr15, [ip], #8
--3:	tst lr,#1
--	moveq pc,lr
--	bx lr
-+3:	bx lr
- 
- .hidden __hwcap
- 1:	.word __hwcap-1b
---- a/src/setjmp/mips-sf/longjmp.s
-+++ /dev/null
-@@ -1,25 +0,0 @@
--.set noreorder
--
--.global _longjmp
--.global longjmp
--.type   _longjmp,@function
--.type   longjmp,@function
--_longjmp:
--longjmp:
--	move    $2, $5
--	bne     $2, $0, 1f
--	nop
--	addu    $2, $2, 1
--1:	lw      $ra,  0($4)
--	lw      $sp,  4($4)
--	lw      $16,  8($4)
--	lw      $17, 12($4)
--	lw      $18, 16($4)
--	lw      $19, 20($4)
--	lw      $20, 24($4)
--	lw      $21, 28($4)
--	lw      $22, 32($4)
--	lw      $23, 36($4)
--	lw      $30, 40($4)
--	jr      $ra
--	lw      $28, 44($4)
---- a/src/setjmp/mips-sf/longjmp.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--longjmp.s
---- a/src/setjmp/mips-sf/setjmp.s
-+++ /dev/null
-@@ -1,25 +0,0 @@
--.set noreorder
--
--.global __setjmp
--.global _setjmp
--.global setjmp
--.type   __setjmp,@function
--.type   _setjmp,@function
--.type   setjmp,@function
--__setjmp:
--_setjmp:
--setjmp:
--	sw      $ra,  0($4)
--	sw      $sp,  4($4)
--	sw      $16,  8($4)
--	sw      $17, 12($4)
--	sw      $18, 16($4)
--	sw      $19, 20($4)
--	sw      $20, 24($4)
--	sw      $21, 28($4)
--	sw      $22, 32($4)
--	sw      $23, 36($4)
--	sw      $30, 40($4)
--	sw      $28, 44($4)
--	jr      $ra
--	li      $2, 0
---- a/src/setjmp/mips-sf/setjmp.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--setjmp.s
---- /dev/null
-+++ b/src/setjmp/mips/longjmp.S
-@@ -0,0 +1,40 @@
-+.set noreorder
-+
-+.global _longjmp
-+.global longjmp
-+.type   _longjmp,@function
-+.type   longjmp,@function
-+_longjmp:
-+longjmp:
-+	move    $2, $5
-+	bne     $2, $0, 1f
-+	nop
-+	addu    $2, $2, 1
-+1:
-+#ifndef __mips_soft_float
-+	lwc1    $20, 56($4)
-+	lwc1    $21, 60($4)
-+	lwc1    $22, 64($4)
-+	lwc1    $23, 68($4)
-+	lwc1    $24, 72($4)
-+	lwc1    $25, 76($4)
-+	lwc1    $26, 80($4)
-+	lwc1    $27, 84($4)
-+	lwc1    $28, 88($4)
-+	lwc1    $29, 92($4)
-+	lwc1    $30, 96($4)
-+	lwc1    $31, 100($4)
-+#endif
-+	lw      $ra,  0($4)
-+	lw      $sp,  4($4)
-+	lw      $16,  8($4)
-+	lw      $17, 12($4)
-+	lw      $18, 16($4)
-+	lw      $19, 20($4)
-+	lw      $20, 24($4)
-+	lw      $21, 28($4)
-+	lw      $22, 32($4)
-+	lw      $23, 36($4)
-+	lw      $30, 40($4)
-+	jr      $ra
-+	lw      $28, 44($4)
---- a/src/setjmp/mips/longjmp.s
-+++ /dev/null
-@@ -1,37 +0,0 @@
--.set noreorder
--
--.global _longjmp
--.global longjmp
--.type   _longjmp,@function
--.type   longjmp,@function
--_longjmp:
--longjmp:
--	move    $2, $5
--	bne     $2, $0, 1f
--	nop
--	addu    $2, $2, 1
--1:	lwc1    $20, 56($4)
--	lwc1    $21, 60($4)
--	lwc1    $22, 64($4)
--	lwc1    $23, 68($4)
--	lwc1    $24, 72($4)
--	lwc1    $25, 76($4)
--	lwc1    $26, 80($4)
--	lwc1    $27, 84($4)
--	lwc1    $28, 88($4)
--	lwc1    $29, 92($4)
--	lwc1    $30, 96($4)
--	lwc1    $31, 100($4)
--	lw      $ra,  0($4)
--	lw      $sp,  4($4)
--	lw      $16,  8($4)
--	lw      $17, 12($4)
--	lw      $18, 16($4)
--	lw      $19, 20($4)
--	lw      $20, 24($4)
--	lw      $21, 28($4)
--	lw      $22, 32($4)
--	lw      $23, 36($4)
--	lw      $30, 40($4)
--	jr      $ra
--	lw      $28, 44($4)
---- /dev/null
-+++ b/src/setjmp/mips/setjmp.S
-@@ -0,0 +1,39 @@
-+.set noreorder
-+
-+.global __setjmp
-+.global _setjmp
-+.global setjmp
-+.type   __setjmp,@function
-+.type   _setjmp,@function
-+.type   setjmp,@function
-+__setjmp:
-+_setjmp:
-+setjmp:
-+	sw      $ra,  0($4)
-+	sw      $sp,  4($4)
-+	sw      $16,  8($4)
-+	sw      $17, 12($4)
-+	sw      $18, 16($4)
-+	sw      $19, 20($4)
-+	sw      $20, 24($4)
-+	sw      $21, 28($4)
-+	sw      $22, 32($4)
-+	sw      $23, 36($4)
-+	sw      $30, 40($4)
-+	sw      $28, 44($4)
-+#ifndef __mips_soft_float
-+	swc1    $20, 56($4)
-+	swc1    $21, 60($4)
-+	swc1    $22, 64($4)
-+	swc1    $23, 68($4)
-+	swc1    $24, 72($4)
-+	swc1    $25, 76($4)
-+	swc1    $26, 80($4)
-+	swc1    $27, 84($4)
-+	swc1    $28, 88($4)
-+	swc1    $29, 92($4)
-+	swc1    $30, 96($4)
-+	swc1    $31, 100($4)
-+#endif
-+	jr      $ra
-+	li      $2, 0
---- a/src/setjmp/mips/setjmp.s
-+++ /dev/null
-@@ -1,37 +0,0 @@
--.set noreorder
--
--.global __setjmp
--.global _setjmp
--.global setjmp
--.type   __setjmp,@function
--.type   _setjmp,@function
--.type   setjmp,@function
--__setjmp:
--_setjmp:
--setjmp:
--	sw      $ra,  0($4)
--	sw      $sp,  4($4)
--	sw      $16,  8($4)
--	sw      $17, 12($4)
--	sw      $18, 16($4)
--	sw      $19, 20($4)
--	sw      $20, 24($4)
--	sw      $21, 28($4)
--	sw      $22, 32($4)
--	sw      $23, 36($4)
--	sw      $30, 40($4)
--	sw      $28, 44($4)
--	swc1    $20, 56($4)
--	swc1    $21, 60($4)
--	swc1    $22, 64($4)
--	swc1    $23, 68($4)
--	swc1    $24, 72($4)
--	swc1    $25, 76($4)
--	swc1    $26, 80($4)
--	swc1    $27, 84($4)
--	swc1    $28, 88($4)
--	swc1    $29, 92($4)
--	swc1    $30, 96($4)
--	swc1    $31, 100($4)
--	jr      $ra
--	li      $2, 0
---- a/src/setjmp/mipsel-sf/longjmp.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--../mips-sf/longjmp.s
---- a/src/setjmp/mipsel-sf/setjmp.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--../mips-sf/setjmp.s
---- a/src/setjmp/sh-nofpu/longjmp.s
-+++ /dev/null
-@@ -1,22 +0,0 @@
--.global _longjmp
--.global longjmp
--.type   _longjmp, @function
--.type   longjmp,  @function
--_longjmp:
--longjmp:
--	mov.l  @r4+, r8
--	mov.l  @r4+, r9
--	mov.l  @r4+, r10
--	mov.l  @r4+, r11
--	mov.l  @r4+, r12
--	mov.l  @r4+, r13
--	mov.l  @r4+, r14
--	mov.l  @r4+, r15
--	lds.l  @r4+, pr
--
--	tst  r5, r5
--	movt r0
--	add  r5, r0
--
--	rts
--	 nop
---- a/src/setjmp/sh-nofpu/longjmp.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--longjmp.s
---- a/src/setjmp/sh-nofpu/setjmp.s
-+++ /dev/null
-@@ -1,24 +0,0 @@
--.global ___setjmp
--.hidden ___setjmp
--.global __setjmp
--.global _setjmp
--.global setjmp
--.type   __setjmp, @function
--.type   _setjmp,  @function
--.type   setjmp,   @function
--___setjmp:
--__setjmp:
--_setjmp:
--setjmp:
--	add   #36, r4
--	sts.l  pr,   @-r4
--	mov.l  r15   @-r4
--	mov.l  r14,  @-r4
--	mov.l  r13,  @-r4
--	mov.l  r12,  @-r4
--	mov.l  r11,  @-r4
--	mov.l  r10,  @-r4
--	mov.l  r9,   @-r4
--	mov.l  r8,   @-r4
--	rts
--	 mov  #0, r0
---- a/src/setjmp/sh-nofpu/setjmp.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--setjmp.s
---- /dev/null
-+++ b/src/setjmp/sh/longjmp.S
-@@ -0,0 +1,28 @@
-+.global _longjmp
-+.global longjmp
-+.type   _longjmp, @function
-+.type   longjmp,  @function
-+_longjmp:
-+longjmp:
-+	mov.l  @r4+, r8
-+	mov.l  @r4+, r9
-+	mov.l  @r4+, r10
-+	mov.l  @r4+, r11
-+	mov.l  @r4+, r12
-+	mov.l  @r4+, r13
-+	mov.l  @r4+, r14
-+	mov.l  @r4+, r15
-+	lds.l  @r4+, pr
-+#if __SH_FPU_ANY__ || __SH4__
-+	fmov.s @r4+, fr12
-+	fmov.s @r4+, fr13
-+	fmov.s @r4+, fr14
-+	fmov.s @r4+, fr15
-+#endif
-+
-+	tst  r5, r5
-+	movt r0
-+	add  r5, r0
-+
-+	rts
-+	 nop
---- a/src/setjmp/sh/longjmp.s
-+++ /dev/null
-@@ -1,26 +0,0 @@
--.global _longjmp
--.global longjmp
--.type   _longjmp, @function
--.type   longjmp,  @function
--_longjmp:
--longjmp:
--	mov.l  @r4+, r8
--	mov.l  @r4+, r9
--	mov.l  @r4+, r10
--	mov.l  @r4+, r11
--	mov.l  @r4+, r12
--	mov.l  @r4+, r13
--	mov.l  @r4+, r14
--	mov.l  @r4+, r15
--	lds.l  @r4+, pr
--	fmov.s @r4+, fr12
--	fmov.s @r4+, fr13
--	fmov.s @r4+, fr14
--	fmov.s @r4+, fr15
--
--	tst  r5, r5
--	movt r0
--	add  r5, r0
--
--	rts
--	 nop
---- /dev/null
-+++ b/src/setjmp/sh/setjmp.S
-@@ -0,0 +1,32 @@
-+.global ___setjmp
-+.hidden ___setjmp
-+.global __setjmp
-+.global _setjmp
-+.global setjmp
-+.type   __setjmp, @function
-+.type   _setjmp,  @function
-+.type   setjmp,   @function
-+___setjmp:
-+__setjmp:
-+_setjmp:
-+setjmp:
-+#if __SH_FPU_ANY__ || __SH4__
-+	add   #52, r4
-+	fmov.s fr15, @-r4
-+	fmov.s fr14, @-r4
-+	fmov.s fr13, @-r4
-+	fmov.s fr12, @-r4
-+#else
-+	add   #36, r4
-+#endif
-+	sts.l  pr,   @-r4
-+	mov.l  r15,  @-r4
-+	mov.l  r14,  @-r4
-+	mov.l  r13,  @-r4
-+	mov.l  r12,  @-r4
-+	mov.l  r11,  @-r4
-+	mov.l  r10,  @-r4
-+	mov.l  r9,   @-r4
-+	mov.l  r8,   @-r4
-+	rts
-+	 mov  #0, r0
---- a/src/setjmp/sh/setjmp.s
-+++ /dev/null
-@@ -1,28 +0,0 @@
--.global ___setjmp
--.hidden ___setjmp
--.global __setjmp
--.global _setjmp
--.global setjmp
--.type   __setjmp, @function
--.type   _setjmp,  @function
--.type   setjmp,   @function
--___setjmp:
--__setjmp:
--_setjmp:
--setjmp:
--	add   #52, r4
--	fmov.s fr15, @-r4
--	fmov.s fr14, @-r4
--	fmov.s fr13, @-r4
--	fmov.s fr12, @-r4
--	sts.l  pr,   @-r4
--	mov.l  r15,  @-r4
--	mov.l  r14,  @-r4
--	mov.l  r13,  @-r4
--	mov.l  r12,  @-r4
--	mov.l  r11,  @-r4
--	mov.l  r10,  @-r4
--	mov.l  r9,   @-r4
--	mov.l  r8,   @-r4
--	rts
--	 mov  #0, r0
---- a/src/setjmp/sheb-nofpu/longjmp.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--../sh-nofpu/longjmp.s
---- a/src/setjmp/sheb-nofpu/setjmp.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--../sh-nofpu/setjmp.s
---- a/src/signal/arm/restore.s
-+++ b/src/signal/arm/restore.s
-@@ -1,3 +1,5 @@
-+.syntax unified
-+
- .global __restore
- .type __restore,%function
- __restore:
---- a/src/signal/arm/sigsetjmp.s
-+++ b/src/signal/arm/sigsetjmp.s
-@@ -1,3 +1,4 @@
-+.syntax unified
- .global sigsetjmp
- .global __sigsetjmp
- .type sigsetjmp,%function
---- a/src/signal/sigaction.c
-+++ b/src/signal/sigaction.c
-@@ -17,10 +17,6 @@ void __get_handler_set(sigset_t *set)
- int __libc_sigaction(int sig, const struct sigaction *restrict sa, struct sigaction *restrict old)
- {
- 	struct k_sigaction ksa, ksa_old;
--	if (sig >= (unsigned)_NSIG) {
--		errno = EINVAL;
--		return -1;
--	}
- 	if (sa) {
- 		if ((uintptr_t)sa->sa_handler > 1UL) {
- 			a_or_l(handler_set+(sig-1)/(8*sizeof(long)),
-@@ -57,7 +53,7 @@ int __libc_sigaction(int sig, const stru
- 
- int __sigaction(int sig, const struct sigaction *restrict sa, struct sigaction *restrict old)
- {
--	if (sig-32U < 3) {
-+	if (sig-32U < 3 || sig-1U >= _NSIG-1) {
- 		errno = EINVAL;
- 		return -1;
- 	}
---- a/src/signal/sigsetjmp_tail.c
-+++ b/src/signal/sigsetjmp_tail.c
-@@ -2,9 +2,7 @@
- #include <signal.h>
- #include "syscall.h"
- 
--#ifdef SHARED
- __attribute__((__visibility__("hidden")))
--#endif
- int __sigsetjmp_tail(sigjmp_buf jb, int ret)
- {
- 	void *p = jb->__ss;
---- a/src/stdio/getdelim.c
-+++ b/src/stdio/getdelim.c
-@@ -27,17 +27,18 @@ ssize_t getdelim(char **restrict s, size
- 	for (;;) {
- 		z = memchr(f->rpos, delim, f->rend - f->rpos);
- 		k = z ? z - f->rpos + 1 : f->rend - f->rpos;
--		if (i+k >= *n) {
-+		if (i+k+1 >= *n) {
- 			if (k >= SIZE_MAX/2-i) goto oom;
--			*n = i+k+2;
--			if (*n < SIZE_MAX/4) *n *= 2;
--			tmp = realloc(*s, *n);
-+			size_t m = i+k+2;
-+			if (!z && m < SIZE_MAX/4) m += m/2;
-+			tmp = realloc(*s, m);
- 			if (!tmp) {
--				*n = i+k+2;
--				tmp = realloc(*s, *n);
-+				m = i+k+2;
-+				tmp = realloc(*s, m);
- 				if (!tmp) goto oom;
- 			}
- 			*s = tmp;
-+			*n = m;
- 		}
- 		memcpy(*s+i, f->rpos, k);
- 		f->rpos += k;
---- /dev/null
-+++ b/src/string/arm/__aeabi_memclr.c
-@@ -0,0 +1,9 @@
-+#include <string.h>
-+#include "libc.h"
-+
-+void __aeabi_memclr(void *dest, size_t n)
-+{
-+	memset(dest, 0, n);
-+}
-+weak_alias(__aeabi_memclr, __aeabi_memclr4);
-+weak_alias(__aeabi_memclr, __aeabi_memclr8);
---- /dev/null
-+++ b/src/string/arm/__aeabi_memcpy.c
-@@ -0,0 +1,9 @@
-+#include <string.h>
-+#include "libc.h"
-+
-+void __aeabi_memcpy(void *restrict dest, const void *restrict src, size_t n)
-+{
-+	memcpy(dest, src, n);
-+}
-+weak_alias(__aeabi_memcpy, __aeabi_memcpy4);
-+weak_alias(__aeabi_memcpy, __aeabi_memcpy8);
---- /dev/null
-+++ b/src/string/arm/__aeabi_memmove.c
-@@ -0,0 +1,9 @@
-+#include <string.h>
-+#include "libc.h"
-+
-+void __aeabi_memmove(void *dest, const void *src, size_t n)
-+{
-+	memmove(dest, src, n);
-+}
-+weak_alias(__aeabi_memmove, __aeabi_memmove4);
-+weak_alias(__aeabi_memmove, __aeabi_memmove8);
---- /dev/null
-+++ b/src/string/arm/__aeabi_memset.c
-@@ -0,0 +1,9 @@
-+#include <string.h>
-+#include "libc.h"
-+
-+void __aeabi_memset(void *dest, size_t n, int c)
-+{
-+	memset(dest, c, n);
-+}
-+weak_alias(__aeabi_memset, __aeabi_memset4);
-+weak_alias(__aeabi_memset, __aeabi_memset8);
---- /dev/null
-+++ b/src/string/arm/memcpy.c
-@@ -0,0 +1,3 @@
-+#if __ARMEB__
-+#include "../memcpy.c"
-+#endif
---- /dev/null
-+++ b/src/string/arm/memcpy_le.S
-@@ -0,0 +1,383 @@
-+#ifndef __ARMEB__
-+
-+/*
-+ * Copyright (C) 2008 The Android Open Source Project
-+ * All rights reserved.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ *  * Redistributions of source code must retain the above copyright
-+ *    notice, this list of conditions and the following disclaimer.
-+ *  * Redistributions in binary form must reproduce the above copyright
-+ *    notice, this list of conditions and the following disclaimer in
-+ *    the documentation and/or other materials provided with the
-+ *    distribution.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
-+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
-+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+ * SUCH DAMAGE.
-+ */
-+
-+
-+/*
-+ * Optimized memcpy() for ARM.
-+ *
-+ * note that memcpy() always returns the destination pointer,
-+ * so we have to preserve R0.
-+  */
-+
-+/*
-+ * This file has been modified from the original for use in musl libc.
-+ * The main changes are: addition of .type memcpy,%function to make the
-+ * code safely callable from thumb mode, adjusting the return
-+ * instructions to be compatible with pre-thumb ARM cpus, and removal
-+ * of prefetch code that is not compatible with older cpus.
-+ */
-+
-+.syntax unified
-+
-+.global memcpy
-+.type memcpy,%function
-+memcpy:
-+	/* The stack must always be 64-bits aligned to be compliant with the
-+	 * ARM ABI. Since we have to save R0, we might as well save R4
-+	 * which we can use for better pipelining of the reads below
-+	 */
-+	.fnstart
-+	.save       {r0, r4, lr}
-+	stmfd       sp!, {r0, r4, lr}
-+	/* Making room for r5-r11 which will be spilled later */
-+	.pad        #28
-+	sub         sp, sp, #28
-+
-+	/* it simplifies things to take care of len<4 early */
-+	cmp     r2, #4
-+	blo     copy_last_3_and_return
-+
-+	/* compute the offset to align the source
-+	 * offset = (4-(src&3))&3 = -src & 3
-+	 */
-+	rsb     r3, r1, #0
-+	ands    r3, r3, #3
-+	beq     src_aligned
-+
-+	/* align source to 32 bits. We need to insert 2 instructions between
-+	 * a ldr[b|h] and str[b|h] because byte and half-word instructions
-+	 * stall 2 cycles.
-+	 */
-+	movs    r12, r3, lsl #31
-+	sub     r2, r2, r3              /* we know that r3 <= r2 because r2 >= 4 */
-+	ldrbmi r3, [r1], #1
-+	ldrbcs r4, [r1], #1
-+	ldrbcs r12,[r1], #1
-+	strbmi r3, [r0], #1
-+	strbcs r4, [r0], #1
-+	strbcs r12,[r0], #1
-+
-+src_aligned:
-+
-+	/* see if src and dst are aligned together (congruent) */
-+	eor     r12, r0, r1
-+	tst     r12, #3
-+	bne     non_congruent
-+
-+	/* Use post-incriment mode for stm to spill r5-r11 to reserved stack
-+	 * frame. Don't update sp.
-+	 */
-+	stmea   sp, {r5-r11}
-+
-+	/* align the destination to a cache-line */
-+	rsb     r3, r0, #0
-+	ands    r3, r3, #0x1C
-+	beq     congruent_aligned32
-+	cmp     r3, r2
-+	andhi   r3, r2, #0x1C
-+
-+	/* conditionnaly copies 0 to 7 words (length in r3) */
-+	movs    r12, r3, lsl #28
-+	ldmcs   r1!, {r4, r5, r6, r7}           /* 16 bytes */
-+	ldmmi   r1!, {r8, r9}                   /*  8 bytes */
-+	stmcs   r0!, {r4, r5, r6, r7}
-+	stmmi   r0!, {r8, r9}
-+	tst     r3, #0x4
-+	ldrne   r10,[r1], #4                    /*  4 bytes */
-+	strne   r10,[r0], #4
-+	sub     r2, r2, r3
-+
-+congruent_aligned32:
-+	/*
-+	 * here source is aligned to 32 bytes.
-+	 */
-+
-+cached_aligned32:
-+	subs    r2, r2, #32
-+	blo     less_than_32_left
-+
-+	/*
-+	 * We preload a cache-line up to 64 bytes ahead. On the 926, this will
-+	 * stall only until the requested world is fetched, but the linefill
-+	 * continues in the the background.
-+	 * While the linefill is going, we write our previous cache-line
-+	 * into the write-buffer (which should have some free space).
-+	 * When the linefill is done, the writebuffer will
-+	 * start dumping its content into memory
-+	 *
-+	 * While all this is going, we then load a full cache line into
-+	 * 8 registers, this cache line should be in the cache by now
-+	 * (or partly in the cache).
-+	 *
-+	 * This code should work well regardless of the source/dest alignment.
-+	 *
-+	 */
-+
-+	/* Align the preload register to a cache-line because the cpu does
-+	 * "critical word first" (the first word requested is loaded first).
-+	 */
-+	@ bic           r12, r1, #0x1F
-+	@ add           r12, r12, #64
-+
-+1:      ldmia   r1!, { r4-r11 }
-+	subs    r2, r2, #32
-+
-+	/* 
-+	 * NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
-+	 * for ARM9 preload will not be safely guarded by the preceding subs.
-+	 * When it is safely guarded the only possibility to have SIGSEGV here
-+	 * is because the caller overstates the length.
-+	 */
-+	@ ldrhi         r3, [r12], #32      /* cheap ARM9 preload */
-+	stmia   r0!, { r4-r11 }
-+	bhs     1b
-+
-+	add     r2, r2, #32
-+
-+less_than_32_left:
-+	/*
-+	 * less than 32 bytes left at this point (length in r2)
-+	 */
-+
-+	/* skip all this if there is nothing to do, which should
-+	 * be a common case (if not executed the code below takes
-+	 * about 16 cycles)
-+	 */
-+	tst     r2, #0x1F
-+	beq     1f
-+
-+	/* conditionnaly copies 0 to 31 bytes */
-+	movs    r12, r2, lsl #28
-+	ldmcs   r1!, {r4, r5, r6, r7}           /* 16 bytes */
-+	ldmmi   r1!, {r8, r9}                   /*  8 bytes */
-+	stmcs   r0!, {r4, r5, r6, r7}
-+	stmmi   r0!, {r8, r9}
-+	movs    r12, r2, lsl #30
-+	ldrcs   r3, [r1], #4                    /*  4 bytes */
-+	ldrhmi r4, [r1], #2                     /*  2 bytes */
-+	strcs   r3, [r0], #4
-+	strhmi r4, [r0], #2
-+	tst     r2, #0x1
-+	ldrbne r3, [r1]                         /*  last byte  */
-+	strbne r3, [r0]
-+
-+	/* we're done! restore everything and return */
-+1:      ldmfd   sp!, {r5-r11}
-+	ldmfd   sp!, {r0, r4, lr}
-+	bx      lr
-+
-+	/********************************************************************/
-+
-+non_congruent:
-+	/*
-+	 * here source is aligned to 4 bytes
-+	 * but destination is not.
-+	 *
-+	 * in the code below r2 is the number of bytes read
-+	 * (the number of bytes written is always smaller, because we have
-+	 * partial words in the shift queue)
-+	 */
-+	cmp     r2, #4
-+	blo     copy_last_3_and_return
-+
-+	/* Use post-incriment mode for stm to spill r5-r11 to reserved stack
-+	 * frame. Don't update sp.
-+	 */
-+	stmea   sp, {r5-r11}
-+
-+	/* compute shifts needed to align src to dest */
-+	rsb     r5, r0, #0
-+	and     r5, r5, #3                      /* r5 = # bytes in partial words */
-+	mov     r12, r5, lsl #3         /* r12 = right */
-+	rsb     lr, r12, #32            /* lr = left  */
-+
-+	/* read the first word */
-+	ldr     r3, [r1], #4
-+	sub     r2, r2, #4
-+
-+	/* write a partial word (0 to 3 bytes), such that destination
-+	 * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
-+	 */
-+	movs    r5, r5, lsl #31
-+	strbmi r3, [r0], #1
-+	movmi   r3, r3, lsr #8
-+	strbcs r3, [r0], #1
-+	movcs   r3, r3, lsr #8
-+	strbcs r3, [r0], #1
-+	movcs   r3, r3, lsr #8
-+
-+	cmp     r2, #4
-+	blo     partial_word_tail
-+
-+	/* Align destination to 32 bytes (cache line boundary) */
-+1:      tst     r0, #0x1c
-+	beq     2f
-+	ldr     r5, [r1], #4
-+	sub     r2, r2, #4
-+	orr     r4, r3, r5,             lsl lr
-+	mov     r3, r5,                 lsr r12
-+	str     r4, [r0], #4
-+	cmp     r2, #4
-+	bhs     1b
-+	blo     partial_word_tail
-+
-+	/* copy 32 bytes at a time */
-+2:      subs    r2, r2, #32
-+	blo     less_than_thirtytwo
-+
-+	/* Use immediate mode for the shifts, because there is an extra cycle
-+	 * for register shifts, which could account for up to 50% of
-+	 * performance hit.
-+	 */
-+
-+	cmp     r12, #24
-+	beq     loop24
-+	cmp     r12, #8
-+	beq     loop8
-+
-+loop16:
-+	ldr     r12, [r1], #4
-+1:      mov     r4, r12
-+	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
-+	subs    r2, r2, #32
-+	ldrhs   r12, [r1], #4
-+	orr     r3, r3, r4, lsl #16
-+	mov     r4, r4, lsr #16
-+	orr     r4, r4, r5, lsl #16
-+	mov     r5, r5, lsr #16
-+	orr     r5, r5, r6, lsl #16
-+	mov     r6, r6, lsr #16
-+	orr     r6, r6, r7, lsl #16
-+	mov     r7, r7, lsr #16
-+	orr     r7, r7, r8, lsl #16
-+	mov     r8, r8, lsr #16
-+	orr     r8, r8, r9, lsl #16
-+	mov     r9, r9, lsr #16
-+	orr     r9, r9, r10, lsl #16
-+	mov     r10, r10,               lsr #16
-+	orr     r10, r10, r11, lsl #16
-+	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
-+	mov     r3, r11, lsr #16
-+	bhs     1b
-+	b       less_than_thirtytwo
-+
-+loop8:
-+	ldr     r12, [r1], #4
-+1:      mov     r4, r12
-+	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
-+	subs    r2, r2, #32
-+	ldrhs   r12, [r1], #4
-+	orr     r3, r3, r4, lsl #24
-+	mov     r4, r4, lsr #8
-+	orr     r4, r4, r5, lsl #24
-+	mov     r5, r5, lsr #8
-+	orr     r5, r5, r6, lsl #24
-+	mov     r6, r6,  lsr #8
-+	orr     r6, r6, r7, lsl #24
-+	mov     r7, r7,  lsr #8
-+	orr     r7, r7, r8,             lsl #24
-+	mov     r8, r8,  lsr #8
-+	orr     r8, r8, r9,             lsl #24
-+	mov     r9, r9,  lsr #8
-+	orr     r9, r9, r10,    lsl #24
-+	mov     r10, r10, lsr #8
-+	orr     r10, r10, r11,  lsl #24
-+	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
-+	mov     r3, r11, lsr #8
-+	bhs     1b
-+	b       less_than_thirtytwo
-+
-+loop24:
-+	ldr     r12, [r1], #4
-+1:      mov     r4, r12
-+	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
-+	subs    r2, r2, #32
-+	ldrhs   r12, [r1], #4
-+	orr     r3, r3, r4, lsl #8
-+	mov     r4, r4, lsr #24
-+	orr     r4, r4, r5, lsl #8
-+	mov     r5, r5, lsr #24
-+	orr     r5, r5, r6, lsl #8
-+	mov     r6, r6, lsr #24
-+	orr     r6, r6, r7, lsl #8
-+	mov     r7, r7, lsr #24
-+	orr     r7, r7, r8, lsl #8
-+	mov     r8, r8, lsr #24
-+	orr     r8, r8, r9, lsl #8
-+	mov     r9, r9, lsr #24
-+	orr     r9, r9, r10, lsl #8
-+	mov     r10, r10, lsr #24
-+	orr     r10, r10, r11, lsl #8
-+	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
-+	mov     r3, r11, lsr #24
-+	bhs     1b
-+
-+less_than_thirtytwo:
-+	/* copy the last 0 to 31 bytes of the source */
-+	rsb     r12, lr, #32            /* we corrupted r12, recompute it  */
-+	add     r2, r2, #32
-+	cmp     r2, #4
-+	blo     partial_word_tail
-+
-+1:      ldr     r5, [r1], #4
-+	sub     r2, r2, #4
-+	orr     r4, r3, r5,             lsl lr
-+	mov     r3,     r5,                     lsr r12
-+	str     r4, [r0], #4
-+	cmp     r2, #4
-+	bhs     1b
-+
-+partial_word_tail:
-+	/* we have a partial word in the input buffer */
-+	movs    r5, lr, lsl #(31-3)
-+	strbmi r3, [r0], #1
-+	movmi   r3, r3, lsr #8
-+	strbcs r3, [r0], #1
-+	movcs   r3, r3, lsr #8
-+	strbcs r3, [r0], #1
-+
-+	/* Refill spilled registers from the stack. Don't update sp. */
-+	ldmfd   sp, {r5-r11}
-+
-+copy_last_3_and_return:
-+	movs    r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */
-+	ldrbmi r2, [r1], #1
-+	ldrbcs r3, [r1], #1
-+	ldrbcs r12,[r1]
-+	strbmi r2, [r0], #1
-+	strbcs r3, [r0], #1
-+	strbcs r12,[r0]
-+
-+	/* we're done! restore sp and spilled registers and return */
-+	add     sp,  sp, #28
-+	ldmfd   sp!, {r0, r4, lr}
-+	bx      lr
-+
-+#endif
---- a/src/string/armel/memcpy.s
-+++ /dev/null
-@@ -1,381 +0,0 @@
--/*
-- * Copyright (C) 2008 The Android Open Source Project
-- * All rights reserved.
-- *
-- * Redistribution and use in source and binary forms, with or without
-- * modification, are permitted provided that the following conditions
-- * are met:
-- *  * Redistributions of source code must retain the above copyright
-- *    notice, this list of conditions and the following disclaimer.
-- *  * Redistributions in binary form must reproduce the above copyright
-- *    notice, this list of conditions and the following disclaimer in
-- *    the documentation and/or other materials provided with the
-- *    distribution.
-- *
-- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
-- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
-- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-- * SUCH DAMAGE.
-- */
--
--
--/*
-- * Optimized memcpy() for ARM.
-- *
-- * note that memcpy() always returns the destination pointer,
-- * so we have to preserve R0.
--  */
--
--/*
-- * This file has been modified from the original for use in musl libc.
-- * The main changes are: addition of .type memcpy,%function to make the
-- * code safely callable from thumb mode, adjusting the return
-- * instructions to be compatible with pre-thumb ARM cpus, and removal
-- * of prefetch code that is not compatible with older cpus.
-- */
--
--.global memcpy
--.type memcpy,%function
--memcpy:
--	/* The stack must always be 64-bits aligned to be compliant with the
--	 * ARM ABI. Since we have to save R0, we might as well save R4
--	 * which we can use for better pipelining of the reads below
--	 */
--	.fnstart
--	.save       {r0, r4, lr}
--	stmfd       sp!, {r0, r4, lr}
--	/* Making room for r5-r11 which will be spilled later */
--	.pad        #28
--	sub         sp, sp, #28
--
--	/* it simplifies things to take care of len<4 early */
--	cmp     r2, #4
--	blo     copy_last_3_and_return
--
--	/* compute the offset to align the source
--	 * offset = (4-(src&3))&3 = -src & 3
--	 */
--	rsb     r3, r1, #0
--	ands    r3, r3, #3
--	beq     src_aligned
--
--	/* align source to 32 bits. We need to insert 2 instructions between
--	 * a ldr[b|h] and str[b|h] because byte and half-word instructions
--	 * stall 2 cycles.
--	 */
--	movs    r12, r3, lsl #31
--	sub     r2, r2, r3              /* we know that r3 <= r2 because r2 >= 4 */
--	.word 0x44d13001 /* ldrbmi r3, [r1], #1 */
--	.word 0x24d14001 /* ldrbcs r4, [r1], #1 */
--	.word 0x24d1c001 /* ldrbcs r12,[r1], #1 */
--	.word 0x44c03001 /* strbmi r3, [r0], #1 */
--	.word 0x24c04001 /* strbcs r4, [r0], #1 */
--	.word 0x24c0c001 /* strbcs r12,[r0], #1 */
--
--src_aligned:
--
--	/* see if src and dst are aligned together (congruent) */
--	eor     r12, r0, r1
--	tst     r12, #3
--	bne     non_congruent
--
--	/* Use post-incriment mode for stm to spill r5-r11 to reserved stack
--	 * frame. Don't update sp.
--	 */
--	stmea   sp, {r5-r11}
--
--	/* align the destination to a cache-line */
--	rsb     r3, r0, #0
--	ands    r3, r3, #0x1C
--	beq     congruent_aligned32
--	cmp     r3, r2
--	andhi   r3, r2, #0x1C
--
--	/* conditionnaly copies 0 to 7 words (length in r3) */
--	movs    r12, r3, lsl #28
--	ldmcs   r1!, {r4, r5, r6, r7}           /* 16 bytes */
--	ldmmi   r1!, {r8, r9}                   /*  8 bytes */
--	stmcs   r0!, {r4, r5, r6, r7}
--	stmmi   r0!, {r8, r9}
--	tst     r3, #0x4
--	ldrne   r10,[r1], #4                    /*  4 bytes */
--	strne   r10,[r0], #4
--	sub     r2, r2, r3
--
--congruent_aligned32:
--	/*
--	 * here source is aligned to 32 bytes.
--	 */
--
--cached_aligned32:
--	subs    r2, r2, #32
--	blo     less_than_32_left
--
--	/*
--	 * We preload a cache-line up to 64 bytes ahead. On the 926, this will
--	 * stall only until the requested world is fetched, but the linefill
--	 * continues in the the background.
--	 * While the linefill is going, we write our previous cache-line
--	 * into the write-buffer (which should have some free space).
--	 * When the linefill is done, the writebuffer will
--	 * start dumping its content into memory
--	 *
--	 * While all this is going, we then load a full cache line into
--	 * 8 registers, this cache line should be in the cache by now
--	 * (or partly in the cache).
--	 *
--	 * This code should work well regardless of the source/dest alignment.
--	 *
--	 */
--
--	/* Align the preload register to a cache-line because the cpu does
--	 * "critical word first" (the first word requested is loaded first).
--	 */
--	@ bic           r12, r1, #0x1F
--	@ add           r12, r12, #64
--
--1:      ldmia   r1!, { r4-r11 }
--	subs    r2, r2, #32
--
--	/* 
--	 * NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
--	 * for ARM9 preload will not be safely guarded by the preceding subs.
--	 * When it is safely guarded the only possibility to have SIGSEGV here
--	 * is because the caller overstates the length.
--	 */
--	@ ldrhi         r3, [r12], #32      /* cheap ARM9 preload */
--	stmia   r0!, { r4-r11 }
--	bhs     1b
--
--	add     r2, r2, #32
--
--less_than_32_left:
--	/*
--	 * less than 32 bytes left at this point (length in r2)
--	 */
--
--	/* skip all this if there is nothing to do, which should
--	 * be a common case (if not executed the code below takes
--	 * about 16 cycles)
--	 */
--	tst     r2, #0x1F
--	beq     1f
--
--	/* conditionnaly copies 0 to 31 bytes */
--	movs    r12, r2, lsl #28
--	ldmcs   r1!, {r4, r5, r6, r7}           /* 16 bytes */
--	ldmmi   r1!, {r8, r9}                   /*  8 bytes */
--	stmcs   r0!, {r4, r5, r6, r7}
--	stmmi   r0!, {r8, r9}
--	movs    r12, r2, lsl #30
--	ldrcs   r3, [r1], #4                    /*  4 bytes */
--	.word 0x40d140b2 /* ldrhmi r4, [r1], #2 */ /*  2 bytes */
--	strcs   r3, [r0], #4
--	.word 0x40c040b2 /* strhmi r4, [r0], #2 */
--	tst     r2, #0x1
--	.word 0x15d13000 /* ldrbne r3, [r1] */  /*  last byte  */
--	.word 0x15c03000 /* strbne r3, [r0] */
--
--	/* we're done! restore everything and return */
--1:      ldmfd   sp!, {r5-r11}
--	ldmfd   sp!, {r0, r4, lr}
--	tst     lr, #1
--	moveq   pc, lr
--	bx      lr
--
--	/********************************************************************/
--
--non_congruent:
--	/*
--	 * here source is aligned to 4 bytes
--	 * but destination is not.
--	 *
--	 * in the code below r2 is the number of bytes read
--	 * (the number of bytes written is always smaller, because we have
--	 * partial words in the shift queue)
--	 */
--	cmp     r2, #4
--	blo     copy_last_3_and_return
--
--	/* Use post-incriment mode for stm to spill r5-r11 to reserved stack
--	 * frame. Don't update sp.
--	 */
--	stmea   sp, {r5-r11}
--
--	/* compute shifts needed to align src to dest */
--	rsb     r5, r0, #0
--	and     r5, r5, #3                      /* r5 = # bytes in partial words */
--	mov     r12, r5, lsl #3         /* r12 = right */
--	rsb     lr, r12, #32            /* lr = left  */
--
--	/* read the first word */
--	ldr     r3, [r1], #4
--	sub     r2, r2, #4
--
--	/* write a partial word (0 to 3 bytes), such that destination
--	 * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
--	 */
--	movs    r5, r5, lsl #31
--	.word 0x44c03001 /* strbmi r3, [r0], #1 */
--	movmi   r3, r3, lsr #8
--	.word 0x24c03001 /* strbcs r3, [r0], #1 */
--	movcs   r3, r3, lsr #8
--	.word 0x24c03001 /* strbcs r3, [r0], #1 */
--	movcs   r3, r3, lsr #8
--
--	cmp     r2, #4
--	blo     partial_word_tail
--
--	/* Align destination to 32 bytes (cache line boundary) */
--1:      tst     r0, #0x1c
--	beq     2f
--	ldr     r5, [r1], #4
--	sub     r2, r2, #4
--	orr     r4, r3, r5,             lsl lr
--	mov     r3, r5,                 lsr r12
--	str     r4, [r0], #4
--	cmp     r2, #4
--	bhs     1b
--	blo     partial_word_tail
--
--	/* copy 32 bytes at a time */
--2:      subs    r2, r2, #32
--	blo     less_than_thirtytwo
--
--	/* Use immediate mode for the shifts, because there is an extra cycle
--	 * for register shifts, which could account for up to 50% of
--	 * performance hit.
--	 */
--
--	cmp     r12, #24
--	beq     loop24
--	cmp     r12, #8
--	beq     loop8
--
--loop16:
--	ldr     r12, [r1], #4
--1:      mov     r4, r12
--	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
--	subs    r2, r2, #32
--	ldrhs   r12, [r1], #4
--	orr     r3, r3, r4, lsl #16
--	mov     r4, r4, lsr #16
--	orr     r4, r4, r5, lsl #16
--	mov     r5, r5, lsr #16
--	orr     r5, r5, r6, lsl #16
--	mov     r6, r6, lsr #16
--	orr     r6, r6, r7, lsl #16
--	mov     r7, r7, lsr #16
--	orr     r7, r7, r8, lsl #16
--	mov     r8, r8, lsr #16
--	orr     r8, r8, r9, lsl #16
--	mov     r9, r9, lsr #16
--	orr     r9, r9, r10, lsl #16
--	mov     r10, r10,               lsr #16
--	orr     r10, r10, r11, lsl #16
--	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
--	mov     r3, r11, lsr #16
--	bhs     1b
--	b       less_than_thirtytwo
--
--loop8:
--	ldr     r12, [r1], #4
--1:      mov     r4, r12
--	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
--	subs    r2, r2, #32
--	ldrhs   r12, [r1], #4
--	orr     r3, r3, r4, lsl #24
--	mov     r4, r4, lsr #8
--	orr     r4, r4, r5, lsl #24
--	mov     r5, r5, lsr #8
--	orr     r5, r5, r6, lsl #24
--	mov     r6, r6,  lsr #8
--	orr     r6, r6, r7, lsl #24
--	mov     r7, r7,  lsr #8
--	orr     r7, r7, r8,             lsl #24
--	mov     r8, r8,  lsr #8
--	orr     r8, r8, r9,             lsl #24
--	mov     r9, r9,  lsr #8
--	orr     r9, r9, r10,    lsl #24
--	mov     r10, r10, lsr #8
--	orr     r10, r10, r11,  lsl #24
--	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
--	mov     r3, r11, lsr #8
--	bhs     1b
--	b       less_than_thirtytwo
--
--loop24:
--	ldr     r12, [r1], #4
--1:      mov     r4, r12
--	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
--	subs    r2, r2, #32
--	ldrhs   r12, [r1], #4
--	orr     r3, r3, r4, lsl #8
--	mov     r4, r4, lsr #24
--	orr     r4, r4, r5, lsl #8
--	mov     r5, r5, lsr #24
--	orr     r5, r5, r6, lsl #8
--	mov     r6, r6, lsr #24
--	orr     r6, r6, r7, lsl #8
--	mov     r7, r7, lsr #24
--	orr     r7, r7, r8, lsl #8
--	mov     r8, r8, lsr #24
--	orr     r8, r8, r9, lsl #8
--	mov     r9, r9, lsr #24
--	orr     r9, r9, r10, lsl #8
--	mov     r10, r10, lsr #24
--	orr     r10, r10, r11, lsl #8
--	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
--	mov     r3, r11, lsr #24
--	bhs     1b
--
--less_than_thirtytwo:
--	/* copy the last 0 to 31 bytes of the source */
--	rsb     r12, lr, #32            /* we corrupted r12, recompute it  */
--	add     r2, r2, #32
--	cmp     r2, #4
--	blo     partial_word_tail
--
--1:      ldr     r5, [r1], #4
--	sub     r2, r2, #4
--	orr     r4, r3, r5,             lsl lr
--	mov     r3,     r5,                     lsr r12
--	str     r4, [r0], #4
--	cmp     r2, #4
--	bhs     1b
--
--partial_word_tail:
--	/* we have a partial word in the input buffer */
--	movs    r5, lr, lsl #(31-3)
--	.word 0x44c03001 /* strbmi r3, [r0], #1 */
--	movmi   r3, r3, lsr #8
--	.word 0x24c03001 /* strbcs r3, [r0], #1 */
--	movcs   r3, r3, lsr #8
--	.word 0x24c03001 /* strbcs r3, [r0], #1 */
--
--	/* Refill spilled registers from the stack. Don't update sp. */
--	ldmfd   sp, {r5-r11}
--
--copy_last_3_and_return:
--	movs    r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */
--	.word 0x44d12001 /* ldrbmi r2, [r1], #1 */
--	.word 0x24d13001 /* ldrbcs r3, [r1], #1 */
--	.word 0x25d1c000 /* ldrbcs r12,[r1] */
--	.word 0x44c02001 /* strbmi r2, [r0], #1 */
--	.word 0x24c03001 /* strbcs r3, [r0], #1 */
--	.word 0x25c0c000 /* strbcs r12,[r0] */
--
--	/* we're done! restore sp and spilled registers and return */
--	add     sp,  sp, #28
--	ldmfd   sp!, {r0, r4, lr}
--	tst     lr, #1
--	moveq   pc, lr
--	bx      lr
---- a/src/string/armel/memcpy.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--memcpy.s
---- a/src/string/armhf/memcpy.sub
-+++ /dev/null
-@@ -1 +0,0 @@
--../armel/memcpy.s
---- a/src/thread/__syscall_cp.c
-+++ b/src/thread/__syscall_cp.c
-@@ -1,9 +1,7 @@
- #include "pthread_impl.h"
- #include "syscall.h"
- 
--#ifdef SHARED
- __attribute__((__visibility__("hidden")))
--#endif
- long __syscall_cp_c();
- 
- static long sccp(syscall_arg_t nr,
---- a/src/thread/__tls_get_addr.c
-+++ b/src/thread/__tls_get_addr.c
-@@ -1,16 +1,16 @@
- #include <stddef.h>
- #include "pthread_impl.h"
-+#include "libc.h"
-+
-+__attribute__((__visibility__("hidden")))
-+void *__tls_get_new(size_t *);
- 
- void *__tls_get_addr(size_t *v)
- {
- 	pthread_t self = __pthread_self();
--#ifdef SHARED
--	__attribute__((__visibility__("hidden")))
--	void *__tls_get_new(size_t *);
- 	if (v[0]<=(size_t)self->dtv[0])
- 		return (char *)self->dtv[v[0]]+v[1]+DTP_OFFSET;
- 	return __tls_get_new(v);
--#else
--	return (char *)self->dtv[1]+v[1]+DTP_OFFSET;
--#endif
- }
-+
-+weak_alias(__tls_get_addr, __tls_get_new);
---- a/src/thread/aarch64/syscall_cp.s
-+++ b/src/thread/aarch64/syscall_cp.s
-@@ -17,7 +17,7 @@
- __syscall_cp_asm:
- __cp_begin:
- 	ldr w0,[x0]
--	cbnz w0,1f
-+	cbnz w0,__cp_cancel
- 	mov x8,x1
- 	mov x0,x2
- 	mov x1,x3
-@@ -28,6 +28,5 @@ __cp_begin:
- 	svc 0
- __cp_end:
- 	ret
--
--	// cbnz might not be able to jump far enough
--1:	b __cancel
-+__cp_cancel:
-+	b __cancel
---- /dev/null
-+++ b/src/thread/arm/__set_thread_area.c
-@@ -0,0 +1,49 @@
-+#include <stdint.h>
-+#include <elf.h>
-+#include "pthread_impl.h"
-+#include "libc.h"
-+
-+#define HWCAP_TLS (1 << 15)
-+
-+extern const unsigned char __attribute__((__visibility__("hidden")))
-+	__a_barrier_dummy[], __a_barrier_oldkuser[],
-+	__a_barrier_v6[], __a_barrier_v7[],
-+	__a_cas_dummy[], __a_cas_v6[], __a_cas_v7[],
-+	__a_gettp_dummy[];
-+
-+#define __a_barrier_kuser 0xffff0fa0
-+#define __a_cas_kuser 0xffff0fc0
-+#define __a_gettp_kuser 0xffff0fe0
-+
-+extern uintptr_t __attribute__((__visibility__("hidden")))
-+	__a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr;
-+
-+#define SET(op,ver) (__a_##op##_ptr = \
-+	(uintptr_t)__a_##op##_##ver - (uintptr_t)__a_##op##_dummy)
-+
-+int __set_thread_area(void *p)
-+{
-+#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7
-+	if (__hwcap & HWCAP_TLS) {
-+		size_t *aux;
-+		SET(cas, v7);
-+		SET(barrier, v7);
-+		for (aux=libc.auxv; *aux; aux+=2) {
-+			if (*aux != AT_PLATFORM) continue;
-+			const char *s = (void *)aux[1];
-+			if (s[0]!='v' || s[1]!='6' || s[2]-'0'<10u) break;
-+			SET(cas, v6);
-+			SET(barrier, v6);
-+			break;
-+		}
-+	} else {
-+		int ver = *(int *)0xffff0ffc;
-+		SET(gettp, kuser);
-+		SET(cas, kuser);
-+		SET(barrier, kuser);
-+		if (ver < 2) a_crash();
-+		if (ver < 3) SET(barrier, oldkuser);
-+	}
-+#endif
-+	return __syscall(0xf0005, p);
-+}
---- a/src/thread/arm/__set_thread_area.s
-+++ /dev/null
-@@ -1 +0,0 @@
--/* Replaced by C code in arch/arm/src */
---- a/src/thread/arm/__unmapself.s
-+++ b/src/thread/arm/__unmapself.s
-@@ -1,3 +1,4 @@
-+.syntax unified
- .text
- .global __unmapself
- .type   __unmapself,%function
---- /dev/null
-+++ b/src/thread/arm/atomics.s
-@@ -0,0 +1,111 @@
-+.syntax unified
-+.text
-+
-+.global __a_barrier
-+.hidden __a_barrier
-+.type __a_barrier,%function
-+__a_barrier:
-+	ldr ip,1f
-+	ldr ip,[pc,ip]
-+	add pc,pc,ip
-+1:	.word __a_barrier_ptr-1b
-+.global __a_barrier_dummy
-+.hidden __a_barrier_dummy
-+__a_barrier_dummy:
-+	bx lr
-+.global __a_barrier_oldkuser
-+.hidden __a_barrier_oldkuser
-+__a_barrier_oldkuser:
-+	push {r0,r1,r2,r3,ip,lr}
-+	mov r1,r0
-+	mov r2,sp
-+	ldr ip,=0xffff0fc0
-+	mov lr,pc
-+	mov pc,ip
-+	pop {r0,r1,r2,r3,ip,lr}
-+	bx lr
-+.global __a_barrier_v6
-+.hidden __a_barrier_v6
-+__a_barrier_v6:
-+	mcr p15,0,r0,c7,c10,5
-+	bx lr
-+.global __a_barrier_v7
-+.hidden __a_barrier_v7
-+__a_barrier_v7:
-+	.word 0xf57ff05b        /* dmb ish */
-+	bx lr
-+
-+.global __a_cas
-+.hidden __a_cas
-+.type __a_cas,%function
-+__a_cas:
-+	ldr ip,1f
-+	ldr ip,[pc,ip]
-+	add pc,pc,ip
-+1:	.word __a_cas_ptr-1b
-+.global __a_cas_dummy
-+.hidden __a_cas_dummy
-+__a_cas_dummy:
-+	mov r3,r0
-+	ldr r0,[r2]
-+	subs r0,r3,r0
-+	streq r1,[r2]
-+	bx lr
-+.global __a_cas_v6
-+.hidden __a_cas_v6
-+__a_cas_v6:
-+	mov r3,r0
-+	mcr p15,0,r0,c7,c10,5
-+1:	.word 0xe1920f9f        /* ldrex r0,[r2] */
-+	subs r0,r3,r0
-+	.word 0x01820f91        /* strexeq r0,r1,[r2] */
-+	teqeq r0,#1
-+	beq 1b
-+	mcr p15,0,r0,c7,c10,5
-+	bx lr
-+.global __a_cas_v7
-+.hidden __a_cas_v7
-+__a_cas_v7:
-+	mov r3,r0
-+	.word 0xf57ff05b        /* dmb ish */
-+1:	.word 0xe1920f9f        /* ldrex r0,[r2] */
-+	subs r0,r3,r0
-+	.word 0x01820f91        /* strexeq r0,r1,[r2] */
-+	teqeq r0,#1
-+	beq 1b
-+	.word 0xf57ff05b        /* dmb ish */
-+	bx lr
-+
-+.global __aeabi_read_tp
-+.type __aeabi_read_tp,%function
-+__aeabi_read_tp:
-+
-+.global __a_gettp
-+.hidden __a_gettp
-+.type __a_gettp,%function
-+__a_gettp:
-+	ldr r0,1f
-+	ldr r0,[pc,r0]
-+	add pc,pc,r0
-+1:	.word __a_gettp_ptr-1b
-+.global __a_gettp_dummy
-+.hidden __a_gettp_dummy
-+__a_gettp_dummy:
-+	mrc p15,0,r0,c13,c0,3
-+	bx lr
-+
-+.data
-+.global __a_barrier_ptr
-+.hidden __a_barrier_ptr
-+__a_barrier_ptr:
-+	.word 0
-+
-+.global __a_cas_ptr
-+.hidden __a_cas_ptr
-+__a_cas_ptr:
-+	.word 0
-+
-+.global __a_gettp_ptr
-+.hidden __a_gettp_ptr
-+__a_gettp_ptr:
-+	.word 0
---- a/src/thread/arm/clone.s
-+++ b/src/thread/arm/clone.s
-@@ -1,3 +1,4 @@
-+.syntax unified
- .text
- .global __clone
- .type   __clone,%function
-@@ -15,8 +16,6 @@ __clone:
- 	tst r0,r0
- 	beq 1f
- 	ldmfd sp!,{r4,r5,r6,r7}
--	tst lr,#1
--	moveq pc,lr
- 	bx lr
- 
- 1:	mov r0,r6
---- a/src/thread/arm/syscall_cp.s
-+++ b/src/thread/arm/syscall_cp.s
-@@ -1,3 +1,4 @@
-+.syntax unified
- .global __cp_begin
- .hidden __cp_begin
- .global __cp_end
-@@ -22,8 +23,6 @@ __cp_begin:
- 	svc 0
- __cp_end:
- 	ldmfd sp!,{r4,r5,r6,r7,lr}
--	tst lr,#1
--	moveq pc,lr
- 	bx lr
- __cp_cancel:
- 	ldmfd sp!,{r4,r5,r6,r7,lr}
---- a/src/thread/microblaze/syscall_cp.s
-+++ b/src/thread/microblaze/syscall_cp.s
-@@ -11,7 +11,7 @@
- __syscall_cp_asm:
- __cp_begin:
- 	lwi     r5, r5, 0
--	bnei    r5, __cancel
-+	bnei    r5, __cp_cancel
- 	addi    r12, r6, 0
- 	add     r5, r7, r0
- 	add     r6, r8, r0
-@@ -23,3 +23,5 @@ __cp_begin:
- __cp_end:
- 	rtsd    r15, 8
- 	nop
-+__cp_cancel:
-+	bri     __cancel
---- a/src/thread/or1k/syscall_cp.s
-+++ b/src/thread/or1k/syscall_cp.s
-@@ -12,7 +12,7 @@ __syscall_cp_asm:
- __cp_begin:
- 	l.lwz	r3, 0(r3)
- 	l.sfeqi	r3, 0
--	l.bnf	__cancel
-+	l.bnf	__cp_cancel
- 	 l.ori	r11, r4, 0
- 	l.ori	r3, r5, 0
- 	l.ori	r4, r6, 0
-@@ -24,3 +24,6 @@ __cp_begin:
- __cp_end:
- 	l.jr	r9
- 	 l.nop
-+__cp_cancel:
-+	l.j	__cancel
-+	 l.nop
---- a/src/thread/powerpc/syscall_cp.s
-+++ b/src/thread/powerpc/syscall_cp.s
-@@ -38,7 +38,7 @@ __cp_begin:
- 	cmpwi cr7, 0, 0 #compare r0 with 0, store result in cr7. 
- 	beq+ cr7, 1f #jump to label 1 if r0 was 0
- 	
--	b __cancel #else call cancel 
-+	b __cp_cancel #else call cancel
- 1:
- 	#ok, the cancel flag was not set
- 	# syscall: number goes to r0, the rest 3-8
-@@ -55,3 +55,5 @@ __cp_end:
- 	#else negate result.
- 	neg 3, 3
- 	blr
-+__cp_cancel:
-+	b __cancel
---- a/src/thread/pthread_cancel.c
-+++ b/src/thread/pthread_cancel.c
-@@ -1,12 +1,11 @@
-+#define _GNU_SOURCE
- #include <string.h>
- #include "pthread_impl.h"
- #include "syscall.h"
- #include "libc.h"
- 
--#ifdef SHARED
- __attribute__((__visibility__("hidden")))
--#endif
--long __cancel(), __cp_cancel(), __syscall_cp_asm(), __syscall_cp_c();
-+long __cancel(), __syscall_cp_asm(), __syscall_cp_c();
- 
- long __cancel()
- {
-@@ -17,12 +16,6 @@ long __cancel()
- 	return -ECANCELED;
- }
- 
--/* If __syscall_cp_asm has adjusted the stack pointer, it must provide a
-- * definition of __cp_cancel to undo those adjustments and call __cancel.
-- * Otherwise, __cancel provides a definition for __cp_cancel. */
--
--weak_alias(__cancel, __cp_cancel);
--
- long __syscall_cp_asm(volatile void *, syscall_arg_t,
-                       syscall_arg_t, syscall_arg_t, syscall_arg_t,
-                       syscall_arg_t, syscall_arg_t, syscall_arg_t);
-@@ -52,24 +45,22 @@ static void _sigaddset(sigset_t *set, in
- 	set->__bits[s/8/sizeof *set->__bits] |= 1UL<<(s&8*sizeof *set->__bits-1);
- }
- 
--#ifdef SHARED
- __attribute__((__visibility__("hidden")))
--#endif
--extern const char __cp_begin[1], __cp_end[1];
-+extern const char __cp_begin[1], __cp_end[1], __cp_cancel[1];
- 
- static void cancel_handler(int sig, siginfo_t *si, void *ctx)
- {
- 	pthread_t self = __pthread_self();
- 	ucontext_t *uc = ctx;
--	const char *ip = ((char **)&uc->uc_mcontext)[CANCEL_REG_IP];
-+	uintptr_t pc = uc->uc_mcontext.MC_PC;
- 
- 	a_barrier();
- 	if (!self->cancel || self->canceldisable == PTHREAD_CANCEL_DISABLE) return;
- 
- 	_sigaddset(&uc->uc_sigmask, SIGCANCEL);
- 
--	if (self->cancelasync || ip >= __cp_begin && ip < __cp_end) {
--		((char **)&uc->uc_mcontext)[CANCEL_REG_IP] = (char *)__cp_cancel;
-+	if (self->cancelasync || pc >= (uintptr_t)__cp_begin && pc < (uintptr_t)__cp_end) {
-+		uc->uc_mcontext.MC_PC = (uintptr_t)__cp_cancel;
- 		return;
- 	}
- 
---- /dev/null
-+++ b/src/thread/sh/__set_thread_area.c
-@@ -0,0 +1,40 @@
-+#include "pthread_impl.h"
-+#include "libc.h"
-+#include <elf.h>
-+
-+/* Also perform sh-specific init */
-+
-+#define CPU_HAS_LLSC 0x0040
-+#define CPU_HAS_CAS_L 0x0400
-+
-+__attribute__((__visibility__("hidden")))
-+extern const char __sh_cas_gusa[], __sh_cas_llsc[], __sh_cas_imask[], __sh_cas_cas_l[];
-+
-+__attribute__((__visibility__("hidden")))
-+const void *__sh_cas_ptr;
-+
-+__attribute__((__visibility__("hidden")))
-+unsigned __sh_nommu;
-+
-+int __set_thread_area(void *p)
-+{
-+	size_t *aux;
-+	__asm__ __volatile__ ( "ldc %0, gbr" : : "r"(p) : "memory" );
-+#ifndef __SH4A__
-+	__sh_cas_ptr = __sh_cas_gusa;
-+#if !defined(__SH3__) && !defined(__SH4__)
-+	for (aux=libc.auxv; *aux; aux+=2) {
-+		if (*aux != AT_PLATFORM) continue;
-+		const char *s = (void *)aux[1];
-+		if (s[0]!='s' || s[1]!='h' || s[2]!='2' || s[3]-'0'<10u) break;
-+		__sh_cas_ptr = __sh_cas_imask;
-+		__sh_nommu = 1;
-+	}
-+#endif
-+	if (__hwcap & CPU_HAS_CAS_L)
-+		__sh_cas_ptr = __sh_cas_cas_l;
-+	else if (__hwcap & CPU_HAS_LLSC)
-+		__sh_cas_ptr = __sh_cas_llsc;
-+#endif
-+	return 0;
-+}
---- /dev/null
-+++ b/src/thread/sh/atomics.s
-@@ -0,0 +1,65 @@
-+/* Contract for all versions is same as cas.l r2,r3,@r0
-+ * pr and r1 are also clobbered (by jsr & r1 as temp).
-+ * r0,r2,r4-r15 must be preserved.
-+ * r3 contains result (==r2 iff cas succeeded). */
-+
-+	.align 2
-+.global __sh_cas_gusa
-+.hidden __sh_cas_gusa
-+__sh_cas_gusa:
-+	mov.l r5,@-r15
-+	mov.l r4,@-r15
-+	mov r0,r4
-+	mova 1f,r0
-+	mov r15,r1
-+	mov #(0f-1f),r15
-+0:	mov.l @r4,r5
-+	cmp/eq r5,r2
-+	bf 1f
-+	mov.l r3,@r4
-+1:	mov r1,r15
-+	mov r5,r3
-+	mov r4,r0
-+	mov.l @r15+,r4
-+	rts
-+	 mov.l @r15+,r5
-+
-+.global __sh_cas_llsc
-+.hidden __sh_cas_llsc
-+__sh_cas_llsc:
-+	mov r0,r1
-+	synco
-+0:	movli.l @r1,r0
-+	cmp/eq r0,r2
-+	bf 1f
-+	mov r3,r0
-+	movco.l r0,@r1
-+	bf 0b
-+	mov r2,r0
-+1:	synco
-+	mov r0,r3
-+	rts
-+	 mov r1,r0
-+
-+.global __sh_cas_imask
-+.hidden __sh_cas_imask
-+__sh_cas_imask:
-+	mov r0,r1
-+	stc sr,r0
-+	mov.l r0,@-r15
-+	or #0xf0,r0
-+	ldc r0,sr
-+	mov.l @r1,r0
-+	cmp/eq r0,r2
-+	bf 1f
-+	mov.l r3,@r1
-+1:	ldc.l @r15+,sr
-+	mov r0,r3
-+	rts
-+	 mov r1,r0
-+
-+.global __sh_cas_cas_l
-+.hidden __sh_cas_cas_l
-+__sh_cas_cas_l:
-+	rts
-+	 .word 0x2323 /* cas.l r2,r3,@r0 */
---- a/src/thread/sh/syscall_cp.s
-+++ b/src/thread/sh/syscall_cp.s
-@@ -14,17 +14,8 @@ __syscall_cp_asm:
- __cp_begin:
- 	mov.l @r4, r4
- 	tst   r4, r4
--	bt    2f
--
--	mov.l L1, r0
--	braf  r0
--	 nop
--1:
--
--.align 2
--L1:	.long __cancel@PLT-(1b-.)
--
--2:	mov   r5, r3
-+	bf    __cp_cancel
-+	mov   r5, r3
- 	mov   r6, r4
- 	mov   r7, r5
- 	mov.l @r15, r6
-@@ -43,3 +34,12 @@ __cp_end:
- 
- 	rts
- 	 nop
-+
-+__cp_cancel:
-+	mov.l 2f, r0
-+	braf  r0
-+	 nop
-+1:
-+
-+.align 2
-+2:	.long __cancel@PCREL-(1b-.)
---- a/src/thread/x32/syscall_cp.s
-+++ b/src/thread/x32/syscall_cp.s
-@@ -14,7 +14,7 @@ __syscall_cp_internal:
- __cp_begin:
- 	mov (%rdi),%eax
- 	test %eax,%eax
--	jnz __cancel
-+	jnz __cp_cancel
- 	mov %rdi,%r11
- 	mov %rsi,%rax
- 	mov %rdx,%rdi
-@@ -27,3 +27,5 @@ __cp_begin:
- 	syscall
- __cp_end:
- 	ret
-+__cp_cancel:
-+	jmp __cancel
---- a/src/thread/x86_64/syscall_cp.s
-+++ b/src/thread/x86_64/syscall_cp.s
-@@ -14,7 +14,7 @@ __syscall_cp_asm:
- __cp_begin:
- 	mov (%rdi),%eax
- 	test %eax,%eax
--	jnz __cancel
-+	jnz __cp_cancel
- 	mov %rdi,%r11
- 	mov %rsi,%rax
- 	mov %rdx,%rdi
-@@ -27,3 +27,5 @@ __cp_begin:
- 	syscall
- __cp_end:
- 	ret
-+__cp_cancel:
-+	jmp __cancel
diff --git a/toolchain/musl/patches/000-update-to-git-2016-01-30.patch b/toolchain/musl/patches/000-update-to-git-2016-01-30.patch
new file mode 100644
index 00000000000..639a500756c
--- /dev/null
+++ b/toolchain/musl/patches/000-update-to-git-2016-01-30.patch
@@ -0,0 +1,17861 @@
+--- a/.gitignore
++++ b/.gitignore
+@@ -5,9 +5,6 @@
+ *.so.1
+ arch/*/bits/alltypes.h
+ config.mak
+-include/bits
+-tools/musl-gcc
+-tools/musl-clang
+-tools/ld.musl-clang
+ lib/musl-gcc.specs
+ src/internal/version.h
++/obj/
+--- a/Makefile
++++ b/Makefile
+@@ -8,6 +8,7 @@
+ # Do not make changes here.
+ #
+ 
++srcdir = .
+ exec_prefix = /usr/local
+ bindir = $(exec_prefix)/bin
+ 
+@@ -16,31 +17,42 @@ includedir = $(prefix)/include
+ libdir = $(prefix)/lib
+ syslibdir = /lib
+ 
+-SRCS = $(sort $(wildcard src/*/*.c arch/$(ARCH)/src/*.c))
+-OBJS = $(SRCS:.c=.o)
++BASE_SRCS = $(sort $(wildcard $(srcdir)/src/*/*.c))
++BASE_OBJS = $(patsubst $(srcdir)/%,%.o,$(basename $(BASE_SRCS)))
++ARCH_SRCS = $(wildcard $(srcdir)/src/*/$(ARCH)/*.[csS])
++ARCH_OBJS = $(patsubst $(srcdir)/%,%.o,$(basename $(ARCH_SRCS)))
++REPLACED_OBJS = $(sort $(subst /$(ARCH)/,/,$(ARCH_OBJS)))
++LDSO_SRCS = $(sort $(wildcard $(srcdir)/ldso/*.c))
++LDSO_OBJS = $(patsubst $(srcdir)/%,obj/%.lo,$(basename $(LDSO_SRCS)))
++OBJS = $(addprefix obj/, $(filter-out $(REPLACED_OBJS), $(sort $(BASE_OBJS) $(ARCH_OBJS))))
++AOBJS = $(OBJS)
+ LOBJS = $(OBJS:.o=.lo)
+-GENH = include/bits/alltypes.h
+-GENH_INT = src/internal/version.h
+-IMPH = src/internal/stdio_impl.h src/internal/pthread_impl.h src/internal/libc.h
++GENH = obj/include/bits/alltypes.h
++GENH_INT = obj/src/internal/version.h
++IMPH = $(addprefix $(srcdir)/, src/internal/stdio_impl.h src/internal/pthread_impl.h src/internal/libc.h)
+ 
+-LDFLAGS = 
++LDFLAGS =
++LDFLAGS_AUTO =
+ LIBCC = -lgcc
+ CPPFLAGS =
+-CFLAGS = -Os -pipe
++CFLAGS =
++CFLAGS_AUTO = -Os -pipe
+ CFLAGS_C99FSE = -std=c99 -ffreestanding -nostdinc 
+ 
+ CFLAGS_ALL = $(CFLAGS_C99FSE)
+-CFLAGS_ALL += -D_XOPEN_SOURCE=700 -I./arch/$(ARCH) -I./src/internal -I./include
+-CFLAGS_ALL += $(CPPFLAGS) $(CFLAGS)
+-CFLAGS_ALL_STATIC = $(CFLAGS_ALL)
+-CFLAGS_ALL_SHARED = $(CFLAGS_ALL) -fPIC -DSHARED
++CFLAGS_ALL += -D_XOPEN_SOURCE=700 -I$(srcdir)/arch/$(ARCH) -I$(srcdir)/arch/generic -Iobj/src/internal -I$(srcdir)/src/internal -Iobj/include -I$(srcdir)/include
++CFLAGS_ALL += $(CPPFLAGS) $(CFLAGS_AUTO) $(CFLAGS)
++
++LDFLAGS_ALL = $(LDFLAGS_AUTO) $(LDFLAGS)
+ 
+ AR      = $(CROSS_COMPILE)ar
+ RANLIB  = $(CROSS_COMPILE)ranlib
+-INSTALL = ./tools/install.sh
++INSTALL = $(srcdir)/tools/install.sh
+ 
+-ARCH_INCLUDES = $(wildcard arch/$(ARCH)/bits/*.h)
+-ALL_INCLUDES = $(sort $(wildcard include/*.h include/*/*.h) $(GENH) $(ARCH_INCLUDES:arch/$(ARCH)/%=include/%))
++ARCH_INCLUDES = $(wildcard $(srcdir)/arch/$(ARCH)/bits/*.h)
++GENERIC_INCLUDES = $(wildcard $(srcdir)/arch/generic/bits/*.h)
++INCLUDES = $(wildcard $(srcdir)/include/*.h $(srcdir)/include/*/*.h)
++ALL_INCLUDES = $(sort $(INCLUDES:$(srcdir)/%=%) $(GENH:obj/%=%) $(ARCH_INCLUDES:$(srcdir)/arch/$(ARCH)/%=include/%) $(GENERIC_INCLUDES:$(srcdir)/arch/generic/%=include/%))
+ 
+ EMPTY_LIB_NAMES = m rt pthread crypt util xnet resolv dl
+ EMPTY_LIBS = $(EMPTY_LIB_NAMES:%=lib/lib%.a)
+@@ -49,7 +61,7 @@ STATIC_LIBS = lib/libc.a
+ SHARED_LIBS = lib/libc.so
+ TOOL_LIBS = lib/musl-gcc.specs
+ ALL_LIBS = $(CRT_LIBS) $(STATIC_LIBS) $(SHARED_LIBS) $(EMPTY_LIBS) $(TOOL_LIBS)
+-ALL_TOOLS = tools/musl-gcc
++ALL_TOOLS = obj/musl-gcc
+ 
+ WRAPCC_GCC = gcc
+ WRAPCC_CLANG = clang
+@@ -58,122 +70,128 @@ LDSO_PATHNAME = $(syslibdir)/ld-musl-$(A
+ 
+ -include config.mak
+ 
++ifeq ($(ARCH),)
++$(error Please set ARCH in config.mak before running make.)
++endif
++
+ all: $(ALL_LIBS) $(ALL_TOOLS)
+ 
++OBJ_DIRS = $(sort $(patsubst %/,%,$(dir $(ALL_LIBS) $(ALL_TOOLS) $(OBJS) $(LDSO_OBJS) $(GENH) $(GENH_INT))) $(addprefix obj/, crt crt/$(ARCH) include))
++
++$(ALL_LIBS) $(ALL_TOOLS) $(CRT_LIBS:lib/%=obj/crt/%) $(OBJS) $(LOBJS) $(GENH) $(GENH_INT): | $(OBJ_DIRS)
++
++$(OBJ_DIRS):
++	mkdir -p $@
++
+ install: install-libs install-headers install-tools
+ 
+ clean:
+-	rm -f crt/*.o
+-	rm -f $(OBJS)
+-	rm -f $(LOBJS)
+-	rm -f $(ALL_LIBS) lib/*.[ao] lib/*.so
+-	rm -f $(ALL_TOOLS)
+-	rm -f $(GENH) $(GENH_INT)
+-	rm -f include/bits
++	rm -rf obj lib
+ 
+ distclean: clean
+ 	rm -f config.mak
+ 
+-include/bits:
+-	@test "$(ARCH)" || { echo "Please set ARCH in config.mak before running make." ; exit 1 ; }
+-	ln -sf ../arch/$(ARCH)/bits $@
++obj/include/bits/alltypes.h: $(srcdir)/arch/$(ARCH)/bits/alltypes.h.in $(srcdir)/include/alltypes.h.in $(srcdir)/tools/mkalltypes.sed
++	sed -f $(srcdir)/tools/mkalltypes.sed $(srcdir)/arch/$(ARCH)/bits/alltypes.h.in $(srcdir)/include/alltypes.h.in > $@
+ 
+-include/bits/alltypes.h.in: include/bits
++obj/src/internal/version.h: $(wildcard $(srcdir)/VERSION $(srcdir)/.git)
++	printf '#define VERSION "%s"\n' "$$(cd $(srcdir); sh tools/version.sh)" > $@
+ 
+-include/bits/alltypes.h: include/bits/alltypes.h.in include/alltypes.h.in tools/mkalltypes.sed
+-	sed -f tools/mkalltypes.sed include/bits/alltypes.h.in include/alltypes.h.in > $@
++obj/src/internal/version.o obj/src/internal/version.lo: obj/src/internal/version.h
+ 
+-src/internal/version.h: $(wildcard VERSION .git)
+-	printf '#define VERSION "%s"\n' "$$(sh tools/version.sh)" > $@
++obj/crt/rcrt1.o obj/ldso/dlstart.lo obj/ldso/dynlink.lo: $(srcdir)/src/internal/dynlink.h $(srcdir)/arch/$(ARCH)/reloc.h
+ 
+-src/internal/version.lo: src/internal/version.h
++obj/crt/crt1.o obj/crt/Scrt1.o obj/crt/rcrt1.o obj/ldso/dlstart.lo: $(srcdir)/arch/$(ARCH)/crt_arch.h
+ 
+-crt/rcrt1.o src/ldso/dlstart.lo src/ldso/dynlink.lo: src/internal/dynlink.h arch/$(ARCH)/reloc.h
++obj/crt/rcrt1.o: $(srcdir)/ldso/dlstart.c
+ 
+-crt/crt1.o crt/Scrt1.o crt/rcrt1.o src/ldso/dlstart.lo: $(wildcard arch/$(ARCH)/crt_arch.h)
++obj/crt/Scrt1.o obj/crt/rcrt1.o: CFLAGS_ALL += -fPIC
+ 
+-crt/rcrt1.o: src/ldso/dlstart.c
++obj/crt/$(ARCH)/crti.o: $(srcdir)/crt/$(ARCH)/crti.s
+ 
+-crt/Scrt1.o crt/rcrt1.o: CFLAGS += -fPIC
++obj/crt/$(ARCH)/crtn.o: $(srcdir)/crt/$(ARCH)/crtn.s
+ 
+-OPTIMIZE_SRCS = $(wildcard $(OPTIMIZE_GLOBS:%=src/%))
+-$(OPTIMIZE_SRCS:%.c=%.o) $(OPTIMIZE_SRCS:%.c=%.lo): CFLAGS += -O3
++OPTIMIZE_SRCS = $(wildcard $(OPTIMIZE_GLOBS:%=$(srcdir)/src/%))
++$(OPTIMIZE_SRCS:$(srcdir)/%.c=obj/%.o) $(OPTIMIZE_SRCS:$(srcdir)/%.c=obj/%.lo): CFLAGS += -O3
+ 
+ MEMOPS_SRCS = src/string/memcpy.c src/string/memmove.c src/string/memcmp.c src/string/memset.c
+-$(MEMOPS_SRCS:%.c=%.o) $(MEMOPS_SRCS:%.c=%.lo): CFLAGS += $(CFLAGS_MEMOPS)
++$(MEMOPS_SRCS:%.c=obj/%.o) $(MEMOPS_SRCS:%.c=obj/%.lo): CFLAGS_ALL += $(CFLAGS_MEMOPS)
+ 
+ NOSSP_SRCS = $(wildcard crt/*.c) \
+ 	src/env/__libc_start_main.c src/env/__init_tls.c \
+-	src/thread/__set_thread_area.c src/env/__stack_chk_fail.c \
+-	src/string/memset.c src/string/memcpy.c \
+-	src/ldso/dlstart.c src/ldso/dynlink.c
+-$(NOSSP_SRCS:%.c=%.o) $(NOSSP_SRCS:%.c=%.lo): CFLAGS += $(CFLAGS_NOSSP)
++	src/env/__stack_chk_fail.c \
++	src/thread/__set_thread_area.c src/thread/$(ARCH)/__set_thread_area.c \
++	src/string/memset.c src/string/$(ARCH)/memset.c \
++	src/string/memcpy.c src/string/$(ARCH)/memcpy.c \
++	ldso/dlstart.c ldso/dynlink.c
++$(NOSSP_SRCS:%.c=obj/%.o) $(NOSSP_SRCS:%.c=obj/%.lo): CFLAGS_ALL += $(CFLAGS_NOSSP)
+ 
+-$(CRT_LIBS:lib/%=crt/%): CFLAGS += -DCRT
++$(CRT_LIBS:lib/%=obj/crt/%): CFLAGS_ALL += -DCRT
+ 
+-# This incantation ensures that changes to any subarch asm files will
+-# force the corresponding object file to be rebuilt, even if the implicit
+-# rule below goes indirectly through a .sub file.
+-define mkasmdep
+-$(dir $(patsubst %/,%,$(dir $(1))))$(notdir $(1:.s=.o)): $(1)
+-endef
+-$(foreach s,$(wildcard src/*/$(ARCH)*/*.s),$(eval $(call mkasmdep,$(s))))
++$(LOBJS) $(LDSO_OBJS): CFLAGS_ALL += -fPIC
++
++CC_CMD = $(CC) $(CFLAGS_ALL) -c -o $@ $<
+ 
+ # Choose invocation of assembler to be used
+-# $(1) is input file, $(2) is output file, $(3) is assembler flags
+ ifeq ($(ADD_CFI),yes)
+-	AS_CMD = LC_ALL=C awk -f tools/add-cfi.common.awk -f tools/add-cfi.$(ARCH).awk $< | $(CC) -x assembler -c -o $@ -
++	AS_CMD = LC_ALL=C awk -f $(srcdir)/tools/add-cfi.common.awk -f $(srcdir)/tools/add-cfi.$(ARCH).awk $< | $(CC) $(CFLAGS_ALL) -x assembler -c -o $@ -
+ else
+-	AS_CMD = $(CC) -c -o $@ $<
++	AS_CMD = $(CC_CMD)
+ endif
+ 
+-%.o: $(ARCH)$(ASMSUBARCH)/%.sub
+-	$(CC) $(CFLAGS_ALL_STATIC) -c -o $@ $(dir $<)$(shell cat $<)
++obj/%.o: $(srcdir)/%.s
++	$(AS_CMD)
+ 
+-%.o: $(ARCH)/%.s
+-	$(AS_CMD) $(CFLAGS_ALL_STATIC)
++obj/%.o: $(srcdir)/%.S
++	$(CC_CMD)
+ 
+-%.o: %.c $(GENH) $(IMPH)
+-	$(CC) $(CFLAGS_ALL_STATIC) -c -o $@ $<
++obj/%.o: $(srcdir)/%.c $(GENH) $(IMPH)
++	$(CC_CMD)
+ 
+-%.lo: $(ARCH)$(ASMSUBARCH)/%.sub
+-	$(CC) $(CFLAGS_ALL_SHARED) -c -o $@ $(dir $<)$(shell cat $<)
++obj/%.lo: $(srcdir)/%.s
++	$(AS_CMD)
+ 
+-%.lo: $(ARCH)/%.s
+-	$(AS_CMD) $(CFLAGS_ALL_SHARED)
++obj/%.lo: $(srcdir)/%.S
++	$(CC_CMD)
+ 
+-%.lo: %.c $(GENH) $(IMPH)
+-	$(CC) $(CFLAGS_ALL_SHARED) -c -o $@ $<
++obj/%.lo: $(srcdir)/%.c $(GENH) $(IMPH)
++	$(CC_CMD)
+ 
+-lib/libc.so: $(LOBJS)
+-	$(CC) $(CFLAGS_ALL_SHARED) $(LDFLAGS) -nostdlib -shared \
++lib/libc.so: $(LOBJS) $(LDSO_OBJS)
++	$(CC) $(CFLAGS_ALL) $(LDFLAGS_ALL) -nostdlib -shared \
+ 	-Wl,-e,_dlstart -Wl,-Bsymbolic-functions \
+-	-o $@ $(LOBJS) $(LIBCC)
++	-o $@ $(LOBJS) $(LDSO_OBJS) $(LIBCC)
+ 
+-lib/libc.a: $(OBJS)
++lib/libc.a: $(AOBJS)
+ 	rm -f $@
+-	$(AR) rc $@ $(OBJS)
++	$(AR) rc $@ $(AOBJS)
+ 	$(RANLIB) $@
+ 
+ $(EMPTY_LIBS):
+ 	rm -f $@
+ 	$(AR) rc $@
+ 
+-lib/%.o: crt/%.o
++lib/%.o: obj/crt/%.o
+ 	cp $< $@
+ 
+-lib/musl-gcc.specs: tools/musl-gcc.specs.sh config.mak
++lib/crti.o: obj/crt/$(ARCH)/crti.o
++	cp $< $@
++
++lib/crtn.o: obj/crt/$(ARCH)/crtn.o
++	cp $< $@
++
++lib/musl-gcc.specs: $(srcdir)/tools/musl-gcc.specs.sh config.mak
+ 	sh $< "$(includedir)" "$(libdir)" "$(LDSO_PATHNAME)" > $@
+ 
+-tools/musl-gcc: config.mak
++obj/musl-gcc: config.mak
+ 	printf '#!/bin/sh\nexec "$${REALGCC:-$(WRAPCC_GCC)}" "$$@" -specs "%s/musl-gcc.specs"\n' "$(libdir)" > $@
+ 	chmod +x $@
+ 
+-tools/%-clang: tools/%-clang.in config.mak
++obj/%-clang: $(srcdir)/tools/%-clang.in config.mak
+ 	sed -e 's!@CC@!$(WRAPCC_CLANG)!g' -e 's!@PREFIX@!$(prefix)!g' -e 's!@INCDIR@!$(includedir)!g' -e 's!@LIBDIR@!$(libdir)!g' -e 's!@LDSO@!$(LDSO_PATHNAME)!g' $< > $@
+ 	chmod +x $@
+ 
+-$(DESTDIR)$(bindir)/%: tools/%
++$(DESTDIR)$(bindir)/%: obj/%
+ 	$(INSTALL) -D $< $@
+ 
+ $(DESTDIR)$(libdir)/%.so: lib/%.so
+@@ -182,10 +200,16 @@ $(DESTDIR)$(libdir)/%.so: lib/%.so
+ $(DESTDIR)$(libdir)/%: lib/%
+ 	$(INSTALL) -D -m 644 $< $@
+ 
+-$(DESTDIR)$(includedir)/bits/%: arch/$(ARCH)/bits/%
++$(DESTDIR)$(includedir)/bits/%: $(srcdir)/arch/$(ARCH)/bits/%
+ 	$(INSTALL) -D -m 644 $< $@
+ 
+-$(DESTDIR)$(includedir)/%: include/%
++$(DESTDIR)$(includedir)/bits/%: $(srcdir)/arch/generic/bits/%
++	$(INSTALL) -D -m 644 $< $@
++
++$(DESTDIR)$(includedir)/bits/%: obj/include/bits/%
++	$(INSTALL) -D -m 644 $< $@
++
++$(DESTDIR)$(includedir)/%: $(srcdir)/include/%
+ 	$(INSTALL) -D -m 644 $< $@
+ 
+ $(DESTDIR)$(LDSO_PATHNAME): $(DESTDIR)$(libdir)/libc.so
+@@ -195,12 +219,12 @@ install-libs: $(ALL_LIBS:lib/%=$(DESTDIR
+ 
+ install-headers: $(ALL_INCLUDES:include/%=$(DESTDIR)$(includedir)/%)
+ 
+-install-tools: $(ALL_TOOLS:tools/%=$(DESTDIR)$(bindir)/%)
++install-tools: $(ALL_TOOLS:obj/%=$(DESTDIR)$(bindir)/%)
+ 
+ musl-git-%.tar.gz: .git
+-	 git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ $(patsubst musl-git-%.tar.gz,%,$@)
++	 git --git-dir=$(srcdir)/.git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ $(patsubst musl-git-%.tar.gz,%,$@)
+ 
+ musl-%.tar.gz: .git
+-	 git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ v$(patsubst musl-%.tar.gz,%,$@)
++	 git --git-dir=$(srcdir)/.git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ v$(patsubst musl-%.tar.gz,%,$@)
+ 
+ .PHONY: all clean install install-libs install-headers install-tools
+--- a/arch/aarch64/atomic.h
++++ /dev/null
+@@ -1,206 +0,0 @@
+-#ifndef _INTERNAL_ATOMIC_H
+-#define _INTERNAL_ATOMIC_H
+-
+-#include <stdint.h>
+-
+-static inline int a_ctz_64(uint64_t x)
+-{
+-	__asm__(
+-		"	rbit %0, %1\n"
+-		"	clz %0, %0\n"
+-		: "=r"(x) : "r"(x));
+-	return x;
+-}
+-
+-static inline int a_ctz_l(unsigned long x)
+-{
+-	return a_ctz_64(x);
+-}
+-
+-static inline void a_barrier()
+-{
+-	__asm__ __volatile__("dmb ish");
+-}
+-
+-static inline void *a_cas_p(volatile void *p, void *t, void *s)
+-{
+-	void *old;
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"1:	ldxr %0,%3\n"
+-		"	cmp %0,%1\n"
+-		"	b.ne 1f\n"
+-		"	stxr %w0,%2,%3\n"
+-		"	cbnz %w0,1b\n"
+-		"	mov %0,%1\n"
+-		"1:	dmb ish\n"
+-		: "=&r"(old)
+-		: "r"(t), "r"(s), "Q"(*(long*)p)
+-		: "memory", "cc");
+-	return old;
+-}
+-
+-static inline int a_cas(volatile int *p, int t, int s)
+-{
+-	int old;
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"1:	ldxr %w0,%3\n"
+-		"	cmp %w0,%w1\n"
+-		"	b.ne 1f\n"
+-		"	stxr %w0,%w2,%3\n"
+-		"	cbnz %w0,1b\n"
+-		"	mov %w0,%w1\n"
+-		"1:	dmb ish\n"
+-		: "=&r"(old)
+-		: "r"(t), "r"(s), "Q"(*p)
+-		: "memory", "cc");
+-	return old;
+-}
+-
+-static inline int a_swap(volatile int *x, int v)
+-{
+-	int old, tmp;
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"1:	ldxr %w0,%3\n"
+-		"	stxr %w1,%w2,%3\n"
+-		"	cbnz %w1,1b\n"
+-		"	dmb ish\n"
+-		: "=&r"(old), "=&r"(tmp)
+-		: "r"(v), "Q"(*x)
+-		: "memory", "cc" );
+-	return old;
+-}
+-
+-static inline int a_fetch_add(volatile int *x, int v)
+-{
+-	int old, tmp;
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"1:	ldxr %w0,%3\n"
+-		"	add %w0,%w0,%w2\n"
+-		"	stxr %w1,%w0,%3\n"
+-		"	cbnz %w1,1b\n"
+-		"	dmb ish\n"
+-		: "=&r"(old), "=&r"(tmp)
+-		: "r"(v), "Q"(*x)
+-		: "memory", "cc" );
+-	return old-v;
+-}
+-
+-static inline void a_inc(volatile int *x)
+-{
+-	int tmp, tmp2;
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"1:	ldxr %w0,%2\n"
+-		"	add %w0,%w0,#1\n"
+-		"	stxr %w1,%w0,%2\n"
+-		"	cbnz %w1,1b\n"
+-		"	dmb ish\n"
+-		: "=&r"(tmp), "=&r"(tmp2)
+-		: "Q"(*x)
+-		: "memory", "cc" );
+-}
+-
+-static inline void a_dec(volatile int *x)
+-{
+-	int tmp, tmp2;
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"1:	ldxr %w0,%2\n"
+-		"	sub %w0,%w0,#1\n"
+-		"	stxr %w1,%w0,%2\n"
+-		"	cbnz %w1,1b\n"
+-		"	dmb ish\n"
+-		: "=&r"(tmp), "=&r"(tmp2)
+-		: "Q"(*x)
+-		: "memory", "cc" );
+-}
+-
+-static inline void a_and_64(volatile uint64_t *p, uint64_t v)
+-{
+-	int tmp, tmp2;
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"1:	ldxr %0,%3\n"
+-		"	and %0,%0,%2\n"
+-		"	stxr %w1,%0,%3\n"
+-		"	cbnz %w1,1b\n"
+-		"	dmb ish\n"
+-		: "=&r"(tmp), "=&r"(tmp2)
+-		: "r"(v), "Q"(*p)
+-		: "memory", "cc" );
+-}
+-
+-static inline void a_and(volatile int *p, int v)
+-{
+-	int tmp, tmp2;
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"1:	ldxr %w0,%3\n"
+-		"	and %w0,%w0,%w2\n"
+-		"	stxr %w1,%w0,%3\n"
+-		"	cbnz %w1,1b\n"
+-		"	dmb ish\n"
+-		: "=&r"(tmp), "=&r"(tmp2)
+-		: "r"(v), "Q"(*p)
+-		: "memory", "cc" );
+-}
+-
+-static inline void a_or_64(volatile uint64_t *p, uint64_t v)
+-{
+-	int tmp, tmp2;
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"1:	ldxr %0,%3\n"
+-		"	orr %0,%0,%2\n"
+-		"	stxr %w1,%0,%3\n"
+-		"	cbnz %w1,1b\n"
+-		"	dmb ish\n"
+-		: "=&r"(tmp), "=&r"(tmp2)
+-		: "r"(v), "Q"(*p)
+-		: "memory", "cc" );
+-}
+-
+-static inline void a_or_l(volatile void *p, long v)
+-{
+-	return a_or_64(p, v);
+-}
+-
+-static inline void a_or(volatile int *p, int v)
+-{
+-	int tmp, tmp2;
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"1:	ldxr %w0,%3\n"
+-		"	orr %w0,%w0,%w2\n"
+-		"	stxr %w1,%w0,%3\n"
+-		"	cbnz %w1,1b\n"
+-		"	dmb ish\n"
+-		: "=&r"(tmp), "=&r"(tmp2)
+-		: "r"(v), "Q"(*p)
+-		: "memory", "cc" );
+-}
+-
+-static inline void a_store(volatile int *p, int x)
+-{
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"	str %w1,%0\n"
+-		"	dmb ish\n"
+-		: "=m"(*p)
+-		: "r"(x)
+-		: "memory", "cc" );
+-}
+-
+-#define a_spin a_barrier
+-
+-static inline void a_crash()
+-{
+-	*(volatile char *)0=0;
+-}
+-
+-
+-#endif
+--- /dev/null
++++ b/arch/aarch64/atomic_arch.h
+@@ -0,0 +1,73 @@
++#define a_ll a_ll
++static inline int a_ll(volatile int *p)
++{
++	int v;
++	__asm__ __volatile__ ("ldaxr %w0,%1" : "=r"(v) : "Q"(*p));
++	return v;
++}
++
++#define a_sc a_sc
++static inline int a_sc(volatile int *p, int v)
++{
++	int r;
++	__asm__ __volatile__ ("stlxr %w0,%w1,%2" : "=&r"(r) : "r"(v), "Q"(*p) : "memory");
++	return !r;
++}
++
++#define a_barrier a_barrier
++static inline void a_barrier()
++{
++	__asm__ __volatile__ ("dmb ish" : : : "memory");
++}
++
++#define a_cas a_cas
++static inline int a_cas(volatile int *p, int t, int s)
++{
++	int old;
++	do {
++		old = a_ll(p);
++		if (old != t) {
++			a_barrier();
++			break;
++		}
++	} while (!a_sc(p, s));
++	return old;
++}
++
++static inline void *a_ll_p(volatile void *p)
++{
++	void *v;
++	__asm__ __volatile__ ("ldaxr %0, %1" : "=r"(v) : "Q"(*(void *volatile *)p));
++	return v;
++}
++
++static inline int a_sc_p(volatile int *p, void *v)
++{
++	int r;
++	__asm__ __volatile__ ("stlxr %w0,%1,%2" : "=&r"(r) : "r"(v), "Q"(*(void *volatile *)p) : "memory");
++	return !r;
++}
++
++#define a_cas_p a_cas_p
++static inline void *a_cas_p(volatile void *p, void *t, void *s)
++{
++	void *old;
++	do {
++		old = a_ll_p(p);
++		if (old != t) {
++			a_barrier();
++			break;
++		}
++	} while (!a_sc_p(p, s));
++	return old;
++}
++
++#define a_ctz_64 a_ctz_64
++static inline int a_ctz_64(uint64_t x)
++{
++	__asm__(
++		"	rbit %0, %1\n"
++		"	clz %0, %0\n"
++		: "=r"(x) : "r"(x));
++	return x;
++}
+--- a/arch/aarch64/bits/errno.h
++++ /dev/null
+@@ -1,134 +0,0 @@
+-#define EPERM            1
+-#define ENOENT           2
+-#define ESRCH            3
+-#define EINTR            4
+-#define EIO              5
+-#define ENXIO            6
+-#define E2BIG            7
+-#define ENOEXEC          8
+-#define EBADF            9
+-#define ECHILD          10
+-#define EAGAIN          11
+-#define ENOMEM          12
+-#define EACCES          13
+-#define EFAULT          14
+-#define ENOTBLK         15
+-#define EBUSY           16
+-#define EEXIST          17
+-#define EXDEV           18
+-#define ENODEV          19
+-#define ENOTDIR         20
+-#define EISDIR          21
+-#define EINVAL          22
+-#define ENFILE          23
+-#define EMFILE          24
+-#define ENOTTY          25
+-#define ETXTBSY         26
+-#define EFBIG           27
+-#define ENOSPC          28
+-#define ESPIPE          29
+-#define EROFS           30
+-#define EMLINK          31
+-#define EPIPE           32
+-#define EDOM            33
+-#define ERANGE          34
+-#define EDEADLK         35
+-#define ENAMETOOLONG    36
+-#define ENOLCK          37
+-#define ENOSYS          38
+-#define ENOTEMPTY       39
+-#define ELOOP           40
+-#define EWOULDBLOCK     EAGAIN
+-#define ENOMSG          42
+-#define EIDRM           43
+-#define ECHRNG          44
+-#define EL2NSYNC        45
+-#define EL3HLT          46
+-#define EL3RST          47
+-#define ELNRNG          48
+-#define EUNATCH         49
+-#define ENOCSI          50
+-#define EL2HLT          51
+-#define EBADE           52
+-#define EBADR           53
+-#define EXFULL          54
+-#define ENOANO          55
+-#define EBADRQC         56
+-#define EBADSLT         57
+-#define EDEADLOCK       EDEADLK
+-#define EBFONT          59
+-#define ENOSTR          60
+-#define ENODATA         61
+-#define ETIME           62
+-#define ENOSR           63
+-#define ENONET          64
+-#define ENOPKG          65
+-#define EREMOTE         66
+-#define ENOLINK         67
+-#define EADV            68
+-#define ESRMNT          69
+-#define ECOMM           70
+-#define EPROTO          71
+-#define EMULTIHOP       72
+-#define EDOTDOT         73
+-#define EBADMSG         74
+-#define EOVERFLOW       75
+-#define ENOTUNIQ        76
+-#define EBADFD          77
+-#define EREMCHG         78
+-#define ELIBACC         79
+-#define ELIBBAD         80
+-#define ELIBSCN         81
+-#define ELIBMAX         82
+-#define ELIBEXEC        83
+-#define EILSEQ          84
+-#define ERESTART        85
+-#define ESTRPIPE        86
+-#define EUSERS          87
+-#define ENOTSOCK        88
+-#define EDESTADDRREQ    89
+-#define EMSGSIZE        90
+-#define EPROTOTYPE      91
+-#define ENOPROTOOPT     92
+-#define EPROTONOSUPPORT 93
+-#define ESOCKTNOSUPPORT 94
+-#define EOPNOTSUPP      95
+-#define ENOTSUP         EOPNOTSUPP
+-#define EPFNOSUPPORT    96
+-#define EAFNOSUPPORT    97
+-#define EADDRINUSE      98
+-#define EADDRNOTAVAIL   99
+-#define ENETDOWN        100
+-#define ENETUNREACH     101
+-#define ENETRESET       102
+-#define ECONNABORTED    103
+-#define ECONNRESET      104
+-#define ENOBUFS         105
+-#define EISCONN         106
+-#define ENOTCONN        107
+-#define ESHUTDOWN       108
+-#define ETOOMANYREFS    109
+-#define ETIMEDOUT       110
+-#define ECONNREFUSED    111
+-#define EHOSTDOWN       112
+-#define EHOSTUNREACH    113
+-#define EALREADY        114
+-#define EINPROGRESS     115
+-#define ESTALE          116
+-#define EUCLEAN         117
+-#define ENOTNAM         118
+-#define ENAVAIL         119
+-#define EISNAM          120
+-#define EREMOTEIO       121
+-#define EDQUOT          122
+-#define ENOMEDIUM       123
+-#define EMEDIUMTYPE     124
+-#define ECANCELED       125
+-#define ENOKEY          126
+-#define EKEYEXPIRED     127
+-#define EKEYREVOKED     128
+-#define EKEYREJECTED    129
+-#define EOWNERDEAD      130
+-#define ENOTRECOVERABLE 131
+-#define ERFKILL         132
+-#define EHWPOISON       133
+--- a/arch/aarch64/bits/mman.h
++++ b/arch/aarch64/bits/mman.h
+@@ -36,6 +36,7 @@
+ 
+ #define MCL_CURRENT     1
+ #define MCL_FUTURE      2
++#define MCL_ONFAULT     4
+ 
+ #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+ #define MADV_NORMAL      0
+--- a/arch/aarch64/bits/statfs.h
++++ /dev/null
+@@ -1,7 +0,0 @@
+-struct statfs {
+-	unsigned long f_type, f_bsize;
+-	fsblkcnt_t f_blocks, f_bfree, f_bavail;
+-	fsfilcnt_t f_files, f_ffree;
+-	fsid_t f_fsid;
+-	unsigned long f_namelen, f_frsize, f_flags, f_spare[4];
+-};
+--- a/arch/aarch64/bits/stdarg.h
++++ /dev/null
+@@ -1,4 +0,0 @@
+-#define va_start(v,l)   __builtin_va_start(v,l)
+-#define va_end(v)       __builtin_va_end(v)
+-#define va_arg(v,l)     __builtin_va_arg(v,l)
+-#define va_copy(d,s)    __builtin_va_copy(d,s)
+--- a/arch/aarch64/bits/syscall.h
++++ b/arch/aarch64/bits/syscall.h
+@@ -265,6 +265,9 @@
+ #define __NR_memfd_create 279
+ #define __NR_bpf 280
+ #define __NR_execveat 281
++#define __NR_userfaultfd 282
++#define __NR_membarrier 283
++#define __NR_mlock2 284
+ 
+ #define SYS_io_setup __NR_io_setup
+ #define SYS_io_destroy __NR_io_destroy
+@@ -533,3 +536,6 @@
+ #define SYS_memfd_create __NR_memfd_create
+ #define SYS_bpf __NR_bpf
+ #define SYS_execveat __NR_execveat
++#define SYS_userfaultfd __NR_userfaultfd
++#define SYS_membarrier __NR_membarrier
++#define SYS_mlock2 __NR_mlock2
+--- a/arch/aarch64/bits/termios.h
++++ /dev/null
+@@ -1,160 +0,0 @@
+-struct termios
+-{
+-	tcflag_t c_iflag;
+-	tcflag_t c_oflag;
+-	tcflag_t c_cflag;
+-	tcflag_t c_lflag;
+-	cc_t c_line;
+-	cc_t c_cc[NCCS];
+-	speed_t __c_ispeed;
+-	speed_t __c_ospeed;
+-};
+-
+-#define VINTR     0
+-#define VQUIT     1
+-#define VERASE    2
+-#define VKILL     3
+-#define VEOF      4
+-#define VTIME     5
+-#define VMIN      6
+-#define VSWTC     7
+-#define VSTART    8
+-#define VSTOP     9
+-#define VSUSP    10
+-#define VEOL     11
+-#define VREPRINT 12
+-#define VDISCARD 13
+-#define VWERASE  14
+-#define VLNEXT   15
+-#define VEOL2    16
+-
+-#define IGNBRK  0000001
+-#define BRKINT  0000002
+-#define IGNPAR  0000004
+-#define PARMRK  0000010
+-#define INPCK   0000020
+-#define ISTRIP  0000040
+-#define INLCR   0000100
+-#define IGNCR   0000200
+-#define ICRNL   0000400
+-#define IUCLC   0001000
+-#define IXON    0002000
+-#define IXANY   0004000
+-#define IXOFF   0010000
+-#define IMAXBEL 0020000
+-#define IUTF8   0040000
+-
+-#define OPOST  0000001
+-#define OLCUC  0000002
+-#define ONLCR  0000004
+-#define OCRNL  0000010
+-#define ONOCR  0000020
+-#define ONLRET 0000040
+-#define OFILL  0000100
+-#define OFDEL  0000200
+-#define NLDLY  0000400
+-#define NL0    0000000
+-#define NL1    0000400
+-#define CRDLY  0003000
+-#define CR0    0000000
+-#define CR1    0001000
+-#define CR2    0002000
+-#define CR3    0003000
+-#define TABDLY 0014000
+-#define TAB0   0000000
+-#define TAB1   0004000
+-#define TAB2   0010000
+-#define TAB3   0014000
+-#define BSDLY  0020000
+-#define BS0    0000000
+-#define BS1    0020000
+-#define FFDLY  0100000
+-#define FF0    0000000
+-#define FF1    0100000
+-
+-#define VTDLY  0040000
+-#define VT0    0000000
+-#define VT1    0040000
+-
+-#define B0       0000000
+-#define B50      0000001
+-#define B75      0000002
+-#define B110     0000003
+-#define B134     0000004
+-#define B150     0000005
+-#define B200     0000006
+-#define B300     0000007
+-#define B600     0000010
+-#define B1200    0000011
+-#define B1800    0000012
+-#define B2400    0000013
+-#define B4800    0000014
+-#define B9600    0000015
+-#define B19200   0000016
+-#define B38400   0000017
+-
+-#define B57600   0010001
+-#define B115200  0010002
+-#define B230400  0010003
+-#define B460800  0010004
+-#define B500000  0010005
+-#define B576000  0010006
+-#define B921600  0010007
+-#define B1000000 0010010
+-#define B1152000 0010011
+-#define B1500000 0010012
+-#define B2000000 0010013
+-#define B2500000 0010014
+-#define B3000000 0010015
+-#define B3500000 0010016
+-#define B4000000 0010017
+-
+-#define CBAUD    0010017
+-
+-#define CSIZE  0000060
+-#define CS5    0000000
+-#define CS6    0000020
+-#define CS7    0000040
+-#define CS8    0000060
+-#define CSTOPB 0000100
+-#define CREAD  0000200
+-#define PARENB 0000400
+-#define PARODD 0001000
+-#define HUPCL  0002000
+-#define CLOCAL 0004000
+-
+-#define ISIG   0000001
+-#define ICANON 0000002
+-#define ECHO   0000010
+-#define ECHOE  0000020
+-#define ECHOK  0000040
+-#define ECHONL 0000100
+-#define NOFLSH 0000200
+-#define TOSTOP 0000400
+-#define IEXTEN 0100000
+-
+-#define ECHOCTL 0001000
+-#define ECHOPRT 0002000
+-#define ECHOKE 0004000
+-#define FLUSHO 0010000
+-#define PENDIN 0040000
+-
+-#define TCOOFF 0
+-#define TCOON  1
+-#define TCIOFF 2
+-#define TCION  3
+-
+-#define TCIFLUSH  0
+-#define TCOFLUSH  1
+-#define TCIOFLUSH 2
+-
+-#define TCSANOW   0
+-#define TCSADRAIN 1
+-#define TCSAFLUSH 2
+-
+-#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+-#define CBAUDEX 0010000
+-#define CRTSCTS  020000000000
+-#define EXTPROC 0200000
+-#define XTABS  0014000
+-#endif
+--- a/arch/aarch64/pthread_arch.h
++++ b/arch/aarch64/pthread_arch.h
+@@ -8,4 +8,4 @@ static inline struct pthread *__pthread_
+ #define TLS_ABOVE_TP
+ #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 16)
+ 
+-#define CANCEL_REG_IP 33
++#define MC_PC pc
+--- a/arch/arm/atomic.h
++++ /dev/null
+@@ -1,261 +0,0 @@
+-#ifndef _INTERNAL_ATOMIC_H
+-#define _INTERNAL_ATOMIC_H
+-
+-#include <stdint.h>
+-
+-static inline int a_ctz_l(unsigned long x)
+-{
+-	static const char debruijn32[32] = {
+-		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
+-		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
+-	};
+-	return debruijn32[(x&-x)*0x076be629 >> 27];
+-}
+-
+-static inline int a_ctz_64(uint64_t x)
+-{
+-	uint32_t y = x;
+-	if (!y) {
+-		y = x>>32;
+-		return 32 + a_ctz_l(y);
+-	}
+-	return a_ctz_l(y);
+-}
+-
+-#if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ ||  __ARM_ARCH >= 7
+-
+-static inline void a_barrier()
+-{
+-	__asm__ __volatile__("dmb ish");
+-}
+-
+-static inline int a_cas(volatile int *p, int t, int s)
+-{
+-	int old;
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"1:	ldrex %0,%3\n"
+-		"	cmp %0,%1\n"
+-		"	bne 1f\n"
+-		"	strex %0,%2,%3\n"
+-		"	cmp %0, #0\n"
+-		"	bne 1b\n"
+-		"	mov %0, %1\n"
+-		"1:	dmb ish\n"
+-		: "=&r"(old)
+-		: "r"(t), "r"(s), "Q"(*p)
+-		: "memory", "cc" );
+-	return old;
+-}
+-
+-static inline int a_swap(volatile int *x, int v)
+-{
+-	int old, tmp;
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"1:	ldrex %0,%3\n"
+-		"	strex %1,%2,%3\n"
+-		"	cmp %1, #0\n"
+-		"	bne 1b\n"
+-		"	dmb ish\n"
+-		: "=&r"(old), "=&r"(tmp)
+-		: "r"(v), "Q"(*x)
+-		: "memory", "cc" );
+-	return old;
+-}
+-
+-static inline int a_fetch_add(volatile int *x, int v)
+-{
+-	int old, tmp;
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"1:	ldrex %0,%3\n"
+-		"	add %0,%0,%2\n"
+-		"	strex %1,%0,%3\n"
+-		"	cmp %1, #0\n"
+-		"	bne 1b\n"
+-		"	dmb ish\n"
+-		: "=&r"(old), "=&r"(tmp)
+-		: "r"(v), "Q"(*x)
+-		: "memory", "cc" );
+-	return old-v;
+-}
+-
+-static inline void a_inc(volatile int *x)
+-{
+-	int tmp, tmp2;
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"1:	ldrex %0,%2\n"
+-		"	add %0,%0,#1\n"
+-		"	strex %1,%0,%2\n"
+-		"	cmp %1, #0\n"
+-		"	bne 1b\n"
+-		"	dmb ish\n"
+-		: "=&r"(tmp), "=&r"(tmp2)
+-		: "Q"(*x)
+-		: "memory", "cc" );
+-}
+-
+-static inline void a_dec(volatile int *x)
+-{
+-	int tmp, tmp2;
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"1:	ldrex %0,%2\n"
+-		"	sub %0,%0,#1\n"
+-		"	strex %1,%0,%2\n"
+-		"	cmp %1, #0\n"
+-		"	bne 1b\n"
+-		"	dmb ish\n"
+-		: "=&r"(tmp), "=&r"(tmp2)
+-		: "Q"(*x)
+-		: "memory", "cc" );
+-}
+-
+-static inline void a_and(volatile int *x, int v)
+-{
+-	int tmp, tmp2;
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"1:	ldrex %0,%3\n"
+-		"	and %0,%0,%2\n"
+-		"	strex %1,%0,%3\n"
+-		"	cmp %1, #0\n"
+-		"	bne 1b\n"
+-		"	dmb ish\n"
+-		: "=&r"(tmp), "=&r"(tmp2)
+-		: "r"(v), "Q"(*x)
+-		: "memory", "cc" );
+-}
+-
+-static inline void a_or(volatile int *x, int v)
+-{
+-	int tmp, tmp2;
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"1:	ldrex %0,%3\n"
+-		"	orr %0,%0,%2\n"
+-		"	strex %1,%0,%3\n"
+-		"	cmp %1, #0\n"
+-		"	bne 1b\n"
+-		"	dmb ish\n"
+-		: "=&r"(tmp), "=&r"(tmp2)
+-		: "r"(v), "Q"(*x)
+-		: "memory", "cc" );
+-}
+-
+-static inline void a_store(volatile int *p, int x)
+-{
+-	__asm__ __volatile__(
+-		"	dmb ish\n"
+-		"	str %1,%0\n"
+-		"	dmb ish\n"
+-		: "=m"(*p)
+-		: "r"(x)
+-		: "memory", "cc" );
+-}
+-
+-#else
+-
+-int __a_cas(int, int, volatile int *) __attribute__((__visibility__("hidden")));
+-#define __k_cas __a_cas
+-
+-static inline void a_barrier()
+-{
+-	__asm__ __volatile__("bl __a_barrier"
+-		: : : "memory", "cc", "ip", "lr" );
+-}
+-
+-static inline int a_cas(volatile int *p, int t, int s)
+-{
+-	int old;
+-	for (;;) {
+-		if (!__k_cas(t, s, p))
+-			return t;
+-		if ((old=*p) != t)
+-			return old;
+-	}
+-}
+-
+-static inline int a_swap(volatile int *x, int v)
+-{
+-	int old;
+-	do old = *x;
+-	while (__k_cas(old, v, x));
+-	return old;
+-}
+-
+-static inline int a_fetch_add(volatile int *x, int v)
+-{
+-	int old;
+-	do old = *x;
+-	while (__k_cas(old, old+v, x));
+-	return old;
+-}
+-
+-static inline void a_inc(volatile int *x)
+-{
+-	a_fetch_add(x, 1);
+-}
+-
+-static inline void a_dec(volatile int *x)
+-{
+-	a_fetch_add(x, -1);
+-}
+-
+-static inline void a_store(volatile int *p, int x)
+-{
+-	a_barrier();
+-	*p = x;
+-	a_barrier();
+-}
+-
+-static inline void a_and(volatile int *p, int v)
+-{
+-	int old;
+-	do old = *p;
+-	while (__k_cas(old, old&v, p));
+-}
+-
+-static inline void a_or(volatile int *p, int v)
+-{
+-	int old;
+-	do old = *p;
+-	while (__k_cas(old, old|v, p));
+-}
+-
+-#endif
+-
+-static inline void *a_cas_p(volatile void *p, void *t, void *s)
+-{
+-	return (void *)a_cas(p, (int)t, (int)s);
+-}
+-
+-#define a_spin a_barrier
+-
+-static inline void a_crash()
+-{
+-	*(volatile char *)0=0;
+-}
+-
+-static inline void a_or_l(volatile void *p, long v)
+-{
+-	a_or(p, v);
+-}
+-
+-static inline void a_and_64(volatile uint64_t *p, uint64_t v)
+-{
+-	union { uint64_t v; uint32_t r[2]; } u = { v };
+-	a_and((int *)p, u.r[0]);
+-	a_and((int *)p+1, u.r[1]);
+-}
+-
+-static inline void a_or_64(volatile uint64_t *p, uint64_t v)
+-{
+-	union { uint64_t v; uint32_t r[2]; } u = { v };
+-	a_or((int *)p, u.r[0]);
+-	a_or((int *)p+1, u.r[1]);
+-}
+-
+-#endif
+--- /dev/null
++++ b/arch/arm/atomic_arch.h
+@@ -0,0 +1,76 @@
++__attribute__((__visibility__("hidden")))
++extern const void *__arm_atomics[3]; /* gettp, cas, barrier */
++
++#if ((__ARM_ARCH_6__ || __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__) && !__thumb__) \
++ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ ||  __ARM_ARCH >= 7
++
++#define a_ll a_ll /* self-named macro: lets preprocessor checks (cf. the "#ifndef a_barrier" later in this file) see that this arch supplies a_ll */
++static inline int a_ll(volatile int *p) /* load-linked half of an LL/SC pair: returns the int read from *p */
++{
++	int v;
++	__asm__ __volatile__ ("ldrex %0, %1" : "=r"(v) : "Q"(*p)); /* LDREX loads *p into v; "Q" passes *p as a memory operand */
++	return v;
++}
++
++#define a_sc a_sc /* self-named macro so preprocessor checks can detect the arch-provided a_sc */
++static inline int a_sc(volatile int *p, int v) /* store-conditional half of an LL/SC pair: tries to store v to *p */
++{
++	int r; /* receives the STREX status result */
++	__asm__ __volatile__ ("strex %0,%1,%2" : "=&r"(r) : "r"(v), "Q"(*p) : "memory"); /* "=&r" is early-clobber, so r gets a register distinct from the inputs */
++	return !r; /* nonzero iff the status is 0 (NOTE(review): 0 = store performed per the ARM ARM; confirm) */
++}
++
++#if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ ||  __ARM_ARCH >= 7
++
++#define a_barrier a_barrier /* marks a_barrier as defined so the "#ifndef a_barrier" fallback later in this file is skipped */
++static inline void a_barrier() /* memory barrier implemented inline with a DMB instruction */
++{
++	__asm__ __volatile__ ("dmb ish" : : : "memory"); /* the "memory" clobber also keeps the compiler from moving memory accesses across the barrier */
++}
++
++#endif
++
++#define a_pre_llsc a_barrier /* barrier hook for before an a_ll/a_sc sequence (going by the name; NOTE(review): confirm its use in the generic atomics header) */
++#define a_post_llsc a_barrier /* barrier hook for after an a_ll/a_sc sequence (same caveat) */
++
++#else
++
++#define a_cas a_cas /* self-named macro: marks a_cas as provided by this arch on the non-LL/SC path */
++static inline int a_cas(volatile int *p, int t, int s) /* compare-and-swap through the out-of-line __a_cas helper: returns t when the helper reports 0, else the differing value seen in *p */
++{
++	for (;;) { /* retry until the helper reports 0 or *p is observed to differ from t */
++		register int r0 __asm__("r0") = t; /* helper arguments are pinned to r0 (expected value), r1 (new value), r2 (address) */
++		register int r1 __asm__("r1") = s;
++		register volatile int *r2 __asm__("r2") = p;
++		int old;
++		__asm__ __volatile__ (
++			"bl __a_cas" /* call the helper; it is not defined in this header */
++			: "+r"(r0) : "r"(r1), "r"(r2) /* r0 is both an input and the result */
++			: "memory", "r3", "lr", "ip", "cc" ); /* bl overwrites lr; r3, ip and the flags are declared clobbered as well */
++		if (!r0) return t; /* helper left 0 in r0: treated as a successful swap */
++		if ((old=*p)!=t) return old; /* helper failed and *p really differs from t: report the current value */
++	} /* otherwise *p still equals t, so try again */
++}
++
++#endif
++
++#ifndef a_barrier /* fallback: compiled only when the inline "dmb ish" version earlier in this file was not */
++#define a_barrier a_barrier
++static inline void a_barrier() /* memory barrier via a call to the out-of-line __a_barrier helper */
++{
++	__asm__ __volatile__("bl __a_barrier" /* the helper is not defined in this header */
++		: : : "memory", "cc", "ip", "lr" ); /* no operands; bl overwrites lr, and ip, the flags and memory are declared clobbered */
++}
++#endif
++
++#define a_crash a_crash /* self-named macro: marks a_crash as provided by this arch */
++static inline void a_crash() /* emits one fixed raw instruction encoding, chosen by instruction set; going by the name, meant to abort execution */
++{
++	__asm__ __volatile__(
++#ifndef __thumb__
++		".word 0xe7f000f0" /* 32-bit encoding used when not compiling Thumb code (NOTE(review): presumably a permanently-undefined UDF encoding; confirm against the ARM ARM) */
++#else
++		".short 0xdeff" /* 16-bit encoding used when compiling Thumb code (same caveat) */
++#endif
++		: : : "memory");
++}
+--- a/arch/arm/bits/errno.h
++++ /dev/null
+@@ -1,134 +0,0 @@
+-#define EPERM            1
+-#define ENOENT           2
+-#define ESRCH            3
+-#define EINTR            4
+-#define EIO              5
+-#define ENXIO            6
+-#define E2BIG            7
+-#define ENOEXEC          8
+-#define EBADF            9
+-#define ECHILD          10
+-#define EAGAIN          11
+-#define ENOMEM          12
+-#define EACCES          13
+-#define EFAULT          14
+-#define ENOTBLK         15
+-#define EBUSY           16
+-#define EEXIST          17
+-#define EXDEV           18
+-#define ENODEV          19
+-#define ENOTDIR         20
+-#define EISDIR          21
+-#define EINVAL          22
+-#define ENFILE          23
+-#define EMFILE          24
+-#define ENOTTY          25
+-#define ETXTBSY         26
+-#define EFBIG           27
+-#define ENOSPC          28
+-#define ESPIPE          29
+-#define EROFS           30
+-#define EMLINK          31
+-#define EPIPE           32
+-#define EDOM            33
+-#define ERANGE          34
+-#define EDEADLK         35
+-#define ENAMETOOLONG    36
+-#define ENOLCK          37
+-#define ENOSYS          38
+-#define ENOTEMPTY       39
+-#define ELOOP           40
+-#define EWOULDBLOCK     EAGAIN
+-#define ENOMSG          42
+-#define EIDRM           43
+-#define ECHRNG          44
+-#define EL2NSYNC        45
+-#define EL3HLT          46
+-#define EL3RST          47
+-#define ELNRNG          48
+-#define EUNATCH         49
+-#define ENOCSI          50
+-#define EL2HLT          51
+-#define EBADE           52
+-#define EBADR           53
+-#define EXFULL          54
+-#define ENOANO          55
+-#define EBADRQC         56
+-#define EBADSLT         57
+-#define EDEADLOCK       EDEADLK
+-#define EBFONT          59
+-#define ENOSTR          60
+-#define ENODATA         61
+-#define ETIME           62
+-#define ENOSR           63
+-#define ENONET          64
+-#define ENOPKG          65
+-#define EREMOTE         66
+-#define ENOLINK         67
+-#define EADV            68
+-#define ESRMNT          69
+-#define ECOMM           70
+-#define EPROTO          71
+-#define EMULTIHOP       72
+-#define EDOTDOT         73
+-#define EBADMSG         74
+-#define EOVERFLOW       75
+-#define ENOTUNIQ        76
+-#define EBADFD          77
+-#define EREMCHG         78
+-#define ELIBACC         79
+-#define ELIBBAD         80
+-#define ELIBSCN         81
+-#define ELIBMAX         82
+-#define ELIBEXEC        83
+-#define EILSEQ          84
+-#define ERESTART        85
+-#define ESTRPIPE        86
+-#define EUSERS          87
+-#define ENOTSOCK        88
+-#define EDESTADDRREQ    89
+-#define EMSGSIZE        90
+-#define EPROTOTYPE      91
+-#define ENOPROTOOPT     92
+-#define EPROTONOSUPPORT 93
+-#define ESOCKTNOSUPPORT 94
+-#define EOPNOTSUPP      95
+-#define ENOTSUP         EOPNOTSUPP
+-#define EPFNOSUPPORT    96
+-#define EAFNOSUPPORT    97
+-#define EADDRINUSE      98
+-#define EADDRNOTAVAIL   99
+-#define ENETDOWN        100
+-#define ENETUNREACH     101
+-#define ENETRESET       102
+-#define ECONNABORTED    103
+-#define ECONNRESET      104
+-#define ENOBUFS         105
+-#define EISCONN         106
+-#define ENOTCONN        107
+-#define ESHUTDOWN       108
+-#define ETOOMANYREFS    109
+-#define ETIMEDOUT       110
+-#define ECONNREFUSED    111
+-#define EHOSTDOWN       112
+-#define EHOSTUNREACH    113
+-#define EALREADY        114
+-#define EINPROGRESS     115
+-#define ESTALE          116
+-#define EUCLEAN         117
+-#define ENOTNAM         118
+-#define ENAVAIL         119
+-#define EISNAM          120
+-#define EREMOTEIO       121
+-#define EDQUOT          122
+-#define ENOMEDIUM       123
+-#define EMEDIUMTYPE     124
+-#define ECANCELED       125
+-#define ENOKEY          126
+-#define EKEYEXPIRED     127
+-#define EKEYREVOKED     128
+-#define EKEYREJECTED    129
+-#define EOWNERDEAD      130
+-#define ENOTRECOVERABLE 131
+-#define ERFKILL         132
+-#define EHWPOISON       133
+--- a/arch/arm/bits/ioctl.h
++++ /dev/null
+@@ -1,197 +0,0 @@
+-#define _IOC(a,b,c,d) ( ((a)<<30) | ((b)<<8) | (c) | ((d)<<16) )
+-#define _IOC_NONE  0U
+-#define _IOC_WRITE 1U
+-#define _IOC_READ  2U
+-
+-#define _IO(a,b) _IOC(_IOC_NONE,(a),(b),0)
+-#define _IOW(a,b,c) _IOC(_IOC_WRITE,(a),(b),sizeof(c))
+-#define _IOR(a,b,c) _IOC(_IOC_READ,(a),(b),sizeof(c))
+-#define _IOWR(a,b,c) _IOC(_IOC_READ|_IOC_WRITE,(a),(b),sizeof(c))
+-
+-#define TCGETS		0x5401
+-#define TCSETS		0x5402
+-#define TCSETSW		0x5403
+-#define TCSETSF		0x5404
+-#define TCGETA		0x5405
+-#define TCSETA		0x5406
+-#define TCSETAW		0x5407
+-#define TCSETAF		0x5408
+-#define TCSBRK		0x5409
+-#define TCXONC		0x540A
+-#define TCFLSH		0x540B
+-#define TIOCEXCL	0x540C
+-#define TIOCNXCL	0x540D
+-#define TIOCSCTTY	0x540E
+-#define TIOCGPGRP	0x540F
+-#define TIOCSPGRP	0x5410
+-#define TIOCOUTQ	0x5411
+-#define TIOCSTI		0x5412
+-#define TIOCGWINSZ	0x5413
+-#define TIOCSWINSZ	0x5414
+-#define TIOCMGET	0x5415
+-#define TIOCMBIS	0x5416
+-#define TIOCMBIC	0x5417
+-#define TIOCMSET	0x5418
+-#define TIOCGSOFTCAR	0x5419
+-#define TIOCSSOFTCAR	0x541A
+-#define FIONREAD	0x541B
+-#define TIOCINQ		FIONREAD
+-#define TIOCLINUX	0x541C
+-#define TIOCCONS	0x541D
+-#define TIOCGSERIAL	0x541E
+-#define TIOCSSERIAL	0x541F
+-#define TIOCPKT		0x5420
+-#define FIONBIO		0x5421
+-#define TIOCNOTTY	0x5422
+-#define TIOCSETD	0x5423
+-#define TIOCGETD	0x5424
+-#define TCSBRKP		0x5425
+-#define TIOCTTYGSTRUCT	0x5426
+-#define TIOCSBRK	0x5427
+-#define TIOCCBRK	0x5428
+-#define TIOCGSID	0x5429
+-#define TIOCGPTN	0x80045430
+-#define TIOCSPTLCK	0x40045431
+-#define TCGETX		0x5432
+-#define TCSETX		0x5433
+-#define TCSETXF		0x5434
+-#define TCSETXW		0x5435
+-
+-#define FIONCLEX	0x5450
+-#define FIOCLEX		0x5451
+-#define FIOASYNC	0x5452
+-#define TIOCSERCONFIG	0x5453
+-#define TIOCSERGWILD	0x5454
+-#define TIOCSERSWILD	0x5455
+-#define TIOCGLCKTRMIOS	0x5456
+-#define TIOCSLCKTRMIOS	0x5457
+-#define TIOCSERGSTRUCT	0x5458
+-#define TIOCSERGETLSR   0x5459
+-#define TIOCSERGETMULTI 0x545A
+-#define TIOCSERSETMULTI 0x545B
+-
+-#define TIOCMIWAIT	0x545C
+-#define TIOCGICOUNT	0x545D
+-#define TIOCGHAYESESP   0x545E
+-#define TIOCSHAYESESP   0x545F
+-#define FIOQSIZE	0x5460
+-
+-#define TIOCPKT_DATA		 0
+-#define TIOCPKT_FLUSHREAD	 1
+-#define TIOCPKT_FLUSHWRITE	 2
+-#define TIOCPKT_STOP		 4
+-#define TIOCPKT_START		 8
+-#define TIOCPKT_NOSTOP		16
+-#define TIOCPKT_DOSTOP		32
+-#define TIOCPKT_IOCTL		64
+-
+-#define TIOCSER_TEMT    0x01
+-
+-struct winsize {
+-	unsigned short ws_row;
+-	unsigned short ws_col;
+-	unsigned short ws_xpixel;
+-	unsigned short ws_ypixel;
+-};
+-
+-#define TIOCM_LE        0x001
+-#define TIOCM_DTR       0x002
+-#define TIOCM_RTS       0x004
+-#define TIOCM_ST        0x008
+-#define TIOCM_SR        0x010
+-#define TIOCM_CTS       0x020
+-#define TIOCM_CAR       0x040
+-#define TIOCM_RNG       0x080
+-#define TIOCM_DSR       0x100
+-#define TIOCM_CD        TIOCM_CAR
+-#define TIOCM_RI        TIOCM_RNG
+-#define TIOCM_OUT1      0x2000
+-#define TIOCM_OUT2      0x4000
+-#define TIOCM_LOOP      0x8000
+-#define TIOCM_MODEM_BITS TIOCM_OUT2
+-
+-#define N_TTY           0
+-#define N_SLIP          1
+-#define N_MOUSE         2
+-#define N_PPP           3
+-#define N_STRIP         4
+-#define N_AX25          5
+-#define N_X25           6
+-#define N_6PACK         7
+-#define N_MASC          8
+-#define N_R3964         9
+-#define N_PROFIBUS_FDL  10
+-#define N_IRDA          11
+-#define N_SMSBLOCK      12
+-#define N_HDLC          13
+-#define N_SYNC_PPP      14
+-#define N_HCI           15
+-
+-#define FIOSETOWN       0x8901
+-#define SIOCSPGRP       0x8902
+-#define FIOGETOWN       0x8903
+-#define SIOCGPGRP       0x8904
+-#define SIOCATMARK      0x8905
+-#define SIOCGSTAMP      0x8906
+-
+-#define SIOCADDRT       0x890B
+-#define SIOCDELRT       0x890C
+-#define SIOCRTMSG       0x890D
+-
+-#define SIOCGIFNAME     0x8910
+-#define SIOCSIFLINK     0x8911
+-#define SIOCGIFCONF     0x8912
+-#define SIOCGIFFLAGS    0x8913
+-#define SIOCSIFFLAGS    0x8914
+-#define SIOCGIFADDR     0x8915
+-#define SIOCSIFADDR     0x8916
+-#define SIOCGIFDSTADDR  0x8917
+-#define SIOCSIFDSTADDR  0x8918
+-#define SIOCGIFBRDADDR  0x8919
+-#define SIOCSIFBRDADDR  0x891a
+-#define SIOCGIFNETMASK  0x891b
+-#define SIOCSIFNETMASK  0x891c
+-#define SIOCGIFMETRIC   0x891d
+-#define SIOCSIFMETRIC   0x891e
+-#define SIOCGIFMEM      0x891f
+-#define SIOCSIFMEM      0x8920
+-#define SIOCGIFMTU      0x8921
+-#define SIOCSIFMTU      0x8922
+-#define SIOCSIFHWADDR   0x8924
+-#define SIOCGIFENCAP    0x8925
+-#define SIOCSIFENCAP    0x8926
+-#define SIOCGIFHWADDR   0x8927
+-#define SIOCGIFSLAVE    0x8929
+-#define SIOCSIFSLAVE    0x8930
+-#define SIOCADDMULTI    0x8931
+-#define SIOCDELMULTI    0x8932
+-#define SIOCGIFINDEX    0x8933
+-#define SIOGIFINDEX     SIOCGIFINDEX
+-#define SIOCSIFPFLAGS   0x8934
+-#define SIOCGIFPFLAGS   0x8935
+-#define SIOCDIFADDR     0x8936
+-#define SIOCSIFHWBROADCAST 0x8937
+-#define SIOCGIFCOUNT    0x8938
+-
+-#define SIOCGIFBR       0x8940
+-#define SIOCSIFBR       0x8941
+-
+-#define SIOCGIFTXQLEN   0x8942
+-#define SIOCSIFTXQLEN   0x8943
+-
+-#define SIOCDARP        0x8953
+-#define SIOCGARP        0x8954
+-#define SIOCSARP        0x8955
+-
+-#define SIOCDRARP       0x8960
+-#define SIOCGRARP       0x8961
+-#define SIOCSRARP       0x8962
+-
+-#define SIOCGIFMAP      0x8970
+-#define SIOCSIFMAP      0x8971
+-
+-#define SIOCADDDLCI     0x8980
+-#define SIOCDELDLCI     0x8981
+-
+-#define SIOCDEVPRIVATE		0x89F0
+-#define SIOCPROTOPRIVATE	0x89E0
+--- a/arch/arm/bits/ipc.h
++++ /dev/null
+@@ -1,14 +0,0 @@
+-struct ipc_perm
+-{
+-	key_t __ipc_perm_key;
+-	uid_t uid;
+-	gid_t gid;
+-	uid_t cuid;
+-	gid_t cgid;
+-	mode_t mode;
+-	int __ipc_perm_seq;
+-	long __pad1;
+-	long __pad2;
+-};
+-
+-#define IPC_64 0x100
+--- a/arch/arm/bits/mman.h
++++ b/arch/arm/bits/mman.h
+@@ -37,6 +37,7 @@
+ 
+ #define MCL_CURRENT     1
+ #define MCL_FUTURE      2
++#define MCL_ONFAULT     4
+ 
+ #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+ #define MADV_NORMAL      0
+--- a/arch/arm/bits/msg.h
++++ /dev/null
+@@ -1,16 +0,0 @@
+-struct msqid_ds
+-{
+-	struct ipc_perm msg_perm;
+-	time_t msg_stime;
+-	int __unused1;
+-	time_t msg_rtime;
+-	int __unused2;
+-	time_t msg_ctime;
+-	int __unused3;
+-	unsigned long msg_cbytes;
+-	msgqnum_t msg_qnum;
+-	msglen_t msg_qbytes;
+-	pid_t msg_lspid;
+-	pid_t msg_lrpid;
+-	unsigned long __unused[2];
+-};
+--- a/arch/arm/bits/sem.h
++++ /dev/null
+@@ -1,16 +0,0 @@
+-struct semid_ds {
+-	struct ipc_perm sem_perm;
+-	time_t sem_otime;
+-	time_t __unused1;
+-	time_t sem_ctime;
+-	time_t __unused2;
+-#if __BYTE_ORDER == __LITTLE_ENDIAN
+-	unsigned short sem_nsems;
+-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+-#else
+-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+-	unsigned short sem_nsems;
+-#endif
+-	time_t __unused3;
+-	time_t __unused4;
+-};
+--- a/arch/arm/bits/shm.h
++++ /dev/null
+@@ -1,29 +0,0 @@
+-#define SHMLBA 4096
+-
+-struct shmid_ds
+-{
+-	struct ipc_perm shm_perm;
+-	size_t shm_segsz;
+-	time_t shm_atime;
+-	int __unused1;
+-	time_t shm_dtime;
+-	int __unused2;
+-	time_t shm_ctime;
+-	int __unused3;
+-	pid_t shm_cpid;
+-	pid_t shm_lpid;
+-	unsigned long shm_nattch;
+-	unsigned long __pad1;
+-	unsigned long __pad2;
+-};
+-
+-struct shminfo {
+-	unsigned long shmmax, shmmin, shmmni, shmseg, shmall, __unused[4];
+-};
+-
+-struct shm_info {
+-	int __used_ids;
+-	unsigned long shm_tot, shm_rss, shm_swp;
+-	unsigned long __swap_attempts, __swap_successes;
+-};
+-
+--- a/arch/arm/bits/socket.h
++++ /dev/null
+@@ -1,17 +0,0 @@
+-struct msghdr
+-{
+-	void *msg_name;
+-	socklen_t msg_namelen;
+-	struct iovec *msg_iov;
+-	int msg_iovlen;
+-	void *msg_control;
+-	socklen_t msg_controllen;
+-	int msg_flags;
+-};
+-
+-struct cmsghdr
+-{
+-	socklen_t cmsg_len;
+-	int cmsg_level;
+-	int cmsg_type;
+-};
+--- a/arch/arm/bits/statfs.h
++++ /dev/null
+@@ -1,7 +0,0 @@
+-struct statfs {
+-	unsigned long f_type, f_bsize;
+-	fsblkcnt_t f_blocks, f_bfree, f_bavail;
+-	fsfilcnt_t f_files, f_ffree;
+-	fsid_t f_fsid;
+-	unsigned long f_namelen, f_frsize, f_flags, f_spare[4];
+-};
+--- a/arch/arm/bits/stdarg.h
++++ /dev/null
+@@ -1,4 +0,0 @@
+-#define va_start(v,l)   __builtin_va_start(v,l)
+-#define va_end(v)       __builtin_va_end(v)
+-#define va_arg(v,l)     __builtin_va_arg(v,l)
+-#define va_copy(d,s)    __builtin_va_copy(d,s)
+--- a/arch/arm/bits/syscall.h
++++ b/arch/arm/bits/syscall.h
+@@ -341,6 +341,9 @@
+ #define __NR_memfd_create	385
+ #define __NR_bpf	386
+ #define __NR_execveat	387
++#define __NR_userfaultfd	388
++#define __NR_membarrier		389
++#define __NR_mlock2		390
+ 
+ #define __ARM_NR_breakpoint	0x0f0001
+ #define __ARM_NR_cacheflush	0x0f0002
+@@ -693,3 +696,6 @@
+ #define SYS_memfd_create	385
+ #define SYS_bpf	386
+ #define SYS_execveat	387
++#define SYS_userfaultfd	388
++#define SYS_membarrier		389
++#define SYS_mlock2		390
+--- a/arch/arm/bits/termios.h
++++ /dev/null
+@@ -1,160 +0,0 @@
+-struct termios
+-{
+-	tcflag_t c_iflag;
+-	tcflag_t c_oflag;
+-	tcflag_t c_cflag;
+-	tcflag_t c_lflag;
+-	cc_t c_line;
+-	cc_t c_cc[NCCS];
+-	speed_t __c_ispeed;
+-	speed_t __c_ospeed;
+-};
+-
+-#define VINTR     0
+-#define VQUIT     1
+-#define VERASE    2
+-#define VKILL     3
+-#define VEOF      4
+-#define VTIME     5
+-#define VMIN      6
+-#define VSWTC     7
+-#define VSTART    8
+-#define VSTOP     9
+-#define VSUSP    10
+-#define VEOL     11
+-#define VREPRINT 12
+-#define VDISCARD 13
+-#define VWERASE  14
+-#define VLNEXT   15
+-#define VEOL2    16
+-
+-#define IGNBRK  0000001
+-#define BRKINT  0000002
+-#define IGNPAR  0000004
+-#define PARMRK  0000010
+-#define INPCK   0000020
+-#define ISTRIP  0000040
+-#define INLCR   0000100
+-#define IGNCR   0000200
+-#define ICRNL   0000400
+-#define IUCLC   0001000
+-#define IXON    0002000
+-#define IXANY   0004000
+-#define IXOFF   0010000
+-#define IMAXBEL 0020000
+-#define IUTF8   0040000
+-
+-#define OPOST  0000001
+-#define OLCUC  0000002
+-#define ONLCR  0000004
+-#define OCRNL  0000010
+-#define ONOCR  0000020
+-#define ONLRET 0000040
+-#define OFILL  0000100
+-#define OFDEL  0000200
+-#define NLDLY  0000400
+-#define NL0    0000000
+-#define NL1    0000400
+-#define CRDLY  0003000
+-#define CR0    0000000
+-#define CR1    0001000
+-#define CR2    0002000
+-#define CR3    0003000
+-#define TABDLY 0014000
+-#define TAB0   0000000
+-#define TAB1   0004000
+-#define TAB2   0010000
+-#define TAB3   0014000
+-#define BSDLY  0020000
+-#define BS0    0000000
+-#define BS1    0020000
+-#define FFDLY  0100000
+-#define FF0    0000000
+-#define FF1    0100000
+-
+-#define VTDLY  0040000
+-#define VT0    0000000
+-#define VT1    0040000
+-
+-#define B0       0000000
+-#define B50      0000001
+-#define B75      0000002
+-#define B110     0000003
+-#define B134     0000004
+-#define B150     0000005
+-#define B200     0000006
+-#define B300     0000007
+-#define B600     0000010
+-#define B1200    0000011
+-#define B1800    0000012
+-#define B2400    0000013
+-#define B4800    0000014
+-#define B9600    0000015
+-#define B19200   0000016
+-#define B38400   0000017
+-
+-#define B57600   0010001
+-#define B115200  0010002
+-#define B230400  0010003
+-#define B460800  0010004
+-#define B500000  0010005
+-#define B576000  0010006
+-#define B921600  0010007
+-#define B1000000 0010010
+-#define B1152000 0010011
+-#define B1500000 0010012
+-#define B2000000 0010013
+-#define B2500000 0010014
+-#define B3000000 0010015
+-#define B3500000 0010016
+-#define B4000000 0010017
+-
+-#define CBAUD    0010017
+-
+-#define CSIZE  0000060
+-#define CS5    0000000
+-#define CS6    0000020
+-#define CS7    0000040
+-#define CS8    0000060
+-#define CSTOPB 0000100
+-#define CREAD  0000200
+-#define PARENB 0000400
+-#define PARODD 0001000
+-#define HUPCL  0002000
+-#define CLOCAL 0004000
+-
+-#define ISIG   0000001
+-#define ICANON 0000002
+-#define ECHO   0000010
+-#define ECHOE  0000020
+-#define ECHOK  0000040
+-#define ECHONL 0000100
+-#define NOFLSH 0000200
+-#define TOSTOP 0000400
+-#define IEXTEN 0100000
+-
+-#define ECHOCTL 0001000
+-#define ECHOPRT 0002000
+-#define ECHOKE 0004000
+-#define FLUSHO 0010000
+-#define PENDIN 0040000
+-
+-#define TCOOFF 0
+-#define TCOON  1
+-#define TCIOFF 2
+-#define TCION  3
+-
+-#define TCIFLUSH  0
+-#define TCOFLUSH  1
+-#define TCIOFLUSH 2
+-
+-#define TCSANOW   0
+-#define TCSADRAIN 1
+-#define TCSAFLUSH 2
+-
+-#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+-#define CBAUDEX 0010000
+-#define CRTSCTS  020000000000
+-#define EXTPROC 0200000
+-#define XTABS  0014000
+-#endif
+--- a/arch/arm/pthread_arch.h
++++ b/arch/arm/pthread_arch.h
+@@ -27,4 +27,4 @@ static inline pthread_t __pthread_self()
+ #define TLS_ABOVE_TP
+ #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 8)
+ 
+-#define CANCEL_REG_IP 18
++#define MC_PC arm_pc
+--- a/arch/arm/reloc.h
++++ b/arch/arm/reloc.h
+@@ -6,10 +6,10 @@
+ #define ENDIAN_SUFFIX ""
+ #endif
+ 
+-#if __SOFTFP__
+-#define FP_SUFFIX ""
+-#else
++#if __ARM_PCS_VFP
+ #define FP_SUFFIX "hf"
++#else
++#define FP_SUFFIX ""
+ #endif
+ 
+ #define LDSO_ARCH "arm" ENDIAN_SUFFIX FP_SUFFIX
+@@ -28,10 +28,5 @@
+ #define REL_TPOFF       R_ARM_TLS_TPOFF32
+ //#define REL_TLSDESC     R_ARM_TLS_DESC
+ 
+-#ifdef __thumb__
+ #define CRTJMP(pc,sp) __asm__ __volatile__( \
+ 	"mov sp,%1 ; bx %0" : : "r"(pc), "r"(sp) : "memory" )
+-#else
+-#define CRTJMP(pc,sp) __asm__ __volatile__( \
+-	"mov sp,%1 ; tst %0,#1 ; moveq pc,%0 ; bx %0" : : "r"(pc), "r"(sp) : "memory" )
+-#endif
+--- a/arch/arm/src/__aeabi_atexit.c
++++ /dev/null
+@@ -1,6 +0,0 @@
+-int __cxa_atexit(void (*func)(void *), void *arg, void *dso);
+-
+-int __aeabi_atexit (void *obj, void (*func) (void *), void *d)
+-{
+-	return __cxa_atexit (func, obj, d);
+-}
+--- a/arch/arm/src/__aeabi_memclr.c
++++ /dev/null
+@@ -1,9 +0,0 @@
+-#include <string.h>
+-#include "libc.h"
+-
+-void __aeabi_memclr(void *dest, size_t n)
+-{
+-	memset(dest, 0, n);
+-}
+-weak_alias(__aeabi_memclr, __aeabi_memclr4);
+-weak_alias(__aeabi_memclr, __aeabi_memclr8);
+--- a/arch/arm/src/__aeabi_memcpy.c
++++ /dev/null
+@@ -1,9 +0,0 @@
+-#include <string.h>
+-#include "libc.h"
+-
+-void __aeabi_memcpy(void *restrict dest, const void *restrict src, size_t n)
+-{
+-	memcpy(dest, src, n);
+-}
+-weak_alias(__aeabi_memcpy, __aeabi_memcpy4);
+-weak_alias(__aeabi_memcpy, __aeabi_memcpy8);
+--- a/arch/arm/src/__aeabi_memmove.c
++++ /dev/null
+@@ -1,9 +0,0 @@
+-#include <string.h>
+-#include "libc.h"
+-
+-void __aeabi_memmove(void *dest, const void *src, size_t n)
+-{
+-	memmove(dest, src, n);
+-}
+-weak_alias(__aeabi_memmove, __aeabi_memmove4);
+-weak_alias(__aeabi_memmove, __aeabi_memmove8);
+--- a/arch/arm/src/__aeabi_memset.c
++++ /dev/null
+@@ -1,9 +0,0 @@
+-#include <string.h>
+-#include "libc.h"
+-
+-void __aeabi_memset(void *dest, size_t n, int c)
+-{
+-	memset(dest, c, n);
+-}
+-weak_alias(__aeabi_memset, __aeabi_memset4);
+-weak_alias(__aeabi_memset, __aeabi_memset8);
+--- a/arch/arm/src/__set_thread_area.c
++++ /dev/null
+@@ -1,49 +0,0 @@
+-#include <stdint.h>
+-#include <elf.h>
+-#include "pthread_impl.h"
+-#include "libc.h"
+-
+-#define HWCAP_TLS (1 << 15)
+-
+-extern const unsigned char __attribute__((__visibility__("hidden")))
+-	__a_barrier_dummy[], __a_barrier_oldkuser[],
+-	__a_barrier_v6[], __a_barrier_v7[],
+-	__a_cas_dummy[], __a_cas_v6[], __a_cas_v7[],
+-	__a_gettp_dummy[];
+-
+-#define __a_barrier_kuser 0xffff0fa0
+-#define __a_cas_kuser 0xffff0fc0
+-#define __a_gettp_kuser 0xffff0fe0
+-
+-extern uintptr_t __attribute__((__visibility__("hidden")))
+-	__a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr;
+-
+-#define SET(op,ver) (__a_##op##_ptr = \
+-	(uintptr_t)__a_##op##_##ver - (uintptr_t)__a_##op##_dummy)
+-
+-int __set_thread_area(void *p)
+-{
+-#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7
+-	if (__hwcap & HWCAP_TLS) {
+-		size_t *aux;
+-		SET(cas, v7);
+-		SET(barrier, v7);
+-		for (aux=libc.auxv; *aux; aux+=2) {
+-			if (*aux != AT_PLATFORM) continue;
+-			const char *s = (void *)aux[1];
+-			if (s[0]!='v' || s[1]!='6' || s[2]-'0'<10u) break;
+-			SET(cas, v6);
+-			SET(barrier, v6);
+-			break;
+-		}
+-	} else {
+-		int ver = *(int *)0xffff0ffc;
+-		SET(gettp, kuser);
+-		SET(cas, kuser);
+-		SET(barrier, kuser);
+-		if (ver < 2) a_crash();
+-		if (ver < 3) SET(barrier, oldkuser);
+-	}
+-#endif
+-	return __syscall(0xf0005, p);
+-}
+--- a/arch/arm/src/arm/atomics.s
++++ /dev/null
+@@ -1,116 +0,0 @@
+-.text
+-
+-.global __a_barrier
+-.hidden __a_barrier
+-.type __a_barrier,%function
+-__a_barrier:
+-	ldr ip,1f
+-	ldr ip,[pc,ip]
+-	add pc,pc,ip
+-1:	.word __a_barrier_ptr-1b
+-.global __a_barrier_dummy
+-.hidden __a_barrier_dummy
+-__a_barrier_dummy:
+-	tst lr,#1
+-	moveq pc,lr
+-	bx lr
+-.global __a_barrier_oldkuser
+-.hidden __a_barrier_oldkuser
+-__a_barrier_oldkuser:
+-	push {r0,r1,r2,r3,ip,lr}
+-	mov r1,r0
+-	mov r2,sp
+-	ldr ip,=0xffff0fc0
+-	mov lr,pc
+-	mov pc,ip
+-	pop {r0,r1,r2,r3,ip,lr}
+-	tst lr,#1
+-	moveq pc,lr
+-	bx lr
+-.global __a_barrier_v6
+-.hidden __a_barrier_v6
+-__a_barrier_v6:
+-	mcr p15,0,r0,c7,c10,5
+-	bx lr
+-.global __a_barrier_v7
+-.hidden __a_barrier_v7
+-__a_barrier_v7:
+-	.word 0xf57ff05b        /* dmb ish */
+-	bx lr
+-
+-.global __a_cas
+-.hidden __a_cas
+-.type __a_cas,%function
+-__a_cas:
+-	ldr ip,1f
+-	ldr ip,[pc,ip]
+-	add pc,pc,ip
+-1:	.word __a_cas_ptr-1b
+-.global __a_cas_dummy
+-.hidden __a_cas_dummy
+-__a_cas_dummy:
+-	mov r3,r0
+-	ldr r0,[r2]
+-	subs r0,r3,r0
+-	streq r1,[r2]
+-	tst lr,#1
+-	moveq pc,lr
+-	bx lr
+-.global __a_cas_v6
+-.hidden __a_cas_v6
+-__a_cas_v6:
+-	mov r3,r0
+-	mcr p15,0,r0,c7,c10,5
+-1:	.word 0xe1920f9f        /* ldrex r0,[r2] */
+-	subs r0,r3,r0
+-	.word 0x01820f91        /* strexeq r0,r1,[r2] */
+-	teqeq r0,#1
+-	beq 1b
+-	mcr p15,0,r0,c7,c10,5
+-	bx lr
+-.global __a_cas_v7
+-.hidden __a_cas_v7
+-__a_cas_v7:
+-	mov r3,r0
+-	.word 0xf57ff05b        /* dmb ish */
+-1:	.word 0xe1920f9f        /* ldrex r0,[r2] */
+-	subs r0,r3,r0
+-	.word 0x01820f91        /* strexeq r0,r1,[r2] */
+-	teqeq r0,#1
+-	beq 1b
+-	.word 0xf57ff05b        /* dmb ish */
+-	bx lr
+-
+-.global __aeabi_read_tp
+-.type __aeabi_read_tp,%function
+-__aeabi_read_tp:
+-
+-.global __a_gettp
+-.hidden __a_gettp
+-.type __a_gettp,%function
+-__a_gettp:
+-	ldr r0,1f
+-	ldr r0,[pc,r0]
+-	add pc,pc,r0
+-1:	.word __a_gettp_ptr-1b
+-.global __a_gettp_dummy
+-.hidden __a_gettp_dummy
+-__a_gettp_dummy:
+-	mrc p15,0,r0,c13,c0,3
+-	bx lr
+-
+-.data
+-.global __a_barrier_ptr
+-.hidden __a_barrier_ptr
+-__a_barrier_ptr:
+-	.word 0
+-
+-.global __a_cas_ptr
+-.hidden __a_cas_ptr
+-__a_cas_ptr:
+-	.word 0
+-
+-.global __a_gettp_ptr
+-.hidden __a_gettp_ptr
+-__a_gettp_ptr:
+-	.word 0
+--- a/arch/arm/src/find_exidx.c
++++ /dev/null
+@@ -1,42 +0,0 @@
+-#define _GNU_SOURCE
+-#include <link.h>
+-#include <stdint.h>
+-
+-struct find_exidx_data {
+-	uintptr_t pc, exidx_start;
+-	int exidx_len;
+-};
+-
+-static int find_exidx(struct dl_phdr_info *info, size_t size, void *ptr)
+-{
+-	struct find_exidx_data *data = ptr;
+-	const ElfW(Phdr) *phdr = info->dlpi_phdr;
+-	uintptr_t addr, exidx_start = 0;
+-	int i, match = 0, exidx_len = 0;
+-
+-	for (i = info->dlpi_phnum; i > 0; i--, phdr++) {
+-		addr = info->dlpi_addr + phdr->p_vaddr;
+-		switch (phdr->p_type) {
+-		case PT_LOAD:
+-			match |= data->pc >= addr && data->pc < addr + phdr->p_memsz;
+-			break;
+-		case PT_ARM_EXIDX:
+-			exidx_start = addr;
+-			exidx_len = phdr->p_memsz;
+-			break;
+-		}
+-	}
+-	data->exidx_start = exidx_start;
+-	data->exidx_len = exidx_len;
+-	return match;
+-}
+-
+-uintptr_t __gnu_Unwind_Find_exidx(uintptr_t pc, int *pcount)
+-{
+-	struct find_exidx_data data;
+-	data.pc = pc;
+-	if (dl_iterate_phdr(find_exidx, &data) <= 0)
+-		return 0;
+-	*pcount = data.exidx_len / 8;
+-	return data.exidx_start;
+-}
+--- /dev/null
++++ b/arch/generic/bits/errno.h
+@@ -0,0 +1,134 @@
++#define EPERM            1
++#define ENOENT           2
++#define ESRCH            3
++#define EINTR            4
++#define EIO              5
++#define ENXIO            6
++#define E2BIG            7
++#define ENOEXEC          8
++#define EBADF            9
++#define ECHILD          10
++#define EAGAIN          11
++#define ENOMEM          12
++#define EACCES          13
++#define EFAULT          14
++#define ENOTBLK         15
++#define EBUSY           16
++#define EEXIST          17
++#define EXDEV           18
++#define ENODEV          19
++#define ENOTDIR         20
++#define EISDIR          21
++#define EINVAL          22
++#define ENFILE          23
++#define EMFILE          24
++#define ENOTTY          25
++#define ETXTBSY         26
++#define EFBIG           27
++#define ENOSPC          28
++#define ESPIPE          29
++#define EROFS           30
++#define EMLINK          31
++#define EPIPE           32
++#define EDOM            33
++#define ERANGE          34
++#define EDEADLK         35
++#define ENAMETOOLONG    36
++#define ENOLCK          37
++#define ENOSYS          38
++#define ENOTEMPTY       39
++#define ELOOP           40
++#define EWOULDBLOCK     EAGAIN
++#define ENOMSG          42
++#define EIDRM           43
++#define ECHRNG          44
++#define EL2NSYNC        45
++#define EL3HLT          46
++#define EL3RST          47
++#define ELNRNG          48
++#define EUNATCH         49
++#define ENOCSI          50
++#define EL2HLT          51
++#define EBADE           52
++#define EBADR           53
++#define EXFULL          54
++#define ENOANO          55
++#define EBADRQC         56
++#define EBADSLT         57
++#define EDEADLOCK       EDEADLK
++#define EBFONT          59
++#define ENOSTR          60
++#define ENODATA         61
++#define ETIME           62
++#define ENOSR           63
++#define ENONET          64
++#define ENOPKG          65
++#define EREMOTE         66
++#define ENOLINK         67
++#define EADV            68
++#define ESRMNT          69
++#define ECOMM           70
++#define EPROTO          71
++#define EMULTIHOP       72
++#define EDOTDOT         73
++#define EBADMSG         74
++#define EOVERFLOW       75
++#define ENOTUNIQ        76
++#define EBADFD          77
++#define EREMCHG         78
++#define ELIBACC         79
++#define ELIBBAD         80
++#define ELIBSCN         81
++#define ELIBMAX         82
++#define ELIBEXEC        83
++#define EILSEQ          84
++#define ERESTART        85
++#define ESTRPIPE        86
++#define EUSERS          87
++#define ENOTSOCK        88
++#define EDESTADDRREQ    89
++#define EMSGSIZE        90
++#define EPROTOTYPE      91
++#define ENOPROTOOPT     92
++#define EPROTONOSUPPORT 93
++#define ESOCKTNOSUPPORT 94
++#define EOPNOTSUPP      95
++#define ENOTSUP         EOPNOTSUPP
++#define EPFNOSUPPORT    96
++#define EAFNOSUPPORT    97
++#define EADDRINUSE      98
++#define EADDRNOTAVAIL   99
++#define ENETDOWN        100
++#define ENETUNREACH     101
++#define ENETRESET       102
++#define ECONNABORTED    103
++#define ECONNRESET      104
++#define ENOBUFS         105
++#define EISCONN         106
++#define ENOTCONN        107
++#define ESHUTDOWN       108
++#define ETOOMANYREFS    109
++#define ETIMEDOUT       110
++#define ECONNREFUSED    111
++#define EHOSTDOWN       112
++#define EHOSTUNREACH    113
++#define EALREADY        114
++#define EINPROGRESS     115
++#define ESTALE          116
++#define EUCLEAN         117
++#define ENOTNAM         118
++#define ENAVAIL         119
++#define EISNAM          120
++#define EREMOTEIO       121
++#define EDQUOT          122
++#define ENOMEDIUM       123
++#define EMEDIUMTYPE     124
++#define ECANCELED       125
++#define ENOKEY          126
++#define EKEYEXPIRED     127
++#define EKEYREVOKED     128
++#define EKEYREJECTED    129
++#define EOWNERDEAD      130
++#define ENOTRECOVERABLE 131
++#define ERFKILL         132
++#define EHWPOISON       133
+--- /dev/null
++++ b/arch/generic/bits/fcntl.h
+@@ -0,0 +1,40 @@
++#define O_CREAT        0100
++#define O_EXCL         0200
++#define O_NOCTTY       0400
++#define O_TRUNC       01000
++#define O_APPEND      02000
++#define O_NONBLOCK    04000
++#define O_DSYNC      010000
++#define O_SYNC     04010000
++#define O_RSYNC    04010000
++#define O_DIRECTORY 0200000
++#define O_NOFOLLOW  0400000
++#define O_CLOEXEC  02000000
++
++#define O_ASYNC      020000
++#define O_DIRECT     040000
++#define O_LARGEFILE 0100000
++#define O_NOATIME  01000000
++#define O_PATH    010000000
++#define O_TMPFILE 020200000
++#define O_NDELAY O_NONBLOCK
++
++#define F_DUPFD  0
++#define F_GETFD  1
++#define F_SETFD  2
++#define F_GETFL  3
++#define F_SETFL  4
++
++#define F_SETOWN 8
++#define F_GETOWN 9
++#define F_SETSIG 10
++#define F_GETSIG 11
++
++#define F_GETLK 12
++#define F_SETLK 13
++#define F_SETLKW 14
++
++#define F_SETOWN_EX 15
++#define F_GETOWN_EX 16
++
++#define F_GETOWNER_UIDS 17
+--- /dev/null
++++ b/arch/generic/bits/fenv.h
+@@ -0,0 +1,10 @@
++#define FE_ALL_EXCEPT 0 /* empty mask: this header defines no individual FE_* exception flags */
++#define FE_TONEAREST  0 /* the only rounding-mode macro defined in this header */
++
++typedef unsigned long fexcept_t; /* type used to hold floating-point exception flag state */
++
++typedef struct {
++	unsigned long __cw; /* single opaque word; NOTE(review): "cw" presumably stands for control word; confirm */
++} fenv_t;
++
++#define FE_DFL_ENV      ((const fenv_t *) -1) /* sentinel pointer value (-1 cast to a pointer), not the address of a fenv_t object */
+--- /dev/null
++++ b/arch/generic/bits/ioctl.h
+@@ -0,0 +1,197 @@
++#define _IOC(a,b,c,d) ( ((a)<<30) | ((b)<<8) | (c) | ((d)<<16) )
++#define _IOC_NONE  0U
++#define _IOC_WRITE 1U
++#define _IOC_READ  2U
++
++#define _IO(a,b) _IOC(_IOC_NONE,(a),(b),0)
++#define _IOW(a,b,c) _IOC(_IOC_WRITE,(a),(b),sizeof(c))
++#define _IOR(a,b,c) _IOC(_IOC_READ,(a),(b),sizeof(c))
++#define _IOWR(a,b,c) _IOC(_IOC_READ|_IOC_WRITE,(a),(b),sizeof(c))
++
++#define TCGETS		0x5401
++#define TCSETS		0x5402
++#define TCSETSW		0x5403
++#define TCSETSF		0x5404
++#define TCGETA		0x5405
++#define TCSETA		0x5406
++#define TCSETAW		0x5407
++#define TCSETAF		0x5408
++#define TCSBRK		0x5409
++#define TCXONC		0x540A
++#define TCFLSH		0x540B
++#define TIOCEXCL	0x540C
++#define TIOCNXCL	0x540D
++#define TIOCSCTTY	0x540E
++#define TIOCGPGRP	0x540F
++#define TIOCSPGRP	0x5410
++#define TIOCOUTQ	0x5411
++#define TIOCSTI		0x5412
++#define TIOCGWINSZ	0x5413
++#define TIOCSWINSZ	0x5414
++#define TIOCMGET	0x5415
++#define TIOCMBIS	0x5416
++#define TIOCMBIC	0x5417
++#define TIOCMSET	0x5418
++#define TIOCGSOFTCAR	0x5419
++#define TIOCSSOFTCAR	0x541A
++#define FIONREAD	0x541B
++#define TIOCINQ		FIONREAD
++#define TIOCLINUX	0x541C
++#define TIOCCONS	0x541D
++#define TIOCGSERIAL	0x541E
++#define TIOCSSERIAL	0x541F
++#define TIOCPKT		0x5420
++#define FIONBIO		0x5421
++#define TIOCNOTTY	0x5422
++#define TIOCSETD	0x5423
++#define TIOCGETD	0x5424
++#define TCSBRKP		0x5425
++#define TIOCTTYGSTRUCT	0x5426
++#define TIOCSBRK	0x5427
++#define TIOCCBRK	0x5428
++#define TIOCGSID	0x5429
++#define TIOCGPTN	0x80045430
++#define TIOCSPTLCK	0x40045431
++#define TCGETX		0x5432
++#define TCSETX		0x5433
++#define TCSETXF		0x5434
++#define TCSETXW		0x5435
++
++#define FIONCLEX	0x5450
++#define FIOCLEX		0x5451
++#define FIOASYNC	0x5452
++#define TIOCSERCONFIG	0x5453
++#define TIOCSERGWILD	0x5454
++#define TIOCSERSWILD	0x5455
++#define TIOCGLCKTRMIOS	0x5456
++#define TIOCSLCKTRMIOS	0x5457
++#define TIOCSERGSTRUCT	0x5458
++#define TIOCSERGETLSR   0x5459
++#define TIOCSERGETMULTI 0x545A
++#define TIOCSERSETMULTI 0x545B
++
++#define TIOCMIWAIT	0x545C
++#define TIOCGICOUNT	0x545D
++#define TIOCGHAYESESP   0x545E
++#define TIOCSHAYESESP   0x545F
++#define FIOQSIZE	0x5460
++
++#define TIOCPKT_DATA		 0
++#define TIOCPKT_FLUSHREAD	 1
++#define TIOCPKT_FLUSHWRITE	 2
++#define TIOCPKT_STOP		 4
++#define TIOCPKT_START		 8
++#define TIOCPKT_NOSTOP		16
++#define TIOCPKT_DOSTOP		32
++#define TIOCPKT_IOCTL		64
++
++#define TIOCSER_TEMT    0x01
++
++struct winsize {
++	unsigned short ws_row;
++	unsigned short ws_col;
++	unsigned short ws_xpixel;
++	unsigned short ws_ypixel;
++};
++
++#define TIOCM_LE        0x001
++#define TIOCM_DTR       0x002
++#define TIOCM_RTS       0x004
++#define TIOCM_ST        0x008
++#define TIOCM_SR        0x010
++#define TIOCM_CTS       0x020
++#define TIOCM_CAR       0x040
++#define TIOCM_RNG       0x080
++#define TIOCM_DSR       0x100
++#define TIOCM_CD        TIOCM_CAR
++#define TIOCM_RI        TIOCM_RNG
++#define TIOCM_OUT1      0x2000
++#define TIOCM_OUT2      0x4000
++#define TIOCM_LOOP      0x8000
++#define TIOCM_MODEM_BITS TIOCM_OUT2
++
++#define N_TTY           0
++#define N_SLIP          1
++#define N_MOUSE         2
++#define N_PPP           3
++#define N_STRIP         4
++#define N_AX25          5
++#define N_X25           6
++#define N_6PACK         7
++#define N_MASC          8
++#define N_R3964         9
++#define N_PROFIBUS_FDL  10
++#define N_IRDA          11
++#define N_SMSBLOCK      12
++#define N_HDLC          13
++#define N_SYNC_PPP      14
++#define N_HCI           15
++
++#define FIOSETOWN       0x8901
++#define SIOCSPGRP       0x8902
++#define FIOGETOWN       0x8903
++#define SIOCGPGRP       0x8904
++#define SIOCATMARK      0x8905
++#define SIOCGSTAMP      0x8906
++
++#define SIOCADDRT       0x890B
++#define SIOCDELRT       0x890C
++#define SIOCRTMSG       0x890D
++
++#define SIOCGIFNAME     0x8910
++#define SIOCSIFLINK     0x8911
++#define SIOCGIFCONF     0x8912
++#define SIOCGIFFLAGS    0x8913
++#define SIOCSIFFLAGS    0x8914
++#define SIOCGIFADDR     0x8915
++#define SIOCSIFADDR     0x8916
++#define SIOCGIFDSTADDR  0x8917
++#define SIOCSIFDSTADDR  0x8918
++#define SIOCGIFBRDADDR  0x8919
++#define SIOCSIFBRDADDR  0x891a
++#define SIOCGIFNETMASK  0x891b
++#define SIOCSIFNETMASK  0x891c
++#define SIOCGIFMETRIC   0x891d
++#define SIOCSIFMETRIC   0x891e
++#define SIOCGIFMEM      0x891f
++#define SIOCSIFMEM      0x8920
++#define SIOCGIFMTU      0x8921
++#define SIOCSIFMTU      0x8922
++#define SIOCSIFHWADDR   0x8924
++#define SIOCGIFENCAP    0x8925
++#define SIOCSIFENCAP    0x8926
++#define SIOCGIFHWADDR   0x8927
++#define SIOCGIFSLAVE    0x8929
++#define SIOCSIFSLAVE    0x8930
++#define SIOCADDMULTI    0x8931
++#define SIOCDELMULTI    0x8932
++#define SIOCGIFINDEX    0x8933
++#define SIOGIFINDEX     SIOCGIFINDEX
++#define SIOCSIFPFLAGS   0x8934
++#define SIOCGIFPFLAGS   0x8935
++#define SIOCDIFADDR     0x8936
++#define SIOCSIFHWBROADCAST 0x8937
++#define SIOCGIFCOUNT    0x8938
++
++#define SIOCGIFBR       0x8940
++#define SIOCSIFBR       0x8941
++
++#define SIOCGIFTXQLEN   0x8942
++#define SIOCSIFTXQLEN   0x8943
++
++#define SIOCDARP        0x8953
++#define SIOCGARP        0x8954
++#define SIOCSARP        0x8955
++
++#define SIOCDRARP       0x8960
++#define SIOCGRARP       0x8961
++#define SIOCSRARP       0x8962
++
++#define SIOCGIFMAP      0x8970
++#define SIOCSIFMAP      0x8971
++
++#define SIOCADDDLCI     0x8980
++#define SIOCDELDLCI     0x8981
++
++#define SIOCDEVPRIVATE		0x89F0
++#define SIOCPROTOPRIVATE	0x89E0
+--- /dev/null
++++ b/arch/generic/bits/ipc.h
+@@ -0,0 +1,14 @@
++struct ipc_perm
++{
++	key_t __ipc_perm_key;
++	uid_t uid;
++	gid_t gid;
++	uid_t cuid;
++	gid_t cgid;
++	mode_t mode;
++	int __ipc_perm_seq;
++	long __pad1;
++	long __pad2;
++};
++
++#define IPC_64 0x100
+--- /dev/null
++++ b/arch/generic/bits/msg.h
+@@ -0,0 +1,16 @@
++struct msqid_ds
++{
++	struct ipc_perm msg_perm;
++	time_t msg_stime;
++	int __unused1;
++	time_t msg_rtime;
++	int __unused2;
++	time_t msg_ctime;
++	int __unused3;
++	unsigned long msg_cbytes;
++	msgqnum_t msg_qnum;
++	msglen_t msg_qbytes;
++	pid_t msg_lspid;
++	pid_t msg_lrpid;
++	unsigned long __unused[2];
++};
+--- /dev/null
++++ b/arch/generic/bits/sem.h
+@@ -0,0 +1,16 @@
++struct semid_ds {
++	struct ipc_perm sem_perm;
++	time_t sem_otime;
++	time_t __unused1;
++	time_t sem_ctime;
++	time_t __unused2;
++#if __BYTE_ORDER == __LITTLE_ENDIAN
++	unsigned short sem_nsems;
++	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
++#else
++	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
++	unsigned short sem_nsems;
++#endif
++	time_t __unused3;
++	time_t __unused4;
++};
+--- /dev/null
++++ b/arch/generic/bits/shm.h
+@@ -0,0 +1,29 @@
++#define SHMLBA 4096
++
++struct shmid_ds
++{
++	struct ipc_perm shm_perm;
++	size_t shm_segsz;
++	time_t shm_atime;
++	int __unused1;
++	time_t shm_dtime;
++	int __unused2;
++	time_t shm_ctime;
++	int __unused3;
++	pid_t shm_cpid;
++	pid_t shm_lpid;
++	unsigned long shm_nattch;
++	unsigned long __pad1;
++	unsigned long __pad2;
++};
++
++struct shminfo {
++	unsigned long shmmax, shmmin, shmmni, shmseg, shmall, __unused[4];
++};
++
++struct shm_info {
++	int __used_ids;
++	unsigned long shm_tot, shm_rss, shm_swp;
++	unsigned long __swap_attempts, __swap_successes;
++};
++
+--- /dev/null
++++ b/arch/generic/bits/socket.h
+@@ -0,0 +1,17 @@
++struct msghdr
++{
++	void *msg_name;
++	socklen_t msg_namelen;
++	struct iovec *msg_iov;
++	int msg_iovlen;
++	void *msg_control;
++	socklen_t msg_controllen;
++	int msg_flags;
++};
++
++struct cmsghdr
++{
++	socklen_t cmsg_len;
++	int cmsg_level;
++	int cmsg_type;
++};
+--- /dev/null
++++ b/arch/generic/bits/statfs.h
+@@ -0,0 +1,7 @@
++struct statfs {
++	unsigned long f_type, f_bsize;
++	fsblkcnt_t f_blocks, f_bfree, f_bavail;
++	fsfilcnt_t f_files, f_ffree;
++	fsid_t f_fsid;
++	unsigned long f_namelen, f_frsize, f_flags, f_spare[4];
++};
+--- /dev/null
++++ b/arch/generic/bits/stdarg.h
+@@ -0,0 +1,4 @@
++#define va_start(v,l)   __builtin_va_start(v,l)
++#define va_end(v)       __builtin_va_end(v)
++#define va_arg(v,l)     __builtin_va_arg(v,l)
++#define va_copy(d,s)    __builtin_va_copy(d,s)
+--- /dev/null
++++ b/arch/generic/bits/termios.h
+@@ -0,0 +1,160 @@
++struct termios
++{
++	tcflag_t c_iflag;
++	tcflag_t c_oflag;
++	tcflag_t c_cflag;
++	tcflag_t c_lflag;
++	cc_t c_line;
++	cc_t c_cc[NCCS];
++	speed_t __c_ispeed;
++	speed_t __c_ospeed;
++};
++
++#define VINTR     0
++#define VQUIT     1
++#define VERASE    2
++#define VKILL     3
++#define VEOF      4
++#define VTIME     5
++#define VMIN      6
++#define VSWTC     7
++#define VSTART    8
++#define VSTOP     9
++#define VSUSP    10
++#define VEOL     11
++#define VREPRINT 12
++#define VDISCARD 13
++#define VWERASE  14
++#define VLNEXT   15
++#define VEOL2    16
++
++#define IGNBRK  0000001
++#define BRKINT  0000002
++#define IGNPAR  0000004
++#define PARMRK  0000010
++#define INPCK   0000020
++#define ISTRIP  0000040
++#define INLCR   0000100
++#define IGNCR   0000200
++#define ICRNL   0000400
++#define IUCLC   0001000
++#define IXON    0002000
++#define IXANY   0004000
++#define IXOFF   0010000
++#define IMAXBEL 0020000
++#define IUTF8   0040000
++
++#define OPOST  0000001
++#define OLCUC  0000002
++#define ONLCR  0000004
++#define OCRNL  0000010
++#define ONOCR  0000020
++#define ONLRET 0000040
++#define OFILL  0000100
++#define OFDEL  0000200
++#define NLDLY  0000400
++#define NL0    0000000
++#define NL1    0000400
++#define CRDLY  0003000
++#define CR0    0000000
++#define CR1    0001000
++#define CR2    0002000
++#define CR3    0003000
++#define TABDLY 0014000
++#define TAB0   0000000
++#define TAB1   0004000
++#define TAB2   0010000
++#define TAB3   0014000
++#define BSDLY  0020000
++#define BS0    0000000
++#define BS1    0020000
++#define FFDLY  0100000
++#define FF0    0000000
++#define FF1    0100000
++
++#define VTDLY  0040000
++#define VT0    0000000
++#define VT1    0040000
++
++#define B0       0000000
++#define B50      0000001
++#define B75      0000002
++#define B110     0000003
++#define B134     0000004
++#define B150     0000005
++#define B200     0000006
++#define B300     0000007
++#define B600     0000010
++#define B1200    0000011
++#define B1800    0000012
++#define B2400    0000013
++#define B4800    0000014
++#define B9600    0000015
++#define B19200   0000016
++#define B38400   0000017
++
++#define B57600   0010001
++#define B115200  0010002
++#define B230400  0010003
++#define B460800  0010004
++#define B500000  0010005
++#define B576000  0010006
++#define B921600  0010007
++#define B1000000 0010010
++#define B1152000 0010011
++#define B1500000 0010012
++#define B2000000 0010013
++#define B2500000 0010014
++#define B3000000 0010015
++#define B3500000 0010016
++#define B4000000 0010017
++
++#define CBAUD    0010017
++
++#define CSIZE  0000060
++#define CS5    0000000
++#define CS6    0000020
++#define CS7    0000040
++#define CS8    0000060
++#define CSTOPB 0000100
++#define CREAD  0000200
++#define PARENB 0000400
++#define PARODD 0001000
++#define HUPCL  0002000
++#define CLOCAL 0004000
++
++#define ISIG   0000001
++#define ICANON 0000002
++#define ECHO   0000010
++#define ECHOE  0000020
++#define ECHOK  0000040
++#define ECHONL 0000100
++#define NOFLSH 0000200
++#define TOSTOP 0000400
++#define IEXTEN 0100000
++
++#define ECHOCTL 0001000
++#define ECHOPRT 0002000
++#define ECHOKE 0004000
++#define FLUSHO 0010000
++#define PENDIN 0040000
++
++#define TCOOFF 0
++#define TCOON  1
++#define TCIOFF 2
++#define TCION  3
++
++#define TCIFLUSH  0
++#define TCOFLUSH  1
++#define TCIOFLUSH 2
++
++#define TCSANOW   0
++#define TCSADRAIN 1
++#define TCSAFLUSH 2
++
++#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
++#define CBAUDEX 0010000
++#define CRTSCTS  020000000000
++#define EXTPROC 0200000
++#define XTABS  0014000
++#endif
+--- a/arch/i386/atomic.h
++++ /dev/null
+@@ -1,110 +0,0 @@
+-#ifndef _INTERNAL_ATOMIC_H
+-#define _INTERNAL_ATOMIC_H
+-
+-#include <stdint.h>
+-
+-static inline int a_ctz_64(uint64_t x)
+-{
+-	int r;
+-	__asm__( "bsf %1,%0 ; jnz 1f ; bsf %2,%0 ; addl $32,%0\n1:"
+-		: "=&r"(r) : "r"((unsigned)x), "r"((unsigned)(x>>32)) );
+-	return r;
+-}
+-
+-static inline int a_ctz_l(unsigned long x)
+-{
+-	long r;
+-	__asm__( "bsf %1,%0" : "=r"(r) : "r"(x) );
+-	return r;
+-}
+-
+-static inline void a_and_64(volatile uint64_t *p, uint64_t v)
+-{
+-	__asm__( "lock ; andl %1, (%0) ; lock ; andl %2, 4(%0)"
+-		: : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" );
+-}
+-
+-static inline void a_or_64(volatile uint64_t *p, uint64_t v)
+-{
+-	__asm__( "lock ; orl %1, (%0) ; lock ; orl %2, 4(%0)"
+-		: : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" );
+-}
+-
+-static inline void a_or_l(volatile void *p, long v)
+-{
+-	__asm__( "lock ; orl %1, %0"
+-		: "=m"(*(long *)p) : "r"(v) : "memory" );
+-}
+-
+-static inline void *a_cas_p(volatile void *p, void *t, void *s)
+-{
+-	__asm__( "lock ; cmpxchg %3, %1"
+-		: "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" );
+-	return t;
+-}
+-
+-static inline int a_cas(volatile int *p, int t, int s)
+-{
+-	__asm__( "lock ; cmpxchg %3, %1"
+-		: "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
+-	return t;
+-}
+-
+-static inline void a_or(volatile int *p, int v)
+-{
+-	__asm__( "lock ; orl %1, %0"
+-		: "=m"(*p) : "r"(v) : "memory" );
+-}
+-
+-static inline void a_and(volatile int *p, int v)
+-{
+-	__asm__( "lock ; andl %1, %0"
+-		: "=m"(*p) : "r"(v) : "memory" );
+-}
+-
+-static inline int a_swap(volatile int *x, int v)
+-{
+-	__asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
+-	return v;
+-}
+-
+-#define a_xchg a_swap
+-
+-static inline int a_fetch_add(volatile int *x, int v)
+-{
+-	__asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
+-	return v;
+-}
+-
+-static inline void a_inc(volatile int *x)
+-{
+-	__asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" );
+-}
+-
+-static inline void a_dec(volatile int *x)
+-{
+-	__asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" );
+-}
+-
+-static inline void a_store(volatile int *p, int x)
+-{
+-	__asm__( "movl %1, %0 ; lock ; orl $0,(%%esp)" : "=m"(*p) : "r"(x) : "memory" );
+-}
+-
+-static inline void a_spin()
+-{
+-	__asm__ __volatile__( "pause" : : : "memory" );
+-}
+-
+-static inline void a_barrier()
+-{
+-	__asm__ __volatile__( "" : : : "memory" );
+-}
+-
+-static inline void a_crash()
+-{
+-	__asm__ __volatile__( "hlt" : : : "memory" );
+-}
+-
+-
+-#endif
+--- /dev/null
++++ b/arch/i386/atomic_arch.h
+@@ -0,0 +1,101 @@
++#define a_cas a_cas
++static inline int a_cas(volatile int *p, int t, int s)
++{
++	__asm__ __volatile__ (
++		"lock ; cmpxchg %3, %1"
++		: "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
++	return t;
++}
++
++#define a_swap a_swap
++static inline int a_swap(volatile int *p, int v)
++{
++	__asm__ __volatile__(
++		"xchg %0, %1"
++		: "=r"(v), "=m"(*p) : "0"(v) : "memory" );
++	return v;
++}
++
++#define a_fetch_add a_fetch_add
++static inline int a_fetch_add(volatile int *p, int v)
++{
++	__asm__ __volatile__(
++		"lock ; xadd %0, %1"
++		: "=r"(v), "=m"(*p) : "0"(v) : "memory" );
++	return v;
++}
++
++#define a_and a_and
++static inline void a_and(volatile int *p, int v)
++{
++	__asm__ __volatile__(
++		"lock ; and %1, %0"
++		: "=m"(*p) : "r"(v) : "memory" );
++}
++
++#define a_or a_or
++static inline void a_or(volatile int *p, int v)
++{
++	__asm__ __volatile__(
++		"lock ; or %1, %0"
++		: "=m"(*p) : "r"(v) : "memory" );
++}
++
++#define a_inc a_inc
++static inline void a_inc(volatile int *p)
++{
++	__asm__ __volatile__(
++		"lock ; incl %0"
++		: "=m"(*p) : "m"(*p) : "memory" );
++}
++
++#define a_dec a_dec
++static inline void a_dec(volatile int *p)
++{
++	__asm__ __volatile__(
++		"lock ; decl %0"
++		: "=m"(*p) : "m"(*p) : "memory" );
++}
++
++#define a_store a_store
++static inline void a_store(volatile int *p, int x)
++{
++	__asm__ __volatile__(
++		"mov %1, %0 ; lock ; orl $0,(%%esp)"
++		: "=m"(*p) : "r"(x) : "memory" );
++}
++
++#define a_barrier a_barrier
++static inline void a_barrier()
++{
++	__asm__ __volatile__( "" : : : "memory" );
++}
++
++#define a_spin a_spin /* guard macro must carry the function's own name, like every other a_* primitive in this header */
++static inline void a_spin()
++{
++	__asm__ __volatile__( "pause" : : : "memory" );
++}
++
++#define a_crash a_crash
++static inline void a_crash()
++{
++	__asm__ __volatile__( "hlt" : : : "memory" );
++}
++
++#define a_ctz_64 a_ctz_64
++static inline int a_ctz_64(uint64_t x)
++{
++	int r;
++	__asm__( "bsf %1,%0 ; jnz 1f ; bsf %2,%0 ; add $32,%0\n1:"
++		: "=&r"(r) : "r"((unsigned)x), "r"((unsigned)(x>>32)) );
++	return r;
++}
++
++#define a_ctz_l a_ctz_l
++static inline int a_ctz_l(unsigned long x)
++{
++	long r;
++	__asm__( "bsf %1,%0" : "=r"(r) : "r"(x) );
++	return r;
++}
+--- a/arch/i386/bits/alltypes.h.in
++++ b/arch/i386/bits/alltypes.h.in
+@@ -26,10 +26,12 @@ TYPEDEF long double float_t;
+ TYPEDEF long double double_t;
+ #endif
+ 
+-#ifdef __cplusplus
+-TYPEDEF struct { alignas(8) long long __ll; long double __ld; } max_align_t;
+-#else
++#if !defined(__cplusplus)
+ TYPEDEF struct { _Alignas(8) long long __ll; long double __ld; } max_align_t;
++#elif defined(__GNUC__)
++TYPEDEF struct { __attribute__((__aligned__(8))) long long __ll; long double __ld; } max_align_t;
++#else
++TYPEDEF struct { alignas(8) long long __ll; long double __ld; } max_align_t;
+ #endif
+ 
+ TYPEDEF long time_t;
+--- a/arch/i386/bits/errno.h
++++ /dev/null
+@@ -1,134 +0,0 @@
+-#define EPERM            1
+-#define ENOENT           2
+-#define ESRCH            3
+-#define EINTR            4
+-#define EIO              5
+-#define ENXIO            6
+-#define E2BIG            7
+-#define ENOEXEC          8
+-#define EBADF            9
+-#define ECHILD          10
+-#define EAGAIN          11
+-#define ENOMEM          12
+-#define EACCES          13
+-#define EFAULT          14
+-#define ENOTBLK         15
+-#define EBUSY           16
+-#define EEXIST          17
+-#define EXDEV           18
+-#define ENODEV          19
+-#define ENOTDIR         20
+-#define EISDIR          21
+-#define EINVAL          22
+-#define ENFILE          23
+-#define EMFILE          24
+-#define ENOTTY          25
+-#define ETXTBSY         26
+-#define EFBIG           27
+-#define ENOSPC          28
+-#define ESPIPE          29
+-#define EROFS           30
+-#define EMLINK          31
+-#define EPIPE           32
+-#define EDOM            33
+-#define ERANGE          34
+-#define EDEADLK         35
+-#define ENAMETOOLONG    36
+-#define ENOLCK          37
+-#define ENOSYS          38
+-#define ENOTEMPTY       39
+-#define ELOOP           40
+-#define EWOULDBLOCK     EAGAIN
+-#define ENOMSG          42
+-#define EIDRM           43
+-#define ECHRNG          44
+-#define EL2NSYNC        45
+-#define EL3HLT          46
+-#define EL3RST          47
+-#define ELNRNG          48
+-#define EUNATCH         49
+-#define ENOCSI          50
+-#define EL2HLT          51
+-#define EBADE           52
+-#define EBADR           53
+-#define EXFULL          54
+-#define ENOANO          55
+-#define EBADRQC         56
+-#define EBADSLT         57
+-#define EDEADLOCK       EDEADLK
+-#define EBFONT          59
+-#define ENOSTR          60
+-#define ENODATA         61
+-#define ETIME           62
+-#define ENOSR           63
+-#define ENONET          64
+-#define ENOPKG          65
+-#define EREMOTE         66
+-#define ENOLINK         67
+-#define EADV            68
+-#define ESRMNT          69
+-#define ECOMM           70
+-#define EPROTO          71
+-#define EMULTIHOP       72
+-#define EDOTDOT         73
+-#define EBADMSG         74
+-#define EOVERFLOW       75
+-#define ENOTUNIQ        76
+-#define EBADFD          77
+-#define EREMCHG         78
+-#define ELIBACC         79
+-#define ELIBBAD         80
+-#define ELIBSCN         81
+-#define ELIBMAX         82
+-#define ELIBEXEC        83
+-#define EILSEQ          84
+-#define ERESTART        85
+-#define ESTRPIPE        86
+-#define EUSERS          87
+-#define ENOTSOCK        88
+-#define EDESTADDRREQ    89
+-#define EMSGSIZE        90
+-#define EPROTOTYPE      91
+-#define ENOPROTOOPT     92
+-#define EPROTONOSUPPORT 93
+-#define ESOCKTNOSUPPORT 94
+-#define EOPNOTSUPP      95
+-#define ENOTSUP         EOPNOTSUPP
+-#define EPFNOSUPPORT    96
+-#define EAFNOSUPPORT    97
+-#define EADDRINUSE      98
+-#define EADDRNOTAVAIL   99
+-#define ENETDOWN        100
+-#define ENETUNREACH     101
+-#define ENETRESET       102
+-#define ECONNABORTED    103
+-#define ECONNRESET      104
+-#define ENOBUFS         105
+-#define EISCONN         106
+-#define ENOTCONN        107
+-#define ESHUTDOWN       108
+-#define ETOOMANYREFS    109
+-#define ETIMEDOUT       110
+-#define ECONNREFUSED    111
+-#define EHOSTDOWN       112
+-#define EHOSTUNREACH    113
+-#define EALREADY        114
+-#define EINPROGRESS     115
+-#define ESTALE          116
+-#define EUCLEAN         117
+-#define ENOTNAM         118
+-#define ENAVAIL         119
+-#define EISNAM          120
+-#define EREMOTEIO       121
+-#define EDQUOT          122
+-#define ENOMEDIUM       123
+-#define EMEDIUMTYPE     124
+-#define ECANCELED       125
+-#define ENOKEY          126
+-#define EKEYEXPIRED     127
+-#define EKEYREVOKED     128
+-#define EKEYREJECTED    129
+-#define EOWNERDEAD      130
+-#define ENOTRECOVERABLE 131
+-#define ERFKILL         132
+-#define EHWPOISON       133
+--- a/arch/i386/bits/fcntl.h
++++ /dev/null
+@@ -1,40 +0,0 @@
+-#define O_CREAT        0100
+-#define O_EXCL         0200
+-#define O_NOCTTY       0400
+-#define O_TRUNC       01000
+-#define O_APPEND      02000
+-#define O_NONBLOCK    04000
+-#define O_DSYNC      010000
+-#define O_SYNC     04010000
+-#define O_RSYNC    04010000
+-#define O_DIRECTORY 0200000
+-#define O_NOFOLLOW  0400000
+-#define O_CLOEXEC  02000000
+-
+-#define O_ASYNC      020000
+-#define O_DIRECT     040000
+-#define O_LARGEFILE 0100000
+-#define O_NOATIME  01000000
+-#define O_PATH    010000000
+-#define O_TMPFILE 020200000
+-#define O_NDELAY O_NONBLOCK
+-
+-#define F_DUPFD  0
+-#define F_GETFD  1
+-#define F_SETFD  2
+-#define F_GETFL  3
+-#define F_SETFL  4
+-
+-#define F_SETOWN 8
+-#define F_GETOWN 9
+-#define F_SETSIG 10
+-#define F_GETSIG 11
+-
+-#define F_GETLK 12
+-#define F_SETLK 13
+-#define F_SETLKW 14
+-
+-#define F_SETOWN_EX 15
+-#define F_GETOWN_EX 16
+-
+-#define F_GETOWNER_UIDS 17
+--- a/arch/i386/bits/ioctl.h
++++ /dev/null
+@@ -1,197 +0,0 @@
+-#define _IOC(a,b,c,d) ( ((a)<<30) | ((b)<<8) | (c) | ((d)<<16) )
+-#define _IOC_NONE  0U
+-#define _IOC_WRITE 1U
+-#define _IOC_READ  2U
+-
+-#define _IO(a,b) _IOC(_IOC_NONE,(a),(b),0)
+-#define _IOW(a,b,c) _IOC(_IOC_WRITE,(a),(b),sizeof(c))
+-#define _IOR(a,b,c) _IOC(_IOC_READ,(a),(b),sizeof(c))
+-#define _IOWR(a,b,c) _IOC(_IOC_READ|_IOC_WRITE,(a),(b),sizeof(c))
+-
+-#define TCGETS		0x5401
+-#define TCSETS		0x5402
+-#define TCSETSW		0x5403
+-#define TCSETSF		0x5404
+-#define TCGETA		0x5405
+-#define TCSETA		0x5406
+-#define TCSETAW		0x5407
+-#define TCSETAF		0x5408
+-#define TCSBRK		0x5409
+-#define TCXONC		0x540A
+-#define TCFLSH		0x540B
+-#define TIOCEXCL	0x540C
+-#define TIOCNXCL	0x540D
+-#define TIOCSCTTY	0x540E
+-#define TIOCGPGRP	0x540F
+-#define TIOCSPGRP	0x5410
+-#define TIOCOUTQ	0x5411
+-#define TIOCSTI		0x5412
+-#define TIOCGWINSZ	0x5413
+-#define TIOCSWINSZ	0x5414
+-#define TIOCMGET	0x5415
+-#define TIOCMBIS	0x5416
+-#define TIOCMBIC	0x5417
+-#define TIOCMSET	0x5418
+-#define TIOCGSOFTCAR	0x5419
+-#define TIOCSSOFTCAR	0x541A
+-#define FIONREAD	0x541B
+-#define TIOCINQ		FIONREAD
+-#define TIOCLINUX	0x541C
+-#define TIOCCONS	0x541D
+-#define TIOCGSERIAL	0x541E
+-#define TIOCSSERIAL	0x541F
+-#define TIOCPKT		0x5420
+-#define FIONBIO		0x5421
+-#define TIOCNOTTY	0x5422
+-#define TIOCSETD	0x5423
+-#define TIOCGETD	0x5424
+-#define TCSBRKP		0x5425
+-#define TIOCTTYGSTRUCT	0x5426
+-#define TIOCSBRK	0x5427
+-#define TIOCCBRK	0x5428
+-#define TIOCGSID	0x5429
+-#define TIOCGPTN	0x80045430
+-#define TIOCSPTLCK	0x40045431
+-#define TCGETX		0x5432
+-#define TCSETX		0x5433
+-#define TCSETXF		0x5434
+-#define TCSETXW		0x5435
+-
+-#define FIONCLEX	0x5450
+-#define FIOCLEX		0x5451
+-#define FIOASYNC	0x5452
+-#define TIOCSERCONFIG	0x5453
+-#define TIOCSERGWILD	0x5454
+-#define TIOCSERSWILD	0x5455
+-#define TIOCGLCKTRMIOS	0x5456
+-#define TIOCSLCKTRMIOS	0x5457
+-#define TIOCSERGSTRUCT	0x5458
+-#define TIOCSERGETLSR   0x5459
+-#define TIOCSERGETMULTI 0x545A
+-#define TIOCSERSETMULTI 0x545B
+-
+-#define TIOCMIWAIT	0x545C
+-#define TIOCGICOUNT	0x545D
+-#define TIOCGHAYESESP   0x545E
+-#define TIOCSHAYESESP   0x545F
+-#define FIOQSIZE	0x5460
+-
+-#define TIOCPKT_DATA		 0
+-#define TIOCPKT_FLUSHREAD	 1
+-#define TIOCPKT_FLUSHWRITE	 2
+-#define TIOCPKT_STOP		 4
+-#define TIOCPKT_START		 8
+-#define TIOCPKT_NOSTOP		16
+-#define TIOCPKT_DOSTOP		32
+-#define TIOCPKT_IOCTL		64
+-
+-#define TIOCSER_TEMT    0x01
+-
+-struct winsize {
+-	unsigned short ws_row;
+-	unsigned short ws_col;
+-	unsigned short ws_xpixel;
+-	unsigned short ws_ypixel;
+-};
+-
+-#define TIOCM_LE        0x001
+-#define TIOCM_DTR       0x002
+-#define TIOCM_RTS       0x004
+-#define TIOCM_ST        0x008
+-#define TIOCM_SR        0x010
+-#define TIOCM_CTS       0x020
+-#define TIOCM_CAR       0x040
+-#define TIOCM_RNG       0x080
+-#define TIOCM_DSR       0x100
+-#define TIOCM_CD        TIOCM_CAR
+-#define TIOCM_RI        TIOCM_RNG
+-#define TIOCM_OUT1      0x2000
+-#define TIOCM_OUT2      0x4000
+-#define TIOCM_LOOP      0x8000
+-#define TIOCM_MODEM_BITS TIOCM_OUT2
+-
+-#define N_TTY           0
+-#define N_SLIP          1
+-#define N_MOUSE         2
+-#define N_PPP           3
+-#define N_STRIP         4
+-#define N_AX25          5
+-#define N_X25           6
+-#define N_6PACK         7
+-#define N_MASC          8
+-#define N_R3964         9
+-#define N_PROFIBUS_FDL  10
+-#define N_IRDA          11
+-#define N_SMSBLOCK      12
+-#define N_HDLC          13
+-#define N_SYNC_PPP      14
+-#define N_HCI           15
+-
+-#define FIOSETOWN       0x8901
+-#define SIOCSPGRP       0x8902
+-#define FIOGETOWN       0x8903
+-#define SIOCGPGRP       0x8904
+-#define SIOCATMARK      0x8905
+-#define SIOCGSTAMP      0x8906
+-
+-#define SIOCADDRT       0x890B
+-#define SIOCDELRT       0x890C
+-#define SIOCRTMSG       0x890D
+-
+-#define SIOCGIFNAME     0x8910
+-#define SIOCSIFLINK     0x8911
+-#define SIOCGIFCONF     0x8912
+-#define SIOCGIFFLAGS    0x8913
+-#define SIOCSIFFLAGS    0x8914
+-#define SIOCGIFADDR     0x8915
+-#define SIOCSIFADDR     0x8916
+-#define SIOCGIFDSTADDR  0x8917
+-#define SIOCSIFDSTADDR  0x8918
+-#define SIOCGIFBRDADDR  0x8919
+-#define SIOCSIFBRDADDR  0x891a
+-#define SIOCGIFNETMASK  0x891b
+-#define SIOCSIFNETMASK  0x891c
+-#define SIOCGIFMETRIC   0x891d
+-#define SIOCSIFMETRIC   0x891e
+-#define SIOCGIFMEM      0x891f
+-#define SIOCSIFMEM      0x8920
+-#define SIOCGIFMTU      0x8921
+-#define SIOCSIFMTU      0x8922
+-#define SIOCSIFHWADDR   0x8924
+-#define SIOCGIFENCAP    0x8925
+-#define SIOCSIFENCAP    0x8926
+-#define SIOCGIFHWADDR   0x8927
+-#define SIOCGIFSLAVE    0x8929
+-#define SIOCSIFSLAVE    0x8930
+-#define SIOCADDMULTI    0x8931
+-#define SIOCDELMULTI    0x8932
+-#define SIOCGIFINDEX    0x8933
+-#define SIOGIFINDEX     SIOCGIFINDEX
+-#define SIOCSIFPFLAGS   0x8934
+-#define SIOCGIFPFLAGS   0x8935
+-#define SIOCDIFADDR     0x8936
+-#define SIOCSIFHWBROADCAST 0x8937
+-#define SIOCGIFCOUNT    0x8938
+-
+-#define SIOCGIFBR       0x8940
+-#define SIOCSIFBR       0x8941
+-
+-#define SIOCGIFTXQLEN   0x8942
+-#define SIOCSIFTXQLEN   0x8943
+-
+-#define SIOCDARP        0x8953
+-#define SIOCGARP        0x8954
+-#define SIOCSARP        0x8955
+-
+-#define SIOCDRARP       0x8960
+-#define SIOCGRARP       0x8961
+-#define SIOCSRARP       0x8962
+-
+-#define SIOCGIFMAP      0x8970
+-#define SIOCSIFMAP      0x8971
+-
+-#define SIOCADDDLCI     0x8980
+-#define SIOCDELDLCI     0x8981
+-
+-#define SIOCDEVPRIVATE		0x89F0
+-#define SIOCPROTOPRIVATE	0x89E0
+--- a/arch/i386/bits/ipc.h
++++ /dev/null
+@@ -1,14 +0,0 @@
+-struct ipc_perm
+-{
+-	key_t __ipc_perm_key;
+-	uid_t uid;
+-	gid_t gid;
+-	uid_t cuid;
+-	gid_t cgid;
+-	mode_t mode;
+-	int __ipc_perm_seq;
+-	long __pad1;
+-	long __pad2;
+-};
+-
+-#define IPC_64 0x100
+--- a/arch/i386/bits/mman.h
++++ b/arch/i386/bits/mman.h
+@@ -38,6 +38,7 @@
+ 
+ #define MCL_CURRENT     1
+ #define MCL_FUTURE      2
++#define MCL_ONFAULT     4
+ 
+ #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+ #define MADV_NORMAL      0
+--- a/arch/i386/bits/msg.h
++++ /dev/null
+@@ -1,16 +0,0 @@
+-struct msqid_ds
+-{
+-	struct ipc_perm msg_perm;
+-	time_t msg_stime;
+-	int __unused1;
+-	time_t msg_rtime;
+-	int __unused2;
+-	time_t msg_ctime;
+-	int __unused3;
+-	unsigned long msg_cbytes;
+-	msgqnum_t msg_qnum;
+-	msglen_t msg_qbytes;
+-	pid_t msg_lspid;
+-	pid_t msg_lrpid;
+-	unsigned long __unused[2];
+-};
+--- a/arch/i386/bits/sem.h
++++ /dev/null
+@@ -1,16 +0,0 @@
+-struct semid_ds {
+-	struct ipc_perm sem_perm;
+-	time_t sem_otime;
+-	time_t __unused1;
+-	time_t sem_ctime;
+-	time_t __unused2;
+-#if __BYTE_ORDER == __LITTLE_ENDIAN
+-	unsigned short sem_nsems;
+-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+-#else
+-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+-	unsigned short sem_nsems;
+-#endif
+-	time_t __unused3;
+-	time_t __unused4;
+-};
+--- a/arch/i386/bits/shm.h
++++ /dev/null
+@@ -1,29 +0,0 @@
+-#define SHMLBA 4096
+-
+-struct shmid_ds
+-{
+-	struct ipc_perm shm_perm;
+-	size_t shm_segsz;
+-	time_t shm_atime;
+-	int __unused1;
+-	time_t shm_dtime;
+-	int __unused2;
+-	time_t shm_ctime;
+-	int __unused3;
+-	pid_t shm_cpid;
+-	pid_t shm_lpid;
+-	unsigned long shm_nattch;
+-	unsigned long __pad1;
+-	unsigned long __pad2;
+-};
+-
+-struct shminfo {
+-	unsigned long shmmax, shmmin, shmmni, shmseg, shmall, __unused[4];
+-};
+-
+-struct shm_info {
+-	int __used_ids;
+-	unsigned long shm_tot, shm_rss, shm_swp;
+-	unsigned long __swap_attempts, __swap_successes;
+-};
+-
+--- a/arch/i386/bits/socket.h
++++ /dev/null
+@@ -1,17 +0,0 @@
+-struct msghdr
+-{
+-	void *msg_name;
+-	socklen_t msg_namelen;
+-	struct iovec *msg_iov;
+-	int msg_iovlen;
+-	void *msg_control;
+-	socklen_t msg_controllen;
+-	int msg_flags;
+-};
+-
+-struct cmsghdr
+-{
+-	socklen_t cmsg_len;
+-	int cmsg_level;
+-	int cmsg_type;
+-};
+--- a/arch/i386/bits/statfs.h
++++ /dev/null
+@@ -1,7 +0,0 @@
+-struct statfs {
+-	unsigned long f_type, f_bsize;
+-	fsblkcnt_t f_blocks, f_bfree, f_bavail;
+-	fsfilcnt_t f_files, f_ffree;
+-	fsid_t f_fsid;
+-	unsigned long f_namelen, f_frsize, f_flags, f_spare[4];
+-};
+--- a/arch/i386/bits/syscall.h
++++ b/arch/i386/bits/syscall.h
+@@ -357,6 +357,24 @@
+ #define __NR_memfd_create	356
+ #define __NR_bpf		357
+ #define __NR_execveat		358
++#define __NR_socket		359
++#define __NR_socketpair		360
++#define __NR_bind		361
++#define __NR_connect		362
++#define __NR_listen		363
++#define __NR_accept4		364
++#define __NR_getsockopt		365
++#define __NR_setsockopt		366
++#define __NR_getsockname	367
++#define __NR_getpeername	368
++#define __NR_sendto		369
++#define __NR_sendmsg		370
++#define __NR_recvfrom		371
++#define __NR_recvmsg		372
++#define __NR_shutdown		373
++#define __NR_userfaultfd	374
++#define __NR_membarrier		375
++#define __NR_mlock2		376
+ 
+ 
+ /* Repeated with SYS_ prefix */
+@@ -720,3 +738,21 @@
+ #define SYS_memfd_create	356
+ #define SYS_bpf			357
+ #define SYS_execveat		358
++#define SYS_socket		359
++#define SYS_socketpair		360
++#define SYS_bind		361
++#define SYS_connect		362
++#define SYS_listen		363
++#define SYS_accept4		364
++#define SYS_getsockopt		365
++#define SYS_setsockopt		366
++#define SYS_getsockname		367
++#define SYS_getpeername		368
++#define SYS_sendto		369
++#define SYS_sendmsg		370
++#define SYS_recvfrom		371
++#define SYS_recvmsg		372
++#define SYS_shutdown		373
++#define SYS_userfaultfd		374
++#define SYS_membarrier		375
++#define SYS_mlock2		376
+--- a/arch/i386/bits/termios.h
++++ /dev/null
+@@ -1,160 +0,0 @@
+-struct termios
+-{
+-	tcflag_t c_iflag;
+-	tcflag_t c_oflag;
+-	tcflag_t c_cflag;
+-	tcflag_t c_lflag;
+-	cc_t c_line;
+-	cc_t c_cc[NCCS];
+-	speed_t __c_ispeed;
+-	speed_t __c_ospeed;
+-};
+-
+-#define VINTR     0
+-#define VQUIT     1
+-#define VERASE    2
+-#define VKILL     3
+-#define VEOF      4
+-#define VTIME     5
+-#define VMIN      6
+-#define VSWTC     7
+-#define VSTART    8
+-#define VSTOP     9
+-#define VSUSP    10
+-#define VEOL     11
+-#define VREPRINT 12
+-#define VDISCARD 13
+-#define VWERASE  14
+-#define VLNEXT   15
+-#define VEOL2    16
+-
+-#define IGNBRK  0000001
+-#define BRKINT  0000002
+-#define IGNPAR  0000004
+-#define PARMRK  0000010
+-#define INPCK   0000020
+-#define ISTRIP  0000040
+-#define INLCR   0000100
+-#define IGNCR   0000200
+-#define ICRNL   0000400
+-#define IUCLC   0001000
+-#define IXON    0002000
+-#define IXANY   0004000
+-#define IXOFF   0010000
+-#define IMAXBEL 0020000
+-#define IUTF8   0040000
+-
+-#define OPOST  0000001
+-#define OLCUC  0000002
+-#define ONLCR  0000004
+-#define OCRNL  0000010
+-#define ONOCR  0000020
+-#define ONLRET 0000040
+-#define OFILL  0000100
+-#define OFDEL  0000200
+-#define NLDLY  0000400
+-#define NL0    0000000
+-#define NL1    0000400
+-#define CRDLY  0003000
+-#define CR0    0000000
+-#define CR1    0001000
+-#define CR2    0002000
+-#define CR3    0003000
+-#define TABDLY 0014000
+-#define TAB0   0000000
+-#define TAB1   0004000
+-#define TAB2   0010000
+-#define TAB3   0014000
+-#define BSDLY  0020000
+-#define BS0    0000000
+-#define BS1    0020000
+-#define FFDLY  0100000
+-#define FF0    0000000
+-#define FF1    0100000
+-
+-#define VTDLY  0040000
+-#define VT0    0000000
+-#define VT1    0040000
+-
+-#define B0       0000000
+-#define B50      0000001
+-#define B75      0000002
+-#define B110     0000003
+-#define B134     0000004
+-#define B150     0000005
+-#define B200     0000006
+-#define B300     0000007
+-#define B600     0000010
+-#define B1200    0000011
+-#define B1800    0000012
+-#define B2400    0000013
+-#define B4800    0000014
+-#define B9600    0000015
+-#define B19200   0000016
+-#define B38400   0000017
+-
+-#define B57600   0010001
+-#define B115200  0010002
+-#define B230400  0010003
+-#define B460800  0010004
+-#define B500000  0010005
+-#define B576000  0010006
+-#define B921600  0010007
+-#define B1000000 0010010
+-#define B1152000 0010011
+-#define B1500000 0010012
+-#define B2000000 0010013
+-#define B2500000 0010014
+-#define B3000000 0010015
+-#define B3500000 0010016
+-#define B4000000 0010017
+-
+-#define CBAUD    0010017
+-
+-#define CSIZE  0000060
+-#define CS5    0000000
+-#define CS6    0000020
+-#define CS7    0000040
+-#define CS8    0000060
+-#define CSTOPB 0000100
+-#define CREAD  0000200
+-#define PARENB 0000400
+-#define PARODD 0001000
+-#define HUPCL  0002000
+-#define CLOCAL 0004000
+-
+-#define ISIG   0000001
+-#define ICANON 0000002
+-#define ECHO   0000010
+-#define ECHOE  0000020
+-#define ECHOK  0000040
+-#define ECHONL 0000100
+-#define NOFLSH 0000200
+-#define TOSTOP 0000400
+-#define IEXTEN 0100000
+-
+-#define ECHOCTL 0001000
+-#define ECHOPRT 0002000
+-#define ECHOKE 0004000
+-#define FLUSHO 0010000
+-#define PENDIN 0040000
+-
+-#define TCOOFF 0
+-#define TCOON  1
+-#define TCIOFF 2
+-#define TCION  3
+-
+-#define TCIFLUSH  0
+-#define TCOFLUSH  1
+-#define TCIOFLUSH 2
+-
+-#define TCSANOW   0
+-#define TCSADRAIN 1
+-#define TCSAFLUSH 2
+-
+-#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+-#define CBAUDEX 0010000
+-#define CRTSCTS  020000000000
+-#define EXTPROC 0200000
+-#define XTABS  0014000
+-#endif
+--- a/arch/i386/pthread_arch.h
++++ b/arch/i386/pthread_arch.h
+@@ -7,4 +7,4 @@ static inline struct pthread *__pthread_
+ 
+ #define TP_ADJ(p) (p)
+ 
+-#define CANCEL_REG_IP 14
++#define MC_PC gregs[REG_EIP]
+--- a/arch/i386/syscall_arch.h
++++ b/arch/i386/syscall_arch.h
+@@ -55,3 +55,5 @@ static inline long __syscall6(long n, lo
+ #define VDSO_USEFUL
+ #define VDSO_CGT_SYM "__vdso_clock_gettime"
+ #define VDSO_CGT_VER "LINUX_2.6"
++
++#define SYSCALL_USE_SOCKETCALL
+--- a/arch/microblaze/atomic.h
++++ /dev/null
+@@ -1,143 +0,0 @@
+-#ifndef _INTERNAL_ATOMIC_H
+-#define _INTERNAL_ATOMIC_H
+-
+-#include <stdint.h>
+-
+-static inline int a_ctz_l(unsigned long x)
+-{
+-	static const char debruijn32[32] = {
+-		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
+-		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
+-	};
+-	return debruijn32[(x&-x)*0x076be629 >> 27];
+-}
+-
+-static inline int a_ctz_64(uint64_t x)
+-{
+-	uint32_t y = x;
+-	if (!y) {
+-		y = x>>32;
+-		return 32 + a_ctz_l(y);
+-	}
+-	return a_ctz_l(y);
+-}
+-
+-static inline int a_cas(volatile int *p, int t, int s)
+-{
+-	register int old, tmp;
+-	__asm__ __volatile__ (
+-		"	addi %0, r0, 0\n"
+-		"1:	lwx %0, %2, r0\n"
+-		"	rsubk %1, %0, %3\n"
+-		"	bnei %1, 1f\n"
+-		"	swx %4, %2, r0\n"
+-		"	addic %1, r0, 0\n"
+-		"	bnei %1, 1b\n"
+-		"1:	"
+-		: "=&r"(old), "=&r"(tmp)
+-		: "r"(p), "r"(t), "r"(s)
+-		: "cc", "memory" );
+-	return old;
+-}
+-
+-static inline void *a_cas_p(volatile void *p, void *t, void *s)
+-{
+-	return (void *)a_cas(p, (int)t, (int)s);
+-}
+-
+-static inline int a_swap(volatile int *x, int v)
+-{
+-	register int old, tmp;
+-	__asm__ __volatile__ (
+-		"	addi %0, r0, 0\n"
+-		"1:	lwx %0, %2, r0\n"
+-		"	swx %3, %2, r0\n"
+-		"	addic %1, r0, 0\n"
+-		"	bnei %1, 1b\n"
+-		"1:	"
+-		: "=&r"(old), "=&r"(tmp)
+-		: "r"(x), "r"(v)
+-		: "cc", "memory" );
+-	return old;
+-}
+-
+-static inline int a_fetch_add(volatile int *x, int v)
+-{
+-	register int new, tmp;
+-	__asm__ __volatile__ (
+-		"	addi %0, r0, 0\n"
+-		"1:	lwx %0, %2, r0\n"
+-		"	addk %0, %0, %3\n"
+-		"	swx %0, %2, r0\n"
+-		"	addic %1, r0, 0\n"
+-		"	bnei %1, 1b\n"
+-		"1:	"
+-		: "=&r"(new), "=&r"(tmp)
+-		: "r"(x), "r"(v)
+-		: "cc", "memory" );
+-	return new-v;
+-}
+-
+-static inline void a_inc(volatile int *x)
+-{
+-	a_fetch_add(x, 1);
+-}
+-
+-static inline void a_dec(volatile int *x)
+-{
+-	a_fetch_add(x, -1);
+-}
+-
+-static inline void a_store(volatile int *p, int x)
+-{
+-	__asm__ __volatile__ (
+-		"swi %1, %0"
+-		: "=m"(*p) : "r"(x) : "memory" );
+-}
+-
+-#define a_spin a_barrier
+-
+-static inline void a_barrier()
+-{
+-	a_cas(&(int){0}, 0, 0);
+-}
+-
+-static inline void a_crash()
+-{
+-	*(volatile char *)0=0;
+-}
+-
+-static inline void a_and(volatile int *p, int v)
+-{
+-	int old;
+-	do old = *p;
+-	while (a_cas(p, old, old&v) != old);
+-}
+-
+-static inline void a_or(volatile int *p, int v)
+-{
+-	int old;
+-	do old = *p;
+-	while (a_cas(p, old, old|v) != old);
+-}
+-
+-static inline void a_or_l(volatile void *p, long v)
+-{
+-	a_or(p, v);
+-}
+-
+-static inline void a_and_64(volatile uint64_t *p, uint64_t v)
+-{
+-	union { uint64_t v; uint32_t r[2]; } u = { v };
+-	a_and((int *)p, u.r[0]);
+-	a_and((int *)p+1, u.r[1]);
+-}
+-
+-static inline void a_or_64(volatile uint64_t *p, uint64_t v)
+-{
+-	union { uint64_t v; uint32_t r[2]; } u = { v };
+-	a_or((int *)p, u.r[0]);
+-	a_or((int *)p+1, u.r[1]);
+-}
+-
+-#endif
+--- /dev/null
++++ b/arch/microblaze/atomic_arch.h
+@@ -0,0 +1,53 @@
++#define a_cas a_cas
++static inline int a_cas(volatile int *p, int t, int s)
++{
++	register int old, tmp;
++	__asm__ __volatile__ (
++		"	addi %0, r0, 0\n"
++		"1:	lwx %0, %2, r0\n"
++		"	rsubk %1, %0, %3\n"
++		"	bnei %1, 1f\n"
++		"	swx %4, %2, r0\n"
++		"	addic %1, r0, 0\n"
++		"	bnei %1, 1b\n"
++		"1:	"
++		: "=&r"(old), "=&r"(tmp)
++		: "r"(p), "r"(t), "r"(s)
++		: "cc", "memory" );
++	return old;
++}
++
++#define a_swap a_swap
++static inline int a_swap(volatile int *x, int v)
++{
++	register int old, tmp;
++	__asm__ __volatile__ (
++		"	addi %0, r0, 0\n"
++		"1:	lwx %0, %2, r0\n"
++		"	swx %3, %2, r0\n"
++		"	addic %1, r0, 0\n"
++		"	bnei %1, 1b\n"
++		"1:	"
++		: "=&r"(old), "=&r"(tmp)
++		: "r"(x), "r"(v)
++		: "cc", "memory" );
++	return old;
++}
++
++#define a_fetch_add a_fetch_add
++static inline int a_fetch_add(volatile int *x, int v)
++{
++	register int new, tmp;
++	__asm__ __volatile__ (
++		"	addi %0, r0, 0\n"
++		"1:	lwx %0, %2, r0\n"
++		"	addk %0, %0, %3\n"
++		"	swx %0, %2, r0\n"
++		"	addic %1, r0, 0\n"
++		"	bnei %1, 1b\n"
++		"1:	"
++		: "=&r"(new), "=&r"(tmp)
++		: "r"(x), "r"(v)
++		: "cc", "memory" );
++	return new-v;
++}
+--- a/arch/microblaze/bits/errno.h
++++ /dev/null
+@@ -1,134 +0,0 @@
+-#define EPERM            1
+-#define ENOENT           2
+-#define ESRCH            3
+-#define EINTR            4
+-#define EIO              5
+-#define ENXIO            6
+-#define E2BIG            7
+-#define ENOEXEC          8
+-#define EBADF            9
+-#define ECHILD          10
+-#define EAGAIN          11
+-#define ENOMEM          12
+-#define EACCES          13
+-#define EFAULT          14
+-#define ENOTBLK         15
+-#define EBUSY           16
+-#define EEXIST          17
+-#define EXDEV           18
+-#define ENODEV          19
+-#define ENOTDIR         20
+-#define EISDIR          21
+-#define EINVAL          22
+-#define ENFILE          23
+-#define EMFILE          24
+-#define ENOTTY          25
+-#define ETXTBSY         26
+-#define EFBIG           27
+-#define ENOSPC          28
+-#define ESPIPE          29
+-#define EROFS           30
+-#define EMLINK          31
+-#define EPIPE           32
+-#define EDOM            33
+-#define ERANGE          34
+-#define EDEADLK         35
+-#define ENAMETOOLONG    36
+-#define ENOLCK          37
+-#define ENOSYS          38
+-#define ENOTEMPTY       39
+-#define ELOOP           40
+-#define EWOULDBLOCK     EAGAIN
+-#define ENOMSG          42
+-#define EIDRM           43
+-#define ECHRNG          44
+-#define EL2NSYNC        45
+-#define EL3HLT          46
+-#define EL3RST          47
+-#define ELNRNG          48
+-#define EUNATCH         49
+-#define ENOCSI          50
+-#define EL2HLT          51
+-#define EBADE           52
+-#define EBADR           53
+-#define EXFULL          54
+-#define ENOANO          55
+-#define EBADRQC         56
+-#define EBADSLT         57
+-#define EDEADLOCK       EDEADLK
+-#define EBFONT          59
+-#define ENOSTR          60
+-#define ENODATA         61
+-#define ETIME           62
+-#define ENOSR           63
+-#define ENONET          64
+-#define ENOPKG          65
+-#define EREMOTE         66
+-#define ENOLINK         67
+-#define EADV            68
+-#define ESRMNT          69
+-#define ECOMM           70
+-#define EPROTO          71
+-#define EMULTIHOP       72
+-#define EDOTDOT         73
+-#define EBADMSG         74
+-#define EOVERFLOW       75
+-#define ENOTUNIQ        76
+-#define EBADFD          77
+-#define EREMCHG         78
+-#define ELIBACC         79
+-#define ELIBBAD         80
+-#define ELIBSCN         81
+-#define ELIBMAX         82
+-#define ELIBEXEC        83
+-#define EILSEQ          84
+-#define ERESTART        85
+-#define ESTRPIPE        86
+-#define EUSERS          87
+-#define ENOTSOCK        88
+-#define EDESTADDRREQ    89
+-#define EMSGSIZE        90
+-#define EPROTOTYPE      91
+-#define ENOPROTOOPT     92
+-#define EPROTONOSUPPORT 93
+-#define ESOCKTNOSUPPORT 94
+-#define EOPNOTSUPP      95
+-#define ENOTSUP         EOPNOTSUPP
+-#define EPFNOSUPPORT    96
+-#define EAFNOSUPPORT    97
+-#define EADDRINUSE      98
+-#define EADDRNOTAVAIL   99
+-#define ENETDOWN        100
+-#define ENETUNREACH     101
+-#define ENETRESET       102
+-#define ECONNABORTED    103
+-#define ECONNRESET      104
+-#define ENOBUFS         105
+-#define EISCONN         106
+-#define ENOTCONN        107
+-#define ESHUTDOWN       108
+-#define ETOOMANYREFS    109
+-#define ETIMEDOUT       110
+-#define ECONNREFUSED    111
+-#define EHOSTDOWN       112
+-#define EHOSTUNREACH    113
+-#define EALREADY        114
+-#define EINPROGRESS     115
+-#define ESTALE          116
+-#define EUCLEAN         117
+-#define ENOTNAM         118
+-#define ENAVAIL         119
+-#define EISNAM          120
+-#define EREMOTEIO       121
+-#define EDQUOT          122
+-#define ENOMEDIUM       123
+-#define EMEDIUMTYPE     124
+-#define ECANCELED       125
+-#define ENOKEY          126
+-#define EKEYEXPIRED     127
+-#define EKEYREVOKED     128
+-#define EKEYREJECTED    129
+-#define EOWNERDEAD      130
+-#define ENOTRECOVERABLE 131
+-#define ERFKILL         132
+-#define EHWPOISON       133
+--- a/arch/microblaze/bits/fcntl.h
++++ /dev/null
+@@ -1,40 +0,0 @@
+-#define O_CREAT        0100
+-#define O_EXCL         0200
+-#define O_NOCTTY       0400
+-#define O_TRUNC       01000
+-#define O_APPEND      02000
+-#define O_NONBLOCK    04000
+-#define O_DSYNC      010000
+-#define O_SYNC     04010000
+-#define O_RSYNC    04010000
+-#define O_DIRECTORY 0200000
+-#define O_NOFOLLOW  0400000
+-#define O_CLOEXEC  02000000
+-
+-#define O_ASYNC      020000
+-#define O_DIRECT     040000
+-#define O_LARGEFILE 0100000
+-#define O_NOATIME  01000000
+-#define O_PATH    010000000
+-#define O_TMPFILE 020200000
+-#define O_NDELAY O_NONBLOCK
+-
+-#define F_DUPFD  0
+-#define F_GETFD  1
+-#define F_SETFD  2
+-#define F_GETFL  3
+-#define F_SETFL  4
+-
+-#define F_SETOWN 8
+-#define F_GETOWN 9
+-#define F_SETSIG 10
+-#define F_GETSIG 11
+-
+-#define F_GETLK 12
+-#define F_SETLK 13
+-#define F_SETLKW 14
+-
+-#define F_SETOWN_EX 15
+-#define F_GETOWN_EX 16
+-
+-#define F_GETOWNER_UIDS 17
+--- a/arch/microblaze/bits/fenv.h
++++ /dev/null
+@@ -1,10 +0,0 @@
+-#define FE_ALL_EXCEPT 0
+-#define FE_TONEAREST  0
+-
+-typedef unsigned long fexcept_t;
+-
+-typedef struct {
+-	unsigned long __cw;
+-} fenv_t;
+-
+-#define FE_DFL_ENV      ((const fenv_t *) -1)
+--- a/arch/microblaze/bits/ioctl.h
++++ /dev/null
+@@ -1,197 +0,0 @@
+-#define _IOC(a,b,c,d) ( ((a)<<30) | ((b)<<8) | (c) | ((d)<<16) )
+-#define _IOC_NONE  0U
+-#define _IOC_WRITE 1U
+-#define _IOC_READ  2U
+-
+-#define _IO(a,b) _IOC(_IOC_NONE,(a),(b),0)
+-#define _IOW(a,b,c) _IOC(_IOC_WRITE,(a),(b),sizeof(c))
+-#define _IOR(a,b,c) _IOC(_IOC_READ,(a),(b),sizeof(c))
+-#define _IOWR(a,b,c) _IOC(_IOC_READ|_IOC_WRITE,(a),(b),sizeof(c))
+-
+-#define TCGETS		0x5401
+-#define TCSETS		0x5402
+-#define TCSETSW		0x5403
+-#define TCSETSF		0x5404
+-#define TCGETA		0x5405
+-#define TCSETA		0x5406
+-#define TCSETAW		0x5407
+-#define TCSETAF		0x5408
+-#define TCSBRK		0x5409
+-#define TCXONC		0x540A
+-#define TCFLSH		0x540B
+-#define TIOCEXCL	0x540C
+-#define TIOCNXCL	0x540D
+-#define TIOCSCTTY	0x540E
+-#define TIOCGPGRP	0x540F
+-#define TIOCSPGRP	0x5410
+-#define TIOCOUTQ	0x5411
+-#define TIOCSTI		0x5412
+-#define TIOCGWINSZ	0x5413
+-#define TIOCSWINSZ	0x5414
+-#define TIOCMGET	0x5415
+-#define TIOCMBIS	0x5416
+-#define TIOCMBIC	0x5417
+-#define TIOCMSET	0x5418
+-#define TIOCGSOFTCAR	0x5419
+-#define TIOCSSOFTCAR	0x541A
+-#define FIONREAD	0x541B
+-#define TIOCINQ		FIONREAD
+-#define TIOCLINUX	0x541C
+-#define TIOCCONS	0x541D
+-#define TIOCGSERIAL	0x541E
+-#define TIOCSSERIAL	0x541F
+-#define TIOCPKT		0x5420
+-#define FIONBIO		0x5421
+-#define TIOCNOTTY	0x5422
+-#define TIOCSETD	0x5423
+-#define TIOCGETD	0x5424
+-#define TCSBRKP		0x5425
+-#define TIOCTTYGSTRUCT	0x5426
+-#define TIOCSBRK	0x5427
+-#define TIOCCBRK	0x5428
+-#define TIOCGSID	0x5429
+-#define TIOCGPTN	0x80045430
+-#define TIOCSPTLCK	0x40045431
+-#define TCGETX		0x5432
+-#define TCSETX		0x5433
+-#define TCSETXF		0x5434
+-#define TCSETXW		0x5435
+-
+-#define FIONCLEX	0x5450
+-#define FIOCLEX		0x5451
+-#define FIOASYNC	0x5452
+-#define TIOCSERCONFIG	0x5453
+-#define TIOCSERGWILD	0x5454
+-#define TIOCSERSWILD	0x5455
+-#define TIOCGLCKTRMIOS	0x5456
+-#define TIOCSLCKTRMIOS	0x5457
+-#define TIOCSERGSTRUCT	0x5458
+-#define TIOCSERGETLSR   0x5459
+-#define TIOCSERGETMULTI 0x545A
+-#define TIOCSERSETMULTI 0x545B
+-
+-#define TIOCMIWAIT	0x545C
+-#define TIOCGICOUNT	0x545D
+-#define TIOCGHAYESESP   0x545E
+-#define TIOCSHAYESESP   0x545F
+-#define FIOQSIZE	0x5460
+-
+-#define TIOCPKT_DATA		 0
+-#define TIOCPKT_FLUSHREAD	 1
+-#define TIOCPKT_FLUSHWRITE	 2
+-#define TIOCPKT_STOP		 4
+-#define TIOCPKT_START		 8
+-#define TIOCPKT_NOSTOP		16
+-#define TIOCPKT_DOSTOP		32
+-#define TIOCPKT_IOCTL		64
+-
+-#define TIOCSER_TEMT    0x01
+-
+-struct winsize {
+-	unsigned short ws_row;
+-	unsigned short ws_col;
+-	unsigned short ws_xpixel;
+-	unsigned short ws_ypixel;
+-};
+-
+-#define TIOCM_LE        0x001
+-#define TIOCM_DTR       0x002
+-#define TIOCM_RTS       0x004
+-#define TIOCM_ST        0x008
+-#define TIOCM_SR        0x010
+-#define TIOCM_CTS       0x020
+-#define TIOCM_CAR       0x040
+-#define TIOCM_RNG       0x080
+-#define TIOCM_DSR       0x100
+-#define TIOCM_CD        TIOCM_CAR
+-#define TIOCM_RI        TIOCM_RNG
+-#define TIOCM_OUT1      0x2000
+-#define TIOCM_OUT2      0x4000
+-#define TIOCM_LOOP      0x8000
+-#define TIOCM_MODEM_BITS TIOCM_OUT2
+-
+-#define N_TTY           0
+-#define N_SLIP          1
+-#define N_MOUSE         2
+-#define N_PPP           3
+-#define N_STRIP         4
+-#define N_AX25          5
+-#define N_X25           6
+-#define N_6PACK         7
+-#define N_MASC          8
+-#define N_R3964         9
+-#define N_PROFIBUS_FDL  10
+-#define N_IRDA          11
+-#define N_SMSBLOCK      12
+-#define N_HDLC          13
+-#define N_SYNC_PPP      14
+-#define N_HCI           15
+-
+-#define FIOSETOWN       0x8901
+-#define SIOCSPGRP       0x8902
+-#define FIOGETOWN       0x8903
+-#define SIOCGPGRP       0x8904
+-#define SIOCATMARK      0x8905
+-#define SIOCGSTAMP      0x8906
+-
+-#define SIOCADDRT       0x890B
+-#define SIOCDELRT       0x890C
+-#define SIOCRTMSG       0x890D
+-
+-#define SIOCGIFNAME     0x8910
+-#define SIOCSIFLINK     0x8911
+-#define SIOCGIFCONF     0x8912
+-#define SIOCGIFFLAGS    0x8913
+-#define SIOCSIFFLAGS    0x8914
+-#define SIOCGIFADDR     0x8915
+-#define SIOCSIFADDR     0x8916
+-#define SIOCGIFDSTADDR  0x8917
+-#define SIOCSIFDSTADDR  0x8918
+-#define SIOCGIFBRDADDR  0x8919
+-#define SIOCSIFBRDADDR  0x891a
+-#define SIOCGIFNETMASK  0x891b
+-#define SIOCSIFNETMASK  0x891c
+-#define SIOCGIFMETRIC   0x891d
+-#define SIOCSIFMETRIC   0x891e
+-#define SIOCGIFMEM      0x891f
+-#define SIOCSIFMEM      0x8920
+-#define SIOCGIFMTU      0x8921
+-#define SIOCSIFMTU      0x8922
+-#define SIOCSIFHWADDR   0x8924
+-#define SIOCGIFENCAP    0x8925
+-#define SIOCSIFENCAP    0x8926
+-#define SIOCGIFHWADDR   0x8927
+-#define SIOCGIFSLAVE    0x8929
+-#define SIOCSIFSLAVE    0x8930
+-#define SIOCADDMULTI    0x8931
+-#define SIOCDELMULTI    0x8932
+-#define SIOCGIFINDEX    0x8933
+-#define SIOGIFINDEX     SIOCGIFINDEX
+-#define SIOCSIFPFLAGS   0x8934
+-#define SIOCGIFPFLAGS   0x8935
+-#define SIOCDIFADDR     0x8936
+-#define SIOCSIFHWBROADCAST 0x8937
+-#define SIOCGIFCOUNT    0x8938
+-
+-#define SIOCGIFBR       0x8940
+-#define SIOCSIFBR       0x8941
+-
+-#define SIOCGIFTXQLEN   0x8942
+-#define SIOCSIFTXQLEN   0x8943
+-
+-#define SIOCDARP        0x8953
+-#define SIOCGARP        0x8954
+-#define SIOCSARP        0x8955
+-
+-#define SIOCDRARP       0x8960
+-#define SIOCGRARP       0x8961
+-#define SIOCSRARP       0x8962
+-
+-#define SIOCGIFMAP      0x8970
+-#define SIOCSIFMAP      0x8971
+-
+-#define SIOCADDDLCI     0x8980
+-#define SIOCDELDLCI     0x8981
+-
+-#define SIOCDEVPRIVATE		0x89F0
+-#define SIOCPROTOPRIVATE	0x89E0
+--- a/arch/microblaze/bits/ipc.h
++++ /dev/null
+@@ -1,14 +0,0 @@
+-struct ipc_perm
+-{
+-	key_t __ipc_perm_key;
+-	uid_t uid;
+-	gid_t gid;
+-	uid_t cuid;
+-	gid_t cgid;
+-	mode_t mode;
+-	int __ipc_perm_seq;
+-	long __pad1;
+-	long __pad2;
+-};
+-
+-#define IPC_64 0x100
+--- a/arch/microblaze/bits/mman.h
++++ b/arch/microblaze/bits/mman.h
+@@ -37,6 +37,7 @@
+ 
+ #define MCL_CURRENT     1
+ #define MCL_FUTURE      2
++#define MCL_ONFAULT     4
+ 
+ #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+ #define MADV_NORMAL      0
+--- a/arch/microblaze/bits/msg.h
++++ /dev/null
+@@ -1,16 +0,0 @@
+-struct msqid_ds
+-{
+-	struct ipc_perm msg_perm;
+-	time_t msg_stime;
+-	int __unused1;
+-	time_t msg_rtime;
+-	int __unused2;
+-	time_t msg_ctime;
+-	int __unused3;
+-	unsigned long msg_cbytes;
+-	msgqnum_t msg_qnum;
+-	msglen_t msg_qbytes;
+-	pid_t msg_lspid;
+-	pid_t msg_lrpid;
+-	unsigned long __unused[2];
+-};
+--- a/arch/microblaze/bits/sem.h
++++ /dev/null
+@@ -1,16 +0,0 @@
+-struct semid_ds {
+-	struct ipc_perm sem_perm;
+-	time_t sem_otime;
+-	time_t __unused1;
+-	time_t sem_ctime;
+-	time_t __unused2;
+-#if __BYTE_ORDER == __LITTLE_ENDIAN
+-	unsigned short sem_nsems;
+-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+-#else
+-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+-	unsigned short sem_nsems;
+-#endif
+-	time_t __unused3;
+-	time_t __unused4;
+-};
+--- a/arch/microblaze/bits/shm.h
++++ /dev/null
+@@ -1,29 +0,0 @@
+-#define SHMLBA 4096
+-
+-struct shmid_ds
+-{
+-	struct ipc_perm shm_perm;
+-	size_t shm_segsz;
+-	time_t shm_atime;
+-	int __unused1;
+-	time_t shm_dtime;
+-	int __unused2;
+-	time_t shm_ctime;
+-	int __unused3;
+-	pid_t shm_cpid;
+-	pid_t shm_lpid;
+-	unsigned long shm_nattch;
+-	unsigned long __pad1;
+-	unsigned long __pad2;
+-};
+-
+-struct shminfo {
+-	unsigned long shmmax, shmmin, shmmni, shmseg, shmall, __unused[4];
+-};
+-
+-struct shm_info {
+-	int __used_ids;
+-	unsigned long shm_tot, shm_rss, shm_swp;
+-	unsigned long __swap_attempts, __swap_successes;
+-};
+-
+--- a/arch/microblaze/bits/socket.h
++++ /dev/null
+@@ -1,17 +0,0 @@
+-struct msghdr
+-{
+-	void *msg_name;
+-	socklen_t msg_namelen;
+-	struct iovec *msg_iov;
+-	int msg_iovlen;
+-	void *msg_control;
+-	socklen_t msg_controllen;
+-	int msg_flags;
+-};
+-
+-struct cmsghdr
+-{
+-	socklen_t cmsg_len;
+-	int cmsg_level;
+-	int cmsg_type;
+-};
+--- a/arch/microblaze/bits/statfs.h
++++ /dev/null
+@@ -1,7 +0,0 @@
+-struct statfs {
+-	unsigned long f_type, f_bsize;
+-	fsblkcnt_t f_blocks, f_bfree, f_bavail;
+-	fsfilcnt_t f_files, f_ffree;
+-	fsid_t f_fsid;
+-	unsigned long f_namelen, f_frsize, f_flags, f_spare[4];
+-};
+--- a/arch/microblaze/bits/stdarg.h
++++ /dev/null
+@@ -1,4 +0,0 @@
+-#define va_start(v,l)   __builtin_va_start(v,l)
+-#define va_end(v)       __builtin_va_end(v)
+-#define va_arg(v,l)     __builtin_va_arg(v,l)
+-#define va_copy(d,s)    __builtin_va_copy(d,s)
+--- a/arch/microblaze/bits/termios.h
++++ /dev/null
+@@ -1,160 +0,0 @@
+-struct termios
+-{
+-	tcflag_t c_iflag;
+-	tcflag_t c_oflag;
+-	tcflag_t c_cflag;
+-	tcflag_t c_lflag;
+-	cc_t c_line;
+-	cc_t c_cc[NCCS];
+-	speed_t __c_ispeed;
+-	speed_t __c_ospeed;
+-};
+-
+-#define VINTR     0
+-#define VQUIT     1
+-#define VERASE    2
+-#define VKILL     3
+-#define VEOF      4
+-#define VTIME     5
+-#define VMIN      6
+-#define VSWTC     7
+-#define VSTART    8
+-#define VSTOP     9
+-#define VSUSP    10
+-#define VEOL     11
+-#define VREPRINT 12
+-#define VDISCARD 13
+-#define VWERASE  14
+-#define VLNEXT   15
+-#define VEOL2    16
+-
+-#define IGNBRK  0000001
+-#define BRKINT  0000002
+-#define IGNPAR  0000004
+-#define PARMRK  0000010
+-#define INPCK   0000020
+-#define ISTRIP  0000040
+-#define INLCR   0000100
+-#define IGNCR   0000200
+-#define ICRNL   0000400
+-#define IUCLC   0001000
+-#define IXON    0002000
+-#define IXANY   0004000
+-#define IXOFF   0010000
+-#define IMAXBEL 0020000
+-#define IUTF8   0040000
+-
+-#define OPOST  0000001
+-#define OLCUC  0000002
+-#define ONLCR  0000004
+-#define OCRNL  0000010
+-#define ONOCR  0000020
+-#define ONLRET 0000040
+-#define OFILL  0000100
+-#define OFDEL  0000200
+-#define NLDLY  0000400
+-#define NL0    0000000
+-#define NL1    0000400
+-#define CRDLY  0003000
+-#define CR0    0000000
+-#define CR1    0001000
+-#define CR2    0002000
+-#define CR3    0003000
+-#define TABDLY 0014000
+-#define TAB0   0000000
+-#define TAB1   0004000
+-#define TAB2   0010000
+-#define TAB3   0014000
+-#define BSDLY  0020000
+-#define BS0    0000000
+-#define BS1    0020000
+-#define FFDLY  0100000
+-#define FF0    0000000
+-#define FF1    0100000
+-
+-#define VTDLY  0040000
+-#define VT0    0000000
+-#define VT1    0040000
+-
+-#define B0       0000000
+-#define B50      0000001
+-#define B75      0000002
+-#define B110     0000003
+-#define B134     0000004
+-#define B150     0000005
+-#define B200     0000006
+-#define B300     0000007
+-#define B600     0000010
+-#define B1200    0000011
+-#define B1800    0000012
+-#define B2400    0000013
+-#define B4800    0000014
+-#define B9600    0000015
+-#define B19200   0000016
+-#define B38400   0000017
+-
+-#define B57600   0010001
+-#define B115200  0010002
+-#define B230400  0010003
+-#define B460800  0010004
+-#define B500000  0010005
+-#define B576000  0010006
+-#define B921600  0010007
+-#define B1000000 0010010
+-#define B1152000 0010011
+-#define B1500000 0010012
+-#define B2000000 0010013
+-#define B2500000 0010014
+-#define B3000000 0010015
+-#define B3500000 0010016
+-#define B4000000 0010017
+-
+-#define CBAUD    0010017
+-
+-#define CSIZE  0000060
+-#define CS5    0000000
+-#define CS6    0000020
+-#define CS7    0000040
+-#define CS8    0000060
+-#define CSTOPB 0000100
+-#define CREAD  0000200
+-#define PARENB 0000400
+-#define PARODD 0001000
+-#define HUPCL  0002000
+-#define CLOCAL 0004000
+-
+-#define ISIG   0000001
+-#define ICANON 0000002
+-#define ECHO   0000010
+-#define ECHOE  0000020
+-#define ECHOK  0000040
+-#define ECHONL 0000100
+-#define NOFLSH 0000200
+-#define TOSTOP 0000400
+-#define IEXTEN 0100000
+-
+-#define ECHOCTL 0001000
+-#define ECHOPRT 0002000
+-#define ECHOKE 0004000
+-#define FLUSHO 0010000
+-#define PENDIN 0040000
+-
+-#define TCOOFF 0
+-#define TCOON  1
+-#define TCIOFF 2
+-#define TCION  3
+-
+-#define TCIFLUSH  0
+-#define TCOFLUSH  1
+-#define TCIOFLUSH 2
+-
+-#define TCSANOW   0
+-#define TCSADRAIN 1
+-#define TCSAFLUSH 2
+-
+-#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+-#define CBAUDEX 0010000
+-#define CRTSCTS  020000000000
+-#define EXTPROC 0200000
+-#define XTABS  0014000
+-#endif
+--- a/arch/microblaze/pthread_arch.h
++++ b/arch/microblaze/pthread_arch.h
+@@ -7,4 +7,4 @@ static inline struct pthread *__pthread_
+ 
+ #define TP_ADJ(p) (p)
+ 
+-#define CANCEL_REG_IP 32
++#define MC_PC regs.pc
+--- a/arch/mips/atomic.h
++++ /dev/null
+@@ -1,205 +0,0 @@
+-#ifndef _INTERNAL_ATOMIC_H
+-#define _INTERNAL_ATOMIC_H
+-
+-#include <stdint.h>
+-
+-static inline int a_ctz_l(unsigned long x)
+-{
+-	static const char debruijn32[32] = {
+-		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
+-		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
+-	};
+-	return debruijn32[(x&-x)*0x076be629 >> 27];
+-}
+-
+-static inline int a_ctz_64(uint64_t x)
+-{
+-	uint32_t y = x;
+-	if (!y) {
+-		y = x>>32;
+-		return 32 + a_ctz_l(y);
+-	}
+-	return a_ctz_l(y);
+-}
+-
+-static inline int a_cas(volatile int *p, int t, int s)
+-{
+-	int dummy;
+-	__asm__ __volatile__(
+-		".set push\n"
+-		".set mips2\n"
+-		".set noreorder\n"
+-		"	sync\n"
+-		"1:	ll %0, %2\n"
+-		"	bne %0, %3, 1f\n"
+-		"	addu %1, %4, $0\n"
+-		"	sc %1, %2\n"
+-		"	beq %1, $0, 1b\n"
+-		"	nop\n"
+-		"	sync\n"
+-		"1:	\n"
+-		".set pop\n"
+-		: "=&r"(t), "=&r"(dummy), "+m"(*p) : "r"(t), "r"(s) : "memory" );
+-        return t;
+-}
+-
+-static inline void *a_cas_p(volatile void *p, void *t, void *s)
+-{
+-	return (void *)a_cas(p, (int)t, (int)s);
+-}
+-
+-static inline int a_swap(volatile int *x, int v)
+-{
+-	int old, dummy;
+-	__asm__ __volatile__(
+-		".set push\n"
+-		".set mips2\n"
+-		".set noreorder\n"
+-		"	sync\n"
+-		"1:	ll %0, %2\n"
+-		"	addu %1, %3, $0\n"
+-		"	sc %1, %2\n"
+-		"	beq %1, $0, 1b\n"
+-		"	nop\n"
+-		"	sync\n"
+-		".set pop\n"
+-		: "=&r"(old), "=&r"(dummy), "+m"(*x) : "r"(v) : "memory" );
+-        return old;
+-}
+-
+-static inline int a_fetch_add(volatile int *x, int v)
+-{
+-	int old, dummy;
+-	__asm__ __volatile__(
+-		".set push\n"
+-		".set mips2\n"
+-		".set noreorder\n"
+-		"	sync\n"
+-		"1:	ll %0, %2\n"
+-		"	addu %1, %0, %3\n"
+-		"	sc %1, %2\n"
+-		"	beq %1, $0, 1b\n"
+-		"	nop\n"
+-		"	sync\n"
+-		".set pop\n"
+-		: "=&r"(old), "=&r"(dummy), "+m"(*x) : "r"(v) : "memory" );
+-        return old;
+-}
+-
+-static inline void a_inc(volatile int *x)
+-{
+-	int dummy;
+-	__asm__ __volatile__(
+-		".set push\n"
+-		".set mips2\n"
+-		".set noreorder\n"
+-		"	sync\n"
+-		"1:	ll %0, %1\n"
+-		"	addu %0, %0, 1\n"
+-		"	sc %0, %1\n"
+-		"	beq %0, $0, 1b\n"
+-		"	nop\n"
+-		"	sync\n"
+-		".set pop\n"
+-		: "=&r"(dummy), "+m"(*x) : : "memory" );
+-}
+-
+-static inline void a_dec(volatile int *x)
+-{
+-	int dummy;
+-	__asm__ __volatile__(
+-		".set push\n"
+-		".set mips2\n"
+-		".set noreorder\n"
+-		"	sync\n"
+-		"1:	ll %0, %1\n"
+-		"	subu %0, %0, 1\n"
+-		"	sc %0, %1\n"
+-		"	beq %0, $0, 1b\n"
+-		"	nop\n"
+-		"	sync\n"
+-		".set pop\n"
+-		: "=&r"(dummy), "+m"(*x) : : "memory" );
+-}
+-
+-static inline void a_store(volatile int *p, int x)
+-{
+-	__asm__ __volatile__(
+-		".set push\n"
+-		".set mips2\n"
+-		".set noreorder\n"
+-		"	sync\n"
+-		"	sw %1, %0\n"
+-		"	sync\n"
+-		".set pop\n"
+-		: "+m"(*p) : "r"(x) : "memory" );
+-}
+-
+-#define a_spin a_barrier
+-
+-static inline void a_barrier()
+-{
+-	a_cas(&(int){0}, 0, 0);
+-}
+-
+-static inline void a_crash()
+-{
+-	*(volatile char *)0=0;
+-}
+-
+-static inline void a_and(volatile int *p, int v)
+-{
+-	int dummy;
+-	__asm__ __volatile__(
+-		".set push\n"
+-		".set mips2\n"
+-		".set noreorder\n"
+-		"	sync\n"
+-		"1:	ll %0, %1\n"
+-		"	and %0, %0, %2\n"
+-		"	sc %0, %1\n"
+-		"	beq %0, $0, 1b\n"
+-		"	nop\n"
+-		"	sync\n"
+-		".set pop\n"
+-		: "=&r"(dummy), "+m"(*p) : "r"(v) : "memory" );
+-}
+-
+-static inline void a_or(volatile int *p, int v)
+-{
+-	int dummy;
+-	__asm__ __volatile__(
+-		".set push\n"
+-		".set mips2\n"
+-		".set noreorder\n"
+-		"	sync\n"
+-		"1:	ll %0, %1\n"
+-		"	or %0, %0, %2\n"
+-		"	sc %0, %1\n"
+-		"	beq %0, $0, 1b\n"
+-		"	nop\n"
+-		"	sync\n"
+-		".set pop\n"
+-		: "=&r"(dummy), "+m"(*p) : "r"(v) : "memory" );
+-}
+-
+-static inline void a_or_l(volatile void *p, long v)
+-{
+-	a_or(p, v);
+-}
+-
+-static inline void a_and_64(volatile uint64_t *p, uint64_t v)
+-{
+-	union { uint64_t v; uint32_t r[2]; } u = { v };
+-	a_and((int *)p, u.r[0]);
+-	a_and((int *)p+1, u.r[1]);
+-}
+-
+-static inline void a_or_64(volatile uint64_t *p, uint64_t v)
+-{
+-	union { uint64_t v; uint32_t r[2]; } u = { v };
+-	a_or((int *)p, u.r[0]);
+-	a_or((int *)p+1, u.r[1]);
+-}
+-
+-#endif
+--- /dev/null
++++ b/arch/mips/atomic_arch.h
+@@ -0,0 +1,39 @@
++#define a_ll a_ll
++static inline int a_ll(volatile int *p)
++{
++	int v;
++	__asm__ __volatile__ (
++		".set push ; .set mips2\n\t"
++		"ll %0, %1"
++		"\n\t.set pop"
++		: "=r"(v) : "m"(*p));
++	return v;
++}
++
++#define a_sc a_sc
++static inline int a_sc(volatile int *p, int v)
++{
++	int r;
++	__asm__ __volatile__ (
++		".set push ; .set mips2\n\t"
++		"sc %0, %1"
++		"\n\t.set pop"
++		: "=r"(r), "=m"(*p) : "0"(v) : "memory");
++	return r;
++}
++
++#define a_barrier a_barrier
++static inline void a_barrier()
++{
++	/* mips2 sync, but using too many directives causes
++	 * gcc not to inline it, so encode with .long instead. */
++	__asm__ __volatile__ (".long 0xf" : : : "memory");
++#if 0
++	__asm__ __volatile__ (
++		".set push ; .set mips2 ; sync ; .set pop"
++		: : : "memory");
++#endif
++}
++
++#define a_pre_llsc a_barrier
++#define a_post_llsc a_barrier
+--- a/arch/mips/bits/ipc.h
++++ /dev/null
+@@ -1,14 +0,0 @@
+-struct ipc_perm
+-{
+-	key_t __ipc_perm_key;
+-	uid_t uid;
+-	gid_t gid;
+-	uid_t cuid;
+-	gid_t cgid;
+-	mode_t mode;
+-	int __ipc_perm_seq;
+-	long __pad1;
+-	long __pad2;
+-};
+-
+-#define IPC_64 0x100
+--- a/arch/mips/bits/mman.h
++++ b/arch/mips/bits/mman.h
+@@ -37,6 +37,7 @@
+ 
+ #define MCL_CURRENT     1
+ #define MCL_FUTURE      2
++#define MCL_ONFAULT     4
+ 
+ #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+ #define MADV_NORMAL      0
+--- a/arch/mips/bits/signal.h
++++ b/arch/mips/bits/signal.h
+@@ -73,6 +73,15 @@ typedef struct __ucontext {
+ #define SIG_UNBLOCK   2
+ #define SIG_SETMASK   3
+ 
++#undef SI_ASYNCIO
++#undef SI_MESGQ
++#undef SI_TIMER
++#define SI_ASYNCIO (-2)
++#define SI_MESGQ (-4)
++#define SI_TIMER (-3)
++
++#define __SI_SWAP_ERRNO_CODE
++
+ #endif
+ 
+ #define SIGHUP    1
+--- a/arch/mips/bits/stdarg.h
++++ /dev/null
+@@ -1,4 +0,0 @@
+-#define va_start(v,l)   __builtin_va_start(v,l)
+-#define va_end(v)       __builtin_va_end(v)
+-#define va_arg(v,l)     __builtin_va_arg(v,l)
+-#define va_copy(d,s)    __builtin_va_copy(d,s)
+--- a/arch/mips/bits/syscall.h
++++ b/arch/mips/bits/syscall.h
+@@ -354,6 +354,9 @@
+ #define __NR_memfd_create            4354
+ #define __NR_bpf                     4355
+ #define __NR_execveat                4356
++#define __NR_userfaultfd             4357
++#define __NR_membarrier              4358
++#define __NR_mlock2                  4359
+ 
+ 
+ /* Repeated with SYS_ prefix */
+@@ -713,3 +716,6 @@
+ #define SYS_memfd_create            4354
+ #define SYS_bpf                     4355
+ #define SYS_execveat                4356
++#define SYS_userfaultfd             4357
++#define SYS_membarrier              4358
++#define SYS_mlock2                  4359
+--- a/arch/mips/crt_arch.h
++++ b/arch/mips/crt_arch.h
+@@ -4,13 +4,16 @@ __asm__(
+ ".text \n"
+ ".global _" START "\n"
+ ".global " START "\n"
++".global " START "_data\n"
+ ".type   _" START ", @function\n"
+ ".type   " START ", @function\n"
++".type   " START "_data, @function\n"
+ "_" START ":\n"
+ "" START ":\n"
+ "	bal 1f \n"
+ "	 move $fp, $0 \n"
+-"2:	.gpword 2b \n"
++"" START "_data: \n"
++"	.gpword " START "_data \n"
+ "	.gpword " START "_c \n"
+ ".weak _DYNAMIC \n"
+ ".hidden _DYNAMIC \n"
+--- a/arch/mips/pthread_arch.h
++++ b/arch/mips/pthread_arch.h
+@@ -16,4 +16,4 @@ static inline struct pthread *__pthread_
+ 
+ #define DTP_OFFSET 0x8000
+ 
+-#define CANCEL_REG_IP (3-(union {int __i; char __b;}){1}.__b)
++#define MC_PC pc
+--- a/arch/mips/syscall_arch.h
++++ b/arch/mips/syscall_arch.h
+@@ -3,9 +3,7 @@
+ ((union { long long ll; long l[2]; }){ .ll = x }).l[1]
+ #define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))
+ 
+-#ifdef SHARED
+ __attribute__((visibility("hidden")))
+-#endif
+ long (__syscall)(long, ...);
+ 
+ #define SYSCALL_RLIM_INFINITY (-1UL/2)
+@@ -163,3 +161,7 @@ static inline long __syscall6(long n, lo
+ 	if (n == SYS_fstatat) __stat_fix(c);
+ 	return r2;
+ }
++
++#define VDSO_USEFUL
++#define VDSO_CGT_SYM "__vdso_clock_gettime"
++#define VDSO_CGT_VER "LINUX_2.6"
+--- a/arch/or1k/atomic.h
++++ /dev/null
+@@ -1,120 +0,0 @@
+-#ifndef _INTERNAL_ATOMIC_H
+-#define _INTERNAL_ATOMIC_H
+-
+-#include <stdint.h>
+-
+-static inline int a_ctz_l(unsigned long x)
+-{
+-	static const char debruijn32[32] = {
+-		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
+-		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
+-	};
+-	return debruijn32[(x&-x)*0x076be629 >> 27];
+-}
+-
+-static inline int a_ctz_64(uint64_t x)
+-{
+-	uint32_t y = x;
+-	if (!y) {
+-		y = x>>32;
+-		return 32 + a_ctz_l(y);
+-	}
+-	return a_ctz_l(y);
+-}
+-
+-static inline int a_cas(volatile int *p, int t, int s)
+-{
+-	__asm__("1:	l.lwa %0, %1\n"
+-		"	l.sfeq %0, %2\n"
+-		"	l.bnf 1f\n"
+-		"	 l.nop\n"
+-		"	l.swa %1, %3\n"
+-		"	l.bnf 1b\n"
+-		"	 l.nop\n"
+-		"1:	\n"
+-		: "=&r"(t), "+m"(*p) : "r"(t), "r"(s) : "cc", "memory" );
+-        return t;
+-}
+-
+-static inline void *a_cas_p(volatile void *p, void *t, void *s)
+-{
+-	return (void *)a_cas(p, (int)t, (int)s);
+-}
+-
+-static inline int a_swap(volatile int *x, int v)
+-{
+-	int old;
+-	do old = *x;
+-	while (a_cas(x, old, v) != old);
+-	return old;
+-}
+-
+-static inline int a_fetch_add(volatile int *x, int v)
+-{
+-	int old;
+-	do old = *x;
+-	while (a_cas(x, old, old+v) != old);
+-	return old;
+-}
+-
+-static inline void a_inc(volatile int *x)
+-{
+-	a_fetch_add(x, 1);
+-}
+-
+-static inline void a_dec(volatile int *x)
+-{
+-	a_fetch_add(x, -1);
+-}
+-
+-static inline void a_store(volatile int *p, int x)
+-{
+-	a_swap(p, x);
+-}
+-
+-#define a_spin a_barrier
+-
+-static inline void a_barrier()
+-{
+-	a_cas(&(int){0}, 0, 0);
+-}
+-
+-static inline void a_crash()
+-{
+-	*(volatile char *)0=0;
+-}
+-
+-static inline void a_and(volatile int *p, int v)
+-{
+-	int old;
+-	do old = *p;
+-	while (a_cas(p, old, old&v) != old);
+-}
+-
+-static inline void a_or(volatile int *p, int v)
+-{
+-	int old;
+-	do old = *p;
+-	while (a_cas(p, old, old|v) != old);
+-}
+-
+-static inline void a_or_l(volatile void *p, long v)
+-{
+-	a_or(p, v);
+-}
+-
+-static inline void a_and_64(volatile uint64_t *p, uint64_t v)
+-{
+-	union { uint64_t v; uint32_t r[2]; } u = { v };
+-	a_and((int *)p, u.r[0]);
+-	a_and((int *)p+1, u.r[1]);
+-}
+-
+-static inline void a_or_64(volatile uint64_t *p, uint64_t v)
+-{
+-	union { uint64_t v; uint32_t r[2]; } u = { v };
+-	a_or((int *)p, u.r[0]);
+-	a_or((int *)p+1, u.r[1]);
+-}
+-
+-#endif
+--- /dev/null
++++ b/arch/or1k/atomic_arch.h
+@@ -0,0 +1,14 @@
++#define a_cas a_cas
++static inline int a_cas(volatile int *p, int t, int s)
++{
++	__asm__("1:	l.lwa %0, %1\n"
++		"	l.sfeq %0, %2\n"
++		"	l.bnf 1f\n"
++		"	 l.nop\n"
++		"	l.swa %1, %3\n"
++		"	l.bnf 1b\n"
++		"	 l.nop\n"
++		"1:	\n"
++		: "=&r"(t), "+m"(*p) : "r"(t), "r"(s) : "cc", "memory" );
++        return t;
++}
+--- a/arch/or1k/bits/errno.h
++++ /dev/null
+@@ -1,134 +0,0 @@
+-#define EPERM            1
+-#define ENOENT           2
+-#define ESRCH            3
+-#define EINTR            4
+-#define EIO              5
+-#define ENXIO            6
+-#define E2BIG            7
+-#define ENOEXEC          8
+-#define EBADF            9
+-#define ECHILD          10
+-#define EAGAIN          11
+-#define ENOMEM          12
+-#define EACCES          13
+-#define EFAULT          14
+-#define ENOTBLK         15
+-#define EBUSY           16
+-#define EEXIST          17
+-#define EXDEV           18
+-#define ENODEV          19
+-#define ENOTDIR         20
+-#define EISDIR          21
+-#define EINVAL          22
+-#define ENFILE          23
+-#define EMFILE          24
+-#define ENOTTY          25
+-#define ETXTBSY         26
+-#define EFBIG           27
+-#define ENOSPC          28
+-#define ESPIPE          29
+-#define EROFS           30
+-#define EMLINK          31
+-#define EPIPE           32
+-#define EDOM            33
+-#define ERANGE          34
+-#define EDEADLK         35
+-#define ENAMETOOLONG    36
+-#define ENOLCK          37
+-#define ENOSYS          38
+-#define ENOTEMPTY       39
+-#define ELOOP           40
+-#define EWOULDBLOCK     EAGAIN
+-#define ENOMSG          42
+-#define EIDRM           43
+-#define ECHRNG          44
+-#define EL2NSYNC        45
+-#define EL3HLT          46
+-#define EL3RST          47
+-#define ELNRNG          48
+-#define EUNATCH         49
+-#define ENOCSI          50
+-#define EL2HLT          51
+-#define EBADE           52
+-#define EBADR           53
+-#define EXFULL          54
+-#define ENOANO          55
+-#define EBADRQC         56
+-#define EBADSLT         57
+-#define EDEADLOCK       EDEADLK
+-#define EBFONT          59
+-#define ENOSTR          60
+-#define ENODATA         61
+-#define ETIME           62
+-#define ENOSR           63
+-#define ENONET          64
+-#define ENOPKG          65
+-#define EREMOTE         66
+-#define ENOLINK         67
+-#define EADV            68
+-#define ESRMNT          69
+-#define ECOMM           70
+-#define EPROTO          71
+-#define EMULTIHOP       72
+-#define EDOTDOT         73
+-#define EBADMSG         74
+-#define EOVERFLOW       75
+-#define ENOTUNIQ        76
+-#define EBADFD          77
+-#define EREMCHG         78
+-#define ELIBACC         79
+-#define ELIBBAD         80
+-#define ELIBSCN         81
+-#define ELIBMAX         82
+-#define ELIBEXEC        83
+-#define EILSEQ          84
+-#define ERESTART        85
+-#define ESTRPIPE        86
+-#define EUSERS          87
+-#define ENOTSOCK        88
+-#define EDESTADDRREQ    89
+-#define EMSGSIZE        90
+-#define EPROTOTYPE      91
+-#define ENOPROTOOPT     92
+-#define EPROTONOSUPPORT 93
+-#define ESOCKTNOSUPPORT 94
+-#define EOPNOTSUPP      95
+-#define ENOTSUP         EOPNOTSUPP
+-#define EPFNOSUPPORT    96
+-#define EAFNOSUPPORT    97
+-#define EADDRINUSE      98
+-#define EADDRNOTAVAIL   99
+-#define ENETDOWN        100
+-#define ENETUNREACH     101
+-#define ENETRESET       102
+-#define ECONNABORTED    103
+-#define ECONNRESET      104
+-#define ENOBUFS         105
+-#define EISCONN         106
+-#define ENOTCONN        107
+-#define ESHUTDOWN       108
+-#define ETOOMANYREFS    109
+-#define ETIMEDOUT       110
+-#define ECONNREFUSED    111
+-#define EHOSTDOWN       112
+-#define EHOSTUNREACH    113
+-#define EALREADY        114
+-#define EINPROGRESS     115
+-#define ESTALE          116
+-#define EUCLEAN         117
+-#define ENOTNAM         118
+-#define ENAVAIL         119
+-#define EISNAM          120
+-#define EREMOTEIO       121
+-#define EDQUOT          122
+-#define ENOMEDIUM       123
+-#define EMEDIUMTYPE     124
+-#define ECANCELED       125
+-#define ENOKEY          126
+-#define EKEYEXPIRED     127
+-#define EKEYREVOKED     128
+-#define EKEYREJECTED    129
+-#define EOWNERDEAD      130
+-#define ENOTRECOVERABLE 131
+-#define ERFKILL         132
+-#define EHWPOISON       133
+--- a/arch/or1k/bits/fcntl.h
++++ /dev/null
+@@ -1,40 +0,0 @@
+-#define O_CREAT        0100
+-#define O_EXCL         0200
+-#define O_NOCTTY       0400
+-#define O_TRUNC       01000
+-#define O_APPEND      02000
+-#define O_NONBLOCK    04000
+-#define O_DSYNC      010000
+-#define O_SYNC     04010000
+-#define O_RSYNC    04010000
+-#define O_DIRECTORY 0200000
+-#define O_NOFOLLOW  0400000
+-#define O_CLOEXEC  02000000
+-
+-#define O_ASYNC      020000
+-#define O_DIRECT     040000
+-#define O_LARGEFILE 0100000
+-#define O_NOATIME  01000000
+-#define O_PATH    010000000
+-#define O_TMPFILE 020200000
+-#define O_NDELAY O_NONBLOCK
+-
+-#define F_DUPFD  0
+-#define F_GETFD  1
+-#define F_SETFD  2
+-#define F_GETFL  3
+-#define F_SETFL  4
+-
+-#define F_SETOWN 8
+-#define F_GETOWN 9
+-#define F_SETSIG 10
+-#define F_GETSIG 11
+-
+-#define F_GETLK 12
+-#define F_SETLK 13
+-#define F_SETLKW 14
+-
+-#define F_SETOWN_EX 15
+-#define F_GETOWN_EX 16
+-
+-#define F_GETOWNER_UIDS 17
+--- a/arch/or1k/bits/fenv.h
++++ /dev/null
+@@ -1,10 +0,0 @@
+-#define FE_ALL_EXCEPT 0
+-#define FE_TONEAREST  0
+-
+-typedef unsigned long fexcept_t;
+-
+-typedef struct {
+-	unsigned long __cw;
+-} fenv_t;
+-
+-#define FE_DFL_ENV      ((const fenv_t *) -1)
+--- a/arch/or1k/bits/ioctl.h
++++ /dev/null
+@@ -1,197 +0,0 @@
+-#define _IOC(a,b,c,d) ( ((a)<<30) | ((b)<<8) | (c) | ((d)<<16) )
+-#define _IOC_NONE  0U
+-#define _IOC_WRITE 1U
+-#define _IOC_READ  2U
+-
+-#define _IO(a,b) _IOC(_IOC_NONE,(a),(b),0)
+-#define _IOW(a,b,c) _IOC(_IOC_WRITE,(a),(b),sizeof(c))
+-#define _IOR(a,b,c) _IOC(_IOC_READ,(a),(b),sizeof(c))
+-#define _IOWR(a,b,c) _IOC(_IOC_READ|_IOC_WRITE,(a),(b),sizeof(c))
+-
+-#define TCGETS		0x5401
+-#define TCSETS		0x5402
+-#define TCSETSW		0x5403
+-#define TCSETSF		0x5404
+-#define TCGETA		0x5405
+-#define TCSETA		0x5406
+-#define TCSETAW		0x5407
+-#define TCSETAF		0x5408
+-#define TCSBRK		0x5409
+-#define TCXONC		0x540A
+-#define TCFLSH		0x540B
+-#define TIOCEXCL	0x540C
+-#define TIOCNXCL	0x540D
+-#define TIOCSCTTY	0x540E
+-#define TIOCGPGRP	0x540F
+-#define TIOCSPGRP	0x5410
+-#define TIOCOUTQ	0x5411
+-#define TIOCSTI		0x5412
+-#define TIOCGWINSZ	0x5413
+-#define TIOCSWINSZ	0x5414
+-#define TIOCMGET	0x5415
+-#define TIOCMBIS	0x5416
+-#define TIOCMBIC	0x5417
+-#define TIOCMSET	0x5418
+-#define TIOCGSOFTCAR	0x5419
+-#define TIOCSSOFTCAR	0x541A
+-#define FIONREAD	0x541B
+-#define TIOCINQ		FIONREAD
+-#define TIOCLINUX	0x541C
+-#define TIOCCONS	0x541D
+-#define TIOCGSERIAL	0x541E
+-#define TIOCSSERIAL	0x541F
+-#define TIOCPKT		0x5420
+-#define FIONBIO		0x5421
+-#define TIOCNOTTY	0x5422
+-#define TIOCSETD	0x5423
+-#define TIOCGETD	0x5424
+-#define TCSBRKP		0x5425
+-#define TIOCTTYGSTRUCT	0x5426
+-#define TIOCSBRK	0x5427
+-#define TIOCCBRK	0x5428
+-#define TIOCGSID	0x5429
+-#define TIOCGPTN	0x80045430
+-#define TIOCSPTLCK	0x40045431
+-#define TCGETX		0x5432
+-#define TCSETX		0x5433
+-#define TCSETXF		0x5434
+-#define TCSETXW		0x5435
+-
+-#define FIONCLEX	0x5450
+-#define FIOCLEX		0x5451
+-#define FIOASYNC	0x5452
+-#define TIOCSERCONFIG	0x5453
+-#define TIOCSERGWILD	0x5454
+-#define TIOCSERSWILD	0x5455
+-#define TIOCGLCKTRMIOS	0x5456
+-#define TIOCSLCKTRMIOS	0x5457
+-#define TIOCSERGSTRUCT	0x5458
+-#define TIOCSERGETLSR   0x5459
+-#define TIOCSERGETMULTI 0x545A
+-#define TIOCSERSETMULTI 0x545B
+-
+-#define TIOCMIWAIT	0x545C
+-#define TIOCGICOUNT	0x545D
+-#define TIOCGHAYESESP   0x545E
+-#define TIOCSHAYESESP   0x545F
+-#define FIOQSIZE	0x5460
+-
+-#define TIOCPKT_DATA		 0
+-#define TIOCPKT_FLUSHREAD	 1
+-#define TIOCPKT_FLUSHWRITE	 2
+-#define TIOCPKT_STOP		 4
+-#define TIOCPKT_START		 8
+-#define TIOCPKT_NOSTOP		16
+-#define TIOCPKT_DOSTOP		32
+-#define TIOCPKT_IOCTL		64
+-
+-#define TIOCSER_TEMT    0x01
+-
+-struct winsize {
+-	unsigned short ws_row;
+-	unsigned short ws_col;
+-	unsigned short ws_xpixel;
+-	unsigned short ws_ypixel;
+-};
+-
+-#define TIOCM_LE        0x001
+-#define TIOCM_DTR       0x002
+-#define TIOCM_RTS       0x004
+-#define TIOCM_ST        0x008
+-#define TIOCM_SR        0x010
+-#define TIOCM_CTS       0x020
+-#define TIOCM_CAR       0x040
+-#define TIOCM_RNG       0x080
+-#define TIOCM_DSR       0x100
+-#define TIOCM_CD        TIOCM_CAR
+-#define TIOCM_RI        TIOCM_RNG
+-#define TIOCM_OUT1      0x2000
+-#define TIOCM_OUT2      0x4000
+-#define TIOCM_LOOP      0x8000
+-#define TIOCM_MODEM_BITS TIOCM_OUT2
+-
+-#define N_TTY           0
+-#define N_SLIP          1
+-#define N_MOUSE         2
+-#define N_PPP           3
+-#define N_STRIP         4
+-#define N_AX25          5
+-#define N_X25           6
+-#define N_6PACK         7
+-#define N_MASC          8
+-#define N_R3964         9
+-#define N_PROFIBUS_FDL  10
+-#define N_IRDA          11
+-#define N_SMSBLOCK      12
+-#define N_HDLC          13
+-#define N_SYNC_PPP      14
+-#define N_HCI           15
+-
+-#define FIOSETOWN       0x8901
+-#define SIOCSPGRP       0x8902
+-#define FIOGETOWN       0x8903
+-#define SIOCGPGRP       0x8904
+-#define SIOCATMARK      0x8905
+-#define SIOCGSTAMP      0x8906
+-
+-#define SIOCADDRT       0x890B
+-#define SIOCDELRT       0x890C
+-#define SIOCRTMSG       0x890D
+-
+-#define SIOCGIFNAME     0x8910
+-#define SIOCSIFLINK     0x8911
+-#define SIOCGIFCONF     0x8912
+-#define SIOCGIFFLAGS    0x8913
+-#define SIOCSIFFLAGS    0x8914
+-#define SIOCGIFADDR     0x8915
+-#define SIOCSIFADDR     0x8916
+-#define SIOCGIFDSTADDR  0x8917
+-#define SIOCSIFDSTADDR  0x8918
+-#define SIOCGIFBRDADDR  0x8919
+-#define SIOCSIFBRDADDR  0x891a
+-#define SIOCGIFNETMASK  0x891b
+-#define SIOCSIFNETMASK  0x891c
+-#define SIOCGIFMETRIC   0x891d
+-#define SIOCSIFMETRIC   0x891e
+-#define SIOCGIFMEM      0x891f
+-#define SIOCSIFMEM      0x8920
+-#define SIOCGIFMTU      0x8921
+-#define SIOCSIFMTU      0x8922
+-#define SIOCSIFHWADDR   0x8924
+-#define SIOCGIFENCAP    0x8925
+-#define SIOCSIFENCAP    0x8926
+-#define SIOCGIFHWADDR   0x8927
+-#define SIOCGIFSLAVE    0x8929
+-#define SIOCSIFSLAVE    0x8930
+-#define SIOCADDMULTI    0x8931
+-#define SIOCDELMULTI    0x8932
+-#define SIOCGIFINDEX    0x8933
+-#define SIOGIFINDEX     SIOCGIFINDEX
+-#define SIOCSIFPFLAGS   0x8934
+-#define SIOCGIFPFLAGS   0x8935
+-#define SIOCDIFADDR     0x8936
+-#define SIOCSIFHWBROADCAST 0x8937
+-#define SIOCGIFCOUNT    0x8938
+-
+-#define SIOCGIFBR       0x8940
+-#define SIOCSIFBR       0x8941
+-
+-#define SIOCGIFTXQLEN   0x8942
+-#define SIOCSIFTXQLEN   0x8943
+-
+-#define SIOCDARP        0x8953
+-#define SIOCGARP        0x8954
+-#define SIOCSARP        0x8955
+-
+-#define SIOCDRARP       0x8960
+-#define SIOCGRARP       0x8961
+-#define SIOCSRARP       0x8962
+-
+-#define SIOCGIFMAP      0x8970
+-#define SIOCSIFMAP      0x8971
+-
+-#define SIOCADDDLCI     0x8980
+-#define SIOCDELDLCI     0x8981
+-
+-#define SIOCDEVPRIVATE		0x89F0
+-#define SIOCPROTOPRIVATE	0x89E0
+--- a/arch/or1k/bits/mman.h
++++ b/arch/or1k/bits/mman.h
+@@ -37,6 +37,7 @@
+ 
+ #define MCL_CURRENT     1
+ #define MCL_FUTURE      2
++#define MCL_ONFAULT     4
+ 
+ #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+ #define MADV_NORMAL      0
+--- a/arch/or1k/bits/shm.h
++++ /dev/null
+@@ -1,27 +0,0 @@
+-#define SHMLBA 4096
+-
+-struct shmid_ds {
+-	struct ipc_perm shm_perm;
+-	size_t shm_segsz;
+-	time_t shm_atime;
+-	int __unused1;
+-	time_t shm_dtime;
+-	int __unused2;
+-	time_t shm_ctime;
+-	int __unused3;
+-	pid_t shm_cpid;
+-	pid_t shm_lpid;
+-	unsigned long shm_nattch;
+-	unsigned long __pad1;
+-	unsigned long __pad2;
+-};
+-
+-struct shminfo {
+-	unsigned long shmmax, shmmin, shmmni, shmseg, shmall, __unused[4];
+-};
+-
+-struct shm_info {
+-	int __used_ids;
+-	unsigned long shm_tot, shm_rss, shm_swp;
+-	unsigned long __swap_attempts, __swap_successes;
+-};
+--- a/arch/or1k/bits/socket.h
++++ /dev/null
+@@ -1,15 +0,0 @@
+-struct msghdr {
+-	void *msg_name;
+-	socklen_t msg_namelen;
+-	struct iovec *msg_iov;
+-	int msg_iovlen;
+-	void *msg_control;
+-	socklen_t msg_controllen;
+-	int msg_flags;
+-};
+-
+-struct cmsghdr {
+-	socklen_t cmsg_len;
+-	int cmsg_level;
+-	int cmsg_type;
+-};
+--- a/arch/or1k/bits/statfs.h
++++ /dev/null
+@@ -1,7 +0,0 @@
+-struct statfs {
+-	unsigned long f_type, f_bsize;
+-	fsblkcnt_t f_blocks, f_bfree, f_bavail;
+-	fsfilcnt_t f_files, f_ffree;
+-	fsid_t f_fsid;
+-	unsigned long f_namelen, f_frsize, f_flags, f_spare[4];
+-};
+--- a/arch/or1k/bits/stdarg.h
++++ /dev/null
+@@ -1,4 +0,0 @@
+-#define va_start(v,l)   __builtin_va_start(v,l)
+-#define va_end(v)       __builtin_va_end(v)
+-#define va_arg(v,l)     __builtin_va_arg(v,l)
+-#define va_copy(d,s)    __builtin_va_copy(d,s)
+--- a/arch/or1k/bits/syscall.h
++++ b/arch/or1k/bits/syscall.h
+@@ -265,6 +265,9 @@
+ #define __NR_memfd_create 279
+ #define __NR_bpf 280
+ #define __NR_execveat 281
++#define __NR_userfaultfd 282
++#define __NR_membarrier 283
++#define __NR_mlock2 284
+ 
+ #define SYS_io_setup __NR_io_setup
+ #define SYS_io_destroy __NR_io_destroy
+@@ -533,3 +536,6 @@
+ #define SYS_memfd_create __NR_memfd_create
+ #define SYS_bpf __NR_bpf
+ #define SYS_execveat __NR_execveat
++#define SYS_userfaultfd __NR_userfaultfd
++#define SYS_membarrier __NR_membarrier
++#define SYS_mlock2 __NR_mlock2
+--- a/arch/or1k/bits/termios.h
++++ /dev/null
+@@ -1,159 +0,0 @@
+-struct termios {
+-	tcflag_t c_iflag;
+-	tcflag_t c_oflag;
+-	tcflag_t c_cflag;
+-	tcflag_t c_lflag;
+-	cc_t c_line;
+-	cc_t c_cc[NCCS];
+-	speed_t __c_ispeed;
+-	speed_t __c_ospeed;
+-};
+-
+-#define VINTR     0
+-#define VQUIT     1
+-#define VERASE    2
+-#define VKILL     3
+-#define VEOF      4
+-#define VTIME     5
+-#define VMIN      6
+-#define VSWTC     7
+-#define VSTART    8
+-#define VSTOP     9
+-#define VSUSP    10
+-#define VEOL     11
+-#define VREPRINT 12
+-#define VDISCARD 13
+-#define VWERASE  14
+-#define VLNEXT   15
+-#define VEOL2    16
+-
+-#define IGNBRK  0000001
+-#define BRKINT  0000002
+-#define IGNPAR  0000004
+-#define PARMRK  0000010
+-#define INPCK   0000020
+-#define ISTRIP  0000040
+-#define INLCR   0000100
+-#define IGNCR   0000200
+-#define ICRNL   0000400
+-#define IUCLC   0001000
+-#define IXON    0002000
+-#define IXANY   0004000
+-#define IXOFF   0010000
+-#define IMAXBEL 0020000
+-#define IUTF8   0040000
+-
+-#define OPOST  0000001
+-#define OLCUC  0000002
+-#define ONLCR  0000004
+-#define OCRNL  0000010
+-#define ONOCR  0000020
+-#define ONLRET 0000040
+-#define OFILL  0000100
+-#define OFDEL  0000200
+-#define NLDLY  0000400
+-#define NL0    0000000
+-#define NL1    0000400
+-#define CRDLY  0003000
+-#define CR0    0000000
+-#define CR1    0001000
+-#define CR2    0002000
+-#define CR3    0003000
+-#define TABDLY 0014000
+-#define TAB0   0000000
+-#define TAB1   0004000
+-#define TAB2   0010000
+-#define TAB3   0014000
+-#define BSDLY  0020000
+-#define BS0    0000000
+-#define BS1    0020000
+-#define FFDLY  0100000
+-#define FF0    0000000
+-#define FF1    0100000
+-
+-#define VTDLY  0040000
+-#define VT0    0000000
+-#define VT1    0040000
+-
+-#define B0       0000000
+-#define B50      0000001
+-#define B75      0000002
+-#define B110     0000003
+-#define B134     0000004
+-#define B150     0000005
+-#define B200     0000006
+-#define B300     0000007
+-#define B600     0000010
+-#define B1200    0000011
+-#define B1800    0000012
+-#define B2400    0000013
+-#define B4800    0000014
+-#define B9600    0000015
+-#define B19200   0000016
+-#define B38400   0000017
+-
+-#define B57600   0010001
+-#define B115200  0010002
+-#define B230400  0010003
+-#define B460800  0010004
+-#define B500000  0010005
+-#define B576000  0010006
+-#define B921600  0010007
+-#define B1000000 0010010
+-#define B1152000 0010011
+-#define B1500000 0010012
+-#define B2000000 0010013
+-#define B2500000 0010014
+-#define B3000000 0010015
+-#define B3500000 0010016
+-#define B4000000 0010017
+-
+-#define CBAUD    0010017
+-
+-#define CSIZE  0000060
+-#define CS5    0000000
+-#define CS6    0000020
+-#define CS7    0000040
+-#define CS8    0000060
+-#define CSTOPB 0000100
+-#define CREAD  0000200
+-#define PARENB 0000400
+-#define PARODD 0001000
+-#define HUPCL  0002000
+-#define CLOCAL 0004000
+-
+-#define ISIG   0000001
+-#define ICANON 0000002
+-#define ECHO   0000010
+-#define ECHOE  0000020
+-#define ECHOK  0000040
+-#define ECHONL 0000100
+-#define NOFLSH 0000200
+-#define TOSTOP 0000400
+-#define IEXTEN 0100000
+-
+-#define ECHOCTL 0001000
+-#define ECHOPRT 0002000
+-#define ECHOKE 0004000
+-#define FLUSHO 0010000
+-#define PENDIN 0040000
+-
+-#define TCOOFF 0
+-#define TCOON  1
+-#define TCIOFF 2
+-#define TCION  3
+-
+-#define TCIFLUSH  0
+-#define TCOFLUSH  1
+-#define TCIOFLUSH 2
+-
+-#define TCSANOW   0
+-#define TCSADRAIN 1
+-#define TCSAFLUSH 2
+-
+-#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+-#define CBAUDEX 0010000
+-#define CRTSCTS  020000000000
+-#define EXTPROC 0200000
+-#define XTABS  0014000
+-#endif
+--- a/arch/or1k/pthread_arch.h
++++ b/arch/or1k/pthread_arch.h
+@@ -14,5 +14,4 @@ static inline struct pthread *__pthread_
+ #define TLS_ABOVE_TP
+ #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
+ 
+-/* word-offset to 'pc' in mcontext_t */
+-#define CANCEL_REG_IP 32
++#define MC_PC regs.pc
+--- a/arch/powerpc/atomic.h
++++ /dev/null
+@@ -1,126 +0,0 @@
+-#ifndef _INTERNAL_ATOMIC_H
+-#define _INTERNAL_ATOMIC_H
+-
+-#include <stdint.h>
+-#include <endian.h>
+-
+-static inline int a_ctz_l(unsigned long x)
+-{
+-	static const char debruijn32[32] = {
+-		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
+-		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
+-	};
+-	return debruijn32[(x&-x)*0x076be629 >> 27];
+-}
+-
+-static inline int a_ctz_64(uint64_t x)
+-{
+-	uint32_t y = x;
+-	if (!y) {
+-		y = x>>32;
+-		return 32 + a_ctz_l(y);
+-	}
+-	return a_ctz_l(y);
+-}
+-
+-static inline int a_cas(volatile int *p, int t, int s)
+-{
+-	__asm__("\n"
+-		"	sync\n"
+-		"1:	lwarx %0, 0, %4\n"
+-		"	cmpw %0, %2\n"
+-		"	bne 1f\n"
+-		"	stwcx. %3, 0, %4\n"
+-		"	bne- 1b\n"
+-		"	isync\n"
+-		"1:	\n"
+-		: "=&r"(t), "+m"(*p) : "r"(t), "r"(s), "r"(p) : "cc", "memory" );
+-        return t;
+-}
+-
+-static inline void *a_cas_p(volatile void *p, void *t, void *s)
+-{
+-	return (void *)a_cas(p, (int)t, (int)s);
+-}
+-
+-static inline int a_swap(volatile int *x, int v)
+-{
+-	int old;
+-	do old = *x;
+-	while (a_cas(x, old, v) != old);
+-	return old;
+-}
+-
+-static inline int a_fetch_add(volatile int *x, int v)
+-{
+-	int old;
+-	do old = *x;
+-	while (a_cas(x, old, old+v) != old);
+-	return old;
+-}
+-
+-static inline void a_inc(volatile int *x)
+-{
+-	a_fetch_add(x, 1);
+-}
+-
+-static inline void a_dec(volatile int *x)
+-{
+-	a_fetch_add(x, -1);
+-}
+-
+-static inline void a_store(volatile int *p, int x)
+-{
+-	__asm__ __volatile__ ("\n"
+-		"	sync\n"
+-		"	stw %1, %0\n"
+-		"	isync\n"
+-		: "=m"(*p) : "r"(x) : "memory" );
+-}
+-
+-#define a_spin a_barrier
+-
+-static inline void a_barrier()
+-{
+-	a_cas(&(int){0}, 0, 0);
+-}
+-
+-static inline void a_crash()
+-{
+-	*(volatile char *)0=0;
+-}
+-
+-static inline void a_and(volatile int *p, int v)
+-{
+-	int old;
+-	do old = *p;
+-	while (a_cas(p, old, old&v) != old);
+-}
+-
+-static inline void a_or(volatile int *p, int v)
+-{
+-	int old;
+-	do old = *p;
+-	while (a_cas(p, old, old|v) != old);
+-}
+-
+-static inline void a_or_l(volatile void *p, long v)
+-{
+-	a_or(p, v);
+-}
+-
+-static inline void a_and_64(volatile uint64_t *p, uint64_t v)
+-{
+-	union { uint64_t v; uint32_t r[2]; } u = { v };
+-	a_and((int *)p, u.r[0]);
+-	a_and((int *)p+1, u.r[1]);
+-}
+-
+-static inline void a_or_64(volatile uint64_t *p, uint64_t v)
+-{
+-	union { uint64_t v; uint32_t r[2]; } u = { v };
+-	a_or((int *)p, u.r[0]);
+-	a_or((int *)p+1, u.r[1]);
+-}
+-
+-#endif
+--- /dev/null
++++ b/arch/powerpc/atomic_arch.h
+@@ -0,0 +1,39 @@
++#define a_ll a_ll
++static inline int a_ll(volatile int *p)
++{
++	int v;
++	__asm__ __volatile__ ("lwarx %0, 0, %2" : "=r"(v) : "m"(*p), "r"(p));
++	return v;
++}
++
++#define a_sc a_sc
++static inline int a_sc(volatile int *p, int v)
++{
++	int r;
++	__asm__ __volatile__ (
++		"stwcx. %2, 0, %3 ; mfcr %0"
++		: "=r"(r), "=m"(*p) : "r"(v), "r"(p) : "memory", "cc");
++	return r & 0x20000000; /* "bit 2" of "cr0" (backwards bit order) */
++}
++
++#define a_barrier a_barrier
++static inline void a_barrier()
++{
++	__asm__ __volatile__ ("sync" : : : "memory");
++}
++
++#define a_pre_llsc a_barrier
++
++#define a_post_llsc a_post_llsc
++static inline void a_post_llsc()
++{
++	__asm__ __volatile__ ("isync" : : : "memory");
++}
++
++#define a_store a_store
++static inline void a_store(volatile int *p, int v)
++{
++	a_pre_llsc();
++	*p = v;
++	a_post_llsc();
++}
+--- a/arch/powerpc/bits/mman.h
++++ b/arch/powerpc/bits/mman.h
+@@ -4,6 +4,7 @@
+ #define	PROT_READ      1
+ #define	PROT_WRITE     2
+ #define	PROT_EXEC      4
++#define	PROT_SAO       0x10
+ #define	PROT_GROWSDOWN 0x01000000
+ #define	PROT_GROWSUP   0x02000000
+ 
+@@ -35,8 +36,9 @@
+ #define MS_INVALIDATE   2
+ #define MS_SYNC         4
+ 
+-#define MCL_CURRENT     1
+-#define MCL_FUTURE      2
++#define MCL_CURRENT     0x2000
++#define MCL_FUTURE      0x4000
++#define MCL_ONFAULT     0x8000
+ 
+ #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+ #define MADV_NORMAL      0
+--- a/arch/powerpc/bits/statfs.h
++++ /dev/null
+@@ -1,7 +0,0 @@
+-struct statfs {
+-	unsigned long f_type, f_bsize;
+-	fsblkcnt_t f_blocks, f_bfree, f_bavail;
+-	fsfilcnt_t f_files, f_ffree;
+-	fsid_t f_fsid;
+-	unsigned long f_namelen, f_frsize, f_flags, f_spare[4];
+-};
+--- a/arch/powerpc/bits/stdarg.h
++++ /dev/null
+@@ -1,4 +0,0 @@
+-#define va_start(v,l)   __builtin_va_start(v,l)
+-#define va_end(v)       __builtin_va_end(v)
+-#define va_arg(v,l)     __builtin_va_arg(v,l)
+-#define va_copy(d,s)    __builtin_va_copy(d,s)
+--- a/arch/powerpc/bits/syscall.h
++++ b/arch/powerpc/bits/syscall.h
+@@ -194,23 +194,19 @@
+ #define __NR_vfork                  189
+ #define __NR_ugetrlimit             190
+ #define __NR_readahead              191
+-#if !defined(__PPC64) || defined(__ABI32)
+ #define __NR_mmap2                  192
+ #define __NR_truncate64             193
+ #define __NR_ftruncate64            194
+ #define __NR_stat64                 195
+ #define __NR_lstat64                196
+ #define __NR_fstat64                197
+-#endif
+ #define __NR_pciconfig_read         198
+ #define __NR_pciconfig_write        199
+ #define __NR_pciconfig_iobase       200
+ #define __NR_multiplexer            201
+ #define __NR_getdents64             202
+ #define __NR_pivot_root             203
+-#if !defined(__PPC64) || defined(__ABI32)
+ #define __NR_fcntl64                204
+-#endif
+ #define __NR_madvise                205
+ #define __NR_mincore                206
+ #define __NR_gettid                 207
+@@ -231,9 +227,7 @@
+ #define __NR_sched_setaffinity      222
+ #define __NR_sched_getaffinity      223
+ #define __NR_tuxcall                225
+-#if !defined(__PPC64) || defined(__ABI32)
+ #define __NR_sendfile64             226
+-#endif
+ #define __NR_io_setup               227
+ #define __NR_io_destroy             228
+ #define __NR_io_getevents           229
+@@ -261,9 +255,7 @@
+ #define __NR_utimes                 251
+ #define __NR_statfs64               252
+ #define __NR_fstatfs64              253
+-#if !defined(__PPC64) || defined(__ABI32)
+ #define __NR_fadvise64_64           254
+-#endif
+ #define __NR_rtas		255
+ #define __NR_sys_debug_setcontext 256
+ #define __NR_migrate_pages	258
+@@ -299,11 +291,7 @@
+ #define __NR_mknodat		288
+ #define __NR_fchownat		289
+ #define __NR_futimesat		290
+-#if defined(__PPC64) && !defined(__ABI32)
+-#define __NR_newfstatat		291
+-#else
+ #define __NR_fstatat64		291
+-#endif
+ #define __NR_unlinkat		292
+ #define __NR_renameat		293
+ #define __NR_linkat		294
+@@ -376,6 +364,10 @@
+ #define __NR_memfd_create          360
+ #define __NR_bpf                   361
+ #define __NR_execveat              362
++#define __NR_switch_endian         363
++#define __NR_userfaultfd           364
++#define __NR_membarrier            365
++#define __NR_mlock2                378
+ 
+ /*
+  * repeated with SYS prefix
+@@ -576,23 +568,19 @@
+ #define SYS_vfork                  189
+ #define SYS_ugetrlimit             190
+ #define SYS_readahead              191
+-#if !defined(__PPC64) || defined(__ABI32)
+ #define SYS_mmap2                  192
+ #define SYS_truncate64             193
+ #define SYS_ftruncate64            194
+ #define SYS_stat64                 195
+ #define SYS_lstat64                196
+ #define SYS_fstat64                197
+-#endif
+ #define SYS_pciconfig_read         198
+ #define SYS_pciconfig_write        199
+ #define SYS_pciconfig_iobase       200
+ #define SYS_multiplexer            201
+ #define SYS_getdents64             202
+ #define SYS_pivot_root             203
+-#if !defined(__PPC64) || defined(__ABI32)
+ #define SYS_fcntl64                204
+-#endif
+ #define SYS_madvise                205
+ #define SYS_mincore                206
+ #define SYS_gettid                 207
+@@ -613,9 +601,7 @@
+ #define SYS_sched_setaffinity      222
+ #define SYS_sched_getaffinity      223
+ #define SYS_tuxcall                225
+-#if !defined(__PPC64) || defined(__ABI32)
+ #define SYS_sendfile64             226
+-#endif
+ #define SYS_io_setup               227
+ #define SYS_io_destroy             228
+ #define SYS_io_getevents           229
+@@ -643,9 +629,7 @@
+ #define SYS_utimes                 251
+ #define SYS_statfs64               252
+ #define SYS_fstatfs64              253
+-#if !defined(__PPC64) || defined(__ABI32)
+ #define SYS_fadvise64_64           254
+-#endif
+ #define SYS_rtas		255
+ #define SYS_sys_debug_setcontext 256
+ #define SYS_migrate_pages	258
+@@ -681,11 +665,7 @@
+ #define SYS_mknodat		288
+ #define SYS_fchownat		289
+ #define SYS_futimesat		290
+-#if defined(__PPC64) && !defined(__ABI32)
+-#define SYS_newfstatat		291
+-#else
+ #define SYS_fstatat64		291
+-#endif
+ #define SYS_unlinkat		292
+ #define SYS_renameat		293
+ #define SYS_linkat		294
+@@ -758,3 +738,7 @@
+ #define SYS_memfd_create          360
+ #define SYS_bpf                   361
+ #define SYS_execveat              362
++#define SYS_switch_endian         363
++#define SYS_userfaultfd           364
++#define SYS_membarrier            365
++#define SYS_mlock2                378
+--- a/arch/powerpc/pthread_arch.h
++++ b/arch/powerpc/pthread_arch.h
+@@ -15,9 +15,8 @@ static inline struct pthread *__pthread_
+ 
+ #define DTP_OFFSET 0x8000
+ 
+-// offset of the PC register in mcontext_t, divided by the system wordsize
+ // the kernel calls the ip "nip", it's the first saved value after the 32
+ // GPRs.
+-#define CANCEL_REG_IP 32
++#define MC_PC gregs[32]
+ 
+ #define CANARY canary_at_end
+--- a/arch/sh/atomic.h
++++ /dev/null
+@@ -1,168 +0,0 @@
+-#ifndef _INTERNAL_ATOMIC_H
+-#define _INTERNAL_ATOMIC_H
+-
+-#include <stdint.h>
+-
+-static inline int a_ctz_l(unsigned long x)
+-{
+-	static const char debruijn32[32] = {
+-		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
+-		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
+-	};
+-	return debruijn32[(x&-x)*0x076be629 >> 27];
+-}
+-
+-static inline int a_ctz_64(uint64_t x)
+-{
+-	uint32_t y = x;
+-	if (!y) {
+-		y = x>>32;
+-		return 32 + a_ctz_l(y);
+-	}
+-	return a_ctz_l(y);
+-}
+-
+-#define LLSC_CLOBBERS "r0", "t", "memory"
+-#define LLSC_START(mem) "synco\n"  \
+-	"0:	movli.l @" mem ", r0\n"
+-#define LLSC_END(mem)              \
+-	"1:	movco.l r0, @" mem "\n"    \
+-	"	bf 0b\n"                   \
+-	"	synco\n"
+-
+-static inline int __sh_cas_llsc(volatile int *p, int t, int s)
+-{
+-	int old;
+-	__asm__ __volatile__(
+-		LLSC_START("%1")
+-		"	mov r0, %0\n"
+-		"	cmp/eq %0, %2\n"
+-		"	bf 1f\n"
+-		"	mov %3, r0\n"
+-		LLSC_END("%1")
+-		: "=&r"(old) : "r"(p), "r"(t), "r"(s) : LLSC_CLOBBERS);
+-	return old;
+-}
+-
+-static inline int __sh_swap_llsc(volatile int *x, int v)
+-{
+-	int old;
+-	__asm__ __volatile__(
+-		LLSC_START("%1")
+-		"	mov r0, %0\n"
+-		"	mov %2, r0\n"
+-		LLSC_END("%1")
+-		: "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
+-	return old;
+-}
+-
+-static inline int __sh_fetch_add_llsc(volatile int *x, int v)
+-{
+-	int old;
+-	__asm__ __volatile__(
+-		LLSC_START("%1")
+-		"	mov r0, %0\n"
+-		"	add %2, r0\n"
+-		LLSC_END("%1")
+-		: "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
+-	return old;
+-}
+-
+-static inline void __sh_store_llsc(volatile int *p, int x)
+-{
+-	__asm__ __volatile__(
+-		"	synco\n"
+-		"	mov.l %1, @%0\n"
+-		"	synco\n"
+-		: : "r"(p), "r"(x) : "memory");
+-}
+-
+-static inline void __sh_and_llsc(volatile int *x, int v)
+-{
+-	__asm__ __volatile__(
+-		LLSC_START("%0")
+-		"	and %1, r0\n"
+-		LLSC_END("%0")
+-		: : "r"(x), "r"(v) : LLSC_CLOBBERS);
+-}
+-
+-static inline void __sh_or_llsc(volatile int *x, int v)
+-{
+-	__asm__ __volatile__(
+-		LLSC_START("%0")
+-		"	or %1, r0\n"
+-		LLSC_END("%0")
+-		: : "r"(x), "r"(v) : LLSC_CLOBBERS);
+-}
+-
+-#ifdef __SH4A__
+-#define a_cas(p,t,s)     __sh_cas_llsc(p,t,s)
+-#define a_swap(x,v)      __sh_swap_llsc(x,v)
+-#define a_fetch_add(x,v) __sh_fetch_add_llsc(x, v)
+-#define a_store(x,v)     __sh_store_llsc(x, v)
+-#define a_and(x,v)       __sh_and_llsc(x, v)
+-#define a_or(x,v)        __sh_or_llsc(x, v)
+-#else
+-
+-int  __sh_cas(volatile int *, int, int);
+-int  __sh_swap(volatile int *, int);
+-int  __sh_fetch_add(volatile int *, int);
+-void __sh_store(volatile int *, int);
+-void __sh_and(volatile int *, int);
+-void __sh_or(volatile int *, int);
+-
+-#define a_cas(p,t,s)     __sh_cas(p,t,s)
+-#define a_swap(x,v)      __sh_swap(x,v)
+-#define a_fetch_add(x,v) __sh_fetch_add(x, v)
+-#define a_store(x,v)     __sh_store(x, v)
+-#define a_and(x,v)       __sh_and(x, v)
+-#define a_or(x,v)        __sh_or(x, v)
+-#endif
+-
+-static inline void *a_cas_p(volatile void *p, void *t, void *s)
+-{
+-	return (void *)a_cas(p, (int)t, (int)s);
+-}
+-
+-static inline void a_inc(volatile int *x)
+-{
+-	a_fetch_add(x, 1);
+-}
+-
+-static inline void a_dec(volatile int *x)
+-{
+-	a_fetch_add(x, -1);
+-}
+-
+-#define a_spin a_barrier
+-
+-static inline void a_barrier()
+-{
+-	a_cas(&(int){0}, 0, 0);
+-}
+-
+-static inline void a_crash()
+-{
+-	*(volatile char *)0=0;
+-}
+-
+-static inline void a_or_l(volatile void *p, long v)
+-{
+-	a_or(p, v);
+-}
+-
+-static inline void a_and_64(volatile uint64_t *p, uint64_t v)
+-{
+-	union { uint64_t v; uint32_t r[2]; } u = { v };
+-	a_and((int *)p,   u.r[0]);
+-	a_and((int *)p+1, u.r[1]);
+-}
+-
+-static inline void a_or_64(volatile uint64_t *p, uint64_t v)
+-{
+-	union { uint64_t v; uint32_t r[2]; } u = { v };
+-	a_or((int *)p,   u.r[0]);
+-	a_or((int *)p+1, u.r[1]);
+-}
+-
+-#endif
+--- /dev/null
++++ b/arch/sh/atomic_arch.h
+@@ -0,0 +1,46 @@
++#if defined(__SH4A__)
++
++#define a_ll a_ll
++static inline int a_ll(volatile int *p) /* load-linked: reads *p with movli.l and returns the value */
++{
++	int v; /* "=z" presumably pins v to r0 (the removed LLSC_START loaded into r0) -- confirm */
++	__asm__ __volatile__ ("movli.l @%1, %0" : "=z"(v) : "r"(p), "m"(*p));
++	return v;
++}
++
++#define a_sc a_sc
++static inline int a_sc(volatile int *p, int v) /* store-conditional: movco.l writes v to *p, counterpart of movli.l */
++{
++	int r; /* receives the T flag via movt; presumably nonzero when the store succeeded -- confirm */
++	__asm__ __volatile__ (
++		"movco.l %2, @%3 ; movt %0"
++		: "=r"(r), "=m"(*p) : "z"(v), "r"(p) : "memory", "cc");
++	return r;
++}
++
++#define a_barrier a_barrier
++static inline void a_barrier() /* barrier: issues synco; "memory" clobber also stops compiler reordering */
++{
++	__asm__ __volatile__ ("synco" : : : "memory"); /* no outputs, no inputs; "memory" belongs in the clobber list */
++}
++
++#define a_pre_llsc a_barrier
++#define a_post_llsc a_barrier
++
++#else
++
++#define a_cas a_cas
++__attribute__((__visibility__("hidden"))) extern const void *__sh_cas_ptr; /* address of the CAS routine; defined outside this header */
++static inline int a_cas(volatile int *p, int t, int s) /* compare-and-swap done by an indirect call through __sh_cas_ptr */
++{
++	register int r1 __asm__("r1"); /* output-only operand: the called routine may overwrite r1 */
++	register int r2 __asm__("r2") = t; /* comparison value, passed in r2 */
++	register int r3 __asm__("r3") = s; /* replacement value in r3; "+r" also carries the result back */
++	__asm__ __volatile__ (
++		"jsr @%4 ; nop"
++		: "=r"(r1), "+r"(r3) : "z"(p), "r"(r2), "r"(__sh_cas_ptr)
++		: "memory", "pr", "cc");
++	return r3; /* presumably the value previously stored at *p -- confirm against the routine */
++}
++
++#endif
+--- a/arch/sh/bits/errno.h
++++ /dev/null
+@@ -1,134 +0,0 @@
+-#define EPERM            1
+-#define ENOENT           2
+-#define ESRCH            3
+-#define EINTR            4
+-#define EIO              5
+-#define ENXIO            6
+-#define E2BIG            7
+-#define ENOEXEC          8
+-#define EBADF            9
+-#define ECHILD          10
+-#define EAGAIN          11
+-#define ENOMEM          12
+-#define EACCES          13
+-#define EFAULT          14
+-#define ENOTBLK         15
+-#define EBUSY           16
+-#define EEXIST          17
+-#define EXDEV           18
+-#define ENODEV          19
+-#define ENOTDIR         20
+-#define EISDIR          21
+-#define EINVAL          22
+-#define ENFILE          23
+-#define EMFILE          24
+-#define ENOTTY          25
+-#define ETXTBSY         26
+-#define EFBIG           27
+-#define ENOSPC          28
+-#define ESPIPE          29
+-#define EROFS           30
+-#define EMLINK          31
+-#define EPIPE           32
+-#define EDOM            33
+-#define ERANGE          34
+-#define EDEADLK         35
+-#define ENAMETOOLONG    36
+-#define ENOLCK          37
+-#define ENOSYS          38
+-#define ENOTEMPTY       39
+-#define ELOOP           40
+-#define EWOULDBLOCK     EAGAIN
+-#define ENOMSG          42
+-#define EIDRM           43
+-#define ECHRNG          44
+-#define EL2NSYNC        45
+-#define EL3HLT          46
+-#define EL3RST          47
+-#define ELNRNG          48
+-#define EUNATCH         49
+-#define ENOCSI          50
+-#define EL2HLT          51
+-#define EBADE           52
+-#define EBADR           53
+-#define EXFULL          54
+-#define ENOANO          55
+-#define EBADRQC         56
+-#define EBADSLT         57
+-#define EDEADLOCK       EDEADLK
+-#define EBFONT          59
+-#define ENOSTR          60
+-#define ENODATA         61
+-#define ETIME           62
+-#define ENOSR           63
+-#define ENONET          64
+-#define ENOPKG          65
+-#define EREMOTE         66
+-#define ENOLINK         67
+-#define EADV            68
+-#define ESRMNT          69
+-#define ECOMM           70
+-#define EPROTO          71
+-#define EMULTIHOP       72
+-#define EDOTDOT         73
+-#define EBADMSG         74
+-#define EOVERFLOW       75
+-#define ENOTUNIQ        76
+-#define EBADFD          77
+-#define EREMCHG         78
+-#define ELIBACC         79
+-#define ELIBBAD         80
+-#define ELIBSCN         81
+-#define ELIBMAX         82
+-#define ELIBEXEC        83
+-#define EILSEQ          84
+-#define ERESTART        85
+-#define ESTRPIPE        86
+-#define EUSERS          87
+-#define ENOTSOCK        88
+-#define EDESTADDRREQ    89
+-#define EMSGSIZE        90
+-#define EPROTOTYPE      91
+-#define ENOPROTOOPT     92
+-#define EPROTONOSUPPORT 93
+-#define ESOCKTNOSUPPORT 94
+-#define EOPNOTSUPP      95
+-#define ENOTSUP         EOPNOTSUPP
+-#define EPFNOSUPPORT    96
+-#define EAFNOSUPPORT    97
+-#define EADDRINUSE      98
+-#define EADDRNOTAVAIL   99
+-#define ENETDOWN        100
+-#define ENETUNREACH     101
+-#define ENETRESET       102
+-#define ECONNABORTED    103
+-#define ECONNRESET      104
+-#define ENOBUFS         105
+-#define EISCONN         106
+-#define ENOTCONN        107
+-#define ESHUTDOWN       108
+-#define ETOOMANYREFS    109
+-#define ETIMEDOUT       110
+-#define ECONNREFUSED    111
+-#define EHOSTDOWN       112
+-#define EHOSTUNREACH    113
+-#define EALREADY        114
+-#define EINPROGRESS     115
+-#define ESTALE          116
+-#define EUCLEAN         117
+-#define ENOTNAM         118
+-#define ENAVAIL         119
+-#define EISNAM          120
+-#define EREMOTEIO       121
+-#define EDQUOT          122
+-#define ENOMEDIUM       123
+-#define EMEDIUMTYPE     124
+-#define ECANCELED       125
+-#define ENOKEY          126
+-#define EKEYEXPIRED     127
+-#define EKEYREVOKED     128
+-#define EKEYREJECTED    129
+-#define EOWNERDEAD      130
+-#define ENOTRECOVERABLE 131
+-#define ERFKILL         132
+-#define EHWPOISON       133
+--- a/arch/sh/bits/fcntl.h
++++ /dev/null
+@@ -1,40 +0,0 @@
+-#define O_CREAT        0100
+-#define O_EXCL         0200
+-#define O_NOCTTY       0400
+-#define O_TRUNC       01000
+-#define O_APPEND      02000
+-#define O_NONBLOCK    04000
+-#define O_DSYNC      010000
+-#define O_SYNC     04010000
+-#define O_RSYNC    04010000
+-#define O_DIRECTORY 0200000
+-#define O_NOFOLLOW  0400000
+-#define O_CLOEXEC  02000000
+-
+-#define O_ASYNC      020000
+-#define O_DIRECT     040000
+-#define O_LARGEFILE 0100000
+-#define O_NOATIME  01000000
+-#define O_PATH    010000000
+-#define O_TMPFILE 020200000
+-#define O_NDELAY O_NONBLOCK
+-
+-#define F_DUPFD  0
+-#define F_GETFD  1
+-#define F_SETFD  2
+-#define F_GETFL  3
+-#define F_SETFL  4
+-
+-#define F_SETOWN 8
+-#define F_GETOWN 9
+-#define F_SETSIG 10
+-#define F_GETSIG 11
+-
+-#define F_GETLK 12
+-#define F_SETLK 13
+-#define F_SETLKW 14
+-
+-#define F_SETOWN_EX 15
+-#define F_GETOWN_EX 16
+-
+-#define F_GETOWNER_UIDS 17
+--- a/arch/sh/bits/ipc.h
++++ /dev/null
+@@ -1,14 +0,0 @@
+-struct ipc_perm
+-{
+-	key_t __ipc_perm_key;
+-	uid_t uid;
+-	gid_t gid;
+-	uid_t cuid;
+-	gid_t cgid;
+-	mode_t mode;
+-	int __ipc_perm_seq;
+-	long __pad1;
+-	long __pad2;
+-};
+-
+-#define IPC_64 0x100
+--- a/arch/sh/bits/mman.h
++++ b/arch/sh/bits/mman.h
+@@ -38,6 +38,7 @@
+ 
+ #define MCL_CURRENT     1
+ #define MCL_FUTURE      2
++#define MCL_ONFAULT     4
+ 
+ #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+ #define MADV_NORMAL      0
+--- a/arch/sh/bits/msg.h
++++ /dev/null
+@@ -1,16 +0,0 @@
+-struct msqid_ds
+-{
+-	struct ipc_perm msg_perm;
+-	time_t msg_stime;
+-	int __unused1;
+-	time_t msg_rtime;
+-	int __unused2;
+-	time_t msg_ctime;
+-	int __unused3;
+-	unsigned long msg_cbytes;
+-	msgqnum_t msg_qnum;
+-	msglen_t msg_qbytes;
+-	pid_t msg_lspid;
+-	pid_t msg_lrpid;
+-	unsigned long __unused[2];
+-};
+--- a/arch/sh/bits/sem.h
++++ /dev/null
+@@ -1,16 +0,0 @@
+-struct semid_ds {
+-	struct ipc_perm sem_perm;
+-	time_t sem_otime;
+-	time_t __unused1;
+-	time_t sem_ctime;
+-	time_t __unused2;
+-#if __BYTE_ORDER == __LITTLE_ENDIAN
+-	unsigned short sem_nsems;
+-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+-#else
+-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+-	unsigned short sem_nsems;
+-#endif
+-	time_t __unused3;
+-	time_t __unused4;
+-};
+--- a/arch/sh/bits/socket.h
++++ /dev/null
+@@ -1,17 +0,0 @@
+-struct msghdr
+-{
+-	void *msg_name;
+-	socklen_t msg_namelen;
+-	struct iovec *msg_iov;
+-	int msg_iovlen;
+-	void *msg_control;
+-	socklen_t msg_controllen;
+-	int msg_flags;
+-};
+-
+-struct cmsghdr
+-{
+-	socklen_t cmsg_len;
+-	int cmsg_level;
+-	int cmsg_type;
+-};
+--- a/arch/sh/bits/statfs.h
++++ /dev/null
+@@ -1,7 +0,0 @@
+-struct statfs {
+-	unsigned long f_type, f_bsize;
+-	fsblkcnt_t f_blocks, f_bfree, f_bavail;
+-	fsfilcnt_t f_files, f_ffree;
+-	fsid_t f_fsid;
+-	unsigned long f_namelen, f_frsize, f_flags, f_spare[4];
+-};
+--- a/arch/sh/bits/stdarg.h
++++ /dev/null
+@@ -1,4 +0,0 @@
+-#define va_start(v,l)   __builtin_va_start(v,l)
+-#define va_end(v)       __builtin_va_end(v)
+-#define va_arg(v,l)     __builtin_va_arg(v,l)
+-#define va_copy(d,s)    __builtin_va_copy(d,s)
+--- a/arch/sh/bits/termios.h
++++ /dev/null
+@@ -1,160 +0,0 @@
+-struct termios
+-{
+-	tcflag_t c_iflag;
+-	tcflag_t c_oflag;
+-	tcflag_t c_cflag;
+-	tcflag_t c_lflag;
+-	cc_t c_line;
+-	cc_t c_cc[NCCS];
+-	speed_t __c_ispeed;
+-	speed_t __c_ospeed;
+-};
+-
+-#define VINTR     0
+-#define VQUIT     1
+-#define VERASE    2
+-#define VKILL     3
+-#define VEOF      4
+-#define VTIME     5
+-#define VMIN      6
+-#define VSWTC     7
+-#define VSTART    8
+-#define VSTOP     9
+-#define VSUSP    10
+-#define VEOL     11
+-#define VREPRINT 12
+-#define VDISCARD 13
+-#define VWERASE  14
+-#define VLNEXT   15
+-#define VEOL2    16
+-
+-#define IGNBRK  0000001
+-#define BRKINT  0000002
+-#define IGNPAR  0000004
+-#define PARMRK  0000010
+-#define INPCK   0000020
+-#define ISTRIP  0000040
+-#define INLCR   0000100
+-#define IGNCR   0000200
+-#define ICRNL   0000400
+-#define IUCLC   0001000
+-#define IXON    0002000
+-#define IXANY   0004000
+-#define IXOFF   0010000
+-#define IMAXBEL 0020000
+-#define IUTF8   0040000
+-
+-#define OPOST  0000001
+-#define OLCUC  0000002
+-#define ONLCR  0000004
+-#define OCRNL  0000010
+-#define ONOCR  0000020
+-#define ONLRET 0000040
+-#define OFILL  0000100
+-#define OFDEL  0000200
+-#define NLDLY  0000400
+-#define NL0    0000000
+-#define NL1    0000400
+-#define CRDLY  0003000
+-#define CR0    0000000
+-#define CR1    0001000
+-#define CR2    0002000
+-#define CR3    0003000
+-#define TABDLY 0014000
+-#define TAB0   0000000
+-#define TAB1   0004000
+-#define TAB2   0010000
+-#define TAB3   0014000
+-#define BSDLY  0020000
+-#define BS0    0000000
+-#define BS1    0020000
+-#define FFDLY  0100000
+-#define FF0    0000000
+-#define FF1    0100000
+-
+-#define VTDLY  0040000
+-#define VT0    0000000
+-#define VT1    0040000
+-
+-#define B0       0000000
+-#define B50      0000001
+-#define B75      0000002
+-#define B110     0000003
+-#define B134     0000004
+-#define B150     0000005
+-#define B200     0000006
+-#define B300     0000007
+-#define B600     0000010
+-#define B1200    0000011
+-#define B1800    0000012
+-#define B2400    0000013
+-#define B4800    0000014
+-#define B9600    0000015
+-#define B19200   0000016
+-#define B38400   0000017
+-
+-#define B57600   0010001
+-#define B115200  0010002
+-#define B230400  0010003
+-#define B460800  0010004
+-#define B500000  0010005
+-#define B576000  0010006
+-#define B921600  0010007
+-#define B1000000 0010010
+-#define B1152000 0010011
+-#define B1500000 0010012
+-#define B2000000 0010013
+-#define B2500000 0010014
+-#define B3000000 0010015
+-#define B3500000 0010016
+-#define B4000000 0010017
+-
+-#define CBAUD    0010017
+-
+-#define CSIZE  0000060
+-#define CS5    0000000
+-#define CS6    0000020
+-#define CS7    0000040
+-#define CS8    0000060
+-#define CSTOPB 0000100
+-#define CREAD  0000200
+-#define PARENB 0000400
+-#define PARODD 0001000
+-#define HUPCL  0002000
+-#define CLOCAL 0004000
+-
+-#define ISIG   0000001
+-#define ICANON 0000002
+-#define ECHO   0000010
+-#define ECHOE  0000020
+-#define ECHOK  0000040
+-#define ECHONL 0000100
+-#define NOFLSH 0000200
+-#define TOSTOP 0000400
+-#define IEXTEN 0100000
+-
+-#define ECHOCTL 0001000
+-#define ECHOPRT 0002000
+-#define ECHOKE 0004000
+-#define FLUSHO 0010000
+-#define PENDIN 0040000
+-
+-#define TCOOFF 0
+-#define TCOON  1
+-#define TCIOFF 2
+-#define TCION  3
+-
+-#define TCIFLUSH  0
+-#define TCOFLUSH  1
+-#define TCIOFLUSH 2
+-
+-#define TCSANOW   0
+-#define TCSADRAIN 1
+-#define TCSAFLUSH 2
+-
+-#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+-#define CBAUDEX 0010000
+-#define CRTSCTS  020000000000
+-#define EXTPROC 0200000
+-#define XTABS  0014000
+-#endif
+--- a/arch/sh/crt_arch.h
++++ b/arch/sh/crt_arch.h
+@@ -22,7 +22,8 @@ START ": \n"
+ "	mov.l 1f, r5 \n"
+ "	mov.l 1f+4, r6 \n"
+ "	add r0, r5 \n"
+-"	bsr __fdpic_fixup \n"
++"	mov.l 4f, r1 \n"
++"5:	bsrf r1 \n"
+ "	 add r0, r6 \n"
+ "	mov r0, r12 \n"
+ #endif
+@@ -31,11 +32,16 @@ START ": \n"
+ "	mov.l r9, @-r15 \n"
+ "	mov.l r8, @-r15 \n"
+ "	mov #-16, r0 \n"
+-"	bsr " START "_c \n"
++"	mov.l 2f, r1 \n"
++"3:	bsrf r1 \n"
+ "	 and r0, r15 \n"
+ ".align 2 \n"
+ "1:	.long __ROFIXUP_LIST__@PCREL \n"
+ "	.long __ROFIXUP_END__@PCREL + 4 \n"
++"2:	.long " START "_c@PCREL - (3b+4-.) \n"
++#ifndef SHARED
++"4:	.long __fdpic_fixup@PCREL - (5b+4-.) \n"
++#endif
+ );
+ 
+ #ifndef SHARED
+@@ -53,13 +59,14 @@ START ": \n"
+ "	add r0, r5 \n"
+ "	mov r15, r4 \n"
+ "	mov #-16, r0 \n"
+-"	and r0, r15 \n"
+-"	bsr " START "_c \n"
+-"	nop \n"
++"	mov.l 2f, r1 \n"
++"3:	bsrf r1 \n"
++"	 and r0, r15 \n"
+ ".align 2 \n"
+ ".weak _DYNAMIC \n"
+ ".hidden _DYNAMIC \n"
+ "1:	.long _DYNAMIC-. \n"
++"2:	.long " START "_c@PCREL - (3b+4-.) \n"
+ );
+ 
+ #endif
+--- a/arch/sh/pthread_arch.h
++++ b/arch/sh/pthread_arch.h
+@@ -8,4 +8,4 @@ static inline struct pthread *__pthread_
+ #define TLS_ABOVE_TP
+ #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 8)
+ 
+-#define CANCEL_REG_IP 17
++#define MC_PC sc_pc
+--- a/arch/sh/reloc.h
++++ b/arch/sh/reloc.h
+@@ -32,6 +32,8 @@
+ #define REL_DTPOFF      R_SH_TLS_DTPOFF32
+ #define REL_TPOFF       R_SH_TLS_TPOFF32
+ 
++#define DL_NOMMU_SUPPORT 1
++
+ #if __SH_FDPIC__
+ #define REL_FUNCDESC    R_SH_FUNCDESC
+ #define REL_FUNCDESC_VAL R_SH_FUNCDESC_VALUE
+--- a/arch/sh/src/__fpscr_values.c
++++ /dev/null
+@@ -1,5 +0,0 @@
+-#include "libc.h"
+-
+-/* used by gcc for switching the FPU between single and double precision */
+-//const unsigned long __fpscr_values[2] ATTR_LIBC_VISIBILITY = { 0, 0x80000 };
+-
+--- a/arch/sh/src/__set_thread_area.c
++++ /dev/null
+@@ -1,34 +0,0 @@
+-#include "pthread_impl.h"
+-#include "libc.h"
+-#include "sh_atomic.h"
+-#include <elf.h>
+-
+-/* Also perform sh-specific init */
+-
+-#define CPU_HAS_LLSC 0x0040
+-
+-__attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model, __sh_nommu;
+-
+-int __set_thread_area(void *p)
+-{
+-	size_t *aux;
+-	__asm__ __volatile__ ( "ldc %0, gbr" : : "r"(p) : "memory" );
+-#ifndef __SH4A__
+-	if (__hwcap & CPU_HAS_LLSC) {
+-		__sh_atomic_model = SH_A_LLSC;
+-		return 0;
+-	}
+-#if !defined(__SH3__) && !defined(__SH4__)
+-	for (aux=libc.auxv; *aux; aux+=2) {
+-		if (*aux != AT_PLATFORM) continue;
+-		const char *s = (void *)aux[1];
+-		if (s[0]!='s' || s[1]!='h' || s[2]!='2' || s[3]-'0'<10u) break;
+-		__sh_atomic_model = SH_A_IMASK;
+-		__sh_nommu = 1;
+-		return 0;
+-	}
+-#endif
+-	/* __sh_atomic_model = SH_A_GUSA; */ /* 0, default */
+-#endif
+-	return 0;
+-}
+--- a/arch/sh/src/__shcall.c
++++ /dev/null
+@@ -1,5 +0,0 @@
+-__attribute__((__visibility__("hidden")))
+-int __shcall(void *arg, int (*func)(void *))
+-{
+-	return func(arg);
+-}
+--- a/arch/sh/src/__unmapself.c
++++ /dev/null
+@@ -1,24 +0,0 @@
+-#include "pthread_impl.h"
+-
+-void __unmapself_sh_mmu(void *, size_t);
+-void __unmapself_sh_nommu(void *, size_t);
+-
+-#if !defined(__SH3__) && !defined(__SH4__)
+-#define __unmapself __unmapself_sh_nommu
+-#include "dynlink.h"
+-#undef CRTJMP
+-#define CRTJMP(pc,sp) __asm__ __volatile__( \
+-	"mov.l @%0+,r0 ; mov.l @%0,r12 ; jmp @r0 ; mov %1,r15" \
+-	: : "r"(pc), "r"(sp) : "r0", "memory" )
+-#include "../../../src/thread/__unmapself.c"
+-#undef __unmapself
+-extern __attribute__((__visibility__("hidden"))) unsigned __sh_nommu;
+-#else
+-#define __sh_nommu 0
+-#endif
+-
+-void __unmapself(void *base, size_t size)
+-{
+-	if (__sh_nommu) __unmapself_sh_nommu(base, size);
+-	else __unmapself_sh_mmu(base, size);
+-}
+--- a/arch/sh/src/atomic.c
++++ /dev/null
+@@ -1,158 +0,0 @@
+-#ifndef __SH4A__
+-
+-#include "sh_atomic.h"
+-#include "atomic.h"
+-#include "libc.h"
+-
+-static inline unsigned mask()
+-{
+-	unsigned sr;
+-	__asm__ __volatile__ ( "\n"
+-	"	stc sr,r0 \n"
+-	"	mov r0,%0 \n"
+-	"	or #0xf0,r0 \n"
+-	"	ldc r0,sr \n"
+-	: "=&r"(sr) : : "memory", "r0" );
+-	return sr;
+-}
+-
+-static inline void unmask(unsigned sr)
+-{
+-	__asm__ __volatile__ ( "ldc %0,sr" : : "r"(sr) : "memory" );
+-}
+-
+-/* gusa is a hack in the kernel which lets you create a sequence of instructions
+- * which will be restarted if the process is preempted in the middle of the
+- * sequence. It will do for implementing atomics on non-smp systems. ABI is:
+- * r0  = address of first instruction after the atomic sequence
+- * r1  = original stack pointer
+- * r15 = -1 * length of atomic sequence in bytes
+- */
+-#define GUSA_CLOBBERS   "r0", "r1", "memory"
+-#define GUSA_START(mem,old,nop)    \
+-	"	.align 2\n"                \
+-	"	mova 1f, r0\n"             \
+-	nop                            \
+-	"	mov r15, r1\n"             \
+-	"	mov #(0f-1f), r15\n"       \
+-	"0:	mov.l @" mem ", " old "\n"
+-/* the target of mova must be 4 byte aligned, so we may need a nop */
+-#define GUSA_START_ODD(mem,old)  GUSA_START(mem,old,"")
+-#define GUSA_START_EVEN(mem,old) GUSA_START(mem,old,"\tnop\n")
+-#define GUSA_END(mem,new)          \
+-	"	mov.l " new ", @" mem "\n" \
+-	"1:	mov r1, r15\n"
+-
+-int __sh_cas(volatile int *p, int t, int s)
+-{
+-	if (__sh_atomic_model == SH_A_LLSC) return __sh_cas_llsc(p, t, s);
+-
+-	if (__sh_atomic_model == SH_A_IMASK) {
+-		unsigned sr = mask();
+-		int old = *p;
+-		if (old==t) *p = s;
+-		unmask(sr);
+-		return old;
+-	}
+-
+-	int old;
+-	__asm__ __volatile__(
+-		GUSA_START_EVEN("%1", "%0")
+-		"	cmp/eq %0, %2\n"
+-		"	bf 1f\n"
+-		GUSA_END("%1", "%3")
+-		: "=&r"(old) : "r"(p), "r"(t), "r"(s) : GUSA_CLOBBERS, "t");
+-	return old;
+-}
+-
+-int __sh_swap(volatile int *x, int v)
+-{
+-	if (__sh_atomic_model == SH_A_LLSC) return __sh_swap_llsc(x, v);
+-
+-	if (__sh_atomic_model == SH_A_IMASK) {
+-		unsigned sr = mask();
+-		int old = *x;
+-		*x = v;
+-		unmask(sr);
+-		return old;
+-	}
+-
+-	int old;
+-	__asm__ __volatile__(
+-		GUSA_START_EVEN("%1", "%0")
+-		GUSA_END("%1", "%2")
+-		: "=&r"(old) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+-	return old;
+-}
+-
+-int __sh_fetch_add(volatile int *x, int v)
+-{
+-	if (__sh_atomic_model == SH_A_LLSC) return __sh_fetch_add_llsc(x, v);
+-
+-	if (__sh_atomic_model == SH_A_IMASK) {
+-		unsigned sr = mask();
+-		int old = *x;
+-		*x = old + v;
+-		unmask(sr);
+-		return old;
+-	}
+-
+-	int old, dummy;
+-	__asm__ __volatile__(
+-		GUSA_START_EVEN("%2", "%0")
+-		"	mov %0, %1\n"
+-		"	add %3, %1\n"
+-		GUSA_END("%2", "%1")
+-		: "=&r"(old), "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+-	return old;
+-}
+-
+-void __sh_store(volatile int *p, int x)
+-{
+-	if (__sh_atomic_model == SH_A_LLSC) return __sh_store_llsc(p, x);
+-	__asm__ __volatile__(
+-		"	mov.l %1, @%0\n"
+-		: : "r"(p), "r"(x) : "memory");
+-}
+-
+-void __sh_and(volatile int *x, int v)
+-{
+-	if (__sh_atomic_model == SH_A_LLSC) return __sh_and_llsc(x, v);
+-
+-	if (__sh_atomic_model == SH_A_IMASK) {
+-		unsigned sr = mask();
+-		int old = *x;
+-		*x = old & v;
+-		unmask(sr);
+-		return;
+-	}
+-
+-	int dummy;
+-	__asm__ __volatile__(
+-		GUSA_START_ODD("%1", "%0")
+-		"	and %2, %0\n"
+-		GUSA_END("%1", "%0")
+-		: "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+-}
+-
+-void __sh_or(volatile int *x, int v)
+-{
+-	if (__sh_atomic_model == SH_A_LLSC) return __sh_or_llsc(x, v);
+-
+-	if (__sh_atomic_model == SH_A_IMASK) {
+-		unsigned sr = mask();
+-		int old = *x;
+-		*x = old | v;
+-		unmask(sr);
+-		return;
+-	}
+-
+-	int dummy;
+-	__asm__ __volatile__(
+-		GUSA_START_ODD("%1", "%0")
+-		"	or %2, %0\n"
+-		GUSA_END("%1", "%0")
+-		: "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+-}
+-
+-#endif
+--- a/arch/sh/src/sh_atomic.h
++++ /dev/null
+@@ -1,15 +0,0 @@
+-#ifndef _SH_ATOMIC_H
+-#define _SH_ATOMIC_H
+-
+-#define SH_A_GUSA 0
+-#define SH_A_LLSC 1
+-#define SH_A_CAS 2
+-#if !defined(__SH3__) && !defined(__SH4__)
+-#define SH_A_IMASK 3
+-#else
+-#define SH_A_IMASK -1LL /* unmatchable by unsigned int */
+-#endif
+-
+-extern __attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model;
+-
+-#endif
+--- a/arch/x32/atomic.h
++++ /dev/null
+@@ -1,105 +0,0 @@
+-#ifndef _INTERNAL_ATOMIC_H
+-#define _INTERNAL_ATOMIC_H
+-
+-#include <stdint.h>
+-
+-static inline int a_ctz_64(uint64_t x)
+-{
+-	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
+-	return x;
+-}
+-
+-static inline int a_ctz_l(unsigned long x)
+-{
+-	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
+-	return x;
+-}
+-
+-static inline void a_and_64(volatile uint64_t *p, uint64_t v)
+-{
+-	__asm__( "lock ; and %1, %0"
+-			 : "=m"(*p) : "r"(v) : "memory" );
+-}
+-
+-static inline void a_or_64(volatile uint64_t *p, uint64_t v)
+-{
+-	__asm__( "lock ; or %1, %0"
+-			 : "=m"(*p) : "r"(v) : "memory" );
+-}
+-
+-static inline void a_or_l(volatile void *p, long v)
+-{
+-	__asm__( "lock ; or %1, %0"
+-		: "=m"(*(long *)p) : "r"(v) : "memory" );
+-}
+-
+-static inline void *a_cas_p(volatile void *p, void *t, void *s)
+-{
+-	__asm__( "lock ; cmpxchg %3, %1"
+-		: "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" );
+-	return t;
+-}
+-
+-static inline int a_cas(volatile int *p, int t, int s)
+-{
+-	__asm__( "lock ; cmpxchg %3, %1"
+-		: "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
+-	return t;
+-}
+-
+-static inline void a_or(volatile int *p, int v)
+-{
+-	__asm__( "lock ; or %1, %0"
+-		: "=m"(*p) : "r"(v) : "memory" );
+-}
+-
+-static inline void a_and(volatile int *p, int v)
+-{
+-	__asm__( "lock ; and %1, %0"
+-		: "=m"(*p) : "r"(v) : "memory" );
+-}
+-
+-static inline int a_swap(volatile int *x, int v)
+-{
+-	__asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
+-	return v;
+-}
+-
+-static inline int a_fetch_add(volatile int *x, int v)
+-{
+-	__asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
+-	return v;
+-}
+-
+-static inline void a_inc(volatile int *x)
+-{
+-	__asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" );
+-}
+-
+-static inline void a_dec(volatile int *x)
+-{
+-	__asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" );
+-}
+-
+-static inline void a_store(volatile int *p, int x)
+-{
+-	__asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" );
+-}
+-
+-static inline void a_spin()
+-{
+-	__asm__ __volatile__( "pause" : : : "memory" );
+-}
+-
+-static inline void a_barrier()
+-{
+-	__asm__ __volatile__( "" : : : "memory" );
+-}
+-
+-static inline void a_crash()
+-{
+-	__asm__ __volatile__( "hlt" : : : "memory" );
+-}
+-
+-
+-#endif
+--- /dev/null
++++ b/arch/x32/atomic_arch.h
+@@ -0,0 +1,114 @@
++#define a_cas a_cas
++static inline int a_cas(volatile int *p, int t, int s)
++{
++	__asm__ __volatile__ (
++		"lock ; cmpxchg %3, %1"
++		: "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
++	return t;
++}
++
++#define a_swap a_swap
++static inline int a_swap(volatile int *p, int v)
++{
++	__asm__ __volatile__(
++		"xchg %0, %1"
++		: "=r"(v), "=m"(*p) : "0"(v) : "memory" );
++	return v;
++}
++
++#define a_fetch_add a_fetch_add
++static inline int a_fetch_add(volatile int *p, int v)
++{
++	__asm__ __volatile__(
++		"lock ; xadd %0, %1"
++		: "=r"(v), "=m"(*p) : "0"(v) : "memory" );
++	return v;
++}
++
++#define a_and a_and
++static inline void a_and(volatile int *p, int v)
++{
++	__asm__ __volatile__(
++		"lock ; and %1, %0"
++		: "=m"(*p) : "r"(v) : "memory" );
++}
++
++#define a_or a_or
++static inline void a_or(volatile int *p, int v)
++{
++	__asm__ __volatile__(
++		"lock ; or %1, %0"
++		: "=m"(*p) : "r"(v) : "memory" );
++}
++
++#define a_and_64 a_and_64
++static inline void a_and_64(volatile uint64_t *p, uint64_t v) /* *p &= v using a lock-prefixed and */
++{
++	__asm__ __volatile__(
++		"lock ; and %1, %0"
++		 : "=m"(*p) : "r"(v) : "memory" );
++}
++
++#define a_or_64 a_or_64
++static inline void a_or_64(volatile uint64_t *p, uint64_t v)
++{
++	__asm__ __volatile__(
++		"lock ; or %1, %0"
++		 : "=m"(*p) : "r"(v) : "memory" );
++}
++
++#define a_inc a_inc
++static inline void a_inc(volatile int *p)
++{
++	__asm__ __volatile__(
++		"lock ; incl %0"
++		: "=m"(*p) : "m"(*p) : "memory" );
++}
++
++#define a_dec a_dec
++static inline void a_dec(volatile int *p)
++{
++	__asm__ __volatile__(
++		"lock ; decl %0"
++		: "=m"(*p) : "m"(*p) : "memory" );
++}
++
++#define a_store a_store
++static inline void a_store(volatile int *p, int x)
++{
++	__asm__ __volatile__(
++		"mov %1, %0 ; lock ; orl $0,(%%rsp)"
++		: "=m"(*p) : "r"(x) : "memory" );
++}
++
++#define a_barrier a_barrier
++static inline void a_barrier()
++{
++	__asm__ __volatile__( "" : : : "memory" );
++}
++
++#define a_spin a_spin
++static inline void a_spin() /* spin-wait hint: issues "pause"; the guard macro above must carry the same name */
++{
++	__asm__ __volatile__( "pause" : : : "memory" );
++}
++
++#define a_crash a_crash
++static inline void a_crash()
++{
++	__asm__ __volatile__( "hlt" : : : "memory" );
++}
++
++#define a_ctz_64 a_ctz_64
++static inline int a_ctz_64(uint64_t x)
++{
++	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
++	return x;
++}
++
++#define a_ctz_l a_ctz_l
++static inline int a_ctz_l(unsigned long x)
++{
++	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
++	return x;
++}
+--- a/arch/x32/bits/errno.h
++++ /dev/null
+@@ -1,134 +0,0 @@
+-#define EPERM            1
+-#define ENOENT           2
+-#define ESRCH            3
+-#define EINTR            4
+-#define EIO              5
+-#define ENXIO            6
+-#define E2BIG            7
+-#define ENOEXEC          8
+-#define EBADF            9
+-#define ECHILD          10
+-#define EAGAIN          11
+-#define ENOMEM          12
+-#define EACCES          13
+-#define EFAULT          14
+-#define ENOTBLK         15
+-#define EBUSY           16
+-#define EEXIST          17
+-#define EXDEV           18
+-#define ENODEV          19
+-#define ENOTDIR         20
+-#define EISDIR          21
+-#define EINVAL          22
+-#define ENFILE          23
+-#define EMFILE          24
+-#define ENOTTY          25
+-#define ETXTBSY         26
+-#define EFBIG           27
+-#define ENOSPC          28
+-#define ESPIPE          29
+-#define EROFS           30
+-#define EMLINK          31
+-#define EPIPE           32
+-#define EDOM            33
+-#define ERANGE          34
+-#define EDEADLK         35
+-#define ENAMETOOLONG    36
+-#define ENOLCK          37
+-#define ENOSYS          38
+-#define ENOTEMPTY       39
+-#define ELOOP           40
+-#define EWOULDBLOCK     EAGAIN
+-#define ENOMSG          42
+-#define EIDRM           43
+-#define ECHRNG          44
+-#define EL2NSYNC        45
+-#define EL3HLT          46
+-#define EL3RST          47
+-#define ELNRNG          48
+-#define EUNATCH         49
+-#define ENOCSI          50
+-#define EL2HLT          51
+-#define EBADE           52
+-#define EBADR           53
+-#define EXFULL          54
+-#define ENOANO          55
+-#define EBADRQC         56
+-#define EBADSLT         57
+-#define EDEADLOCK       EDEADLK
+-#define EBFONT          59
+-#define ENOSTR          60
+-#define ENODATA         61
+-#define ETIME           62
+-#define ENOSR           63
+-#define ENONET          64
+-#define ENOPKG          65
+-#define EREMOTE         66
+-#define ENOLINK         67
+-#define EADV            68
+-#define ESRMNT          69
+-#define ECOMM           70
+-#define EPROTO          71
+-#define EMULTIHOP       72
+-#define EDOTDOT         73
+-#define EBADMSG         74
+-#define EOVERFLOW       75
+-#define ENOTUNIQ        76
+-#define EBADFD          77
+-#define EREMCHG         78
+-#define ELIBACC         79
+-#define ELIBBAD         80
+-#define ELIBSCN         81
+-#define ELIBMAX         82
+-#define ELIBEXEC        83
+-#define EILSEQ          84
+-#define ERESTART        85
+-#define ESTRPIPE        86
+-#define EUSERS          87
+-#define ENOTSOCK        88
+-#define EDESTADDRREQ    89
+-#define EMSGSIZE        90
+-#define EPROTOTYPE      91
+-#define ENOPROTOOPT     92
+-#define EPROTONOSUPPORT 93
+-#define ESOCKTNOSUPPORT 94
+-#define EOPNOTSUPP      95
+-#define ENOTSUP         EOPNOTSUPP
+-#define EPFNOSUPPORT    96
+-#define EAFNOSUPPORT    97
+-#define EADDRINUSE      98
+-#define EADDRNOTAVAIL   99
+-#define ENETDOWN        100
+-#define ENETUNREACH     101
+-#define ENETRESET       102
+-#define ECONNABORTED    103
+-#define ECONNRESET      104
+-#define ENOBUFS         105
+-#define EISCONN         106
+-#define ENOTCONN        107
+-#define ESHUTDOWN       108
+-#define ETOOMANYREFS    109
+-#define ETIMEDOUT       110
+-#define ECONNREFUSED    111
+-#define EHOSTDOWN       112
+-#define EHOSTUNREACH    113
+-#define EALREADY        114
+-#define EINPROGRESS     115
+-#define ESTALE          116
+-#define EUCLEAN         117
+-#define ENOTNAM         118
+-#define ENAVAIL         119
+-#define EISNAM          120
+-#define EREMOTEIO       121
+-#define EDQUOT          122
+-#define ENOMEDIUM       123
+-#define EMEDIUMTYPE     124
+-#define ECANCELED       125
+-#define ENOKEY          126
+-#define EKEYEXPIRED     127
+-#define EKEYREVOKED     128
+-#define EKEYREJECTED    129
+-#define EOWNERDEAD      130
+-#define ENOTRECOVERABLE 131
+-#define ERFKILL         132
+-#define EHWPOISON       133
+--- a/arch/x32/bits/mman.h
++++ b/arch/x32/bits/mman.h
+@@ -38,6 +38,7 @@
+ 
+ #define MCL_CURRENT     1
+ #define MCL_FUTURE      2
++#define MCL_ONFAULT     4
+ 
+ #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+ #define MADV_NORMAL      0
+--- a/arch/x32/bits/sem.h
++++ /dev/null
+@@ -1,16 +0,0 @@
+-struct semid_ds {
+-	struct ipc_perm sem_perm;
+-	time_t sem_otime;
+-	time_t __unused1;
+-	time_t sem_ctime;
+-	time_t __unused2;
+-#if __BYTE_ORDER == __LITTLE_ENDIAN
+-	unsigned short sem_nsems;
+-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+-#else
+-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+-	unsigned short sem_nsems;
+-#endif
+-	time_t __unused3;
+-	time_t __unused4;
+-};
+--- a/arch/x32/bits/stdarg.h
++++ /dev/null
+@@ -1,4 +0,0 @@
+-#define va_start(v,l)   __builtin_va_start(v,l)
+-#define va_end(v)       __builtin_va_end(v)
+-#define va_arg(v,l)     __builtin_va_arg(v,l)
+-#define va_copy(d,s)    __builtin_va_copy(d,s)
+--- a/arch/x32/bits/syscall.h
++++ b/arch/x32/bits/syscall.h
+@@ -277,6 +277,9 @@
+ #define __NR_memfd_create (__X32_SYSCALL_BIT + 319)
+ #define __NR_kexec_file_load (__X32_SYSCALL_BIT + 320)
+ #define __NR_bpf (__X32_SYSCALL_BIT + 321)
++#define __NR_userfaultfd (__X32_SYSCALL_BIT + 323)
++#define __NR_membarrier (__X32_SYSCALL_BIT + 324)
++#define __NR_mlock2 (__X32_SYSCALL_BIT + 325)
+ 
+ #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512)
+ #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513)
+@@ -607,6 +610,9 @@
+ #define SYS_memfd_create __NR_memfd_create
+ #define SYS_kexec_file_load __NR_kexec_file_load
+ #define SYS_bpf __NR_bpf
++#define SYS_userfaultfd __NR_userfaultfd
++#define SYS_membarrier __NR_membarrier
++#define SYS_mlock2 __NR_mlock2
+ 
+ 
+ #define SYS_rt_sigaction __NR_rt_sigaction
+--- a/arch/x32/bits/termios.h
++++ /dev/null
+@@ -1,160 +0,0 @@
+-struct termios
+-{
+-	tcflag_t c_iflag;
+-	tcflag_t c_oflag;
+-	tcflag_t c_cflag;
+-	tcflag_t c_lflag;
+-	cc_t c_line;
+-	cc_t c_cc[NCCS];
+-	speed_t __c_ispeed;
+-	speed_t __c_ospeed;
+-};
+-
+-#define VINTR     0
+-#define VQUIT     1
+-#define VERASE    2
+-#define VKILL     3
+-#define VEOF      4
+-#define VTIME     5
+-#define VMIN      6
+-#define VSWTC     7
+-#define VSTART    8
+-#define VSTOP     9
+-#define VSUSP    10
+-#define VEOL     11
+-#define VREPRINT 12
+-#define VDISCARD 13
+-#define VWERASE  14
+-#define VLNEXT   15
+-#define VEOL2    16
+-
+-#define IGNBRK  0000001
+-#define BRKINT  0000002
+-#define IGNPAR  0000004
+-#define PARMRK  0000010
+-#define INPCK   0000020
+-#define ISTRIP  0000040
+-#define INLCR   0000100
+-#define IGNCR   0000200
+-#define ICRNL   0000400
+-#define IUCLC   0001000
+-#define IXON    0002000
+-#define IXANY   0004000
+-#define IXOFF   0010000
+-#define IMAXBEL 0020000
+-#define IUTF8   0040000
+-
+-#define OPOST  0000001
+-#define OLCUC  0000002
+-#define ONLCR  0000004
+-#define OCRNL  0000010
+-#define ONOCR  0000020
+-#define ONLRET 0000040
+-#define OFILL  0000100
+-#define OFDEL  0000200
+-#define NLDLY  0000400
+-#define NL0    0000000
+-#define NL1    0000400
+-#define CRDLY  0003000
+-#define CR0    0000000
+-#define CR1    0001000
+-#define CR2    0002000
+-#define CR3    0003000
+-#define TABDLY 0014000
+-#define TAB0   0000000
+-#define TAB1   0004000
+-#define TAB2   0010000
+-#define TAB3   0014000
+-#define BSDLY  0020000
+-#define BS0    0000000
+-#define BS1    0020000
+-#define FFDLY  0100000
+-#define FF0    0000000
+-#define FF1    0100000
+-
+-#define VTDLY  0040000
+-#define VT0    0000000
+-#define VT1    0040000
+-
+-#define B0       0000000
+-#define B50      0000001
+-#define B75      0000002
+-#define B110     0000003
+-#define B134     0000004
+-#define B150     0000005
+-#define B200     0000006
+-#define B300     0000007
+-#define B600     0000010
+-#define B1200    0000011
+-#define B1800    0000012
+-#define B2400    0000013
+-#define B4800    0000014
+-#define B9600    0000015
+-#define B19200   0000016
+-#define B38400   0000017
+-
+-#define B57600   0010001
+-#define B115200  0010002
+-#define B230400  0010003
+-#define B460800  0010004
+-#define B500000  0010005
+-#define B576000  0010006
+-#define B921600  0010007
+-#define B1000000 0010010
+-#define B1152000 0010011
+-#define B1500000 0010012
+-#define B2000000 0010013
+-#define B2500000 0010014
+-#define B3000000 0010015
+-#define B3500000 0010016
+-#define B4000000 0010017
+-
+-#define CBAUD    0010017
+-
+-#define CSIZE  0000060
+-#define CS5    0000000
+-#define CS6    0000020
+-#define CS7    0000040
+-#define CS8    0000060
+-#define CSTOPB 0000100
+-#define CREAD  0000200
+-#define PARENB 0000400
+-#define PARODD 0001000
+-#define HUPCL  0002000
+-#define CLOCAL 0004000
+-
+-#define ISIG   0000001
+-#define ICANON 0000002
+-#define ECHO   0000010
+-#define ECHOE  0000020
+-#define ECHOK  0000040
+-#define ECHONL 0000100
+-#define NOFLSH 0000200
+-#define TOSTOP 0000400
+-#define IEXTEN 0100000
+-
+-#define ECHOCTL 0001000
+-#define ECHOPRT 0002000
+-#define ECHOKE 0004000
+-#define FLUSHO 0010000
+-#define PENDIN 0040000
+-
+-#define TCOOFF 0
+-#define TCOON  1
+-#define TCIOFF 2
+-#define TCION  3
+-
+-#define TCIFLUSH  0
+-#define TCOFLUSH  1
+-#define TCIOFLUSH 2
+-
+-#define TCSANOW   0
+-#define TCSADRAIN 1
+-#define TCSAFLUSH 2
+-
+-#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+-#define CBAUDEX 0010000
+-#define CRTSCTS  020000000000
+-#define EXTPROC 0200000
+-#define XTABS  0014000
+-#endif
+--- a/arch/x32/pthread_arch.h
++++ b/arch/x32/pthread_arch.h
+@@ -7,6 +7,6 @@ static inline struct pthread *__pthread_
+ 
+ #define TP_ADJ(p) (p)
+ 
+-#define CANCEL_REG_IP 32
++#define MC_PC gregs[REG_RIP]
+ 
+ #define CANARY canary2
+--- a/arch/x32/src/syscall_cp_fixup.c
++++ /dev/null
+@@ -1,42 +0,0 @@
+-#include <sys/syscall.h>
+-
+-#ifdef SHARED
+-__attribute__((__visibility__("hidden")))
+-#endif
+-long __syscall_cp_internal(volatile void*, long long, long long, long long, long long,
+-                             long long, long long, long long);
+-
+-struct __timespec { long long tv_sec; long tv_nsec; };
+-struct __timespec_kernel { long long tv_sec; long long tv_nsec; };
+-#define __tsc(X) ((struct __timespec*)(unsigned long)(X))
+-#define __fixup(X) do { if(X) { \
+-	ts->tv_sec = __tsc(X)->tv_sec; \
+-	ts->tv_nsec = __tsc(X)->tv_nsec; \
+-	(X) = (unsigned long)ts; } } while(0)
+-
+-#ifdef SHARED
+-__attribute__((__visibility__("hidden")))
+-#endif
+-long __syscall_cp_asm (volatile void * foo, long long n, long long a1, long long a2, long long a3,
+-	                     long long a4, long long a5, long long a6)
+-{
+-	struct __timespec_kernel ts[1];
+-	switch (n) {
+-	case SYS_mq_timedsend: case SYS_mq_timedreceive: case SYS_pselect6:
+-		__fixup(a5);
+-		break;
+-	case SYS_futex:
+-		if((a2 & (~128 /* FUTEX_PRIVATE_FLAG */)) == 0 /* FUTEX_WAIT */)
+-			__fixup(a4);
+-		break;
+-	case SYS_clock_nanosleep:
+-	case SYS_rt_sigtimedwait: case SYS_ppoll:
+-		__fixup(a3);
+-		break;
+-	case SYS_nanosleep:
+-		__fixup(a1);
+-		break;
+-	}
+-	return __syscall_cp_internal(foo, n, a1, a2, a3, a4, a5, a6);
+-}
+-
+--- a/arch/x32/src/sysinfo.c
++++ /dev/null
+@@ -1,50 +0,0 @@
+-#include <sys/sysinfo.h>
+-#include "syscall.h"
+-#include "libc.h"
+-
+-#define klong long long
+-#define kulong unsigned long long
+-
+-struct kernel_sysinfo {
+-	klong uptime;
+-	kulong loads[3];
+-	kulong totalram;
+-	kulong freeram;
+-	kulong sharedram;
+-	kulong bufferram;
+-	kulong totalswap;
+-	kulong freeswap;
+-	short procs;
+-	short pad;
+-	kulong totalhigh;
+-	kulong freehigh;
+-	unsigned mem_unit;
+-};
+-
+-int __lsysinfo(struct sysinfo *info)
+-{
+-	struct kernel_sysinfo tmp;
+-	int ret = syscall(SYS_sysinfo, &tmp);
+-	if(ret == -1) return ret;
+-	info->uptime = tmp.uptime;
+-	info->loads[0] = tmp.loads[0];
+-	info->loads[1] = tmp.loads[1];
+-	info->loads[2] = tmp.loads[2];
+-	kulong shifts;
+-	kulong max = tmp.totalram | tmp.totalswap;
+-	__asm__("bsr %1,%0" : "=r"(shifts) : "r"(max));
+-	shifts = shifts >= 32 ? shifts - 31 : 0;
+-	info->totalram = tmp.totalram >> shifts;
+-	info->freeram = tmp.freeram >> shifts;
+-	info->sharedram = tmp.sharedram >> shifts;
+-	info->bufferram = tmp.bufferram >> shifts;
+-	info->totalswap = tmp.totalswap >> shifts;
+-	info->freeswap = tmp.freeswap >> shifts;
+-	info->procs = tmp.procs ;
+-	info->totalhigh = tmp.totalhigh >> shifts;
+-	info->freehigh = tmp.freehigh >> shifts;
+-	info->mem_unit = (tmp.mem_unit ? tmp.mem_unit : 1) << shifts;
+-	return ret;
+-}
+-
+-weak_alias(__lsysinfo, sysinfo);
+--- a/arch/x86_64/atomic.h
++++ /dev/null
+@@ -1,105 +0,0 @@
+-#ifndef _INTERNAL_ATOMIC_H
+-#define _INTERNAL_ATOMIC_H
+-
+-#include <stdint.h>
+-
+-static inline int a_ctz_64(uint64_t x)
+-{
+-	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
+-	return x;
+-}
+-
+-static inline int a_ctz_l(unsigned long x)
+-{
+-	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
+-	return x;
+-}
+-
+-static inline void a_and_64(volatile uint64_t *p, uint64_t v)
+-{
+-	__asm__( "lock ; and %1, %0"
+-			 : "=m"(*p) : "r"(v) : "memory" );
+-}
+-
+-static inline void a_or_64(volatile uint64_t *p, uint64_t v)
+-{
+-	__asm__( "lock ; or %1, %0"
+-			 : "=m"(*p) : "r"(v) : "memory" );
+-}
+-
+-static inline void a_or_l(volatile void *p, long v)
+-{
+-	__asm__( "lock ; or %1, %0"
+-		: "=m"(*(long *)p) : "r"(v) : "memory" );
+-}
+-
+-static inline void *a_cas_p(volatile void *p, void *t, void *s)
+-{
+-	__asm__( "lock ; cmpxchg %3, %1"
+-		: "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" );
+-	return t;
+-}
+-
+-static inline int a_cas(volatile int *p, int t, int s)
+-{
+-	__asm__( "lock ; cmpxchg %3, %1"
+-		: "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
+-	return t;
+-}
+-
+-static inline void a_or(volatile int *p, int v)
+-{
+-	__asm__( "lock ; or %1, %0"
+-		: "=m"(*p) : "r"(v) : "memory" );
+-}
+-
+-static inline void a_and(volatile int *p, int v)
+-{
+-	__asm__( "lock ; and %1, %0"
+-		: "=m"(*p) : "r"(v) : "memory" );
+-}
+-
+-static inline int a_swap(volatile int *x, int v)
+-{
+-	__asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
+-	return v;
+-}
+-
+-static inline int a_fetch_add(volatile int *x, int v)
+-{
+-	__asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
+-	return v;
+-}
+-
+-static inline void a_inc(volatile int *x)
+-{
+-	__asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" );
+-}
+-
+-static inline void a_dec(volatile int *x)
+-{
+-	__asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" );
+-}
+-
+-static inline void a_store(volatile int *p, int x)
+-{
+-	__asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" );
+-}
+-
+-static inline void a_spin()
+-{
+-	__asm__ __volatile__( "pause" : : : "memory" );
+-}
+-
+-static inline void a_barrier()
+-{
+-	__asm__ __volatile__( "" : : : "memory" );
+-}
+-
+-static inline void a_crash()
+-{
+-	__asm__ __volatile__( "hlt" : : : "memory" );
+-}
+-
+-
+-#endif
+--- /dev/null
++++ b/arch/x86_64/atomic_arch.h
+@@ -0,0 +1,116 @@
++#define a_cas a_cas
++static inline int a_cas(volatile int *p, int t, int s)
++{
++	__asm__ __volatile__ (
++		"lock ; cmpxchg %3, %1"
++		: "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
++	return t;
++}
++
++#define a_cas_p a_cas_p
++static inline void *a_cas_p(volatile void *p, void *t, void *s)
++{
++	__asm__( "lock ; cmpxchg %3, %1"
++		: "=a"(t), "=m"(*(void *volatile *)p)
++		: "a"(t), "r"(s) : "memory" );
++	return t;
++}
++
++#define a_swap a_swap
++static inline int a_swap(volatile int *p, int v)
++{
++	__asm__ __volatile__(
++		"xchg %0, %1"
++		: "=r"(v), "=m"(*p) : "0"(v) : "memory" );
++	return v;
++}
++
++#define a_fetch_add a_fetch_add
++static inline int a_fetch_add(volatile int *p, int v)
++{
++	__asm__ __volatile__(
++		"lock ; xadd %0, %1"
++		: "=r"(v), "=m"(*p) : "0"(v) : "memory" );
++	return v;
++}
++
++#define a_and a_and
++static inline void a_and(volatile int *p, int v)
++{
++	__asm__ __volatile__(
++		"lock ; and %1, %0"
++		: "=m"(*p) : "r"(v) : "memory" );
++}
++
++#define a_or a_or
++static inline void a_or(volatile int *p, int v)
++{
++	__asm__ __volatile__(
++		"lock ; or %1, %0"
++		: "=m"(*p) : "r"(v) : "memory" );
++}
++
++#define a_and_64 a_and_64
++static inline void a_and_64(volatile uint64_t *p, uint64_t v)
++{
++	__asm__ __volatile(
++		"lock ; and %1, %0"
++		 : "=m"(*p) : "r"(v) : "memory" );
++}
++
++#define a_or_64 a_or_64
++static inline void a_or_64(volatile uint64_t *p, uint64_t v)
++{
++	__asm__ __volatile__(
++		"lock ; or %1, %0"
++		 : "=m"(*p) : "r"(v) : "memory" );
++}
++
++#define a_inc a_inc
++static inline void a_inc(volatile int *p)
++{
++	__asm__ __volatile__(
++		"lock ; incl %0"
++		: "=m"(*p) : "m"(*p) : "memory" );
++}
++
++#define a_dec a_dec
++static inline void a_dec(volatile int *p)
++{
++	__asm__ __volatile__(
++		"lock ; decl %0"
++		: "=m"(*p) : "m"(*p) : "memory" );
++}
++
++#define a_store a_store
++static inline void a_store(volatile int *p, int x)
++{
++	__asm__ __volatile__(
++		"mov %1, %0 ; lock ; orl $0,(%%rsp)"
++		: "=m"(*p) : "r"(x) : "memory" );
++}
++
++#define a_barrier a_barrier
++static inline void a_barrier()
++{
++	__asm__ __volatile__( "" : : : "memory" );
++}
++
++#define a_spin a_spin
++static inline void a_spin()
++{
++	__asm__ __volatile__( "pause" : : : "memory" );
++}
++
++#define a_crash a_crash
++static inline void a_crash()
++{
++	__asm__ __volatile__( "hlt" : : : "memory" );
++}
++
++#define a_ctz_64 a_ctz_64
++static inline int a_ctz_64(uint64_t x)
++{
++	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
++	return x;
++}
+--- a/arch/x86_64/bits/errno.h
++++ /dev/null
+@@ -1,134 +0,0 @@
+-#define EPERM            1
+-#define ENOENT           2
+-#define ESRCH            3
+-#define EINTR            4
+-#define EIO              5
+-#define ENXIO            6
+-#define E2BIG            7
+-#define ENOEXEC          8
+-#define EBADF            9
+-#define ECHILD          10
+-#define EAGAIN          11
+-#define ENOMEM          12
+-#define EACCES          13
+-#define EFAULT          14
+-#define ENOTBLK         15
+-#define EBUSY           16
+-#define EEXIST          17
+-#define EXDEV           18
+-#define ENODEV          19
+-#define ENOTDIR         20
+-#define EISDIR          21
+-#define EINVAL          22
+-#define ENFILE          23
+-#define EMFILE          24
+-#define ENOTTY          25
+-#define ETXTBSY         26
+-#define EFBIG           27
+-#define ENOSPC          28
+-#define ESPIPE          29
+-#define EROFS           30
+-#define EMLINK          31
+-#define EPIPE           32
+-#define EDOM            33
+-#define ERANGE          34
+-#define EDEADLK         35
+-#define ENAMETOOLONG    36
+-#define ENOLCK          37
+-#define ENOSYS          38
+-#define ENOTEMPTY       39
+-#define ELOOP           40
+-#define EWOULDBLOCK     EAGAIN
+-#define ENOMSG          42
+-#define EIDRM           43
+-#define ECHRNG          44
+-#define EL2NSYNC        45
+-#define EL3HLT          46
+-#define EL3RST          47
+-#define ELNRNG          48
+-#define EUNATCH         49
+-#define ENOCSI          50
+-#define EL2HLT          51
+-#define EBADE           52
+-#define EBADR           53
+-#define EXFULL          54
+-#define ENOANO          55
+-#define EBADRQC         56
+-#define EBADSLT         57
+-#define EDEADLOCK       EDEADLK
+-#define EBFONT          59
+-#define ENOSTR          60
+-#define ENODATA         61
+-#define ETIME           62
+-#define ENOSR           63
+-#define ENONET          64
+-#define ENOPKG          65
+-#define EREMOTE         66
+-#define ENOLINK         67
+-#define EADV            68
+-#define ESRMNT          69
+-#define ECOMM           70
+-#define EPROTO          71
+-#define EMULTIHOP       72
+-#define EDOTDOT         73
+-#define EBADMSG         74
+-#define EOVERFLOW       75
+-#define ENOTUNIQ        76
+-#define EBADFD          77
+-#define EREMCHG         78
+-#define ELIBACC         79
+-#define ELIBBAD         80
+-#define ELIBSCN         81
+-#define ELIBMAX         82
+-#define ELIBEXEC        83
+-#define EILSEQ          84
+-#define ERESTART        85
+-#define ESTRPIPE        86
+-#define EUSERS          87
+-#define ENOTSOCK        88
+-#define EDESTADDRREQ    89
+-#define EMSGSIZE        90
+-#define EPROTOTYPE      91
+-#define ENOPROTOOPT     92
+-#define EPROTONOSUPPORT 93
+-#define ESOCKTNOSUPPORT 94
+-#define EOPNOTSUPP      95
+-#define ENOTSUP         EOPNOTSUPP
+-#define EPFNOSUPPORT    96
+-#define EAFNOSUPPORT    97
+-#define EADDRINUSE      98
+-#define EADDRNOTAVAIL   99
+-#define ENETDOWN        100
+-#define ENETUNREACH     101
+-#define ENETRESET       102
+-#define ECONNABORTED    103
+-#define ECONNRESET      104
+-#define ENOBUFS         105
+-#define EISCONN         106
+-#define ENOTCONN        107
+-#define ESHUTDOWN       108
+-#define ETOOMANYREFS    109
+-#define ETIMEDOUT       110
+-#define ECONNREFUSED    111
+-#define EHOSTDOWN       112
+-#define EHOSTUNREACH    113
+-#define EALREADY        114
+-#define EINPROGRESS     115
+-#define ESTALE          116
+-#define EUCLEAN         117
+-#define ENOTNAM         118
+-#define ENAVAIL         119
+-#define EISNAM          120
+-#define EREMOTEIO       121
+-#define EDQUOT          122
+-#define ENOMEDIUM       123
+-#define EMEDIUMTYPE     124
+-#define ECANCELED       125
+-#define ENOKEY          126
+-#define EKEYEXPIRED     127
+-#define EKEYREVOKED     128
+-#define EKEYREJECTED    129
+-#define EOWNERDEAD      130
+-#define ENOTRECOVERABLE 131
+-#define ERFKILL         132
+-#define EHWPOISON       133
+--- a/arch/x86_64/bits/mman.h
++++ b/arch/x86_64/bits/mman.h
+@@ -38,6 +38,7 @@
+ 
+ #define MCL_CURRENT     1
+ #define MCL_FUTURE      2
++#define MCL_ONFAULT     4
+ 
+ #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+ #define MADV_NORMAL      0
+--- a/arch/x86_64/bits/sem.h
++++ /dev/null
+@@ -1,16 +0,0 @@
+-struct semid_ds {
+-	struct ipc_perm sem_perm;
+-	time_t sem_otime;
+-	time_t __unused1;
+-	time_t sem_ctime;
+-	time_t __unused2;
+-#if __BYTE_ORDER == __LITTLE_ENDIAN
+-	unsigned short sem_nsems;
+-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+-#else
+-	char __sem_nsems_pad[sizeof(time_t)-sizeof(short)];
+-	unsigned short sem_nsems;
+-#endif
+-	time_t __unused3;
+-	time_t __unused4;
+-};
+--- a/arch/x86_64/bits/statfs.h
++++ /dev/null
+@@ -1,7 +0,0 @@
+-struct statfs {
+-	unsigned long f_type, f_bsize;
+-	fsblkcnt_t f_blocks, f_bfree, f_bavail;
+-	fsfilcnt_t f_files, f_ffree;
+-	fsid_t f_fsid;
+-	unsigned long f_namelen, f_frsize, f_flags, f_spare[4];
+-};
+--- a/arch/x86_64/bits/stdarg.h
++++ /dev/null
+@@ -1,4 +0,0 @@
+-#define va_start(v,l)   __builtin_va_start(v,l)
+-#define va_end(v)       __builtin_va_end(v)
+-#define va_arg(v,l)     __builtin_va_arg(v,l)
+-#define va_copy(d,s)    __builtin_va_copy(d,s)
+--- a/arch/x86_64/bits/syscall.h
++++ b/arch/x86_64/bits/syscall.h
+@@ -321,6 +321,9 @@
+ #define __NR_kexec_file_load			320
+ #define __NR_bpf				321
+ #define __NR_execveat				322
++#define __NR_userfaultfd			323
++#define __NR_membarrier				324
++#define __NR_mlock2				325
+ 
+ 
+ 
+@@ -649,3 +652,6 @@
+ #define SYS_kexec_file_load			320
+ #define SYS_bpf					321
+ #define SYS_execveat				322
++#define SYS_userfaultfd				323
++#define SYS_membarrier				324
++#define SYS_mlock2				325
+--- a/arch/x86_64/bits/termios.h
++++ /dev/null
+@@ -1,160 +0,0 @@
+-struct termios
+-{
+-	tcflag_t c_iflag;
+-	tcflag_t c_oflag;
+-	tcflag_t c_cflag;
+-	tcflag_t c_lflag;
+-	cc_t c_line;
+-	cc_t c_cc[NCCS];
+-	speed_t __c_ispeed;
+-	speed_t __c_ospeed;
+-};
+-
+-#define VINTR     0
+-#define VQUIT     1
+-#define VERASE    2
+-#define VKILL     3
+-#define VEOF      4
+-#define VTIME     5
+-#define VMIN      6
+-#define VSWTC     7
+-#define VSTART    8
+-#define VSTOP     9
+-#define VSUSP    10
+-#define VEOL     11
+-#define VREPRINT 12
+-#define VDISCARD 13
+-#define VWERASE  14
+-#define VLNEXT   15
+-#define VEOL2    16
+-
+-#define IGNBRK  0000001
+-#define BRKINT  0000002
+-#define IGNPAR  0000004
+-#define PARMRK  0000010
+-#define INPCK   0000020
+-#define ISTRIP  0000040
+-#define INLCR   0000100
+-#define IGNCR   0000200
+-#define ICRNL   0000400
+-#define IUCLC   0001000
+-#define IXON    0002000
+-#define IXANY   0004000
+-#define IXOFF   0010000
+-#define IMAXBEL 0020000
+-#define IUTF8   0040000
+-
+-#define OPOST  0000001
+-#define OLCUC  0000002
+-#define ONLCR  0000004
+-#define OCRNL  0000010
+-#define ONOCR  0000020
+-#define ONLRET 0000040
+-#define OFILL  0000100
+-#define OFDEL  0000200
+-#define NLDLY  0000400
+-#define NL0    0000000
+-#define NL1    0000400
+-#define CRDLY  0003000
+-#define CR0    0000000
+-#define CR1    0001000
+-#define CR2    0002000
+-#define CR3    0003000
+-#define TABDLY 0014000
+-#define TAB0   0000000
+-#define TAB1   0004000
+-#define TAB2   0010000
+-#define TAB3   0014000
+-#define BSDLY  0020000
+-#define BS0    0000000
+-#define BS1    0020000
+-#define FFDLY  0100000
+-#define FF0    0000000
+-#define FF1    0100000
+-
+-#define VTDLY  0040000
+-#define VT0    0000000
+-#define VT1    0040000
+-
+-#define B0       0000000
+-#define B50      0000001
+-#define B75      0000002
+-#define B110     0000003
+-#define B134     0000004
+-#define B150     0000005
+-#define B200     0000006
+-#define B300     0000007
+-#define B600     0000010
+-#define B1200    0000011
+-#define B1800    0000012
+-#define B2400    0000013
+-#define B4800    0000014
+-#define B9600    0000015
+-#define B19200   0000016
+-#define B38400   0000017
+-
+-#define B57600   0010001
+-#define B115200  0010002
+-#define B230400  0010003
+-#define B460800  0010004
+-#define B500000  0010005
+-#define B576000  0010006
+-#define B921600  0010007
+-#define B1000000 0010010
+-#define B1152000 0010011
+-#define B1500000 0010012
+-#define B2000000 0010013
+-#define B2500000 0010014
+-#define B3000000 0010015
+-#define B3500000 0010016
+-#define B4000000 0010017
+-
+-#define CBAUD    0010017
+-
+-#define CSIZE  0000060
+-#define CS5    0000000
+-#define CS6    0000020
+-#define CS7    0000040
+-#define CS8    0000060
+-#define CSTOPB 0000100
+-#define CREAD  0000200
+-#define PARENB 0000400
+-#define PARODD 0001000
+-#define HUPCL  0002000
+-#define CLOCAL 0004000
+-
+-#define ISIG   0000001
+-#define ICANON 0000002
+-#define ECHO   0000010
+-#define ECHOE  0000020
+-#define ECHOK  0000040
+-#define ECHONL 0000100
+-#define NOFLSH 0000200
+-#define TOSTOP 0000400
+-#define IEXTEN 0100000
+-
+-#define ECHOCTL 0001000
+-#define ECHOPRT 0002000
+-#define ECHOKE 0004000
+-#define FLUSHO 0010000
+-#define PENDIN 0040000
+-
+-#define TCOOFF 0
+-#define TCOON  1
+-#define TCIOFF 2
+-#define TCION  3
+-
+-#define TCIFLUSH  0
+-#define TCOFLUSH  1
+-#define TCIOFLUSH 2
+-
+-#define TCSANOW   0
+-#define TCSADRAIN 1
+-#define TCSAFLUSH 2
+-
+-#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+-#define CBAUDEX 0010000
+-#define CRTSCTS  020000000000
+-#define EXTPROC 0200000
+-#define XTABS  0014000
+-#endif
+--- a/arch/x86_64/pthread_arch.h
++++ b/arch/x86_64/pthread_arch.h
+@@ -7,4 +7,4 @@ static inline struct pthread *__pthread_
+ 
+ #define TP_ADJ(p) (p)
+ 
+-#define CANCEL_REG_IP 16
++#define MC_PC gregs[REG_RIP]
+--- a/configure
++++ b/configure
+@@ -9,6 +9,9 @@ VAR=VALUE.  See below for descriptions o
+ 
+ Defaults for the options are specified in brackets.
+ 
++Configuration:
++  --srcdir=DIR            source directory [detected]
++
+ Installation directories:
+   --prefix=PREFIX         main installation prefix [/usr/local/musl]
+   --exec-prefix=EPREFIX   installation prefix for executable files [PREFIX]
+@@ -117,6 +120,7 @@ CFLAGS_TRY=
+ LDFLAGS_AUTO=
+ LDFLAGS_TRY=
+ OPTIMIZE_GLOBS=
++srcdir=
+ prefix=/usr/local/musl
+ exec_prefix='$(prefix)'
+ bindir='$(exec_prefix)/bin'
+@@ -139,6 +143,7 @@ clang_wrapper=no
+ for arg ; do
+ case "$arg" in
+ --help) usage ;;
++--srcdir=*) srcdir=${arg#*=} ;;
+ --prefix=*) prefix=${arg#*=} ;;
+ --exec-prefix=*) exec_prefix=${arg#*=} ;;
+ --bindir=*) bindir=${arg#*=} ;;
+@@ -179,11 +184,23 @@ LIBCC=*) LIBCC=${arg#*=} ;;
+ esac
+ done
+ 
+-for i in prefix exec_prefix bindir libdir includedir syslibdir ; do
++for i in srcdir prefix exec_prefix bindir libdir includedir syslibdir ; do
+ stripdir $i
+ done
+ 
+ #
++# Get the source dir for out-of-tree builds
++#
++if test -z "$srcdir" ; then
++srcdir="${0%/configure}"
++stripdir srcdir
++fi
++abs_builddir="$(pwd)" || fail "$0: cannot determine working directory"
++abs_srcdir="$(cd $srcdir && pwd)" || fail "$0: invalid source directory $srcdir"
++test "$abs_srcdir" = "$abs_builddir" && srcdir=.
++test "$srcdir" != "." -a -f Makefile -a ! -h Makefile && fail "$0: Makefile already exists in the working directory"
++
++#
+ # Get a temp filename we can use
+ #
+ i=0
+@@ -263,11 +280,11 @@ fi
+ fi
+ 
+ if test "$gcc_wrapper" = yes ; then
+-tools="$tools tools/musl-gcc"
++tools="$tools obj/musl-gcc"
+ tool_libs="$tool_libs lib/musl-gcc.specs"
+ fi
+ if test "$clang_wrapper" = yes ; then
+-tools="$tools tools/musl-clang tools/ld.musl-clang"
++tools="$tools obj/musl-clang obj/ld.musl-clang"
+ fi
+ 
+ #
+@@ -321,7 +338,7 @@ __attribute__((__may_alias__))
+ #endif
+ x;
+ EOF
+-if $CC $CFLAGS_C99FSE -I./arch/$ARCH -I./include $CPPFLAGS $CFLAGS \
++if $CC $CFLAGS_C99FSE $CPPFLAGS $CFLAGS \
+   -c -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
+ printf "no\n"
+ else
+@@ -330,6 +347,13 @@ CFLAGS_C99FSE="$CFLAGS_C99FSE -D__may_al
+ fi
+ 
+ #
++# The GNU toolchain defaults to assuming unmarked files need an
++# executable stack, potentially exposing vulnerabilities in programs
++# linked with such object files. Fix this.
++#
++tryflag CFLAGS_C99FSE -Wa,--noexecstack
++
++#
+ # Check for options to disable stack protector, which needs to be
+ # disabled for a few early-bootstrap translation units. If not found,
+ # this is not an error; we assume the toolchain does not do ssp.
+@@ -430,11 +454,15 @@ tryflag CFLAGS_AUTO -fno-unwind-tables
+ tryflag CFLAGS_AUTO -fno-asynchronous-unwind-tables
+ 
+ #
+-# The GNU toolchain defaults to assuming unmarked files need an
+-# executable stack, potentially exposing vulnerabilities in programs
+-# linked with such object files. Fix this.
++# Attempt to put each function and each data object in its own
++# section. This both allows additional size optimizations at link
++# time and works around a dangerous class of compiler/assembler bugs
++# whereby relative address expressions are constant-folded by the
++# assembler even when one or more of the symbols involved is
++# replaceable. See gas pr 18561 and gcc pr 66609, 68178, etc.
+ #
+-tryflag CFLAGS_AUTO -Wa,--noexecstack
++tryflag CFLAGS_AUTO -ffunction-sections
++tryflag CFLAGS_AUTO -fdata-sections
+ 
+ #
+ # On x86, make sure we don't have incompatible instruction set
+@@ -489,7 +517,7 @@ int foo(void) { }
+ int bar(void) { fp = foo; return foo(); }
+ EOF
+ if $CC $CFLAGS_C99FSE $CPPFLAGS $CFLAGS \
+-  -DSHARED -fPIC -I./src/internal -include vis.h \
++  -DSHARED -fPIC -I$srcdir/src/internal -include vis.h \
+   -nostdlib -shared -Wl,-Bsymbolic-functions \
+   -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
+ visibility=yes
+@@ -504,6 +532,25 @@ CFLAGS_AUTO="$CFLAGS_AUTO -include vis.h
+ CFLAGS_AUTO="${CFLAGS_AUTO# }"
+ fi
+ 
++# Determine if the compiler produces position-independent code (PIC)
++# by default. If so, we don't need to compile separate object files
++# for libc.a and libc.so.
++if trycppif __PIC__ "$CFLAGS_C99FSE $CPPFLAGS $CFLAGS" ; then
++pic_default=yes
++else
++pic_default=no
++fi
++
++# Reduce space lost to padding for alignment purposes by sorting data
++# objects according to their alignment requirements. This approximates
++# optimal packing.
++tryldflag LDFLAGS_AUTO -Wl,--sort-section,alignment
++tryldflag LDFLAGS_AUTO -Wl,--sort-common
++
++# When linking shared library, drop dummy weak definitions that were
++# replaced by strong definitions from other translation units.
++tryldflag LDFLAGS_AUTO -Wl,--gc-sections
++
+ # Some patched GCC builds have these defaults messed up...
+ tryldflag LDFLAGS_AUTO -Wl,--hash-style=both
+ 
+@@ -513,6 +560,11 @@ tryldflag LDFLAGS_AUTO -Wl,--hash-style=
+ # runtime library; implementation error is also a possibility.
+ tryldflag LDFLAGS_AUTO -Wl,--no-undefined
+ 
++# Avoid exporting symbols from compiler runtime libraries. They
++# should be hidden anyway, but some toolchains including old gcc
++# versions built without shared library support and pcc are broken.
++tryldflag LDFLAGS_AUTO -Wl,--exclude-libs=ALL
++
+ test "$shared" = "no" || {
+ # Disable dynamic linking if ld is broken and can't do -Bsymbolic-functions
+ LDFLAGS_DUMMY=
+@@ -599,8 +651,9 @@ echo '#include <float.h>' > "$tmpc"
+ echo '#if LDBL_MANT_DIG == 53' >> "$tmpc"
+ echo 'typedef char ldcheck[9-(int)sizeof(long double)];' >> "$tmpc"
+ echo '#endif' >> "$tmpc"
+-if $CC $CFLAGS_C99FSE -I./arch/$ARCH -I./include $CPPFLAGS $CFLAGS \
+-  -c -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
++if $CC $CFLAGS_C99FSE \
++  -I$srcdir/arch/$ARCH -I$srcdir/arch/generic -I$srcdir/include \
++  $CPPFLAGS $CFLAGS -c -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
+ printf "yes\n"
+ else
+ printf "no\n"
+@@ -622,6 +675,7 @@ cat << EOF
+ ARCH = $ARCH
+ SUBARCH = $SUBARCH
+ ASMSUBARCH = $ASMSUBARCH
++srcdir = $srcdir
+ prefix = $prefix
+ exec_prefix = $exec_prefix
+ bindir = $bindir
+@@ -629,12 +683,14 @@ libdir = $libdir
+ includedir = $includedir
+ syslibdir = $syslibdir
+ CC = $CC
+-CFLAGS = $CFLAGS_AUTO $CFLAGS
++CFLAGS = $CFLAGS
++CFLAGS_AUTO = $CFLAGS_AUTO
+ CFLAGS_C99FSE = $CFLAGS_C99FSE
+ CFLAGS_MEMOPS = $CFLAGS_MEMOPS
+ CFLAGS_NOSSP = $CFLAGS_NOSSP
+ CPPFLAGS = $CPPFLAGS
+-LDFLAGS = $LDFLAGS_AUTO $LDFLAGS
++LDFLAGS = $LDFLAGS
++LDFLAGS_AUTO = $LDFLAGS_AUTO
+ CROSS_COMPILE = $CROSS_COMPILE
+ LIBCC = $LIBCC
+ OPTIMIZE_GLOBS = $OPTIMIZE_GLOBS
+@@ -646,6 +702,9 @@ test "x$static" = xno && echo "STATIC_LI
+ test "x$shared" = xno && echo "SHARED_LIBS ="
+ test "x$cc_family" = xgcc && echo 'WRAPCC_GCC = $(CC)'
+ test "x$cc_family" = xclang && echo 'WRAPCC_CLANG = $(CC)'
++test "x$pic_default" = xyes && echo 'AOBJS = $(LOBJS)'
+ exec 1>&3 3>&-
+ 
++test "$srcdir" = "." || ln -sf $srcdir/Makefile .
++
+ printf "done\n"
+--- a/crt/arm/crti.s
++++ b/crt/arm/crti.s
+@@ -1,3 +1,5 @@
++.syntax unified
++
+ .section .init
+ .global _init
+ .type _init,%function
+--- a/crt/arm/crtn.s
++++ b/crt/arm/crtn.s
+@@ -1,11 +1,9 @@
++.syntax unified
++
+ .section .init
+ 	pop {r0,lr}
+-	tst lr,#1
+-	moveq pc,lr
+ 	bx lr
+ 
+ .section .fini
+ 	pop {r0,lr}
+-	tst lr,#1
+-	moveq pc,lr
+ 	bx lr
+--- a/crt/rcrt1.c
++++ b/crt/rcrt1.c
+@@ -1,7 +1,7 @@
+ #define SHARED
+ #define START "_start"
+ #define _dlstart_c _start_c
+-#include "../src/ldso/dlstart.c"
++#include "../ldso/dlstart.c"
+ 
+ int main();
+ void _init() __attribute__((weak));
+--- a/include/complex.h
++++ b/include/complex.h
+@@ -116,7 +116,7 @@ long double creall(long double complex);
+ 
+ #if __STDC_VERSION__ >= 201112L
+ #if defined(_Imaginary_I)
+-#define __CMPLX(x, y, t) ((t)(x) + _Imaginary_I*(t)(y)))
++#define __CMPLX(x, y, t) ((t)(x) + _Imaginary_I*(t)(y))
+ #elif defined(__clang__)
+ #define __CMPLX(x, y, t) (+(_Complex t){ (t)(x), (t)(y) })
+ #else
+--- a/include/netinet/in.h
++++ b/include/netinet/in.h
+@@ -103,6 +103,7 @@ uint16_t ntohs(uint16_t);
+ #define IPPROTO_SCTP     132
+ #define IPPROTO_MH       135
+ #define IPPROTO_UDPLITE  136
++#define IPPROTO_MPLS     137
+ #define IPPROTO_RAW      255
+ #define IPPROTO_MAX      256
+ 
+@@ -200,6 +201,7 @@ uint16_t ntohs(uint16_t);
+ #define IP_MINTTL          21
+ #define IP_NODEFRAG        22
+ #define IP_CHECKSUM        23
++#define IP_BIND_ADDRESS_NO_PORT 24
+ #define IP_MULTICAST_IF    32
+ #define IP_MULTICAST_TTL   33
+ #define IP_MULTICAST_LOOP  34
+--- a/include/netinet/tcp.h
++++ b/include/netinet/tcp.h
+@@ -27,6 +27,9 @@
+ #define TCP_FASTOPEN     23
+ #define TCP_TIMESTAMP    24
+ #define TCP_NOTSENT_LOWAT 25
++#define TCP_CC_INFO      26
++#define TCP_SAVE_SYN     27
++#define TCP_SAVED_SYN    28
+ 
+ #define TCP_ESTABLISHED  1
+ #define TCP_SYN_SENT     2
+@@ -41,7 +44,20 @@
+ #define TCP_CLOSING      11
+ 
+ #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
++#define TCPOPT_EOL              0
++#define TCPOPT_NOP              1
++#define TCPOPT_MAXSEG           2
++#define TCPOPT_WINDOW           3
++#define TCPOPT_SACK_PERMITTED   4
++#define TCPOPT_SACK             5
++#define TCPOPT_TIMESTAMP        8
++#define TCPOLEN_SACK_PERMITTED  2
++#define TCPOLEN_WINDOW          3
++#define TCPOLEN_MAXSEG          4
++#define TCPOLEN_TIMESTAMP       10
++
+ #define SOL_TCP 6
++
+ #include <sys/types.h>
+ #include <sys/socket.h>
+ #include <stdint.h>
+@@ -164,6 +180,10 @@ struct tcp_info
+ 	uint32_t tcpi_total_retrans;
+ 	uint64_t tcpi_pacing_rate;
+ 	uint64_t tcpi_max_pacing_rate;
++	uint64_t tcpi_bytes_acked;
++	uint64_t tcpi_bytes_received;
++	uint32_t tcpi_segs_out;
++	uint32_t tcpi_segs_in;
+ };
+ 
+ #define TCP_MD5SIG_MAXKEYLEN    80
+--- a/include/netpacket/packet.h
++++ b/include/netpacket/packet.h
+@@ -32,10 +32,27 @@ struct packet_mreq {
+ #define	PACKET_RECV_OUTPUT		3
+ #define	PACKET_RX_RING			5
+ #define	PACKET_STATISTICS		6
++#define PACKET_COPY_THRESH		7
++#define PACKET_AUXDATA			8
++#define PACKET_ORIGDEV			9
++#define PACKET_VERSION			10
++#define PACKET_HDRLEN			11
++#define PACKET_RESERVE			12
++#define PACKET_TX_RING			13
++#define PACKET_LOSS			14
++#define PACKET_VNET_HDR			15
++#define PACKET_TX_TIMESTAMP		16
++#define PACKET_TIMESTAMP		17
++#define PACKET_FANOUT			18
++#define PACKET_TX_HAS_OFF		19
++#define PACKET_QDISC_BYPASS		20
++#define PACKET_ROLLOVER_STATS		21
++#define PACKET_FANOUT_DATA		22
+ 
+ #define PACKET_MR_MULTICAST	0
+ #define PACKET_MR_PROMISC	1
+ #define PACKET_MR_ALLMULTI	2
++#define PACKET_MR_UNICAST	3
+ 
+ #ifdef __cplusplus
+ }
+--- a/include/signal.h
++++ b/include/signal.h
+@@ -27,8 +27,6 @@ extern "C" {
+ 
+ #include <bits/alltypes.h>
+ 
+-#define SIG_HOLD ((void (*)(int)) 2)
+-
+ #define SIG_BLOCK     0
+ #define SIG_UNBLOCK   1
+ #define SIG_SETMASK   2
+@@ -43,6 +41,18 @@ extern "C" {
+ #define SI_USER 0
+ #define SI_KERNEL 128
+ 
++typedef struct sigaltstack stack_t;
++
++#endif
++
++#include <bits/signal.h>
++
++#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
++ || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) \
++ || defined(_BSD_SOURCE)
++
++#define SIG_HOLD ((void (*)(int)) 2)
++
+ #define FPE_INTDIV 1
+ #define FPE_INTOVF 2
+ #define FPE_FLTDIV 3
+@@ -78,15 +88,17 @@ extern "C" {
+ #define CLD_STOPPED 5
+ #define CLD_CONTINUED 6
+ 
+-typedef struct sigaltstack stack_t;
+-
+ union sigval {
+ 	int sival_int;
+ 	void *sival_ptr;
+ };
+ 
+ typedef struct {
++#ifdef __SI_SWAP_ERRNO_CODE
++	int si_signo, si_code, si_errno;
++#else
+ 	int si_signo, si_errno, si_code;
++#endif
+ 	union {
+ 		char __pad[128 - 2*sizeof(int) - sizeof(long)];
+ 		struct {
+@@ -240,8 +252,6 @@ int sigandset(sigset_t *, const sigset_t
+ #define SA_ONESHOT SA_RESETHAND
+ #endif
+ 
+-#include <bits/signal.h>
+-
+ #define SIG_ERR  ((void (*)(int))-1)
+ #define SIG_DFL  ((void (*)(int)) 0)
+ #define SIG_IGN  ((void (*)(int)) 1)
+--- a/include/sys/mman.h
++++ b/include/sys/mman.h
+@@ -39,6 +39,7 @@ int remap_file_pages (void *, size_t, in
+ #endif
+ 
+ #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
++#define MLOCK_ONFAULT   0x01
+ int madvise (void *, size_t, int);
+ int mincore (void *, size_t, unsigned char *);
+ #endif
+--- a/include/sys/mount.h
++++ b/include/sys/mount.h
+@@ -46,12 +46,13 @@ extern "C" {
+ #define MS_KERNMOUNT   (1<<22)
+ #define MS_I_VERSION   (1<<23)
+ #define MS_STRICTATIME (1<<24)
++#define MS_LAZYTIME    (1<<25)
+ #define MS_NOSEC       (1<<28)
+ #define MS_BORN        (1<<29)
+ #define MS_ACTIVE      (1<<30)
+ #define MS_NOUSER      (1U<<31)
+ 
+-#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION)
++#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|MS_LAZYTIME)
+ 
+ #define MS_MGC_VAL 0xc0ed0000
+ #define MS_MGC_MSK 0xffff0000
+--- a/include/sys/prctl.h
++++ b/include/sys/prctl.h
+@@ -124,6 +124,12 @@ struct prctl_mm_map {
+ #define PR_FP_MODE_FR (1 << 0)
+ #define PR_FP_MODE_FRE (1 << 1)
+ 
++#define PR_CAP_AMBIENT          47
++#define PR_CAP_AMBIENT_IS_SET   1
++#define PR_CAP_AMBIENT_RAISE    2
++#define PR_CAP_AMBIENT_LOWER    3
++#define PR_CAP_AMBIENT_CLEAR_ALL 4
++
+ int prctl (int, ...);
+ 
+ #ifdef __cplusplus
+--- a/include/sys/ptrace.h
++++ b/include/sys/ptrace.h
+@@ -39,6 +39,7 @@ extern "C" {
+ #define PTRACE_PEEKSIGINFO 0x4209
+ #define PTRACE_GETSIGMASK 0x420a
+ #define PTRACE_SETSIGMASK 0x420b
++#define PTRACE_SECCOMP_GET_FILTER 0x420c
+ 
+ #define PT_READ_I PTRACE_PEEKTEXT
+ #define PT_READ_D PTRACE_PEEKDATA
+@@ -72,7 +73,8 @@ extern "C" {
+ #define PTRACE_O_TRACEEXIT      0x00000040
+ #define PTRACE_O_TRACESECCOMP   0x00000080
+ #define PTRACE_O_EXITKILL       0x00100000
+-#define PTRACE_O_MASK           0x001000ff
++#define PTRACE_O_SUSPEND_SECCOMP 0x00200000
++#define PTRACE_O_MASK           0x003000ff
+ 
+ #define PTRACE_EVENT_FORK 1
+ #define PTRACE_EVENT_VFORK 2
+--- a/include/sys/socket.h
++++ b/include/sys/socket.h
+@@ -96,6 +96,7 @@ struct linger
+ #define PF_WANPIPE      25
+ #define PF_LLC          26
+ #define PF_IB           27
++#define PF_MPLS         28
+ #define PF_CAN          29
+ #define PF_TIPC         30
+ #define PF_BLUETOOTH    31
+@@ -141,6 +142,7 @@ struct linger
+ #define AF_WANPIPE      PF_WANPIPE
+ #define AF_LLC          PF_LLC
+ #define AF_IB           PF_IB
++#define AF_MPLS         PF_MPLS
+ #define AF_CAN          PF_CAN
+ #define AF_TIPC         PF_TIPC
+ #define AF_BLUETOOTH    PF_BLUETOOTH
+@@ -255,6 +257,7 @@ struct linger
+ #define MSG_NOSIGNAL  0x4000
+ #define MSG_MORE      0x8000
+ #define MSG_WAITFORONE 0x10000
++#define MSG_FASTOPEN  0x20000000
+ #define MSG_CMSG_CLOEXEC 0x40000000
+ 
+ #define __CMSG_LEN(cmsg) (((cmsg)->cmsg_len + sizeof(long) - 1) & ~(long)(sizeof(long) - 1))
+--- /dev/null
++++ b/ldso/dlstart.c
+@@ -0,0 +1,146 @@
++#include <stddef.h>
++#include "dynlink.h"
++
++#ifndef START
++#define START "_dlstart"
++#endif
++
++#include "crt_arch.h"
++
++#ifndef GETFUNCSYM
++#define GETFUNCSYM(fp, sym, got) do { \
++	__attribute__((__visibility__("hidden"))) void sym(); \
++	static void (*static_func_ptr)() = sym; \
++	__asm__ __volatile__ ( "" : "+m"(static_func_ptr) : : "memory"); \
++	*(fp) = static_func_ptr; } while(0)
++#endif
++
++__attribute__((__visibility__("hidden")))
++void _dlstart_c(size_t *sp, size_t *dynv)
++{
++	size_t i, aux[AUX_CNT], dyn[DYN_CNT];
++	size_t *rel, rel_size, base;
++
++	int argc = *sp;
++	char **argv = (void *)(sp+1);
++
++	for (i=argc+1; argv[i]; i++);
++	size_t *auxv = (void *)(argv+i+1);
++
++	for (i=0; i<AUX_CNT; i++) aux[i] = 0;
++	for (i=0; auxv[i]; i+=2) if (auxv[i]<AUX_CNT)
++		aux[auxv[i]] = auxv[i+1];
++
++#if DL_FDPIC
++	struct fdpic_loadseg *segs, fakeseg;
++	size_t j;
++	if (dynv) {
++		/* crt_arch.h entry point asm is responsible for reserving
++		 * space and moving the extra fdpic arguments to the stack
++		 * vector where they are easily accessible from C. */
++		segs = ((struct fdpic_loadmap *)(sp[-1] ? sp[-1] : sp[-2]))->segs;
++	} else {
++		/* If dynv is null, the entry point was started from loader
++		 * that is not fdpic-aware. We can assume normal fixed-
++		 * displacement ELF loading was performed, but when ldso was
++		 * run as a command, finding the Ehdr is a heuristic: we
++		 * have to assume Phdrs start in the first 4k of the file. */
++		base = aux[AT_BASE];
++		if (!base) base = aux[AT_PHDR] & -4096;
++		segs = &fakeseg;
++		segs[0].addr = base;
++		segs[0].p_vaddr = 0;
++		segs[0].p_memsz = -1;
++		Ehdr *eh = (void *)base;
++		Phdr *ph = (void *)(base + eh->e_phoff);
++		size_t phnum = eh->e_phnum;
++		size_t phent = eh->e_phentsize;
++		while (phnum-- && ph->p_type != PT_DYNAMIC)
++			ph = (void *)((size_t)ph + phent);
++		dynv = (void *)(base + ph->p_vaddr);
++	}
++#endif
++
++	for (i=0; i<DYN_CNT; i++) dyn[i] = 0;
++	for (i=0; dynv[i]; i+=2) if (dynv[i]<DYN_CNT)
++		dyn[dynv[i]] = dynv[i+1];
++
++#if DL_FDPIC
++	for (i=0; i<DYN_CNT; i++) {
++		if (i==DT_RELASZ || i==DT_RELSZ) continue;
++		if (!dyn[i]) continue;
++		for (j=0; dyn[i]-segs[j].p_vaddr >= segs[j].p_memsz; j++);
++		dyn[i] += segs[j].addr - segs[j].p_vaddr;
++	}
++	base = 0;
++
++	const Sym *syms = (void *)dyn[DT_SYMTAB];
++
++	rel = (void *)dyn[DT_RELA];
++	rel_size = dyn[DT_RELASZ];
++	for (; rel_size; rel+=3, rel_size-=3*sizeof(size_t)) {
++		if (!IS_RELATIVE(rel[1], syms)) continue;
++		for (j=0; rel[0]-segs[j].p_vaddr >= segs[j].p_memsz; j++);
++		size_t *rel_addr = (void *)
++			(rel[0] + segs[j].addr - segs[j].p_vaddr);
++		if (R_TYPE(rel[1]) == REL_FUNCDESC_VAL) {
++			*rel_addr += segs[rel_addr[1]].addr
++				- segs[rel_addr[1]].p_vaddr
++				+ syms[R_SYM(rel[1])].st_value;
++			rel_addr[1] = dyn[DT_PLTGOT];
++		} else {
++			size_t val = syms[R_SYM(rel[1])].st_value;
++			for (j=0; val-segs[j].p_vaddr >= segs[j].p_memsz; j++);
++			*rel_addr = rel[2] + segs[j].addr - segs[j].p_vaddr + val;
++		}
++	}
++#else
++	/* If the dynamic linker is invoked as a command, its load
++	 * address is not available in the aux vector. Instead, compute
++	 * the load address as the difference between &_DYNAMIC and the
++	 * virtual address in the PT_DYNAMIC program header. */
++	base = aux[AT_BASE];
++	if (!base) {
++		size_t phnum = aux[AT_PHNUM];
++		size_t phentsize = aux[AT_PHENT];
++		Phdr *ph = (void *)aux[AT_PHDR];
++		for (i=phnum; i--; ph = (void *)((char *)ph + phentsize)) {
++			if (ph->p_type == PT_DYNAMIC) {
++				base = (size_t)dynv - ph->p_vaddr;
++				break;
++			}
++		}
++	}
++
++	/* MIPS uses an ugly packed form for GOT relocations. Since we
++	 * can't make function calls yet and the code is tiny anyway,
++	 * it's simply inlined here. */
++	if (NEED_MIPS_GOT_RELOCS) {
++		size_t local_cnt = 0;
++		size_t *got = (void *)(base + dyn[DT_PLTGOT]);
++		for (i=0; dynv[i]; i+=2) if (dynv[i]==DT_MIPS_LOCAL_GOTNO)
++			local_cnt = dynv[i+1];
++		for (i=0; i<local_cnt; i++) got[i] += base;
++	}
++
++	rel = (void *)(base+dyn[DT_REL]);
++	rel_size = dyn[DT_RELSZ];
++	for (; rel_size; rel+=2, rel_size-=2*sizeof(size_t)) {
++		if (!IS_RELATIVE(rel[1], 0)) continue;
++		size_t *rel_addr = (void *)(base + rel[0]);
++		*rel_addr += base;
++	}
++
++	rel = (void *)(base+dyn[DT_RELA]);
++	rel_size = dyn[DT_RELASZ];
++	for (; rel_size; rel+=3, rel_size-=3*sizeof(size_t)) {
++		if (!IS_RELATIVE(rel[1], 0)) continue;
++		size_t *rel_addr = (void *)(base + rel[0]);
++		*rel_addr = base + rel[2];
++	}
++#endif
++
++	stage2_func dls2;
++	GETFUNCSYM(&dls2, __dls2, base+dyn[DT_PLTGOT]);
++	dls2((void *)base, sp);
++}
+--- /dev/null
++++ b/ldso/dynlink.c
+@@ -0,0 +1,1931 @@
++#define _GNU_SOURCE
++#include <stdio.h>
++#include <stdlib.h>
++#include <stdarg.h>
++#include <stddef.h>
++#include <string.h>
++#include <unistd.h>
++#include <stdint.h>
++#include <elf.h>
++#include <sys/mman.h>
++#include <limits.h>
++#include <fcntl.h>
++#include <sys/stat.h>
++#include <errno.h>
++#include <link.h>
++#include <setjmp.h>
++#include <pthread.h>
++#include <ctype.h>
++#include <dlfcn.h>
++#include "pthread_impl.h"
++#include "libc.h"
++#include "dynlink.h"
++
++static void error(const char *, ...);
++
++#define MAXP2(a,b) (-(-(a)&-(b)))
++#define ALIGN(x,y) ((x)+(y)-1 & -(y))
++
++struct debug {
++	int ver;
++	void *head;
++	void (*bp)(void);
++	int state;
++	void *base;
++};
++
++struct td_index {
++	size_t args[2];
++	struct td_index *next;
++};
++
++struct dso {
++#if DL_FDPIC
++	struct fdpic_loadmap *loadmap;
++#else
++	unsigned char *base;
++#endif
++	char *name;
++	size_t *dynv;
++	struct dso *next, *prev;
++
++	Phdr *phdr;
++	int phnum;
++	size_t phentsize;
++	int refcnt;
++	Sym *syms;
++	uint32_t *hashtab;
++	uint32_t *ghashtab;
++	int16_t *versym;
++	char *strings;
++	unsigned char *map;
++	size_t map_len;
++	dev_t dev;
++	ino_t ino;
++	signed char global;
++	char relocated;
++	char constructed;
++	char kernel_mapped;
++	struct dso **deps, *needed_by;
++	char *rpath_orig, *rpath;
++	struct tls_module tls;
++	size_t tls_id;
++	size_t relro_start, relro_end;
++	void **new_dtv;
++	unsigned char *new_tls;
++	volatile int new_dtv_idx, new_tls_idx;
++	struct td_index *td_index;
++	struct dso *fini_next;
++	char *shortname;
++#if DL_FDPIC
++	unsigned char *base;
++#else
++	struct fdpic_loadmap *loadmap;
++#endif
++	struct funcdesc {
++		void *addr;
++		size_t *got;
++	} *funcdescs;
++	size_t *got;
++	char buf[];
++};
++
++struct symdef {
++	Sym *sym;
++	struct dso *dso;
++};
++
++int __init_tp(void *);
++void __init_libc(char **, char *);
++void *__copy_tls(unsigned char *);
++
++__attribute__((__visibility__("hidden")))
++const char *__libc_get_version(void);
++
++static struct builtin_tls {
++	char c;
++	struct pthread pt;
++	void *space[16];
++} builtin_tls[1];
++#define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt)
++
++#define ADDEND_LIMIT 4096
++static size_t *saved_addends, *apply_addends_to;
++
++static struct dso ldso;
++static struct dso *head, *tail, *fini_head;
++static char *env_path, *sys_path;
++static unsigned long long gencnt;
++static int runtime;
++static int ldd_mode;
++static int ldso_fail;
++static int noload;
++static jmp_buf *rtld_fail;
++static pthread_rwlock_t lock;
++static struct debug debug;
++static struct tls_module *tls_tail;
++static size_t tls_cnt, tls_offset, tls_align = MIN_TLS_ALIGN;
++static size_t static_tls_cnt;
++static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE };
++static struct fdpic_loadmap *app_loadmap;
++static struct fdpic_dummy_loadmap app_dummy_loadmap;
++
++struct debug *_dl_debug_addr = &debug;
++
++__attribute__((__visibility__("hidden")))
++void (*const __init_array_start)(void)=0, (*const __fini_array_start)(void)=0;
++
++__attribute__((__visibility__("hidden")))
++extern void (*const __init_array_end)(void), (*const __fini_array_end)(void);
++
++weak_alias(__init_array_start, __init_array_end);
++weak_alias(__fini_array_start, __fini_array_end);
++
++static int dl_strcmp(const char *l, const char *r)
++{
++	for (; *l==*r && *l; l++, r++);
++	return *(unsigned char *)l - *(unsigned char *)r;
++}
++#define strcmp(l,r) dl_strcmp(l,r)
++
++/* Compute load address for a virtual address in a given dso. */
++#if DL_FDPIC
++static void *laddr(const struct dso *p, size_t v)
++{
++	size_t j=0;
++	if (!p->loadmap) return p->base + v;
++	for (j=0; v-p->loadmap->segs[j].p_vaddr >= p->loadmap->segs[j].p_memsz; j++);
++	return (void *)(v - p->loadmap->segs[j].p_vaddr + p->loadmap->segs[j].addr);
++}
++#define fpaddr(p, v) ((void (*)())&(struct funcdesc){ \
++	laddr(p, v), (p)->got })
++#else
++#define laddr(p, v) (void *)((p)->base + (v))
++#define fpaddr(p, v) ((void (*)())laddr(p, v))
++#endif
++
++static void decode_vec(size_t *v, size_t *a, size_t cnt)
++{
++	size_t i;
++	for (i=0; i<cnt; i++) a[i] = 0;
++	for (; v[0]; v+=2) if (v[0]-1<cnt-1) {
++		a[0] |= 1UL<<v[0];
++		a[v[0]] = v[1];
++	}
++}
++
++static int search_vec(size_t *v, size_t *r, size_t key)
++{
++	for (; v[0]!=key; v+=2)
++		if (!v[0]) return 0;
++	*r = v[1];
++	return 1;
++}
++
++static uint32_t sysv_hash(const char *s0)
++{
++	const unsigned char *s = (void *)s0;
++	uint_fast32_t h = 0;
++	while (*s) {
++		h = 16*h + *s++;
++		h ^= h>>24 & 0xf0;
++	}
++	return h & 0xfffffff;
++}
++
++static uint32_t gnu_hash(const char *s0)
++{
++	const unsigned char *s = (void *)s0;
++	uint_fast32_t h = 5381;
++	for (; *s; s++)
++		h += h*32 + *s;
++	return h;
++}
++
++static Sym *sysv_lookup(const char *s, uint32_t h, struct dso *dso)
++{
++	size_t i;
++	Sym *syms = dso->syms;
++	uint32_t *hashtab = dso->hashtab;
++	char *strings = dso->strings;
++	for (i=hashtab[2+h%hashtab[0]]; i; i=hashtab[2+hashtab[0]+i]) {
++		if ((!dso->versym || dso->versym[i] >= 0)
++		    && (!strcmp(s, strings+syms[i].st_name)))
++			return syms+i;
++	}
++	return 0;
++}
++
++static Sym *gnu_lookup(uint32_t h1, uint32_t *hashtab, struct dso *dso, const char *s)
++{
++	uint32_t nbuckets = hashtab[0];
++	uint32_t *buckets = hashtab + 4 + hashtab[2]*(sizeof(size_t)/4);
++	uint32_t i = buckets[h1 % nbuckets];
++
++	if (!i) return 0;
++
++	uint32_t *hashval = buckets + nbuckets + (i - hashtab[1]);
++
++	for (h1 |= 1; ; i++) {
++		uint32_t h2 = *hashval++;
++		if ((h1 == (h2|1)) && (!dso->versym || dso->versym[i] >= 0)
++		    && !strcmp(s, dso->strings + dso->syms[i].st_name))
++			return dso->syms+i;
++		if (h2 & 1) break;
++	}
++
++	return 0;
++}
++
++static Sym *gnu_lookup_filtered(uint32_t h1, uint32_t *hashtab, struct dso *dso, const char *s, uint32_t fofs, size_t fmask)
++{
++	const size_t *bloomwords = (const void *)(hashtab+4);
++	size_t f = bloomwords[fofs & (hashtab[2]-1)];
++	if (!(f & fmask)) return 0;
++
++	f >>= (h1 >> hashtab[3]) % (8 * sizeof f);
++	if (!(f & 1)) return 0;
++
++	return gnu_lookup(h1, hashtab, dso, s);
++}
++
++#define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON | 1<<STT_TLS)
++#define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK | 1<<STB_GNU_UNIQUE)
++
++#ifndef ARCH_SYM_REJECT_UND
++#define ARCH_SYM_REJECT_UND(s) 0
++#endif
++
++static struct symdef find_sym(struct dso *dso, const char *s, int need_def)
++{
++	uint32_t h = 0, gh, gho, *ght;
++	size_t ghm = 0;
++	struct symdef def = {0};
++	for (; dso; dso=dso->next) {
++		Sym *sym;
++		if (!dso->global) continue;
++		if ((ght = dso->ghashtab)) {
++			if (!ghm) {
++				gh = gnu_hash(s);
++				int maskbits = 8 * sizeof ghm;
++				gho = gh / maskbits;
++				ghm = 1ul << gh % maskbits;
++			}
++			sym = gnu_lookup_filtered(gh, ght, dso, s, gho, ghm);
++		} else {
++			if (!h) h = sysv_hash(s);
++			sym = sysv_lookup(s, h, dso);
++		}
++		if (!sym) continue;
++		if (!sym->st_shndx)
++			if (need_def || (sym->st_info&0xf) == STT_TLS
++			    || ARCH_SYM_REJECT_UND(sym))
++				continue;
++		if (!sym->st_value)
++			if ((sym->st_info&0xf) != STT_TLS)
++				continue;
++		if (!(1<<(sym->st_info&0xf) & OK_TYPES)) continue;
++		if (!(1<<(sym->st_info>>4) & OK_BINDS)) continue;
++
++		if (def.sym && sym->st_info>>4 == STB_WEAK) continue;
++		def.sym = sym;
++		def.dso = dso;
++		if (sym->st_info>>4 == STB_GLOBAL) break;
++	}
++	return def;
++}
++
++__attribute__((__visibility__("hidden")))
++ptrdiff_t __tlsdesc_static(), __tlsdesc_dynamic();
++
++static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stride)
++{
++	unsigned char *base = dso->base;
++	Sym *syms = dso->syms;
++	char *strings = dso->strings;
++	Sym *sym;
++	const char *name;
++	void *ctx;
++	int type;
++	int sym_index;
++	struct symdef def;
++	size_t *reloc_addr;
++	size_t sym_val;
++	size_t tls_val;
++	size_t addend;
++	int skip_relative = 0, reuse_addends = 0, save_slot = 0;
++
++	if (dso == &ldso) {
++		/* Only ldso's REL table needs addend saving/reuse. */
++		if (rel == apply_addends_to)
++			reuse_addends = 1;
++		skip_relative = 1;
++	}
++
++	for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
++		if (skip_relative && IS_RELATIVE(rel[1], dso->syms)) continue;
++		type = R_TYPE(rel[1]);
++		if (type == REL_NONE) continue;
++		sym_index = R_SYM(rel[1]);
++		reloc_addr = laddr(dso, rel[0]);
++		if (sym_index) {
++			sym = syms + sym_index;
++			name = strings + sym->st_name;
++			ctx = type==REL_COPY ? head->next : head;
++			def = (sym->st_info&0xf) == STT_SECTION
++				? (struct symdef){ .dso = dso, .sym = sym }
++				: find_sym(ctx, name, type==REL_PLT);
++			if (!def.sym && (sym->st_shndx != SHN_UNDEF
++			    || sym->st_info>>4 != STB_WEAK)) {
++				error("Error relocating %s: %s: symbol not found",
++					dso->name, name);
++				if (runtime) longjmp(*rtld_fail, 1);
++				continue;
++			}
++		} else {
++			sym = 0;
++			def.sym = 0;
++			def.dso = dso;
++		}
++
++		if (stride > 2) {
++			addend = rel[2];
++		} else if (type==REL_GOT || type==REL_PLT|| type==REL_COPY) {
++			addend = 0;
++		} else if (reuse_addends) {
++			/* Save original addend in stage 2 where the dso
++			 * chain consists of just ldso; otherwise read back
++			 * saved addend since the inline one was clobbered. */
++			if (head==&ldso)
++				saved_addends[save_slot] = *reloc_addr;
++			addend = saved_addends[save_slot++];
++		} else {
++			addend = *reloc_addr;
++		}
++
++		sym_val = def.sym ? (size_t)laddr(def.dso, def.sym->st_value) : 0;
++		tls_val = def.sym ? def.sym->st_value : 0;
++
++		switch(type) {
++		case REL_NONE:
++			break;
++		case REL_OFFSET:
++			addend -= (size_t)reloc_addr;
++		case REL_SYMBOLIC:
++		case REL_GOT:
++		case REL_PLT:
++			*reloc_addr = sym_val + addend;
++			break;
++		case REL_RELATIVE:
++			*reloc_addr = (size_t)base + addend;
++			break;
++		case REL_SYM_OR_REL:
++			if (sym) *reloc_addr = sym_val + addend;
++			else *reloc_addr = (size_t)base + addend;
++			break;
++		case REL_COPY:
++			memcpy(reloc_addr, (void *)sym_val, sym->st_size);
++			break;
++		case REL_OFFSET32:
++			*(uint32_t *)reloc_addr = sym_val + addend
++				- (size_t)reloc_addr;
++			break;
++		case REL_FUNCDESC:
++			*reloc_addr = def.sym ? (size_t)(def.dso->funcdescs
++				+ (def.sym - def.dso->syms)) : 0;
++			break;
++		case REL_FUNCDESC_VAL:
++			if ((sym->st_info&0xf) == STT_SECTION) *reloc_addr += sym_val;
++			else *reloc_addr = sym_val;
++			reloc_addr[1] = def.sym ? (size_t)def.dso->got : 0;
++			break;
++		case REL_DTPMOD:
++			*reloc_addr = def.dso->tls_id;
++			break;
++		case REL_DTPOFF:
++			*reloc_addr = tls_val + addend - DTP_OFFSET;
++			break;
++#ifdef TLS_ABOVE_TP
++		case REL_TPOFF:
++			*reloc_addr = tls_val + def.dso->tls.offset + TPOFF_K + addend;
++			break;
++#else
++		case REL_TPOFF:
++			*reloc_addr = tls_val - def.dso->tls.offset + addend;
++			break;
++		case REL_TPOFF_NEG:
++			*reloc_addr = def.dso->tls.offset - tls_val + addend;
++			break;
++#endif
++		case REL_TLSDESC:
++			if (stride<3) addend = reloc_addr[1];
++			if (runtime && def.dso->tls_id >= static_tls_cnt) {
++				struct td_index *new = malloc(sizeof *new);
++				if (!new) {
++					error(
++					"Error relocating %s: cannot allocate TLSDESC for %s",
++					dso->name, sym ? name : "(local)" );
++					longjmp(*rtld_fail, 1);
++				}
++				new->next = dso->td_index;
++				dso->td_index = new;
++				new->args[0] = def.dso->tls_id;
++				new->args[1] = tls_val + addend;
++				reloc_addr[0] = (size_t)__tlsdesc_dynamic;
++				reloc_addr[1] = (size_t)new;
++			} else {
++				reloc_addr[0] = (size_t)__tlsdesc_static;
++#ifdef TLS_ABOVE_TP
++				reloc_addr[1] = tls_val + def.dso->tls.offset
++					+ TPOFF_K + addend;
++#else
++				reloc_addr[1] = tls_val - def.dso->tls.offset
++					+ addend;
++#endif
++			}
++			break;
++		default:
++			error("Error relocating %s: unsupported relocation type %d",
++				dso->name, type);
++			if (runtime) longjmp(*rtld_fail, 1);
++			continue;
++		}
++	}
++}
++
++/* A huge hack: to make up for the wastefulness of shared libraries
++ * needing at least a page of dirty memory even if they have no global
++ * data, we reclaim the gaps at the beginning and end of writable maps
++ * and "donate" them to the heap by setting up minimal malloc
++ * structures and then freeing them. */
++
++static void reclaim(struct dso *dso, size_t start, size_t end)
++{
++	size_t *a, *z;
++	if (start >= dso->relro_start && start < dso->relro_end) start = dso->relro_end;
++	if (end   >= dso->relro_start && end   < dso->relro_end) end = dso->relro_start;
++	start = start + 6*sizeof(size_t)-1 & -4*sizeof(size_t);
++	end = (end & -4*sizeof(size_t)) - 2*sizeof(size_t);
++	if (start>end || end-start < 4*sizeof(size_t)) return;
++	a = laddr(dso, start);
++	z = laddr(dso, end);
++	a[-2] = 1;
++	a[-1] = z[0] = end-start + 2*sizeof(size_t) | 1;
++	z[1] = 1;
++	free(a);
++}
++
++static void reclaim_gaps(struct dso *dso)
++{
++	Phdr *ph = dso->phdr;
++	size_t phcnt = dso->phnum;
++
++	if (DL_FDPIC) return; // FIXME
++	for (; phcnt--; ph=(void *)((char *)ph+dso->phentsize)) {
++		if (ph->p_type!=PT_LOAD) continue;
++		if ((ph->p_flags&(PF_R|PF_W))!=(PF_R|PF_W)) continue;
++		reclaim(dso, ph->p_vaddr & -PAGE_SIZE, ph->p_vaddr);
++		reclaim(dso, ph->p_vaddr+ph->p_memsz,
++			ph->p_vaddr+ph->p_memsz+PAGE_SIZE-1 & -PAGE_SIZE);
++	}
++}
++
++static void *mmap_fixed(void *p, size_t n, int prot, int flags, int fd, off_t off)
++{	/* mmap at fixed address p; if MAP_FIXED is rejected (NOMMU), emulate by filling p in place */
++	static int no_map_fixed; /* latched once MAP_FIXED failed with EINVAL */
++	char *q;
++	if (!no_map_fixed) {
++		q = mmap(p, n, prot, flags|MAP_FIXED, fd, off);
++		if (!DL_NOMMU_SUPPORT || q != MAP_FAILED || errno != EINVAL)
++			return q;
++		no_map_fixed = 1;
++	}
++	/* Fallbacks for MAP_FIXED failure on NOMMU kernels. */
++	if (flags & MAP_ANONYMOUS) {
++		memset(p, 0, n);
++		return p;
++	}
++	ssize_t r;
++	if (lseek(fd, off, SEEK_SET) < 0) return MAP_FAILED;
++	for (q=p; n; q+=r, off+=r, n-=r) {
++		do r = read(fd, q, n); while (r < 0 && errno == EINTR); /* r==-1 must never reach the loop step */
++		if (r < 0) return MAP_FAILED;
++		if (!r) { /* EOF: zero-fill whatever the file did not cover */
++			memset(q, 0, n);
++			break;
++		}
++	}
++	return p;
++}
++
++static void unmap_library(struct dso *dso)
++{
++	if (dso->loadmap) {
++		size_t i;
++		for (i=0; i<dso->loadmap->nsegs; i++) {
++			if (!dso->loadmap->segs[i].p_memsz)
++				continue;
++			munmap((void *)dso->loadmap->segs[i].addr,
++				dso->loadmap->segs[i].p_memsz);
++		}
++		free(dso->loadmap);
++	} else if (dso->map && dso->map_len) {
++		munmap(dso->map, dso->map_len);
++	}
++}
++
++static void *map_library(int fd, struct dso *dso)
++{
++	Ehdr buf[(896+sizeof(Ehdr))/sizeof(Ehdr)];
++	void *allocated_buf=0;
++	size_t phsize;
++	size_t addr_min=SIZE_MAX, addr_max=0, map_len;
++	size_t this_min, this_max;
++	size_t nsegs = 0;
++	off_t off_start;
++	Ehdr *eh;
++	Phdr *ph, *ph0;
++	unsigned prot;
++	unsigned char *map=MAP_FAILED, *base;
++	size_t dyn=0;
++	size_t tls_image=0;
++	size_t i;
++
++	ssize_t l = read(fd, buf, sizeof buf);
++	eh = buf;
++	if (l<0) return 0;
++	if (l<sizeof *eh || (eh->e_type != ET_DYN && eh->e_type != ET_EXEC))
++		goto noexec;
++	phsize = eh->e_phentsize * eh->e_phnum;
++	if (phsize > sizeof buf - sizeof *eh) {
++		allocated_buf = malloc(phsize);
++		if (!allocated_buf) return 0;
++		l = pread(fd, allocated_buf, phsize, eh->e_phoff);
++		if (l < 0) goto error;
++		if (l != phsize) goto noexec;
++		ph = ph0 = allocated_buf;
++	} else if (eh->e_phoff + phsize > l) {
++		l = pread(fd, buf+1, phsize, eh->e_phoff);
++		if (l < 0) goto error;
++		if (l != phsize) goto noexec;
++		ph = ph0 = (void *)(buf + 1);
++	} else {
++		ph = ph0 = (void *)((char *)buf + eh->e_phoff);
++	}
++	for (i=eh->e_phnum; i; i--, ph=(void *)((char *)ph+eh->e_phentsize)) {
++		if (ph->p_type == PT_DYNAMIC) {
++			dyn = ph->p_vaddr;
++		} else if (ph->p_type == PT_TLS) {
++			tls_image = ph->p_vaddr;
++			dso->tls.align = ph->p_align;
++			dso->tls.len = ph->p_filesz;
++			dso->tls.size = ph->p_memsz;
++		} else if (ph->p_type == PT_GNU_RELRO) {
++			dso->relro_start = ph->p_vaddr & -PAGE_SIZE;
++			dso->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE;
++		}
++		if (ph->p_type != PT_LOAD) continue;
++		nsegs++;
++		if (ph->p_vaddr < addr_min) {
++			addr_min = ph->p_vaddr;
++			off_start = ph->p_offset;
++			prot = (((ph->p_flags&PF_R) ? PROT_READ : 0) |
++				((ph->p_flags&PF_W) ? PROT_WRITE: 0) |
++				((ph->p_flags&PF_X) ? PROT_EXEC : 0));
++		}
++		if (ph->p_vaddr+ph->p_memsz > addr_max) {
++			addr_max = ph->p_vaddr+ph->p_memsz;
++		}
++	}
++	if (!dyn) goto noexec;
++	if (DL_FDPIC && !(eh->e_flags & FDPIC_CONSTDISP_FLAG)) {
++		dso->loadmap = calloc(1, sizeof *dso->loadmap
++			+ nsegs * sizeof *dso->loadmap->segs);
++		if (!dso->loadmap) goto error;
++		dso->loadmap->nsegs = nsegs;
++		for (ph=ph0, i=0; i<nsegs; ph=(void *)((char *)ph+eh->e_phentsize)) {
++			if (ph->p_type != PT_LOAD) continue;
++			prot = (((ph->p_flags&PF_R) ? PROT_READ : 0) |
++				((ph->p_flags&PF_W) ? PROT_WRITE: 0) |
++				((ph->p_flags&PF_X) ? PROT_EXEC : 0));
++			map = mmap(0, ph->p_memsz + (ph->p_vaddr & PAGE_SIZE-1),
++				prot, MAP_PRIVATE,
++				fd, ph->p_offset & -PAGE_SIZE);
++			if (map == MAP_FAILED) {
++				unmap_library(dso);
++				goto error;
++			}
++			dso->loadmap->segs[i].addr = (size_t)map +
++				(ph->p_vaddr & PAGE_SIZE-1);
++			dso->loadmap->segs[i].p_vaddr = ph->p_vaddr;
++			dso->loadmap->segs[i].p_memsz = ph->p_memsz;
++			i++;
++			if (prot & PROT_WRITE) {
++				size_t brk = (ph->p_vaddr & PAGE_SIZE-1)
++					+ ph->p_filesz;
++				size_t pgbrk = brk + PAGE_SIZE-1 & -PAGE_SIZE;
++				size_t pgend = brk + ph->p_memsz - ph->p_filesz
++					+ PAGE_SIZE-1 & -PAGE_SIZE;
++				if (pgend > pgbrk && mmap_fixed(map+pgbrk,
++					pgend-pgbrk, prot,
++					MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS,
++					-1, off_start) == MAP_FAILED)
++					goto error;
++				memset(map + brk, 0, pgbrk-brk);
++			}
++		}
++		map = (void *)dso->loadmap->segs[0].addr;
++		map_len = 0;
++		goto done_mapping;
++	}
++	addr_max += PAGE_SIZE-1;
++	addr_max &= -PAGE_SIZE;
++	addr_min &= -PAGE_SIZE;
++	off_start &= -PAGE_SIZE;
++	map_len = addr_max - addr_min + off_start;
++	/* The first time, we map too much, possibly even more than
++	 * the length of the file. This is okay because we will not
++	 * use the invalid part; we just need to reserve the right
++	 * amount of virtual address space to map over later. */
++	map = DL_NOMMU_SUPPORT
++		? mmap((void *)addr_min, map_len, PROT_READ|PROT_WRITE|PROT_EXEC,
++			MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
++		: mmap((void *)addr_min, map_len, prot,
++			MAP_PRIVATE, fd, off_start);
++	if (map==MAP_FAILED) goto error;
++	dso->map = map;
++	dso->map_len = map_len;
++	/* If the loaded file is not relocatable and the requested address is
++	 * not available, then the load operation must fail. */
++	if (eh->e_type != ET_DYN && addr_min && map!=(void *)addr_min) {
++		errno = EBUSY;
++		goto error;
++	}
++	base = map - addr_min;
++	dso->phdr = 0;
++	dso->phnum = 0;
++	for (ph=ph0, i=eh->e_phnum; i; i--, ph=(void *)((char *)ph+eh->e_phentsize)) {
++		if (ph->p_type != PT_LOAD) continue;
++		/* Check if the program headers are in this load segment, and
++		 * if so, record the address for use by dl_iterate_phdr. */
++		if (!dso->phdr && eh->e_phoff >= ph->p_offset
++		    && eh->e_phoff+phsize <= ph->p_offset+ph->p_filesz) {
++			dso->phdr = (void *)(base + ph->p_vaddr
++				+ (eh->e_phoff-ph->p_offset));
++			dso->phnum = eh->e_phnum;
++			dso->phentsize = eh->e_phentsize;
++		}
++		/* Reuse the existing mapping for the lowest-address LOAD */
++		if ((ph->p_vaddr & -PAGE_SIZE) == addr_min && !DL_NOMMU_SUPPORT)
++			continue;
++		this_min = ph->p_vaddr & -PAGE_SIZE;
++		this_max = ph->p_vaddr+ph->p_memsz+PAGE_SIZE-1 & -PAGE_SIZE;
++		off_start = ph->p_offset & -PAGE_SIZE;
++		prot = (((ph->p_flags&PF_R) ? PROT_READ : 0) |
++			((ph->p_flags&PF_W) ? PROT_WRITE: 0) |
++			((ph->p_flags&PF_X) ? PROT_EXEC : 0));
++		if (mmap_fixed(base+this_min, this_max-this_min, prot, MAP_PRIVATE|MAP_FIXED, fd, off_start) == MAP_FAILED)
++			goto error;
++		if (ph->p_memsz > ph->p_filesz) {
++			size_t brk = (size_t)base+ph->p_vaddr+ph->p_filesz;
++			size_t pgbrk = brk+PAGE_SIZE-1 & -PAGE_SIZE;
++			memset((void *)brk, 0, pgbrk-brk & PAGE_SIZE-1);
++			if (pgbrk-(size_t)base < this_max && mmap_fixed((void *)pgbrk, (size_t)base+this_max-pgbrk, prot, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) == MAP_FAILED)
++				goto error;
++		}
++	}
++	for (i=0; ((size_t *)(base+dyn))[i]; i+=2)
++		if (((size_t *)(base+dyn))[i]==DT_TEXTREL) {
++			if (mprotect(map, map_len, PROT_READ|PROT_WRITE|PROT_EXEC)
++			    && errno != ENOSYS)
++				goto error;
++			break;
++		}
++done_mapping:
++	dso->base = base;
++	dso->dynv = laddr(dso, dyn);
++	if (dso->tls.size) dso->tls.image = laddr(dso, tls_image);
++	if (!runtime) reclaim_gaps(dso);
++	free(allocated_buf);
++	return map;
++noexec:
++	errno = ENOEXEC;
++error:
++	if (map!=MAP_FAILED) unmap_library(dso);
++	free(allocated_buf);
++	return 0;
++}
++
++static int path_open(const char *name, const char *s, char *buf, size_t buf_size)
++{
++	size_t l;
++	int fd;
++	for (;;) {
++		s += strspn(s, ":\n");
++		l = strcspn(s, ":\n");
++		if (l-1 >= INT_MAX) return -1;
++		if (snprintf(buf, buf_size, "%.*s/%s", (int)l, s, name) < buf_size) {
++			if ((fd = open(buf, O_RDONLY|O_CLOEXEC))>=0) return fd;
++			switch (errno) {
++			case ENOENT:
++			case ENOTDIR:
++			case EACCES:
++			case ENAMETOOLONG:
++				break;
++			default:
++				/* Any negative value but -1 will inhibit
++				 * futher path search. */
++				return -2;
++			}
++		}
++		s += l;
++	}
++}
++
++static int fixup_rpath(struct dso *p, char *buf, size_t buf_size)
++{ /* Expand $ORIGIN tokens of p->rpath_orig into p->rpath; returns 0, or -1 on a hard failure. */
++	size_t n, l;
++	const char *s, *t, *origin;
++	char *d;
++	if (p->rpath || !p->rpath_orig) return 0; /* already expanded, or nothing to expand */
++	if (!strchr(p->rpath_orig, '$')) {
++		p->rpath = p->rpath_orig; /* no tokens: share the original string */
++		return 0;
++	}
++	n = 0; /* n counts the '$' tokens found */
++	s = p->rpath_orig;
++	while ((t=strchr(s, '$'))) {
++		if (strncmp(t, "$ORIGIN", 7) && strncmp(t, "${ORIGIN}", 9))
++			return 0; /* unsupported token: p->rpath stays unset */
++		s = t+1;
++		n++;
++	}
++	if (n > SSIZE_MAX/PATH_MAX) return 0;
++	/* Pick the pathname whose directory part replaces each token. */
++	if (p->kernel_mapped) {
++		/* $ORIGIN searches cannot be performed for the main program
++		 * when it is suid/sgid/AT_SECURE. This is because the
++		 * pathname is under the control of the caller of execve.
++		 * For libraries, however, $ORIGIN can be processed safely
++		 * since the library's pathname came from a trusted source
++		 * (either system paths or a call to dlopen). */
++		if (libc.secure)
++			return 0;
++		l = readlink("/proc/self/exe", buf, buf_size);
++		if (l == -1) switch (errno) {
++		case ENOENT:
++		case ENOTDIR:
++		case EACCES:
++			break; /* l stays (size_t)-1, so the size check below returns 0 */
++		default:
++			return -1;
++		}
++		if (l >= buf_size)
++			return 0;
++		buf[l] = 0; /* terminate the link text written by readlink */
++		origin = buf;
++	} else {
++		origin = p->name;
++	}
++	t = strrchr(origin, '/');
++	l = t ? t-origin : 0; /* length of origin up to its last '/'; 0 when there is none */
++	p->rpath = malloc(strlen(p->rpath_orig) + n*l + 1);
++	if (!p->rpath) return -1;
++	/* Copy rpath_orig, substituting the first l bytes of origin for each token. */
++	d = p->rpath;
++	s = p->rpath_orig;
++	while ((t=strchr(s, '$'))) {
++		memcpy(d, s, t-s);
++		d += t-s;
++		memcpy(d, origin, l);
++		d += l;
++		/* It was determined previously that the '$' is followed
++		 * either by "ORIGIN" or "{ORIGIN}". */
++		s = t + 7 + 2*(t[1]=='{');
++	}
++	strcpy(d, s);
++	return 0;
++}
++
++static void decode_dyn(struct dso *p)
++{ /* Fill p's symbol, string, hash, rpath, GOT and versym fields from its dynamic vector p->dynv. */
++	size_t dyn[DYN_CNT];
++	decode_vec(p->dynv, dyn, DYN_CNT);
++	p->syms = laddr(p, dyn[DT_SYMTAB]);
++	p->strings = laddr(p, dyn[DT_STRTAB]);
++	if (dyn[0]&(1<<DT_HASH)) /* dyn[0] is tested as a bitmask of the tags that were seen */
++		p->hashtab = laddr(p, dyn[DT_HASH]);
++	if (dyn[0]&(1<<DT_RPATH))
++		p->rpath_orig = p->strings + dyn[DT_RPATH];
++	if (dyn[0]&(1<<DT_RUNPATH)) /* assigned after DT_RPATH, so DT_RUNPATH wins when both exist */
++		p->rpath_orig = p->strings + dyn[DT_RUNPATH];
++	if (dyn[0]&(1<<DT_PLTGOT))
++		p->got = laddr(p, dyn[DT_PLTGOT]);
++	if (search_vec(p->dynv, dyn, DT_GNU_HASH)) /* dyn[0] is reused as the output slot from here on */
++		p->ghashtab = laddr(p, *dyn);
++	if (search_vec(p->dynv, dyn, DT_VERSYM))
++		p->versym = laddr(p, *dyn);
++}
++
++static size_t count_syms(struct dso *p)
++{ /* Number of symbol table entries of p, derived from whichever hash table it has. */
++	if (p->hashtab) return p->hashtab[1]; /* second word of the SysV table; presumably nchain - confirm */
++	/* No SysV table: find the highest bucket start, then walk that chain to its end. */
++	size_t nsym, i;
++	uint32_t *buckets = p->ghashtab + 4 + (p->ghashtab[2]*sizeof(size_t)/4); /* skip 4 header words plus ghashtab[2] size_t-sized words */
++	uint32_t *hashval;
++	for (i = nsym = 0; i < p->ghashtab[0]; i++) {
++		if (buckets[i] > nsym)
++			nsym = buckets[i];
++	}
++	if (nsym) {
++		hashval = buckets + p->ghashtab[0] + (nsym - p->ghashtab[1]); /* chain entry for symbol index nsym */
++		do nsym++;
++		while (!(*hashval++ & 1)); /* a set low bit ends the chain */
++	}
++	return nsym;
++}
++
++static void *dl_mmap(size_t n)
++{ /* Anonymous private read/write mapping of n bytes via a raw syscall; returns 0 on failure. */
++	void *p;
++	int prot = PROT_READ|PROT_WRITE, flags = MAP_ANONYMOUS|MAP_PRIVATE;
++#ifdef SYS_mmap2
++	p = (void *)__syscall(SYS_mmap2, 0, n, prot, flags, -1, 0);
++#else
++	p = (void *)__syscall(SYS_mmap, 0, n, prot, flags, -1, 0);
++#endif
++	return (unsigned long)p > -4096UL ? 0 : p; /* raw syscalls fail with -errno, not MAP_FAILED */
++}
++
++static void makefuncdescs(struct dso *p)
++{ /* Allocate p->funcdescs and fill one descriptor slot per symbol table entry of p. */
++	static int self_done;
++	size_t nsym = count_syms(p);
++	size_t i, size = nsym * sizeof(*p->funcdescs);
++	/* Only the very first call allocates with dl_mmap; later calls use malloc. */
++	if (!self_done) {
++		p->funcdescs = dl_mmap(size);
++		self_done = 1;
++	} else {
++		p->funcdescs = malloc(size);
++	}
++	if (!p->funcdescs) {
++		if (!runtime) a_crash(); /* outside runtime mode an allocation failure is fatal */
++		error("Error allocating function descriptors for %s", p->name);
++		longjmp(*rtld_fail, 1);
++	}
++	for (i=0; i<nsym; i++) {
++		if ((p->syms[i].st_info&0xf)==STT_FUNC && p->syms[i].st_shndx) { /* function with a nonzero section index */
++			p->funcdescs[i].addr = laddr(p, p->syms[i].st_value);
++			p->funcdescs[i].got = p->got;
++		} else { /* every other symbol gets a zeroed descriptor */
++			p->funcdescs[i].addr = 0;
++			p->funcdescs[i].got = 0;
++		}
++	}
++}
++
++static struct dso *load_library(const char *name, struct dso *needed_by)
++{ /* Find or map the library called name on behalf of needed_by; returns its dso, or 0 on failure. */
++	char buf[2*NAME_MAX+2];
++	const char *pathname;
++	unsigned char *map;
++	struct dso *p, temp_dso = {0};
++	int fd;
++	struct stat st;
++	size_t alloc_size;
++	int n_th = 0;
++	int is_self = 0;
++	/* An empty name can never be loaded. */
++	if (!*name) {
++		errno = EINVAL;
++		return 0;
++	}
++	/* Catch and block attempts to reload the implementation itself. The
++	 * stem between "lib" and the first '.' must equal a reserved name. */
++	if (name[0]=='l' && name[1]=='i' && name[2]=='b') {
++		static const char *rp, reserved[] =
++			"c\0pthread\0rt\0m\0dl\0util\0xnet\0";
++		char *z = strchr(name, '.');
++		if (z) {
++			size_t l = z-name; /* l >= 3; rp[l-3] rejects stems that are only a prefix of rp */
++			for (rp=reserved; *rp && (strncmp(name+3, rp, l-3) || rp[l-3]); rp+=strlen(rp)+1);
++			if (*rp) {
++				if (ldd_mode) {
++					/* Track which names have been resolved
++					 * and only report each one once. */
++					static unsigned reported;
++					unsigned mask = 1U<<(rp-reserved);
++					if (!(reported & mask)) {
++						reported |= mask;
++						dprintf(1, "\t%s => %s (%p)\n",
++							name, ldso.name,
++							ldso.base);
++					}
++				}
++				is_self = 1;
++			}
++		}
++	}
++	if (!strcmp(name, ldso.name)) is_self = 1;
++	if (is_self) {
++		if (!ldso.prev) { /* ldso is not on the list yet: append it, plus anything chained after it */
++			tail->next = &ldso;
++			ldso.prev = tail;
++			tail = ldso.next ? ldso.next : &ldso;
++		}
++		return &ldso;
++	}
++	if (strchr(name, '/')) { /* explicit pathname: no search */
++		pathname = name;
++		fd = open(name, O_RDONLY|O_CLOEXEC);
++	} else {
++		/* Search for the name to see if it's already loaded */
++		for (p=head->next; p; p=p->next) {
++			if (p->shortname && !strcmp(p->shortname, name)) {
++				p->refcnt++;
++				return p;
++			}
++		}
++		if (strlen(name) > NAME_MAX) return 0;
++		fd = -1; /* -1 means "keep searching"; other negative values stop the search */
++		if (env_path) fd = path_open(name, env_path, buf, sizeof buf);
++		for (p=needed_by; fd == -1 && p; p=p->needed_by) { /* rpath of each requester up the chain */
++			if (fixup_rpath(p, buf, sizeof buf) < 0)
++				fd = -2; /* Inhibit further search. */
++			if (p->rpath)
++				fd = path_open(name, p->rpath, buf, sizeof buf);
++		}
++		if (fd == -1) {
++			if (!sys_path) { /* first use: try to read the path file next to the loader's prefix */
++				char *prefix = 0;
++				size_t prefix_len;
++				if (ldso.name[0]=='/') {
++					char *s, *t, *z;
++					for (s=t=z=ldso.name; *s; s++)
++						if (*s=='/') z=t, t=s; /* z trails t: ends at the second-to-last '/' */
++					prefix_len = z-ldso.name;
++					if (prefix_len < PATH_MAX)
++						prefix = ldso.name;
++				}
++				if (!prefix) {
++					prefix = "";
++					prefix_len = 0;
++				}
++				char etc_ldso_path[prefix_len + 1
++					+ sizeof "/etc/ld-musl-" LDSO_ARCH ".path"];
++				snprintf(etc_ldso_path, sizeof etc_ldso_path,
++					"%.*s/etc/ld-musl-" LDSO_ARCH ".path",
++					(int)prefix_len, prefix);
++				FILE *f = fopen(etc_ldso_path, "rbe");
++				if (f) {
++					if (getdelim(&sys_path, (size_t[1]){0}, 0, f) <= 0) {
++						free(sys_path);
++						sys_path = ""; /* unreadable or empty file: empty search path */
++					}
++					fclose(f);
++				} else if (errno != ENOENT) {
++					sys_path = "";
++				}
++			}
++			if (!sys_path) sys_path = "/lib:/usr/local/lib:/usr/lib"; /* path file absent: built-in default */
++			fd = path_open(name, sys_path, buf, sizeof buf);
++		}
++		pathname = buf;
++	}
++	if (fd < 0) return 0;
++	if (fstat(fd, &st) < 0) {
++		close(fd);
++		return 0;
++	}
++	for (p=head->next; p; p=p->next) { /* same file already loaded under another name? */
++		if (p->dev == st.st_dev && p->ino == st.st_ino) {
++			/* If this library was previously loaded with a
++			 * pathname but a search found the same inode,
++			 * setup its shortname so it can be found by name. */
++			if (!p->shortname && pathname != name)
++				p->shortname = strrchr(p->name, '/')+1;
++			close(fd);
++			p->refcnt++;
++			return p;
++		}
++	}
++	map = noload ? 0 : map_library(fd, &temp_dso);
++	close(fd);
++	if (!map) return 0;
++
++	/* Allocate storage for the new DSO. When there is TLS, this
++	 * storage must include a reservation for all pre-existing
++	 * threads to obtain copies of both the new TLS, and an
++	 * extended DTV capable of storing an additional slot for
++	 * the newly-loaded DSO. */
++	alloc_size = sizeof *p + strlen(pathname) + 1;
++	if (runtime && temp_dso.tls.image) {
++		size_t per_th = temp_dso.tls.size + temp_dso.tls.align
++			+ sizeof(void *) * (tls_cnt+3);
++		n_th = libc.threads_minus_1 + 1;
++		if (n_th > SSIZE_MAX / per_th) alloc_size = SIZE_MAX; /* would overflow: make calloc fail */
++		else alloc_size += n_th * per_th;
++	}
++	p = calloc(1, alloc_size);
++	if (!p) {
++		unmap_library(&temp_dso);
++		return 0;
++	}
++	memcpy(p, &temp_dso, sizeof temp_dso);
++	decode_dyn(p);
++	p->dev = st.st_dev;
++	p->ino = st.st_ino;
++	p->refcnt = 1;
++	p->needed_by = needed_by;
++	p->name = p->buf;
++	strcpy(p->name, pathname);
++	/* Add a shortname only if name arg was not an explicit pathname. */
++	if (pathname != name) p->shortname = strrchr(p->name, '/')+1;
++	if (p->tls.image) {
++		p->tls_id = ++tls_cnt;
++		tls_align = MAXP2(tls_align, p->tls.align);
++#ifdef TLS_ABOVE_TP
++		p->tls.offset = tls_offset + ( (tls_align-1) &
++			-(tls_offset + (uintptr_t)p->tls.image) );
++		tls_offset += p->tls.size;
++#else
++		tls_offset += p->tls.size + p->tls.align - 1;
++		tls_offset -= (tls_offset + (uintptr_t)p->tls.image)
++			& (p->tls.align-1);
++		p->tls.offset = tls_offset;
++#endif
++		p->new_dtv = (void *)(-sizeof(size_t) &
++			(uintptr_t)(p->name+strlen(p->name)+sizeof(size_t))); /* first size_t-aligned address past the name */
++		p->new_tls = (void *)(p->new_dtv + n_th*(tls_cnt+1)); /* TLS copies follow n_th DTVs of tls_cnt+1 slots */
++		if (tls_tail) tls_tail->next = &p->tls;
++		else libc.tls_head = &p->tls;
++		tls_tail = &p->tls;
++	}
++	/* Append the new DSO to the global list. */
++	tail->next = p;
++	p->prev = tail;
++	tail = p;
++
++	if (DL_FDPIC) makefuncdescs(p);
++
++	if (ldd_mode) dprintf(1, "\t%s => %s (%p)\n", name, pathname, p->base);
++
++	return p;
++}
++
++static void load_deps(struct dso *p)
++{ /* Load the DT_NEEDED libraries of p and of every DSO after it on the list. */
++	size_t i, ndeps=0;
++	struct dso ***deps = &p->deps, **tmp, *dep; /* deps always refers to the initial p's array */
++	for (; p; p=p->next) { /* DSOs appended by load_library are visited by this same loop */
++		for (i=0; p->dynv[i]; i+=2) {
++			if (p->dynv[i] != DT_NEEDED) continue;
++			dep = load_library(p->strings + p->dynv[i+1], p);
++			if (!dep) {
++				error("Error loading shared library %s: %m (needed by %s)",
++					p->strings + p->dynv[i+1], p->name);
++				if (runtime) longjmp(*rtld_fail, 1);
++				continue; /* outside runtime mode, report and keep going */
++			}
++			if (runtime) { /* record the dependency in a null-terminated array */
++				tmp = realloc(*deps, sizeof(*tmp)*(ndeps+2));
++				if (!tmp) longjmp(*rtld_fail, 1);
++				tmp[ndeps++] = dep;
++				tmp[ndeps] = 0;
++				*deps = tmp;
++			}
++		}
++	}
++}
++
++static void load_preload(char *s)
++{
++	char *end = s, saved; /* s lists names separated by whitespace and/or ':' */
++	while (*end) {
++		while (*s && (isspace(*s) || *s==':')) s++;
++		for (end=s; *end && !isspace(*end) && *end!=':'; end++);
++		saved = *end; /* terminate the name in place just for the call */
++		*end = 0;
++		load_library(s, 0);
++		*end = saved;
++		s = end;
++	}
++}
++
++static void make_global(struct dso *p)
++{
++	while (p) { p->global = 1; p = p->next; } /* flags p and every DSO linked after it */
++}
++
++static void do_mips_relocs(struct dso *p, size_t *got)
++{ /* Adjust the local GOT entries of p by its base, then relocate one GOT slot per symbol from DT_MIPS_GOTSYM on. */
++	size_t i, j, rel[2];
++	unsigned char *base = p->base;
++	i=0; search_vec(p->dynv, &i, DT_MIPS_LOCAL_GOTNO); /* i stays 0 if the tag is absent */
++	if (p==&ldso) {
++		got += i; /* ldso: skip the local entries without adding base */
++	} else {
++		while (i--) *got++ += (size_t)base;
++	}
++	j=0; search_vec(p->dynv, &j, DT_MIPS_GOTSYM);
++	i=0; search_vec(p->dynv, &i, DT_MIPS_SYMTABNO);
++	Sym *sym = p->syms + j;
++	rel[0] = (unsigned char *)got - base; /* offset of the current GOT slot from base */
++	for (i-=j; i; i--, sym++, rel[0]+=sizeof(size_t)) {
++		rel[1] = sym-p->syms << 8 | R_MIPS_JUMP_SLOT; /* symbol index in the high bits, type in the low byte */
++		do_relocs(p, rel, sizeof rel, 2);
++	}
++}
++
++static void reloc_all(struct dso *p)
++{ /* Apply relocations to p and every DSO after it that is not yet marked relocated. */
++	size_t dyn[DYN_CNT];
++	for (; p; p=p->next) {
++		if (p->relocated) continue;
++		decode_vec(p->dynv, dyn, DYN_CNT);
++		if (NEED_MIPS_GOT_RELOCS)
++			do_mips_relocs(p, laddr(p, dyn[DT_PLTGOT]));
++		do_relocs(p, laddr(p, dyn[DT_JMPREL]), dyn[DT_PLTRELSZ],
++			2+(dyn[DT_PLTREL]==DT_RELA)); /* stride is 3 words for RELA entries, otherwise 2 */
++		do_relocs(p, laddr(p, dyn[DT_REL]), dyn[DT_RELSZ], 2);
++		do_relocs(p, laddr(p, dyn[DT_RELA]), dyn[DT_RELASZ], 3);
++		/* RELRO is skipped while head is still &ldso, as set by __dls2. */
++		if (head != &ldso && p->relro_start != p->relro_end &&
++		    mprotect(laddr(p, p->relro_start), p->relro_end-p->relro_start, PROT_READ)
++		    && errno != ENOSYS) {
++			error("Error relocating %s: RELRO protection failed: %m",
++				p->name);
++			if (runtime) longjmp(*rtld_fail, 1);
++		}
++
++		p->relocated = 1;
++	}
++}
++
++static void kernel_mapped_dso(struct dso *p)
++{ /* Derive dynv, RELRO bounds, map and map_len of an already-mapped DSO from its program headers. */
++	size_t min_addr = -1, max_addr = 0, cnt; /* min_addr starts at the largest size_t value */
++	Phdr *ph = p->phdr;
++	for (cnt = p->phnum; cnt--; ph = (void *)((char *)ph + p->phentsize)) {
++		if (ph->p_type == PT_DYNAMIC) {
++			p->dynv = laddr(p, ph->p_vaddr);
++		} else if (ph->p_type == PT_GNU_RELRO) {
++			p->relro_start = ph->p_vaddr & -PAGE_SIZE; /* both bounds are rounded down to a page */
++			p->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE;
++		}
++		if (ph->p_type != PT_LOAD) continue;
++		if (ph->p_vaddr < min_addr)
++			min_addr = ph->p_vaddr;
++		if (ph->p_vaddr+ph->p_memsz > max_addr)
++			max_addr = ph->p_vaddr+ph->p_memsz;
++	}
++	min_addr &= -PAGE_SIZE; /* page-align the PT_LOAD span outward */
++	max_addr = (max_addr + PAGE_SIZE-1) & -PAGE_SIZE;
++	p->map = p->base + min_addr;
++	p->map_len = max_addr - min_addr;
++	p->kernel_mapped = 1;
++}
++
++void __libc_exit_fini()
++{ /* Run the destructors of every constructed DSO on the fini_head list. */
++	struct dso *p;
++	size_t dyn[DYN_CNT];
++	for (p=fini_head; p; p=p->fini_next) {
++		if (!p->constructed) continue;
++		decode_vec(p->dynv, dyn, DYN_CNT);
++		if (dyn[0] & (1<<DT_FINI_ARRAY)) {
++			size_t n = dyn[DT_FINI_ARRAYSZ]/sizeof(size_t);
++			size_t *fn = (size_t *)laddr(p, dyn[DT_FINI_ARRAY])+n; /* one past the last entry */
++			while (n--) ((void (*)(void))*--fn)(); /* array entries run last-to-first */
++		}
++#ifndef NO_LEGACY_INITFINI
++		if ((dyn[0] & (1<<DT_FINI)) && dyn[DT_FINI])
++			fpaddr(p, dyn[DT_FINI])(); /* DT_FINI runs after the array */
++#endif
++	}
++}
++
++static void do_init_fini(struct dso *p)
++{ /* Run constructors for p and every DSO before it (following ->prev) that is not yet constructed. */
++	size_t dyn[DYN_CNT];
++	int need_locking = libc.threads_minus_1; /* nonzero when other threads exist */
++	/* Allow recursive calls that arise when a library calls
++	 * dlopen from one of its constructors, but block any
++	 * other threads until all ctors have finished. */
++	if (need_locking) pthread_mutex_lock(&init_fini_lock);
++	for (; p; p=p->prev) {
++		if (p->constructed) continue;
++		p->constructed = 1; /* set before running anything, so a nested call skips this DSO */
++		decode_vec(p->dynv, dyn, DYN_CNT);
++		if (dyn[0] & ((1<<DT_FINI) | (1<<DT_FINI_ARRAY))) {
++			p->fini_next = fini_head; /* push onto the front of the destructor list */
++			fini_head = p;
++		}
++#ifndef NO_LEGACY_INITFINI
++		if ((dyn[0] & (1<<DT_INIT)) && dyn[DT_INIT])
++			fpaddr(p, dyn[DT_INIT])();
++#endif
++		if (dyn[0] & (1<<DT_INIT_ARRAY)) {
++			size_t n = dyn[DT_INIT_ARRAYSZ]/sizeof(size_t);
++			size_t *fn = laddr(p, dyn[DT_INIT_ARRAY]);
++			while (n--) ((void (*)(void))*fn++)(); /* array entries run first-to-last */
++		}
++		if (!need_locking && libc.threads_minus_1) { /* a constructor started a thread: lock from now on */
++			need_locking = 1;
++			pthread_mutex_lock(&init_fini_lock);
++		}
++	}
++	if (need_locking) pthread_mutex_unlock(&init_fini_lock);
++}
++
++void __libc_start_init(void)
++{ /* Run all pending constructors, walking backwards from the tail of the DSO list. */
++	do_init_fini(tail);
++}
++
++static void dl_debug_state(void)
++{ /* Intentionally empty: its address is stored in debug.bp and it is called after the DSO list changes. */
++}
++
++weak_alias(dl_debug_state, _dl_debug_state);
++
++void __init_tls(size_t *auxv)
++{ /* Empty here; __dls3 performs the TLS setup itself. NOTE(review): presumably overrides a static-link version - confirm. */
++}
++
++__attribute__((__visibility__("hidden")))
++void *__tls_get_new(size_t *v)
++{ /* Address of offset v[1] in TLS module v[0] for the calling thread, installing DTV/TLS storage if needed. */
++	pthread_t self = __pthread_self();
++
++	/* Block signals to make accessing new TLS async-signal-safe */
++	sigset_t set;
++	__block_all_sigs(&set);
++	if (v[0]<=(size_t)self->dtv[0]) { /* dtv[0] holds the highest module id this DTV covers */
++		__restore_sigs(&set);
++		return (char *)self->dtv[v[0]]+v[1]+DTP_OFFSET;
++	}
++
++	/* This is safe without any locks held because, if the caller
++	 * is able to request the Nth entry of the DTV, the DSO list
++	 * must be valid at least that far out and it was synchronized
++	 * at program startup or by an already-completed call to dlopen. */
++	struct dso *p;
++	for (p=head; p->tls_id != v[0]; p=p->next);
++
++	/* Get new DTV space from new DSO if needed */
++	if (v[0] > (size_t)self->dtv[0]) { /* always true here, given the early return above */
++		void **newdtv = p->new_dtv +
++			(v[0]+1)*a_fetch_add(&p->new_dtv_idx,1); /* claim the next unused DTV reserved in p */
++		memcpy(newdtv, self->dtv,
++			((size_t)self->dtv[0]+1) * sizeof(void *));
++		newdtv[0] = (void *)v[0];
++		self->dtv = self->dtv_copy = newdtv;
++	}
++
++	/* Get new TLS memory from all new DSOs up to the requested one */
++	unsigned char *mem;
++	for (p=head; ; p=p->next) {
++		if (!p->tls_id || self->dtv[p->tls_id]) continue; /* no TLS, or slot already filled */
++		mem = p->new_tls + (p->tls.size + p->tls.align)
++			* a_fetch_add(&p->new_tls_idx,1); /* claim the next unused TLS block reserved in p */
++		mem += ((uintptr_t)p->tls.image - (uintptr_t)mem)
++			& (p->tls.align-1); /* give mem the same offset modulo align as the image */
++		self->dtv[p->tls_id] = mem;
++		memcpy(mem, p->tls.image, p->tls.len);
++		if (p->tls_id == v[0]) break;
++	}
++	__restore_sigs(&set);
++	return mem + v[1] + DTP_OFFSET;
++}
++
++static void update_tls_size()
++{ /* Publish the loader's TLS count and alignment to libc and recompute libc.tls_size. */
++	libc.tls_cnt = tls_cnt;
++	libc.tls_align = tls_align;
++	libc.tls_size = ALIGN(
++		(1+tls_cnt) * sizeof(void *) + /* one pointer per module plus one extra slot */
++		tls_offset +
++		sizeof(struct pthread) +
++		tls_align * 2, /* extra room for alignment adjustments */
++	tls_align);
++}
++
++/* Stage 1 of the dynamic linker is defined in dlstart.c. It calls the
++ * following stage 2 and stage 3 functions via primitive symbolic lookup
++ * since it does not have access to their addresses to begin with. */
++
++/* Stage 2 of the dynamic linker is called after relative relocations 
++ * have been processed. It can make function calls to static functions
++ * and access string literals and static data, but cannot use extern
++ * symbols. Its job is to perform symbolic relocations on the dynamic
++ * linker itself, but some of the relocations performed may need to be
++ * replaced later due to copy relocations in the main program. */
++
++__attribute__((__visibility__("hidden")))
++void __dls2(unsigned char *base, size_t *sp)
++{ /* Stage 2: describe the loader itself in ldso, relocate it, then call __dls3 by symbol lookup. */
++	if (DL_FDPIC) {
++		void *p1 = (void *)sp[-2];
++		void *p2 = (void *)sp[-1];
++		if (!p1) { /* no first pointer below sp: take the base from the aux vector instead */
++			size_t *auxv, aux[AUX_CNT];
++			for (auxv=sp+1+*sp+1; *auxv; auxv++); auxv++; /* skip argv and its null, then envp and its null */
++			decode_vec(auxv, aux, AUX_CNT);
++			if (aux[AT_BASE]) ldso.base = (void *)aux[AT_BASE];
++			else ldso.base = (void *)(aux[AT_PHDR] & -4096);
++		}
++		app_loadmap = p2 ? p1 : 0; /* with both pointers set, p1 is the app's map and p2 the loader's */
++		ldso.loadmap = p2 ? p2 : p1;
++		ldso.base = laddr(&ldso, 0);
++	} else {
++		ldso.base = base;
++	}
++	Ehdr *ehdr = (void *)ldso.base;
++	ldso.name = ldso.shortname = "libc.so";
++	ldso.global = 1;
++	ldso.phnum = ehdr->e_phnum;
++	ldso.phdr = laddr(&ldso, ehdr->e_phoff);
++	ldso.phentsize = ehdr->e_phentsize;
++	kernel_mapped_dso(&ldso);
++	decode_dyn(&ldso);
++
++	if (DL_FDPIC) makefuncdescs(&ldso);
++
++	/* Prepare storage to save clobbered REL addends so they
++	 * can be reused in stage 3. There should be very few. If
++	 * something goes wrong and there are a huge number, abort
++	 * instead of risking stack overflow. */
++	size_t dyn[DYN_CNT];
++	decode_vec(ldso.dynv, dyn, DYN_CNT);
++	size_t *rel = laddr(&ldso, dyn[DT_REL]);
++	size_t rel_size = dyn[DT_RELSZ];
++	size_t symbolic_rel_cnt = 0;
++	apply_addends_to = rel;
++	for (; rel_size; rel+=2, rel_size-=2*sizeof(size_t)) /* each REL entry is two words */
++		if (!IS_RELATIVE(rel[1], ldso.syms)) symbolic_rel_cnt++;
++	if (symbolic_rel_cnt >= ADDEND_LIMIT) a_crash();
++	size_t addends[symbolic_rel_cnt+1]; /* lives on this stack frame, which stays active across __dls3 */
++	saved_addends = addends;
++
++	head = &ldso;
++	reloc_all(&ldso);
++
++	ldso.relocated = 0; /* clear the flag so a later reloc_all pass processes ldso again */
++
++	/* Call dynamic linker stage-3, __dls3, looking it up
++	 * symbolically as a barrier against moving the address
++	 * load across the above relocation processing. */
++	struct symdef dls3_def = find_sym(&ldso, "__dls3", 0);
++	if (DL_FDPIC) ((stage3_func)&ldso.funcdescs[dls3_def.sym-ldso.syms])(sp);
++	else ((stage3_func)laddr(&ldso, dls3_def.sym->st_value))(sp);
++}
++
++/* Stage 3 of the dynamic linker is called with the dynamic linker/libc
++ * fully functional. Its job is to load (if not already loaded) and
++ * process dependencies and relocations for the main application and
++ * transfer control to its entry point. */
++
++_Noreturn void __dls3(size_t *sp)
++{ /* Stage 3: set up libc state, adopt or map the main program, load and relocate everything, jump to its entry. */
++	static struct dso app, vdso;
++	size_t aux[AUX_CNT], *auxv;
++	size_t i;
++	char *env_preload=0;
++	size_t vdso_base;
++	int argc = *sp; /* sp points at argc, followed by argv[], a null, envp[], a null, then auxv */
++	char **argv = (void *)(sp+1);
++	char **argv_orig = argv;
++	char **envp = argv+argc+1;
++
++	/* Find aux vector just past environ[] and use it to initialize
++	 * global data that may be needed before we can make syscalls. */
++	__environ = envp;
++	for (i=argc+1; argv[i]; i++);
++	libc.auxv = auxv = (void *)(argv+i+1);
++	decode_vec(auxv, aux, AUX_CNT);
++	__hwcap = aux[AT_HWCAP];
++	libc.page_size = aux[AT_PAGESZ];
++	libc.secure = ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID] /* 0x7800: bits 11..14, presumably the uid/gid tags */
++		|| aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]);
++
++	/* Setup early thread pointer in builtin_tls for ldso/libc itself to
++	 * use during dynamic linking. If possible it will also serve as the
++	 * thread pointer at runtime. */
++	libc.tls_size = sizeof builtin_tls;
++	libc.tls_align = tls_align;
++	if (__init_tp(__copy_tls((void *)builtin_tls)) < 0) {
++		a_crash();
++	}
++
++	/* Only trust user/env if kernel says we're not suid/sgid */
++	if (!libc.secure) {
++		env_path = getenv("LD_LIBRARY_PATH");
++		env_preload = getenv("LD_PRELOAD");
++	}
++
++	/* If the main program was already loaded by the kernel,
++	 * AT_PHDR will point to some location other than the dynamic
++	 * linker's program headers. */
++	if (aux[AT_PHDR] != (size_t)ldso.phdr) {
++		size_t interp_off = 0;
++		size_t tls_image = 0;
++		/* Find load address of the main program, via AT_PHDR vs PT_PHDR. */
++		Phdr *phdr = app.phdr = (void *)aux[AT_PHDR];
++		app.phnum = aux[AT_PHNUM];
++		app.phentsize = aux[AT_PHENT];
++		for (i=aux[AT_PHNUM]; i; i--, phdr=(void *)((char *)phdr + aux[AT_PHENT])) {
++			if (phdr->p_type == PT_PHDR)
++				app.base = (void *)(aux[AT_PHDR] - phdr->p_vaddr);
++			else if (phdr->p_type == PT_INTERP)
++				interp_off = (size_t)phdr->p_vaddr;
++			else if (phdr->p_type == PT_TLS) {
++				tls_image = phdr->p_vaddr;
++				app.tls.len = phdr->p_filesz;
++				app.tls.size = phdr->p_memsz;
++				app.tls.align = phdr->p_align;
++			}
++		}
++		if (DL_FDPIC) app.loadmap = app_loadmap;
++		if (app.tls.size) app.tls.image = laddr(&app, tls_image);
++		if (interp_off) ldso.name = laddr(&app, interp_off); /* the loader takes the program's PT_INTERP string as its name */
++		if ((aux[0] & (1UL<<AT_EXECFN))
++		    && strncmp((char *)aux[AT_EXECFN], "/proc/", 6))
++			app.name = (char *)aux[AT_EXECFN];
++		else
++			app.name = argv[0];
++		kernel_mapped_dso(&app);
++	} else { /* the loader was run directly as a command: parse its options and map the program */
++		int fd;
++		char *ldname = argv[0];
++		size_t l = strlen(ldname);
++		if (l >= 3 && !strcmp(ldname+l-3, "ldd")) ldd_mode = 1;
++		argv++;
++		while (argv[0] && argv[0][0]=='-' && argv[0][1]=='-') {
++			char *opt = argv[0]+2;
++			*argv++ = (void *)-1; /* overwrite the consumed option slot */
++			if (!*opt) {
++				break; /* a bare "--" ends the options */
++			} else if (!memcmp(opt, "list", 5)) { /* length 5 includes the terminating null */
++				ldd_mode = 1;
++			} else if (!memcmp(opt, "library-path", 12)) {
++				if (opt[12]=='=') env_path = opt+13;
++				else if (opt[12]) *argv = 0; /* unknown longer option: leads to the usage message */
++				else if (*argv) env_path = *argv++;
++			} else if (!memcmp(opt, "preload", 7)) {
++				if (opt[7]=='=') env_preload = opt+8;
++				else if (opt[7]) *argv = 0;
++				else if (*argv) env_preload = *argv++;
++			} else {
++				argv[0] = 0;
++			}
++		}
++		argv[-1] = (void *)(argc - (argv-argv_orig)); /* adjusted argc, stored just below the new argv[0] */
++		if (!argv[0]) {
++			dprintf(2, "musl libc (" LDSO_ARCH ")\n"
++				"Version %s\n"
++				"Dynamic Program Loader\n"
++				"Usage: %s [options] [--] pathname%s\n",
++				__libc_get_version(), ldname,
++				ldd_mode ? "" : " [args]");
++			_exit(1);
++		}
++		fd = open(argv[0], O_RDONLY);
++		if (fd < 0) {
++			dprintf(2, "%s: cannot load %s: %s\n", ldname, argv[0], strerror(errno));
++			_exit(1);
++		}
++		runtime = 1; /* set around map_library so it skips reclaim_gaps; that is called explicitly below */
++		Ehdr *ehdr = (void *)map_library(fd, &app);
++		if (!ehdr) {
++			dprintf(2, "%s: %s: Not a valid dynamic program\n", ldname, argv[0]);
++			_exit(1);
++		}
++		runtime = 0;
++		close(fd);
++		ldso.name = ldname;
++		app.name = argv[0];
++		aux[AT_ENTRY] = (size_t)laddr(&app, ehdr->e_entry);
++		/* Find the name that would have been used for the dynamic
++		 * linker had ldd not taken its place. */
++		if (ldd_mode) {
++			for (i=0; i<app.phnum; i++) {
++				if (app.phdr[i].p_type == PT_INTERP)
++					ldso.name = laddr(&app, app.phdr[i].p_vaddr);
++			}
++			dprintf(1, "\t%s (%p)\n", ldso.name, ldso.base);
++		}
++	}
++	if (app.tls.size) { /* the main program's TLS becomes module 1 */
++		libc.tls_head = tls_tail = &app.tls;
++		app.tls_id = tls_cnt = 1;
++#ifdef TLS_ABOVE_TP
++		app.tls.offset = 0;
++		tls_offset = app.tls.size
++			+ ( -((uintptr_t)app.tls.image + app.tls.size)
++			& (app.tls.align-1) );
++#else
++		tls_offset = app.tls.offset = app.tls.size
++			+ ( -((uintptr_t)app.tls.image + app.tls.size)
++			& (app.tls.align-1) );
++#endif
++		tls_align = MAXP2(tls_align, app.tls.align);
++	}
++	app.global = 1;
++	decode_dyn(&app);
++	if (DL_FDPIC) {
++		makefuncdescs(&app);
++		if (!app.loadmap) { /* no load map was supplied: describe the whole mapping as one segment */
++			app.loadmap = (void *)&app_dummy_loadmap;
++			app.loadmap->nsegs = 1;
++			app.loadmap->segs[0].addr = (size_t)app.map;
++			app.loadmap->segs[0].p_vaddr = (size_t)app.map
++				- (size_t)app.base;
++			app.loadmap->segs[0].p_memsz = app.map_len;
++		}
++		argv[-3] = (void *)app.loadmap;
++	}
++
++	/* Attach to vdso, if provided by the kernel */
++	if (search_vec(auxv, &vdso_base, AT_SYSINFO_EHDR)) {
++		Ehdr *ehdr = (void *)vdso_base;
++		Phdr *phdr = vdso.phdr = (void *)(vdso_base + ehdr->e_phoff);
++		vdso.phnum = ehdr->e_phnum;
++		vdso.phentsize = ehdr->e_phentsize;
++		for (i=ehdr->e_phnum; i; i--, phdr=(void *)((char *)phdr + ehdr->e_phentsize)) {
++			if (phdr->p_type == PT_DYNAMIC)
++				vdso.dynv = (void *)(vdso_base + phdr->p_offset);
++			if (phdr->p_type == PT_LOAD)
++				vdso.base = (void *)(vdso_base - phdr->p_vaddr + phdr->p_offset);
++		}
++		vdso.name = "";
++		vdso.shortname = "linux-gate.so.1";
++		vdso.global = 1;
++		vdso.relocated = 1; /* reloc_all will skip it */
++		decode_dyn(&vdso);
++		vdso.prev = &ldso; /* chained after ldso, so it joins the list together with ldso */
++		ldso.next = &vdso;
++	}
++
++	/* Initial dso chain consists only of the app. */
++	head = tail = &app;
++
++	/* Donate unused parts of app and library mapping to malloc */
++	reclaim_gaps(&app);
++	reclaim_gaps(&ldso);
++
++	/* Load preload/needed libraries, add their symbols to the global
++	 * namespace, and perform all remaining relocations. */
++	if (env_preload) load_preload(env_preload);
++	load_deps(&app);
++	make_global(&app);
++
++#ifndef DYNAMIC_IS_RO
++	for (i=0; app.dynv[i]; i+=2)
++		if (app.dynv[i]==DT_DEBUG)
++			app.dynv[i+1] = (size_t)&debug; /* store the address of debug in the program's DT_DEBUG entry */
++#endif
++
++	/* The main program must be relocated LAST since it may contain
++	 * copy relocations which depend on libraries' relocations. */
++	reloc_all(app.next);
++	reloc_all(&app);
++
++	update_tls_size();
++	if (libc.tls_size > sizeof builtin_tls || tls_align > MIN_TLS_ALIGN) {
++		void *initial_tls = calloc(libc.tls_size, 1);
++		if (!initial_tls) {
++			dprintf(2, "%s: Error getting %zu bytes thread-local storage: %m\n",
++				argv[0], libc.tls_size);
++			_exit(127);
++		}
++		if (__init_tp(__copy_tls(initial_tls)) < 0) {
++			a_crash();
++		}
++	} else {
++		size_t tmp_tls_size = libc.tls_size;
++		pthread_t self = __pthread_self();
++		/* Temporarily set the tls size to the full size of
++		 * builtin_tls so that __copy_tls will use the same layout
++		 * as it did before. Then check, just to be safe. */
++		libc.tls_size = sizeof builtin_tls;
++		if (__copy_tls((void*)builtin_tls) != self) a_crash();
++		libc.tls_size = tmp_tls_size;
++	}
++	static_tls_cnt = tls_cnt;
++
++	if (ldso_fail) _exit(127);
++	if (ldd_mode) _exit(0); /* ldd mode only lists libraries; the program is never started */
++
++	/* Switch to runtime mode: any further failures in the dynamic
++	 * linker are a reportable failure rather than a fatal startup
++	 * error. */
++	runtime = 1;
++
++	debug.ver = 1;
++	debug.bp = dl_debug_state;
++	debug.head = head;
++	debug.base = ldso.base;
++	debug.state = 0;
++	_dl_debug_state();
++
++	errno = 0;
++
++	CRTJMP((void *)aux[AT_ENTRY], argv-1); /* argv-1 is where the (possibly adjusted) argc lives */
++	for(;;);
++}
++
++void *dlopen(const char *file, int mode)
++{ /* Load file and its dependencies, relocate them and run constructors; returns the handle or 0. */
++	struct dso *volatile p, *orig_tail, *next; /* p is volatile: it is read again after longjmp returns here */
++	struct tls_module *orig_tls_tail;
++	size_t orig_tls_cnt, orig_tls_offset, orig_tls_align;
++	size_t i;
++	int cs;
++	jmp_buf jb;
++
++	if (!file) return head; /* null file: the handle is the head of the DSO list */
++
++	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
++	pthread_rwlock_wrlock(&lock);
++	__inhibit_ptc();
++
++	p = 0;
++	orig_tls_tail = tls_tail; /* snapshot of the loader state, restored if loading fails */
++	orig_tls_cnt = tls_cnt;
++	orig_tls_offset = tls_offset;
++	orig_tls_align = tls_align;
++	orig_tail = tail;
++	noload = mode & RTLD_NOLOAD;
++
++	rtld_fail = &jb;
++	if (setjmp(*rtld_fail)) {
++		/* Clean up anything new that was (partially) loaded */
++		if (p && p->deps) for (i=0; p->deps[i]; i++)
++			if (p->deps[i]->global < 0)
++				p->deps[i]->global = 0;
++		for (p=orig_tail->next; p; p=next) { /* everything appended after the snapshot */
++			next = p->next;
++			while (p->td_index) {
++				void *tmp = p->td_index->next;
++				free(p->td_index);
++				p->td_index = tmp;
++			}
++			free(p->funcdescs);
++			if (p->rpath != p->rpath_orig) /* only free an rpath that fixup_rpath allocated */
++				free(p->rpath);
++			free(p->deps);
++			unmap_library(p);
++			free(p);
++		}
++		if (!orig_tls_tail) libc.tls_head = 0;
++		tls_tail = orig_tls_tail;
++		tls_cnt = orig_tls_cnt;
++		tls_offset = orig_tls_offset;
++		tls_align = orig_tls_align;
++		tail = orig_tail;
++		tail->next = 0;
++		p = 0;
++		goto end;
++	} else p = load_library(file, head);
++
++	if (!p) {
++		error(noload ?
++			"Library %s is not already loaded" :
++			"Error loading shared library %s: %m",
++			file);
++		goto end;
++	}
++
++	/* First load handling */
++	if (!p->deps) {
++		load_deps(p);
++		if (p->deps) for (i=0; p->deps[i]; i++)
++			if (!p->deps[i]->global)
++				p->deps[i]->global = -1; /* -1 marks a DSO as temporarily global while relocating */
++		if (!p->global) p->global = -1;
++		reloc_all(p);
++		if (p->deps) for (i=0; p->deps[i]; i++)
++			if (p->deps[i]->global < 0)
++				p->deps[i]->global = 0; /* drop the temporary marking again */
++		if (p->global < 0) p->global = 0;
++	}
++
++	if (mode & RTLD_GLOBAL) {
++		if (p->deps) for (i=0; p->deps[i]; i++)
++			p->deps[i]->global = 1;
++		p->global = 1;
++	}
++
++	update_tls_size();
++	_dl_debug_state();
++	orig_tail = tail; /* success: constructors below start from the current tail */
++end:
++	__release_ptc();
++	if (p) gencnt++;
++	pthread_rwlock_unlock(&lock);
++	if (p) do_init_fini(orig_tail); /* runs after the loader lock is released */
++	pthread_setcancelstate(cs, 0);
++	return p;
++}
++
++__attribute__((__visibility__("hidden")))
++int __dl_invalid_handle(void *h)
++{
++	struct dso *cur = head;
++	while (cur && h != cur) cur = cur->next; /* a valid handle is a DSO on the head list */
++	if (!cur) error("Invalid library handle %p", (void *)h);
++	return !cur;
++}
++
++static void *addr2dso(size_t a)
++{ /* Return the DSO whose function descriptors or mapped range contain address a, or 0 if none does. */
++	struct dso *p;
++	size_t i;
++	if (DL_FDPIC) for (p=head; p; p=p->next) { /* first pass: a may point into a descriptor table */
++		i = count_syms(p);
++		if (a-(size_t)p->funcdescs < i*sizeof(*p->funcdescs)) /* unsigned wraparound makes this a range test */
++			return p;
++	}
++	for (p=head; p; p=p->next) {
++		if (DL_FDPIC && p->loadmap) {
++			for (i=0; i<p->loadmap->nsegs; i++) {
++				if (a-p->loadmap->segs[i].p_vaddr
++				    < p->loadmap->segs[i].p_memsz) /* NOTE(review): compares against p_vaddr, not segs[i].addr - confirm intent */
++					return p;
++			}
++		} else {
++			if (a-(size_t)p->map < p->map_len)
++				return p;
++		}
++	}
++	return 0;
++}
++
++void *__tls_get_addr(size_t *);
++
++static void *do_dlsym(struct dso *p, const char *s, void *ra)
++{ /* Look up symbol s for handle p; ra is an address used to locate the starting DSO for RTLD_NEXT. Returns 0 if not found. */
++	size_t i;
++	uint32_t h = 0, gh = 0, *ght; /* hashes are computed lazily; 0 means "not computed yet" */
++	Sym *sym;
++	if (p == head || p == RTLD_DEFAULT || p == RTLD_NEXT) { /* list-wide lookups via find_sym */
++		if (p == RTLD_DEFAULT) {
++			p = head;
++		} else if (p == RTLD_NEXT) {
++			p = addr2dso((size_t)ra);
++			if (!p) p=head;
++			p = p->next; /* start with the DSO after the one containing ra */
++		}
++		struct symdef def = find_sym(p, s, 0);
++		if (!def.sym) goto failed;
++		if ((def.sym->st_info&0xf) == STT_TLS)
++			return __tls_get_addr((size_t []){def.dso->tls_id, def.sym->st_value});
++		if (DL_FDPIC && (def.sym->st_info&0xf) == STT_FUNC)
++			return def.dso->funcdescs + (def.sym - def.dso->syms); /* functions resolve to their descriptor */
++		return laddr(def.dso, def.sym->st_value);
++	}
++	if (__dl_invalid_handle(p))
++		return 0;
++	if ((ght = p->ghashtab)) { /* use the GNU hash table when the DSO has one */
++		gh = gnu_hash(s);
++		sym = gnu_lookup(gh, ght, p, s);
++	} else {
++		h = sysv_hash(s);
++		sym = sysv_lookup(s, h, p);
++	}
++	if (sym && (sym->st_info&0xf) == STT_TLS)
++		return __tls_get_addr((size_t []){p->tls_id, sym->st_value});
++	if (DL_FDPIC && sym && sym->st_shndx && (sym->st_info&0xf) == STT_FUNC)
++		return p->funcdescs + (sym - p->syms);
++	if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
++		return laddr(p, sym->st_value);
++	if (p->deps) for (i=0; p->deps[i]; i++) { /* not in p itself: try each recorded dependency in order */
++		if ((ght = p->deps[i]->ghashtab)) {
++			if (!gh) gh = gnu_hash(s);
++			sym = gnu_lookup(gh, ght, p->deps[i], s);
++		} else {
++			if (!h) h = sysv_hash(s);
++			sym = sysv_lookup(s, h, p->deps[i]);
++		}
++		if (sym && (sym->st_info&0xf) == STT_TLS)
++			return __tls_get_addr((size_t []){p->deps[i]->tls_id, sym->st_value});
++		if (DL_FDPIC && sym && sym->st_shndx && (sym->st_info&0xf) == STT_FUNC)
++			return p->deps[i]->funcdescs + (sym - p->deps[i]->syms);
++		if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
++			return laddr(p->deps[i], sym->st_value);
++	}
++failed:
++	error("Symbol not found: %s", s);
++	return 0;
++}
++
++int dladdr(const void *addr, Dl_info *info)
++{ /* Fill info with the DSO and nearest preceding symbol for addr; returns 1 on success, 0 otherwise. */
++	struct dso *p;
++	Sym *sym, *bestsym;
++	uint32_t nsym;
++	char *strings;
++	void *best = 0;
++
++	pthread_rwlock_rdlock(&lock);
++	p = addr2dso((size_t)addr);
++	pthread_rwlock_unlock(&lock);
++
++	if (!p) return 0;
++
++	sym = p->syms;
++	strings = p->strings;
++	nsym = count_syms(p);
++
++	if (DL_FDPIC) {
++		size_t idx = ((size_t)addr-(size_t)p->funcdescs)
++			/ sizeof(*p->funcdescs);
++		if (idx < nsym && (sym[idx].st_info&0xf) == STT_FUNC) { /* addr is inside p's descriptor table */
++			best = p->funcdescs + idx;
++			bestsym = sym + idx;
++		}
++	}
++	/* Otherwise scan for the symbol with the highest address not above addr. */
++	if (!best) for (; nsym; nsym--, sym++) {
++		if (sym->st_value
++		 && (1<<(sym->st_info&0xf) & OK_TYPES)
++		 && (1<<(sym->st_info>>4) & OK_BINDS)) {
++			void *symaddr = laddr(p, sym->st_value);
++			if (symaddr > addr || symaddr < best)
++				continue;
++			best = symaddr;
++			bestsym = sym;
++			if (addr == symaddr)
++				break; /* exact match: nothing can be closer */
++		}
++	}
++
++	if (!best) return 0;
++
++	if (DL_FDPIC && (bestsym->st_info&0xf) == STT_FUNC)
++		best = p->funcdescs + (bestsym - p->syms); /* report functions by their descriptor address */
++
++	info->dli_fname = p->name;
++	info->dli_fbase = p->base;
++	info->dli_sname = strings + bestsym->st_name;
++	info->dli_saddr = best;
++
++	return 1;
++}
++
++__attribute__((__visibility__("hidden")))
++void *__dlsym(void *restrict p, const char *restrict s, void *restrict ra) /* locked wrapper: runs do_dlsym(p, s, ra) under the read lock and returns its result */
++{
++	void *res;
++	pthread_rwlock_rdlock(&lock);
++	res = do_dlsym(p, s, ra);
++	pthread_rwlock_unlock(&lock);
++	return res;
++}
++
++int dl_iterate_phdr(int(*callback)(struct dl_phdr_info *info, size_t size, void *data), void *data) /* call callback once per DSO starting at head; returns the first nonzero callback result, or 0 after the whole list */
++{
++	struct dso *current;
++	struct dl_phdr_info info;
++	int ret = 0;
++	for(current = head; current;) {
++		info.dlpi_addr      = (uintptr_t)current->base;
++		info.dlpi_name      = current->name;
++		info.dlpi_phdr      = current->phdr;
++		info.dlpi_phnum     = current->phnum;
++		info.dlpi_adds      = gencnt;
++		info.dlpi_subs      = 0; /* always reported as 0 by this implementation */
++		info.dlpi_tls_modid = current->tls_id;
++		info.dlpi_tls_data  = current->tls.image;
++
++		ret = (callback)(&info, sizeof (info), data); /* invoked without the lock held */
++
++		if (ret != 0) break;
++
++		pthread_rwlock_rdlock(&lock); /* the lock covers only the read of ->next */
++		current = current->next;
++		pthread_rwlock_unlock(&lock);
++	}
++	return ret;
++}
++
++__attribute__((__visibility__("hidden")))
++void __dl_vseterr(const char *, va_list);
++
++static void error(const char *fmt, ...) /* report a printf-style error: printed to fd 2 while !runtime, otherwise handed to __dl_vseterr */
++{
++	va_list ap;
++	va_start(ap, fmt);
++	if (!runtime) {
++		vdprintf(2, fmt, ap);
++		dprintf(2, "\n");
++		ldso_fail = 1; /* only records the failure; this function still returns to its caller */
++		va_end(ap);
++		return;
++	}
++	__dl_vseterr(fmt, ap);
++	va_end(ap);
++}
+--- a/src/env/__init_tls.c
++++ b/src/env/__init_tls.c
+@@ -8,9 +8,6 @@
+ #include "atomic.h"
+ #include "syscall.h"
+ 
+-#ifndef SHARED
+-static
+-#endif
+ int __init_tp(void *p)
+ {
+ 	pthread_t td = p;
+@@ -24,8 +21,6 @@ int __init_tp(void *p)
+ 	return 0;
+ }
+ 
+-#ifndef SHARED
+-
+ static struct builtin_tls {
+ 	char c;
+ 	struct pthread pt;
+@@ -33,33 +28,40 @@ static struct builtin_tls {
+ } builtin_tls[1];
+ #define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt)
+ 
+-struct tls_image {
+-	void *image;
+-	size_t len, size, align;
+-} __static_tls;
+-
+-#define T __static_tls
++static struct tls_module main_tls;
+ 
+ void *__copy_tls(unsigned char *mem)
+ {
+ 	pthread_t td;
+-	if (!T.image) return mem;
+-	void **dtv = (void *)mem;
+-	dtv[0] = (void *)1;
++	struct tls_module *p;
++	size_t i;
++	void **dtv;
++
+ #ifdef TLS_ABOVE_TP
+-	mem += sizeof(void *) * 2;
+-	mem += -((uintptr_t)mem + sizeof(struct pthread)) & (T.align-1);
++	dtv = (void **)(mem + libc.tls_size) - (libc.tls_cnt + 1);
++
++	mem += -((uintptr_t)mem + sizeof(struct pthread)) & (libc.tls_align-1);
+ 	td = (pthread_t)mem;
+ 	mem += sizeof(struct pthread);
++
++	for (i=1, p=libc.tls_head; p; i++, p=p->next) {
++		dtv[i] = mem + p->offset;
++		memcpy(dtv[i], p->image, p->len);
++	}
+ #else
++	dtv = (void **)mem;
++
+ 	mem += libc.tls_size - sizeof(struct pthread);
+-	mem -= (uintptr_t)mem & (T.align-1);
++	mem -= (uintptr_t)mem & (libc.tls_align-1);
+ 	td = (pthread_t)mem;
+-	mem -= T.size;
++
++	for (i=1, p=libc.tls_head; p; i++, p=p->next) {
++		dtv[i] = mem - p->offset;
++		memcpy(dtv[i], p->image, p->len);
++	}
+ #endif
++	dtv[0] = (void *)libc.tls_cnt;
+ 	td->dtv = td->dtv_copy = dtv;
+-	dtv[1] = mem;
+-	memcpy(mem, T.image, T.len);
+ 	return td;
+ }
+ 
+@@ -69,7 +71,7 @@ typedef Elf32_Phdr Phdr;
+ typedef Elf64_Phdr Phdr;
+ #endif
+ 
+-void __init_tls(size_t *aux)
++static void static_init_tls(size_t *aux)
+ {
+ 	unsigned char *p;
+ 	size_t n;
+@@ -86,16 +88,24 @@ void __init_tls(size_t *aux)
+ 	}
+ 
+ 	if (tls_phdr) {
+-		T.image = (void *)(base + tls_phdr->p_vaddr);
+-		T.len = tls_phdr->p_filesz;
+-		T.size = tls_phdr->p_memsz;
+-		T.align = tls_phdr->p_align;
++		main_tls.image = (void *)(base + tls_phdr->p_vaddr);
++		main_tls.len = tls_phdr->p_filesz;
++		main_tls.size = tls_phdr->p_memsz;
++		main_tls.align = tls_phdr->p_align;
++		libc.tls_cnt = 1;
++		libc.tls_head = &main_tls;
+ 	}
+ 
+-	T.size += (-T.size - (uintptr_t)T.image) & (T.align-1);
+-	if (T.align < MIN_TLS_ALIGN) T.align = MIN_TLS_ALIGN;
++	main_tls.size += (-main_tls.size - (uintptr_t)main_tls.image)
++		& (main_tls.align-1);
++	if (main_tls.align < MIN_TLS_ALIGN) main_tls.align = MIN_TLS_ALIGN;
++#ifndef TLS_ABOVE_TP
++	main_tls.offset = main_tls.size;
++#endif
+ 
+-	libc.tls_size = 2*sizeof(void *)+T.size+T.align+sizeof(struct pthread)
++	libc.tls_align = main_tls.align;
++	libc.tls_size = 2*sizeof(void *) + sizeof(struct pthread)
++		+ main_tls.size + main_tls.align
+ 		+ MIN_TLS_ALIGN-1 & -MIN_TLS_ALIGN;
+ 
+ 	if (libc.tls_size > sizeof builtin_tls) {
+@@ -117,6 +127,5 @@ void __init_tls(size_t *aux)
+ 	if (__init_tp(__copy_tls(mem)) < 0)
+ 		a_crash();
+ }
+-#else
+-void __init_tls(size_t *auxv) { }
+-#endif
++
++weak_alias(static_init_tls, __init_tls);
+--- a/src/env/__libc_start_main.c
++++ b/src/env/__libc_start_main.c
+@@ -8,21 +8,17 @@
+ 
+ void __init_tls(size_t *);
+ 
+-#ifndef SHARED
+-static void dummy() {}
++static void dummy(void) {}
+ weak_alias(dummy, _init);
+-extern void (*const __init_array_start)() __attribute__((weak));
+-extern void (*const __init_array_end)() __attribute__((weak));
+-#endif
++
++__attribute__((__weak__, __visibility__("hidden")))
++extern void (*const __init_array_start)(void), (*const __init_array_end)(void);
+ 
+ static void dummy1(void *p) {}
+ weak_alias(dummy1, __init_ssp);
+ 
+ #define AUX_CNT 38
+ 
+-#ifndef SHARED
+-static
+-#endif
+ void __init_libc(char **envp, char *pn)
+ {
+ 	size_t i, *auxv, aux[AUX_CNT] = { 0 };
+@@ -57,20 +53,22 @@ void __init_libc(char **envp, char *pn)
+ 	libc.secure = 1;
+ }
+ 
+-int __libc_start_main(int (*main)(int,char **,char **), int argc, char **argv)
++static void libc_start_init(void)
+ {
+-	char **envp = argv+argc+1;
+-
+-#ifndef SHARED
+-	__init_libc(envp, argv[0]);
+ 	_init();
+ 	uintptr_t a = (uintptr_t)&__init_array_start;
+ 	for (; a<(uintptr_t)&__init_array_end; a+=sizeof(void(*)()))
+ 		(*(void (**)())a)();
+-#else
+-	void __libc_start_init(void);
++}
++
++weak_alias(libc_start_init, __libc_start_init);
++
++int __libc_start_main(int (*main)(int,char **,char **), int argc, char **argv)
++{
++	char **envp = argv+argc+1;
++
++	__init_libc(envp, argv[0]);
+ 	__libc_start_init();
+-#endif
+ 
+ 	/* Pass control to the application */
+ 	exit(main(argc, argv, envp));
+--- a/src/env/__reset_tls.c
++++ b/src/env/__reset_tls.c
+@@ -1,21 +1,16 @@
+-#ifndef SHARED
+-
+ #include <string.h>
+ #include "pthread_impl.h"
+-
+-extern struct tls_image {
+-	void *image;
+-	size_t len, size, align;
+-} __static_tls;
+-
+-#define T __static_tls
++#include "libc.h"
+ 
+ void __reset_tls()
+ {
+-	if (!T.size) return;
+ 	pthread_t self = __pthread_self();
+-	memcpy(self->dtv[1], T.image, T.len);
+-	memset((char *)self->dtv[1]+T.len, 0, T.size-T.len);
++	struct tls_module *p;
++	size_t i, n = (size_t)self->dtv[0];
++	if (n) for (p=libc.tls_head, i=1; i<=n; i++, p=p->next) {
++		if (!self->dtv[i]) continue;
++		memcpy(self->dtv[i], p->image, p->len);
++		memset((char *)self->dtv[i]+p->len, 0,
++			p->size - p->len);
++	}
+ }
+-
+-#endif
+--- a/src/env/__stack_chk_fail.c
++++ b/src/env/__stack_chk_fail.c
+@@ -17,16 +17,7 @@ void __stack_chk_fail(void)
+ 	a_crash();
+ }
+ 
+-#ifdef SHARED
+-
+ __attribute__((__visibility__("hidden")))
+-void __stack_chk_fail_local(void)
+-{
+-	a_crash();
+-}
+-
+-#else
++void __stack_chk_fail_local(void);
+ 
+ weak_alias(__stack_chk_fail, __stack_chk_fail_local);
+-
+-#endif
+--- /dev/null
++++ b/src/exit/arm/__aeabi_atexit.c
+@@ -0,0 +1,6 @@
++int __cxa_atexit(void (*func)(void *), void *arg, void *dso);
++
++int __aeabi_atexit (void *obj, void (*func) (void *), void *d) /* forwards to __cxa_atexit; differs only in taking the object before the function */
++{
++	return __cxa_atexit (func, obj, d);
++}
+--- a/src/exit/exit.c
++++ b/src/exit/exit.c
+@@ -10,25 +10,25 @@ static void dummy()
+  * as a consequence of linking either __toread.c or __towrite.c. */
+ weak_alias(dummy, __funcs_on_exit);
+ weak_alias(dummy, __stdio_exit);
+-
+-#ifndef SHARED
+ weak_alias(dummy, _fini);
+-extern void (*const __fini_array_start)() __attribute__((weak));
+-extern void (*const __fini_array_end)() __attribute__((weak));
+-#endif
+ 
+-_Noreturn void exit(int code)
+-{
+-	__funcs_on_exit();
++__attribute__((__weak__, __visibility__("hidden")))
++extern void (*const __fini_array_start)(void), (*const __fini_array_end)(void);
+ 
+-#ifndef SHARED
++static void libc_exit_fini(void)
++{
+ 	uintptr_t a = (uintptr_t)&__fini_array_end;
+ 	for (; a>(uintptr_t)&__fini_array_start; a-=sizeof(void(*)()))
+ 		(*(void (**)())(a-sizeof(void(*)())))();
+ 	_fini();
+-#endif
++}
+ 
+-	__stdio_exit();
++weak_alias(libc_exit_fini, __libc_exit_fini);
+ 
++_Noreturn void exit(int code)
++{
++	__funcs_on_exit();
++	__libc_exit_fini();
++	__stdio_exit();
+ 	_Exit(code);
+ }
+--- /dev/null
++++ b/src/fenv/arm/fenv-hf.S
+@@ -0,0 +1,69 @@
++#if __ARM_PCS_VFP
++
++.syntax unified
++.fpu vfp
++
++.global fegetround
++.type fegetround,%function
++fegetround: /* returns FPSCR masked to bits 22-23 */
++	fmrx r0, fpscr /* r0 = FPSCR */
++	and r0, r0, #0xc00000
++	bx lr
++
++.global __fesetround
++.type __fesetround,%function
++__fesetround: /* replaces FPSCR bits 22-23 with the value in r0; returns 0 */
++	fmrx r3, fpscr
++	bic r3, r3, #0xc00000 /* clear the old field before inserting the new one */
++	orr r3, r3, r0 /* r0 is OR-ed in unmasked - presumably validated by the caller */
++	fmxr fpscr, r3
++	mov r0, #0
++	bx lr
++
++.global fetestexcept
++.type fetestexcept,%function
++fetestexcept: /* returns (r0 & 0x1f) & FPSCR */
++	and r0, r0, #0x1f /* restrict the request to the low five bits */
++	fmrx r3, fpscr
++	and r0, r0, r3
++	bx lr
++
++.global feclearexcept
++.type feclearexcept,%function
++feclearexcept: /* clears the FPSCR bits selected by r0 & 0x1f; returns 0 */
++	and r0, r0, #0x1f
++	fmrx r3, fpscr
++	bic r3, r3, r0 /* r3 &= ~r0 */
++	fmxr fpscr, r3
++	mov r0, #0
++	bx lr
++
++.global feraiseexcept
++.type feraiseexcept,%function
++feraiseexcept: /* sets the FPSCR bits selected by r0 & 0x1f; returns 0 */
++	and r0, r0, #0x1f
++	fmrx r3, fpscr
++	orr r3, r3, r0 /* only sets the bits in the register; nothing else is done here */
++	fmxr fpscr, r3
++	mov r0, #0
++	bx lr
++
++.global fegetenv
++.type fegetenv,%function
++fegetenv: /* stores the whole FPSCR word at [r0]; returns 0 */
++	fmrx r3, fpscr
++	str r3, [r0]
++	mov r0, #0
++	bx lr
++
++.global fesetenv
++.type fesetenv,%function
++fesetenv: /* loads FPSCR from [r0], or with 0 when r0 is -1; returns 0 */
++	cmn r0, #1 /* Z set iff r0 == -1 (presumably the default-environment marker) */
++	moveq r3, #0 /* r0 == -1: use an all-zero FPSCR */
++	ldrne r3, [r0] /* otherwise read the saved word */
++	fmxr fpscr, r3
++	mov r0, #0
++	bx lr
++
++#endif
+--- /dev/null
++++ b/src/fenv/arm/fenv.c
+@@ -0,0 +1,3 @@
++#if !__ARM_PCS_VFP
++#include "../fenv.c"
++#endif
+--- a/src/fenv/armebhf/fenv.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-../armhf/fenv.s
+--- a/src/fenv/armhf/fenv.s
++++ /dev/null
+@@ -1,64 +0,0 @@
+-.fpu vfp
+-
+-.global fegetround
+-.type fegetround,%function
+-fegetround:
+-	mrc p10, 7, r0, cr1, cr0, 0
+-	and r0, r0, #0xc00000
+-	bx lr
+-
+-.global __fesetround
+-.type __fesetround,%function
+-__fesetround:
+-	mrc p10, 7, r3, cr1, cr0, 0
+-	bic r3, r3, #0xc00000
+-	orr r3, r3, r0
+-	mcr p10, 7, r3, cr1, cr0, 0
+-	mov r0, #0
+-	bx lr
+-
+-.global fetestexcept
+-.type fetestexcept,%function
+-fetestexcept:
+-	and r0, r0, #0x1f
+-	mrc p10, 7, r3, cr1, cr0, 0
+-	and r0, r0, r3
+-	bx lr
+-
+-.global feclearexcept
+-.type feclearexcept,%function
+-feclearexcept:
+-	and r0, r0, #0x1f
+-	mrc p10, 7, r3, cr1, cr0, 0
+-	bic r3, r3, r0
+-	mcr p10, 7, r3, cr1, cr0, 0
+-	mov r0, #0
+-	bx lr
+-
+-.global feraiseexcept
+-.type feraiseexcept,%function
+-feraiseexcept:
+-	and r0, r0, #0x1f
+-	mrc p10, 7, r3, cr1, cr0, 0
+-	orr r3, r3, r0
+-	mcr p10, 7, r3, cr1, cr0, 0
+-	mov r0, #0
+-	bx lr
+-
+-.global fegetenv
+-.type fegetenv,%function
+-fegetenv:
+-	mrc p10, 7, r3, cr1, cr0, 0
+-	str r3, [r0]
+-	mov r0, #0
+-	bx lr
+-
+-.global fesetenv
+-.type fesetenv,%function
+-fesetenv:
+-	cmn r0, #1
+-	moveq r3, #0
+-	ldrne r3, [r0]
+-	mcr p10, 7, r3, cr1, cr0, 0
+-	mov r0, #0
+-	bx lr
+--- a/src/fenv/armhf/fenv.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-fenv.s
+--- a/src/fenv/mips-sf/fenv.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-../fenv.c
+--- /dev/null
++++ b/src/fenv/mips/fenv-sf.c
+@@ -0,0 +1,3 @@
++#ifdef __mips_soft_float
++#include "../fenv.c"
++#endif
+--- /dev/null
++++ b/src/fenv/mips/fenv.S
+@@ -0,0 +1,71 @@
++#ifndef __mips_soft_float
++
++.set noreorder
++
++.global feclearexcept
++.type  feclearexcept,@function
++feclearexcept: /* clears the bits selected by $4 & 0x7c in FP control register 31; returns 0 */
++	and     $4, $4, 0x7c
++	cfc1    $5, $31
++	or      $5, $5, $4 /* set the selected bits ... */
++	xor     $5, $5, $4 /* ... then flip them, leaving them clear */
++	ctc1    $5, $31
++	jr      $ra
++	li      $2, 0 /* branch delay slot (noreorder): return value 0 */
++
++.global feraiseexcept
++.type  feraiseexcept,@function
++feraiseexcept: /* sets the bits selected by $4 & 0x7c in FP control register 31; returns 0 */
++	and     $4, $4, 0x7c
++	cfc1    $5, $31
++	or      $5, $5, $4
++	ctc1    $5, $31
++	jr      $ra
++	li      $2, 0 /* branch delay slot: return value 0 */
++
++.global fetestexcept
++.type  fetestexcept,@function
++fetestexcept: /* returns FP control register 31 masked by ($4 & 0x7c) */
++	and     $4, $4, 0x7c
++	cfc1    $2, $31
++	jr      $ra
++	and     $2, $2, $4 /* branch delay slot: apply the mask to the return value */
++
++.global fegetround
++.type  fegetround,@function
++fegetround: /* returns the low two bits of FP control register 31 */
++	cfc1    $2, $31
++	jr      $ra
++	andi    $2, $2, 3 /* branch delay slot */
++
++.global __fesetround
++.type __fesetround,@function
++__fesetround: /* replaces the low two bits of FP control register 31 with $4; returns 0 */
++	cfc1    $5, $31
++	li      $6, -4 /* mask with the low two bits clear */
++	and     $5, $5, $6
++	or      $5, $5, $4 /* $4 is OR-ed in unmasked - presumably validated by the caller */
++	ctc1    $5, $31
++	jr      $ra
++	li      $2, 0 /* branch delay slot: return value 0 */
++
++.global fegetenv
++.type  fegetenv,@function
++fegetenv: /* stores FP control register 31 at 0($4); returns 0 */
++	cfc1    $5, $31
++	sw      $5, 0($4)
++	jr      $ra
++	li      $2, 0 /* branch delay slot: return value 0 */
++
++.global fesetenv
++.type  fesetenv,@function
++fesetenv: /* loads FP control register 31 from 0($4), or with 0 when $4 is -1; returns 0 */
++	addiu   $5, $4, 1 /* $5 becomes 0 exactly when $4 == -1 */
++	beq     $5, $0, 1f /* $4 == -1: skip the load and write that 0 */
++	 nop
++	lw      $5, 0($4)
++1:	ctc1    $5, $31
++	jr      $ra
++	li      $2, 0 /* branch delay slot: return value 0 */
++
++#endif
+--- a/src/fenv/mips/fenv.s
++++ /dev/null
+@@ -1,67 +0,0 @@
+-.set noreorder
+-
+-.global feclearexcept
+-.type  feclearexcept,@function
+-feclearexcept:
+-	and     $4, $4, 0x7c
+-	cfc1    $5, $31
+-	or      $5, $5, $4
+-	xor     $5, $5, $4
+-	ctc1    $5, $31
+-	jr      $ra
+-	li      $2, 0
+-
+-.global feraiseexcept
+-.type  feraiseexcept,@function
+-feraiseexcept:
+-	and     $4, $4, 0x7c
+-	cfc1    $5, $31
+-	or      $5, $5, $4
+-	ctc1    $5, $31
+-	jr      $ra
+-	li      $2, 0
+-
+-.global fetestexcept
+-.type  fetestexcept,@function
+-fetestexcept:
+-	and     $4, $4, 0x7c
+-	cfc1    $2, $31
+-	jr      $ra
+-	and     $2, $2, $4
+-
+-.global fegetround
+-.type  fegetround,@function
+-fegetround:
+-	cfc1    $2, $31
+-	jr      $ra
+-	andi    $2, $2, 3
+-
+-.global __fesetround
+-.type __fesetround,@function
+-__fesetround:
+-	cfc1    $5, $31
+-	li      $6, -4
+-	and     $5, $5, $6
+-	or      $5, $5, $4
+-	ctc1    $5, $31
+-	jr      $ra
+-	li      $2, 0
+-
+-.global fegetenv
+-.type  fegetenv,@function
+-fegetenv:
+-	cfc1    $5, $31
+-	sw      $5, 0($4)
+-	jr      $ra
+-	li      $2, 0
+-
+-.global fesetenv
+-.type  fesetenv,@function
+-fesetenv:
+-	addiu   $5, $4, 1
+-	beq     $5, $0, 1f
+-	 nop
+-	lw      $5, 0($4)
+-1:	ctc1    $5, $31
+-	jr      $ra
+-	li      $2, 0
+--- a/src/fenv/mipsel-sf/fenv.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-../fenv.c
+--- a/src/fenv/sh-nofpu/fenv.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-../fenv.c
+--- /dev/null
++++ b/src/fenv/sh/fenv-nofpu.c
+@@ -0,0 +1,3 @@
++#if !__SH_FPU_ANY__ && !__SH4__
++#include "../fenv.c"
++#endif
+--- /dev/null
++++ b/src/fenv/sh/fenv.S
+@@ -0,0 +1,78 @@
++#if __SH_FPU_ANY__ || __SH4__
++
++.global fegetround
++.type   fegetround, @function
++fegetround:     ! returns the low two bits of FPSCR
++	sts fpscr, r0
++	rts
++	 and #3, r0     ! delay slot: executes before the return completes
++
++.global __fesetround
++.type   __fesetround, @function
++__fesetround:	sts fpscr, r0   ! r0 = current FPSCR, r4 = requested mode; layout is compact to keep the patch hunk line count
++	mov #-4, r1     ! mask with the two mode bits (the ones fegetround reads) clear; r1 is scratch, as in fesetenv
++	and r1, r0      ! drop the old mode first: OR-ing alone could never clear a mode bit that was already set
++	or  r4, r0      ! insert the new mode (unmasked - presumably validated by the caller)
++	lds r0, fpscr
++	rts
++	 mov #0, r0     ! delay slot: return 0
++.global fetestexcept
++.type   fetestexcept, @function
++fetestexcept:   ! returns FPSCR & r4 & 0x7c
++	sts fpscr, r0
++	and r4, r0
++	rts
++	 and #0x7c, r0  ! delay slot: restrict the result to bits 2-6
++
++.global feclearexcept
++.type   feclearexcept, @function
++feclearexcept:  ! clears the FPSCR bits selected by r4 & 0x7c; returns 0
++	mov r4, r0
++	and #0x7c, r0
++	not r0, r4      ! r4 = inverted mask of the bits to clear
++	sts fpscr, r0
++	and r4, r0
++	lds r0, fpscr
++	rts
++	 mov #0, r0     ! delay slot: return 0
++
++.global feraiseexcept
++.type   feraiseexcept, @function
++feraiseexcept:  ! sets the FPSCR bits selected by r4 & 0x7c; returns 0
++	mov r4, r0
++	and #0x7c, r0
++	sts fpscr, r4   ! r4 is reused to hold the current FPSCR
++	or  r4, r0
++	lds r0, fpscr
++	rts
++	 mov #0, r0     ! delay slot: return 0
++
++.global fegetenv
++.type   fegetenv, @function
++fegetenv:       ! stores the whole FPSCR word at @r4; returns 0
++	sts fpscr, r0
++	mov.l r0, @r4
++	rts
++	 mov #0, r0     ! delay slot: return 0
++
++.global fesetenv
++.type   fesetenv, @function
++fesetenv:       ! loads FPSCR from @r4, or resets it when r4 is -1; returns 0
++	mov r4, r0
++	cmp/eq #-1, r0  ! T set iff r4 == -1
++	bf 1f           ! T clear: a real environment pointer was passed
++
++	! the default environment is complicated by the fact that we need to
++	! preserve the current precision bit, which we do not know a priori
++	sts fpscr, r0
++	mov #8, r1
++	swap.w r1, r1   ! r1 = 8 << 16 = 0x80000, i.e. bit 19 only
++	bra 2f
++	 and r1, r0     ! delay slot: keep bit 19 of the current FPSCR, zero everything else
++
++1:	mov.l @r4, r0      ! non-default environment
++2:	lds r0, fpscr
++	rts
++	 mov #0, r0     ! delay slot: return 0
++
++#endif
+--- a/src/fenv/sh/fenv.s
++++ /dev/null
+@@ -1,74 +0,0 @@
+-.global fegetround
+-.type   fegetround, @function
+-fegetround:
+-	sts fpscr, r0
+-	rts
+-	 and #3, r0
+-
+-.global __fesetround
+-.type   __fesetround, @function
+-__fesetround:
+-	sts fpscr, r0
+-	or  r4, r0
+-	lds r0, fpscr
+-	rts
+-	 mov #0, r0
+-
+-.global fetestexcept
+-.type   fetestexcept, @function
+-fetestexcept:
+-	sts fpscr, r0
+-	and r4, r0
+-	rts
+-	 and #0x7c, r0
+-
+-.global feclearexcept
+-.type   feclearexcept, @function
+-feclearexcept:
+-	mov r4, r0
+-	and #0x7c, r0
+-	not r0, r4
+-	sts fpscr, r0
+-	and r4, r0
+-	lds r0, fpscr
+-	rts
+-	 mov #0, r0
+-
+-.global feraiseexcept
+-.type   feraiseexcept, @function
+-feraiseexcept:
+-	mov r4, r0
+-	and #0x7c, r0
+-	sts fpscr, r4
+-	or  r4, r0
+-	lds r0, fpscr
+-	rts
+-	 mov #0, r0
+-
+-.global fegetenv
+-.type   fegetenv, @function
+-fegetenv:
+-	sts fpscr, r0
+-	mov.l r0, @r4
+-	rts
+-	 mov #0, r0
+-
+-.global fesetenv
+-.type   fesetenv, @function
+-fesetenv:
+-	mov r4, r0
+-	cmp/eq #-1, r0
+-	bf 1f
+-
+-	! the default environment is complicated by the fact that we need to
+-	! preserve the current precision bit, which we do not know a priori
+-	sts fpscr, r0
+-	mov #8, r1
+-	swap.w r1, r1
+-	bra 2f
+-	 and r1, r0
+-
+-1:	mov.l @r4, r0      ! non-default environment
+-2:	lds r0, fpscr
+-	rts
+-	 mov #0, r0
+--- a/src/fenv/sheb-nofpu/fenv.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-../fenv.c
+--- a/src/internal/arm/syscall.s
++++ b/src/internal/arm/syscall.s
+@@ -1,3 +1,4 @@
++.syntax unified
+ .global __syscall
+ .hidden __syscall
+ .type __syscall,%function
+@@ -11,6 +12,4 @@ __syscall:
+ 	ldmfd ip,{r3,r4,r5,r6}
+ 	svc 0
+ 	ldmfd sp!,{r4,r5,r6,r7}
+-	tst lr,#1
+-	moveq pc,lr
+ 	bx lr
+--- /dev/null
++++ b/src/internal/atomic.h
+@@ -0,0 +1,275 @@
++#ifndef _ATOMIC_H
++#define _ATOMIC_H
++
++#include <stdint.h>
++
++#include "atomic_arch.h"
++
++#ifdef a_ll
++
++#ifndef a_pre_llsc
++#define a_pre_llsc()
++#endif
++
++#ifndef a_post_llsc
++#define a_post_llsc()
++#endif
++
++#ifndef a_cas
++#define a_cas a_cas
++static inline int a_cas(volatile int *p, int t, int s) /* compare-and-swap built from a_ll/a_sc: stores s if *p == t; returns the value that was loaded */
++{
++	int old;
++	a_pre_llsc();
++	do old = a_ll(p);
++	while (old==t && !a_sc(p, s)); /* retry only when the value matched but a_sc returned 0 (presumably a failed store) */
++	a_post_llsc();
++	return old;
++}
++#endif
++
++#ifndef a_swap
++#define a_swap a_swap
++static inline int a_swap(volatile int *p, int v) /* LL/SC form: unconditionally store v to *p; returns the previous value */
++{
++	int old;
++	a_pre_llsc();
++	do old = a_ll(p);
++	while (!a_sc(p, v)); /* loop until a_sc returns nonzero */
++	a_post_llsc();
++	return old;
++}
++#endif
++
++#ifndef a_fetch_add
++#define a_fetch_add a_fetch_add
++static inline int a_fetch_add(volatile int *p, int v) /* LL/SC form: add v to *p; returns the value before the addition */
++{
++	int old;
++	a_pre_llsc();
++	do old = a_ll(p);
++	while (!a_sc(p, (unsigned)old + v)); /* sum is computed in unsigned arithmetic, so it wraps instead of overflowing */
++	a_post_llsc();
++	return old;
++}
++#endif
++
++#ifndef a_fetch_and
++#define a_fetch_and a_fetch_and
++static inline int a_fetch_and(volatile int *p, int v) /* LL/SC form: *p &= v; returns the value before the update */
++{
++	int old;
++	a_pre_llsc();
++	do old = a_ll(p);
++	while (!a_sc(p, old & v)); /* loop until a_sc returns nonzero */
++	a_post_llsc();
++	return old;
++}
++#endif
++
++#ifndef a_fetch_or
++#define a_fetch_or a_fetch_or
++static inline int a_fetch_or(volatile int *p, int v) /* LL/SC form: *p |= v; returns the value before the update */
++{
++	int old;
++	a_pre_llsc();
++	do old = a_ll(p);
++	while (!a_sc(p, old | v)); /* loop until a_sc returns nonzero */
++	a_post_llsc();
++	return old;
++}
++#endif
++
++#endif
++
++#ifndef a_cas
++#error missing definition of a_cas
++#endif
++
++#ifndef a_swap
++#define a_swap a_swap
++static inline int a_swap(volatile int *p, int v) /* fallback built on a_cas: store v to *p; returns the previous value */
++{
++	int old;
++	do old = *p;
++	while (a_cas(p, old, v) != old); /* retry if *p changed between the read and the a_cas */
++	return old;
++}
++#endif
++
++#ifndef a_fetch_add
++#define a_fetch_add a_fetch_add
++static inline int a_fetch_add(volatile int *p, int v) /* fallback built on a_cas: add v to *p; returns the value before the addition */
++{
++	int old;
++	do old = *p;
++	while (a_cas(p, old, (unsigned)old+v) != old); /* unsigned sum wraps instead of overflowing; retry if *p changed */
++	return old;
++}
++#endif
++
++#ifndef a_fetch_and
++#define a_fetch_and a_fetch_and
++static inline int a_fetch_and(volatile int *p, int v) /* fallback built on a_cas: *p &= v; returns the value before the update */
++{
++	int old;
++	do old = *p;
++	while (a_cas(p, old, old&v) != old); /* retry if *p changed between the read and the a_cas */
++	return old;
++}
++#endif
++#ifndef a_fetch_or
++#define a_fetch_or a_fetch_or
++static inline int a_fetch_or(volatile int *p, int v) /* fallback built on a_cas: *p |= v; returns the value before the update */
++{
++	int old;
++	do old = *p;
++	while (a_cas(p, old, old|v) != old); /* retry if *p changed between the read and the a_cas */
++	return old;
++}
++#endif
++
++#ifndef a_and
++#define a_and a_and
++static inline void a_and(volatile int *p, int v) /* *p &= v atomically; a_fetch_and with the old value discarded */
++{
++	a_fetch_and(p, v);
++}
++#endif
++
++#ifndef a_or
++#define a_or a_or
++static inline void a_or(volatile int *p, int v) /* *p |= v atomically; a_fetch_or with the old value discarded */
++{
++	a_fetch_or(p, v);
++}
++#endif
++
++#ifndef a_inc
++#define a_inc a_inc
++static inline void a_inc(volatile int *p) /* atomically add 1 to *p */
++{
++	a_fetch_add(p, 1);
++}
++#endif
++
++#ifndef a_dec
++#define a_dec a_dec
++static inline void a_dec(volatile int *p) /* atomically subtract 1 from *p */
++{
++	a_fetch_add(p, -1);
++}
++#endif
++
++#ifndef a_store
++#define a_store a_store
++static inline void a_store(volatile int *p, int v) /* store v to *p, by fenced plain store or by atomic swap */
++{
++#ifdef a_barrier
++	a_barrier(); /* a_barrier was already defined before this point: plain store fenced on both sides */
++	*p = v;
++	a_barrier();
++#else
++	a_swap(p, v); /* no barrier defined yet: store through a_swap and discard the old value */
++#endif
++}
++#endif
++
++#ifndef a_barrier
++#define a_barrier a_barrier
++static void a_barrier() /* fallback barrier: performs an a_cas on a private local and relies on a_cas for the ordering effect */
++{
++	volatile int tmp = 0;
++	a_cas(&tmp, 0, 0); /* the value is irrelevant; only the side effect of the atomic operation is wanted */
++}
++#endif
++
++#ifndef a_spin
++#define a_spin a_barrier
++#endif
++
++#ifndef a_and_64
++#define a_and_64 a_and_64
++static inline void a_and_64(volatile uint64_t *p, uint64_t v) /* AND v into *p as two separate 32-bit a_and calls, so the 64-bit update is not a single atomic step */
++{
++	union { uint64_t v; uint32_t r[2]; } u = { v }; /* split v into the same two words that *p occupies in memory */
++	if (u.r[0]+1) a_and((int *)p, u.r[0]); /* +1 wraps to 0 for an all-ones half: AND would be a no-op, so skip it */
++	if (u.r[1]+1) a_and((int *)p+1, u.r[1]);
++}
++#endif
++
++#ifndef a_or_64
++#define a_or_64 a_or_64
++static inline void a_or_64(volatile uint64_t *p, uint64_t v) /* OR v into *p as two separate 32-bit a_or calls, so the 64-bit update is not a single atomic step */
++{
++	union { uint64_t v; uint32_t r[2]; } u = { v }; /* split v into the same two words that *p occupies in memory */
++	if (u.r[0]) a_or((int *)p, u.r[0]); /* a zero half would be a no-op, so skip it */
++	if (u.r[1]) a_or((int *)p+1, u.r[1]);
++}
++#endif
++
++#ifndef a_cas_p
++#define a_cas_p a_cas_p
++static inline void *a_cas_p(volatile void *p, void *t, void *s) /* pointer compare-and-swap routed through the int a_cas */
++{
++	return (void *)a_cas((volatile int *)p, (int)t, (int)s); /* NOTE(review): the casts only preserve the value where pointers fit in int - other targets presumably define their own a_cas_p */
++}
++#endif
++
++#ifndef a_or_l
++#define a_or_l a_or_l
++static inline void a_or_l(volatile void *p, long v) /* OR v into the long at p, dispatching on the width of long */
++{
++	if (sizeof(long) == sizeof(int)) a_or(p, v); /* compile-time constant condition */
++	else a_or_64(p, v);
++}
++#endif
++
++#ifndef a_crash
++#define a_crash a_crash
++static inline void a_crash() /* fallback crash primitive */
++{
++	*(volatile char *)0=0; /* volatile write to address 0, presumably to force a fault */
++}
++#endif
++
++#ifndef a_ctz_64
++#define a_ctz_64 a_ctz_64
++static inline int a_ctz_64(uint64_t x) /* index of the lowest set bit of x, by multiply-and-table lookup; NOTE(review): x == 0 is not special-cased */
++{
++	static const char debruijn64[64] = {
++		0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
++		62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
++		63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
++		51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
++	};
++	static const char debruijn32[32] = {
++		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
++		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
++	};
++	if (sizeof(long) < 8) { /* narrow long: work on 32-bit halves instead of one 64-bit multiply */
++		uint32_t y = x; /* low half */
++		if (!y) {
++			y = x>>32; /* low half empty: search the high half and add 32 */
++			return 32 + debruijn32[(y&-y)*0x076be629 >> 27];
++		}
++		return debruijn32[(y&-y)*0x076be629 >> 27]; /* y&-y keeps only the lowest set bit; the top 5 bits of the product index the table */
++	}
++	return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58]; /* same scheme with a 64-entry table and the top 6 bits */
++}
++#endif
++
++#ifndef a_ctz_l
++#define a_ctz_l a_ctz_l
++static inline int a_ctz_l(unsigned long x) /* index of the lowest set bit of an unsigned long; NOTE(review): x == 0 is not special-cased */
++{
++	static const char debruijn32[32] = {
++		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
++		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
++	};
++	if (sizeof(long) == 8) return a_ctz_64(x); /* 64-bit long: defer to the 64-bit routine */
++	return debruijn32[(x&-x)*0x076be629 >> 27]; /* x&-x keeps only the lowest set bit; the top 5 bits of the product index the table */
++}
++#endif
++
++#endif
+--- a/src/internal/dynlink.h
++++ b/src/internal/dynlink.h
+@@ -64,6 +64,10 @@ struct fdpic_dummy_loadmap {
+ #define DL_FDPIC 0
+ #endif
+ 
++#ifndef DL_NOMMU_SUPPORT
++#define DL_NOMMU_SUPPORT 0
++#endif
++
+ #if !DL_FDPIC
+ #define IS_RELATIVE(x,s) ( \
+ 	(R_TYPE(x) == REL_RELATIVE) || \
+--- a/src/internal/libc.h
++++ b/src/internal/libc.h
+@@ -11,13 +11,20 @@ struct __locale_struct {
+ 	const struct __locale_map *volatile cat[6];
+ };
+ 
++struct tls_module {
++	struct tls_module *next;
++	void *image;
++	size_t len, size, align, offset;
++};
++
+ struct __libc {
+ 	int can_do_threads;
+ 	int threaded;
+ 	int secure;
+ 	volatile int threads_minus_1;
+ 	size_t *auxv;
+-	size_t tls_size;
++	struct tls_module *tls_head;
++	size_t tls_size, tls_align, tls_cnt;
+ 	size_t page_size;
+ 	struct __locale_struct global_locale;
+ };
+--- /dev/null
++++ b/src/internal/sh/__shcall.c
+@@ -0,0 +1,5 @@
++__attribute__((__visibility__("hidden")))
++int __shcall(void *arg, int (*func)(void *)) /* call func(arg) and return its result; note the argument comes before the function pointer */
++{
++	return func(arg);
++}
+--- a/src/internal/syscall.h
++++ b/src/internal/syscall.h
+@@ -17,9 +17,7 @@
+ typedef long syscall_arg_t;
+ #endif
+ 
+-#ifdef SHARED
+ __attribute__((visibility("hidden")))
+-#endif
+ long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...),
+ 	__syscall_cp(syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t,
+ 	             syscall_arg_t, syscall_arg_t, syscall_arg_t);
+@@ -65,7 +63,7 @@ long __syscall_ret(unsigned long), __sys
+ #define __syscall_cp(...) __SYSCALL_DISP(__syscall_cp,__VA_ARGS__)
+ #define syscall_cp(...) __syscall_ret(__syscall_cp(__VA_ARGS__))
+ 
+-#ifdef SYS_socket
++#ifndef SYSCALL_USE_SOCKETCALL
+ #define __socketcall(nm,a,b,c,d,e,f) syscall(SYS_##nm, a, b, c, d, e, f)
+ #define __socketcall_cp(nm,a,b,c,d,e,f) syscall_cp(SYS_##nm, a, b, c, d, e, f)
+ #else
+--- a/src/internal/version.c
++++ b/src/internal/version.c
+@@ -1,12 +1,9 @@
+-#ifdef SHARED
+-
+ #include "version.h"
+ 
+ static const char version[] = VERSION;
+ 
++__attribute__((__visibility__("hidden")))
+ const char *__libc_get_version()
+ {
+ 	return version;
+ }
+-
+-#endif
+--- a/src/internal/vis.h
++++ b/src/internal/vis.h
+@@ -4,10 +4,9 @@
+  * override default visibilities to reduce the size and performance costs
+  * of position-independent code. */
+ 
+-#ifndef CRT
+-#ifdef SHARED
++#if !defined(CRT) && !defined(__ASSEMBLER__)
+ 
+-/* For shared libc.so, all symbols should be protected, but some toolchains
++/* Conceptually, all symbols should be protected, but some toolchains
+  * fail to support copy relocations for protected data, so exclude all
+  * exported data symbols. */
+ 
+@@ -25,16 +24,4 @@ extern char *optarg, **environ, **__envi
+ 
+ #pragma GCC visibility push(protected)
+ 
+-#elif defined(__PIC__)
+-
+-/* If building static libc.a as position-independent code, try to make
+- * everything hidden except possibly-undefined weak references. */
+-
+-__attribute__((__visibility__("default")))
+-extern void (*const __init_array_start)(), (*const __init_array_end)(),
+-	(*const __fini_array_start)(), (*const __fini_array_end)();
+-
+-#pragma GCC visibility push(hidden)
+-
+-#endif
+ #endif
+--- /dev/null
++++ b/src/ldso/__dlsym.c
+@@ -0,0 +1,13 @@
++#include <dlfcn.h>
++#include "libc.h"
++
++__attribute__((__visibility__("hidden")))
++void __dl_seterr(const char *, ...);
++
++static void *stub_dlsym(void *restrict p, const char *restrict s, void *restrict ra) /* stub lookup: never resolves anything; p and ra are ignored */
++{
++	__dl_seterr("Symbol not found: %s", s); /* records the failure through __dl_seterr */
++	return 0;
++}
++
++weak_alias(stub_dlsym, __dlsym);
+--- a/src/ldso/arm/dlsym.s
++++ b/src/ldso/arm/dlsym.s
+@@ -1,3 +1,4 @@
++.syntax unified
+ .text
+ .global dlsym
+ .hidden __dlsym
+--- /dev/null
++++ b/src/ldso/arm/find_exidx.c
+@@ -0,0 +1,42 @@
++#define _GNU_SOURCE
++#include <link.h>
++#include <stdint.h>
++
++struct find_exidx_data {
++	uintptr_t pc, exidx_start;
++	int exidx_len;
++};
++
++static int find_exidx(struct dl_phdr_info *info, size_t size, void *ptr) /* dl_iterate_phdr callback: records this object's PT_ARM_EXIDX range in *ptr; returns nonzero iff data->pc lies in one of its PT_LOAD segments */
++{
++	struct find_exidx_data *data = ptr;
++	const ElfW(Phdr) *phdr = info->dlpi_phdr;
++	uintptr_t addr, exidx_start = 0;
++	int i, match = 0, exidx_len = 0;
++
++	for (i = info->dlpi_phnum; i > 0; i--, phdr++) { /* walk every program header of this object */
++		addr = info->dlpi_addr + phdr->p_vaddr; /* address of the segment as loaded */
++		switch (phdr->p_type) {
++		case PT_LOAD:
++			match |= data->pc >= addr && data->pc < addr + phdr->p_memsz;
++			break;
++		case PT_ARM_EXIDX:
++			exidx_start = addr;
++			exidx_len = phdr->p_memsz;
++			break;
++		}
++	}
++	data->exidx_start = exidx_start; /* written on every call, so after a nonzero return it describes the matching object */
++	data->exidx_len = exidx_len;
++	return match;
++}
++
++uintptr_t __gnu_Unwind_Find_exidx(uintptr_t pc, int *pcount) /* returns the start of the PT_ARM_EXIDX range of the object containing pc and stores its entry count in *pcount; returns 0 if none */
++{
++	struct find_exidx_data data;
++	data.pc = pc;
++	if (dl_iterate_phdr(find_exidx, &data) <= 0) /* no object's PT_LOAD covered pc; *pcount is left untouched */
++		return 0;
++	*pcount = data.exidx_len / 8; /* bytes to entries, presumably 8 bytes per index entry */
++	return data.exidx_start;
++}
+--- a/src/ldso/dl_iterate_phdr.c
++++ b/src/ldso/dl_iterate_phdr.c
+@@ -1,12 +1,10 @@
+-#ifndef SHARED
+-
+ #include <elf.h>
+ #include <link.h>
+ #include "libc.h"
+ 
+ #define AUX_CNT 38
+ 
+-int dl_iterate_phdr(int(*callback)(struct dl_phdr_info *info, size_t size, void *data), void *data)
++static int static_dl_iterate_phdr(int(*callback)(struct dl_phdr_info *info, size_t size, void *data), void *data)
+ {
+ 	unsigned char *p;
+ 	ElfW(Phdr) *phdr, *tls_phdr=0;
+@@ -40,4 +38,5 @@ int dl_iterate_phdr(int(*callback)(struc
+ 	}
+ 	return (callback)(&info, sizeof (info), data);
+ }
+-#endif
++
++weak_alias(static_dl_iterate_phdr, dl_iterate_phdr);
+--- a/src/ldso/dladdr.c
++++ b/src/ldso/dladdr.c
+@@ -1,9 +1,10 @@
+ #define _GNU_SOURCE
+ #include <dlfcn.h>
++#include "libc.h"
+ 
+-int __dladdr(const void *, Dl_info *);
+-
+-int dladdr(const void *addr, Dl_info *info)
++static int stub_dladdr(const void *addr, Dl_info *info)
+ {
+-	return __dladdr(addr, info);
++	return 0;
+ }
++
++weak_alias(stub_dladdr, dladdr);
+--- /dev/null
++++ b/src/ldso/dlclose.c
+@@ -0,0 +1,9 @@
++#include <dlfcn.h>
++
++__attribute__((__visibility__("hidden")))
++int __dl_invalid_handle(void *);
++
++int dlclose(void *p) /* only validates the handle: returns whatever __dl_invalid_handle(p) returns and does nothing else */
++{
++	return __dl_invalid_handle(p);
++}
+--- /dev/null
++++ b/src/ldso/dlerror.c
+@@ -0,0 +1,64 @@
++#include <dlfcn.h>
++#include <stdlib.h>
++#include <stdarg.h>
++#include "pthread_impl.h"
++#include "libc.h"
++
++char *dlerror() /* return the calling thread's pending error message and clear the pending flag; returns 0 if none is pending */
++{
++	pthread_t self = __pthread_self();
++	if (!self->dlerror_flag) return 0;
++	self->dlerror_flag = 0; /* a second call without a new error returns 0 */
++	char *s = self->dlerror_buf;
++	if (s == (void *)-1) /* (void *)-1 is the marker __dl_vseterr stores when malloc fails */
++		return "Dynamic linker failed to allocate memory for error message";
++	else
++		return s;
++}
++
++void __dl_thread_cleanup(void) /* free the calling thread's dlerror buffer, if it holds a real allocation */
++{
++	pthread_t self = __pthread_self();
++	if (self->dlerror_buf != (void *)-1) /* (void *)-1 is a marker value, not a heap pointer */
++		free(self->dlerror_buf);
++}
++
++__attribute__((__visibility__("hidden")))
++void __dl_vseterr(const char *fmt, va_list ap) /* format the message into a fresh heap buffer and make it the calling thread's pending dlerror */
++{
++	va_list ap2;
++	va_copy(ap2, ap); /* the arguments are walked twice: once to measure, once to format */
++	pthread_t self = __pthread_self();
++	if (self->dlerror_buf != (void *)-1) /* never pass the failure marker to free */
++		free(self->dlerror_buf);
++	size_t len = vsnprintf(0, 0, fmt, ap2); /* measuring pass: length without the terminating null */
++	va_end(ap2);
++	char *buf = malloc(len+1);
++	if (buf) {
++		vsnprintf(buf, len+1, fmt, ap);
++	} else {
++		buf = (void *)-1;	/* marker that dlerror turns into a fixed out-of-memory string */
++	}
++	self->dlerror_buf = buf;
++	self->dlerror_flag = 1;
++}
++
++__attribute__((__visibility__("hidden")))
++void __dl_seterr(const char *fmt, ...) /* variadic front end: packages its arguments and forwards them to __dl_vseterr */
++{
++	va_list ap;
++	va_start(ap, fmt);
++	__dl_vseterr(fmt, ap);
++	va_end(ap);
++}
++
++__attribute__((__visibility__("hidden")))
++int __dl_invalid_handle(void *);
++
++static int stub_invalid_handle(void *h) /* stub check: treats every handle as invalid, records an error and returns 1 */
++{
++	__dl_seterr("Invalid library handle %p", (void *)h);
++	return 1;
++}
++
++weak_alias(stub_invalid_handle, __dl_invalid_handle);
+--- a/src/ldso/dlinfo.c
++++ b/src/ldso/dlinfo.c
+@@ -1,9 +1,19 @@
+ #define _GNU_SOURCE
+ #include <dlfcn.h>
+ 
+-int __dlinfo(void *, int, void *);
++__attribute__((__visibility__("hidden")))
++int __dl_invalid_handle(void *);
++
++__attribute__((__visibility__("hidden")))
++void __dl_seterr(const char *, ...);
+ 
+ int dlinfo(void *dso, int req, void *res)
+ {
+-	return __dlinfo(dso, req, res);
++	if (__dl_invalid_handle(dso)) return -1; /* nonzero means the handle was rejected */
++	if (req != RTLD_DI_LINKMAP) { /* the only request handled below */
++		__dl_seterr("Unsupported request %d", req);
++		return -1;
++	}
++	*(struct link_map **)res = dso; /* the handle value itself is stored as the link_map pointer */
++	return 0;
+ }
+--- /dev/null
++++ b/src/ldso/dlopen.c
+@@ -0,0 +1,13 @@
++#include <dlfcn.h>
++#include "libc.h"
++
++__attribute__((__visibility__("hidden")))
++void __dl_seterr(const char *, ...);
++
++static void *stub_dlopen(const char *file, int mode) /* always fails; both arguments are unused */
++{
++	__dl_seterr("Dynamic loading not supported");
++	return 0;
++}
++
++weak_alias(stub_dlopen, dlopen); /* published as dlopen via weak_alias */
+--- a/src/ldso/dlstart.c
++++ /dev/null
+@@ -1,150 +0,0 @@
+-#include <stddef.h>
+-#include "dynlink.h"
+-
+-#ifdef SHARED
+-
+-#ifndef START
+-#define START "_dlstart"
+-#endif
+-
+-#include "crt_arch.h"
+-
+-#ifndef GETFUNCSYM
+-#define GETFUNCSYM(fp, sym, got) do { \
+-	__attribute__((__visibility__("hidden"))) void sym(); \
+-	static void (*static_func_ptr)() = sym; \
+-	__asm__ __volatile__ ( "" : "+m"(static_func_ptr) : : "memory"); \
+-	*(fp) = static_func_ptr; } while(0)
+-#endif
+-
+-__attribute__((__visibility__("hidden")))
+-void _dlstart_c(size_t *sp, size_t *dynv)
+-{
+-	size_t i, aux[AUX_CNT], dyn[DYN_CNT];
+-	size_t *rel, rel_size, base;
+-
+-	int argc = *sp;
+-	char **argv = (void *)(sp+1);
+-
+-	for (i=argc+1; argv[i]; i++);
+-	size_t *auxv = (void *)(argv+i+1);
+-
+-	for (i=0; i<AUX_CNT; i++) aux[i] = 0;
+-	for (i=0; auxv[i]; i+=2) if (auxv[i]<AUX_CNT)
+-		aux[auxv[i]] = auxv[i+1];
+-
+-#if DL_FDPIC
+-	struct fdpic_loadseg *segs, fakeseg;
+-	size_t j;
+-	if (dynv) {
+-		/* crt_arch.h entry point asm is responsible for reserving
+-		 * space and moving the extra fdpic arguments to the stack
+-		 * vector where they are easily accessible from C. */
+-		segs = ((struct fdpic_loadmap *)(sp[-1] ? sp[-1] : sp[-2]))->segs;
+-	} else {
+-		/* If dynv is null, the entry point was started from loader
+-		 * that is not fdpic-aware. We can assume normal fixed-
+-		 * displacement ELF loading was performed, but when ldso was
+-		 * run as a command, finding the Ehdr is a heursitic: we
+-		 * have to assume Phdrs start in the first 4k of the file. */
+-		base = aux[AT_BASE];
+-		if (!base) base = aux[AT_PHDR] & -4096;
+-		segs = &fakeseg;
+-		segs[0].addr = base;
+-		segs[0].p_vaddr = 0;
+-		segs[0].p_memsz = -1;
+-		Ehdr *eh = (void *)base;
+-		Phdr *ph = (void *)(base + eh->e_phoff);
+-		size_t phnum = eh->e_phnum;
+-		size_t phent = eh->e_phentsize;
+-		while (phnum-- && ph->p_type != PT_DYNAMIC)
+-			ph = (void *)((size_t)ph + phent);
+-		dynv = (void *)(base + ph->p_vaddr);
+-	}
+-#endif
+-
+-	for (i=0; i<DYN_CNT; i++) dyn[i] = 0;
+-	for (i=0; dynv[i]; i+=2) if (dynv[i]<DYN_CNT)
+-		dyn[dynv[i]] = dynv[i+1];
+-
+-#if DL_FDPIC
+-	for (i=0; i<DYN_CNT; i++) {
+-		if (i==DT_RELASZ || i==DT_RELSZ) continue;
+-		if (!dyn[i]) continue;
+-		for (j=0; dyn[i]-segs[j].p_vaddr >= segs[j].p_memsz; j++);
+-		dyn[i] += segs[j].addr - segs[j].p_vaddr;
+-	}
+-	base = 0;
+-
+-	const Sym *syms = (void *)dyn[DT_SYMTAB];
+-
+-	rel = (void *)dyn[DT_RELA];
+-	rel_size = dyn[DT_RELASZ];
+-	for (; rel_size; rel+=3, rel_size-=3*sizeof(size_t)) {
+-		if (!IS_RELATIVE(rel[1], syms)) continue;
+-		for (j=0; rel[0]-segs[j].p_vaddr >= segs[j].p_memsz; j++);
+-		size_t *rel_addr = (void *)
+-			(rel[0] + segs[j].addr - segs[j].p_vaddr);
+-		if (R_TYPE(rel[1]) == REL_FUNCDESC_VAL) {
+-			*rel_addr += segs[rel_addr[1]].addr
+-				- segs[rel_addr[1]].p_vaddr
+-				+ syms[R_SYM(rel[1])].st_value;
+-			rel_addr[1] = dyn[DT_PLTGOT];
+-		} else {
+-			size_t val = syms[R_SYM(rel[1])].st_value;
+-			for (j=0; val-segs[j].p_vaddr >= segs[j].p_memsz; j++);
+-			*rel_addr = rel[2] + segs[j].addr - segs[j].p_vaddr + val;
+-		}
+-	}
+-#else
+-	/* If the dynamic linker is invoked as a command, its load
+-	 * address is not available in the aux vector. Instead, compute
+-	 * the load address as the difference between &_DYNAMIC and the
+-	 * virtual address in the PT_DYNAMIC program header. */
+-	base = aux[AT_BASE];
+-	if (!base) {
+-		size_t phnum = aux[AT_PHNUM];
+-		size_t phentsize = aux[AT_PHENT];
+-		Phdr *ph = (void *)aux[AT_PHDR];
+-		for (i=phnum; i--; ph = (void *)((char *)ph + phentsize)) {
+-			if (ph->p_type == PT_DYNAMIC) {
+-				base = (size_t)dynv - ph->p_vaddr;
+-				break;
+-			}
+-		}
+-	}
+-
+-	/* MIPS uses an ugly packed form for GOT relocations. Since we
+-	 * can't make function calls yet and the code is tiny anyway,
+-	 * it's simply inlined here. */
+-	if (NEED_MIPS_GOT_RELOCS) {
+-		size_t local_cnt = 0;
+-		size_t *got = (void *)(base + dyn[DT_PLTGOT]);
+-		for (i=0; dynv[i]; i+=2) if (dynv[i]==DT_MIPS_LOCAL_GOTNO)
+-			local_cnt = dynv[i+1];
+-		for (i=0; i<local_cnt; i++) got[i] += base;
+-	}
+-
+-	rel = (void *)(base+dyn[DT_REL]);
+-	rel_size = dyn[DT_RELSZ];
+-	for (; rel_size; rel+=2, rel_size-=2*sizeof(size_t)) {
+-		if (!IS_RELATIVE(rel[1], 0)) continue;
+-		size_t *rel_addr = (void *)(base + rel[0]);
+-		*rel_addr += base;
+-	}
+-
+-	rel = (void *)(base+dyn[DT_RELA]);
+-	rel_size = dyn[DT_RELASZ];
+-	for (; rel_size; rel+=3, rel_size-=3*sizeof(size_t)) {
+-		if (!IS_RELATIVE(rel[1], 0)) continue;
+-		size_t *rel_addr = (void *)(base + rel[0]);
+-		*rel_addr = base + rel[2];
+-	}
+-#endif
+-
+-	stage2_func dls2;
+-	GETFUNCSYM(&dls2, __dls2, base+dyn[DT_PLTGOT]);
+-	dls2((void *)base, sp);
+-}
+-
+-#endif
+--- a/src/ldso/dynlink.c
++++ /dev/null
+@@ -1,2000 +0,0 @@
+-#define _GNU_SOURCE
+-#include <stdio.h>
+-#include <stdlib.h>
+-#include <stdarg.h>
+-#include <stddef.h>
+-#include <string.h>
+-#include <unistd.h>
+-#include <stdint.h>
+-#include <elf.h>
+-#include <sys/mman.h>
+-#include <limits.h>
+-#include <fcntl.h>
+-#include <sys/stat.h>
+-#include <errno.h>
+-#include <link.h>
+-#include <setjmp.h>
+-#include <pthread.h>
+-#include <ctype.h>
+-#include <dlfcn.h>
+-#include "pthread_impl.h"
+-#include "libc.h"
+-#include "dynlink.h"
+-
+-static void error(const char *, ...);
+-
+-#ifdef SHARED
+-
+-#define MAXP2(a,b) (-(-(a)&-(b)))
+-#define ALIGN(x,y) ((x)+(y)-1 & -(y))
+-
+-struct debug {
+-	int ver;
+-	void *head;
+-	void (*bp)(void);
+-	int state;
+-	void *base;
+-};
+-
+-struct td_index {
+-	size_t args[2];
+-	struct td_index *next;
+-};
+-
+-struct dso {
+-#if DL_FDPIC
+-	struct fdpic_loadmap *loadmap;
+-#else
+-	unsigned char *base;
+-#endif
+-	char *name;
+-	size_t *dynv;
+-	struct dso *next, *prev;
+-
+-	Phdr *phdr;
+-	int phnum;
+-	size_t phentsize;
+-	int refcnt;
+-	Sym *syms;
+-	uint32_t *hashtab;
+-	uint32_t *ghashtab;
+-	int16_t *versym;
+-	char *strings;
+-	unsigned char *map;
+-	size_t map_len;
+-	dev_t dev;
+-	ino_t ino;
+-	signed char global;
+-	char relocated;
+-	char constructed;
+-	char kernel_mapped;
+-	struct dso **deps, *needed_by;
+-	char *rpath_orig, *rpath;
+-	void *tls_image;
+-	size_t tls_len, tls_size, tls_align, tls_id, tls_offset;
+-	size_t relro_start, relro_end;
+-	void **new_dtv;
+-	unsigned char *new_tls;
+-	volatile int new_dtv_idx, new_tls_idx;
+-	struct td_index *td_index;
+-	struct dso *fini_next;
+-	char *shortname;
+-#if DL_FDPIC
+-	unsigned char *base;
+-#else
+-	struct fdpic_loadmap *loadmap;
+-#endif
+-	struct funcdesc {
+-		void *addr;
+-		size_t *got;
+-	} *funcdescs;
+-	size_t *got;
+-	char buf[];
+-};
+-
+-struct symdef {
+-	Sym *sym;
+-	struct dso *dso;
+-};
+-
+-int __init_tp(void *);
+-void __init_libc(char **, char *);
+-
+-const char *__libc_get_version(void);
+-
+-static struct builtin_tls {
+-	char c;
+-	struct pthread pt;
+-	void *space[16];
+-} builtin_tls[1];
+-#define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt)
+-
+-#define ADDEND_LIMIT 4096
+-static size_t *saved_addends, *apply_addends_to;
+-
+-static struct dso ldso;
+-static struct dso *head, *tail, *fini_head;
+-static char *env_path, *sys_path;
+-static unsigned long long gencnt;
+-static int runtime;
+-static int ldd_mode;
+-static int ldso_fail;
+-static int noload;
+-static jmp_buf *rtld_fail;
+-static pthread_rwlock_t lock;
+-static struct debug debug;
+-static size_t tls_cnt, tls_offset, tls_align = MIN_TLS_ALIGN;
+-static size_t static_tls_cnt;
+-static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE };
+-static struct fdpic_loadmap *app_loadmap;
+-static struct fdpic_dummy_loadmap app_dummy_loadmap;
+-
+-struct debug *_dl_debug_addr = &debug;
+-
+-static int dl_strcmp(const char *l, const char *r)
+-{
+-	for (; *l==*r && *l; l++, r++);
+-	return *(unsigned char *)l - *(unsigned char *)r;
+-}
+-#define strcmp(l,r) dl_strcmp(l,r)
+-
+-/* Compute load address for a virtual address in a given dso. */
+-#if DL_FDPIC
+-static void *laddr(const struct dso *p, size_t v)
+-{
+-	size_t j=0;
+-	if (!p->loadmap) return p->base + v;
+-	for (j=0; v-p->loadmap->segs[j].p_vaddr >= p->loadmap->segs[j].p_memsz; j++);
+-	return (void *)(v - p->loadmap->segs[j].p_vaddr + p->loadmap->segs[j].addr);
+-}
+-#define fpaddr(p, v) ((void (*)())&(struct funcdesc){ \
+-	laddr(p, v), (p)->got })
+-#else
+-#define laddr(p, v) (void *)((p)->base + (v))
+-#define fpaddr(p, v) ((void (*)())laddr(p, v))
+-#endif
+-
+-static void decode_vec(size_t *v, size_t *a, size_t cnt)
+-{
+-	size_t i;
+-	for (i=0; i<cnt; i++) a[i] = 0;
+-	for (; v[0]; v+=2) if (v[0]-1<cnt-1) {
+-		a[0] |= 1UL<<v[0];
+-		a[v[0]] = v[1];
+-	}
+-}
+-
+-static int search_vec(size_t *v, size_t *r, size_t key)
+-{
+-	for (; v[0]!=key; v+=2)
+-		if (!v[0]) return 0;
+-	*r = v[1];
+-	return 1;
+-}
+-
+-static uint32_t sysv_hash(const char *s0)
+-{
+-	const unsigned char *s = (void *)s0;
+-	uint_fast32_t h = 0;
+-	while (*s) {
+-		h = 16*h + *s++;
+-		h ^= h>>24 & 0xf0;
+-	}
+-	return h & 0xfffffff;
+-}
+-
+-static uint32_t gnu_hash(const char *s0)
+-{
+-	const unsigned char *s = (void *)s0;
+-	uint_fast32_t h = 5381;
+-	for (; *s; s++)
+-		h += h*32 + *s;
+-	return h;
+-}
+-
+-static Sym *sysv_lookup(const char *s, uint32_t h, struct dso *dso)
+-{
+-	size_t i;
+-	Sym *syms = dso->syms;
+-	uint32_t *hashtab = dso->hashtab;
+-	char *strings = dso->strings;
+-	for (i=hashtab[2+h%hashtab[0]]; i; i=hashtab[2+hashtab[0]+i]) {
+-		if ((!dso->versym || dso->versym[i] >= 0)
+-		    && (!strcmp(s, strings+syms[i].st_name)))
+-			return syms+i;
+-	}
+-	return 0;
+-}
+-
+-static Sym *gnu_lookup(uint32_t h1, uint32_t *hashtab, struct dso *dso, const char *s)
+-{
+-	uint32_t nbuckets = hashtab[0];
+-	uint32_t *buckets = hashtab + 4 + hashtab[2]*(sizeof(size_t)/4);
+-	uint32_t i = buckets[h1 % nbuckets];
+-
+-	if (!i) return 0;
+-
+-	uint32_t *hashval = buckets + nbuckets + (i - hashtab[1]);
+-
+-	for (h1 |= 1; ; i++) {
+-		uint32_t h2 = *hashval++;
+-		if ((h1 == (h2|1)) && (!dso->versym || dso->versym[i] >= 0)
+-		    && !strcmp(s, dso->strings + dso->syms[i].st_name))
+-			return dso->syms+i;
+-		if (h2 & 1) break;
+-	}
+-
+-	return 0;
+-}
+-
+-static Sym *gnu_lookup_filtered(uint32_t h1, uint32_t *hashtab, struct dso *dso, const char *s, uint32_t fofs, size_t fmask)
+-{
+-	const size_t *bloomwords = (const void *)(hashtab+4);
+-	size_t f = bloomwords[fofs & (hashtab[2]-1)];
+-	if (!(f & fmask)) return 0;
+-
+-	f >>= (h1 >> hashtab[3]) % (8 * sizeof f);
+-	if (!(f & 1)) return 0;
+-
+-	return gnu_lookup(h1, hashtab, dso, s);
+-}
+-
+-#define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON | 1<<STT_TLS)
+-#define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK | 1<<STB_GNU_UNIQUE)
+-
+-#ifndef ARCH_SYM_REJECT_UND
+-#define ARCH_SYM_REJECT_UND(s) 0
+-#endif
+-
+-static struct symdef find_sym(struct dso *dso, const char *s, int need_def)
+-{
+-	uint32_t h = 0, gh, gho, *ght;
+-	size_t ghm = 0;
+-	struct symdef def = {0};
+-	for (; dso; dso=dso->next) {
+-		Sym *sym;
+-		if (!dso->global) continue;
+-		if ((ght = dso->ghashtab)) {
+-			if (!ghm) {
+-				gh = gnu_hash(s);
+-				int maskbits = 8 * sizeof ghm;
+-				gho = gh / maskbits;
+-				ghm = 1ul << gh % maskbits;
+-			}
+-			sym = gnu_lookup_filtered(gh, ght, dso, s, gho, ghm);
+-		} else {
+-			if (!h) h = sysv_hash(s);
+-			sym = sysv_lookup(s, h, dso);
+-		}
+-		if (!sym) continue;
+-		if (!sym->st_shndx)
+-			if (need_def || (sym->st_info&0xf) == STT_TLS
+-			    || ARCH_SYM_REJECT_UND(sym))
+-				continue;
+-		if (!sym->st_value)
+-			if ((sym->st_info&0xf) != STT_TLS)
+-				continue;
+-		if (!(1<<(sym->st_info&0xf) & OK_TYPES)) continue;
+-		if (!(1<<(sym->st_info>>4) & OK_BINDS)) continue;
+-
+-		if (def.sym && sym->st_info>>4 == STB_WEAK) continue;
+-		def.sym = sym;
+-		def.dso = dso;
+-		if (sym->st_info>>4 == STB_GLOBAL) break;
+-	}
+-	return def;
+-}
+-
+-__attribute__((__visibility__("hidden")))
+-ptrdiff_t __tlsdesc_static(), __tlsdesc_dynamic();
+-
+-static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stride)
+-{
+-	unsigned char *base = dso->base;
+-	Sym *syms = dso->syms;
+-	char *strings = dso->strings;
+-	Sym *sym;
+-	const char *name;
+-	void *ctx;
+-	int type;
+-	int sym_index;
+-	struct symdef def;
+-	size_t *reloc_addr;
+-	size_t sym_val;
+-	size_t tls_val;
+-	size_t addend;
+-	int skip_relative = 0, reuse_addends = 0, save_slot = 0;
+-
+-	if (dso == &ldso) {
+-		/* Only ldso's REL table needs addend saving/reuse. */
+-		if (rel == apply_addends_to)
+-			reuse_addends = 1;
+-		skip_relative = 1;
+-	}
+-
+-	for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
+-		if (skip_relative && IS_RELATIVE(rel[1], dso->syms)) continue;
+-		type = R_TYPE(rel[1]);
+-		if (type == REL_NONE) continue;
+-		sym_index = R_SYM(rel[1]);
+-		reloc_addr = laddr(dso, rel[0]);
+-		if (sym_index) {
+-			sym = syms + sym_index;
+-			name = strings + sym->st_name;
+-			ctx = type==REL_COPY ? head->next : head;
+-			def = (sym->st_info&0xf) == STT_SECTION
+-				? (struct symdef){ .dso = dso, .sym = sym }
+-				: find_sym(ctx, name, type==REL_PLT);
+-			if (!def.sym && (sym->st_shndx != SHN_UNDEF
+-			    || sym->st_info>>4 != STB_WEAK)) {
+-				error("Error relocating %s: %s: symbol not found",
+-					dso->name, name);
+-				if (runtime) longjmp(*rtld_fail, 1);
+-				continue;
+-			}
+-		} else {
+-			sym = 0;
+-			def.sym = 0;
+-			def.dso = dso;
+-		}
+-
+-		if (stride > 2) {
+-			addend = rel[2];
+-		} else if (type==REL_GOT || type==REL_PLT|| type==REL_COPY) {
+-			addend = 0;
+-		} else if (reuse_addends) {
+-			/* Save original addend in stage 2 where the dso
+-			 * chain consists of just ldso; otherwise read back
+-			 * saved addend since the inline one was clobbered. */
+-			if (head==&ldso)
+-				saved_addends[save_slot] = *reloc_addr;
+-			addend = saved_addends[save_slot++];
+-		} else {
+-			addend = *reloc_addr;
+-		}
+-
+-		sym_val = def.sym ? (size_t)laddr(def.dso, def.sym->st_value) : 0;
+-		tls_val = def.sym ? def.sym->st_value : 0;
+-
+-		switch(type) {
+-		case REL_NONE:
+-			break;
+-		case REL_OFFSET:
+-			addend -= (size_t)reloc_addr;
+-		case REL_SYMBOLIC:
+-		case REL_GOT:
+-		case REL_PLT:
+-			*reloc_addr = sym_val + addend;
+-			break;
+-		case REL_RELATIVE:
+-			*reloc_addr = (size_t)base + addend;
+-			break;
+-		case REL_SYM_OR_REL:
+-			if (sym) *reloc_addr = sym_val + addend;
+-			else *reloc_addr = (size_t)base + addend;
+-			break;
+-		case REL_COPY:
+-			memcpy(reloc_addr, (void *)sym_val, sym->st_size);
+-			break;
+-		case REL_OFFSET32:
+-			*(uint32_t *)reloc_addr = sym_val + addend
+-				- (size_t)reloc_addr;
+-			break;
+-		case REL_FUNCDESC:
+-			*reloc_addr = def.sym ? (size_t)(def.dso->funcdescs
+-				+ (def.sym - def.dso->syms)) : 0;
+-			break;
+-		case REL_FUNCDESC_VAL:
+-			if ((sym->st_info&0xf) == STT_SECTION) *reloc_addr += sym_val;
+-			else *reloc_addr = sym_val;
+-			reloc_addr[1] = def.sym ? (size_t)def.dso->got : 0;
+-			break;
+-		case REL_DTPMOD:
+-			*reloc_addr = def.dso->tls_id;
+-			break;
+-		case REL_DTPOFF:
+-			*reloc_addr = tls_val + addend - DTP_OFFSET;
+-			break;
+-#ifdef TLS_ABOVE_TP
+-		case REL_TPOFF:
+-			*reloc_addr = tls_val + def.dso->tls_offset + TPOFF_K + addend;
+-			break;
+-#else
+-		case REL_TPOFF:
+-			*reloc_addr = tls_val - def.dso->tls_offset + addend;
+-			break;
+-		case REL_TPOFF_NEG:
+-			*reloc_addr = def.dso->tls_offset - tls_val + addend;
+-			break;
+-#endif
+-		case REL_TLSDESC:
+-			if (stride<3) addend = reloc_addr[1];
+-			if (runtime && def.dso->tls_id >= static_tls_cnt) {
+-				struct td_index *new = malloc(sizeof *new);
+-				if (!new) {
+-					error(
+-					"Error relocating %s: cannot allocate TLSDESC for %s",
+-					dso->name, sym ? name : "(local)" );
+-					longjmp(*rtld_fail, 1);
+-				}
+-				new->next = dso->td_index;
+-				dso->td_index = new;
+-				new->args[0] = def.dso->tls_id;
+-				new->args[1] = tls_val + addend;
+-				reloc_addr[0] = (size_t)__tlsdesc_dynamic;
+-				reloc_addr[1] = (size_t)new;
+-			} else {
+-				reloc_addr[0] = (size_t)__tlsdesc_static;
+-#ifdef TLS_ABOVE_TP
+-				reloc_addr[1] = tls_val + def.dso->tls_offset
+-					+ TPOFF_K + addend;
+-#else
+-				reloc_addr[1] = tls_val - def.dso->tls_offset
+-					+ addend;
+-#endif
+-			}
+-			break;
+-		default:
+-			error("Error relocating %s: unsupported relocation type %d",
+-				dso->name, type);
+-			if (runtime) longjmp(*rtld_fail, 1);
+-			continue;
+-		}
+-	}
+-}
+-
+-/* A huge hack: to make up for the wastefulness of shared libraries
+- * needing at least a page of dirty memory even if they have no global
+- * data, we reclaim the gaps at the beginning and end of writable maps
+- * and "donate" them to the heap by setting up minimal malloc
+- * structures and then freeing them. */
+-
+-static void reclaim(struct dso *dso, size_t start, size_t end)
+-{
+-	size_t *a, *z;
+-	if (start >= dso->relro_start && start < dso->relro_end) start = dso->relro_end;
+-	if (end   >= dso->relro_start && end   < dso->relro_end) end = dso->relro_start;
+-	start = start + 6*sizeof(size_t)-1 & -4*sizeof(size_t);
+-	end = (end & -4*sizeof(size_t)) - 2*sizeof(size_t);
+-	if (start>end || end-start < 4*sizeof(size_t)) return;
+-	a = laddr(dso, start);
+-	z = laddr(dso, end);
+-	a[-2] = 1;
+-	a[-1] = z[0] = end-start + 2*sizeof(size_t) | 1;
+-	z[1] = 1;
+-	free(a);
+-}
+-
+-static void reclaim_gaps(struct dso *dso)
+-{
+-	Phdr *ph = dso->phdr;
+-	size_t phcnt = dso->phnum;
+-
+-	if (DL_FDPIC) return; // FIXME
+-	for (; phcnt--; ph=(void *)((char *)ph+dso->phentsize)) {
+-		if (ph->p_type!=PT_LOAD) continue;
+-		if ((ph->p_flags&(PF_R|PF_W))!=(PF_R|PF_W)) continue;
+-		reclaim(dso, ph->p_vaddr & -PAGE_SIZE, ph->p_vaddr);
+-		reclaim(dso, ph->p_vaddr+ph->p_memsz,
+-			ph->p_vaddr+ph->p_memsz+PAGE_SIZE-1 & -PAGE_SIZE);
+-	}
+-}
+-
+-static void *mmap_fixed(void *p, size_t n, int prot, int flags, int fd, off_t off)
+-{
+-	char *q = mmap(p, n, prot, flags, fd, off);
+-	if (q != MAP_FAILED || errno != EINVAL) return q;
+-	/* Fallbacks for MAP_FIXED failure on NOMMU kernels. */
+-	if (flags & MAP_ANONYMOUS) {
+-		memset(p, 0, n);
+-		return p;
+-	}
+-	ssize_t r;
+-	if (lseek(fd, off, SEEK_SET) < 0) return MAP_FAILED;
+-	for (q=p; n; q+=r, off+=r, n-=r) {
+-		r = read(fd, q, n);
+-		if (r < 0 && errno != EINTR) return MAP_FAILED;
+-		if (!r) {
+-			memset(q, 0, n);
+-			break;
+-		}
+-	}
+-	return p;
+-}
+-
+-static void unmap_library(struct dso *dso)
+-{
+-	if (dso->loadmap) {
+-		size_t i;
+-		for (i=0; i<dso->loadmap->nsegs; i++) {
+-			if (!dso->loadmap->segs[i].p_memsz)
+-				continue;
+-			munmap((void *)dso->loadmap->segs[i].addr,
+-				dso->loadmap->segs[i].p_memsz);
+-		}
+-		free(dso->loadmap);
+-	} else if (dso->map && dso->map_len) {
+-		munmap(dso->map, dso->map_len);
+-	}
+-}
+-
+-static void *map_library(int fd, struct dso *dso)
+-{
+-	Ehdr buf[(896+sizeof(Ehdr))/sizeof(Ehdr)];
+-	void *allocated_buf=0;
+-	size_t phsize;
+-	size_t addr_min=SIZE_MAX, addr_max=0, map_len;
+-	size_t this_min, this_max;
+-	size_t nsegs = 0;
+-	off_t off_start;
+-	Ehdr *eh;
+-	Phdr *ph, *ph0;
+-	unsigned prot;
+-	unsigned char *map=MAP_FAILED, *base;
+-	size_t dyn=0;
+-	size_t tls_image=0;
+-	size_t i;
+-
+-	ssize_t l = read(fd, buf, sizeof buf);
+-	eh = buf;
+-	if (l<0) return 0;
+-	if (l<sizeof *eh || (eh->e_type != ET_DYN && eh->e_type != ET_EXEC))
+-		goto noexec;
+-	phsize = eh->e_phentsize * eh->e_phnum;
+-	if (phsize > sizeof buf - sizeof *eh) {
+-		allocated_buf = malloc(phsize);
+-		if (!allocated_buf) return 0;
+-		l = pread(fd, allocated_buf, phsize, eh->e_phoff);
+-		if (l < 0) goto error;
+-		if (l != phsize) goto noexec;
+-		ph = ph0 = allocated_buf;
+-	} else if (eh->e_phoff + phsize > l) {
+-		l = pread(fd, buf+1, phsize, eh->e_phoff);
+-		if (l < 0) goto error;
+-		if (l != phsize) goto noexec;
+-		ph = ph0 = (void *)(buf + 1);
+-	} else {
+-		ph = ph0 = (void *)((char *)buf + eh->e_phoff);
+-	}
+-	for (i=eh->e_phnum; i; i--, ph=(void *)((char *)ph+eh->e_phentsize)) {
+-		if (ph->p_type == PT_DYNAMIC) {
+-			dyn = ph->p_vaddr;
+-		} else if (ph->p_type == PT_TLS) {
+-			tls_image = ph->p_vaddr;
+-			dso->tls_align = ph->p_align;
+-			dso->tls_len = ph->p_filesz;
+-			dso->tls_size = ph->p_memsz;
+-		} else if (ph->p_type == PT_GNU_RELRO) {
+-			dso->relro_start = ph->p_vaddr & -PAGE_SIZE;
+-			dso->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE;
+-		}
+-		if (ph->p_type != PT_LOAD) continue;
+-		nsegs++;
+-		if (ph->p_vaddr < addr_min) {
+-			addr_min = ph->p_vaddr;
+-			off_start = ph->p_offset;
+-			prot = (((ph->p_flags&PF_R) ? PROT_READ : 0) |
+-				((ph->p_flags&PF_W) ? PROT_WRITE: 0) |
+-				((ph->p_flags&PF_X) ? PROT_EXEC : 0));
+-		}
+-		if (ph->p_vaddr+ph->p_memsz > addr_max) {
+-			addr_max = ph->p_vaddr+ph->p_memsz;
+-		}
+-	}
+-	if (!dyn) goto noexec;
+-	if (DL_FDPIC && !(eh->e_flags & FDPIC_CONSTDISP_FLAG)) {
+-		dso->loadmap = calloc(1, sizeof *dso->loadmap
+-			+ nsegs * sizeof *dso->loadmap->segs);
+-		if (!dso->loadmap) goto error;
+-		dso->loadmap->nsegs = nsegs;
+-		for (ph=ph0, i=0; i<nsegs; ph=(void *)((char *)ph+eh->e_phentsize)) {
+-			if (ph->p_type != PT_LOAD) continue;
+-			prot = (((ph->p_flags&PF_R) ? PROT_READ : 0) |
+-				((ph->p_flags&PF_W) ? PROT_WRITE: 0) |
+-				((ph->p_flags&PF_X) ? PROT_EXEC : 0));
+-			map = mmap(0, ph->p_memsz + (ph->p_vaddr & PAGE_SIZE-1),
+-				prot, (prot&PROT_WRITE) ? MAP_PRIVATE : MAP_SHARED,
+-				fd, ph->p_offset & -PAGE_SIZE);
+-			if (map == MAP_FAILED) {
+-				unmap_library(dso);
+-				goto error;
+-			}
+-			dso->loadmap->segs[i].addr = (size_t)map +
+-				(ph->p_vaddr & PAGE_SIZE-1);
+-			dso->loadmap->segs[i].p_vaddr = ph->p_vaddr;
+-			dso->loadmap->segs[i].p_memsz = ph->p_memsz;
+-			i++;
+-		}
+-		map = (void *)dso->loadmap->segs[0].addr;
+-		map_len = 0;
+-		goto done_mapping;
+-	}
+-	addr_max += PAGE_SIZE-1;
+-	addr_max &= -PAGE_SIZE;
+-	addr_min &= -PAGE_SIZE;
+-	off_start &= -PAGE_SIZE;
+-	map_len = addr_max - addr_min + off_start;
+-	/* The first time, we map too much, possibly even more than
+-	 * the length of the file. This is okay because we will not
+-	 * use the invalid part; we just need to reserve the right
+-	 * amount of virtual address space to map over later. */
+-	map = mmap((void *)addr_min, map_len, prot, MAP_PRIVATE, fd, off_start);
+-	if (map==MAP_FAILED) goto error;
+-	dso->map = map;
+-	dso->map_len = map_len;
+-	/* If the loaded file is not relocatable and the requested address is
+-	 * not available, then the load operation must fail. */
+-	if (eh->e_type != ET_DYN && addr_min && map!=(void *)addr_min) {
+-		errno = EBUSY;
+-		goto error;
+-	}
+-	base = map - addr_min;
+-	dso->phdr = 0;
+-	dso->phnum = 0;
+-	for (ph=ph0, i=eh->e_phnum; i; i--, ph=(void *)((char *)ph+eh->e_phentsize)) {
+-		if (ph->p_type != PT_LOAD) continue;
+-		/* Check if the programs headers are in this load segment, and
+-		 * if so, record the address for use by dl_iterate_phdr. */
+-		if (!dso->phdr && eh->e_phoff >= ph->p_offset
+-		    && eh->e_phoff+phsize <= ph->p_offset+ph->p_filesz) {
+-			dso->phdr = (void *)(base + ph->p_vaddr
+-				+ (eh->e_phoff-ph->p_offset));
+-			dso->phnum = eh->e_phnum;
+-			dso->phentsize = eh->e_phentsize;
+-		}
+-		/* Reuse the existing mapping for the lowest-address LOAD */
+-		if ((ph->p_vaddr & -PAGE_SIZE) == addr_min) continue;
+-		this_min = ph->p_vaddr & -PAGE_SIZE;
+-		this_max = ph->p_vaddr+ph->p_memsz+PAGE_SIZE-1 & -PAGE_SIZE;
+-		off_start = ph->p_offset & -PAGE_SIZE;
+-		prot = (((ph->p_flags&PF_R) ? PROT_READ : 0) |
+-			((ph->p_flags&PF_W) ? PROT_WRITE: 0) |
+-			((ph->p_flags&PF_X) ? PROT_EXEC : 0));
+-		if (mmap_fixed(base+this_min, this_max-this_min, prot, MAP_PRIVATE|MAP_FIXED, fd, off_start) == MAP_FAILED)
+-			goto error;
+-		if (ph->p_memsz > ph->p_filesz) {
+-			size_t brk = (size_t)base+ph->p_vaddr+ph->p_filesz;
+-			size_t pgbrk = brk+PAGE_SIZE-1 & -PAGE_SIZE;
+-			memset((void *)brk, 0, pgbrk-brk & PAGE_SIZE-1);
+-			if (pgbrk-(size_t)base < this_max && mmap_fixed((void *)pgbrk, (size_t)base+this_max-pgbrk, prot, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) == MAP_FAILED)
+-				goto error;
+-		}
+-	}
+-	for (i=0; ((size_t *)(base+dyn))[i]; i+=2)
+-		if (((size_t *)(base+dyn))[i]==DT_TEXTREL) {
+-			if (mprotect(map, map_len, PROT_READ|PROT_WRITE|PROT_EXEC)
+-			    && errno != ENOSYS)
+-				goto error;
+-			break;
+-		}
+-done_mapping:
+-	dso->base = base;
+-	dso->dynv = laddr(dso, dyn);
+-	if (dso->tls_size) dso->tls_image = laddr(dso, tls_image);
+-	if (!runtime) reclaim_gaps(dso);
+-	free(allocated_buf);
+-	return map;
+-noexec:
+-	errno = ENOEXEC;
+-error:
+-	if (map!=MAP_FAILED) unmap_library(dso);
+-	free(allocated_buf);
+-	return 0;
+-}
+-
+-static int path_open(const char *name, const char *s, char *buf, size_t buf_size)
+-{
+-	size_t l;
+-	int fd;
+-	for (;;) {
+-		s += strspn(s, ":\n");
+-		l = strcspn(s, ":\n");
+-		if (l-1 >= INT_MAX) return -1;
+-		if (snprintf(buf, buf_size, "%.*s/%s", (int)l, s, name) < buf_size) {
+-			if ((fd = open(buf, O_RDONLY|O_CLOEXEC))>=0) return fd;
+-			switch (errno) {
+-			case ENOENT:
+-			case ENOTDIR:
+-			case EACCES:
+-			case ENAMETOOLONG:
+-				break;
+-			default:
+-				/* Any negative value but -1 will inhibit
+-				 * futher path search. */
+-				return -2;
+-			}
+-		}
+-		s += l;
+-	}
+-}
+-
+-static int fixup_rpath(struct dso *p, char *buf, size_t buf_size)
+-{
+-	size_t n, l;
+-	const char *s, *t, *origin;
+-	char *d;
+-	if (p->rpath || !p->rpath_orig) return 0;
+-	if (!strchr(p->rpath_orig, '$')) {
+-		p->rpath = p->rpath_orig;
+-		return 0;
+-	}
+-	n = 0;
+-	s = p->rpath_orig;
+-	while ((t=strchr(s, '$'))) {
+-		if (strncmp(t, "$ORIGIN", 7) && strncmp(t, "${ORIGIN}", 9))
+-			return 0;
+-		s = t+1;
+-		n++;
+-	}
+-	if (n > SSIZE_MAX/PATH_MAX) return 0;
+-
+-	if (p->kernel_mapped) {
+-		/* $ORIGIN searches cannot be performed for the main program
+-		 * when it is suid/sgid/AT_SECURE. This is because the
+-		 * pathname is under the control of the caller of execve.
+-		 * For libraries, however, $ORIGIN can be processed safely
+-		 * since the library's pathname came from a trusted source
+-		 * (either system paths or a call to dlopen). */
+-		if (libc.secure)
+-			return 0;
+-		l = readlink("/proc/self/exe", buf, buf_size);
+-		if (l == -1) switch (errno) {
+-		case ENOENT:
+-		case ENOTDIR:
+-		case EACCES:
+-			break;
+-		default:
+-			return -1;
+-		}
+-		if (l >= buf_size)
+-			return 0;
+-		buf[l] = 0;
+-		origin = buf;
+-	} else {
+-		origin = p->name;
+-	}
+-	t = strrchr(origin, '/');
+-	l = t ? t-origin : 0;
+-	p->rpath = malloc(strlen(p->rpath_orig) + n*l + 1);
+-	if (!p->rpath) return -1;
+-
+-	d = p->rpath;
+-	s = p->rpath_orig;
+-	while ((t=strchr(s, '$'))) {
+-		memcpy(d, s, t-s);
+-		d += t-s;
+-		memcpy(d, origin, l);
+-		d += l;
+-		/* It was determined previously that the '$' is followed
+-		 * either by "ORIGIN" or "{ORIGIN}". */
+-		s = t + 7 + 2*(t[1]=='{');
+-	}
+-	strcpy(d, s);
+-	return 0;
+-}
+-
+-static void decode_dyn(struct dso *p)
+-{
+-	size_t dyn[DYN_CNT];
+-	decode_vec(p->dynv, dyn, DYN_CNT);
+-	p->syms = laddr(p, dyn[DT_SYMTAB]);
+-	p->strings = laddr(p, dyn[DT_STRTAB]);
+-	if (dyn[0]&(1<<DT_HASH))
+-		p->hashtab = laddr(p, dyn[DT_HASH]);
+-	if (dyn[0]&(1<<DT_RPATH))
+-		p->rpath_orig = p->strings + dyn[DT_RPATH];
+-	if (dyn[0]&(1<<DT_RUNPATH))
+-		p->rpath_orig = p->strings + dyn[DT_RUNPATH];
+-	if (dyn[0]&(1<<DT_PLTGOT))
+-		p->got = laddr(p, dyn[DT_PLTGOT]);
+-	if (search_vec(p->dynv, dyn, DT_GNU_HASH))
+-		p->ghashtab = laddr(p, *dyn);
+-	if (search_vec(p->dynv, dyn, DT_VERSYM))
+-		p->versym = laddr(p, *dyn);
+-}
+-
+-static size_t count_syms(struct dso *p)
+-{
+-	if (p->hashtab) return p->hashtab[1];
+-
+-	size_t nsym, i;
+-	uint32_t *buckets = p->ghashtab + 4 + (p->ghashtab[2]*sizeof(size_t)/4);
+-	uint32_t *hashval;
+-	for (i = nsym = 0; i < p->ghashtab[0]; i++) {
+-		if (buckets[i] > nsym)
+-			nsym = buckets[i];
+-	}
+-	if (nsym) {
+-		hashval = buckets + p->ghashtab[0] + (nsym - p->ghashtab[1]);
+-		do nsym++;
+-		while (!(*hashval++ & 1));
+-	}
+-	return nsym;
+-}
+-
+-static void *dl_mmap(size_t n)
+-{
+-	void *p;
+-	int prot = PROT_READ|PROT_WRITE, flags = MAP_ANONYMOUS|MAP_PRIVATE;
+-#ifdef SYS_mmap2
+-	p = (void *)__syscall(SYS_mmap2, 0, n, prot, flags, -1, 0);
+-#else
+-	p = (void *)__syscall(SYS_mmap, 0, n, prot, flags, -1, 0);
+-#endif
+-	return p == MAP_FAILED ? 0 : p;
+-}
+-
+-static void makefuncdescs(struct dso *p)
+-{
+-	static int self_done;
+-	size_t nsym = count_syms(p);
+-	size_t i, size = nsym * sizeof(*p->funcdescs);
+-
+-	if (!self_done) {
+-		p->funcdescs = dl_mmap(size);
+-		self_done = 1;
+-	} else {
+-		p->funcdescs = malloc(size);
+-	}
+-	if (!p->funcdescs) {
+-		if (!runtime) a_crash();
+-		error("Error allocating function descriptors for %s", p->name);
+-		longjmp(*rtld_fail, 1);
+-	}
+-	for (i=0; i<nsym; i++) {
+-		if ((p->syms[i].st_info&0xf)==STT_FUNC && p->syms[i].st_shndx) {
+-			p->funcdescs[i].addr = laddr(p, p->syms[i].st_value);
+-			p->funcdescs[i].got = p->got;
+-		} else {
+-			p->funcdescs[i].addr = 0;
+-			p->funcdescs[i].got = 0;
+-		}
+-	}
+-}
+-
+-static struct dso *load_library(const char *name, struct dso *needed_by)
+-{
+-	char buf[2*NAME_MAX+2];
+-	const char *pathname;
+-	unsigned char *map;
+-	struct dso *p, temp_dso = {0};
+-	int fd;
+-	struct stat st;
+-	size_t alloc_size;
+-	int n_th = 0;
+-	int is_self = 0;
+-
+-	if (!*name) {
+-		errno = EINVAL;
+-		return 0;
+-	}
+-
+-	/* Catch and block attempts to reload the implementation itself */
+-	if (name[0]=='l' && name[1]=='i' && name[2]=='b') {
+-		static const char *rp, reserved[] =
+-			"c\0pthread\0rt\0m\0dl\0util\0xnet\0";
+-		char *z = strchr(name, '.');
+-		if (z) {
+-			size_t l = z-name;
+-			for (rp=reserved; *rp && strncmp(name+3, rp, l-3); rp+=strlen(rp)+1);
+-			if (*rp) {
+-				if (ldd_mode) {
+-					/* Track which names have been resolved
+-					 * and only report each one once. */
+-					static unsigned reported;
+-					unsigned mask = 1U<<(rp-reserved);
+-					if (!(reported & mask)) {
+-						reported |= mask;
+-						dprintf(1, "\t%s => %s (%p)\n",
+-							name, ldso.name,
+-							ldso.base);
+-					}
+-				}
+-				is_self = 1;
+-			}
+-		}
+-	}
+-	if (!strcmp(name, ldso.name)) is_self = 1;
+-	if (is_self) {
+-		if (!ldso.prev) {
+-			tail->next = &ldso;
+-			ldso.prev = tail;
+-			tail = ldso.next ? ldso.next : &ldso;
+-		}
+-		return &ldso;
+-	}
+-	if (strchr(name, '/')) {
+-		pathname = name;
+-		fd = open(name, O_RDONLY|O_CLOEXEC);
+-	} else {
+-		/* Search for the name to see if it's already loaded */
+-		for (p=head->next; p; p=p->next) {
+-			if (p->shortname && !strcmp(p->shortname, name)) {
+-				p->refcnt++;
+-				return p;
+-			}
+-		}
+-		if (strlen(name) > NAME_MAX) return 0;
+-		fd = -1;
+-		if (env_path) fd = path_open(name, env_path, buf, sizeof buf);
+-		for (p=needed_by; fd == -1 && p; p=p->needed_by) {
+-			if (fixup_rpath(p, buf, sizeof buf) < 0)
+-				fd = -2; /* Inhibit further search. */
+-			if (p->rpath)
+-				fd = path_open(name, p->rpath, buf, sizeof buf);
+-		}
+-		if (fd == -1) {
+-			if (!sys_path) {
+-				char *prefix = 0;
+-				size_t prefix_len;
+-				if (ldso.name[0]=='/') {
+-					char *s, *t, *z;
+-					for (s=t=z=ldso.name; *s; s++)
+-						if (*s=='/') z=t, t=s;
+-					prefix_len = z-ldso.name;
+-					if (prefix_len < PATH_MAX)
+-						prefix = ldso.name;
+-				}
+-				if (!prefix) {
+-					prefix = "";
+-					prefix_len = 0;
+-				}
+-				char etc_ldso_path[prefix_len + 1
+-					+ sizeof "/etc/ld-musl-" LDSO_ARCH ".path"];
+-				snprintf(etc_ldso_path, sizeof etc_ldso_path,
+-					"%.*s/etc/ld-musl-" LDSO_ARCH ".path",
+-					(int)prefix_len, prefix);
+-				FILE *f = fopen(etc_ldso_path, "rbe");
+-				if (f) {
+-					if (getdelim(&sys_path, (size_t[1]){0}, 0, f) <= 0) {
+-						free(sys_path);
+-						sys_path = "";
+-					}
+-					fclose(f);
+-				} else if (errno != ENOENT) {
+-					sys_path = "";
+-				}
+-			}
+-			if (!sys_path) sys_path = "/lib:/usr/local/lib:/usr/lib";
+-			fd = path_open(name, sys_path, buf, sizeof buf);
+-		}
+-		pathname = buf;
+-	}
+-	if (fd < 0) return 0;
+-	if (fstat(fd, &st) < 0) {
+-		close(fd);
+-		return 0;
+-	}
+-	for (p=head->next; p; p=p->next) {
+-		if (p->dev == st.st_dev && p->ino == st.st_ino) {
+-			/* If this library was previously loaded with a
+-			 * pathname but a search found the same inode,
+-			 * setup its shortname so it can be found by name. */
+-			if (!p->shortname && pathname != name)
+-				p->shortname = strrchr(p->name, '/')+1;
+-			close(fd);
+-			p->refcnt++;
+-			return p;
+-		}
+-	}
+-	map = noload ? 0 : map_library(fd, &temp_dso);
+-	close(fd);
+-	if (!map) return 0;
+-
+-	/* Allocate storage for the new DSO. When there is TLS, this
+-	 * storage must include a reservation for all pre-existing
+-	 * threads to obtain copies of both the new TLS, and an
+-	 * extended DTV capable of storing an additional slot for
+-	 * the newly-loaded DSO. */
+-	alloc_size = sizeof *p + strlen(pathname) + 1;
+-	if (runtime && temp_dso.tls_image) {
+-		size_t per_th = temp_dso.tls_size + temp_dso.tls_align
+-			+ sizeof(void *) * (tls_cnt+3);
+-		n_th = libc.threads_minus_1 + 1;
+-		if (n_th > SSIZE_MAX / per_th) alloc_size = SIZE_MAX;
+-		else alloc_size += n_th * per_th;
+-	}
+-	p = calloc(1, alloc_size);
+-	if (!p) {
+-		unmap_library(&temp_dso);
+-		return 0;
+-	}
+-	memcpy(p, &temp_dso, sizeof temp_dso);
+-	decode_dyn(p);
+-	p->dev = st.st_dev;
+-	p->ino = st.st_ino;
+-	p->refcnt = 1;
+-	p->needed_by = needed_by;
+-	p->name = p->buf;
+-	strcpy(p->name, pathname);
+-	/* Add a shortname only if name arg was not an explicit pathname. */
+-	if (pathname != name) p->shortname = strrchr(p->name, '/')+1;
+-	if (p->tls_image) {
+-		p->tls_id = ++tls_cnt;
+-		tls_align = MAXP2(tls_align, p->tls_align);
+-#ifdef TLS_ABOVE_TP
+-		p->tls_offset = tls_offset + ( (tls_align-1) &
+-			-(tls_offset + (uintptr_t)p->tls_image) );
+-		tls_offset += p->tls_size;
+-#else
+-		tls_offset += p->tls_size + p->tls_align - 1;
+-		tls_offset -= (tls_offset + (uintptr_t)p->tls_image)
+-			& (p->tls_align-1);
+-		p->tls_offset = tls_offset;
+-#endif
+-		p->new_dtv = (void *)(-sizeof(size_t) &
+-			(uintptr_t)(p->name+strlen(p->name)+sizeof(size_t)));
+-		p->new_tls = (void *)(p->new_dtv + n_th*(tls_cnt+1));
+-	}
+-
+-	tail->next = p;
+-	p->prev = tail;
+-	tail = p;
+-
+-	if (DL_FDPIC) makefuncdescs(p);
+-
+-	if (ldd_mode) dprintf(1, "\t%s => %s (%p)\n", name, pathname, p->base);
+-
+-	return p;
+-}
+-
+-static void load_deps(struct dso *p)
+-{
+-	size_t i, ndeps=0;
+-	struct dso ***deps = &p->deps, **tmp, *dep;
+-	for (; p; p=p->next) {
+-		for (i=0; p->dynv[i]; i+=2) {
+-			if (p->dynv[i] != DT_NEEDED) continue;
+-			dep = load_library(p->strings + p->dynv[i+1], p);
+-			if (!dep) {
+-				error("Error loading shared library %s: %m (needed by %s)",
+-					p->strings + p->dynv[i+1], p->name);
+-				if (runtime) longjmp(*rtld_fail, 1);
+-				continue;
+-			}
+-			if (runtime) {
+-				tmp = realloc(*deps, sizeof(*tmp)*(ndeps+2));
+-				if (!tmp) longjmp(*rtld_fail, 1);
+-				tmp[ndeps++] = dep;
+-				tmp[ndeps] = 0;
+-				*deps = tmp;
+-			}
+-		}
+-	}
+-}
+-
+-static void load_preload(char *s)
+-{
+-	int tmp;
+-	char *z;
+-	for (z=s; *z; s=z) {
+-		for (   ; *s && (isspace(*s) || *s==':'); s++);
+-		for (z=s; *z && !isspace(*z) && *z!=':'; z++);
+-		tmp = *z;
+-		*z = 0;
+-		load_library(s, 0);
+-		*z = tmp;
+-	}
+-}
+-
+-static void make_global(struct dso *p)
+-{
+-	for (; p; p=p->next) p->global = 1;
+-}
+-
+-static void do_mips_relocs(struct dso *p, size_t *got)
+-{
+-	size_t i, j, rel[2];
+-	unsigned char *base = p->base;
+-	i=0; search_vec(p->dynv, &i, DT_MIPS_LOCAL_GOTNO);
+-	if (p==&ldso) {
+-		got += i;
+-	} else {
+-		while (i--) *got++ += (size_t)base;
+-	}
+-	j=0; search_vec(p->dynv, &j, DT_MIPS_GOTSYM);
+-	i=0; search_vec(p->dynv, &i, DT_MIPS_SYMTABNO);
+-	Sym *sym = p->syms + j;
+-	rel[0] = (unsigned char *)got - base;
+-	for (i-=j; i; i--, sym++, rel[0]+=sizeof(size_t)) {
+-		rel[1] = sym-p->syms << 8 | R_MIPS_JUMP_SLOT;
+-		do_relocs(p, rel, sizeof rel, 2);
+-	}
+-}
+-
+-static void reloc_all(struct dso *p)
+-{
+-	size_t dyn[DYN_CNT];
+-	for (; p; p=p->next) {
+-		if (p->relocated) continue;
+-		decode_vec(p->dynv, dyn, DYN_CNT);
+-		if (NEED_MIPS_GOT_RELOCS)
+-			do_mips_relocs(p, laddr(p, dyn[DT_PLTGOT]));
+-		do_relocs(p, laddr(p, dyn[DT_JMPREL]), dyn[DT_PLTRELSZ],
+-			2+(dyn[DT_PLTREL]==DT_RELA));
+-		do_relocs(p, laddr(p, dyn[DT_REL]), dyn[DT_RELSZ], 2);
+-		do_relocs(p, laddr(p, dyn[DT_RELA]), dyn[DT_RELASZ], 3);
+-
+-		if (head != &ldso && p->relro_start != p->relro_end &&
+-		    mprotect(laddr(p, p->relro_start), p->relro_end-p->relro_start, PROT_READ)
+-		    && errno != ENOSYS) {
+-			error("Error relocating %s: RELRO protection failed: %m",
+-				p->name);
+-			if (runtime) longjmp(*rtld_fail, 1);
+-		}
+-
+-		p->relocated = 1;
+-	}
+-}
+-
+-static void kernel_mapped_dso(struct dso *p)
+-{
+-	size_t min_addr = -1, max_addr = 0, cnt;
+-	Phdr *ph = p->phdr;
+-	for (cnt = p->phnum; cnt--; ph = (void *)((char *)ph + p->phentsize)) {
+-		if (ph->p_type == PT_DYNAMIC) {
+-			p->dynv = laddr(p, ph->p_vaddr);
+-		} else if (ph->p_type == PT_GNU_RELRO) {
+-			p->relro_start = ph->p_vaddr & -PAGE_SIZE;
+-			p->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE;
+-		}
+-		if (ph->p_type != PT_LOAD) continue;
+-		if (ph->p_vaddr < min_addr)
+-			min_addr = ph->p_vaddr;
+-		if (ph->p_vaddr+ph->p_memsz > max_addr)
+-			max_addr = ph->p_vaddr+ph->p_memsz;
+-	}
+-	min_addr &= -PAGE_SIZE;
+-	max_addr = (max_addr + PAGE_SIZE-1) & -PAGE_SIZE;
+-	p->map = p->base + min_addr;
+-	p->map_len = max_addr - min_addr;
+-	p->kernel_mapped = 1;
+-}
+-
+-static void do_fini()
+-{
+-	struct dso *p;
+-	size_t dyn[DYN_CNT];
+-	for (p=fini_head; p; p=p->fini_next) {
+-		if (!p->constructed) continue;
+-		decode_vec(p->dynv, dyn, DYN_CNT);
+-		if (dyn[0] & (1<<DT_FINI_ARRAY)) {
+-			size_t n = dyn[DT_FINI_ARRAYSZ]/sizeof(size_t);
+-			size_t *fn = (size_t *)laddr(p, dyn[DT_FINI_ARRAY])+n;
+-			while (n--) ((void (*)(void))*--fn)();
+-		}
+-#ifndef NO_LEGACY_INITFINI
+-		if ((dyn[0] & (1<<DT_FINI)) && dyn[DT_FINI])
+-			fpaddr(p, dyn[DT_FINI])();
+-#endif
+-	}
+-}
+-
+-static void do_init_fini(struct dso *p)
+-{
+-	size_t dyn[DYN_CNT];
+-	int need_locking = libc.threads_minus_1;
+-	/* Allow recursive calls that arise when a library calls
+-	 * dlopen from one of its constructors, but block any
+-	 * other threads until all ctors have finished. */
+-	if (need_locking) pthread_mutex_lock(&init_fini_lock);
+-	for (; p; p=p->prev) {
+-		if (p->constructed) continue;
+-		p->constructed = 1;
+-		decode_vec(p->dynv, dyn, DYN_CNT);
+-		if (dyn[0] & ((1<<DT_FINI) | (1<<DT_FINI_ARRAY))) {
+-			p->fini_next = fini_head;
+-			fini_head = p;
+-		}
+-#ifndef NO_LEGACY_INITFINI
+-		if ((dyn[0] & (1<<DT_INIT)) && dyn[DT_INIT])
+-			fpaddr(p, dyn[DT_INIT])();
+-#endif
+-		if (dyn[0] & (1<<DT_INIT_ARRAY)) {
+-			size_t n = dyn[DT_INIT_ARRAYSZ]/sizeof(size_t);
+-			size_t *fn = laddr(p, dyn[DT_INIT_ARRAY]);
+-			while (n--) ((void (*)(void))*fn++)();
+-		}
+-		if (!need_locking && libc.threads_minus_1) {
+-			need_locking = 1;
+-			pthread_mutex_lock(&init_fini_lock);
+-		}
+-	}
+-	if (need_locking) pthread_mutex_unlock(&init_fini_lock);
+-}
+-
+-void __libc_start_init(void)
+-{
+-	do_init_fini(tail);
+-}
+-
+-static void dl_debug_state(void)
+-{
+-}
+-
+-weak_alias(dl_debug_state, _dl_debug_state);
+-
+-void __reset_tls()
+-{
+-	pthread_t self = __pthread_self();
+-	struct dso *p;
+-	for (p=head; p; p=p->next) {
+-		if (!p->tls_id || !self->dtv[p->tls_id]) continue;
+-		memcpy(self->dtv[p->tls_id], p->tls_image, p->tls_len);
+-		memset((char *)self->dtv[p->tls_id]+p->tls_len, 0,
+-			p->tls_size - p->tls_len);
+-		if (p->tls_id == (size_t)self->dtv[0]) break;
+-	}
+-}
+-
+-void *__copy_tls(unsigned char *mem)
+-{
+-	pthread_t td;
+-	struct dso *p;
+-	void **dtv;
+-
+-#ifdef TLS_ABOVE_TP
+-	dtv = (void **)(mem + libc.tls_size) - (tls_cnt + 1);
+-
+-	mem += -((uintptr_t)mem + sizeof(struct pthread)) & (tls_align-1);
+-	td = (pthread_t)mem;
+-	mem += sizeof(struct pthread);
+-
+-	for (p=head; p; p=p->next) {
+-		if (!p->tls_id) continue;
+-		dtv[p->tls_id] = mem + p->tls_offset;
+-		memcpy(dtv[p->tls_id], p->tls_image, p->tls_len);
+-	}
+-#else
+-	dtv = (void **)mem;
+-
+-	mem += libc.tls_size - sizeof(struct pthread);
+-	mem -= (uintptr_t)mem & (tls_align-1);
+-	td = (pthread_t)mem;
+-
+-	for (p=head; p; p=p->next) {
+-		if (!p->tls_id) continue;
+-		dtv[p->tls_id] = mem - p->tls_offset;
+-		memcpy(dtv[p->tls_id], p->tls_image, p->tls_len);
+-	}
+-#endif
+-	dtv[0] = (void *)tls_cnt;
+-	td->dtv = td->dtv_copy = dtv;
+-	return td;
+-}
+-
+-__attribute__((__visibility__("hidden")))
+-void *__tls_get_new(size_t *v)
+-{
+-	pthread_t self = __pthread_self();
+-
+-	/* Block signals to make accessing new TLS async-signal-safe */
+-	sigset_t set;
+-	__block_all_sigs(&set);
+-	if (v[0]<=(size_t)self->dtv[0]) {
+-		__restore_sigs(&set);
+-		return (char *)self->dtv[v[0]]+v[1]+DTP_OFFSET;
+-	}
+-
+-	/* This is safe without any locks held because, if the caller
+-	 * is able to request the Nth entry of the DTV, the DSO list
+-	 * must be valid at least that far out and it was synchronized
+-	 * at program startup or by an already-completed call to dlopen. */
+-	struct dso *p;
+-	for (p=head; p->tls_id != v[0]; p=p->next);
+-
+-	/* Get new DTV space from new DSO if needed */
+-	if (v[0] > (size_t)self->dtv[0]) {
+-		void **newdtv = p->new_dtv +
+-			(v[0]+1)*sizeof(void *)*a_fetch_add(&p->new_dtv_idx,1);
+-		memcpy(newdtv, self->dtv,
+-			((size_t)self->dtv[0]+1) * sizeof(void *));
+-		newdtv[0] = (void *)v[0];
+-		self->dtv = self->dtv_copy = newdtv;
+-	}
+-
+-	/* Get new TLS memory from all new DSOs up to the requested one */
+-	unsigned char *mem;
+-	for (p=head; ; p=p->next) {
+-		if (!p->tls_id || self->dtv[p->tls_id]) continue;
+-		mem = p->new_tls + (p->tls_size + p->tls_align)
+-			* a_fetch_add(&p->new_tls_idx,1);
+-		mem += ((uintptr_t)p->tls_image - (uintptr_t)mem)
+-			& (p->tls_align-1);
+-		self->dtv[p->tls_id] = mem;
+-		memcpy(mem, p->tls_image, p->tls_len);
+-		if (p->tls_id == v[0]) break;
+-	}
+-	__restore_sigs(&set);
+-	return mem + v[1] + DTP_OFFSET;
+-}
+-
+-static void update_tls_size()
+-{
+-	libc.tls_size = ALIGN(
+-		(1+tls_cnt) * sizeof(void *) +
+-		tls_offset +
+-		sizeof(struct pthread) +
+-		tls_align * 2,
+-	tls_align);
+-}
+-
+-/* Stage 1 of the dynamic linker is defined in dlstart.c. It calls the
+- * following stage 2 and stage 3 functions via primitive symbolic lookup
+- * since it does not have access to their addresses to begin with. */
+-
+-/* Stage 2 of the dynamic linker is called after relative relocations 
+- * have been processed. It can make function calls to static functions
+- * and access string literals and static data, but cannot use extern
+- * symbols. Its job is to perform symbolic relocations on the dynamic
+- * linker itself, but some of the relocations performed may need to be
+- * replaced later due to copy relocations in the main program. */
+-
+-__attribute__((__visibility__("hidden")))
+-void __dls2(unsigned char *base, size_t *sp)
+-{
+-	if (DL_FDPIC) {
+-		void *p1 = (void *)sp[-2];
+-		void *p2 = (void *)sp[-1];
+-		if (!p1) {
+-			size_t *auxv, aux[AUX_CNT];
+-			for (auxv=sp+1+*sp+1; *auxv; auxv++); auxv++;
+-			decode_vec(auxv, aux, AUX_CNT);
+-			if (aux[AT_BASE]) ldso.base = (void *)aux[AT_BASE];
+-			else ldso.base = (void *)(aux[AT_PHDR] & -4096);
+-		}
+-		app_loadmap = p2 ? p1 : 0;
+-		ldso.loadmap = p2 ? p2 : p1;
+-		ldso.base = laddr(&ldso, 0);
+-	} else {
+-		ldso.base = base;
+-	}
+-	Ehdr *ehdr = (void *)ldso.base;
+-	ldso.name = ldso.shortname = "libc.so";
+-	ldso.global = 1;
+-	ldso.phnum = ehdr->e_phnum;
+-	ldso.phdr = laddr(&ldso, ehdr->e_phoff);
+-	ldso.phentsize = ehdr->e_phentsize;
+-	kernel_mapped_dso(&ldso);
+-	decode_dyn(&ldso);
+-
+-	if (DL_FDPIC) makefuncdescs(&ldso);
+-
+-	/* Prepare storage for to save clobbered REL addends so they
+-	 * can be reused in stage 3. There should be very few. If
+-	 * something goes wrong and there are a huge number, abort
+-	 * instead of risking stack overflow. */
+-	size_t dyn[DYN_CNT];
+-	decode_vec(ldso.dynv, dyn, DYN_CNT);
+-	size_t *rel = laddr(&ldso, dyn[DT_REL]);
+-	size_t rel_size = dyn[DT_RELSZ];
+-	size_t symbolic_rel_cnt = 0;
+-	apply_addends_to = rel;
+-	for (; rel_size; rel+=2, rel_size-=2*sizeof(size_t))
+-		if (!IS_RELATIVE(rel[1], ldso.syms)) symbolic_rel_cnt++;
+-	if (symbolic_rel_cnt >= ADDEND_LIMIT) a_crash();
+-	size_t addends[symbolic_rel_cnt+1];
+-	saved_addends = addends;
+-
+-	head = &ldso;
+-	reloc_all(&ldso);
+-
+-	ldso.relocated = 0;
+-
+-	/* Call dynamic linker stage-3, __dls3, looking it up
+-	 * symbolically as a barrier against moving the address
+-	 * load across the above relocation processing. */
+-	struct symdef dls3_def = find_sym(&ldso, "__dls3", 0);
+-	if (DL_FDPIC) ((stage3_func)&ldso.funcdescs[dls3_def.sym-ldso.syms])(sp);
+-	else ((stage3_func)laddr(&ldso, dls3_def.sym->st_value))(sp);
+-}
+-
+-/* Stage 3 of the dynamic linker is called with the dynamic linker/libc
+- * fully functional. Its job is to load (if not already loaded) and
+- * process dependencies and relocations for the main application and
+- * transfer control to its entry point. */
+-
+-_Noreturn void __dls3(size_t *sp)
+-{
+-	static struct dso app, vdso;
+-	size_t aux[AUX_CNT], *auxv;
+-	size_t i;
+-	char *env_preload=0;
+-	size_t vdso_base;
+-	int argc = *sp;
+-	char **argv = (void *)(sp+1);
+-	char **argv_orig = argv;
+-	char **envp = argv+argc+1;
+-
+-	/* Find aux vector just past environ[] and use it to initialize
+-	 * global data that may be needed before we can make syscalls. */
+-	__environ = envp;
+-	for (i=argc+1; argv[i]; i++);
+-	libc.auxv = auxv = (void *)(argv+i+1);
+-	decode_vec(auxv, aux, AUX_CNT);
+-	__hwcap = aux[AT_HWCAP];
+-	libc.page_size = aux[AT_PAGESZ];
+-	libc.secure = ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
+-		|| aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]);
+-
+-	/* Setup early thread pointer in builtin_tls for ldso/libc itself to
+-	 * use during dynamic linking. If possible it will also serve as the
+-	 * thread pointer at runtime. */
+-	libc.tls_size = sizeof builtin_tls;
+-	if (__init_tp(__copy_tls((void *)builtin_tls)) < 0) {
+-		a_crash();
+-	}
+-
+-	/* Only trust user/env if kernel says we're not suid/sgid */
+-	if (!libc.secure) {
+-		env_path = getenv("LD_LIBRARY_PATH");
+-		env_preload = getenv("LD_PRELOAD");
+-	}
+-
+-	/* If the main program was already loaded by the kernel,
+-	 * AT_PHDR will point to some location other than the dynamic
+-	 * linker's program headers. */
+-	if (aux[AT_PHDR] != (size_t)ldso.phdr) {
+-		size_t interp_off = 0;
+-		size_t tls_image = 0;
+-		/* Find load address of the main program, via AT_PHDR vs PT_PHDR. */
+-		Phdr *phdr = app.phdr = (void *)aux[AT_PHDR];
+-		app.phnum = aux[AT_PHNUM];
+-		app.phentsize = aux[AT_PHENT];
+-		for (i=aux[AT_PHNUM]; i; i--, phdr=(void *)((char *)phdr + aux[AT_PHENT])) {
+-			if (phdr->p_type == PT_PHDR)
+-				app.base = (void *)(aux[AT_PHDR] - phdr->p_vaddr);
+-			else if (phdr->p_type == PT_INTERP)
+-				interp_off = (size_t)phdr->p_vaddr;
+-			else if (phdr->p_type == PT_TLS) {
+-				tls_image = phdr->p_vaddr;
+-				app.tls_len = phdr->p_filesz;
+-				app.tls_size = phdr->p_memsz;
+-				app.tls_align = phdr->p_align;
+-			}
+-		}
+-		if (DL_FDPIC) app.loadmap = app_loadmap;
+-		if (app.tls_size) app.tls_image = laddr(&app, tls_image);
+-		if (interp_off) ldso.name = laddr(&app, interp_off);
+-		if ((aux[0] & (1UL<<AT_EXECFN))
+-		    && strncmp((char *)aux[AT_EXECFN], "/proc/", 6))
+-			app.name = (char *)aux[AT_EXECFN];
+-		else
+-			app.name = argv[0];
+-		kernel_mapped_dso(&app);
+-	} else {
+-		int fd;
+-		char *ldname = argv[0];
+-		size_t l = strlen(ldname);
+-		if (l >= 3 && !strcmp(ldname+l-3, "ldd")) ldd_mode = 1;
+-		argv++;
+-		while (argv[0] && argv[0][0]=='-' && argv[0][1]=='-') {
+-			char *opt = argv[0]+2;
+-			*argv++ = (void *)-1;
+-			if (!*opt) {
+-				break;
+-			} else if (!memcmp(opt, "list", 5)) {
+-				ldd_mode = 1;
+-			} else if (!memcmp(opt, "library-path", 12)) {
+-				if (opt[12]=='=') env_path = opt+13;
+-				else if (opt[12]) *argv = 0;
+-				else if (*argv) env_path = *argv++;
+-			} else if (!memcmp(opt, "preload", 7)) {
+-				if (opt[7]=='=') env_preload = opt+8;
+-				else if (opt[7]) *argv = 0;
+-				else if (*argv) env_preload = *argv++;
+-			} else {
+-				argv[0] = 0;
+-			}
+-		}
+-		argv[-1] = (void *)(argc - (argv-argv_orig));
+-		if (!argv[0]) {
+-			dprintf(2, "musl libc\n"
+-				"Version %s\n"
+-				"Dynamic Program Loader\n"
+-				"Usage: %s [options] [--] pathname%s\n",
+-				__libc_get_version(), ldname,
+-				ldd_mode ? "" : " [args]");
+-			_exit(1);
+-		}
+-		fd = open(argv[0], O_RDONLY);
+-		if (fd < 0) {
+-			dprintf(2, "%s: cannot load %s: %s\n", ldname, argv[0], strerror(errno));
+-			_exit(1);
+-		}
+-		runtime = 1;
+-		Ehdr *ehdr = (void *)map_library(fd, &app);
+-		if (!ehdr) {
+-			dprintf(2, "%s: %s: Not a valid dynamic program\n", ldname, argv[0]);
+-			_exit(1);
+-		}
+-		runtime = 0;
+-		close(fd);
+-		ldso.name = ldname;
+-		app.name = argv[0];
+-		aux[AT_ENTRY] = (size_t)laddr(&app, ehdr->e_entry);
+-		/* Find the name that would have been used for the dynamic
+-		 * linker had ldd not taken its place. */
+-		if (ldd_mode) {
+-			for (i=0; i<app.phnum; i++) {
+-				if (app.phdr[i].p_type == PT_INTERP)
+-					ldso.name = laddr(&app, app.phdr[i].p_vaddr);
+-			}
+-			dprintf(1, "\t%s (%p)\n", ldso.name, ldso.base);
+-		}
+-	}
+-	if (app.tls_size) {
+-		app.tls_id = tls_cnt = 1;
+-#ifdef TLS_ABOVE_TP
+-		app.tls_offset = 0;
+-		tls_offset = app.tls_size
+-			+ ( -((uintptr_t)app.tls_image + app.tls_size)
+-			& (app.tls_align-1) );
+-#else
+-		tls_offset = app.tls_offset = app.tls_size
+-			+ ( -((uintptr_t)app.tls_image + app.tls_size)
+-			& (app.tls_align-1) );
+-#endif
+-		tls_align = MAXP2(tls_align, app.tls_align);
+-	}
+-	app.global = 1;
+-	decode_dyn(&app);
+-	if (DL_FDPIC) {
+-		makefuncdescs(&app);
+-		if (!app.loadmap) {
+-			app.loadmap = (void *)&app_dummy_loadmap;
+-			app.loadmap->nsegs = 1;
+-			app.loadmap->segs[0].addr = (size_t)app.map;
+-			app.loadmap->segs[0].p_vaddr = (size_t)app.map
+-				- (size_t)app.base;
+-			app.loadmap->segs[0].p_memsz = app.map_len;
+-		}
+-		argv[-3] = (void *)app.loadmap;
+-	}
+-
+-	/* Attach to vdso, if provided by the kernel */
+-	if (search_vec(auxv, &vdso_base, AT_SYSINFO_EHDR)) {
+-		Ehdr *ehdr = (void *)vdso_base;
+-		Phdr *phdr = vdso.phdr = (void *)(vdso_base + ehdr->e_phoff);
+-		vdso.phnum = ehdr->e_phnum;
+-		vdso.phentsize = ehdr->e_phentsize;
+-		for (i=ehdr->e_phnum; i; i--, phdr=(void *)((char *)phdr + ehdr->e_phentsize)) {
+-			if (phdr->p_type == PT_DYNAMIC)
+-				vdso.dynv = (void *)(vdso_base + phdr->p_offset);
+-			if (phdr->p_type == PT_LOAD)
+-				vdso.base = (void *)(vdso_base - phdr->p_vaddr + phdr->p_offset);
+-		}
+-		vdso.name = "";
+-		vdso.shortname = "linux-gate.so.1";
+-		vdso.global = 1;
+-		vdso.relocated = 1;
+-		decode_dyn(&vdso);
+-		vdso.prev = &ldso;
+-		ldso.next = &vdso;
+-	}
+-
+-	/* Initial dso chain consists only of the app. */
+-	head = tail = &app;
+-
+-	/* Donate unused parts of app and library mapping to malloc */
+-	reclaim_gaps(&app);
+-	reclaim_gaps(&ldso);
+-
+-	/* Load preload/needed libraries, add their symbols to the global
+-	 * namespace, and perform all remaining relocations. */
+-	if (env_preload) load_preload(env_preload);
+-	load_deps(&app);
+-	make_global(&app);
+-
+-#ifndef DYNAMIC_IS_RO
+-	for (i=0; app.dynv[i]; i+=2)
+-		if (app.dynv[i]==DT_DEBUG)
+-			app.dynv[i+1] = (size_t)&debug;
+-#endif
+-
+-	/* The main program must be relocated LAST since it may contin
+-	 * copy relocations which depend on libraries' relocations. */
+-	reloc_all(app.next);
+-	reloc_all(&app);
+-
+-	update_tls_size();
+-	if (libc.tls_size > sizeof builtin_tls || tls_align > MIN_TLS_ALIGN) {
+-		void *initial_tls = calloc(libc.tls_size, 1);
+-		if (!initial_tls) {
+-			dprintf(2, "%s: Error getting %zu bytes thread-local storage: %m\n",
+-				argv[0], libc.tls_size);
+-			_exit(127);
+-		}
+-		if (__init_tp(__copy_tls(initial_tls)) < 0) {
+-			a_crash();
+-		}
+-	} else {
+-		size_t tmp_tls_size = libc.tls_size;
+-		pthread_t self = __pthread_self();
+-		/* Temporarily set the tls size to the full size of
+-		 * builtin_tls so that __copy_tls will use the same layout
+-		 * as it did for before. Then check, just to be safe. */
+-		libc.tls_size = sizeof builtin_tls;
+-		if (__copy_tls((void*)builtin_tls) != self) a_crash();
+-		libc.tls_size = tmp_tls_size;
+-	}
+-	static_tls_cnt = tls_cnt;
+-
+-	if (ldso_fail) _exit(127);
+-	if (ldd_mode) _exit(0);
+-
+-	/* Switch to runtime mode: any further failures in the dynamic
+-	 * linker are a reportable failure rather than a fatal startup
+-	 * error. */
+-	runtime = 1;
+-
+-	debug.ver = 1;
+-	debug.bp = dl_debug_state;
+-	debug.head = head;
+-	debug.base = ldso.base;
+-	debug.state = 0;
+-	_dl_debug_state();
+-
+-	__init_libc(envp, argv[0]);
+-	atexit(do_fini);
+-	errno = 0;
+-
+-	CRTJMP((void *)aux[AT_ENTRY], argv-1);
+-	for(;;);
+-}
+-
+-void *dlopen(const char *file, int mode)
+-{
+-	struct dso *volatile p, *orig_tail, *next;
+-	size_t orig_tls_cnt, orig_tls_offset, orig_tls_align;
+-	size_t i;
+-	int cs;
+-	jmp_buf jb;
+-
+-	if (!file) return head;
+-
+-	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
+-	pthread_rwlock_wrlock(&lock);
+-	__inhibit_ptc();
+-
+-	p = 0;
+-	orig_tls_cnt = tls_cnt;
+-	orig_tls_offset = tls_offset;
+-	orig_tls_align = tls_align;
+-	orig_tail = tail;
+-	noload = mode & RTLD_NOLOAD;
+-
+-	rtld_fail = &jb;
+-	if (setjmp(*rtld_fail)) {
+-		/* Clean up anything new that was (partially) loaded */
+-		if (p && p->deps) for (i=0; p->deps[i]; i++)
+-			if (p->deps[i]->global < 0)
+-				p->deps[i]->global = 0;
+-		for (p=orig_tail->next; p; p=next) {
+-			next = p->next;
+-			while (p->td_index) {
+-				void *tmp = p->td_index->next;
+-				free(p->td_index);
+-				p->td_index = tmp;
+-			}
+-			free(p->funcdescs);
+-			if (p->rpath != p->rpath_orig)
+-				free(p->rpath);
+-			free(p->deps);
+-			unmap_library(p);
+-			free(p);
+-		}
+-		tls_cnt = orig_tls_cnt;
+-		tls_offset = orig_tls_offset;
+-		tls_align = orig_tls_align;
+-		tail = orig_tail;
+-		tail->next = 0;
+-		p = 0;
+-		goto end;
+-	} else p = load_library(file, head);
+-
+-	if (!p) {
+-		error(noload ?
+-			"Library %s is not already loaded" :
+-			"Error loading shared library %s: %m",
+-			file);
+-		goto end;
+-	}
+-
+-	/* First load handling */
+-	if (!p->deps) {
+-		load_deps(p);
+-		if (p->deps) for (i=0; p->deps[i]; i++)
+-			if (!p->deps[i]->global)
+-				p->deps[i]->global = -1;
+-		if (!p->global) p->global = -1;
+-		reloc_all(p);
+-		if (p->deps) for (i=0; p->deps[i]; i++)
+-			if (p->deps[i]->global < 0)
+-				p->deps[i]->global = 0;
+-		if (p->global < 0) p->global = 0;
+-	}
+-
+-	if (mode & RTLD_GLOBAL) {
+-		if (p->deps) for (i=0; p->deps[i]; i++)
+-			p->deps[i]->global = 1;
+-		p->global = 1;
+-	}
+-
+-	update_tls_size();
+-	_dl_debug_state();
+-	orig_tail = tail;
+-end:
+-	__release_ptc();
+-	if (p) gencnt++;
+-	pthread_rwlock_unlock(&lock);
+-	if (p) do_init_fini(orig_tail);
+-	pthread_setcancelstate(cs, 0);
+-	return p;
+-}
+-
+-static int invalid_dso_handle(void *h)
+-{
+-	struct dso *p;
+-	for (p=head; p; p=p->next) if (h==p) return 0;
+-	error("Invalid library handle %p", (void *)h);
+-	return 1;
+-}
+-
+-static void *addr2dso(size_t a)
+-{
+-	struct dso *p;
+-	size_t i;
+-	if (DL_FDPIC) for (p=head; p; p=p->next) {
+-		i = count_syms(p);
+-		if (a-(size_t)p->funcdescs < i*sizeof(*p->funcdescs))
+-			return p;
+-	}
+-	for (p=head; p; p=p->next) {
+-		if (DL_FDPIC && p->loadmap) {
+-			for (i=0; i<p->loadmap->nsegs; i++) {
+-				if (a-p->loadmap->segs[i].p_vaddr
+-				    < p->loadmap->segs[i].p_memsz)
+-					return p;
+-			}
+-		} else {
+-			if (a-(size_t)p->map < p->map_len)
+-				return p;
+-		}
+-	}
+-	return 0;
+-}
+-
+-void *__tls_get_addr(size_t *);
+-
+-static void *do_dlsym(struct dso *p, const char *s, void *ra)
+-{
+-	size_t i;
+-	uint32_t h = 0, gh = 0, *ght;
+-	Sym *sym;
+-	if (p == head || p == RTLD_DEFAULT || p == RTLD_NEXT) {
+-		if (p == RTLD_DEFAULT) {
+-			p = head;
+-		} else if (p == RTLD_NEXT) {
+-			p = addr2dso((size_t)ra);
+-			if (!p) p=head;
+-			p = p->next;
+-		}
+-		struct symdef def = find_sym(p, s, 0);
+-		if (!def.sym) goto failed;
+-		if ((def.sym->st_info&0xf) == STT_TLS)
+-			return __tls_get_addr((size_t []){def.dso->tls_id, def.sym->st_value});
+-		if (DL_FDPIC && (def.sym->st_info&0xf) == STT_FUNC)
+-			return def.dso->funcdescs + (def.sym - def.dso->syms);
+-		return laddr(def.dso, def.sym->st_value);
+-	}
+-	if (invalid_dso_handle(p))
+-		return 0;
+-	if ((ght = p->ghashtab)) {
+-		gh = gnu_hash(s);
+-		sym = gnu_lookup(gh, ght, p, s);
+-	} else {
+-		h = sysv_hash(s);
+-		sym = sysv_lookup(s, h, p);
+-	}
+-	if (sym && (sym->st_info&0xf) == STT_TLS)
+-		return __tls_get_addr((size_t []){p->tls_id, sym->st_value});
+-	if (DL_FDPIC && sym && sym->st_shndx && (sym->st_info&0xf) == STT_FUNC)
+-		return p->funcdescs + (sym - p->syms);
+-	if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
+-		return laddr(p, sym->st_value);
+-	if (p->deps) for (i=0; p->deps[i]; i++) {
+-		if ((ght = p->deps[i]->ghashtab)) {
+-			if (!gh) gh = gnu_hash(s);
+-			sym = gnu_lookup(gh, ght, p->deps[i], s);
+-		} else {
+-			if (!h) h = sysv_hash(s);
+-			sym = sysv_lookup(s, h, p->deps[i]);
+-		}
+-		if (sym && (sym->st_info&0xf) == STT_TLS)
+-			return __tls_get_addr((size_t []){p->deps[i]->tls_id, sym->st_value});
+-		if (DL_FDPIC && sym && sym->st_shndx && (sym->st_info&0xf) == STT_FUNC)
+-			return p->deps[i]->funcdescs + (sym - p->deps[i]->syms);
+-		if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
+-			return laddr(p->deps[i], sym->st_value);
+-	}
+-failed:
+-	error("Symbol not found: %s", s);
+-	return 0;
+-}
+-
+-int __dladdr(const void *addr, Dl_info *info)
+-{
+-	struct dso *p;
+-	Sym *sym, *bestsym;
+-	uint32_t nsym;
+-	char *strings;
+-	void *best = 0;
+-
+-	pthread_rwlock_rdlock(&lock);
+-	p = addr2dso((size_t)addr);
+-	pthread_rwlock_unlock(&lock);
+-
+-	if (!p) return 0;
+-
+-	sym = p->syms;
+-	strings = p->strings;
+-	nsym = count_syms(p);
+-
+-	if (DL_FDPIC) {
+-		size_t idx = ((size_t)addr-(size_t)p->funcdescs)
+-			/ sizeof(*p->funcdescs);
+-		if (idx < nsym && (sym[idx].st_info&0xf) == STT_FUNC) {
+-			best = p->funcdescs + idx;
+-			bestsym = sym + idx;
+-		}
+-	}
+-
+-	if (!best) for (; nsym; nsym--, sym++) {
+-		if (sym->st_value
+-		 && (1<<(sym->st_info&0xf) & OK_TYPES)
+-		 && (1<<(sym->st_info>>4) & OK_BINDS)) {
+-			void *symaddr = laddr(p, sym->st_value);
+-			if (symaddr > addr || symaddr < best)
+-				continue;
+-			best = symaddr;
+-			bestsym = sym;
+-			if (addr == symaddr)
+-				break;
+-		}
+-	}
+-
+-	if (!best) return 0;
+-
+-	if (DL_FDPIC && (bestsym->st_info&0xf) == STT_FUNC)
+-		best = p->funcdescs + (bestsym - p->syms);
+-
+-	info->dli_fname = p->name;
+-	info->dli_fbase = p->base;
+-	info->dli_sname = strings + bestsym->st_name;
+-	info->dli_saddr = best;
+-
+-	return 1;
+-}
+-
+-__attribute__((__visibility__("hidden")))
+-void *__dlsym(void *restrict p, const char *restrict s, void *restrict ra)
+-{
+-	void *res;
+-	pthread_rwlock_rdlock(&lock);
+-	res = do_dlsym(p, s, ra);
+-	pthread_rwlock_unlock(&lock);
+-	return res;
+-}
+-
+-int dl_iterate_phdr(int(*callback)(struct dl_phdr_info *info, size_t size, void *data), void *data)
+-{
+-	struct dso *current;
+-	struct dl_phdr_info info;
+-	int ret = 0;
+-	for(current = head; current;) {
+-		info.dlpi_addr      = (uintptr_t)current->base;
+-		info.dlpi_name      = current->name;
+-		info.dlpi_phdr      = current->phdr;
+-		info.dlpi_phnum     = current->phnum;
+-		info.dlpi_adds      = gencnt;
+-		info.dlpi_subs      = 0;
+-		info.dlpi_tls_modid = current->tls_id;
+-		info.dlpi_tls_data  = current->tls_image;
+-
+-		ret = (callback)(&info, sizeof (info), data);
+-
+-		if (ret != 0) break;
+-
+-		pthread_rwlock_rdlock(&lock);
+-		current = current->next;
+-		pthread_rwlock_unlock(&lock);
+-	}
+-	return ret;
+-}
+-#else
+-static int invalid_dso_handle(void *h)
+-{
+-	error("Invalid library handle %p", (void *)h);
+-	return 1;
+-}
+-void *dlopen(const char *file, int mode)
+-{
+-	error("Dynamic loading not supported");
+-	return 0;
+-}
+-void *__dlsym(void *restrict p, const char *restrict s, void *restrict ra)
+-{
+-	error("Symbol not found: %s", s);
+-	return 0;
+-}
+-int __dladdr (const void *addr, Dl_info *info)
+-{
+-	return 0;
+-}
+-#endif
+-
+-int __dlinfo(void *dso, int req, void *res)
+-{
+-	if (invalid_dso_handle(dso)) return -1;
+-	if (req != RTLD_DI_LINKMAP) {
+-		error("Unsupported request %d", req);
+-		return -1;
+-	}
+-	*(struct link_map **)res = dso;
+-	return 0;
+-}
+-
+-char *dlerror()
+-{
+-	pthread_t self = __pthread_self();
+-	if (!self->dlerror_flag) return 0;
+-	self->dlerror_flag = 0;
+-	char *s = self->dlerror_buf;
+-	if (s == (void *)-1)
+-		return "Dynamic linker failed to allocate memory for error message";
+-	else
+-		return s;
+-}
+-
+-int dlclose(void *p)
+-{
+-	return invalid_dso_handle(p);
+-}
+-
+-void __dl_thread_cleanup(void)
+-{
+-	pthread_t self = __pthread_self();
+-	if (self->dlerror_buf != (void *)-1)
+-		free(self->dlerror_buf);
+-}
+-
+-static void error(const char *fmt, ...)
+-{
+-	va_list ap;
+-	va_start(ap, fmt);
+-#ifdef SHARED
+-	if (!runtime) {
+-		vdprintf(2, fmt, ap);
+-		dprintf(2, "\n");
+-		ldso_fail = 1;
+-		va_end(ap);
+-		return;
+-	}
+-#endif
+-	pthread_t self = __pthread_self();
+-	if (self->dlerror_buf != (void *)-1)
+-		free(self->dlerror_buf);
+-	size_t len = vsnprintf(0, 0, fmt, ap);
+-	va_end(ap);
+-	char *buf = malloc(len+1);
+-	if (buf) {
+-		va_start(ap, fmt);
+-		vsnprintf(buf, len+1, fmt, ap);
+-		va_end(ap);
+-	} else {
+-		buf = (void *)-1;	
+-	}
+-	self->dlerror_buf = buf;
+-	self->dlerror_flag = 1;
+-}
+--- a/src/ldso/tlsdesc.c
++++ b/src/ldso/tlsdesc.c
+@@ -1,5 +1,3 @@
+-#ifdef SHARED
+-
+ #include <stddef.h>
+ #include "libc.h"
+ 
+@@ -12,5 +10,3 @@ ptrdiff_t __tlsdesc_static()
+ }
+ 
+ weak_alias(__tlsdesc_static, __tlsdesc_dynamic);
+-
+-#endif
+--- a/src/legacy/utmpx.c
++++ b/src/legacy/utmpx.c
+@@ -1,5 +1,6 @@
+ #include <utmpx.h>
+ #include <stddef.h>
++#include <errno.h>
+ #include "libc.h"
+ 
+ void endutxent(void)
+@@ -34,6 +35,12 @@ void updwtmpx(const char *f, const struc
+ {
+ }
+ 
++int __utmpxname(const char *f)
++{
++	errno = ENOTSUP;
++	return -1;
++}
++
+ weak_alias(endutxent, endutent);
+ weak_alias(setutxent, setutent);
+ weak_alias(getutxent, getutent);
+@@ -41,3 +48,5 @@ weak_alias(getutxid, getutid);
+ weak_alias(getutxline, getutline);
+ weak_alias(pututxline, pututline);
+ weak_alias(updwtmpx, updwtmp);
++weak_alias(__utmpxname, utmpname);
++weak_alias(__utmpxname, utmpxname);
+--- /dev/null
++++ b/src/linux/x32/sysinfo.c
+@@ -0,0 +1,50 @@
++#include <sys/sysinfo.h>
++#include "syscall.h"
++#include "libc.h"
++
++#define klong long long
++#define kulong unsigned long long
++
++struct kernel_sysinfo {
++	klong uptime;
++	kulong loads[3];
++	kulong totalram;
++	kulong freeram;
++	kulong sharedram;
++	kulong bufferram;
++	kulong totalswap;
++	kulong freeswap;
++	short procs;
++	short pad;
++	kulong totalhigh;
++	kulong freehigh;
++	unsigned mem_unit;
++};
++
++int __lsysinfo(struct sysinfo *info) /* x32 sysinfo(): copies the long long based kernel_sysinfo result into *info, scaling memory counts down if needed */
++{
++	struct kernel_sysinfo tmp;
++	int ret = syscall(SYS_sysinfo, &tmp);
++	if(ret == -1) return ret; /* on failure *info is left untouched */
++	info->uptime = tmp.uptime;
++	info->loads[0] = tmp.loads[0];
++	info->loads[1] = tmp.loads[1];
++	info->loads[2] = tmp.loads[2];
++	kulong shifts;
++	kulong max = tmp.totalram | tmp.totalswap; /* carries the highest set bit of either total */
++	__asm__("bsr %1,%0" : "=r"(shifts) : "r"(max)); /* shifts = index of the highest set bit of max; NOTE(review): bsr leaves its result undefined when max == 0 */
++	shifts = shifts >= 32 ? shifts - 31 : 0; /* if that bit sits at index 32 or above, shift enough to bring it down to index 31 */
++	info->totalram = tmp.totalram >> shifts;
++	info->freeram = tmp.freeram >> shifts;
++	info->sharedram = tmp.sharedram >> shifts;
++	info->bufferram = tmp.bufferram >> shifts;
++	info->totalswap = tmp.totalswap >> shifts;
++	info->freeswap = tmp.freeswap >> shifts;
++	info->procs = tmp.procs ;
++	info->totalhigh = tmp.totalhigh >> shifts;
++	info->freehigh = tmp.freehigh >> shifts;
++	info->mem_unit = (tmp.mem_unit ? tmp.mem_unit : 1) << shifts; /* scale the unit up by the same power of two; a zero unit is treated as 1 */
++	return ret;
++}
++
++weak_alias(__lsysinfo, sysinfo);
+--- a/src/linux/x32/sysinfo.s
++++ /dev/null
+@@ -1 +0,0 @@
+-# see arch/x32/src/sysinfo.c
+--- a/src/locale/langinfo.c
++++ b/src/locale/langinfo.c
+@@ -37,23 +37,23 @@ char *__nl_langinfo_l(nl_item item, loca
+ 	
+ 	switch (cat) {
+ 	case LC_NUMERIC:
+-		if (idx > 1) return NULL;
++		if (idx > 1) return "";
+ 		str = c_numeric;
+ 		break;
+ 	case LC_TIME:
+-		if (idx > 0x31) return NULL;
++		if (idx > 0x31) return "";
+ 		str = c_time;
+ 		break;
+ 	case LC_MONETARY:
+-		if (idx > 0) return NULL;
++		if (idx > 0) return "";
+ 		str = "";
+ 		break;
+ 	case LC_MESSAGES:
+-		if (idx > 3) return NULL;
++		if (idx > 3) return "";
+ 		str = c_messages;
+ 		break;
+ 	default:
+-		return NULL;
++		return "";
+ 	}
+ 
+ 	for (; idx; idx--, str++) for (; *str; str++);
+--- a/src/malloc/lite_malloc.c
++++ b/src/malloc/lite_malloc.c
+@@ -8,7 +8,7 @@
+ 
+ void *__expand_heap(size_t *);
+ 
+-void *__simple_malloc(size_t n)
++static void *__simple_malloc(size_t n)
+ {
+ 	static char *cur, *end;
+ 	static volatile int lock[2];
+--- a/src/math/__rem_pio2.c
++++ b/src/math/__rem_pio2.c
+@@ -118,7 +118,7 @@ int __rem_pio2(double x, double *y)
+ 	if (ix < 0x413921fb) {  /* |x| ~< 2^20*(pi/2), medium size */
+ medium:
+ 		/* rint(x/(pi/2)), Assume round-to-nearest. */
+-		fn = x*invpio2 + toint - toint;
++		fn = (double_t)x*invpio2 + toint - toint;
+ 		n = (int32_t)fn;
+ 		r = x - fn*pio2_1;
+ 		w = fn*pio2_1t;  /* 1st round, good to 85 bits */
+--- a/src/math/__rem_pio2f.c
++++ b/src/math/__rem_pio2f.c
+@@ -51,7 +51,7 @@ int __rem_pio2f(float x, double *y)
+ 	/* 25+53 bit pi is good enough for medium size */
+ 	if (ix < 0x4dc90fdb) {  /* |x| ~< 2^28*(pi/2), medium size */
+ 		/* Use a specialized rint() to get fn.  Assume round-to-nearest. */
+-		fn = x*invpio2 + toint - toint;
++		fn = (double_t)x*invpio2 + toint - toint;
+ 		n  = (int32_t)fn;
+ 		*y = x - fn*pio2_1 - fn*pio2_1t;
+ 		return n;
+--- /dev/null
++++ b/src/math/arm/fabs.c
+@@ -0,0 +1,15 @@
++#include <math.h>
++
++#if __ARM_PCS_VFP
++
++double fabs(double x)
++{
++	__asm__ ("vabs.f64 %P0, %P1" : "=w"(x) : "w"(x));
++	return x;
++}
++
++#else
++
++#include "../fabs.c"
++
++#endif
+--- /dev/null
++++ b/src/math/arm/fabsf.c
+@@ -0,0 +1,15 @@
++#include <math.h>
++
++#if __ARM_PCS_VFP
++
++float fabsf(float x)
++{
++	__asm__ ("vabs.f32 %0, %1" : "=t"(x) : "t"(x));
++	return x;
++}
++
++#else
++
++#include "../fabsf.c"
++
++#endif
+--- /dev/null
++++ b/src/math/arm/sqrt.c
+@@ -0,0 +1,15 @@
++#include <math.h>
++
++#if __VFP_FP__ && !__SOFTFP__
++
++double sqrt(double x)
++{
++	__asm__ ("vsqrt.f64 %P0, %P1" : "=w"(x) : "w"(x));
++	return x;
++}
++
++#else
++
++#include "../sqrt.c"
++
++#endif
+--- /dev/null
++++ b/src/math/arm/sqrtf.c
+@@ -0,0 +1,15 @@
++#include <math.h>
++
++#if __VFP_FP__ && !__SOFTFP__
++
++float sqrtf(float x)
++{
++	__asm__ ("vsqrt.f32 %0, %1" : "=t"(x) : "t"(x));
++	return x;
++}
++
++#else
++
++#include "../sqrtf.c"
++
++#endif
+--- a/src/math/armebhf/fabs.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-../armhf/fabs.s
+--- a/src/math/armebhf/fabsf.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-../armhf/fabsf.s
+--- a/src/math/armebhf/sqrt.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-../armhf/sqrt.s
+--- a/src/math/armebhf/sqrtf.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-../armhf/sqrtf.s
+--- a/src/math/armhf/fabs.s
++++ /dev/null
+@@ -1,7 +0,0 @@
+-.fpu vfp
+-.text
+-.global fabs
+-.type   fabs,%function
+-fabs:
+-	vabs.f64 d0, d0
+-	bx lr
+--- a/src/math/armhf/fabs.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-fabs.s
+--- a/src/math/armhf/fabsf.s
++++ /dev/null
+@@ -1,7 +0,0 @@
+-.fpu vfp
+-.text
+-.global fabsf
+-.type   fabsf,%function
+-fabsf:
+-	vabs.f32 s0, s0
+-	bx lr
+--- a/src/math/armhf/fabsf.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-fabsf.s
+--- a/src/math/armhf/sqrt.s
++++ /dev/null
+@@ -1,7 +0,0 @@
+-.fpu vfp
+-.text
+-.global sqrt
+-.type   sqrt,%function
+-sqrt:
+-	vsqrt.f64 d0, d0
+-	bx lr
+--- a/src/math/armhf/sqrt.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-sqrt.s
+--- a/src/math/armhf/sqrtf.s
++++ /dev/null
+@@ -1,7 +0,0 @@
+-.fpu vfp
+-.text
+-.global sqrtf
+-.type   sqrtf,%function
+-sqrtf:
+-	vsqrt.f32 s0, s0
+-	bx lr
+--- a/src/math/armhf/sqrtf.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-sqrtf.s
+--- a/src/math/hypot.c
++++ b/src/math/hypot.c
+@@ -12,10 +12,10 @@ static void sq(double_t *hi, double_t *l
+ {
+ 	double_t xh, xl, xc;
+ 
+-	xc = x*SPLIT;
++	xc = (double_t)x*SPLIT;
+ 	xh = x - xc + xc;
+ 	xl = x - xh;
+-	*hi = x*x;
++	*hi = (double_t)x*x;
+ 	*lo = xh*xh - *hi + 2*xh*xl + xl*xl;
+ }
+ 
+--- a/src/mman/mremap.c
++++ b/src/mman/mremap.c
+@@ -1,17 +1,31 @@
++#define _GNU_SOURCE
+ #include <unistd.h>
+ #include <sys/mman.h>
++#include <errno.h>
++#include <stdint.h>
+ #include <stdarg.h>
+ #include "syscall.h"
+ #include "libc.h"
+ 
++static void dummy(void) { }
++weak_alias(dummy, __vm_wait);
++
+ void *__mremap(void *old_addr, size_t old_len, size_t new_len, int flags, ...)
+ {
+ 	va_list ap;
+-	void *new_addr;
+-	
+-	va_start(ap, flags);
+-	new_addr = va_arg(ap, void *);
+-	va_end(ap);
++	void *new_addr = 0;
++
++	if (new_len >= PTRDIFF_MAX) {
++		errno = ENOMEM;
++		return MAP_FAILED;
++	}
++
++	if (flags & MREMAP_FIXED) {
++		__vm_wait();
++		va_start(ap, flags);
++		new_addr = va_arg(ap, void *);
++		va_end(ap);
++	}
+ 
+ 	return (void *)syscall(SYS_mremap, old_addr, old_len, new_len, flags, new_addr);
+ }
+--- a/src/network/getifaddrs.c
++++ b/src/network/getifaddrs.c
+@@ -162,13 +162,26 @@ static int netlink_msg_to_ifaddr(void *p
+ 		for (rta = NLMSG_RTA(h, sizeof(*ifa)); NLMSG_RTAOK(rta, h); rta = RTA_NEXT(rta)) {
+ 			switch (rta->rta_type) {
+ 			case IFA_ADDRESS:
+-				copy_addr(&ifs->ifa.ifa_addr, ifa->ifa_family, &ifs->addr, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index);
++				/* If ifa_addr is already set, we received an IFA_LOCAL before
++				 * so treat this as destination address */
++				if (ifs->ifa.ifa_addr)
++					copy_addr(&ifs->ifa.ifa_dstaddr, ifa->ifa_family, &ifs->ifu, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index);
++				else
++					copy_addr(&ifs->ifa.ifa_addr, ifa->ifa_family, &ifs->addr, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index);
+ 				break;
+ 			case IFA_BROADCAST:
+-				/* For point-to-point links this is peer, but ifa_broadaddr
+-				 * and ifa_dstaddr are union, so this works for both.  */
+ 				copy_addr(&ifs->ifa.ifa_broadaddr, ifa->ifa_family, &ifs->ifu, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index);
+ 				break;
++			case IFA_LOCAL:
++				/* If ifa_addr is set and we get IFA_LOCAL, assume we have
++				 * a point-to-point network. Move address to correct field. */
++				if (ifs->ifa.ifa_addr) {
++					ifs->ifu = ifs->addr;
++					ifs->ifa.ifa_dstaddr = &ifs->ifu.sa;
++					memset(&ifs->addr, 0, sizeof(ifs->addr));
++				}
++				copy_addr(&ifs->ifa.ifa_addr, ifa->ifa_family, &ifs->addr, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index);
++				break;
+ 			case IFA_LABEL:
+ 				if (RTA_DATALEN(rta) < sizeof(ifs->name)) {
+ 					memcpy(ifs->name, RTA_DATA(rta), RTA_DATALEN(rta));
+--- a/src/network/getnameinfo.c
++++ b/src/network/getnameinfo.c
+@@ -135,13 +135,13 @@ int getnameinfo(const struct sockaddr *r
+ 	switch (af) {
+ 	case AF_INET:
+ 		a = (void *)&((struct sockaddr_in *)sa)->sin_addr;
+-		if (sl != sizeof(struct sockaddr_in)) return EAI_FAMILY;
++		if (sl < sizeof(struct sockaddr_in)) return EAI_FAMILY;
+ 		mkptr4(ptr, a);
+ 		scopeid = 0;
+ 		break;
+ 	case AF_INET6:
+ 		a = (void *)&((struct sockaddr_in6 *)sa)->sin6_addr;
+-		if (sl != sizeof(struct sockaddr_in6)) return EAI_FAMILY;
++		if (sl < sizeof(struct sockaddr_in6)) return EAI_FAMILY;
+ 		if (memcmp(a, "\0\0\0\0\0\0\0\0\0\0\xff\xff", 12))
+ 			mkptr6(ptr, a);
+ 		else
+--- a/src/network/if_nametoindex.c
++++ b/src/network/if_nametoindex.c
+@@ -10,7 +10,7 @@ unsigned if_nametoindex(const char *name
+ 	struct ifreq ifr;
+ 	int fd, r;
+ 
+-	if ((fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0)) < 0) return -1;
++	if ((fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0)) < 0) return 0;
+ 	strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
+ 	r = ioctl(fd, SIOCGIFINDEX, &ifr);
+ 	__syscall(SYS_close, fd);
+--- a/src/network/lookup.h
++++ b/src/network/lookup.h
+@@ -2,6 +2,7 @@
+ #define LOOKUP_H
+ 
+ #include <stdint.h>
++#include <stddef.h>
+ 
+ struct address {
+ 	int family;
+@@ -15,6 +16,14 @@ struct service {
+ 	unsigned char proto, socktype;
+ };
+ 
++#define MAXNS 3
++
++struct resolvconf {
++	struct address ns[MAXNS];
++	unsigned nns, attempts, ndots;
++	unsigned timeout;
++};
++
+ /* The limit of 48 results is a non-sharp bound on the number of addresses
+  * that can fit in one 512-byte DNS packet full of v4 results and a second
+  * packet full of v6 results. Due to headers, the actual limit is lower. */
+@@ -25,4 +34,6 @@ int __lookup_serv(struct service buf[sta
+ int __lookup_name(struct address buf[static MAXADDRS], char canon[static 256], const char *name, int family, int flags);
+ int __lookup_ipliteral(struct address buf[static 1], const char *name, int family);
+ 
++int __get_resolv_conf(struct resolvconf *, char *, size_t);
++
+ #endif
+--- a/src/network/lookup_name.c
++++ b/src/network/lookup_name.c
+@@ -9,6 +9,7 @@
+ #include <fcntl.h>
+ #include <unistd.h>
+ #include <pthread.h>
++#include <errno.h>
+ #include "lookup.h"
+ #include "stdio_impl.h"
+ #include "syscall.h"
+@@ -51,7 +52,14 @@ static int name_from_hosts(struct addres
+ 	int cnt = 0;
+ 	unsigned char _buf[1032];
+ 	FILE _f, *f = __fopen_rb_ca("/etc/hosts", &_f, _buf, sizeof _buf);
+-	if (!f) return 0;
++	if (!f) switch (errno) {
++	case ENOENT:
++	case ENOTDIR:
++	case EACCES:
++		return 0;
++	default:
++		return EAI_SYSTEM;
++	}
+ 	while (fgets(line, sizeof line, f) && cnt < MAXADDRS) {
+ 		char *p, *z;
+ 
+@@ -85,7 +93,7 @@ struct dpc_ctx {
+ int __dns_parse(const unsigned char *, int, int (*)(void *, int, const void *, int, const void *), void *);
+ int __dn_expand(const unsigned char *, const unsigned char *, const unsigned char *, char *, int);
+ int __res_mkquery(int, const char *, int, int, const unsigned char *, int, const unsigned char*, unsigned char *, int);
+-int __res_msend(int, const unsigned char *const *, const int *, unsigned char *const *, int *, int);
++int __res_msend_rc(int, const unsigned char *const *, const int *, unsigned char *const *, int *, int, const struct resolvconf *);
+ 
+ #define RR_A 1
+ #define RR_CNAME 5
+@@ -117,7 +125,7 @@ static int dns_parse_callback(void *c, i
+ 	return 0;
+ }
+ 
+-static int name_from_dns(struct address buf[static MAXADDRS], char canon[static 256], const char *name, int family)
++static int name_from_dns(struct address buf[static MAXADDRS], char canon[static 256], const char *name, int family, const struct resolvconf *conf)
+ {
+ 	unsigned char qbuf[2][280], abuf[2][512];
+ 	const unsigned char *qp[2] = { qbuf[0], qbuf[1] };
+@@ -137,17 +145,59 @@ static int name_from_dns(struct address
+ 		nq++;
+ 	}
+ 
+-	if (__res_msend(nq, qp, qlens, ap, alens, sizeof *abuf) < 0) return EAI_SYSTEM;
++	if (__res_msend_rc(nq, qp, qlens, ap, alens, sizeof *abuf, conf) < 0)
++		return EAI_SYSTEM;
+ 
+ 	for (i=0; i<nq; i++)
+ 		__dns_parse(abuf[i], alens[i], dns_parse_callback, &ctx);
+ 
+ 	if (ctx.cnt) return ctx.cnt;
+ 	if (alens[0] < 4 || (abuf[0][3] & 15) == 2) return EAI_AGAIN;
+-	if ((abuf[0][3] & 15) == 3) return EAI_NONAME;
++	if ((abuf[0][3] & 15) == 0) return EAI_NONAME;
++	if ((abuf[0][3] & 15) == 3) return 0;
+ 	return EAI_FAIL;
+ }
+ 
++static int name_from_dns_search(struct address buf[static MAXADDRS], char canon[static 256], const char *name, int family) /* DNS lookup that first tries name with each resolv.conf search domain appended, then name as given */
++{
++	char search[256];
++	struct resolvconf conf;
++	size_t l, dots;
++	char *p, *z;
++
++	if (__get_resolv_conf(&conf, search, sizeof search) < 0) return -1; /* NOTE(review): raw -1 rather than a named EAI_* constant; confirm the intended error code */
++
++	/* Count dots, suppress search when >=ndots or name ends in
++	 * a dot, which is an explicit request for global scope. */
++	for (dots=l=0; name[l]; l++) if (name[l]=='.') dots++;
++	if (dots >= conf.ndots || name[l-1]=='.') *search = 0; /* NOTE(review): for an empty name (l == 0) with ndots > 0 this reads name[-1]; confirm callers reject "" */
++
++	/* This can never happen; the caller already checked length. */
++	if (l >= 256) return EAI_NONAME;
++
++	/* Name with search domain appended is setup in canon[]. This both
++	 * provides the desired default canonical name (if the requested
++	 * name is not a CNAME record) and serves as a buffer for passing
++	 * the full requested name to name_from_dns. */
++	memcpy(canon, name, l);
++	canon[l] = '.';
++
++	for (p=search; *p; p=z) { /* walk the whitespace-separated domains in search[] */
++		for (; isspace(*p); p++);
++		for (z=p; *z && !isspace(*z); z++);
++		if (z==p) break; /* only whitespace was left */
++		if (z-p < 256 - l - 1) { /* name + '.' + domain + NUL must fit in canon[256]; longer candidates are skipped */
++			memcpy(canon+l+1, p, z-p);
++			canon[z-p+1+l] = 0;
++			int cnt = name_from_dns(buf, canon, canon, family, &conf);
++			if (cnt) return cnt; /* any non-zero result ends the search; zero moves on to the next domain */
++		}
++	}
++
++	canon[l] = 0; /* drop the '.' and any appended domain: canon is the bare name again */
++	return name_from_dns(buf, canon, name, family, &conf);
++}
++
+ static const struct policy {
+ 	unsigned char addr[16];
+ 	unsigned char len, mask;
+@@ -248,7 +298,7 @@ int __lookup_name(struct address buf[sta
+ 	if (!cnt) cnt = name_from_numeric(buf, name, family);
+ 	if (!cnt && !(flags & AI_NUMERICHOST)) {
+ 		cnt = name_from_hosts(buf, canon, name, family);
+-		if (!cnt) cnt = name_from_dns(buf, canon, name, family);
++		if (!cnt) cnt = name_from_dns_search(buf, canon, name, family);
+ 	}
+ 	if (cnt<=0) return cnt ? cnt : EAI_NONAME;
+ 
+--- a/src/network/lookup_serv.c
++++ b/src/network/lookup_serv.c
+@@ -4,6 +4,7 @@
+ #include <ctype.h>
+ #include <string.h>
+ #include <fcntl.h>
++#include <errno.h>
+ #include "lookup.h"
+ #include "stdio_impl.h"
+ 
+@@ -69,7 +70,14 @@ int __lookup_serv(struct service buf[sta
+ 
+ 	unsigned char _buf[1032];
+ 	FILE _f, *f = __fopen_rb_ca("/etc/services", &_f, _buf, sizeof _buf);
+-	if (!f) return EAI_SERVICE;
++	if (!f) switch (errno) {
++	case ENOENT:
++	case ENOTDIR:
++	case EACCES:
++		return EAI_SERVICE;
++	default:
++		return EAI_SYSTEM;
++	}
+ 
+ 	while (fgets(line, sizeof line, f) && cnt < MAXSERVS) {
+ 		if ((p=strchr(line, '#'))) *p++='\n', *p=0;
+--- a/src/network/proto.c
++++ b/src/network/proto.c
+@@ -9,21 +9,36 @@ static const unsigned char protos[] = {
+ 	"\001icmp\0"
+ 	"\002igmp\0"
+ 	"\003ggp\0"
++	"\004ipencap\0"
++	"\005st\0"
+ 	"\006tcp\0"
++	"\010egp\0" /* EGP is protocol 8 = octal 010; "\008" parses as NUL followed by '8' */
+ 	"\014pup\0"
+ 	"\021udp\0"
+-	"\026idp\0"
++	"\024hmp\0"
++	"\026xns-idp\0"
++	"\033rdp\0"
++	"\035iso-tp4\0"
++	"\044xtp\0"
++	"\045ddp\0"
++	"\046idpr-cmtp\0"
+ 	"\051ipv6\0"
+ 	"\053ipv6-route\0"
+ 	"\054ipv6-frag\0"
++	"\055idrp\0"
++	"\056rsvp\0"
+ 	"\057gre\0"
+ 	"\062esp\0"
+ 	"\063ah\0"
++	"\071skip\0"
+ 	"\072ipv6-icmp\0"
+ 	"\073ipv6-nonxt\0"
+ 	"\074ipv6-opts\0"
++	"\111rspf\0"
++	"\121vmtp\0"
+ 	"\131ospf\0"
+ 	"\136ipip\0"
++	"\142encap\0"
+ 	"\147pim\0"
+ 	"\377raw"
+ };
+--- a/src/network/res_msend.c
++++ b/src/network/res_msend.c
+@@ -27,18 +27,16 @@ static unsigned long mtime()
+ 		+ ts.tv_nsec / 1000000;
+ }
+ 
+-int __res_msend(int nqueries, const unsigned char *const *queries,
+-	const int *qlens, unsigned char *const *answers, int *alens, int asize)
++int __res_msend_rc(int nqueries, const unsigned char *const *queries,
++	const int *qlens, unsigned char *const *answers, int *alens, int asize,
++	const struct resolvconf *conf)
+ {
+ 	int fd;
+-	FILE *f, _f;
+-	unsigned char _buf[256];
+-	char line[64], *s, *z;
+-	int timeout = 5000, attempts = 2, retry_interval, servfail_retry;
++	int timeout, attempts, retry_interval, servfail_retry;
+ 	union {
+ 		struct sockaddr_in sin;
+ 		struct sockaddr_in6 sin6;
+-	} sa = {0}, ns[3] = {{0}};
++	} sa = {0}, ns[MAXNS] = {{0}};
+ 	socklen_t sl = sizeof sa.sin;
+ 	int nns = 0;
+ 	int family = AF_INET;
+@@ -48,57 +46,27 @@ int __res_msend(int nqueries, const unsi
+ 	int cs;
+ 	struct pollfd pfd;
+ 	unsigned long t0, t1, t2;
+-	struct address iplit;
+ 
+ 	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
+ 
+-	/* Get nameservers from resolv.conf, fallback to localhost */
+-	f = __fopen_rb_ca("/etc/resolv.conf", &_f, _buf, sizeof _buf);
+-	if (f) for (nns=0; nns<3 && fgets(line, sizeof line, f); ) {
+-		if (!strncmp(line, "options", 7) && isspace(line[7])) {
+-			unsigned long x;
+-			char *p, *z;
+-			p = strstr(line, "timeout:");
+-			if (p && isdigit(p[8])) {
+-				p += 8;
+-				x = strtoul(p, &z, 10);
+-				if (z != p) timeout = x < 30 ? x*1000 : 30000;
+-			}
+-			p = strstr(line, "attempts:");
+-			if (p && isdigit(p[9])) {
+-				p += 9;
+-				x = strtoul(p, &z, 10);
+-				if (z != p) attempts = x < 10 ? x : 10;
+-				if (!attempts) attempts = 1;
+-			}
+-		}
+-		if (strncmp(line, "nameserver", 10) || !isspace(line[10]))
+-			continue;
+-		for (s=line+11; isspace(*s); s++);
+-		for (z=s; *z && !isspace(*z); z++);
+-		*z=0;
++	timeout = 1000*conf->timeout;
++	attempts = conf->attempts;
+ 
+-		if (__lookup_ipliteral(&iplit, s, AF_UNSPEC)>0) {
+-			if (iplit.family == AF_INET) {
+-				memcpy(&ns[nns].sin.sin_addr, iplit.addr, 4);
+-				ns[nns].sin.sin_port = htons(53);
+-				ns[nns++].sin.sin_family = AF_INET;
+-			} else {
+-				sl = sizeof sa.sin6;
+-				memcpy(&ns[nns].sin6.sin6_addr, iplit.addr, 16);
+-				ns[nns].sin6.sin6_port = htons(53);
+-				ns[nns].sin6.sin6_scope_id = iplit.scopeid;
+-				ns[nns++].sin6.sin6_family = family = AF_INET6;
+-			}
++	nns = conf->nns;
++	for (nns=0; nns<conf->nns; nns++) {
++		const struct address *iplit = &conf->ns[nns];
++		if (iplit->family == AF_INET) {
++			memcpy(&ns[nns].sin.sin_addr, iplit->addr, 4);
++			ns[nns].sin.sin_port = htons(53);
++			ns[nns].sin.sin_family = AF_INET;
++		} else {
++			sl = sizeof sa.sin6;
++			memcpy(&ns[nns].sin6.sin6_addr, iplit->addr, 16);
++			ns[nns].sin6.sin6_port = htons(53);
++			ns[nns].sin6.sin6_scope_id = iplit->scopeid;
++			ns[nns].sin6.sin6_family = family = AF_INET6;
+ 		}
+ 	}
+-	if (f) __fclose_ca(f);
+-	if (!nns) {
+-		ns[0].sin.sin_family = AF_INET;
+-		ns[0].sin.sin_port = htons(53);
+-		ns[0].sin.sin_addr.s_addr = htonl(0x7f000001);
+-		nns=1;
+-	}
+ 
+ 	/* Get local address and open/bind a socket */
+ 	sa.sin.sin_family = family;
+@@ -207,3 +175,11 @@ out:
+ 
+ 	return 0;
+ }
++
++int __res_msend(int nqueries, const unsigned char *const *queries, /* wrapper: loads the resolver configuration, then delegates to __res_msend_rc */
++	const int *qlens, unsigned char *const *answers, int *alens, int asize)
++{
++	struct resolvconf conf; /* filled by __get_resolv_conf on every call */
++	if (__get_resolv_conf(&conf, 0, 0) < 0) return -1; /* null search buffer: the search/domain list is not collected */
++	return __res_msend_rc(nqueries, queries, qlens, answers, alens, asize, &conf);
++}
+--- /dev/null
++++ b/src/network/resolvconf.c
+@@ -0,0 +1,93 @@
++#include "lookup.h"
++#include "stdio_impl.h"
++#include <ctype.h>
++#include <errno.h>
++#include <string.h>
++#include <netinet/in.h>
++
++int __get_resolv_conf(struct resolvconf *conf, char *search, size_t search_sz)
++{
++	char line[256];
++	unsigned char _buf[256];
++	FILE *f, _f;
++	int nns = 0;
++
++	conf->ndots = 1;
++	conf->timeout = 5;
++	conf->attempts = 2;
++	if (search) *search = 0;
++
++	f = __fopen_rb_ca("/etc/resolv.conf", &_f, _buf, sizeof _buf);
++	if (!f) switch (errno) {
++	case ENOENT:
++	case ENOTDIR:
++	case EACCES:
++		goto no_resolv_conf;
++	default:
++		return -1;
++	}
++
++	while (fgets(line, sizeof line, f)) {
++		char *p, *z;
++		if (!strchr(line, '\n') && !feof(f)) {
++			/* Ignore lines that get truncated rather than
++			 * potentially misinterpreting them. */
++			int c;
++			do c = getc(f);
++			while (c != '\n' && c != EOF);
++			continue;
++		}
++		if (!strncmp(line, "options", 7) && isspace(line[7])) {
++			p = strstr(line, "ndots:");
++			if (p && isdigit(p[6])) {
++				p += 6;
++				unsigned long x = strtoul(p, &z, 10);
++				if (z != p) conf->ndots = x > 15 ? 15 : x;
++			}
++			p = strstr(line, "attempts:");
++			if (p && isdigit(p[9])) { /* "attempts:" is 9 characters long; offset 6 pointed at 't' so the option was never applied */
++				p += 9;
++				unsigned long x = strtoul(p, &z, 10);
++				if (z != p) conf->attempts = x > 10 ? 10 : x ? x : 1; /* clamp to 1..10; zero becomes 1, as the parser this replaces did */
++			}
++			p = strstr(line, "timeout:");
++			if (p && (isdigit(p[8]) || p[8]=='.')) {
++				p += 8;
++				unsigned long x = strtoul(p, &z, 10);
++				if (z != p) conf->timeout = x > 60 ? 60 : x;
++			}
++			continue;
++		}
++		if (!strncmp(line, "nameserver", 10) && isspace(line[10])) {
++			if (nns >= MAXNS) continue;
++			for (p=line+11; isspace(*p); p++);
++			for (z=p; *z && !isspace(*z); z++);
++			*z=0;
++			if (__lookup_ipliteral(conf->ns+nns, p, AF_UNSPEC) > 0)
++				nns++;
++			continue;
++		}
++
++		if (!search) continue;
++		if ((strncmp(line, "domain", 6) && strncmp(line, "search", 6))
++		    || !isspace(line[6]))
++			continue;
++		for (p=line+7; isspace(*p); p++);
++		size_t l = strlen(p);
++		/* This can never happen anyway with chosen buffer sizes. */
++		if (l >= search_sz) continue;
++		memcpy(search, p, l+1);
++	}
++
++	__fclose_ca(f);
++
++no_resolv_conf:
++	if (!nns) {
++		__lookup_ipliteral(conf->ns, "127.0.0.1", AF_UNSPEC);
++		nns = 1;
++	}
++
++	conf->nns = nns;
++
++	return 0;
++}
+--- a/src/search/tsearch_avl.c
++++ b/src/search/tsearch_avl.c
+@@ -77,38 +77,45 @@ static struct node *find(struct node *n,
+ 		return find(n->right, k, cmp);
+ }
+ 
+-static struct node *insert(struct node **n, const void *k,
+-	int (*cmp)(const void *, const void *), int *new)
++static struct node *insert(struct node *n, const void *k,
++	int (*cmp)(const void *, const void *), struct node **found)
+ {
+-	struct node *r = *n;
++	struct node *r;
+ 	int c;
+ 
+-	if (!r) {
+-		*n = r = malloc(sizeof **n);
+-		if (r) {
+-			r->key = k;
+-			r->left = r->right = 0;
+-			r->height = 1;
++	if (!n) {
++		n = malloc(sizeof *n);
++		if (n) {
++			n->key = k;
++			n->left = n->right = 0;
++			n->height = 1;
+ 		}
+-		*new = 1;
+-		return r;
++		*found = n;
++		return n;
++	}
++	c = cmp(k, n->key);
++	if (c == 0) {
++		*found = n;
++		return 0;
++	}
++	r = insert(c < 0 ? n->left : n->right, k, cmp, found);
++	if (r) {
++		if (c < 0)
++			n->left = r;
++		else
++			n->right = r;
++		r = balance(n);
+ 	}
+-	c = cmp(k, r->key);
+-	if (c == 0)
+-		return r;
+-	if (c < 0)
+-		r = insert(&r->left, k, cmp, new);
+-	else
+-		r = insert(&r->right, k, cmp, new);
+-	if (*new)
+-		*n = balance(*n);
+ 	return r;
+ }
+ 
+-static struct node *movr(struct node *n, struct node *r) {
+-	if (!n)
+-		return r;
+-	n->right = movr(n->right, r);
++static struct node *remove_rightmost(struct node *n, struct node **rightmost)
++{
++	if (!n->right) {
++		*rightmost = n;
++		return n->left;
++	}
++	n->right = remove_rightmost(n->right, rightmost);
+ 	return balance(n);
+ }
+ 
+@@ -122,7 +129,13 @@ static struct node *remove(struct node *
+ 	c = cmp(k, (*n)->key);
+ 	if (c == 0) {
+ 		struct node *r = *n;
+-		*n = movr(r->left, r->right);
++		if (r->left) {
++			r->left = remove_rightmost(r->left, n);
++			(*n)->left = r->left;
++			(*n)->right = r->right;
++			*n = balance(*n);
++		} else
++			*n = r->right;
+ 		free(r);
+ 		return parent;
+ 	}
+@@ -138,6 +151,8 @@ static struct node *remove(struct node *
+ void *tdelete(const void *restrict key, void **restrict rootp,
+ 	int(*compar)(const void *, const void *))
+ {
++	if (!rootp)
++		return 0;
+ 	struct node *n = *rootp;
+ 	struct node *ret;
+ 	/* last argument is arbitrary non-null pointer
+@@ -150,17 +165,21 @@ void *tdelete(const void *restrict key,
+ void *tfind(const void *key, void *const *rootp,
+ 	int(*compar)(const void *, const void *))
+ {
++	if (!rootp)
++		return 0;
+ 	return find(*rootp, key, compar);
+ }
+ 
+ void *tsearch(const void *key, void **rootp,
+ 	int (*compar)(const void *, const void *))
+ {
+-	int new = 0;
+-	struct node *n = *rootp;
++	struct node *update;
+ 	struct node *ret;
+-	ret = insert(&n, key, compar, &new);
+-	*rootp = n;
++	if (!rootp)
++		return 0;
++	update = insert(*rootp, key, compar, &ret);
++	if (update)
++		*rootp = update;
+ 	return ret;
+ }
+ 
+--- a/src/setjmp/arm/longjmp.s
++++ b/src/setjmp/arm/longjmp.s
+@@ -1,3 +1,4 @@
++.syntax unified
+ .global _longjmp
+ .global longjmp
+ .type _longjmp,%function
+@@ -20,7 +21,11 @@ longjmp:
+ 	ldc p2, cr4, [ip], #48
+ 2:	tst r1,#0x40
+ 	beq 2f
+-	.word 0xecbc8b10 /* vldmia ip!, {d8-d15} */
++	.fpu vfp
++	vldmia ip!, {d8-d15}
++	.fpu softvfp
++	.eabi_attribute 10, 0
++	.eabi_attribute 27, 0
+ 2:	tst r1,#0x200
+ 	beq 3f
+ 	ldcl p1, cr10, [ip], #8
+@@ -29,9 +34,7 @@ longjmp:
+ 	ldcl p1, cr13, [ip], #8
+ 	ldcl p1, cr14, [ip], #8
+ 	ldcl p1, cr15, [ip], #8
+-3:	tst lr,#1
+-	moveq pc,lr
+-	bx lr
++3:	bx lr
+ 
+ .hidden __hwcap
+ 1:	.word __hwcap-1b
+--- a/src/setjmp/arm/setjmp.s
++++ b/src/setjmp/arm/setjmp.s
+@@ -1,3 +1,4 @@
++.syntax unified
+ .global __setjmp
+ .global _setjmp
+ .global setjmp
+@@ -22,7 +23,11 @@ setjmp:
+ 	stc p2, cr4, [ip], #48
+ 2:	tst r1,#0x40
+ 	beq 2f
+-	.word 0xecac8b10 /* vstmia ip!, {d8-d15} */
++	.fpu vfp
++	vstmia ip!, {d8-d15}
++	.fpu softvfp
++	.eabi_attribute 10, 0
++	.eabi_attribute 27, 0
+ 2:	tst r1,#0x200
+ 	beq 3f
+ 	stcl p1, cr10, [ip], #8
+@@ -31,9 +36,7 @@ setjmp:
+ 	stcl p1, cr13, [ip], #8
+ 	stcl p1, cr14, [ip], #8
+ 	stcl p1, cr15, [ip], #8
+-3:	tst lr,#1
+-	moveq pc,lr
+-	bx lr
++3:	bx lr
+ 
+ .hidden __hwcap
+ 1:	.word __hwcap-1b
+--- a/src/setjmp/mips-sf/longjmp.s
++++ /dev/null
+@@ -1,25 +0,0 @@
+-.set noreorder
+-
+-.global _longjmp
+-.global longjmp
+-.type   _longjmp,@function
+-.type   longjmp,@function
+-_longjmp:
+-longjmp:
+-	move    $2, $5
+-	bne     $2, $0, 1f
+-	nop
+-	addu    $2, $2, 1
+-1:	lw      $ra,  0($4)
+-	lw      $sp,  4($4)
+-	lw      $16,  8($4)
+-	lw      $17, 12($4)
+-	lw      $18, 16($4)
+-	lw      $19, 20($4)
+-	lw      $20, 24($4)
+-	lw      $21, 28($4)
+-	lw      $22, 32($4)
+-	lw      $23, 36($4)
+-	lw      $30, 40($4)
+-	jr      $ra
+-	lw      $28, 44($4)
+--- a/src/setjmp/mips-sf/longjmp.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-longjmp.s
+--- a/src/setjmp/mips-sf/setjmp.s
++++ /dev/null
+@@ -1,25 +0,0 @@
+-.set noreorder
+-
+-.global __setjmp
+-.global _setjmp
+-.global setjmp
+-.type   __setjmp,@function
+-.type   _setjmp,@function
+-.type   setjmp,@function
+-__setjmp:
+-_setjmp:
+-setjmp:
+-	sw      $ra,  0($4)
+-	sw      $sp,  4($4)
+-	sw      $16,  8($4)
+-	sw      $17, 12($4)
+-	sw      $18, 16($4)
+-	sw      $19, 20($4)
+-	sw      $20, 24($4)
+-	sw      $21, 28($4)
+-	sw      $22, 32($4)
+-	sw      $23, 36($4)
+-	sw      $30, 40($4)
+-	sw      $28, 44($4)
+-	jr      $ra
+-	li      $2, 0
+--- a/src/setjmp/mips-sf/setjmp.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-setjmp.s
+--- /dev/null
++++ b/src/setjmp/mips/longjmp.S
+@@ -0,0 +1,40 @@
++.set noreorder
++
++.global _longjmp
++.global longjmp
++.type   _longjmp,@function
++.type   longjmp,@function
++_longjmp:
++longjmp:
++	move    $2, $5
++	bne     $2, $0, 1f
++	nop
++	addu    $2, $2, 1
++1:
++#ifndef __mips_soft_float
++	lwc1    $20, 56($4)
++	lwc1    $21, 60($4)
++	lwc1    $22, 64($4)
++	lwc1    $23, 68($4)
++	lwc1    $24, 72($4)
++	lwc1    $25, 76($4)
++	lwc1    $26, 80($4)
++	lwc1    $27, 84($4)
++	lwc1    $28, 88($4)
++	lwc1    $29, 92($4)
++	lwc1    $30, 96($4)
++	lwc1    $31, 100($4)
++#endif
++	lw      $ra,  0($4)
++	lw      $sp,  4($4)
++	lw      $16,  8($4)
++	lw      $17, 12($4)
++	lw      $18, 16($4)
++	lw      $19, 20($4)
++	lw      $20, 24($4)
++	lw      $21, 28($4)
++	lw      $22, 32($4)
++	lw      $23, 36($4)
++	lw      $30, 40($4)
++	jr      $ra
++	lw      $28, 44($4)
+--- a/src/setjmp/mips/longjmp.s
++++ /dev/null
+@@ -1,37 +0,0 @@
+-.set noreorder
+-
+-.global _longjmp
+-.global longjmp
+-.type   _longjmp,@function
+-.type   longjmp,@function
+-_longjmp:
+-longjmp:
+-	move    $2, $5
+-	bne     $2, $0, 1f
+-	nop
+-	addu    $2, $2, 1
+-1:	lwc1    $20, 56($4)
+-	lwc1    $21, 60($4)
+-	lwc1    $22, 64($4)
+-	lwc1    $23, 68($4)
+-	lwc1    $24, 72($4)
+-	lwc1    $25, 76($4)
+-	lwc1    $26, 80($4)
+-	lwc1    $27, 84($4)
+-	lwc1    $28, 88($4)
+-	lwc1    $29, 92($4)
+-	lwc1    $30, 96($4)
+-	lwc1    $31, 100($4)
+-	lw      $ra,  0($4)
+-	lw      $sp,  4($4)
+-	lw      $16,  8($4)
+-	lw      $17, 12($4)
+-	lw      $18, 16($4)
+-	lw      $19, 20($4)
+-	lw      $20, 24($4)
+-	lw      $21, 28($4)
+-	lw      $22, 32($4)
+-	lw      $23, 36($4)
+-	lw      $30, 40($4)
+-	jr      $ra
+-	lw      $28, 44($4)
+--- /dev/null
++++ b/src/setjmp/mips/setjmp.S
+@@ -0,0 +1,39 @@
++.set noreorder
++
++.global __setjmp
++.global _setjmp
++.global setjmp
++.type   __setjmp,@function
++.type   _setjmp,@function
++.type   setjmp,@function
++__setjmp:
++_setjmp:
++setjmp:
++	sw      $ra,  0($4)
++	sw      $sp,  4($4)
++	sw      $16,  8($4)
++	sw      $17, 12($4)
++	sw      $18, 16($4)
++	sw      $19, 20($4)
++	sw      $20, 24($4)
++	sw      $21, 28($4)
++	sw      $22, 32($4)
++	sw      $23, 36($4)
++	sw      $30, 40($4)
++	sw      $28, 44($4)
++#ifndef __mips_soft_float
++	swc1    $20, 56($4)
++	swc1    $21, 60($4)
++	swc1    $22, 64($4)
++	swc1    $23, 68($4)
++	swc1    $24, 72($4)
++	swc1    $25, 76($4)
++	swc1    $26, 80($4)
++	swc1    $27, 84($4)
++	swc1    $28, 88($4)
++	swc1    $29, 92($4)
++	swc1    $30, 96($4)
++	swc1    $31, 100($4)
++#endif
++	jr      $ra
++	li      $2, 0
+--- a/src/setjmp/mips/setjmp.s
++++ /dev/null
+@@ -1,37 +0,0 @@
+-.set noreorder
+-
+-.global __setjmp
+-.global _setjmp
+-.global setjmp
+-.type   __setjmp,@function
+-.type   _setjmp,@function
+-.type   setjmp,@function
+-__setjmp:
+-_setjmp:
+-setjmp:
+-	sw      $ra,  0($4)
+-	sw      $sp,  4($4)
+-	sw      $16,  8($4)
+-	sw      $17, 12($4)
+-	sw      $18, 16($4)
+-	sw      $19, 20($4)
+-	sw      $20, 24($4)
+-	sw      $21, 28($4)
+-	sw      $22, 32($4)
+-	sw      $23, 36($4)
+-	sw      $30, 40($4)
+-	sw      $28, 44($4)
+-	swc1    $20, 56($4)
+-	swc1    $21, 60($4)
+-	swc1    $22, 64($4)
+-	swc1    $23, 68($4)
+-	swc1    $24, 72($4)
+-	swc1    $25, 76($4)
+-	swc1    $26, 80($4)
+-	swc1    $27, 84($4)
+-	swc1    $28, 88($4)
+-	swc1    $29, 92($4)
+-	swc1    $30, 96($4)
+-	swc1    $31, 100($4)
+-	jr      $ra
+-	li      $2, 0
+--- a/src/setjmp/mipsel-sf/longjmp.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-../mips-sf/longjmp.s
+--- a/src/setjmp/mipsel-sf/setjmp.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-../mips-sf/setjmp.s
+--- a/src/setjmp/sh-nofpu/longjmp.s
++++ /dev/null
+@@ -1,22 +0,0 @@
+-.global _longjmp
+-.global longjmp
+-.type   _longjmp, @function
+-.type   longjmp,  @function
+-_longjmp:
+-longjmp:
+-	mov.l  @r4+, r8
+-	mov.l  @r4+, r9
+-	mov.l  @r4+, r10
+-	mov.l  @r4+, r11
+-	mov.l  @r4+, r12
+-	mov.l  @r4+, r13
+-	mov.l  @r4+, r14
+-	mov.l  @r4+, r15
+-	lds.l  @r4+, pr
+-
+-	tst  r5, r5
+-	movt r0
+-	add  r5, r0
+-
+-	rts
+-	 nop
+--- a/src/setjmp/sh-nofpu/longjmp.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-longjmp.s
+--- a/src/setjmp/sh-nofpu/setjmp.s
++++ /dev/null
+@@ -1,24 +0,0 @@
+-.global ___setjmp
+-.hidden ___setjmp
+-.global __setjmp
+-.global _setjmp
+-.global setjmp
+-.type   __setjmp, @function
+-.type   _setjmp,  @function
+-.type   setjmp,   @function
+-___setjmp:
+-__setjmp:
+-_setjmp:
+-setjmp:
+-	add   #36, r4
+-	sts.l  pr,   @-r4
+-	mov.l  r15   @-r4
+-	mov.l  r14,  @-r4
+-	mov.l  r13,  @-r4
+-	mov.l  r12,  @-r4
+-	mov.l  r11,  @-r4
+-	mov.l  r10,  @-r4
+-	mov.l  r9,   @-r4
+-	mov.l  r8,   @-r4
+-	rts
+-	 mov  #0, r0
+--- a/src/setjmp/sh-nofpu/setjmp.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-setjmp.s
+--- /dev/null
++++ b/src/setjmp/sh/longjmp.S
+@@ -0,0 +1,28 @@
++.global _longjmp
++.global longjmp
++.type   _longjmp, @function
++.type   longjmp,  @function
++_longjmp:
++longjmp:
++	mov.l  @r4+, r8
++	mov.l  @r4+, r9
++	mov.l  @r4+, r10
++	mov.l  @r4+, r11
++	mov.l  @r4+, r12
++	mov.l  @r4+, r13
++	mov.l  @r4+, r14
++	mov.l  @r4+, r15
++	lds.l  @r4+, pr
++#if __SH_FPU_ANY__ || __SH4__
++	fmov.s @r4+, fr12
++	fmov.s @r4+, fr13
++	fmov.s @r4+, fr14
++	fmov.s @r4+, fr15
++#endif
++
++	tst  r5, r5
++	movt r0
++	add  r5, r0
++
++	rts
++	 nop
+--- a/src/setjmp/sh/longjmp.s
++++ /dev/null
+@@ -1,26 +0,0 @@
+-.global _longjmp
+-.global longjmp
+-.type   _longjmp, @function
+-.type   longjmp,  @function
+-_longjmp:
+-longjmp:
+-	mov.l  @r4+, r8
+-	mov.l  @r4+, r9
+-	mov.l  @r4+, r10
+-	mov.l  @r4+, r11
+-	mov.l  @r4+, r12
+-	mov.l  @r4+, r13
+-	mov.l  @r4+, r14
+-	mov.l  @r4+, r15
+-	lds.l  @r4+, pr
+-	fmov.s @r4+, fr12
+-	fmov.s @r4+, fr13
+-	fmov.s @r4+, fr14
+-	fmov.s @r4+, fr15
+-
+-	tst  r5, r5
+-	movt r0
+-	add  r5, r0
+-
+-	rts
+-	 nop
+--- /dev/null
++++ b/src/setjmp/sh/setjmp.S
+@@ -0,0 +1,32 @@
++.global ___setjmp
++.hidden ___setjmp
++.global __setjmp
++.global _setjmp
++.global setjmp
++.type   __setjmp, @function
++.type   _setjmp,  @function
++.type   setjmp,   @function
++___setjmp:
++__setjmp:
++_setjmp:
++setjmp:
++#if __SH_FPU_ANY__ || __SH4__
++	add   #52, r4
++	fmov.s fr15, @-r4
++	fmov.s fr14, @-r4
++	fmov.s fr13, @-r4
++	fmov.s fr12, @-r4
++#else
++	add   #36, r4
++#endif
++	sts.l  pr,   @-r4
++	mov.l  r15,  @-r4
++	mov.l  r14,  @-r4
++	mov.l  r13,  @-r4
++	mov.l  r12,  @-r4
++	mov.l  r11,  @-r4
++	mov.l  r10,  @-r4
++	mov.l  r9,   @-r4
++	mov.l  r8,   @-r4
++	rts
++	 mov  #0, r0
+--- a/src/setjmp/sh/setjmp.s
++++ /dev/null
+@@ -1,28 +0,0 @@
+-.global ___setjmp
+-.hidden ___setjmp
+-.global __setjmp
+-.global _setjmp
+-.global setjmp
+-.type   __setjmp, @function
+-.type   _setjmp,  @function
+-.type   setjmp,   @function
+-___setjmp:
+-__setjmp:
+-_setjmp:
+-setjmp:
+-	add   #52, r4
+-	fmov.s fr15, @-r4
+-	fmov.s fr14, @-r4
+-	fmov.s fr13, @-r4
+-	fmov.s fr12, @-r4
+-	sts.l  pr,   @-r4
+-	mov.l  r15,  @-r4
+-	mov.l  r14,  @-r4
+-	mov.l  r13,  @-r4
+-	mov.l  r12,  @-r4
+-	mov.l  r11,  @-r4
+-	mov.l  r10,  @-r4
+-	mov.l  r9,   @-r4
+-	mov.l  r8,   @-r4
+-	rts
+-	 mov  #0, r0
+--- a/src/setjmp/sheb-nofpu/longjmp.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-../sh-nofpu/longjmp.s
+--- a/src/setjmp/sheb-nofpu/setjmp.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-../sh-nofpu/setjmp.s
+--- a/src/signal/arm/restore.s
++++ b/src/signal/arm/restore.s
+@@ -1,3 +1,5 @@
++.syntax unified
++
+ .global __restore
+ .type __restore,%function
+ __restore:
+--- a/src/signal/arm/sigsetjmp.s
++++ b/src/signal/arm/sigsetjmp.s
+@@ -1,3 +1,4 @@
++.syntax unified
+ .global sigsetjmp
+ .global __sigsetjmp
+ .type sigsetjmp,%function
+--- a/src/signal/sigaction.c
++++ b/src/signal/sigaction.c
+@@ -17,10 +17,6 @@ void __get_handler_set(sigset_t *set)
+ int __libc_sigaction(int sig, const struct sigaction *restrict sa, struct sigaction *restrict old)
+ {
+ 	struct k_sigaction ksa, ksa_old;
+-	if (sig >= (unsigned)_NSIG) {
+-		errno = EINVAL;
+-		return -1;
+-	}
+ 	if (sa) {
+ 		if ((uintptr_t)sa->sa_handler > 1UL) {
+ 			a_or_l(handler_set+(sig-1)/(8*sizeof(long)),
+@@ -57,7 +53,7 @@ int __libc_sigaction(int sig, const stru
+ 
+ int __sigaction(int sig, const struct sigaction *restrict sa, struct sigaction *restrict old)
+ {
+-	if (sig-32U < 3) {
++	if (sig-32U < 3 || sig-1U >= _NSIG-1) {
+ 		errno = EINVAL;
+ 		return -1;
+ 	}
+--- a/src/signal/sigsetjmp_tail.c
++++ b/src/signal/sigsetjmp_tail.c
+@@ -2,9 +2,7 @@
+ #include <signal.h>
+ #include "syscall.h"
+ 
+-#ifdef SHARED
+ __attribute__((__visibility__("hidden")))
+-#endif
+ int __sigsetjmp_tail(sigjmp_buf jb, int ret)
+ {
+ 	void *p = jb->__ss;
+--- a/src/stdio/getdelim.c
++++ b/src/stdio/getdelim.c
+@@ -27,17 +27,18 @@ ssize_t getdelim(char **restrict s, size
+ 	for (;;) {
+ 		z = memchr(f->rpos, delim, f->rend - f->rpos);
+ 		k = z ? z - f->rpos + 1 : f->rend - f->rpos;
+-		if (i+k >= *n) {
++		if (i+k+1 >= *n) {
+ 			if (k >= SIZE_MAX/2-i) goto oom;
+-			*n = i+k+2;
+-			if (*n < SIZE_MAX/4) *n *= 2;
+-			tmp = realloc(*s, *n);
++			size_t m = i+k+2;
++			if (!z && m < SIZE_MAX/4) m += m/2;
++			tmp = realloc(*s, m);
+ 			if (!tmp) {
+-				*n = i+k+2;
+-				tmp = realloc(*s, *n);
++				m = i+k+2;
++				tmp = realloc(*s, m);
+ 				if (!tmp) goto oom;
+ 			}
+ 			*s = tmp;
++			*n = m;
+ 		}
+ 		memcpy(*s+i, f->rpos, k);
+ 		f->rpos += k;
+--- /dev/null
++++ b/src/string/arm/__aeabi_memclr.c
+@@ -0,0 +1,9 @@
++#include <string.h>
++#include "libc.h"
++
++void __aeabi_memclr(void *dest, size_t n)
++{
++	memset(dest, 0, n);
++}
++weak_alias(__aeabi_memclr, __aeabi_memclr4);
++weak_alias(__aeabi_memclr, __aeabi_memclr8);
+--- /dev/null
++++ b/src/string/arm/__aeabi_memcpy.c
+@@ -0,0 +1,9 @@
++#include <string.h>
++#include "libc.h"
++
++void __aeabi_memcpy(void *restrict dest, const void *restrict src, size_t n)
++{
++	memcpy(dest, src, n);
++}
++weak_alias(__aeabi_memcpy, __aeabi_memcpy4);
++weak_alias(__aeabi_memcpy, __aeabi_memcpy8);
+--- /dev/null
++++ b/src/string/arm/__aeabi_memmove.c
+@@ -0,0 +1,9 @@
++#include <string.h>
++#include "libc.h"
++
++void __aeabi_memmove(void *dest, const void *src, size_t n)
++{
++	memmove(dest, src, n);
++}
++weak_alias(__aeabi_memmove, __aeabi_memmove4);
++weak_alias(__aeabi_memmove, __aeabi_memmove8);
+--- /dev/null
++++ b/src/string/arm/__aeabi_memset.c
+@@ -0,0 +1,9 @@
++#include <string.h>
++#include "libc.h"
++
++void __aeabi_memset(void *dest, size_t n, int c)
++{
++	memset(dest, c, n);
++}
++weak_alias(__aeabi_memset, __aeabi_memset4);
++weak_alias(__aeabi_memset, __aeabi_memset8);
+--- /dev/null
++++ b/src/string/arm/memcpy.c
+@@ -0,0 +1,3 @@
++#if __ARMEB__
++#include "../memcpy.c"
++#endif
+--- /dev/null
++++ b/src/string/arm/memcpy_le.S
+@@ -0,0 +1,383 @@
++#ifndef __ARMEB__
++
++/*
++ * Copyright (C) 2008 The Android Open Source Project
++ * All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *  * Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer.
++ *  * Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in
++ *    the documentation and/or other materials provided with the
++ *    distribution.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
++ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
++ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
++ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
++ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ */
++
++
++/*
++ * Optimized memcpy() for ARM.
++ *
++ * note that memcpy() always returns the destination pointer,
++ * so we have to preserve R0.
++  */
++
++/*
++ * This file has been modified from the original for use in musl libc.
++ * The main changes are: addition of .type memcpy,%function to make the
++ * code safely callable from thumb mode, adjusting the return
++ * instructions to be compatible with pre-thumb ARM cpus, and removal
++ * of prefetch code that is not compatible with older cpus.
++ */
++
++.syntax unified
++
++.global memcpy
++.type memcpy,%function
++memcpy:
++	/* The stack must always be 64-bits aligned to be compliant with the
++	 * ARM ABI. Since we have to save R0, we might as well save R4
++	 * which we can use for better pipelining of the reads below
++	 */
++	.fnstart
++	.save       {r0, r4, lr}
++	stmfd       sp!, {r0, r4, lr}
++	/* Making room for r5-r11 which will be spilled later */
++	.pad        #28
++	sub         sp, sp, #28
++
++	/* it simplifies things to take care of len<4 early */
++	cmp     r2, #4
++	blo     copy_last_3_and_return
++
++	/* compute the offset to align the source
++	 * offset = (4-(src&3))&3 = -src & 3
++	 */
++	rsb     r3, r1, #0
++	ands    r3, r3, #3
++	beq     src_aligned
++
++	/* align source to 32 bits. We need to insert 2 instructions between
++	 * a ldr[b|h] and str[b|h] because byte and half-word instructions
++	 * stall 2 cycles.
++	 */
++	movs    r12, r3, lsl #31
++	sub     r2, r2, r3              /* we know that r3 <= r2 because r2 >= 4 */
++	ldrbmi r3, [r1], #1
++	ldrbcs r4, [r1], #1
++	ldrbcs r12,[r1], #1
++	strbmi r3, [r0], #1
++	strbcs r4, [r0], #1
++	strbcs r12,[r0], #1
++
++src_aligned:
++
++	/* see if src and dst are aligned together (congruent) */
++	eor     r12, r0, r1
++	tst     r12, #3
++	bne     non_congruent
++
++	/* Use post-increment mode for stm to spill r5-r11 to reserved stack
++	 * frame. Don't update sp.
++	 */
++	stmea   sp, {r5-r11}
++
++	/* align the destination to a cache-line */
++	rsb     r3, r0, #0
++	ands    r3, r3, #0x1C
++	beq     congruent_aligned32
++	cmp     r3, r2
++	andhi   r3, r2, #0x1C
++
++	/* conditionally copies 0 to 7 words (length in r3) */
++	movs    r12, r3, lsl #28
++	ldmcs   r1!, {r4, r5, r6, r7}           /* 16 bytes */
++	ldmmi   r1!, {r8, r9}                   /*  8 bytes */
++	stmcs   r0!, {r4, r5, r6, r7}
++	stmmi   r0!, {r8, r9}
++	tst     r3, #0x4
++	ldrne   r10,[r1], #4                    /*  4 bytes */
++	strne   r10,[r0], #4
++	sub     r2, r2, r3
++
++congruent_aligned32:
++	/*
++	 * here destination is aligned to 32 bytes (or fewer than 32 bytes remain).
++	 */
++
++cached_aligned32:
++	subs    r2, r2, #32
++	blo     less_than_32_left
++
++	/*
++	 * We preload a cache-line up to 64 bytes ahead. On the 926, this will
++	 * stall only until the requested word is fetched, but the linefill
++	 * continues in the background.
++	 * While the linefill is going, we write our previous cache-line
++	 * into the write-buffer (which should have some free space).
++	 * When the linefill is done, the writebuffer will
++	 * start dumping its content into memory
++	 *
++	 * While all this is going, we then load a full cache line into
++	 * 8 registers, this cache line should be in the cache by now
++	 * (or partly in the cache).
++	 *
++	 * This code should work well regardless of the source/dest alignment.
++	 *
++	 */
++
++	/* Align the preload register to a cache-line because the cpu does
++	 * "critical word first" (the first word requested is loaded first).
++	 */
++	@ bic           r12, r1, #0x1F
++	@ add           r12, r12, #64
++
++1:      ldmia   r1!, { r4-r11 }
++	subs    r2, r2, #32
++
++	/* 
++	 * NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
++	 * for ARM9 preload will not be safely guarded by the preceding subs.
++	 * When it is safely guarded the only possibility to have SIGSEGV here
++	 * is because the caller overstates the length.
++	 */
++	@ ldrhi         r3, [r12], #32      /* cheap ARM9 preload */
++	stmia   r0!, { r4-r11 }
++	bhs     1b
++
++	add     r2, r2, #32
++
++less_than_32_left:
++	/*
++	 * less than 32 bytes left at this point (length in r2)
++	 */
++
++	/* skip all this if there is nothing to do, which should
++	 * be a common case (if not executed the code below takes
++	 * about 16 cycles)
++	 */
++	tst     r2, #0x1F
++	beq     1f
++
++	/* conditionally copies 0 to 31 bytes */
++	movs    r12, r2, lsl #28
++	ldmcs   r1!, {r4, r5, r6, r7}           /* 16 bytes */
++	ldmmi   r1!, {r8, r9}                   /*  8 bytes */
++	stmcs   r0!, {r4, r5, r6, r7}
++	stmmi   r0!, {r8, r9}
++	movs    r12, r2, lsl #30
++	ldrcs   r3, [r1], #4                    /*  4 bytes */
++	ldrhmi r4, [r1], #2                     /*  2 bytes */
++	strcs   r3, [r0], #4
++	strhmi r4, [r0], #2
++	tst     r2, #0x1
++	ldrbne r3, [r1]                         /*  last byte  */
++	strbne r3, [r0]
++
++	/* we're done! restore everything and return */
++1:      ldmfd   sp!, {r5-r11}
++	ldmfd   sp!, {r0, r4, lr}
++	bx      lr
++
++	/********************************************************************/
++
++non_congruent:
++	/*
++	 * here source is aligned to 4 bytes
++	 * but destination is not.
++	 *
++	 * in the code below r2 is the number of bytes read
++	 * (the number of bytes written is always smaller, because we have
++	 * partial words in the shift queue)
++	 */
++	cmp     r2, #4
++	blo     copy_last_3_and_return
++
++	/* Use post-increment mode for stm to spill r5-r11 to reserved stack
++	 * frame. Don't update sp.
++	 */
++	stmea   sp, {r5-r11}
++
++	/* compute shifts needed to align src to dest */
++	rsb     r5, r0, #0
++	and     r5, r5, #3                      /* r5 = # bytes in partial words */
++	mov     r12, r5, lsl #3         /* r12 = right */
++	rsb     lr, r12, #32            /* lr = left  */
++
++	/* read the first word */
++	ldr     r3, [r1], #4
++	sub     r2, r2, #4
++
++	/* write a partial word (0 to 3 bytes), such that destination
++	 * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
++	 */
++	movs    r5, r5, lsl #31
++	strbmi r3, [r0], #1
++	movmi   r3, r3, lsr #8
++	strbcs r3, [r0], #1
++	movcs   r3, r3, lsr #8
++	strbcs r3, [r0], #1
++	movcs   r3, r3, lsr #8
++
++	cmp     r2, #4
++	blo     partial_word_tail
++
++	/* Align destination to 32 bytes (cache line boundary) */
++1:      tst     r0, #0x1c
++	beq     2f
++	ldr     r5, [r1], #4
++	sub     r2, r2, #4
++	orr     r4, r3, r5,             lsl lr
++	mov     r3, r5,                 lsr r12
++	str     r4, [r0], #4
++	cmp     r2, #4
++	bhs     1b
++	blo     partial_word_tail
++
++	/* copy 32 bytes at a time */
++2:      subs    r2, r2, #32
++	blo     less_than_thirtytwo
++
++	/* Use immediate mode for the shifts, because there is an extra cycle
++	 * for register shifts, which could account for up to 50% of
++	 * performance hit.
++	 */
++
++	cmp     r12, #24
++	beq     loop24
++	cmp     r12, #8
++	beq     loop8
++
++loop16:
++	ldr     r12, [r1], #4
++1:      mov     r4, r12
++	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
++	subs    r2, r2, #32
++	ldrhs   r12, [r1], #4
++	orr     r3, r3, r4, lsl #16
++	mov     r4, r4, lsr #16
++	orr     r4, r4, r5, lsl #16
++	mov     r5, r5, lsr #16
++	orr     r5, r5, r6, lsl #16
++	mov     r6, r6, lsr #16
++	orr     r6, r6, r7, lsl #16
++	mov     r7, r7, lsr #16
++	orr     r7, r7, r8, lsl #16
++	mov     r8, r8, lsr #16
++	orr     r8, r8, r9, lsl #16
++	mov     r9, r9, lsr #16
++	orr     r9, r9, r10, lsl #16
++	mov     r10, r10,               lsr #16
++	orr     r10, r10, r11, lsl #16
++	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
++	mov     r3, r11, lsr #16
++	bhs     1b
++	b       less_than_thirtytwo
++
++loop8:
++	ldr     r12, [r1], #4
++1:      mov     r4, r12
++	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
++	subs    r2, r2, #32
++	ldrhs   r12, [r1], #4
++	orr     r3, r3, r4, lsl #24
++	mov     r4, r4, lsr #8
++	orr     r4, r4, r5, lsl #24
++	mov     r5, r5, lsr #8
++	orr     r5, r5, r6, lsl #24
++	mov     r6, r6,  lsr #8
++	orr     r6, r6, r7, lsl #24
++	mov     r7, r7,  lsr #8
++	orr     r7, r7, r8,             lsl #24
++	mov     r8, r8,  lsr #8
++	orr     r8, r8, r9,             lsl #24
++	mov     r9, r9,  lsr #8
++	orr     r9, r9, r10,    lsl #24
++	mov     r10, r10, lsr #8
++	orr     r10, r10, r11,  lsl #24
++	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
++	mov     r3, r11, lsr #8
++	bhs     1b
++	b       less_than_thirtytwo
++
++loop24:
++	ldr     r12, [r1], #4
++1:      mov     r4, r12
++	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
++	subs    r2, r2, #32
++	ldrhs   r12, [r1], #4
++	orr     r3, r3, r4, lsl #8
++	mov     r4, r4, lsr #24
++	orr     r4, r4, r5, lsl #8
++	mov     r5, r5, lsr #24
++	orr     r5, r5, r6, lsl #8
++	mov     r6, r6, lsr #24
++	orr     r6, r6, r7, lsl #8
++	mov     r7, r7, lsr #24
++	orr     r7, r7, r8, lsl #8
++	mov     r8, r8, lsr #24
++	orr     r8, r8, r9, lsl #8
++	mov     r9, r9, lsr #24
++	orr     r9, r9, r10, lsl #8
++	mov     r10, r10, lsr #24
++	orr     r10, r10, r11, lsl #8
++	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
++	mov     r3, r11, lsr #24
++	bhs     1b
++
++less_than_thirtytwo:
++	/* copy the last 0 to 31 bytes of the source */
++	rsb     r12, lr, #32            /* we corrupted r12, recompute it  */
++	add     r2, r2, #32
++	cmp     r2, #4
++	blo     partial_word_tail
++
++1:      ldr     r5, [r1], #4
++	sub     r2, r2, #4
++	orr     r4, r3, r5,             lsl lr
++	mov     r3,     r5,                     lsr r12
++	str     r4, [r0], #4
++	cmp     r2, #4
++	bhs     1b
++
++partial_word_tail:
++	/* we have a partial word in the input buffer */
++	movs    r5, lr, lsl #(31-3)
++	strbmi r3, [r0], #1
++	movmi   r3, r3, lsr #8
++	strbcs r3, [r0], #1
++	movcs   r3, r3, lsr #8
++	strbcs r3, [r0], #1
++
++	/* Refill spilled registers from the stack. Don't update sp. */
++	ldmfd   sp, {r5-r11}
++
++copy_last_3_and_return:
++	movs    r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */
++	ldrbmi r2, [r1], #1
++	ldrbcs r3, [r1], #1
++	ldrbcs r12,[r1]
++	strbmi r2, [r0], #1
++	strbcs r3, [r0], #1
++	strbcs r12,[r0]
++
++	/* we're done! restore sp and spilled registers and return */
++	add     sp,  sp, #28
++	ldmfd   sp!, {r0, r4, lr}
++	bx      lr
++
++#endif
+--- a/src/string/armel/memcpy.s
++++ /dev/null
+@@ -1,381 +0,0 @@
+-/*
+- * Copyright (C) 2008 The Android Open Source Project
+- * All rights reserved.
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that the following conditions
+- * are met:
+- *  * Redistributions of source code must retain the above copyright
+- *    notice, this list of conditions and the following disclaimer.
+- *  * Redistributions in binary form must reproduce the above copyright
+- *    notice, this list of conditions and the following disclaimer in
+- *    the documentation and/or other materials provided with the
+- *    distribution.
+- *
+- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+- * SUCH DAMAGE.
+- */
+-
+-
+-/*
+- * Optimized memcpy() for ARM.
+- *
+- * note that memcpy() always returns the destination pointer,
+- * so we have to preserve R0.
+-  */
+-
+-/*
+- * This file has been modified from the original for use in musl libc.
+- * The main changes are: addition of .type memcpy,%function to make the
+- * code safely callable from thumb mode, adjusting the return
+- * instructions to be compatible with pre-thumb ARM cpus, and removal
+- * of prefetch code that is not compatible with older cpus.
+- */
+-
+-.global memcpy
+-.type memcpy,%function
+-memcpy:
+-	/* The stack must always be 64-bits aligned to be compliant with the
+-	 * ARM ABI. Since we have to save R0, we might as well save R4
+-	 * which we can use for better pipelining of the reads below
+-	 */
+-	.fnstart
+-	.save       {r0, r4, lr}
+-	stmfd       sp!, {r0, r4, lr}
+-	/* Making room for r5-r11 which will be spilled later */
+-	.pad        #28
+-	sub         sp, sp, #28
+-
+-	/* it simplifies things to take care of len<4 early */
+-	cmp     r2, #4
+-	blo     copy_last_3_and_return
+-
+-	/* compute the offset to align the source
+-	 * offset = (4-(src&3))&3 = -src & 3
+-	 */
+-	rsb     r3, r1, #0
+-	ands    r3, r3, #3
+-	beq     src_aligned
+-
+-	/* align source to 32 bits. We need to insert 2 instructions between
+-	 * a ldr[b|h] and str[b|h] because byte and half-word instructions
+-	 * stall 2 cycles.
+-	 */
+-	movs    r12, r3, lsl #31
+-	sub     r2, r2, r3              /* we know that r3 <= r2 because r2 >= 4 */
+-	.word 0x44d13001 /* ldrbmi r3, [r1], #1 */
+-	.word 0x24d14001 /* ldrbcs r4, [r1], #1 */
+-	.word 0x24d1c001 /* ldrbcs r12,[r1], #1 */
+-	.word 0x44c03001 /* strbmi r3, [r0], #1 */
+-	.word 0x24c04001 /* strbcs r4, [r0], #1 */
+-	.word 0x24c0c001 /* strbcs r12,[r0], #1 */
+-
+-src_aligned:
+-
+-	/* see if src and dst are aligned together (congruent) */
+-	eor     r12, r0, r1
+-	tst     r12, #3
+-	bne     non_congruent
+-
+-	/* Use post-incriment mode for stm to spill r5-r11 to reserved stack
+-	 * frame. Don't update sp.
+-	 */
+-	stmea   sp, {r5-r11}
+-
+-	/* align the destination to a cache-line */
+-	rsb     r3, r0, #0
+-	ands    r3, r3, #0x1C
+-	beq     congruent_aligned32
+-	cmp     r3, r2
+-	andhi   r3, r2, #0x1C
+-
+-	/* conditionnaly copies 0 to 7 words (length in r3) */
+-	movs    r12, r3, lsl #28
+-	ldmcs   r1!, {r4, r5, r6, r7}           /* 16 bytes */
+-	ldmmi   r1!, {r8, r9}                   /*  8 bytes */
+-	stmcs   r0!, {r4, r5, r6, r7}
+-	stmmi   r0!, {r8, r9}
+-	tst     r3, #0x4
+-	ldrne   r10,[r1], #4                    /*  4 bytes */
+-	strne   r10,[r0], #4
+-	sub     r2, r2, r3
+-
+-congruent_aligned32:
+-	/*
+-	 * here source is aligned to 32 bytes.
+-	 */
+-
+-cached_aligned32:
+-	subs    r2, r2, #32
+-	blo     less_than_32_left
+-
+-	/*
+-	 * We preload a cache-line up to 64 bytes ahead. On the 926, this will
+-	 * stall only until the requested world is fetched, but the linefill
+-	 * continues in the the background.
+-	 * While the linefill is going, we write our previous cache-line
+-	 * into the write-buffer (which should have some free space).
+-	 * When the linefill is done, the writebuffer will
+-	 * start dumping its content into memory
+-	 *
+-	 * While all this is going, we then load a full cache line into
+-	 * 8 registers, this cache line should be in the cache by now
+-	 * (or partly in the cache).
+-	 *
+-	 * This code should work well regardless of the source/dest alignment.
+-	 *
+-	 */
+-
+-	/* Align the preload register to a cache-line because the cpu does
+-	 * "critical word first" (the first word requested is loaded first).
+-	 */
+-	@ bic           r12, r1, #0x1F
+-	@ add           r12, r12, #64
+-
+-1:      ldmia   r1!, { r4-r11 }
+-	subs    r2, r2, #32
+-
+-	/* 
+-	 * NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
+-	 * for ARM9 preload will not be safely guarded by the preceding subs.
+-	 * When it is safely guarded the only possibility to have SIGSEGV here
+-	 * is because the caller overstates the length.
+-	 */
+-	@ ldrhi         r3, [r12], #32      /* cheap ARM9 preload */
+-	stmia   r0!, { r4-r11 }
+-	bhs     1b
+-
+-	add     r2, r2, #32
+-
+-less_than_32_left:
+-	/*
+-	 * less than 32 bytes left at this point (length in r2)
+-	 */
+-
+-	/* skip all this if there is nothing to do, which should
+-	 * be a common case (if not executed the code below takes
+-	 * about 16 cycles)
+-	 */
+-	tst     r2, #0x1F
+-	beq     1f
+-
+-	/* conditionnaly copies 0 to 31 bytes */
+-	movs    r12, r2, lsl #28
+-	ldmcs   r1!, {r4, r5, r6, r7}           /* 16 bytes */
+-	ldmmi   r1!, {r8, r9}                   /*  8 bytes */
+-	stmcs   r0!, {r4, r5, r6, r7}
+-	stmmi   r0!, {r8, r9}
+-	movs    r12, r2, lsl #30
+-	ldrcs   r3, [r1], #4                    /*  4 bytes */
+-	.word 0x40d140b2 /* ldrhmi r4, [r1], #2 */ /*  2 bytes */
+-	strcs   r3, [r0], #4
+-	.word 0x40c040b2 /* strhmi r4, [r0], #2 */
+-	tst     r2, #0x1
+-	.word 0x15d13000 /* ldrbne r3, [r1] */  /*  last byte  */
+-	.word 0x15c03000 /* strbne r3, [r0] */
+-
+-	/* we're done! restore everything and return */
+-1:      ldmfd   sp!, {r5-r11}
+-	ldmfd   sp!, {r0, r4, lr}
+-	tst     lr, #1
+-	moveq   pc, lr
+-	bx      lr
+-
+-	/********************************************************************/
+-
+-non_congruent:
+-	/*
+-	 * here source is aligned to 4 bytes
+-	 * but destination is not.
+-	 *
+-	 * in the code below r2 is the number of bytes read
+-	 * (the number of bytes written is always smaller, because we have
+-	 * partial words in the shift queue)
+-	 */
+-	cmp     r2, #4
+-	blo     copy_last_3_and_return
+-
+-	/* Use post-incriment mode for stm to spill r5-r11 to reserved stack
+-	 * frame. Don't update sp.
+-	 */
+-	stmea   sp, {r5-r11}
+-
+-	/* compute shifts needed to align src to dest */
+-	rsb     r5, r0, #0
+-	and     r5, r5, #3                      /* r5 = # bytes in partial words */
+-	mov     r12, r5, lsl #3         /* r12 = right */
+-	rsb     lr, r12, #32            /* lr = left  */
+-
+-	/* read the first word */
+-	ldr     r3, [r1], #4
+-	sub     r2, r2, #4
+-
+-	/* write a partial word (0 to 3 bytes), such that destination
+-	 * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
+-	 */
+-	movs    r5, r5, lsl #31
+-	.word 0x44c03001 /* strbmi r3, [r0], #1 */
+-	movmi   r3, r3, lsr #8
+-	.word 0x24c03001 /* strbcs r3, [r0], #1 */
+-	movcs   r3, r3, lsr #8
+-	.word 0x24c03001 /* strbcs r3, [r0], #1 */
+-	movcs   r3, r3, lsr #8
+-
+-	cmp     r2, #4
+-	blo     partial_word_tail
+-
+-	/* Align destination to 32 bytes (cache line boundary) */
+-1:      tst     r0, #0x1c
+-	beq     2f
+-	ldr     r5, [r1], #4
+-	sub     r2, r2, #4
+-	orr     r4, r3, r5,             lsl lr
+-	mov     r3, r5,                 lsr r12
+-	str     r4, [r0], #4
+-	cmp     r2, #4
+-	bhs     1b
+-	blo     partial_word_tail
+-
+-	/* copy 32 bytes at a time */
+-2:      subs    r2, r2, #32
+-	blo     less_than_thirtytwo
+-
+-	/* Use immediate mode for the shifts, because there is an extra cycle
+-	 * for register shifts, which could account for up to 50% of
+-	 * performance hit.
+-	 */
+-
+-	cmp     r12, #24
+-	beq     loop24
+-	cmp     r12, #8
+-	beq     loop8
+-
+-loop16:
+-	ldr     r12, [r1], #4
+-1:      mov     r4, r12
+-	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
+-	subs    r2, r2, #32
+-	ldrhs   r12, [r1], #4
+-	orr     r3, r3, r4, lsl #16
+-	mov     r4, r4, lsr #16
+-	orr     r4, r4, r5, lsl #16
+-	mov     r5, r5, lsr #16
+-	orr     r5, r5, r6, lsl #16
+-	mov     r6, r6, lsr #16
+-	orr     r6, r6, r7, lsl #16
+-	mov     r7, r7, lsr #16
+-	orr     r7, r7, r8, lsl #16
+-	mov     r8, r8, lsr #16
+-	orr     r8, r8, r9, lsl #16
+-	mov     r9, r9, lsr #16
+-	orr     r9, r9, r10, lsl #16
+-	mov     r10, r10,               lsr #16
+-	orr     r10, r10, r11, lsl #16
+-	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+-	mov     r3, r11, lsr #16
+-	bhs     1b
+-	b       less_than_thirtytwo
+-
+-loop8:
+-	ldr     r12, [r1], #4
+-1:      mov     r4, r12
+-	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
+-	subs    r2, r2, #32
+-	ldrhs   r12, [r1], #4
+-	orr     r3, r3, r4, lsl #24
+-	mov     r4, r4, lsr #8
+-	orr     r4, r4, r5, lsl #24
+-	mov     r5, r5, lsr #8
+-	orr     r5, r5, r6, lsl #24
+-	mov     r6, r6,  lsr #8
+-	orr     r6, r6, r7, lsl #24
+-	mov     r7, r7,  lsr #8
+-	orr     r7, r7, r8,             lsl #24
+-	mov     r8, r8,  lsr #8
+-	orr     r8, r8, r9,             lsl #24
+-	mov     r9, r9,  lsr #8
+-	orr     r9, r9, r10,    lsl #24
+-	mov     r10, r10, lsr #8
+-	orr     r10, r10, r11,  lsl #24
+-	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+-	mov     r3, r11, lsr #8
+-	bhs     1b
+-	b       less_than_thirtytwo
+-
+-loop24:
+-	ldr     r12, [r1], #4
+-1:      mov     r4, r12
+-	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
+-	subs    r2, r2, #32
+-	ldrhs   r12, [r1], #4
+-	orr     r3, r3, r4, lsl #8
+-	mov     r4, r4, lsr #24
+-	orr     r4, r4, r5, lsl #8
+-	mov     r5, r5, lsr #24
+-	orr     r5, r5, r6, lsl #8
+-	mov     r6, r6, lsr #24
+-	orr     r6, r6, r7, lsl #8
+-	mov     r7, r7, lsr #24
+-	orr     r7, r7, r8, lsl #8
+-	mov     r8, r8, lsr #24
+-	orr     r8, r8, r9, lsl #8
+-	mov     r9, r9, lsr #24
+-	orr     r9, r9, r10, lsl #8
+-	mov     r10, r10, lsr #24
+-	orr     r10, r10, r11, lsl #8
+-	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+-	mov     r3, r11, lsr #24
+-	bhs     1b
+-
+-less_than_thirtytwo:
+-	/* copy the last 0 to 31 bytes of the source */
+-	rsb     r12, lr, #32            /* we corrupted r12, recompute it  */
+-	add     r2, r2, #32
+-	cmp     r2, #4
+-	blo     partial_word_tail
+-
+-1:      ldr     r5, [r1], #4
+-	sub     r2, r2, #4
+-	orr     r4, r3, r5,             lsl lr
+-	mov     r3,     r5,                     lsr r12
+-	str     r4, [r0], #4
+-	cmp     r2, #4
+-	bhs     1b
+-
+-partial_word_tail:
+-	/* we have a partial word in the input buffer */
+-	movs    r5, lr, lsl #(31-3)
+-	.word 0x44c03001 /* strbmi r3, [r0], #1 */
+-	movmi   r3, r3, lsr #8
+-	.word 0x24c03001 /* strbcs r3, [r0], #1 */
+-	movcs   r3, r3, lsr #8
+-	.word 0x24c03001 /* strbcs r3, [r0], #1 */
+-
+-	/* Refill spilled registers from the stack. Don't update sp. */
+-	ldmfd   sp, {r5-r11}
+-
+-copy_last_3_and_return:
+-	movs    r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */
+-	.word 0x44d12001 /* ldrbmi r2, [r1], #1 */
+-	.word 0x24d13001 /* ldrbcs r3, [r1], #1 */
+-	.word 0x25d1c000 /* ldrbcs r12,[r1] */
+-	.word 0x44c02001 /* strbmi r2, [r0], #1 */
+-	.word 0x24c03001 /* strbcs r3, [r0], #1 */
+-	.word 0x25c0c000 /* strbcs r12,[r0] */
+-
+-	/* we're done! restore sp and spilled registers and return */
+-	add     sp,  sp, #28
+-	ldmfd   sp!, {r0, r4, lr}
+-	tst     lr, #1
+-	moveq   pc, lr
+-	bx      lr
+--- a/src/string/armel/memcpy.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-memcpy.s
+--- a/src/string/armhf/memcpy.sub
++++ /dev/null
+@@ -1 +0,0 @@
+-../armel/memcpy.s
+--- a/src/thread/__syscall_cp.c
++++ b/src/thread/__syscall_cp.c
+@@ -1,9 +1,7 @@
+ #include "pthread_impl.h"
+ #include "syscall.h"
+ 
+-#ifdef SHARED
+ __attribute__((__visibility__("hidden")))
+-#endif
+ long __syscall_cp_c();
+ 
+ static long sccp(syscall_arg_t nr,
+--- a/src/thread/__tls_get_addr.c
++++ b/src/thread/__tls_get_addr.c
+@@ -1,16 +1,16 @@
+ #include <stddef.h>
+ #include "pthread_impl.h"
++#include "libc.h"
++
++__attribute__((__visibility__("hidden")))
++void *__tls_get_new(size_t *);
+ 
+ void *__tls_get_addr(size_t *v)
+ {
+ 	pthread_t self = __pthread_self();
+-#ifdef SHARED
+-	__attribute__((__visibility__("hidden")))
+-	void *__tls_get_new(size_t *);
+ 	if (v[0]<=(size_t)self->dtv[0])
+ 		return (char *)self->dtv[v[0]]+v[1]+DTP_OFFSET;
+ 	return __tls_get_new(v);
+-#else
+-	return (char *)self->dtv[1]+v[1]+DTP_OFFSET;
+-#endif
+ }
++
++weak_alias(__tls_get_addr, __tls_get_new);
+--- a/src/thread/aarch64/syscall_cp.s
++++ b/src/thread/aarch64/syscall_cp.s
+@@ -17,7 +17,7 @@
+ __syscall_cp_asm:
+ __cp_begin:
+ 	ldr w0,[x0]
+-	cbnz w0,1f
++	cbnz w0,__cp_cancel
+ 	mov x8,x1
+ 	mov x0,x2
+ 	mov x1,x3
+@@ -28,6 +28,5 @@ __cp_begin:
+ 	svc 0
+ __cp_end:
+ 	ret
+-
+-	// cbnz might not be able to jump far enough
+-1:	b __cancel
++__cp_cancel:
++	b __cancel
+--- /dev/null
++++ b/src/thread/arm/__set_thread_area.c
+@@ -0,0 +1,49 @@
++#include <stdint.h>
++#include <elf.h>
++#include "pthread_impl.h"
++#include "libc.h"
++
++#define HWCAP_TLS (1 << 15)
++
++extern const unsigned char __attribute__((__visibility__("hidden")))
++	__a_barrier_dummy[], __a_barrier_oldkuser[],
++	__a_barrier_v6[], __a_barrier_v7[],
++	__a_cas_dummy[], __a_cas_v6[], __a_cas_v7[],
++	__a_gettp_dummy[];
++
++#define __a_barrier_kuser 0xffff0fa0
++#define __a_cas_kuser 0xffff0fc0
++#define __a_gettp_kuser 0xffff0fe0
++
++extern uintptr_t __attribute__((__visibility__("hidden")))
++	__a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr;
++
++#define SET(op,ver) (__a_##op##_ptr = \
++	(uintptr_t)__a_##op##_##ver - (uintptr_t)__a_##op##_dummy)
++
++int __set_thread_area(void *p) /* choose atomic/TLS helper variants for this CPU, then pass p to syscall 0xf0005 */
++{
++#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7 /* runtime selection only when not built for v7+ */
++	if (__hwcap & HWCAP_TLS) { /* hwcap bit 15 set */
++		size_t *aux;
++		SET(cas, v7); /* start with the v7 variants... */
++		SET(barrier, v7);
++		for (aux=libc.auxv; *aux; aux+=2) { /* walk (type, value) pairs until a zero type */
++			if (*aux != AT_PLATFORM) continue;
++			const char *s = (void *)aux[1]; /* AT_PLATFORM value used as a string pointer */
++			if (s[0]!='v' || s[1]!='6' || s[2]-'0'<10u) break; /* keep v7 unless it is "v6" not followed by a digit */
++			SET(cas, v6); /* ...and switch to the v6 variants for a v6 platform string */
++			SET(barrier, v6);
++			break;
++		}
++	} else {
++		int ver = *(int *)0xffff0ffc; /* NOTE(review): presumably the kernel user-helper version word - confirm */
++		SET(gettp, kuser); /* point all three helpers at the fixed __a_*_kuser addresses */
++		SET(cas, kuser);
++		SET(barrier, kuser);
++		if (ver < 2) a_crash(); /* versions below 2 are treated as unusable */
++		if (ver < 3) SET(barrier, oldkuser); /* below 3, use __a_barrier_oldkuser instead */
++	}
++#endif
++	return __syscall(0xf0005, p); /* NOTE(review): 0xf0005 is presumably the ARM set_tls call - confirm */
++}
+--- a/src/thread/arm/__set_thread_area.s
++++ /dev/null
+@@ -1 +0,0 @@
+-/* Replaced by C code in arch/arm/src */
+--- a/src/thread/arm/__unmapself.s
++++ b/src/thread/arm/__unmapself.s
+@@ -1,3 +1,4 @@
++.syntax unified
+ .text
+ .global __unmapself
+ .type   __unmapself,%function
+--- /dev/null
++++ b/src/thread/arm/atomics.s
+@@ -0,0 +1,111 @@
++.syntax unified
++.text
++
++.global __a_barrier
++.hidden __a_barrier
++.type __a_barrier,%function
++__a_barrier: /* dispatcher: jump to __a_barrier_dummy plus the offset stored in __a_barrier_ptr */
++	ldr ip,1f /* ip = distance from label 1 to __a_barrier_ptr */
++	ldr ip,[pc,ip] /* ip = value of __a_barrier_ptr (assumes pc reads as this insn + 8, i.e. label 1) */
++	add pc,pc,ip /* under the same assumption pc reads as __a_barrier_dummy here */
++1:	.word __a_barrier_ptr-1b
++.global __a_barrier_dummy
++.hidden __a_barrier_dummy
++__a_barrier_dummy: /* offset 0: no barrier instruction, just return */
++	bx lr
++.global __a_barrier_oldkuser
++.hidden __a_barrier_oldkuser
++__a_barrier_oldkuser: /* calls the routine at 0xffff0fc0 with r1 = r0 and r2 = sp, preserving r0-r3, ip, lr */
++	push {r0,r1,r2,r3,ip,lr}
++	mov r1,r0
++	mov r2,sp
++	ldr ip,=0xffff0fc0
++	mov lr,pc /* return address for the jump made by the next instruction */
++	mov pc,ip
++	pop {r0,r1,r2,r3,ip,lr}
++	bx lr
++.global __a_barrier_v6
++.hidden __a_barrier_v6
++__a_barrier_v6:
++	mcr p15,0,r0,c7,c10,5 /* NOTE(review): presumably the ARMv6 CP15 data memory barrier - confirm */
++	bx lr
++.global __a_barrier_v7
++.hidden __a_barrier_v7
++__a_barrier_v7:
++	.word 0xf57ff05b        /* dmb ish */
++	bx lr
++
++.global __a_cas
++.hidden __a_cas
++.type __a_cas,%function
++__a_cas: /* dispatcher: jump to __a_cas_dummy plus the offset stored in __a_cas_ptr */
++	ldr ip,1f
++	ldr ip,[pc,ip]
++	add pc,pc,ip
++1:	.word __a_cas_ptr-1b
++.global __a_cas_dummy
++.hidden __a_cas_dummy
++__a_cas_dummy: /* plain load/compare/store with no exclusives or barriers */
++	mov r3,r0 /* r3 = expected value */
++	ldr r0,[r2] /* r0 = current value at [r2] */
++	subs r0,r3,r0 /* r0 = expected - current; zero means they matched */
++	streq r1,[r2] /* store r1 only on a match */
++	bx lr
++.global __a_cas_v6
++.hidden __a_cas_v6
++__a_cas_v6: /* ldrex/strex loop bracketed by the same mcr that __a_barrier_v6 uses */
++	mov r3,r0
++	mcr p15,0,r0,c7,c10,5
++1:	.word 0xe1920f9f        /* ldrex r0,[r2] */
++	subs r0,r3,r0
++	.word 0x01820f91        /* strexeq r0,r1,[r2] */
++	teqeq r0,#1 /* on a match, test whether the strex status in r0 is 1 */
++	beq 1b /* if so, retry from the ldrex */
++	mcr p15,0,r0,c7,c10,5
++	bx lr
++.global __a_cas_v7
++.hidden __a_cas_v7
++__a_cas_v7: /* same loop as __a_cas_v6 with the encoded dmb ish in place of the mcr */
++	mov r3,r0
++	.word 0xf57ff05b        /* dmb ish */
++1:	.word 0xe1920f9f        /* ldrex r0,[r2] */
++	subs r0,r3,r0
++	.word 0x01820f91        /* strexeq r0,r1,[r2] */
++	teqeq r0,#1
++	beq 1b
++	.word 0xf57ff05b        /* dmb ish */
++	bx lr
++
++.global __aeabi_read_tp
++.type __aeabi_read_tp,%function
++__aeabi_read_tp: /* no code of its own: shares its address with __a_gettp below */
++
++.global __a_gettp
++.hidden __a_gettp
++.type __a_gettp,%function
++__a_gettp: /* dispatcher: jump to __a_gettp_dummy plus the offset stored in __a_gettp_ptr */
++	ldr r0,1f /* r0 (the result register) doubles as scratch here instead of ip */
++	ldr r0,[pc,r0]
++	add pc,pc,r0
++1:	.word __a_gettp_ptr-1b
++.global __a_gettp_dummy
++.hidden __a_gettp_dummy
++__a_gettp_dummy:
++	mrc p15,0,r0,c13,c0,3 /* NOTE(review): presumably reads the CP15 thread ID register into r0 - confirm */
++	bx lr
++
++.data
++.global __a_barrier_ptr
++.hidden __a_barrier_ptr
++__a_barrier_ptr: /* offset the __a_barrier dispatcher adds to __a_barrier_dummy; 0 selects the dummy */
++	.word 0
++
++.global __a_cas_ptr
++.hidden __a_cas_ptr
++__a_cas_ptr: /* offset the __a_cas dispatcher adds to __a_cas_dummy; 0 selects the dummy */
++	.word 0
++
++.global __a_gettp_ptr
++.hidden __a_gettp_ptr
++__a_gettp_ptr: /* offset the __a_gettp dispatcher adds to __a_gettp_dummy; 0 selects the dummy */
++	.word 0
+--- a/src/thread/arm/clone.s
++++ b/src/thread/arm/clone.s
+@@ -1,3 +1,4 @@
++.syntax unified
+ .text
+ .global __clone
+ .type   __clone,%function
+@@ -15,8 +16,6 @@ __clone:
+ 	tst r0,r0
+ 	beq 1f
+ 	ldmfd sp!,{r4,r5,r6,r7}
+-	tst lr,#1
+-	moveq pc,lr
+ 	bx lr
+ 
+ 1:	mov r0,r6
+--- a/src/thread/arm/syscall_cp.s
++++ b/src/thread/arm/syscall_cp.s
+@@ -1,3 +1,4 @@
++.syntax unified
+ .global __cp_begin
+ .hidden __cp_begin
+ .global __cp_end
+@@ -22,8 +23,6 @@ __cp_begin:
+ 	svc 0
+ __cp_end:
+ 	ldmfd sp!,{r4,r5,r6,r7,lr}
+-	tst lr,#1
+-	moveq pc,lr
+ 	bx lr
+ __cp_cancel:
+ 	ldmfd sp!,{r4,r5,r6,r7,lr}
+--- a/src/thread/microblaze/syscall_cp.s
++++ b/src/thread/microblaze/syscall_cp.s
+@@ -11,7 +11,7 @@
+ __syscall_cp_asm:
+ __cp_begin:
+ 	lwi     r5, r5, 0
+-	bnei    r5, __cancel
++	bnei    r5, __cp_cancel
+ 	addi    r12, r6, 0
+ 	add     r5, r7, r0
+ 	add     r6, r8, r0
+@@ -23,3 +23,5 @@ __cp_begin:
+ __cp_end:
+ 	rtsd    r15, 8
+ 	nop
++__cp_cancel:
++	bri     __cancel
+--- a/src/thread/or1k/syscall_cp.s
++++ b/src/thread/or1k/syscall_cp.s
+@@ -12,7 +12,7 @@ __syscall_cp_asm:
+ __cp_begin:
+ 	l.lwz	r3, 0(r3)
+ 	l.sfeqi	r3, 0
+-	l.bnf	__cancel
++	l.bnf	__cp_cancel
+ 	 l.ori	r11, r4, 0
+ 	l.ori	r3, r5, 0
+ 	l.ori	r4, r6, 0
+@@ -24,3 +24,6 @@ __cp_begin:
+ __cp_end:
+ 	l.jr	r9
+ 	 l.nop
++__cp_cancel:
++	l.j	__cancel
++	 l.nop
+--- a/src/thread/powerpc/syscall_cp.s
++++ b/src/thread/powerpc/syscall_cp.s
+@@ -38,7 +38,7 @@ __cp_begin:
+ 	cmpwi cr7, 0, 0 #compare r0 with 0, store result in cr7. 
+ 	beq+ cr7, 1f #jump to label 1 if r0 was 0
+ 	
+-	b __cancel #else call cancel 
++	b __cp_cancel #else call cancel
+ 1:
+ 	#ok, the cancel flag was not set
+ 	# syscall: number goes to r0, the rest 3-8
+@@ -55,3 +55,5 @@ __cp_end:
+ 	#else negate result.
+ 	neg 3, 3
+ 	blr
++__cp_cancel:
++	b __cancel
+--- a/src/thread/pthread_cancel.c
++++ b/src/thread/pthread_cancel.c
+@@ -1,12 +1,11 @@
++#define _GNU_SOURCE
+ #include <string.h>
+ #include "pthread_impl.h"
+ #include "syscall.h"
+ #include "libc.h"
+ 
+-#ifdef SHARED
+ __attribute__((__visibility__("hidden")))
+-#endif
+-long __cancel(), __cp_cancel(), __syscall_cp_asm(), __syscall_cp_c();
++long __cancel(), __syscall_cp_asm(), __syscall_cp_c();
+ 
+ long __cancel()
+ {
+@@ -17,12 +16,6 @@ long __cancel()
+ 	return -ECANCELED;
+ }
+ 
+-/* If __syscall_cp_asm has adjusted the stack pointer, it must provide a
+- * definition of __cp_cancel to undo those adjustments and call __cancel.
+- * Otherwise, __cancel provides a definition for __cp_cancel. */
+-
+-weak_alias(__cancel, __cp_cancel);
+-
+ long __syscall_cp_asm(volatile void *, syscall_arg_t,
+                       syscall_arg_t, syscall_arg_t, syscall_arg_t,
+                       syscall_arg_t, syscall_arg_t, syscall_arg_t);
+@@ -52,24 +45,22 @@ static void _sigaddset(sigset_t *set, in
+ 	set->__bits[s/8/sizeof *set->__bits] |= 1UL<<(s&8*sizeof *set->__bits-1);
+ }
+ 
+-#ifdef SHARED
+ __attribute__((__visibility__("hidden")))
+-#endif
+-extern const char __cp_begin[1], __cp_end[1];
++extern const char __cp_begin[1], __cp_end[1], __cp_cancel[1];
+ 
+ static void cancel_handler(int sig, siginfo_t *si, void *ctx)
+ {
+ 	pthread_t self = __pthread_self();
+ 	ucontext_t *uc = ctx;
+-	const char *ip = ((char **)&uc->uc_mcontext)[CANCEL_REG_IP];
++	uintptr_t pc = uc->uc_mcontext.MC_PC;
+ 
+ 	a_barrier();
+ 	if (!self->cancel || self->canceldisable == PTHREAD_CANCEL_DISABLE) return;
+ 
+ 	_sigaddset(&uc->uc_sigmask, SIGCANCEL);
+ 
+-	if (self->cancelasync || ip >= __cp_begin && ip < __cp_end) {
+-		((char **)&uc->uc_mcontext)[CANCEL_REG_IP] = (char *)__cp_cancel;
++	if (self->cancelasync || pc >= (uintptr_t)__cp_begin && pc < (uintptr_t)__cp_end) {
++		uc->uc_mcontext.MC_PC = (uintptr_t)__cp_cancel;
+ 		return;
+ 	}
+ 
+--- /dev/null
++++ b/src/thread/sh/__set_thread_area.c
+@@ -0,0 +1,40 @@
++#include "pthread_impl.h"
++#include "libc.h"
++#include <elf.h>
++
++/* Also perform sh-specific init */
++
++#define CPU_HAS_LLSC 0x0040
++#define CPU_HAS_CAS_L 0x0400
++
++__attribute__((__visibility__("hidden")))
++extern const char __sh_cas_gusa[], __sh_cas_llsc[], __sh_cas_imask[], __sh_cas_cas_l[];
++
++__attribute__((__visibility__("hidden")))
++const void *__sh_cas_ptr;
++
++__attribute__((__visibility__("hidden")))
++unsigned __sh_nommu;
++
++int __set_thread_area(void *p) /* load p into gbr and pick the CAS routine stored in __sh_cas_ptr; always returns 0 */
++{
++	size_t *aux;
++	__asm__ __volatile__ ( "ldc %0, gbr" : : "r"(p) : "memory" ); /* gbr = p */
++#ifndef __SH4A__ /* no runtime selection when __SH4A__ is defined */
++	__sh_cas_ptr = __sh_cas_gusa; /* default choice */
++#if !defined(__SH3__) && !defined(__SH4__)
++	for (aux=libc.auxv; *aux; aux+=2) { /* walk (type, value) pairs until a zero type */
++		if (*aux != AT_PLATFORM) continue;
++		const char *s = (void *)aux[1]; /* AT_PLATFORM value used as a string pointer */
++		if (s[0]!='s' || s[1]!='h' || s[2]!='2' || s[3]-'0'<10u) break; /* stop unless it is "sh2" not followed by a digit */
++		__sh_cas_ptr = __sh_cas_imask;
++		__sh_nommu = 1; /* also consulted by __unmapself to pick the nommu path */
++	}
++#endif
++	if (__hwcap & CPU_HAS_CAS_L) /* hwcap bits override the choices above; CAS_L is checked first */
++		__sh_cas_ptr = __sh_cas_cas_l;
++	else if (__hwcap & CPU_HAS_LLSC)
++		__sh_cas_ptr = __sh_cas_llsc;
++#endif
++	return 0;
++}
+--- /dev/null
++++ b/src/thread/sh/__unmapself.c
+@@ -0,0 +1,24 @@
++#include "pthread_impl.h"
++
++void __unmapself_sh_mmu(void *, size_t);
++void __unmapself_sh_nommu(void *, size_t);
++
++#if !defined(__SH3__) && !defined(__SH4__)
++#define __unmapself __unmapself_sh_nommu
++#include "dynlink.h"
++#undef CRTJMP
++#define CRTJMP(pc,sp) __asm__ __volatile__( \
++	"mov.l @%0+,r0 ; mov.l @%0,r12 ; jmp @r0 ; mov %1,r15" \
++	: : "r"(pc), "r"(sp) : "r0", "memory" )
++#include "../__unmapself.c"
++#undef __unmapself
++extern __attribute__((__visibility__("hidden"))) unsigned __sh_nommu;
++#else
++#define __sh_nommu 0
++#endif
++
++void __unmapself(void *base, size_t size) /* forward to the nommu or mmu implementation depending on __sh_nommu */
++{
++	if (__sh_nommu) __unmapself_sh_nommu(base, size); /* __sh_nommu is the constant 0 when __SH3__ or __SH4__ is defined */
++	else __unmapself_sh_mmu(base, size);
++}
+--- a/src/thread/sh/__unmapself.s
++++ /dev/null
+@@ -1,22 +0,0 @@
+-.text
+-.global __unmapself_sh_mmu
+-.type   __unmapself_sh_mmu, @function
+-__unmapself_sh_mmu:
+-	mov   #91, r3  ! SYS_munmap
+-	trapa #31
+-
+-	or    r0, r0
+-	or    r0, r0
+-	or    r0, r0
+-	or    r0, r0
+-	or    r0, r0
+-
+-	mov   #1, r3   ! SYS_exit
+-	mov   #0, r4
+-	trapa #31
+-
+-	or    r0, r0
+-	or    r0, r0
+-	or    r0, r0
+-	or    r0, r0
+-	or    r0, r0
+--- /dev/null
++++ b/src/thread/sh/__unmapself_mmu.s
+@@ -0,0 +1,22 @@
++.text
++.global __unmapself_sh_mmu
++.type   __unmapself_sh_mmu, @function
++__unmapself_sh_mmu: /* r4/r5 are not written before the first trapa, so the caller's arguments pass straight through */
++	mov   #91, r3  ! SYS_munmap
++	trapa #31 /* first trap: call number 91 in r3 */
++
++	or    r0, r0 /* NOTE(review): five value-preserving instructions follow each trapa; the reason is not visible here */
++	or    r0, r0
++	or    r0, r0
++	or    r0, r0
++	or    r0, r0
++
++	mov   #1, r3   ! SYS_exit
++	mov   #0, r4 /* first argument 0 for the second trap */
++	trapa #31
++
++	or    r0, r0
++	or    r0, r0
++	or    r0, r0
++	or    r0, r0
++	or    r0, r0
+--- /dev/null
++++ b/src/thread/sh/atomics.s
+@@ -0,0 +1,65 @@
++/* Contract for all versions is same as cas.l r2,r3,@r0
++ * pr and r1 are also clobbered (by jsr & r1 as temp).
++ * r0,r2,r4-r15 must be preserved.
++ * r3 contains result (==r2 iff cas succeeded). */
++
++	.align 2
++.global __sh_cas_gusa
++.hidden __sh_cas_gusa
++__sh_cas_gusa: /* CAS from plain load/store; NOTE(review): presumably relies on the kernel gUSA restartable-sequence convention - confirm */
++	mov.l r5,@-r15 /* save r5 and r4 on the stack */
++	mov.l r4,@-r15
++	mov r0,r4 /* r4 = target address, freeing r0 */
++	mova 1f,r0 /* r0 = address of label 1 (end of the load/compare/store region) */
++	mov r15,r1 /* keep the real stack pointer in r1 */
++	mov #(0f-1f),r15 /* r15 = negative byte length of the 0..1 region */
++0:	mov.l @r4,r5 /* r5 = current value */
++	cmp/eq r5,r2
++	bf 1f /* skip the store when it differs from r2 */
++	mov.l r3,@r4
++1:	mov r1,r15 /* restore the stack pointer */
++	mov r5,r3 /* result in r3 = value that was read */
++	mov r4,r0 /* restore r0 = address */
++	mov.l @r15+,r4
++	rts
++	 mov.l @r15+,r5 /* rts delay slot: finish restoring r5 */
++
++.global __sh_cas_llsc
++.hidden __sh_cas_llsc
++__sh_cas_llsc: /* CAS loop using movli.l/movco.l, with synco before and after */
++	mov r0,r1 /* r1 = address; r0 is used as the movli/movco data register */
++	synco
++0:	movli.l @r1,r0 /* r0 = current value */
++	cmp/eq r0,r2
++	bf 1f /* mismatch: leave with r0 = value read */
++	mov r3,r0
++	movco.l r0,@r1 /* attempt to store the new value */
++	bf 0b /* T clear after movco.l: go back and reload */
++	mov r2,r0 /* stored: report the expected value */
++1:	synco
++	mov r0,r3 /* result in r3 */
++	rts
++	 mov r1,r0 /* rts delay slot: restore r0 = address */
++
++.global __sh_cas_imask
++.hidden __sh_cas_imask
++__sh_cas_imask: /* CAS performed with bits 0xf0 of sr set for its duration */
++	mov r0,r1 /* r1 = address */
++	stc sr,r0
++	mov.l r0,@-r15 /* save the original sr on the stack */
++	or #0xf0,r0
++	ldc r0,sr /* NOTE(review): 0xf0 is presumably the interrupt mask field of sr - confirm */
++	mov.l @r1,r0 /* r0 = current value */
++	cmp/eq r0,r2
++	bf 1f
++	mov.l r3,@r1 /* store only when the value read equals r2 */
++1:	ldc.l @r15+,sr /* restore the saved sr */
++	mov r0,r3 /* result in r3 = value that was read */
++	rts
++	 mov r1,r0 /* rts delay slot: restore r0 = address */
++
++.global __sh_cas_cas_l
++.hidden __sh_cas_cas_l
++__sh_cas_cas_l: /* the hand-encoded cas.l sits directly after rts, i.e. in its delay slot */
++	rts
++	 .word 0x2323 /* cas.l r2,r3,@r0 */
+--- a/src/thread/sh/syscall_cp.s
++++ b/src/thread/sh/syscall_cp.s
+@@ -14,17 +14,8 @@ __syscall_cp_asm:
+ __cp_begin:
+ 	mov.l @r4, r4
+ 	tst   r4, r4
+-	bt    2f
+-
+-	mov.l L1, r0
+-	braf  r0
+-	 nop
+-1:
+-
+-.align 2
+-L1:	.long __cancel@PLT-(1b-.)
+-
+-2:	mov   r5, r3
++	bf    __cp_cancel
++	mov   r5, r3
+ 	mov   r6, r4
+ 	mov   r7, r5
+ 	mov.l @r15, r6
+@@ -43,3 +34,12 @@ __cp_end:
+ 
+ 	rts
+ 	 nop
++
++__cp_cancel:
++	mov.l 2f, r0
++	braf  r0
++	 nop
++1:
++
++.align 2
++2:	.long __cancel@PCREL-(1b-.)
+--- a/src/thread/x32/syscall_cp.s
++++ b/src/thread/x32/syscall_cp.s
+@@ -14,7 +14,7 @@ __syscall_cp_internal:
+ __cp_begin:
+ 	mov (%rdi),%eax
+ 	test %eax,%eax
+-	jnz __cancel
++	jnz __cp_cancel
+ 	mov %rdi,%r11
+ 	mov %rsi,%rax
+ 	mov %rdx,%rdi
+@@ -27,3 +27,5 @@ __cp_begin:
+ 	syscall
+ __cp_end:
+ 	ret
++__cp_cancel:
++	jmp __cancel
+--- /dev/null
++++ b/src/thread/x32/syscall_cp_fixup.c
+@@ -0,0 +1,38 @@
++#include <sys/syscall.h>
++
++__attribute__((__visibility__("hidden")))
++long __syscall_cp_internal(volatile void*, long long, long long, long long, long long,
++                             long long, long long, long long);
++
++struct __timespec { long long tv_sec; long tv_nsec; };
++struct __timespec_kernel { long long tv_sec; long long tv_nsec; };
++#define __tsc(X) ((struct __timespec*)(unsigned long)(X))
++#define __fixup(X) do { if(X) { \
++	ts->tv_sec = __tsc(X)->tv_sec; \
++	ts->tv_nsec = __tsc(X)->tv_nsec; \
++	(X) = (unsigned long)ts; } } while(0)
++
++__attribute__((__visibility__("hidden")))
++long __syscall_cp_asm (volatile void * foo, long long n, long long a1, long long a2, long long a3, /* wrapper: repack timespec arguments, then call __syscall_cp_internal */
++	                     long long a4, long long a5, long long a6)
++{
++	struct __timespec_kernel ts[1]; /* scratch copy whose tv_nsec is long long */
++	switch (n) { /* each case names the argument that __fixup treats as a timespec pointer */
++	case SYS_mq_timedsend: case SYS_mq_timedreceive: case SYS_pselect6:
++		__fixup(a5); /* if non-zero, copy its fields into ts and pass ts instead */
++		break;
++	case SYS_futex:
++		if((a2 & (~128 /* FUTEX_PRIVATE_FLAG */)) == 0 /* FUTEX_WAIT */)
++			__fixup(a4); /* only the wait operation gets a4 repacked */
++		break;
++	case SYS_clock_nanosleep:
++	case SYS_rt_sigtimedwait: case SYS_ppoll:
++		__fixup(a3);
++		break;
++	case SYS_nanosleep:
++		__fixup(a1);
++		break;
++	}
++	return __syscall_cp_internal(foo, n, a1, a2, a3, a4, a5, a6); /* all other syscalls pass through unchanged */
++}
++
+--- a/src/thread/x86_64/syscall_cp.s
++++ b/src/thread/x86_64/syscall_cp.s
+@@ -14,7 +14,7 @@ __syscall_cp_asm:
+ __cp_begin:
+ 	mov (%rdi),%eax
+ 	test %eax,%eax
+-	jnz __cancel
++	jnz __cp_cancel
+ 	mov %rdi,%r11
+ 	mov %rsi,%rax
+ 	mov %rdx,%rdi
+@@ -27,3 +27,5 @@ __cp_begin:
+ 	syscall
+ __cp_end:
+ 	ret
++__cp_cancel:
++	jmp __cancel
+--- a/src/time/clock_gettime.c
++++ b/src/time/clock_gettime.c
+@@ -5,37 +5,54 @@
+ #include "libc.h"
+ #include "atomic.h"
+ 
+-static int sc_clock_gettime(clockid_t clk, struct timespec *ts)
++#ifdef VDSO_CGT_SYM
++
++void *__vdsosym(const char *, const char *);
++
++static void *volatile vdso_func;
++
++static int cgt_init(clockid_t clk, struct timespec *ts)
+ {
+-	int r = __syscall(SYS_clock_gettime, clk, ts);
+-	if (!r) return r;
+-	if (r == -ENOSYS) {
+-		if (clk == CLOCK_REALTIME) {
+-			__syscall(SYS_gettimeofday, ts, 0);
+-			ts->tv_nsec = (int)ts->tv_nsec * 1000;
+-			return 0;
+-		}
+-		r = -EINVAL;
+-	}
+-	errno = -r;
+-	return -1;
++	void *p = __vdsosym(VDSO_CGT_VER, VDSO_CGT_SYM);
++	int (*f)(clockid_t, struct timespec *) =
++		(int (*)(clockid_t, struct timespec *))p;
++	a_cas_p(&vdso_func, (void *)cgt_init, p);
++	return f ? f(clk, ts) : -ENOSYS;
+ }
+ 
+-void *__vdsosym(const char *, const char *);
++static void *volatile vdso_func = (void *)cgt_init;
++
++#endif
+ 
+ int __clock_gettime(clockid_t clk, struct timespec *ts)
+ {
++	int r;
++
+ #ifdef VDSO_CGT_SYM
+-	static int (*volatile cgt)(clockid_t, struct timespec *);
+-	if (!cgt) {
+-		void *f = __vdsosym(VDSO_CGT_VER, VDSO_CGT_SYM);
+-		if (!f) f = (void *)sc_clock_gettime;
+-		a_cas_p(&cgt, 0, f);
++	int (*f)(clockid_t, struct timespec *) =
++		(int (*)(clockid_t, struct timespec *))vdso_func;
++	if (f) {
++		r = f(clk, ts);
++		if (!r) return r;
++		if (r == -EINVAL) return __syscall_ret(r);
++		/* Fall through on errors other than EINVAL. Some buggy
++		 * vdso implementations return ENOSYS for clocks they
++		 * can't handle, rather than making the syscall. This
++		 * also handles the case where cgt_init fails to find
++		 * a vdso function to use. */
+ 	}
+-	return cgt(clk, ts);
+-#else
+-	return sc_clock_gettime(clk, ts);
+ #endif
++
++	r = __syscall(SYS_clock_gettime, clk, ts);
++	if (r == -ENOSYS) {
++		if (clk == CLOCK_REALTIME) {
++			__syscall(SYS_gettimeofday, ts, 0);
++			ts->tv_nsec = (int)ts->tv_nsec * 1000;
++			return 0;
++		}
++		r = -EINVAL;
++	}
++	return __syscall_ret(r);
+ }
+ 
+ weak_alias(__clock_gettime, clock_gettime);
diff --git a/toolchain/musl/patches/010-Add-PowerPC-soft-float-support.patch b/toolchain/musl/patches/010-Add-PowerPC-soft-float-support.patch
index 4a851571c6d..dd770f61bff 100644
--- a/toolchain/musl/patches/010-Add-PowerPC-soft-float-support.patch
+++ b/toolchain/musl/patches/010-Add-PowerPC-soft-float-support.patch
@@ -47,7 +47,7 @@ Signed-off-by: Felix Fietkau <nbd@openwrt.org>
  
 --- a/configure
 +++ b/configure
-@@ -595,6 +595,10 @@ trycppif "_MIPSEL || __MIPSEL || __MIPSE
+@@ -604,6 +604,10 @@ trycppif "_MIPSEL || __MIPSEL || __MIPSE
  trycppif __mips_soft_float "$t" && SUBARCH=${SUBARCH}-sf
  fi
  
diff --git a/toolchain/musl/patches/030-mips-add-vdso-support.patch b/toolchain/musl/patches/030-mips-add-vdso-support.patch
deleted file mode 100644
index 537a1ca0b43..00000000000
--- a/toolchain/musl/patches/030-mips-add-vdso-support.patch
+++ /dev/null
@@ -1,80 +0,0 @@
-From 93332ebdcd54b0e0c0e86bced537cc96247bc1f1 Mon Sep 17 00:00:00 2001
-From: Hauke Mehrtens <hauke@hauke-m.de>
-Date: Sat, 23 Jan 2016 16:23:09 +0100
-Subject: [PATCH 2/2] mips: add vdso support
-
-vdso support is available on mips starting with kernel 4.4, see kernel
-commit a7f4df4e21 "MIPS: VDSO: Add implementations of gettimeofday()
-and clock_gettime()" for details.
-
-In Linux kernel 4.4.0 the mips code returns -ENOSYS in case it can not
-handle the vdso call and assumes the libc will call the original
-syscall in this case. Handle this case in musl. Currently Linux kernel
-4.4.0 handles the following types: CLOCK_REALTIME_COARSE,
-CLOCK_MONOTONIC_COARSE, CLOCK_REALTIME and CLOCK_MONOTONIC.
-
-These are some measurements of calling clock_gettime(CLOCK_MONOTONIC,
-&tp); 1.000.000 times.
-
-without vdso:
-root@OpenWrt:/# time ./vdso-test
-real 0m 0.95s
-user 0m 0.24s
-sys 0m 0.70s
-
-with vdso:
-root@OpenWrt:/# time /usr/bin/vdso-test
-real 0m 0.35s
-user 0m 0.34s
-sys 0m 0.00s
-
-Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
----
- arch/mips/syscall_arch.h |  4 ++++
- src/time/clock_gettime.c | 12 +++++++++++-
- 2 files changed, 15 insertions(+), 1 deletion(-)
-
-diff --git a/arch/mips/syscall_arch.h b/arch/mips/syscall_arch.h
-index e74e0ad..39c0ea3 100644
---- a/arch/mips/syscall_arch.h
-+++ b/arch/mips/syscall_arch.h
-@@ -161,3 +161,7 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo
- 	if (n == SYS_fstatat) __stat_fix(c);
- 	return r2;
- }
-+
-+#define VDSO_USEFUL
-+#define VDSO_CGT_SYM "__vdso_clock_gettime"
-+#define VDSO_CGT_VER "LINUX_2.6"
-diff --git a/src/time/clock_gettime.c b/src/time/clock_gettime.c
-index 1572de0..dba99ff 100644
---- a/src/time/clock_gettime.c
-+++ b/src/time/clock_gettime.c
-@@ -26,13 +26,23 @@ void *__vdsosym(const char *, const char *);
- int __clock_gettime(clockid_t clk, struct timespec *ts)
- {
- #ifdef VDSO_CGT_SYM
-+	int ret;
- 	static int (*volatile cgt)(clockid_t, struct timespec *);
- 	if (!cgt) {
- 		void *f = __vdsosym(VDSO_CGT_VER, VDSO_CGT_SYM);
- 		if (!f) f = (void *)sc_clock_gettime;
- 		a_cas_p(&cgt, 0, f);
- 	}
--	return cgt(clk, ts);
-+	ret = cgt(clk, ts);
-+
-+	/*
-+	 * mips in linux kernel 4.4.0 returns -ENOSYS if it can not
-+	 * handle the syscall in vdso, the original syscall should be
-+	 * called by the libc in such a case.
-+	 */
-+	if (ret == -ENOSYS)
-+		return sc_clock_gettime(clk, ts);
-+	return ret;
- #else
- 	return sc_clock_gettime(clk, ts);
- #endif
--- 
-2.7.0.rc3
-
diff --git a/toolchain/musl/patches/040-Add-format-attribute-to-some-function-declarations.patch b/toolchain/musl/patches/040-Add-format-attribute-to-some-function-declarations.patch
index 1d61d92ff3a..c495d67e080 100644
--- a/toolchain/musl/patches/040-Add-format-attribute-to-some-function-declarations.patch
+++ b/toolchain/musl/patches/040-Add-format-attribute-to-some-function-declarations.patch
@@ -30,8 +30,6 @@ Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
  include/syslog.h   | 12 ++++++++++--
  4 files changed, 57 insertions(+), 22 deletions(-)
 
-diff --git a/include/err.h b/include/err.h
-index 9f5cb6b..a5e3cde 100644
 --- a/include/err.h
 +++ b/include/err.h
 @@ -8,15 +8,23 @@
@@ -42,17 +40,16 @@ index 9f5cb6b..a5e3cde 100644
 -void vwarn(const char *, va_list);
 -void warnx(const char *, ...);
 -void vwarnx(const char *, va_list);
--
--_Noreturn void err(int, const char *, ...);
--_Noreturn void verr(int, const char *, va_list);
--_Noreturn void errx(int, const char *, ...);
--_Noreturn void verrx(int, const char *, va_list);
 +#if __GNUC__ >= 3
 +#define __fp(x, y) __attribute__ ((__format__ (__printf__, x, y)))
 +#else
 +#define __fp(x, y)
 +#endif
-+
+ 
+-_Noreturn void err(int, const char *, ...);
+-_Noreturn void verr(int, const char *, va_list);
+-_Noreturn void errx(int, const char *, ...);
+-_Noreturn void verrx(int, const char *, va_list);
 +void warn(const char *, ...) __fp(1, 2);
 +void vwarn(const char *, va_list) __fp(1, 0);
 +void warnx(const char *, ...) __fp(1, 2);
@@ -67,8 +64,6 @@ index 9f5cb6b..a5e3cde 100644
  
  #ifdef __cplusplus
  }
-diff --git a/include/monetary.h b/include/monetary.h
-index a91fa56..85c4d23 100644
 --- a/include/monetary.h
 +++ b/include/monetary.h
 @@ -13,8 +13,16 @@ extern "C" {
@@ -90,8 +85,6 @@ index a91fa56..85c4d23 100644
  
  #ifdef __cplusplus
  }
-diff --git a/include/stdio.h b/include/stdio.h
-index 884d2e6..17ca68e 100644
 --- a/include/stdio.h
 +++ b/include/stdio.h
 @@ -21,6 +21,14 @@ extern "C" {
@@ -156,7 +149,7 @@ index 884d2e6..17ca68e 100644
  #endif
  
  #ifdef _GNU_SOURCE
-@@ -184,6 +192,9 @@ char *fgets_unlocked(char *, int, FILE *);
+@@ -184,6 +192,9 @@ char *fgets_unlocked(char *, int, FILE *
  int fputs_unlocked(const char *, FILE *);
  #endif
  
@@ -166,8 +159,6 @@ index 884d2e6..17ca68e 100644
  #if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
  #define tmpfile64 tmpfile
  #define fopen64 fopen
-diff --git a/include/syslog.h b/include/syslog.h
-index 5b4d296..33b549d 100644
 --- a/include/syslog.h
 +++ b/include/syslog.h
 @@ -56,16 +56,22 @@ extern "C" {
@@ -204,6 +195,3 @@ index 5b4d296..33b549d 100644
  #ifdef __cplusplus
  }
  #endif
--- 
-2.7.0.rc3
-
diff --git a/toolchain/musl/patches/200-add_libssp_nonshared.patch b/toolchain/musl/patches/200-add_libssp_nonshared.patch
index 0b97b346f84..f6758cdae84 100644
--- a/toolchain/musl/patches/200-add_libssp_nonshared.patch
+++ b/toolchain/musl/patches/200-add_libssp_nonshared.patch
@@ -12,7 +12,7 @@ Signed-off-by: Steven Barth <steven@midlink.org>
 
 --- a/Makefile
 +++ b/Makefile
-@@ -56,7 +56,7 @@ CRT_LIBS = lib/crt1.o lib/Scrt1.o lib/rc
+@@ -60,7 +60,7 @@ CRT_LIBS = lib/crt1.o lib/Scrt1.o lib/rc
  STATIC_LIBS = lib/libc.a
  SHARED_LIBS = lib/libc.so
  TOOL_LIBS = lib/musl-gcc.specs
@@ -21,18 +21,18 @@ Signed-off-by: Steven Barth <steven@midlink.org>
  ALL_TOOLS = obj/musl-gcc
  
  WRAPCC_GCC = gcc
-@@ -117,7 +117,8 @@ NOSSP_SRCS = $(wildcard crt/*.c) \
- 	src/env/__libc_start_main.c src/env/__init_tls.c \
- 	src/thread/__set_thread_area.c src/env/__stack_chk_fail.c \
- 	src/string/memset.c src/string/memcpy.c \
--	src/ldso/dlstart.c src/ldso/dynlink.c
-+	src/ldso/dlstart.c src/ldso/dynlink.c \
+@@ -123,7 +123,8 @@ NOSSP_SRCS = $(wildcard crt/*.c) \
+ 	src/thread/__set_thread_area.c src/thread/$(ARCH)/__set_thread_area.c \
+ 	src/string/memset.c src/string/$(ARCH)/memset.c \
+ 	src/string/memcpy.c src/string/$(ARCH)/memcpy.c \
+-	ldso/dlstart.c ldso/dynlink.c
++	ldso/dlstart.c ldso/dynlink.c \
 +	src/libssp_nonshared/__stack_chk_fail_local.c
  $(NOSSP_SRCS:%.c=obj/%.o) $(NOSSP_SRCS:%.c=obj/%.lo): CFLAGS_ALL += $(CFLAGS_NOSSP)
  
  $(CRT_LIBS:lib/%=obj/crt/%): CFLAGS_ALL += -DCRT
-@@ -161,6 +162,11 @@ lib/libc.a: $(OBJS)
- 	$(AR) rc $@ $(OBJS)
+@@ -167,6 +168,11 @@ lib/libc.a: $(AOBJS)
+ 	$(AR) rc $@ $(AOBJS)
  	$(RANLIB) $@
  
 +lib/libssp_nonshared.a: obj/src/libssp_nonshared/__stack_chk_fail_local.o
diff --git a/toolchain/musl/patches/300-relative.patch b/toolchain/musl/patches/300-relative.patch
index 3735516ba7a..3a9c13eb369 100644
--- a/toolchain/musl/patches/300-relative.patch
+++ b/toolchain/musl/patches/300-relative.patch
@@ -1,6 +1,6 @@
 --- a/Makefile
 +++ b/Makefile
-@@ -210,7 +210,7 @@ $(DESTDIR)$(includedir)/%: $(srcdir)/inc
+@@ -219,7 +219,7 @@ $(DESTDIR)$(includedir)/%: $(srcdir)/inc
  	$(INSTALL) -D -m 644 $< $@
  
  $(DESTDIR)$(LDSO_PATHNAME): $(DESTDIR)$(libdir)/libc.so
-- 
GitLab