Diffstat (limited to 'include/asm-generic')
33 files changed, 1947 insertions, 154 deletions
diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h index 322d510f38..cafb25172c 100644 --- a/include/asm-generic/atomic-long.h +++ b/include/asm-generic/atomic-long.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + #ifndef _ASM_GENERIC_ATOMIC_LONG_H #define _ASM_GENERIC_ATOMIC_LONG_H /* @@ -66,69 +68,6 @@ static inline void atomic_long_sub(long i, atomic_long_t *l) atomic64_sub(i, v); } -static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return atomic64_sub_and_test(i, v); -} - -static inline int atomic_long_dec_and_test(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return atomic64_dec_and_test(v); -} - -static inline int atomic_long_inc_and_test(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return atomic64_inc_and_test(v); -} - -static inline int atomic_long_add_negative(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return atomic64_add_negative(i, v); -} - -static inline long atomic_long_add_return(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_add_return(i, v); -} - -static inline long atomic_long_sub_return(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_sub_return(i, v); -} - -static inline long atomic_long_inc_return(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_inc_return(v); -} - -static inline long atomic_long_dec_return(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_dec_return(v); -} - -static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_add_unless(v, a, u); -} - #define atomic_long_inc_not_zero(l) atomic64_inc_not_zero((atomic64_t *)(l)) #define atomic_long_cmpxchg(l, old, new) \ diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h new file mode 100644 index 0000000000..74429e1c37 --- /dev/null +++ b/include/asm-generic/atomic.h @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * based on linux/include/asm-arm/atomic.h + * + * Copyright (c) 1996 Russell King. 
+ */ + +#ifndef __ASM_GENERIC_ATOMIC_H +#define __ASM_GENERIC_ATOMIC_H + +#include <linux/types.h> + +#ifdef CONFIG_SMP +#error SMP not supported +#endif +#define ATOMIC_INIT(i) { (i) } + +typedef struct { s64 counter; } atomic64_t; + +#define atomic64_read(v) ((v)->counter) +#define atomic64_set(v, i) (((v)->counter) = (i)) + +static inline void atomic64_add(s64 i, volatile atomic64_t *v) +{ + v->counter += i; +} + +static inline void atomic64_sub(s64 i, volatile atomic64_t *v) +{ + v->counter -= i; +} + +static inline void atomic64_inc(volatile atomic64_t *v) +{ + v->counter += 1; +} + +static inline void atomic64_dec(volatile atomic64_t *v) +{ + v->counter -= 1; +} + +static inline int atomic64_dec_and_test(volatile atomic64_t *v) +{ + s64 val; + + val = v->counter; + v->counter = val -= 1; + + return val == 0; +} + +static inline int atomic64_add_negative(s64 i, volatile atomic64_t *v) +{ + s64 val; + + val = v->counter; + v->counter = val += i; + + return val < 0; +} + + +typedef struct { volatile int counter; } atomic_t; + +#define ATOMIC_INIT(i) { (i) } + +#define atomic_read(v) ((v)->counter) +#define atomic_set(v,i) (((v)->counter) = (i)) + +static inline void atomic_add(int i, volatile atomic_t *v) +{ + v->counter += i; +} + +static inline void atomic_sub(int i, volatile atomic_t *v) +{ + v->counter -= i; +} + +static inline int atomic_inc_return(volatile atomic_t *v) +{ + return ++v->counter; +} + +static inline int atomic_dec_return(volatile atomic_t *v) +{ + return --v->counter; +} + +#define atomic_inc(v) ((void)atomic_inc_return(v)) +#define atomic_dec(v) ((void)atomic_dec_return(v)) + +static inline int atomic_dec_and_test(volatile atomic_t *v) +{ + int val; + + val = v->counter; + v->counter = val -= 1; + + return val == 0; +} + +static inline int atomic_add_negative(int i, volatile atomic_t *v) +{ + int val; + + val = v->counter; + v->counter = val += i; + + return val < 0; +} + +static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) +{ + *addr &= ~mask; +} + +#define atomic_cmpxchg(v, o, n) cmpxchg(&((v)->counter), o, n) + +/* Atomic operations are already serializing on ARM */ +#define smp_mb__before_atomic_dec() barrier() +#define smp_mb__after_atomic_dec() barrier() +#define smp_mb__before_atomic_inc() barrier() +#define smp_mb__after_atomic_inc() barrier() + +#endif diff --git a/include/asm-generic/barebox.lds.h b/include/asm-generic/barebox.lds.h index b6ca8eb2be..d3736ebaed 100644 --- a/include/asm-generic/barebox.lds.h +++ b/include/asm-generic/barebox.lds.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + /* * Align to a 32 byte boundary equal to the @@ -6,17 +8,12 @@ #define STRUCT_ALIGNMENT 32 #define STRUCT_ALIGN() . 
= ALIGN(STRUCT_ALIGNMENT) -#if defined CONFIG_X86 || \ - defined CONFIG_ARCH_EP93XX || \ - defined CONFIG_ARCH_ZYNQ -#include <mach/barebox.lds.h> -#endif - #ifndef PRE_IMAGE #define PRE_IMAGE #endif #define BAREBOX_INITCALLS \ + STRUCT_ALIGN(); \ __barebox_initcalls_start = .; \ KEEP(*(.initcall.0)) \ KEEP(*(.initcall.1)) \ @@ -33,9 +30,12 @@ KEEP(*(.initcall.12)) \ KEEP(*(.initcall.13)) \ KEEP(*(.initcall.14)) \ + KEEP(*(.initcall.15)) \ + KEEP(*(.initcall.16)) \ __barebox_initcalls_end = .; #define BAREBOX_EXITCALLS \ + STRUCT_ALIGN(); \ __barebox_exitcalls_start = .; \ KEEP(*(.exitcall.0)) \ KEEP(*(.exitcall.1)) \ @@ -47,37 +47,44 @@ __barebox_exitcalls_end = .; #define BAREBOX_CMDS \ + STRUCT_ALIGN(); \ __barebox_cmd_start = .; \ KEEP(*(SORT_BY_NAME(.barebox_cmd*))) \ __barebox_cmd_end = .; #define BAREBOX_RATP_CMDS \ + STRUCT_ALIGN(); \ __barebox_ratp_cmd_start = .; \ KEEP(*(SORT_BY_NAME(.barebox_ratp_cmd*))) \ __barebox_ratp_cmd_end = .; #define BAREBOX_SYMS \ + STRUCT_ALIGN(); \ __usymtab_start = .; \ KEEP(*(__usymtab)) \ __usymtab_end = .; #define BAREBOX_MAGICVARS \ + STRUCT_ALIGN(); \ __barebox_magicvar_start = .; \ KEEP(*(SORT_BY_NAME(.barebox_magicvar*))) \ __barebox_magicvar_end = .; #define BAREBOX_CLK_TABLE \ + STRUCT_ALIGN(); \ __clk_of_table_start = .; \ KEEP(*(.__clk_of_table)); \ KEEP(*(.__clk_of_table_end)); \ __clk_of_table_end = .; #define BAREBOX_DTB \ + STRUCT_ALIGN(); \ __dtb_start = .; \ KEEP(*(.dtb.rodata.*)); \ __dtb_end = .; #define BAREBOX_IMD \ + STRUCT_ALIGN(); \ KEEP(*(.barebox_imd_start)) \ KEEP(*(.barebox_imd_1*)) \ *(.barebox_imd_0*) \ @@ -85,6 +92,7 @@ #ifdef CONFIG_PCI #define BAREBOX_PCI_FIXUP \ + STRUCT_ALIGN(); \ __start_pci_fixups_early = .; \ KEEP(*(.pci_fixup_early)) \ __end_pci_fixups_early = .; \ @@ -99,21 +107,42 @@ #endif #define BAREBOX_RSA_KEYS \ + STRUCT_ALIGN(); \ __rsa_keys_start = .; \ KEEP(*(.rsa_keys.rodata.*)); \ __rsa_keys_end = .; \ +#define BAREBOX_DEEP_PROBE \ + STRUCT_ALIGN(); \ + __barebox_deep_probe_start = .; \ + KEEP(*(SORT_BY_NAME(.barebox_deep_probe*))) \ + __barebox_deep_probe_end = .; + + +#ifdef CONFIG_CONSTRUCTORS +#define KERNEL_CTORS() . = ALIGN(8); \ + __ctors_start = .; \ + KEEP(*(.ctors)) \ + KEEP(*(SORT(.init_array.*))) \ + KEEP(*(.init_array)) \ + __ctors_end = .; +#else +#define KERNEL_CTORS() +#endif + #define RO_DATA_SECTION \ BAREBOX_INITCALLS \ BAREBOX_EXITCALLS \ BAREBOX_CMDS \ BAREBOX_RATP_CMDS \ BAREBOX_SYMS \ + KERNEL_CTORS() \ BAREBOX_MAGICVARS \ BAREBOX_CLK_TABLE \ BAREBOX_DTB \ BAREBOX_RSA_KEYS \ - BAREBOX_PCI_FIXUP + BAREBOX_PCI_FIXUP \ + BAREBOX_DEEP_PROBE #if defined(CONFIG_ARCH_BAREBOX_MAX_BARE_INIT_SIZE) && \ CONFIG_ARCH_BAREBOX_MAX_BARE_INIT_SIZE < CONFIG_BAREBOX_MAX_BARE_INIT_SIZE diff --git a/include/asm-generic/bitio.h b/include/asm-generic/bitio.h new file mode 100644 index 0000000000..99b85da59c --- /dev/null +++ b/include/asm-generic/bitio.h @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#ifndef __ASM_GENERIC_BITIO_H +#define __ASM_GENERIC_BITIO_H + +#include <asm/io.h> + +/* + * Clear and set bits in one shot. These macros can be used to clear and + * set multiple bits in a register using a single call. These macros can + * also be used to set a multiple-bit bit pattern using a mask, by + * specifying the mask in the 'clear' parameter and the new bit pattern + * in the 'set' parameter. 
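+ *
+ * A minimal usage sketch (CTRL_REG is a hypothetical register address,
+ * not part of this header): program the 2-bit field at bits [5:4] of a
+ * little-endian 32-bit register to 0b10 in one read-modify-write:
+ *
+ *   clrsetbits_le32(CTRL_REG, 0x3 << 4, 0x2 << 4);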
+ */ + +#ifndef out_arch +#define out_arch(type,endian,a,v) __raw_write##type(cpu_to_##endian(v),a) +#endif + +#ifndef in_arch +#define in_arch(type,endian,a) endian##_to_cpu(__raw_read##type(a)) +#endif + +#ifndef out_le64 +#define out_le64(a,v) out_arch(q,le64,a,v) +#endif + +#ifndef out_le32 +#define out_le32(a,v) out_arch(l,le32,a,v) +#endif + +#ifndef out_le16 +#define out_le16(a,v) out_arch(w,le16,a,v) +#endif + +#ifndef in_le64 +#define in_le64(a) in_arch(q,le64,a) +#endif + +#ifndef in_le32 +#define in_le32(a) in_arch(l,le32,a) +#endif + +#ifndef in_le16 +#define in_le16(a) in_arch(w,le16,a) +#endif + +#ifndef out_be32 +#define out_be32(a,v) out_arch(l,be32,a,v) +#endif + +#ifndef out_be16 +#define out_be16(a,v) out_arch(w,be16,a,v) +#endif + +#ifndef in_be32 +#define in_be32(a) in_arch(l,be32,a) +#endif + +#ifndef in_be16 +#define in_be16(a) in_arch(w,be16,a) +#endif + +#ifndef out_8 +#define out_8(a,v) __raw_writeb(v,a) +#endif + +#ifndef in_8 +#define in_8(a) __raw_readb(a) +#endif + +#ifndef clrbits +#define clrbits(type, addr, clear) \ + out_##type((addr), in_##type(addr) & ~(clear)) +#endif + +#ifndef setbits +#define setbits(type, addr, set) \ + out_##type((addr), in_##type(addr) | (set)) +#endif + +#define clrsetbits(type, addr, clear, set) \ + out_##type((addr), (in_##type(addr) & ~(clear)) | (set)) + +#ifndef clrbits_be32 +#define clrbits_be32(addr, clear) clrbits(be32, addr, clear) +#endif + +#ifndef setbits_be32 +#define setbits_be32(addr, set) setbits(be32, addr, set) +#endif + +#ifndef clrsetbits_be32 +#define clrsetbits_be32(addr, clear, set) clrsetbits(be32, addr, clear, set) +#endif + +#ifndef clrbits_le32 +#define clrbits_le32(addr, clear) clrbits(le32, addr, clear) +#endif + +#ifndef setbits_le32 +#define setbits_le32(addr, set) setbits(le32, addr, set) +#endif + +#ifndef clrsetbits_le32 +#define clrsetbits_le32(addr, clear, set) clrsetbits(le32, addr, clear, set) +#endif + +#ifndef clrbits_be16 +#define clrbits_be16(addr, clear) clrbits(be16, addr, clear) +#endif + +#ifndef setbits_be16 +#define setbits_be16(addr, set) setbits(be16, addr, set) +#endif + +#ifndef clrsetbits_be16 +#define clrsetbits_be16(addr, clear, set) clrsetbits(be16, addr, clear, set) +#endif + +#ifndef clrbits_le16 +#define clrbits_le16(addr, clear) clrbits(le16, addr, clear) +#endif + +#ifndef setbits_le16 +#define setbits_le16(addr, set) setbits(le16, addr, set) +#endif + +#ifndef clrsetbits_le16 +#define clrsetbits_le16(addr, clear, set) clrsetbits(le16, addr, clear, set) +#endif + +#ifndef clrbits_8 +#define clrbits_8(addr, clear) clrbits(8, addr, clear) +#endif + +#ifndef setbits_8 +#define setbits_8(addr, set) setbits(8, addr, set) +#endif + +#ifndef clrsetbits_8 +#define clrsetbits_8(addr, clear, set) clrsetbits(8, addr, clear, set) +#endif + +#endif /* __ASM_GENERIC_IO_H */ diff --git a/include/asm-generic/bitops/__ffs.h b/include/asm-generic/bitops/__ffs.h index 9a3274aecf..6421acb3a5 100644 --- a/include/asm-generic/bitops/__ffs.h +++ b/include/asm-generic/bitops/__ffs.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + #ifndef _ASM_GENERIC_BITOPS___FFS_H_ #define _ASM_GENERIC_BITOPS___FFS_H_ diff --git a/include/asm-generic/bitops/__fls.h b/include/asm-generic/bitops/__fls.h index be24465403..a5615539c3 100644 --- a/include/asm-generic/bitops/__fls.h +++ b/include/asm-generic/bitops/__fls.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + #ifndef _ASM_GENERIC_BITOPS___FLS_H_ #define _ASM_GENERIC_BITOPS___FLS_H_ diff --git 
a/include/asm-generic/bitops/ffs.h b/include/asm-generic/bitops/ffs.h index fbbb43af7d..4a375eae14 100644 --- a/include/asm-generic/bitops/ffs.h +++ b/include/asm-generic/bitops/ffs.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + #ifndef _ASM_GENERIC_BITOPS_FFS_H_ #define _ASM_GENERIC_BITOPS_FFS_H_ @@ -31,10 +33,8 @@ static inline int ffs(int x) x >>= 2; r += 2; } - if (!(x & 1)) { - x >>= 1; + if (!(x & 1)) r += 1; - } return r; } diff --git a/include/asm-generic/bitops/ffz.h b/include/asm-generic/bitops/ffz.h index 6744bd4cdf..f9f624837e 100644 --- a/include/asm-generic/bitops/ffz.h +++ b/include/asm-generic/bitops/ffz.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + #ifndef _ASM_GENERIC_BITOPS_FFZ_H_ #define _ASM_GENERIC_BITOPS_FFZ_H_ diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h index 72a51e5a12..2ef6e94fa1 100644 --- a/include/asm-generic/bitops/find.h +++ b/include/asm-generic/bitops/find.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + #ifndef _ASM_GENERIC_BITOPS_FIND_H_ #define _ASM_GENERIC_BITOPS_FIND_H_ diff --git a/include/asm-generic/bitops/fls.h b/include/asm-generic/bitops/fls.h index 850859bc50..faa02fc48d 100644 --- a/include/asm-generic/bitops/fls.h +++ b/include/asm-generic/bitops/fls.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + #ifndef _ASM_GENERIC_BITOPS_FLS_H_ #define _ASM_GENERIC_BITOPS_FLS_H_ @@ -31,10 +33,8 @@ static inline int fls(int x) x <<= 2; r -= 2; } - if (!(x & 0x80000000u)) { - x <<= 1; + if (!(x & 0x80000000u)) r -= 1; - } return r; } diff --git a/include/asm-generic/bitops/fls64.h b/include/asm-generic/bitops/fls64.h index 86d403f8b2..45533402f0 100644 --- a/include/asm-generic/bitops/fls64.h +++ b/include/asm-generic/bitops/fls64.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + #ifndef _ASM_GENERIC_BITOPS_FLS64_H_ #define _ASM_GENERIC_BITOPS_FLS64_H_ diff --git a/include/asm-generic/bitops/hweight.h b/include/asm-generic/bitops/hweight.h index 7268c8b9ab..f0c188b675 100644 --- a/include/asm-generic/bitops/hweight.h +++ b/include/asm-generic/bitops/hweight.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + #ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_ #define _ASM_GENERIC_BITOPS_HWEIGHT_H_ diff --git a/include/asm-generic/bitops/ops.h b/include/asm-generic/bitops/ops.h index f19bcaa29b..1684621922 100644 --- a/include/asm-generic/bitops/ops.h +++ b/include/asm-generic/bitops/ops.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + #ifndef _ASM_GENERIC_BITOPS_OPS_H_ #define _ASM_GENERIC_BITOPS_OPS_H_ diff --git a/include/asm-generic/bitsperlong.h b/include/asm-generic/bitsperlong.h index bb98650298..20c055c6bd 100644 --- a/include/asm-generic/bitsperlong.h +++ b/include/asm-generic/bitsperlong.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + #ifndef __ASM_GENERIC_BITS_PER_LONG #define __ASM_GENERIC_BITS_PER_LONG @@ -7,4 +9,8 @@ #define BITS_PER_LONG 32 #endif /* CONFIG_64BIT */ +#ifndef BITS_PER_LONG_LONG +#define BITS_PER_LONG_LONG 64 +#endif + #endif /* __ASM_GENERIC_BITS_PER_LONG */ diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 8583d2425f..18a1b419ff 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -1,7 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + #ifndef _ASM_GENERIC_BUG_H #define _ASM_GENERIC_BUG_H #include <linux/compiler.h> +#include <linux/kernel.h> +#include <printk.h> #define BUG() do { \ printf("BUG: failure at %s:%d/%s()!\n", 
__FILE__, __LINE__, __FUNCTION__); \ @@ -28,10 +32,39 @@ int __ret_warn_on = !!(condition); \ if (unlikely(__ret_warn_on)) { \ __WARN(); \ - puts("WARNING: "); \ - printf(format); \ + printf("WARNING: " format); \ } \ unlikely(__ret_warn_on); \ }) #endif + +#define WARN_ONCE(condition, format...) ({ \ + static int __warned; \ + int __ret_warn_once = !!(condition); \ + \ + if (unlikely(__ret_warn_once)) { \ + if (WARN(!__warned, format)) { \ + __warned = 1; \ + dump_stack(); \ + } \ + } \ + unlikely(__ret_warn_once); \ +}) + +#define WARN_ON_ONCE(condition) ({ \ + static int __warned; \ + int __ret_warn_once = !!(condition); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + __warned = 1; \ + __WARN(); \ + } \ + unlikely(__ret_warn_once); \ +}) + +#define ASSERT(expr) do { \ + if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) \ + BUG_ON(!(expr)); \ +} while (0) + #endif diff --git a/include/asm-generic/cache.h b/include/asm-generic/cache.h new file mode 100644 index 0000000000..a766d835fd --- /dev/null +++ b/include/asm-generic/cache.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_GENERIC_CACHE_H_ + +#ifndef sync_caches_for_execution +#define sync_caches_for_execution() (void)0 +#endif + +#endif diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h new file mode 100644 index 0000000000..d93b103d5c --- /dev/null +++ b/include/asm-generic/cmpxchg-local.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_GENERIC_CMPXCHG_LOCAL_H +#define __ASM_GENERIC_CMPXCHG_LOCAL_H + +#include <linux/types.h> +#include <linux/compiler.h> + +extern unsigned long wrong_size_cmpxchg(volatile void *ptr) + __noreturn; + +/* + * Generic version of __cmpxchg_local (disables interrupts). Takes an unsigned + * long parameter, supporting various types of architectures. + */ +static inline unsigned long __generic_cmpxchg_local(volatile void *ptr, + unsigned long old, unsigned long new, int size) +{ + unsigned long prev; + + /* + * Sanity checking, compile-time. + */ + if (size == 8 && sizeof(unsigned long) != 8) + wrong_size_cmpxchg(ptr); + + switch (size) { + case 1: prev = *(u8 *)ptr; + if (prev == old) + *(u8 *)ptr = (u8)new; + break; + case 2: prev = *(u16 *)ptr; + if (prev == old) + *(u16 *)ptr = (u16)new; + break; + case 4: prev = *(u32 *)ptr; + if (prev == old) + *(u32 *)ptr = (u32)new; + break; + case 8: prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = (u64)new; + break; + default: + wrong_size_cmpxchg(ptr); + } + return prev; +} + +/* + * Generic version of __cmpxchg64_local. Takes an u64 parameter. + */ +static inline u64 __generic_cmpxchg64_local(volatile void *ptr, + u64 old, u64 new) +{ + u64 prev; + + prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = new; + + return prev; +} + +#endif diff --git a/include/asm-generic/cmpxchg.h b/include/asm-generic/cmpxchg.h new file mode 100644 index 0000000000..b90524135e --- /dev/null +++ b/include/asm-generic/cmpxchg.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Generic non-atomic cmpxchg for porting kernel code. 
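+ *
+ * Illustrative use (counter is a hypothetical variable): the canonical
+ * cmpxchg retry loop still reads naturally, it just degenerates to
+ * plain loads and stores on these non-SMP configurations:
+ *
+ *   old = counter;
+ *   while ((prev = cmpxchg(&counter, old, old + 1)) != old)
+ *           old = prev;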
+ */ + +#ifndef __ASM_GENERIC_CMPXCHG_H +#define __ASM_GENERIC_CMPXCHG_H + +#ifdef CONFIG_SMP +#error "Cannot use generic cmpxchg on SMP" +#endif + +#include <asm-generic/cmpxchg-local.h> + +#define generic_cmpxchg_local(ptr, o, n) ({ \ + ((__typeof__(*(ptr)))__generic_cmpxchg_local((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr)))); \ +}) + +#define generic_cmpxchg64_local(ptr, o, n) \ + __generic_cmpxchg64_local((ptr), (o), (n)) + +#define cmpxchg generic_cmpxchg_local +#define cmpxchg64 generic_cmpxchg64_local + +#endif diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h index 2e0ba8389e..a3b98c86f0 100644 --- a/include/asm-generic/div64.h +++ b/include/asm-generic/div64.h @@ -1,9 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_GENERIC_DIV64_H #define _ASM_GENERIC_DIV64_H /* * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com> * Based on former asm-ppc/div64.h and asm-m68knommu/div64.h * + * Optimization for constant divisors on 32-bit machines: + * Copyright (C) 2006-2015 Nicolas Pitre + * * The semantics of do_div() are: * * uint32_t do_div(uint64_t *n, uint32_t base) @@ -18,8 +22,200 @@ */ #include <linux/types.h> +#include <linux/compiler.h> + +#if BITS_PER_LONG == 64 + +/** + * do_div - returns 2 values: calculate remainder and update new dividend + * @n: uint64_t dividend (will be updated) + * @base: uint32_t divisor + * + * Summary: + * ``uint32_t remainder = n % base;`` + * ``n = n / base;`` + * + * Return: (uint32_t)remainder + * + * NOTE: macro parameter @n is evaluated multiple times, + * beware of side effects! + */ +# define do_div(n,base) ({ \ + uint32_t __base = (base); \ + uint32_t __rem; \ + __rem = ((uint64_t)(n)) % __base; \ + (n) = ((uint64_t)(n)) / __base; \ + __rem; \ + }) + +#elif BITS_PER_LONG == 32 + +#include <linux/log2.h> + +/* + * If the divisor happens to be constant, we determine the appropriate + * inverse at compile time to turn the division into a few inline + * multiplications which ought to be much faster. And yet only if compiling + * with a sufficiently recent gcc version to perform proper 64-bit constant + * propagation. + * + * (It is unfortunate that gcc doesn't perform all this internally.) + */ +#ifndef __div64_const32_is_OK +#define __div64_const32_is_OK (__GNUC__ >= 4) +#endif + +#define __div64_const32(n, ___b) \ +({ \ + /* \ + * Multiplication by reciprocal of b: n / b = n * (p / b) / p \ + * \ + * We rely on the fact that most of this code gets optimized \ + * away at compile time due to constant propagation and only \ + * a few multiplication instructions should remain. \ + * Hence this monstrous macro (static inline doesn't always \ + * do the trick here). \ + */ \ + uint64_t ___res, ___x, ___t, ___m, ___n = (n); \ + uint32_t ___p, ___bias; \ + \ + /* determine MSB of b */ \ + ___p = 1 << ilog2(___b); \ + \ + /* compute m = ((p << 64) + b - 1) / b */ \ + ___m = (~0ULL / ___b) * ___p; \ + ___m += (((~0ULL % ___b + 1) * ___p) + ___b - 1) / ___b; \ + \ + /* one less than the dividend with highest result */ \ + ___x = ~0ULL / ___b * ___b - 1; \ + \ + /* test our ___m with res = m * x / (p << 64) */ \ + ___res = ((___m & 0xffffffff) * (___x & 0xffffffff)) >> 32; \ + ___t = ___res += (___m & 0xffffffff) * (___x >> 32); \ + ___res += (___x & 0xffffffff) * (___m >> 32); \ + ___t = (___res < ___t) ? (1ULL << 32) : 0; \ + ___res = (___res >> 32) + ___t; \ + ___res += (___m >> 32) * (___x >> 32); \ + ___res /= ___p; \ + \ + /* Now sanitize and optimize what we've got. 
*/ \ + if (~0ULL % (___b / (___b & -___b)) == 0) { \ + /* special case, can be simplified to ... */ \ + ___n /= (___b & -___b); \ + ___m = ~0ULL / (___b / (___b & -___b)); \ + ___p = 1; \ + ___bias = 1; \ + } else if (___res != ___x / ___b) { \ + /* \ + * We can't get away without a bias to compensate \ + * for bit truncation errors. To avoid it we'd need an \ + * additional bit to represent m which would overflow \ + * a 64-bit variable. \ + * \ + * Instead we do m = p / b and n / b = (n * m + m) / p. \ + */ \ + ___bias = 1; \ + /* Compute m = (p << 64) / b */ \ + ___m = (~0ULL / ___b) * ___p; \ + ___m += ((~0ULL % ___b + 1) * ___p) / ___b; \ + } else { \ + /* \ + * Reduce m / p, and try to clear bit 31 of m when \ + * possible, otherwise that'll need extra overflow \ + * handling later. \ + */ \ + uint32_t ___bits = -(___m & -___m); \ + ___bits |= ___m >> 32; \ + ___bits = (~___bits) << 1; \ + /* \ + * If ___bits == 0 then setting bit 31 is unavoidable. \ + * Simply apply the maximum possible reduction in that \ + * case. Otherwise the MSB of ___bits indicates the \ + * best reduction we should apply. \ + */ \ + if (!___bits) { \ + ___p /= (___m & -___m); \ + ___m /= (___m & -___m); \ + } else { \ + ___p >>= ilog2(___bits); \ + ___m >>= ilog2(___bits); \ + } \ + /* No bias needed. */ \ + ___bias = 0; \ + } \ + \ + /* \ + * Now we have a combination of 2 conditions: \ + * \ + * 1) whether or not we need to apply a bias, and \ + * \ + * 2) whether or not there might be an overflow in the cross \ + * product determined by (___m & ((1 << 63) | (1 << 31))). \ + * \ + * Select the best way to do (m_bias + m * n) / (1 << 64). \ + * From now on there will be actual runtime code generated. \ + */ \ + ___res = __arch_xprod_64(___m, ___n, ___bias); \ + \ + ___res /= ___p; \ +}) + +#ifndef __arch_xprod_64 +/* + * Default C implementation for __arch_xprod_64() + * + * Prototype: uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias) + * Semantic: retval = ((bias ? m : 0) + m * n) >> 64 + * + * The product is a 128-bit value, scaled down to 64 bits. + * Assuming constant propagation to optimize away unused conditional code. + * Architectures may provide their own optimized assembly implementation. 
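+ *
+ * Worked example (by hand, for illustration only): for a constant
+ * divisor of 10, __div64_const32() above takes the "special case"
+ * branch and reduces to m = ~0ULL/5, p = 1, bias = 1 on the halved
+ * dividend, i.e.
+ *
+ *   n / 10 == __arch_xprod_64(~0ULL / 5, n >> 1, true);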
+ */ +static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias) +{ + uint32_t m_lo = m; + uint32_t m_hi = m >> 32; + uint32_t n_lo = n; + uint32_t n_hi = n >> 32; + uint64_t res; + uint32_t res_lo, res_hi, tmp; + + if (!bias) { + res = ((uint64_t)m_lo * n_lo) >> 32; + } else if (!(m & ((1ULL << 63) | (1ULL << 31)))) { + /* there can't be any overflow here */ + res = (m + (uint64_t)m_lo * n_lo) >> 32; + } else { + res = m + (uint64_t)m_lo * n_lo; + res_lo = res >> 32; + res_hi = (res_lo < m_hi); + res = res_lo | ((uint64_t)res_hi << 32); + } + + if (!(m & ((1ULL << 63) | (1ULL << 31)))) { + /* there can't be any overflow here */ + res += (uint64_t)m_lo * n_hi; + res += (uint64_t)m_hi * n_lo; + res >>= 32; + } else { + res += (uint64_t)m_lo * n_hi; + tmp = res >> 32; + res += (uint64_t)m_hi * n_lo; + res_lo = res >> 32; + res_hi = (res_lo < tmp); + res = res_lo | ((uint64_t)res_hi << 32); + } + + res += (uint64_t)m_hi * n_hi; + + return res; +} +#endif + +#ifndef __div64_32 extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor); +#endif /* The unnecessary pointer compare is there * to check for type safety (n must be 64bit) @@ -28,7 +224,19 @@ extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor); uint32_t __base = (base); \ uint32_t __rem; \ (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \ - if (((n) >> 32) == 0) { \ + if (__builtin_constant_p(__base) && \ + is_power_of_2(__base)) { \ + __rem = (n) & (__base - 1); \ + (n) >>= ilog2(__base); \ + } else if (__div64_const32_is_OK && \ + __builtin_constant_p(__base) && \ + __base != 0) { \ + uint32_t __res_lo, __n_lo = (n); \ + (n) = __div64_const32(n, __base); \ + /* the remainder can be computed with 32-bit regs */ \ + __res_lo = (n); \ + __rem = __n_lo - __res_lo * __base; \ + } else if (likely(((n) >> 32) == 0)) { \ __rem = (uint32_t)(n) % __base; \ (n) = (uint32_t)(n) / __base; \ } else \ @@ -36,4 +244,10 @@ extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor); __rem; \ }) +#else /* BITS_PER_LONG == ?? */ + +# error do_div() does not yet support the C64 + +#endif /* BITS_PER_LONG */ + #endif /* _ASM_GENERIC_DIV64_H */ diff --git a/include/asm-generic/errno.h b/include/asm-generic/errno.h index 7d99a95370..7629d5c8dd 100644 --- a/include/asm-generic/errno.h +++ b/include/asm-generic/errno.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + #ifndef _ARM_ERRNO_H #define _ARM_ERRNO_H @@ -132,15 +134,4 @@ #define EKEYREVOKED 128 /* Key has been revoked */ #define EKEYREJECTED 129 /* Key was rejected by service */ -/* Should never be seen by user programs */ -#define ERESTARTSYS 512 -#define ERESTARTNOINTR 513 -#define ERESTARTNOHAND 514 /* restart if no handler.. */ -#define ENOIOCTLCMD 515 /* No ioctl command */ -#define EPROBE_DEFER 517 /* Driver requests probe retry */ - -#define ENOTSUPP 524 /* Operation is not supported */ - -#define _LAST_ERRNO 524 - #endif diff --git a/include/asm-generic/int-ll64.h b/include/asm-generic/int-ll64.h index f394147c07..8f220128d7 100644 --- a/include/asm-generic/int-ll64.h +++ b/include/asm-generic/int-ll64.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + /* * asm-generic/int-ll64.h * diff --git a/include/asm-generic/io-typeconfused.h b/include/asm-generic/io-typeconfused.h new file mode 100644 index 0000000000..d25ed7db24 --- /dev/null +++ b/include/asm-generic/io-typeconfused.h @@ -0,0 +1,75 @@ +/* Generic I/O port emulation, based on MN10300 code + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#ifndef __ASM_GENERIC_IO_TYPECONFUSED_H +#define __ASM_GENERIC_IO_TYPECONFUSED_H + +#include <linux/string.h> /* for memset() and memcpy() */ +#include <linux/compiler.h> +#include <linux/types.h> +#include <asm/byteorder.h> + +/*****************************************************************************/ +/* + * Unlike the definitions in <asm-generic/io.h>, these macros don't complain + * about integer arguments and just silently cast them to pointers. This is + * a common cause of bugs, but lots of existing code depends on this, so + * this header is provided as a transitory measure. + */ + +#ifndef __raw_readb +#define __raw_readb(a) (__chk_io_ptr(a), *(volatile unsigned char __force *)(a)) +#endif + +#ifndef __raw_readw +#define __raw_readw(a) (__chk_io_ptr(a), *(volatile unsigned short __force *)(a)) +#endif + +#ifndef __raw_readl +#define __raw_readl(a) (__chk_io_ptr(a), *(volatile unsigned int __force *)(a)) +#endif + +#ifndef readb +#define readb __raw_readb +#endif + +#ifndef readw +#define readw(addr) __le16_to_cpu(__raw_readw(addr)) +#endif + +#ifndef readl +#define readl(addr) __le32_to_cpu(__raw_readl(addr)) +#endif + +#ifndef __raw_writeb +#define __raw_writeb(v,a) (__chk_io_ptr(a), *(volatile unsigned char __force *)(a) = (v)) +#endif + +#ifndef __raw_writew +#define __raw_writew(v,a) (__chk_io_ptr(a), *(volatile unsigned short __force *)(a) = (v)) +#endif + +#ifndef __raw_writel +#define __raw_writel(v,a) (__chk_io_ptr(a), *(volatile unsigned int __force *)(a) = (v)) +#endif + +#ifndef writeb +#define writeb __raw_writeb +#endif + +#ifndef writew +#define writew(b,addr) __raw_writew(__cpu_to_le16(b),addr) +#endif + +#ifndef writel +#define writel(b,addr) __raw_writel(__cpu_to_le32(b),addr) +#endif + +#endif /* __ASM_GENERIC_IO_TYPECONFUSED_H */ diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 973b8b954c..123ad5488f 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -11,81 +11,229 @@ #ifndef __ASM_GENERIC_IO_H #define __ASM_GENERIC_IO_H +#include <linux/string.h> /* for memset() and memcpy() */ +#include <linux/compiler.h> +#include <linux/instruction_pointer.h> #include <linux/types.h> #include <asm/byteorder.h> -/*****************************************************************************/ +#ifndef __LINUX_IO_STRICT_PROTOTYPES__ +#include <asm-generic/io-typeconfused.h> +#endif + +#define __io_br() barrier() +#define __io_ar(v) barrier() +#define __io_bw() barrier() +#define __io_pbw() __io_bw() +#define __io_paw() __io_aw() +#define __io_aw() do { } while (0) +#define __io_pbr() __io_br() +#define __io_par(v) __io_ar(v) + +static inline void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_read_mmio(u8 width, const volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} + /* - * readX/writeX() 
are used to access memory mapped devices. On some - * architectures the memory mapped IO stuff needs to be accessed - * differently. On the simple architectures, we just read/write the - * memory location directly. + * __raw_{read,write}{b,w,l,q}() access memory in native endianness. + * + * On some architectures memory mapped IO needs to be accessed differently. + * On the simple architectures, we just read/write the memory location + * directly. */ #ifndef __raw_readb -#define __raw_readb(a) (__chk_io_ptr(a), *(volatile unsigned char __force *)(a)) +#define __raw_readb __raw_readb +static inline u8 __raw_readb(const volatile void __iomem *addr) +{ + return *(const volatile u8 __force *)addr; +} #endif #ifndef __raw_readw -#define __raw_readw(a) (__chk_io_ptr(a), *(volatile unsigned short __force *)(a)) +#define __raw_readw __raw_readw +static inline u16 __raw_readw(const volatile void __iomem *addr) +{ + return *(const volatile u16 __force *)addr; +} #endif #ifndef __raw_readl -#define __raw_readl(a) (__chk_io_ptr(a), *(volatile unsigned int __force *)(a)) +#define __raw_readl __raw_readl +static inline u32 __raw_readl(const volatile void __iomem *addr) +{ + return *(const volatile u32 __force *)addr; +} +#endif + +#ifdef CONFIG_64BIT +#ifndef __raw_readq +#define __raw_readq __raw_readq +static inline u64 __raw_readq(const volatile void __iomem *addr) +{ + return *(const volatile u64 __force *)addr; +} +#endif +#endif /* CONFIG_64BIT */ + +#ifndef __raw_writeb +#define __raw_writeb __raw_writeb +static inline void __raw_writeb(u8 value, volatile void __iomem *addr) +{ + *(volatile u8 __force *)addr = value; +} #endif +#ifndef __raw_writew +#define __raw_writew __raw_writew +static inline void __raw_writew(u16 value, volatile void __iomem *addr) +{ + *(volatile u16 __force *)addr = value; +} +#endif + +#ifndef __raw_writel +#define __raw_writel __raw_writel +static inline void __raw_writel(u32 value, volatile void __iomem *addr) +{ + *(volatile u32 __force *)addr = value; +} +#endif + +#ifdef CONFIG_64BIT +#ifndef __raw_writeq +#define __raw_writeq __raw_writeq +static inline void __raw_writeq(u64 value, volatile void __iomem *addr) +{ + *(volatile u64 __force *)addr = value; +} +#endif +#endif /* CONFIG_64BIT */ + +/* + * {read,write}{b,w,l,q}() access little endian memory and return result in + * native endianness. 
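+ *
+ * Usage sketch (UART_BASE and UART_THR are invented for illustration,
+ * not defined here): an IOMEM cookie goes in, a CPU-endian value comes
+ * out:
+ *
+ *   void __iomem *uart = IOMEM(UART_BASE);
+ *   u32 status = readl(uart + 0x14);
+ *   writeb('A', uart + UART_THR);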
+ */ + #ifndef readb -#define readb __raw_readb +#define readb readb +static inline u8 readb(const volatile void __iomem *addr) +{ + u8 val; + + log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __raw_readb(addr); + __io_ar(val); + log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); + return val; +} #endif #ifndef readw -#define readw(addr) __le16_to_cpu(__raw_readw(addr)) +#define readw readw +static inline u16 readw(const volatile void __iomem *addr) +{ + u16 val; + + log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); + __io_ar(val); + log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); + return val; +} #endif #ifndef readl -#define readl(addr) __le32_to_cpu(__raw_readl(addr)) -#endif +#define readl readl +static inline u32 readl(const volatile void __iomem *addr) +{ + u32 val; -#ifndef __raw_writeb -#define __raw_writeb(v,a) (__chk_io_ptr(a), *(volatile unsigned char __force *)(a) = (v)) + log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); + __io_ar(val); + log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); + return val; +} #endif -#ifndef __raw_writew -#define __raw_writew(v,a) (__chk_io_ptr(a), *(volatile unsigned short __force *)(a) = (v)) -#endif +#ifdef CONFIG_64BIT +#ifndef readq +#define readq readq +static inline u64 readq(const volatile void __iomem *addr) +{ + u64 val; -#ifndef __raw_writel -#define __raw_writel(v,a) (__chk_io_ptr(a), *(volatile unsigned int __force *)(a) = (v)) + log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); + __io_ar(val); + log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); + return val; +} #endif +#endif /* CONFIG_64BIT */ #ifndef writeb -#define writeb __raw_writeb +#define writeb writeb +static inline void writeb(u8 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writeb(value, addr); + __io_aw(); + log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); +} #endif #ifndef writew -#define writew(b,addr) __raw_writew(__cpu_to_le16(b),addr) +#define writew writew +static inline void writew(u16 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writew((u16 __force)cpu_to_le16(value), addr); + __io_aw(); + log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); +} #endif #ifndef writel -#define writel(b,addr) __raw_writel(__cpu_to_le32(b),addr) -#endif - -#ifdef CONFIG_64BIT -static inline u64 __raw_readq(const volatile void __iomem *addr) +#define writel writel +static inline void writel(u32 value, volatile void __iomem *addr) { - return *(const volatile u64 __force *) addr; + log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); + __io_aw(); + log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); } -#define readq(addr) __le64_to_cpu(__raw_readq(addr)) +#endif -static inline void __raw_writeq(u64 b, volatile void __iomem *addr) +#ifdef CONFIG_64BIT +#ifndef writeq +#define writeq writeq +static inline void writeq(u64 value, volatile void __iomem *addr) { - *(volatile u64 __force *) addr = b; + log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); + __io_aw(); + log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); } -#define writeq(b,addr) 
__raw_writeq(__cpu_to_le64(b),addr) #endif +#endif /* CONFIG_64BIT */ #ifndef PCI_IOBASE -#define PCI_IOBASE ((void __iomem *)0) +#define PCI_IOBASE ((void __iomem *)RELOC_HIDE((void *)0, 0)) #endif #ifndef IO_SPACE_LIMIT @@ -194,111 +342,370 @@ static inline void outl_p(u32 value, unsigned long addr) } #endif -#ifndef insb -static inline void insb(unsigned long addr, void *buffer, int count) +/* + * {read,write}s{b,w,l,q}() repeatedly access the same memory address in + * native endianness in 8-, 16-, 32- or 64-bit chunks (@count times). + */ +#ifndef readsb +#define readsb readsb +static inline void readsb(const volatile void __iomem *addr, void *buffer, + unsigned int count) { if (count) { u8 *buf = buffer; + do { - u8 x = inb(addr); + u8 x = __raw_readb(addr); *buf++ = x; } while (--count); } } #endif -#ifndef insw -static inline void insw(unsigned long addr, void *buffer, int count) +#ifndef readsw +#define readsw readsw +static inline void readsw(const volatile void __iomem *addr, void *buffer, + unsigned int count) { if (count) { u16 *buf = buffer; + do { - u16 x = inw(addr); + u16 x = __raw_readw(addr); *buf++ = x; } while (--count); } } #endif -#ifndef insl -static inline void insl(unsigned long addr, void *buffer, int count) +#ifndef readsl +#define readsl readsl +static inline void readsl(const volatile void __iomem *addr, void *buffer, + unsigned int count) { if (count) { u32 *buf = buffer; + do { - u32 x = inl(addr); + u32 x = __raw_readl(addr); *buf++ = x; } while (--count); } } #endif -#ifndef outsb -static inline void outsb(unsigned long addr, const void *buffer, int count) +#ifdef CONFIG_64BIT +#ifndef readsq +#define readsq readsq +static inline void readsq(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u64 *buf = buffer; + + do { + u64 x = __raw_readq(addr); + *buf++ = x; + } while (--count); + } +} +#endif +#endif /* CONFIG_64BIT */ + +#ifndef writesb +#define writesb writesb +static inline void writesb(volatile void __iomem *addr, const void *buffer, + unsigned int count) { if (count) { const u8 *buf = buffer; + do { - outb(*buf++, addr); + __raw_writeb(*buf++, addr); } while (--count); } } #endif -#ifndef outsw -static inline void outsw(unsigned long addr, const void *buffer, int count) +#ifndef writesw +#define writesw writesw +static inline void writesw(volatile void __iomem *addr, const void *buffer, + unsigned int count) { if (count) { const u16 *buf = buffer; + do { - outw(*buf++, addr); + __raw_writew(*buf++, addr); } while (--count); } } #endif -#ifndef outsl -static inline void outsl(unsigned long addr, const void *buffer, int count) +#ifndef writesl +#define writesl writesl +static inline void writesl(volatile void __iomem *addr, const void *buffer, + unsigned int count) { if (count) { const u32 *buf = buffer; + do { - outl(*buf++, addr); + __raw_writel(*buf++, addr); } while (--count); } } #endif -static inline void readsl(const void __iomem *addr, void *buf, int len) +#ifdef CONFIG_64BIT +#ifndef writesq +#define writesq writesq +static inline void writesq(volatile void __iomem *addr, const void *buffer, + unsigned int count) { - insl(addr - PCI_IOBASE, buf, len); + if (count) { + const u64 *buf = buffer; + + do { + __raw_writeq(*buf++, addr); + } while (--count); + } } +#endif +#endif /* CONFIG_64BIT */ -static inline void readsw(const void __iomem *addr, void *buf, int len) +/* + * {in,out}{b,w,l}() access little endian I/O. 
{in,out}{b,w,l}_p() can be + * implemented on hardware that needs an additional delay for I/O accesses to + * take effect. + */ + +#if !defined(inb) && !defined(_inb) +#define _inb _inb +static inline u8 _inb(unsigned long addr) +{ + return __raw_readb(PCI_IOBASE + addr); +} +#endif + +#if !defined(inw) && !defined(_inw) +#define _inw _inw +static inline u16 _inw(unsigned long addr) { - insw(addr - PCI_IOBASE, buf, len); + return __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr)); } +#endif + +#if !defined(inl) && !defined(_inl) +#define _inl _inl +static inline u32 _inl(unsigned long addr) +{ + return __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr)); +} +#endif -static inline void readsb(const void __iomem *addr, void *buf, int len) +#if !defined(outb) && !defined(_outb) +#define _outb _outb +static inline void _outb(u8 value, unsigned long addr) { - insb(addr - PCI_IOBASE, buf, len); + return __raw_writeb(value, PCI_IOBASE + addr); } +#endif + +#if !defined(outw) && !defined(_outw) +#define _outw _outw +static inline void _outw(u16 value, unsigned long addr) +{ + return __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr); +} +#endif -static inline void writesl(const void __iomem *addr, const void *buf, int len) +#if !defined(outl) && !defined(_outl) +#define _outl _outl +static inline void _outl(u32 value, unsigned long addr) { - outsl(addr - PCI_IOBASE, buf, len); + return __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr); } +#endif + +#ifndef inb +#define inb _inb +#endif -static inline void writesw(const void __iomem *addr, const void *buf, int len) +#ifndef inw +#define inw _inw +#endif + +#ifndef inl +#define inl _inl +#endif + +#ifndef outb +#define outb _outb +#endif + +#ifndef outw +#define outw _outw +#endif + +#ifndef outl +#define outl _outl +#endif + +#ifndef inb_p +#define inb_p inb_p +static inline u8 inb_p(unsigned long addr) { - outsw(addr - PCI_IOBASE, buf, len); + return inb(addr); } +#endif -static inline void writesb(const void __iomem *addr, const void *buf, int len) +#ifndef inw_p +#define inw_p inw_p +static inline u16 inw_p(unsigned long addr) { - outsb(addr - PCI_IOBASE, buf, len); + return inw(addr); } +#endif +#ifndef inl_p +#define inl_p inl_p +static inline u32 inl_p(unsigned long addr) +{ + return inl(addr); +} +#endif + +#ifndef outb_p +#define outb_p outb_p +static inline void outb_p(u8 value, unsigned long addr) +{ + outb(value, addr); +} +#endif + +#ifndef outw_p +#define outw_p outw_p +static inline void outw_p(u16 value, unsigned long addr) +{ + outw(value, addr); +} +#endif + +#ifndef outl_p +#define outl_p outl_p +static inline void outl_p(u32 value, unsigned long addr) +{ + outl(value, addr); +} +#endif + +/* + * {in,out}s{b,w,l}{,_p}() are variants of the above that repeatedly access a + * single I/O port multiple times. 
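+ *
+ * Sketch (FIFO_PORT and the 16-word transfer size are made up): drain
+ * a device FIFO into a buffer with a single call:
+ *
+ *   u16 buf[16];
+ *   insw(FIFO_PORT, buf, 16);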
+ */ + +#ifndef insb +#define insb insb +static inline void insb(unsigned long addr, void *buffer, unsigned int count) +{ + readsb(PCI_IOBASE + addr, buffer, count); +} +#endif + +#ifndef insw +#define insw insw +static inline void insw(unsigned long addr, void *buffer, unsigned int count) +{ + readsw(PCI_IOBASE + addr, buffer, count); +} +#endif + +#ifndef insl +#define insl insl +static inline void insl(unsigned long addr, void *buffer, unsigned int count) +{ + readsl(PCI_IOBASE + addr, buffer, count); +} +#endif + +#ifndef outsb +#define outsb outsb +static inline void outsb(unsigned long addr, const void *buffer, + unsigned int count) +{ + writesb(PCI_IOBASE + addr, buffer, count); +} +#endif + +#ifndef outsw +#define outsw outsw +static inline void outsw(unsigned long addr, const void *buffer, + unsigned int count) +{ + writesw(PCI_IOBASE + addr, buffer, count); +} +#endif + +#ifndef outsl +#define outsl outsl +static inline void outsl(unsigned long addr, const void *buffer, + unsigned int count) +{ + writesl(PCI_IOBASE + addr, buffer, count); +} +#endif + +#ifndef insb_p +#define insb_p insb_p +static inline void insb_p(unsigned long addr, void *buffer, unsigned int count) +{ + insb(addr, buffer, count); +} +#endif + +#ifndef insw_p +#define insw_p insw_p +static inline void insw_p(unsigned long addr, void *buffer, unsigned int count) +{ + insw(addr, buffer, count); +} +#endif + +#ifndef insl_p +#define insl_p insl_p +static inline void insl_p(unsigned long addr, void *buffer, unsigned int count) +{ + insl(addr, buffer, count); +} +#endif + +#ifndef outsb_p +#define outsb_p outsb_p +static inline void outsb_p(unsigned long addr, const void *buffer, + unsigned int count) +{ + outsb(addr, buffer, count); +} +#endif + +#ifndef outsw_p +#define outsw_p outsw_p +static inline void outsw_p(unsigned long addr, const void *buffer, + unsigned int count) +{ + outsw(addr, buffer, count); +} +#endif + +#ifndef outsl_p +#define outsl_p outsl_p +static inline void outsl_p(unsigned long addr, const void *buffer, + unsigned int count) +{ + outsl(addr, buffer, count); +} +#endif #ifndef ioread8 #define ioread8 ioread8 @@ -420,4 +827,204 @@ static inline void iowrite64be(u64 value, volatile void __iomem *addr) #endif #endif /* CONFIG_64BIT */ +#ifndef ioread8_rep +#define ioread8_rep ioread8_rep +static inline void ioread8_rep(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + readsb(addr, buffer, count); +} +#endif + +#ifndef ioread16_rep +#define ioread16_rep ioread16_rep +static inline void ioread16_rep(const volatile void __iomem *addr, + void *buffer, unsigned int count) +{ + readsw(addr, buffer, count); +} +#endif + +#ifndef ioread32_rep +#define ioread32_rep ioread32_rep +static inline void ioread32_rep(const volatile void __iomem *addr, + void *buffer, unsigned int count) +{ + readsl(addr, buffer, count); +} +#endif + +#ifdef CONFIG_64BIT +#ifndef ioread64_rep +#define ioread64_rep ioread64_rep +static inline void ioread64_rep(const volatile void __iomem *addr, + void *buffer, unsigned int count) +{ + readsq(addr, buffer, count); +} +#endif +#endif /* CONFIG_64BIT */ + +#ifndef iowrite8_rep +#define iowrite8_rep iowrite8_rep +static inline void iowrite8_rep(volatile void __iomem *addr, + const void *buffer, + unsigned int count) +{ + writesb(addr, buffer, count); +} +#endif + +#ifndef iowrite16_rep +#define iowrite16_rep iowrite16_rep +static inline void iowrite16_rep(volatile void __iomem *addr, + const void *buffer, + unsigned int count) +{ + writesw(addr, buffer, 
count); +} +#endif + +#ifndef iowrite32_rep +#define iowrite32_rep iowrite32_rep +static inline void iowrite32_rep(volatile void __iomem *addr, + const void *buffer, + unsigned int count) +{ + writesl(addr, buffer, count); +} +#endif + +#ifdef CONFIG_64BIT +#ifndef iowrite64_rep +#define iowrite64_rep iowrite64_rep +static inline void iowrite64_rep(volatile void __iomem *addr, + const void *buffer, + unsigned int count) +{ + writesq(addr, buffer, count); +} +#endif +#endif /* CONFIG_64BIT */ + + + +/* + * Change virtual addresses to physical addresses and vv. + * These are pretty trivial + */ +#ifndef virt_to_phys +#define virt_to_phys virt_to_phys +static inline unsigned long virt_to_phys(const volatile void *mem) +{ + return (unsigned long)mem; +} +#endif + +#ifndef phys_to_virt +#define phys_to_virt phys_to_virt +static inline void *phys_to_virt(unsigned long phys) +{ + return (void *)phys; +} +#endif + +#ifndef IOMEM +#ifndef __ASSEMBLY__ +#define IOMEM(addr) ((void __force __iomem *)(addr)) +#else +#define IOMEM(addr) addr +#endif +#endif + +#define __io_virt(x) ((void __force *)(x)) + +#ifndef memset_io +#define memset_io memset_io +/** + * memset_io Set a range of I/O memory to a constant value + * @addr: The beginning of the I/O-memory range to set + * @val: The value to set the memory to + * @count: The number of bytes to set + * + * Set a range of I/O memory to a given value. + */ +static inline void memset_io(volatile void __iomem *addr, int value, + size_t size) +{ + memset(__io_virt(addr), value, size); +} +#endif + +#ifndef memcpy_fromio +#define memcpy_fromio memcpy_fromio +/** + * memcpy_fromio Copy a block of data from I/O memory + * @dst: The (RAM) destination for the copy + * @src: The (I/O memory) source for the data + * @count: The number of bytes to copy + * + * Copy a block of data from I/O memory. + */ +static inline void memcpy_fromio(void *buffer, + const volatile void __iomem *addr, + size_t size) +{ + memcpy(buffer, __io_virt(addr), size); +} +#endif + +#ifndef memcpy_toio +#define memcpy_toio memcpy_toio +/** + * memcpy_toio Copy a block of data into I/O memory + * @dst: The (I/O memory) destination for the copy + * @src: The (RAM) source for the data + * @count: The number of bytes to copy + * + * Copy a block of data to I/O memory. 
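+ *
+ * For example (SRAM_BASE, blob and len are hypothetical), staging a
+ * firmware blob in device SRAM:
+ *
+ *   memcpy_toio(IOMEM(SRAM_BASE), blob, len);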
+ */ +static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer, + size_t size) +{ + memcpy(__io_virt(addr), buffer, size); +} +#endif + +#include <asm-generic/bitio.h> + +#define IOMEM_ERR_PTR(err) (__force void __iomem *)ERR_PTR(err) + +#ifndef readq_relaxed +#define readq_relaxed(addr) readq(addr) +#endif + +#ifndef readl_relaxed +#define readl_relaxed(addr) readl(addr) +#endif + +#ifndef readw_relaxed +#define readw_relaxed(addr) readw(addr) +#endif + +#ifndef readb_relaxed +#define readb_relaxed(addr) readb(addr) +#endif + +#ifndef writeq_relaxed +#define writeq_relaxed(val, addr) writeq((val), (addr)) +#endif + +#ifndef writel_relaxed +#define writel_relaxed(val, addr) writel((val), (addr)) +#endif + +#ifndef writew_relaxed +#define writew_relaxed(val, addr) writew((val), (addr)) +#endif + +#ifndef writeb_relaxed +#define writeb_relaxed(val, addr) writeb((val), (addr)) +#endif + #endif /* __ASM_GENERIC_IO_H */ diff --git a/include/asm-generic/ioctl.h b/include/asm-generic/ioctl.h index 8641813855..eba15da2cc 100644 --- a/include/asm-generic/ioctl.h +++ b/include/asm-generic/ioctl.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + #ifndef _ASM_GENERIC_IOCTL_H #define _ASM_GENERIC_IOCTL_H diff --git a/include/asm-generic/memory_layout.h b/include/asm-generic/memory_layout.h index 3f69664aa0..6af1db8113 100644 --- a/include/asm-generic/memory_layout.h +++ b/include/asm-generic/memory_layout.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + #ifndef __ASM_GENERIC_MEMORY_LAYOUT_H #define __ASM_GENERIC_MEMORY_LAYOUT_H @@ -11,14 +13,27 @@ #define MALLOC_BASE CONFIG_MALLOC_BASE #endif -#ifdef CONFIG_BOOTM_OPTEE_SIZE -#define OPTEE_SIZE CONFIG_BOOTM_OPTEE_SIZE +#ifdef CONFIG_OPTEE_SIZE +#define OPTEE_SIZE CONFIG_OPTEE_SIZE #else #define OPTEE_SIZE 0 #endif +#ifdef CONFIG_OPTEE_SHM_SIZE +#define OPTEE_SHM_SIZE CONFIG_OPTEE_SHM_SIZE +#else +#define OPTEE_SHM_SIZE 0 +#endif + #define HEAD_TEXT_BASE MALLOC_BASE #define MALLOC_SIZE CONFIG_MALLOC_SIZE #define STACK_SIZE CONFIG_STACK_SIZE +/* + * This generates a useless load from the specified symbol + * to ensure linker garbage collection doesn't delete it + */ +#define __keep_symbolref(sym) \ + __asm__ __volatile__("": :"r"(&sym) :) + #endif /* __ASM_GENERIC_MEMORY_LAYOUT_H */ diff --git a/include/asm-generic/module.h b/include/asm-generic/module.h new file mode 100644 index 0000000000..98e1541b72 --- /dev/null +++ b/include/asm-generic/module.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_GENERIC_MODULE_H +#define __ASM_GENERIC_MODULE_H + +/* + * Many architectures just need a simple module + * loader without arch specific data. 
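+ *
+ * With the width-agnostic Elf_* aliases below, loader code can be
+ * written once for 32- and 64-bit targets, e.g. (illustrative):
+ *
+ *   Elf_Ehdr *ehdr = buf;
+ *   Elf_Shdr *sechdrs = buf + ehdr->e_shoff;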
+ */ +#ifndef CONFIG_HAVE_MOD_ARCH_SPECIFIC +struct mod_arch_specific +{ +}; +#endif + +#ifdef CONFIG_64BIT +#define Elf_Shdr Elf64_Shdr +#define Elf_Phdr Elf64_Phdr +#define Elf_Sym Elf64_Sym +#define Elf_Dyn Elf64_Dyn +#define Elf_Ehdr Elf64_Ehdr +#define Elf_Addr Elf64_Addr +#ifdef CONFIG_MODULES_USE_ELF_REL +#define Elf_Rel Elf64_Rel +#endif +#ifdef CONFIG_MODULES_USE_ELF_RELA +#define Elf_Rela Elf64_Rela +#endif +#define ELF_R_TYPE(X) ELF64_R_TYPE(X) +#define ELF_R_SYM(X) ELF64_R_SYM(X) + +#else /* CONFIG_64BIT */ + +#define Elf_Shdr Elf32_Shdr +#define Elf_Phdr Elf32_Phdr +#define Elf_Sym Elf32_Sym +#define Elf_Dyn Elf32_Dyn +#define Elf_Ehdr Elf32_Ehdr +#define Elf_Addr Elf32_Addr +#ifdef CONFIG_MODULES_USE_ELF_REL +#define Elf_Rel Elf32_Rel +#endif +#ifdef CONFIG_MODULES_USE_ELF_RELA +#define Elf_Rela Elf32_Rela +#endif +#define ELF_R_TYPE(X) ELF32_R_TYPE(X) +#define ELF_R_SYM(X) ELF32_R_SYM(X) +#endif + +#endif /* __ASM_GENERIC_MODULE_H */ diff --git a/include/asm-generic/pointer.h b/include/asm-generic/pointer.h new file mode 100644 index 0000000000..89817ce59e --- /dev/null +++ b/include/asm-generic/pointer.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_GENERIC_PTR_H___ +#define __ASM_GENERIC_PTR_H___ + +#if __SIZEOF_POINTER__ == 8 +#ifdef __ASSEMBLY__ +#define ASM_PTR .quad +#define ASM_SZPTR 8 +#define ASM_LGPTR 3 +#define ASM_LD_PTR(x) QUAD(x) +#else +#define ASM_PTR ".quad" +#define ASM_SZPTR "8" +#define ASM_LGPTR "3" +#endif +#elif __SIZEOF_POINTER__ == 4 +#ifdef __ASSEMBLY__ +#define ASM_PTR .word +#define ASM_SZPTR 4 +#define ASM_LGPTR 2 +#define ASM_LD_PTR(x) LONG(x) +#else +#define ASM_PTR ".word" +#define ASM_SZPTR "4" +#define ASM_LGPTR "2" +#endif +#else +#error "Unexpected __SIZEOF_POINTER__" +#endif + +#endif /* __ASM_GENERIC_PTR_H___ */ diff --git a/include/asm-generic/posix_types.h b/include/asm-generic/posix_types.h index 136f161e15..35123c776a 100644 --- a/include/asm-generic/posix_types.h +++ b/include/asm-generic/posix_types.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + #ifndef __ASM_GENERIC_POSIX_TYPES_H #define __ASM_GENERIC_POSIX_TYPES_H diff --git a/include/asm-generic/reloc.h b/include/asm-generic/reloc.h new file mode 100644 index 0000000000..06fccbd6f3 --- /dev/null +++ b/include/asm-generic/reloc.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _ASM_GENERIC_RELOC_H_ +#define _ASM_GENERIC_RELOC_H_ + +#include <linux/build_bug.h> +#include <linux/compiler.h> + +#ifndef global_variable_offset +#define global_variable_offset() get_runtime_offset() +#endif + +/* + * Using sizeof() on incomplete types always fails, so we use GCC's + * __builtin_object_size() instead. This is the mechanism underlying + * FORTIFY_SOURCE. &symbol should always be something GCC can compute + * a size for, even without annotations, unless it's incomplete. + * The second argument ensures we get 0 for failure. + */ +#define __has_type_complete(sym) __builtin_object_size(&(sym), 2) + +#define __has_type_byte_array(sym) (sizeof(*sym) == 1 + __must_be_array(sym)) + +/* + * runtime_address() defined below is supposed to be used exclusively + * with linker defined symbols, e.g. unsigned char input_end[]. + * + * We can't completely ensure that, but this gets us close enough + * to avoid most abuse of runtime_address(). 
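+ *
+ * Intended use, sticking to the pattern above (input_end is a linker
+ * defined symbol):
+ *
+ *   extern unsigned char input_end[];
+ *   void *end = runtime_address(input_end);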
+ */ +#define __is_incomplete_byte_array(sym) \ + (!__has_type_complete(sym) && __has_type_byte_array(sym)) + +/* + * While accessing global variables before C environment is setup is + * questionable, we can't avoid it when we decide to write our + * relocation routines in C. This invites a tricky problem with + * this naive code: + * + * var = &variable + global_variable_offset(); relocate_to_current_adr(); + * + * Compiler is within rights to rematerialize &variable after + * relocate_to_current_adr(), which is unfortunate because we + * then end up adding a relocated &variable with the relocation + * offset once more. We avoid this here by hiding address with + * RELOC_HIDE. This is required as a simple compiler barrier() + * with "memory" clobber is not immune to compiler proving that + * &sym fits in a register and as such is unaffected by the memory + * clobber. barrier_data(&sym) would work too, but that comes with + * aforementioned compiler "memory" barrier, that we don't care for. + * + * We don't necessarily need the volatile variable assignment when + * using the compiler-gcc.h RELOC_HIDE implementation as __asm__ + * __volatile__ takes care of it, but the generic RELOC_HIDE + * implementation has GCC misscompile runtime_address() when not passing + * in a volatile object. Volatile casts instead of variable assignments + * also led to miscompilations with GCC v11.1.1 for THUMB2. + */ + +#define runtime_address(sym) ({ \ + void *volatile __addrof_sym = (sym); \ + if (!__is_incomplete_byte_array(sym)) \ + __unsafe_runtime_address(); \ + RELOC_HIDE(__addrof_sym, global_variable_offset()); \ +}) + +/* + * Above will fail for "near" objects, e.g. data in the same + * translation unit or with LTO, as the compiler can be smart + * enough to omit relocation entry and just generate PC relative + * accesses leading to base address being added twice. We try to + * catch most of these here by triggering an error when runtime_address() + * is used with anything that is not a byte array of unknown size. + */ +extern void *__compiletime_error( + "runtime_address() may only be called on linker defined symbols." +) __unsafe_runtime_address(void); + +#endif diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index f584cad48d..0b2ed5615b 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + #ifndef _ASM_GENERIC_SECTIONS_H_ #define _ASM_GENERIC_SECTIONS_H_ @@ -13,6 +15,9 @@ extern void *_barebox_image_size; extern void *_barebox_bare_init_size; extern void *_barebox_pbl_size; +/* Start and end of .ctors section - used for constructor calls. 
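+ *
+ * A typical (illustrative) walk over the section:
+ *
+ *   typedef void (*ctor_fn)(void);
+ *   ctor_fn *fn;
+ *
+ *   for (fn = (ctor_fn *)__ctors_start; fn < (ctor_fn *)__ctors_end; fn++)
+ *           (*fn)();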
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index f584cad48d..0b2ed5615b 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
 #ifndef _ASM_GENERIC_SECTIONS_H_
 #define _ASM_GENERIC_SECTIONS_H_
@@ -13,6 +15,9 @@ extern void *_barebox_image_size;
 extern void *_barebox_bare_init_size;
 extern void *_barebox_pbl_size;
+/* Start and end of the .ctors section - used for constructor calls. */
+extern char __ctors_start[], __ctors_end[];
+
 #define barebox_image_size (__image_end - __image_start)
 #define barebox_bare_init_size (unsigned int)&_barebox_bare_init_size
 #define barebox_pbl_size (__piggydata_start - __image_start)
diff --git a/include/asm-generic/swab.h b/include/asm-generic/swab.h
index 3ab5add54f..f652dfe93f 100644
--- a/include/asm-generic/swab.h
+++ b/include/asm-generic/swab.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
 #ifndef _ASM_GENERIC_SWAB_H
 #define _ASM_GENERIC_SWAB_H
diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h
new file mode 100644
index 0000000000..73f1a895fd
--- /dev/null
+++ b/include/asm-generic/uaccess.h
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_GENERIC_UACCESS_H
+#define __ASM_GENERIC_UACCESS_H
+
+/*
+ * User space memory access functions; these should work
+ * on any machine that has kernel and user data in the same
+ * address space, e.g. all NOMMU machines.
+ */
+#include <linux/barebox-wrapper.h>
+#include <linux/bug.h>
+#include <linux/string.h>
+#include <asm/unaligned.h>
+
+static inline void might_fault(void) { }
+static inline int access_ok(const void __user *ptr, unsigned long size) { return 1; }
+
+static __always_inline int
+__get_user_fn(size_t size, const void __user *from, void *to)
+{
+	BUILD_BUG_ON(!__builtin_constant_p(size));
+
+	switch (size) {
+	case 1:
+		*(u8 *)to = *((u8 __force *)from);
+		return 0;
+	case 2:
+		*(u16 *)to = get_unaligned((u16 __force *)from);
+		return 0;
+	case 4:
+		*(u32 *)to = get_unaligned((u32 __force *)from);
+		return 0;
+	case 8:
+		*(u64 *)to = get_unaligned((u64 __force *)from);
+		return 0;
+	default:
+		BUILD_BUG();
+		return 0;
+	}
+
+}
+#define __get_user_fn(sz, u, k) __get_user_fn(sz, u, k)
+
+static __always_inline int
+__put_user_fn(size_t size, void __user *to, void *from)
+{
+	BUILD_BUG_ON(!__builtin_constant_p(size));
+
+	switch (size) {
+	case 1:
+		*(u8 __force *)to = *(u8 *)from;
+		return 0;
+	case 2:
+		put_unaligned(*(u16 *)from, (u16 __force *)to);
+		return 0;
+	case 4:
+		put_unaligned(*(u32 *)from, (u32 __force *)to);
+		return 0;
+	case 8:
+		put_unaligned(*(u64 *)from, (u64 __force *)to);
+		return 0;
+	default:
+		BUILD_BUG();
+		return 0;
+	}
+}
+#define __put_user_fn(sz, u, k) __put_user_fn(sz, u, k)
+
+#define __get_kernel_nofault(dst, src, type, err_label) \
+do { \
+	*((type *)dst) = get_unaligned((type *)(src)); \
+	if (0) /* make sure the label looks used to the compiler */ \
+		goto err_label; \
+} while (0)
+
+#define __put_kernel_nofault(dst, src, type, err_label) \
+do { \
+	put_unaligned(*((type *)src), (type *)(dst)); \
+	if (0) /* make sure the label looks used to the compiler */ \
+		goto err_label; \
+} while (0)
+
+static inline __must_check unsigned long
+raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	memcpy(to, (const void __force *)from, n);
+	return 0;
+}
+
+static inline __must_check unsigned long
+raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	memcpy((void __force *)to, from, n);
+	return 0;
+}
+
+/*
+ * These are the main single-value transfer routines. They automatically
+ * use the right size if we just have the right pointer type.
+ * This version just falls back to copy_{from,to}_user, which should
+ * provide a fast-path for small values.
+ */
+#define __put_user(x, ptr) \
+({ \
+	__typeof__(*(ptr)) __x = (x); \
+	int __pu_err = -EFAULT; \
+	__chk_user_ptr(ptr); \
+	switch (sizeof(*(ptr))) { \
+	case 1: \
+	case 2: \
+	case 4: \
+	case 8: \
+		__pu_err = __put_user_fn(sizeof(*(ptr)), \
+					 ptr, &__x); \
+		break; \
+	default: \
+		__put_user_bad(); \
+		break; \
+	} \
+	__pu_err; \
+})
+
+#define put_user(x, ptr) \
+({ \
+	void __user *__p = (ptr); \
+	might_fault(); \
+	access_ok(__p, sizeof(*(ptr))) ? \
+		__put_user((x), ((__typeof__(*(ptr)) __user *)__p)) : \
+		-EFAULT; \
+})
+
+extern int __put_user_bad(void) __attribute__((noreturn));
+
+#define __get_user(x, ptr) \
+({ \
+	int __gu_err = -EFAULT; \
+	__chk_user_ptr(ptr); \
+	switch (sizeof(*(ptr))) { \
+	case 1: { \
+		unsigned char __x = 0; \
+		__gu_err = __get_user_fn(sizeof(*(ptr)), \
+					 ptr, &__x); \
+		(x) = *(__force __typeof__(*(ptr)) *) &__x; \
+		break; \
+	} \
+	case 2: { \
+		unsigned short __x = 0; \
+		__gu_err = __get_user_fn(sizeof(*(ptr)), \
+					 ptr, &__x); \
+		(x) = *(__force __typeof__(*(ptr)) *) &__x; \
+		break; \
+	} \
+	case 4: { \
+		unsigned int __x = 0; \
+		__gu_err = __get_user_fn(sizeof(*(ptr)), \
+					 ptr, &__x); \
+		(x) = *(__force __typeof__(*(ptr)) *) &__x; \
+		break; \
+	} \
+	case 8: { \
+		unsigned long long __x = 0; \
+		__gu_err = __get_user_fn(sizeof(*(ptr)), \
+					 ptr, &__x); \
+		(x) = *(__force __typeof__(*(ptr)) *) &__x; \
+		break; \
+	} \
+	default: \
+		__get_user_bad(); \
+		break; \
+	} \
+	__gu_err; \
+})
+
+#define get_user(x, ptr) \
+({ \
+	const void __user *__p = (ptr); \
+	might_fault(); \
+	access_ok(__p, sizeof(*(ptr))) ? \
+		__get_user((x), (__typeof__(*(ptr)) __user *)__p) : \
+		((x) = (__typeof__(*(ptr)))0, -EFAULT); \
+})
+
+extern int __get_user_bad(void) __attribute__((noreturn));
+
+/*
+ * Zero Userspace
+ */
+static inline __must_check unsigned long
+__clear_user(void __user *to, unsigned long n)
+{
+	memset((void __force *)to, 0, n);
+	return 0;
+}
+
+static inline __must_check unsigned long
+clear_user(void __user *to, unsigned long n)
+{
+	might_fault();
+	if (!access_ok(to, n))
+		return n;
+
+	return __clear_user(to, n);
+}
+
+#endif /* __ASM_GENERIC_UACCESS_H */
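Since this header stubs out might_fault() and access_ok(), the accessors mainly keep code shared with Linux compiling unchanged. A short usage sketch (illustrative only, not from the patch; the function name and pointer are made up):

#include <linux/types.h>

/* Read a u32 from "user" memory, increment it, and write it back.
 * Both accessors return 0 on success and -EFAULT on failure. */
static int increment_user_u32(u32 __user *uptr)
{
	u32 val;

	if (get_user(val, uptr))
		return -EFAULT;

	return put_user(val + 1, uptr);
}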
diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h
new file mode 100644
index 0000000000..95a1d21410
--- /dev/null
+++ b/include/asm-generic/word-at-a-time.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+#include <linux/kernel.h>
+#include <asm/byteorder.h>
+
+#ifdef __BIG_ENDIAN
+
+struct word_at_a_time {
+	const unsigned long high_bits, low_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0xfe) + 1, REPEAT_BYTE(0x7f) }
+
+/* Bit set in the bytes that have a zero */
+static inline long prep_zero_mask(unsigned long val, unsigned long rhs, const struct word_at_a_time *c)
+{
+	unsigned long mask = (val & c->low_bits) + c->low_bits;
+	return ~(mask | rhs);
+}
+
+#define create_zero_mask(mask) (mask)
+
+static inline long find_zero(unsigned long mask)
+{
+	long byte = 0;
+#ifdef CONFIG_64BIT
+	if (mask >> 32)
+		mask >>= 32;
+	else
+		byte = 4;
+#endif
+	if (mask >> 16)
+		mask >>= 16;
+	else
+		byte += 2;
+	return (mask >> 8) ? byte : byte + 1;
+}
+
+static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+{
+	unsigned long rhs = val | c->low_bits;
+	*data = rhs;
+	return (val + c->high_bits) & ~rhs;
+}
+
+#ifndef zero_bytemask
+#define zero_bytemask(mask) (~1ul << __fls(mask))
+#endif
+
+#else
+
+/*
+ * The optimal byte mask counting is probably going to be something
+ * that is architecture-specific. If you have a reliably fast
+ * bit count instruction, that might be better than the multiply
+ * and shift, for example.
+ */
+struct word_at_a_time {
+	const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+
+#ifdef CONFIG_64BIT
+
+/*
+ * Jan Achrenius on G+: microoptimized version of
+ * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
+ * that works for the bytemasks without having to
+ * mask them first.
+ */
+static inline long count_masked_bytes(unsigned long mask)
+{
+	return mask*0x0001020304050608ul >> 56;
+}
+
+#else	/* 32-bit case */
+
+/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
+static inline long count_masked_bytes(long mask)
+{
+	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+	long a = (0x0ff0001+mask) >> 23;
+	/* Fix the 1 for 00 case */
+	return a & mask;
+}
+
+#endif
+
+/* Return nonzero if it has a zero */
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
+{
+	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
+	*bits = mask;
+	return mask;
+}
+
+static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
+{
+	return bits;
+}
+
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+	bits = (bits - 1) & ~bits;
+	return bits >> 7;
+}
+
+/* The mask we created is directly usable as a bytemask */
+#define zero_bytemask(mask) (mask)
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+	return count_masked_bytes(mask);
+}
+
+#endif /* __BIG_ENDIAN */
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
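A sketch of the intended use of this API (not part of the patch): a word-at-a-time strlen() modelled on the pattern in Linux's lib/string.c. The function name is made up, the input is assumed word-aligned, and reading a few bytes past the NUL must be safe, as it is within barebox's flat address space.

#include <linux/types.h>
#include <asm/word-at-a-time.h>

static size_t word_at_a_time_strlen(const char *s)
{
	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
	size_t len = 0;

	for (;;) {
		unsigned long data, mask;

		/* Load one full word and test all its bytes at once. */
		data = *(const unsigned long *)(s + len);
		if (has_zero(data, &mask, &constants)) {
			/* Narrow the result down to the first zero byte. */
			mask = prep_zero_mask(data, mask, &constants);
			mask = create_zero_mask(mask);
			return len + find_zero(mask);
		}
		len += sizeof(unsigned long);
	}
}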