diff options
-rw-r--r-- | arch/arm/Makefile | 24 | ||||
-rw-r--r-- | arch/arm/lib/Makefile | 27 | ||||
-rw-r--r-- | arch/arm/lib32/.gitignore (renamed from arch/arm/lib/.gitignore) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/Makefile | 27 | ||||
-rw-r--r-- | arch/arm/lib32/armlinux.c (renamed from arch/arm/lib/armlinux.c) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/ashldi3.S (renamed from arch/arm/lib/ashldi3.S) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/ashrdi3.S (renamed from arch/arm/lib/ashrdi3.S) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/barebox.lds.S (renamed from arch/arm/lib/barebox.lds.S) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/bootz.c (renamed from arch/arm/lib/bootz.c) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/copy_template.S (renamed from arch/arm/lib/copy_template.S) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/div0.c (renamed from arch/arm/lib/div0.c) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/findbit.S (renamed from arch/arm/lib/findbit.S) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/io-readsb.S (renamed from arch/arm/lib/io-readsb.S) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/io-readsl.S (renamed from arch/arm/lib/io-readsl.S) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/io-readsw-armv4.S (renamed from arch/arm/lib/io-readsw-armv4.S) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/io-writesb.S (renamed from arch/arm/lib/io-writesb.S) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/io-writesl.S (renamed from arch/arm/lib/io-writesl.S) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/io-writesw-armv4.S (renamed from arch/arm/lib/io-writesw-armv4.S) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/io.c (renamed from arch/arm/lib/io.c) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/lib1funcs.S (renamed from arch/arm/lib/lib1funcs.S) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/lshrdi3.S (renamed from arch/arm/lib/lshrdi3.S) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/memcpy.S (renamed from arch/arm/lib/memcpy.S) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/memset.S (renamed from arch/arm/lib/memset.S) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/module.c (renamed from arch/arm/lib/module.c) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/runtime-offset.S (renamed from arch/arm/lib/runtime-offset.S) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/semihosting-trap.S (renamed from arch/arm/lib/semihosting-trap.S) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/semihosting.c (renamed from arch/arm/lib/semihosting.c) | 0 | ||||
-rw-r--r-- | arch/arm/lib32/unwind.c (renamed from arch/arm/lib/unwind.c) | 0 | ||||
-rw-r--r-- | arch/arm/lib64/Makefile | 9 | ||||
-rw-r--r-- | arch/arm/lib64/armlinux.c | 50 | ||||
-rw-r--r-- | arch/arm/lib64/barebox.lds.S | 125 | ||||
-rw-r--r-- | arch/arm/lib64/copy_template.S | 192 | ||||
-rw-r--r-- | arch/arm/lib64/div0.c | 27 | ||||
-rw-r--r-- | arch/arm/lib64/memcpy.S | 74 | ||||
-rw-r--r-- | arch/arm/lib64/memset.S | 215 |
35 files changed, 739 insertions, 31 deletions
diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 2cf5febeb4..2b056afd67 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -291,13 +291,29 @@ MACH := endif common-y += $(BOARD) arch/arm/boards/ $(MACH) -common-y += arch/arm/lib/ arch/arm/cpu/ -common-y += arch/arm/crypto/ +common-y += arch/arm/cpu/ +common-y += arch/arm/lib/ + +ifeq ($(CONFIG_CPU_V8), y) +common-y += arch/arm/lib64/ +else +common-y += arch/arm/lib32/ arch/arm/crypto/ +endif common-$(CONFIG_OFTREE) += arch/arm/dts/ -lds-y := arch/arm/lib/barebox.lds +ifeq ($(CONFIG_CPU_V8), y) +lds-y := arch/arm/lib64/barebox.lds +else +lds-y := arch/arm/lib32/barebox.lds +endif common- += $(patsubst %,arch/arm/boards/%/,$(board-)) -CLEAN_FILES += include/generated/mach-types.h arch/arm/lib/barebox.lds barebox-flash-image +CLEAN_FILES += include/generated/mach-types.h barebox-flash-image + +ifeq ($(CONFIG_CPU_V8), y) +CLEAN_FILES += arch/arm/lib64/barebox.lds +else +CLEAN_FILES += arch/arm/lib32/barebox.lds +endif diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index e1c6f5bfd3..33db7350f8 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -1,29 +1,2 @@ -obj-$(CONFIG_ARM_LINUX) += armlinux.o obj-$(CONFIG_BOOTM) += bootm.o -obj-$(CONFIG_CMD_BOOTZ) += bootz.o obj-$(CONFIG_CMD_BOOTU) += bootu.o -obj-y += div0.o -obj-y += findbit.o -obj-y += io.o -obj-y += io-readsb.o -obj-y += io-readsw-armv4.o -obj-y += io-readsl.o -obj-y += io-writesb.o -obj-y += io-writesw-armv4.o -obj-y += io-writesl.o -obj-y += lib1funcs.o -obj-y += ashrdi3.o -obj-y += ashldi3.o -obj-y += lshrdi3.o -obj-y += runtime-offset.o -pbl-y += runtime-offset.o -obj-$(CONFIG_ARM_OPTIMZED_STRING_FUNCTIONS) += memcpy.o -obj-$(CONFIG_ARM_OPTIMZED_STRING_FUNCTIONS) += memset.o -obj-$(CONFIG_ARM_UNWIND) += unwind.o -obj-$(CONFIG_ARM_SEMIHOSTING) += semihosting-trap.o semihosting.o -obj-$(CONFIG_MODULES) += module.o -extra-y += barebox.lds - -pbl-y += lib1funcs.o -pbl-y += ashldi3.o -pbl-y += div0.o diff --git 
a/arch/arm/lib/.gitignore b/arch/arm/lib32/.gitignore index d1165788c9..d1165788c9 100644 --- a/arch/arm/lib/.gitignore +++ b/arch/arm/lib32/.gitignore diff --git a/arch/arm/lib32/Makefile b/arch/arm/lib32/Makefile new file mode 100644 index 0000000000..cdd07322cf --- /dev/null +++ b/arch/arm/lib32/Makefile @@ -0,0 +1,27 @@ +obj-$(CONFIG_ARM_LINUX) += armlinux.o +obj-$(CONFIG_CMD_BOOTZ) += bootz.o +obj-y += div0.o +obj-y += findbit.o +obj-y += io.o +obj-y += io-readsb.o +obj-y += io-readsw-armv4.o +obj-y += io-readsl.o +obj-y += io-writesb.o +obj-y += io-writesw-armv4.o +obj-y += io-writesl.o +obj-y += lib1funcs.o +obj-y += ashrdi3.o +obj-y += ashldi3.o +obj-y += lshrdi3.o +obj-y += runtime-offset.o +pbl-y += runtime-offset.o +obj-$(CONFIG_ARM_OPTIMZED_STRING_FUNCTIONS) += memcpy.o +obj-$(CONFIG_ARM_OPTIMZED_STRING_FUNCTIONS) += memset.o +obj-$(CONFIG_ARM_UNWIND) += unwind.o +obj-$(CONFIG_ARM_SEMIHOSTING) += semihosting-trap.o semihosting.o +obj-$(CONFIG_MODULES) += module.o +extra-y += barebox.lds + +pbl-y += lib1funcs.o +pbl-y += ashldi3.o +pbl-y += div0.o diff --git a/arch/arm/lib/armlinux.c b/arch/arm/lib32/armlinux.c index 47b9bd33ed..47b9bd33ed 100644 --- a/arch/arm/lib/armlinux.c +++ b/arch/arm/lib32/armlinux.c diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib32/ashldi3.S index b62e06f602..b62e06f602 100644 --- a/arch/arm/lib/ashldi3.S +++ b/arch/arm/lib32/ashldi3.S diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib32/ashrdi3.S index db849b65fc..db849b65fc 100644 --- a/arch/arm/lib/ashrdi3.S +++ b/arch/arm/lib32/ashrdi3.S diff --git a/arch/arm/lib/barebox.lds.S b/arch/arm/lib32/barebox.lds.S index 6dc8bd2f3c..6dc8bd2f3c 100644 --- a/arch/arm/lib/barebox.lds.S +++ b/arch/arm/lib32/barebox.lds.S diff --git a/arch/arm/lib/bootz.c b/arch/arm/lib32/bootz.c index 5167c9d20d..5167c9d20d 100644 --- a/arch/arm/lib/bootz.c +++ b/arch/arm/lib32/bootz.c diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib32/copy_template.S index d8eb06328a..d8eb06328a 100644 --- 
a/arch/arm/lib/copy_template.S +++ b/arch/arm/lib32/copy_template.S diff --git a/arch/arm/lib/div0.c b/arch/arm/lib32/div0.c index 852cb72331..852cb72331 100644 --- a/arch/arm/lib/div0.c +++ b/arch/arm/lib32/div0.c diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib32/findbit.S index ef4caff1ad..ef4caff1ad 100644 --- a/arch/arm/lib/findbit.S +++ b/arch/arm/lib32/findbit.S diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib32/io-readsb.S index 963c455175..963c455175 100644 --- a/arch/arm/lib/io-readsb.S +++ b/arch/arm/lib32/io-readsb.S diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib32/io-readsl.S index 47a29744e9..47a29744e9 100644 --- a/arch/arm/lib/io-readsl.S +++ b/arch/arm/lib32/io-readsl.S diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib32/io-readsw-armv4.S index f5b34a3479..f5b34a3479 100644 --- a/arch/arm/lib/io-readsw-armv4.S +++ b/arch/arm/lib32/io-readsw-armv4.S diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib32/io-writesb.S index 1ab8e47cb4..1ab8e47cb4 100644 --- a/arch/arm/lib/io-writesb.S +++ b/arch/arm/lib32/io-writesb.S diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib32/io-writesl.S index 8a3bcd6456..8a3bcd6456 100644 --- a/arch/arm/lib/io-writesl.S +++ b/arch/arm/lib32/io-writesl.S diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib32/io-writesw-armv4.S index 9e8308dd77..9e8308dd77 100644 --- a/arch/arm/lib/io-writesw-armv4.S +++ b/arch/arm/lib32/io-writesw-armv4.S diff --git a/arch/arm/lib/io.c b/arch/arm/lib32/io.c index abfd887aac..abfd887aac 100644 --- a/arch/arm/lib/io.c +++ b/arch/arm/lib32/io.c diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib32/lib1funcs.S index bf1d0192d6..bf1d0192d6 100644 --- a/arch/arm/lib/lib1funcs.S +++ b/arch/arm/lib32/lib1funcs.S diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib32/lshrdi3.S index e77e96c7bc..e77e96c7bc 100644 --- a/arch/arm/lib/lshrdi3.S +++ b/arch/arm/lib32/lshrdi3.S diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib32/memcpy.S index 5123691ca9..5123691ca9 100644 
--- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib32/memcpy.S diff --git a/arch/arm/lib/memset.S b/arch/arm/lib32/memset.S index c4d2672038..c4d2672038 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib32/memset.S diff --git a/arch/arm/lib/module.c b/arch/arm/lib32/module.c index be7965d59c..be7965d59c 100644 --- a/arch/arm/lib/module.c +++ b/arch/arm/lib32/module.c diff --git a/arch/arm/lib/runtime-offset.S b/arch/arm/lib32/runtime-offset.S index f10c4c8469..f10c4c8469 100644 --- a/arch/arm/lib/runtime-offset.S +++ b/arch/arm/lib32/runtime-offset.S diff --git a/arch/arm/lib/semihosting-trap.S b/arch/arm/lib32/semihosting-trap.S index 9e40ebfe21..9e40ebfe21 100644 --- a/arch/arm/lib/semihosting-trap.S +++ b/arch/arm/lib32/semihosting-trap.S diff --git a/arch/arm/lib/semihosting.c b/arch/arm/lib32/semihosting.c index a7351961dc..a7351961dc 100644 --- a/arch/arm/lib/semihosting.c +++ b/arch/arm/lib32/semihosting.c diff --git a/arch/arm/lib/unwind.c b/arch/arm/lib32/unwind.c index c3dca5b61d..c3dca5b61d 100644 --- a/arch/arm/lib/unwind.c +++ b/arch/arm/lib32/unwind.c diff --git a/arch/arm/lib64/Makefile b/arch/arm/lib64/Makefile new file mode 100644 index 0000000000..87e26f6afa --- /dev/null +++ b/arch/arm/lib64/Makefile @@ -0,0 +1,9 @@ +obj-$(CONFIG_ARM_LINUX) += armlinux.o +obj-y += div0.o +obj-$(CONFIG_ARM_OPTIMZED_STRING_FUNCTIONS) += memcpy.o +obj-$(CONFIG_ARM_OPTIMZED_STRING_FUNCTIONS) += memset.o +extra-y += barebox.lds + +pbl-y += lib1funcs.o +pbl-y += ashldi3.o +pbl-y += div0.o diff --git a/arch/arm/lib64/armlinux.c b/arch/arm/lib64/armlinux.c new file mode 100644 index 0000000000..020e6d70ff --- /dev/null +++ b/arch/arm/lib64/armlinux.c @@ -0,0 +1,50 @@ +/* + * (C) Copyright 2002 + * Sysgo Real-Time Solutions, GmbH <www.elinos.com> + * Marius Groeger <mgroeger@sysgo.de> + * + * Copyright (C) 2001 Erik Mouw (J.A.K.Mouw@its.tudelft.nl) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public 
License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <boot.h> +#include <common.h> +#include <command.h> +#include <driver.h> +#include <environment.h> +#include <image.h> +#include <init.h> +#include <fs.h> +#include <linux/list.h> +#include <xfuncs.h> +#include <malloc.h> +#include <fcntl.h> +#include <errno.h> +#include <memory.h> +#include <of.h> +#include <magicvar.h> + +#include <asm/byteorder.h> +#include <asm/setup.h> +#include <asm/barebox-arm.h> +#include <asm/armlinux.h> +#include <asm/system.h> + +void start_linux(void *adr, int swap, unsigned long initrd_address, + unsigned long initrd_size, void *oftree) +{ + void (*kernel)(void *dtb) = adr; + + shutdown_barebox(); + + kernel(oftree); +} diff --git a/arch/arm/lib64/barebox.lds.S b/arch/arm/lib64/barebox.lds.S new file mode 100644 index 0000000000..240699f1a6 --- /dev/null +++ b/arch/arm/lib64/barebox.lds.S @@ -0,0 +1,125 @@ +/* + * (C) Copyright 2000-2004 + * Wolfgang Denk, DENX Software Engineering, wd@denx.de. + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * + */ + +#include <asm-generic/barebox.lds.h> + +OUTPUT_FORMAT("elf64-littleaarch64", "elf64-littleaarch64", "elf64-littleaarch64") +OUTPUT_ARCH(aarch64) +ENTRY(start) +SECTIONS +{ +#ifdef CONFIG_RELOCATABLE + . = 0x0; +#else + . = TEXT_BASE; +#endif + +#ifndef CONFIG_PBL_IMAGE + PRE_IMAGE +#endif + . = ALIGN(4); + .text : + { + _stext = .; + _text = .; + *(.text_entry*) + __bare_init_start = .; + *(.text_bare_init*) + __bare_init_end = .; + __exceptions_start = .; + KEEP(*(.text_exceptions*)) + __exceptions_stop = .; + *(.text*) + } + BAREBOX_BARE_INIT_SIZE + + . = ALIGN(4); + .rodata : { *(.rodata*) } + +#ifdef CONFIG_ARM_UNWIND + /* + * Stack unwinding tables + */ + . = ALIGN(8); + .ARM.unwind_idx : { + __start_unwind_idx = .; + *(.ARM.exidx*) + __stop_unwind_idx = .; + } + .ARM.unwind_tab : { + __start_unwind_tab = .; + *(.ARM.extab*) + __stop_unwind_tab = .; + } +#endif + _etext = .; /* End of text and rodata section */ + _sdata = .; + + . = ALIGN(4); + .data : { *(.data*) } + + .barebox_imd : { BAREBOX_IMD } + + . = .; + __barebox_cmd_start = .; + .barebox_cmd : { BAREBOX_CMDS } + __barebox_cmd_end = .; + + __barebox_magicvar_start = .; + .barebox_magicvar : { BAREBOX_MAGICVARS } + __barebox_magicvar_end = .; + + __barebox_initcalls_start = .; + .barebox_initcalls : { INITCALLS } + __barebox_initcalls_end = .; + + __barebox_exitcalls_start = .; + .barebox_exitcalls : { EXITCALLS } + __barebox_exitcalls_end = .; + + __usymtab_start = .; + __usymtab : { BAREBOX_SYMS } + __usymtab_end = .; + + .oftables : { BAREBOX_CLK_TABLE() } + + .dtb : { BAREBOX_DTB() } + + .rel.dyn : { + __rel_dyn_start = .; + *(.rel*) + __rel_dyn_end = .; + } + + .dynsym : { + __dynsym_start = .; + *(.dynsym) + __dynsym_end = .; + } + + _edata = .; + + . 
= ALIGN(4); + __bss_start = .; + .bss : { *(.bss*) } + __bss_stop = .; + _end = .; + _barebox_image_size = __bss_start - TEXT_BASE; +} diff --git a/arch/arm/lib64/copy_template.S b/arch/arm/lib64/copy_template.S new file mode 100644 index 0000000000..cc9a84260d --- /dev/null +++ b/arch/arm/lib64/copy_template.S @@ -0,0 +1,192 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. + * + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +/* + * Copy a buffer from src to dest (alignment handled by the hardware) + * + * Parameters: + * x0 - dest + * x1 - src + * x2 - n + * Returns: + * x0 - dest + */ +dstin .req x0 +src .req x1 +count .req x2 +tmp1 .req x3 +tmp1w .req w3 +tmp2 .req x4 +tmp2w .req w4 +dst .req x6 + +A_l .req x7 +A_h .req x8 +B_l .req x9 +B_h .req x10 +C_l .req x11 +C_h .req x12 +D_l .req x13 +D_h .req x14 + + mov dst, dstin + cmp count, #16 + /* When the memory length is less than 16, the accesses are not aligned. */ + b.lo .Ltiny15 + + neg tmp2, src + ands tmp2, tmp2, #15/* Bytes to reach alignment.
*/ + b.eq .LSrcAligned + sub count, count, tmp2 + /* + * Copy the leading memory data from src to dst in an increasing + * address order. This way, the risk of overwriting the source + * memory data is eliminated when the distance between src and + * dst is less than 16. The memory accesses here are aligned. + */ + tbz tmp2, #0, 1f + ldrb1 tmp1w, src, #1 + strb1 tmp1w, dst, #1 +1: + tbz tmp2, #1, 2f + ldrh1 tmp1w, src, #2 + strh1 tmp1w, dst, #2 +2: + tbz tmp2, #2, 3f + ldr1 tmp1w, src, #4 + str1 tmp1w, dst, #4 +3: + tbz tmp2, #3, .LSrcAligned + ldr1 tmp1, src, #8 + str1 tmp1, dst, #8 + +.LSrcAligned: + cmp count, #64 + b.ge .Lcpy_over64 + /* + * Deal with small copies quickly by dropping straight into the + * exit block. + */ +.Ltail63: + /* + * Copy up to 48 bytes of data. At this point we only need the + * bottom 6 bits of count to be accurate. + */ + ands tmp1, count, #0x30 + b.eq .Ltiny15 + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +1: + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +2: + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +.Ltiny15: + /* + * Prefer to break one ldp/stp into several loads/stores that access + * memory in an increasing address order, rather than to load/store 16 + * bytes from (src-16) to (dst-16) and to move src back to an aligned + * address, which is the way used in the original cortex memcpy. If keeping + * the original memcpy process here, memmove would need to satisfy the + * precondition that the src address is at least 16 bytes bigger than the dst + * address, otherwise some source data will be overwritten when memmove + * calls memcpy directly. To make memmove simpler and decouple the + * memcpy's dependency on memmove, the original process was dropped.
+ */ + tbz count, #3, 1f + ldr1 tmp1, src, #8 + str1 tmp1, dst, #8 +1: + tbz count, #2, 2f + ldr1 tmp1w, src, #4 + str1 tmp1w, dst, #4 +2: + tbz count, #1, 3f + ldrh1 tmp1w, src, #2 + strh1 tmp1w, dst, #2 +3: + tbz count, #0, .Lexitfunc + ldrb1 tmp1w, src, #1 + strb1 tmp1w, dst, #1 + + b .Lexitfunc + +.Lcpy_over64: + subs count, count, #128 + b.ge .Lcpy_body_large + /* + * Less than 128 bytes to copy, so handle 64 here and then jump + * to the tail. + */ + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 + ldp1 B_l, B_h, src, #16 + ldp1 C_l, C_h, src, #16 + stp1 B_l, B_h, dst, #16 + stp1 C_l, C_h, dst, #16 + ldp1 D_l, D_h, src, #16 + stp1 D_l, D_h, dst, #16 + + tst count, #0x3f + b.ne .Ltail63 + b .Lexitfunc + + /* + * Critical loop. Start at a new cache line boundary. Assuming + * 64 bytes per line this ensures the entire loop is in one line. NOTE(review): no alignment directive precedes .Lcpy_body_large in this file, so the boundary is not actually enforced - confirm. + */ +.Lcpy_body_large: + /* Preload the first 64 bytes of data. */ + ldp1 A_l, A_h, src, #16 + ldp1 B_l, B_h, src, #16 + ldp1 C_l, C_h, src, #16 + ldp1 D_l, D_h, src, #16 +1: + /* + * Interleave the load of the next 64-byte data block with the store of the + * previously loaded 64 bytes of data. + */ + stp1 A_l, A_h, dst, #16 + ldp1 A_l, A_h, src, #16 + stp1 B_l, B_h, dst, #16 + ldp1 B_l, B_h, src, #16 + stp1 C_l, C_h, dst, #16 + ldp1 C_l, C_h, src, #16 + stp1 D_l, D_h, dst, #16 + ldp1 D_l, D_h, src, #16 + subs count, count, #64 + b.ge 1b + stp1 A_l, A_h, dst, #16 + stp1 B_l, B_h, dst, #16 + stp1 C_l, C_h, dst, #16 + stp1 D_l, D_h, dst, #16 + + tst count, #0x3f + b.ne .Ltail63 +.Lexitfunc: diff --git a/arch/arm/lib64/div0.c b/arch/arm/lib64/div0.c new file mode 100644 index 0000000000..852cb72331 --- /dev/null +++ b/arch/arm/lib64/div0.c @@ -0,0 +1,27 @@ +/* + * (C) Copyright 2002 + * Wolfgang Denk, DENX Software Engineering, wd@denx.de. 
+ * + * See file CREDITS for list of people who contributed to this + * project.
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <common.h> + +extern void __div0(void); + +/* Replacement (=dummy) for GNU/Linux division-by zero handler */ +void __div0 (void) +{ + panic("division by zero\n"); +} diff --git a/arch/arm/lib64/memcpy.S b/arch/arm/lib64/memcpy.S new file mode 100644 index 0000000000..cfed3191c5 --- /dev/null +++ b/arch/arm/lib64/memcpy.S @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. + * + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Copy a buffer from src to dest (alignment handled by the hardware) + * + * Parameters: + * x0 - dest + * x1 - src + * x2 - n + * Returns: + * x0 - dest + */ + .macro ldrb1 ptr, regB, val + ldrb \ptr, [\regB], \val + .endm + + .macro strb1 ptr, regB, val + strb \ptr, [\regB], \val + .endm + + .macro ldrh1 ptr, regB, val + ldrh \ptr, [\regB], \val + .endm + + .macro strh1 ptr, regB, val + strh \ptr, [\regB], \val + .endm + + .macro ldr1 ptr, regB, val + ldr \ptr, [\regB], \val + .endm + + .macro str1 ptr, regB, val + str \ptr, [\regB], \val + .endm + + .macro ldp1 ptr, regB, regC, val + ldp \ptr, \regB, [\regC], \val + .endm + + .macro stp1 ptr, regB, regC, val + stp \ptr, \regB, [\regC], \val + .endm + + .weak memcpy +ENTRY(memcpy) +#include "copy_template.S" + ret +ENDPROC(memcpy) diff --git a/arch/arm/lib64/memset.S b/arch/arm/lib64/memset.S new file mode 100644 index 0000000000..380a54097e --- /dev/null +++ b/arch/arm/lib64/memset.S @@ -0,0 +1,215 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. + * + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Fill in the buffer with character c (alignment handled by the hardware) + * + * Parameters: + * x0 - buf + * x1 - c + * x2 - n + * Returns: + * x0 - buf + */ + +dstin .req x0 +val .req w1 +count .req x2 +tmp1 .req x3 +tmp1w .req w3 +tmp2 .req x4 +tmp2w .req w4 +zva_len_x .req x5 +zva_len .req w5 +zva_bits_x .req x6 + +A_l .req x7 +A_lw .req w7 +dst .req x8 +tmp3w .req w9 +tmp3 .req x9 + + .weak memset +ENTRY(memset) + mov dst, dstin /* Preserve return value. */ + and A_lw, val, #255 + orr A_lw, A_lw, A_lw, lsl #8 + orr A_lw, A_lw, A_lw, lsl #16 + orr A_l, A_l, A_l, lsl #32 + + cmp count, #15 + b.hi .Lover16_proc + /* All stores may be non-aligned. */ + tbz count, #3, 1f + str A_l, [dst], #8 +1: + tbz count, #2, 2f + str A_lw, [dst], #4 +2: + tbz count, #1, 3f + strh A_lw, [dst], #2 +3: + tbz count, #0, 4f + strb A_lw, [dst] +4: + ret + +.Lover16_proc: + /* Check whether the start address is 16-byte aligned. */ + neg tmp2, dst + ands tmp2, tmp2, #15 + b.eq .Laligned +/* +* The count is not less than 16, so we can use stp to store the first 16 bytes, +* then advance dst to the next 16-byte boundary. This puts the current +* memory address on an alignment boundary. +*/ + stp A_l, A_l, [dst] /* non-aligned store */ + /* make dst aligned */ + sub count, count, tmp2 + add dst, dst, tmp2 + +.Laligned: + cbz A_l, .Lzero_mem + +.Ltail_maybe_long: + cmp count, #64 + b.ge .Lnot_short +.Ltail63: + ands tmp1, count, #0x30 + b.eq 3f + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + stp A_l, A_l, [dst], #16 +1: + stp A_l, A_l, [dst], #16 +2: + stp A_l, A_l, [dst], #16 +/* +* The remaining length is less than 16, so use stp to write the last 16 bytes. +* Some bytes get written twice and the access is non-aligned. +*/ +3: + ands count, count, #15 + cbz count, 4f + add dst, dst, count + stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. */ +4: + ret + + /* + * Critical loop. Start at a new cache line boundary.
Assuming + * 64 bytes per line, this ensures the entire loop is in one line. NOTE(review): no alignment directive precedes .Lnot_short in this file, so the boundary is not actually enforced - confirm. + */ +.Lnot_short: + sub dst, dst, #16/* Pre-bias. */ + sub count, count, #64 +1: + stp A_l, A_l, [dst, #16] + stp A_l, A_l, [dst, #32] + stp A_l, A_l, [dst, #48] + stp A_l, A_l, [dst, #64]! + subs count, count, #64 + b.ge 1b + tst count, #0x3f + add dst, dst, #16 + b.ne .Ltail63 +.Lexitfunc: + ret + + /* + * For zeroing memory, check to see if we can use the ZVA feature to + * zero entire 'cache' lines. + */ +.Lzero_mem: + cmp count, #63 + b.le .Ltail63 + /* + * For zeroing small amounts of memory, it's not worth setting up + * the line-clear code. + */ + cmp count, #128 + b.lt .Lnot_short /* past this branch count is at least 128 bytes */ + + mrs tmp1, dczid_el0 + tbnz tmp1, #4, .Lnot_short + mov tmp3w, #4 + and zva_len, tmp1w, #15 /* Safety: other bits reserved. */ + lsl zva_len, tmp3w, zva_len + + ands tmp3w, zva_len, #63 + /* + * Ensure that zva_len is not less than 64. + * It is not meaningful to use ZVA if the block size is less than 64. + */ + b.ne .Lnot_short +.Lzero_by_line: + /* + * Compute how far we need to go to become suitably aligned. We're + * already at quad-word alignment. + */ + cmp count, zva_len_x + b.lt .Lnot_short /* Not enough to reach alignment. */ + sub zva_bits_x, zva_len_x, #1 + neg tmp2, dst + ands tmp2, tmp2, zva_bits_x + b.eq 2f /* Already aligned. */ + /* Not aligned, check that there's enough to copy after alignment. */ + sub tmp1, count, tmp2 + /* + * Guarantee that the length remaining for ZVA is at least 64, + * so that the code at 2f does not run past the memory range. */ + cmp tmp1, #64 + ccmp tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */ + b.lt .Lnot_short + /* + * We know that there's at least 64 bytes to zero and that it's safe + * to overrun by 64 bytes. 
+ */ + mov count, tmp1 +1: + stp A_l, A_l, [dst] + stp A_l, A_l, [dst, #16] + stp A_l, A_l, [dst, #32] + subs tmp2, tmp2, #64 + stp A_l, A_l, [dst, #48] + add dst, dst, #64 + b.ge 1b + /* We've overrun a bit, so adjust dst downwards. */ + add dst, dst, tmp2 +2: + sub count, count, zva_len_x +3: + dc zva, dst + add dst, dst, zva_len_x + subs count, count, zva_len_x + b.ge 3b + ands count, count, zva_bits_x + b.ne .Ltail_maybe_long + ret +ENDPROC(memset) |