From 6e9f1b4ef8a9113526b93e493c5393dd9ec87e10 Mon Sep 17 00:00:00 2001 From: Michael Olbrich Date: Mon, 10 Dec 2012 15:02:12 +0100 Subject: glibc-2.16: cleanup patches Signed-off-by: Michael Olbrich --- .../0003-i386-x86_64-revert-clone-cfi.patch | 58 - .../glibc-2.16.0/0004-queue-header-updates.patch | 92 -- .../0005-localedef-fix-trampoline.patch | 67 - patches/glibc-2.16.0/0006-resolv-dynamic.patch | 47 - patches/glibc-2.16.0/0007-section-comments.patch | 34 - patches/glibc-2.16.0/0008-fpscr-values.patch | 42 - ...zed-string-functions-for-NEON-from-Linaro.patch | 1292 -------------------- ...dden_builtin_def-for-all-cortex-functions.patch | 76 -- ...ef-segfault-when-run-under-exec-shield-Pa.patch | 67 + patches/glibc-2.16.0/0200-resolv-dynamic.patch | 47 + ...zed-string-functions-for-NEON-from-Linaro.patch | 1292 ++++++++++++++++++++ ...dden_builtin_def-for-all-cortex-functions.patch | 76 ++ patches/glibc-2.16.0/series | 18 +- 13 files changed, 1491 insertions(+), 1717 deletions(-) delete mode 100644 patches/glibc-2.16.0/0003-i386-x86_64-revert-clone-cfi.patch delete mode 100644 patches/glibc-2.16.0/0004-queue-header-updates.patch delete mode 100644 patches/glibc-2.16.0/0005-localedef-fix-trampoline.patch delete mode 100644 patches/glibc-2.16.0/0006-resolv-dynamic.patch delete mode 100644 patches/glibc-2.16.0/0007-section-comments.patch delete mode 100644 patches/glibc-2.16.0/0008-fpscr-values.patch delete mode 100644 patches/glibc-2.16.0/0009-optimized-string-functions-for-NEON-from-Linaro.patch delete mode 100644 patches/glibc-2.16.0/0010-add-libc_hidden_builtin_def-for-all-cortex-functions.patch create mode 100644 patches/glibc-2.16.0/0100-Fix-localedef-segfault-when-run-under-exec-shield-Pa.patch create mode 100644 patches/glibc-2.16.0/0200-resolv-dynamic.patch create mode 100644 patches/glibc-2.16.0/0300-optimized-string-functions-for-NEON-from-Linaro.patch create mode 100644 patches/glibc-2.16.0/0301-add-libc_hidden_builtin_def-for-all-cortex-functions.patch diff --git a/patches/glibc-2.16.0/0003-i386-x86_64-revert-clone-cfi.patch b/patches/glibc-2.16.0/0003-i386-x86_64-revert-clone-cfi.patch deleted file mode 100644 index 0477106..0000000 --- a/patches/glibc-2.16.0/0003-i386-x86_64-revert-clone-cfi.patch +++ /dev/null @@ -1,58 +0,0 @@ -From: unknown author -Date: Tue, 1 Nov 2011 18:58:26 +0100 -Subject: [PATCH] i386/x86_64 revert clone cfi - -revert cfi additions to clone on i386/x86_64 to workaround problems in -gcc's unwinder code. this is not a bug in glibc, it triggers problems -elsewhere. this cfi code does not gain us a whole lot anyways. - -http://gcc.gnu.org/ml/gcc/2006-12/msg00293.html ---- - sysdeps/unix/sysv/linux/i386/clone.S | 4 ---- - sysdeps/unix/sysv/linux/x86_64/clone.S | 4 ---- - 2 files changed, 8 deletions(-) - -diff --git a/sysdeps/unix/sysv/linux/i386/clone.S b/sysdeps/unix/sysv/linux/i386/clone.S -index 0603218..82c6536 100644 ---- a/sysdeps/unix/sysv/linux/i386/clone.S -+++ b/sysdeps/unix/sysv/linux/i386/clone.S -@@ -119,9 +119,6 @@ L(pseudo_end): - ret - - L(thread_start): -- cfi_startproc; -- /* Clearing frame pointer is insufficient, use CFI. */ -- cfi_undefined (eip); - /* Note: %esi is zero. 
*/ - movl %esi,%ebp /* terminate the stack frame */ - #ifdef RESET_PID -@@ -154,7 +151,6 @@ L(nomoregetpid): - jmp L(haspid) - .previous - #endif -- cfi_endproc; - - cfi_startproc - PSEUDO_END (BP_SYM (__clone)) -diff --git a/sysdeps/unix/sysv/linux/x86_64/clone.S b/sysdeps/unix/sysv/linux/x86_64/clone.S -index 6bcb542..656b389 100644 ---- a/sysdeps/unix/sysv/linux/x86_64/clone.S -+++ b/sysdeps/unix/sysv/linux/x86_64/clone.S -@@ -88,9 +88,6 @@ L(pseudo_end): - ret - - L(thread_start): -- cfi_startproc; -- /* Clearing frame pointer is insufficient, use CFI. */ -- cfi_undefined (rip); - /* Clear the frame pointer. The ABI suggests this be done, to mark - the outermost frame obviously. */ - xorl %ebp, %ebp -@@ -115,7 +112,6 @@ L(thread_start): - /* Call exit with return value from function call. */ - movq %rax, %rdi - call HIDDEN_JUMPTARGET (_exit) -- cfi_endproc; - - cfi_startproc; - PSEUDO_END (BP_SYM (__clone)) diff --git a/patches/glibc-2.16.0/0004-queue-header-updates.patch b/patches/glibc-2.16.0/0004-queue-header-updates.patch deleted file mode 100644 index a542ef3..0000000 --- a/patches/glibc-2.16.0/0004-queue-header-updates.patch +++ /dev/null @@ -1,92 +0,0 @@ -From: unknown author -Date: Tue, 1 Nov 2011 18:58:26 +0100 -Subject: [PATCH] queue header updates - -grab some updates from FreeBSD - -http://bugs.gentoo.org/201979 ---- - misc/sys/queue.h | 37 +++++++++++++++++++++++++++++++++++++ - 1 file changed, 37 insertions(+) - -diff --git a/misc/sys/queue.h b/misc/sys/queue.h -index daf4553..f2678ba 100644 ---- a/misc/sys/queue.h -+++ b/misc/sys/queue.h -@@ -136,6 +136,11 @@ struct { \ - (var); \ - (var) = ((var)->field.le_next)) - -+#define LIST_FOREACH_SAFE(var, head, field, tvar) \ -+ for ((var) = LIST_FIRST((head)); \ -+ (var) && ((tvar) = LIST_NEXT((var), field), 1); \ -+ (var) = (tvar)) -+ - /* - * List access methods. - */ -@@ -197,6 +202,16 @@ struct { \ - #define SLIST_FOREACH(var, head, field) \ - for((var) = (head)->slh_first; (var); (var) = (var)->field.sle_next) - -+#define SLIST_FOREACH_SAFE(var, head, field, tvar) \ -+ for ((var) = SLIST_FIRST((head)); \ -+ (var) && ((tvar) = SLIST_NEXT((var), field), 1); \ -+ (var) = (tvar)) -+ -+#define SLIST_FOREACH_PREVPTR(var, varp, head, field) \ -+ for ((varp) = &SLIST_FIRST((head)); \ -+ ((var) = *(varp)) != NULL; \ -+ (varp) = &SLIST_NEXT((var), field)) -+ - /* - * Singly-linked List access methods. - */ -@@ -242,6 +257,12 @@ struct { \ - (head)->stqh_last = &(elm)->field.stqe_next; \ - } while (/*CONSTCOND*/0) - -+#define STAILQ_LAST(head, type, field) \ -+ (STAILQ_EMPTY((head)) ? \ -+ NULL : \ -+ ((struct type *)(void *) \ -+ ((char *)((head)->stqh_last) - __offsetof(struct type, field)))) -+ - #define STAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ - if (((elm)->field.stqe_next = (listelm)->field.stqe_next) == NULL)\ - (head)->stqh_last = &(elm)->field.stqe_next; \ -@@ -286,6 +307,11 @@ struct { \ - #define STAILQ_FIRST(head) ((head)->stqh_first) - #define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next) - -+#define STAILQ_FOREACH_SAFE(var, head, field, tvar) \ -+ for ((var) = STAILQ_FIRST((head)); \ -+ (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \ -+ (var) = (tvar)) -+ - - /* - * Simple queue definitions. 
-@@ -437,11 +463,22 @@ struct { \ - (var); \ - (var) = ((var)->field.tqe_next)) - -+#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ -+ for ((var) = TAILQ_FIRST((head)); \ -+ (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ -+ (var) = (tvar)) -+ - #define TAILQ_FOREACH_REVERSE(var, head, headname, field) \ - for ((var) = (*(((struct headname *)((head)->tqh_last))->tqh_last)); \ - (var); \ - (var) = (*(((struct headname *)((var)->field.tqe_prev))->tqh_last))) - -+#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \ -+ for ((var) = TAILQ_LAST((head), headname); \ -+ (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \ -+ (var) = (tvar)) -+ -+ - #define TAILQ_CONCAT(head1, head2, field) do { \ - if (!TAILQ_EMPTY(head2)) { \ - *(head1)->tqh_last = (head2)->tqh_first; \ diff --git a/patches/glibc-2.16.0/0005-localedef-fix-trampoline.patch b/patches/glibc-2.16.0/0005-localedef-fix-trampoline.patch deleted file mode 100644 index 55d8c0d..0000000 --- a/patches/glibc-2.16.0/0005-localedef-fix-trampoline.patch +++ /dev/null @@ -1,67 +0,0 @@ -From: unknown author -Date: Tue, 1 Nov 2011 18:58:26 +0100 -Subject: [PATCH] localedef fix trampoline - -if [ $# -ne 2 ]; then - echo >&2 "`basename $0`: script expects -patch|-unpatch as argument" - exit 1 -fi -case "$1" in - -patch) patch -d "$2" -f --no-backup-if-mismatch -p1 < $0;; - -unpatch) patch -d "$2" -f --no-backup-if-mismatch -R -p1 < $0;; - *) - echo >&2 "`basename $0`: script expects -patch|-unpatch as argument" - exit 1 -esac -exit 0 ---- - locale/programs/3level.h | 36 ++++++++++++++++++++++++++++++++++++ - 1 file changed, 36 insertions(+) - -diff --git a/locale/programs/3level.h b/locale/programs/3level.h -index 6297720..4c6f4e1 100644 ---- a/locale/programs/3level.h -+++ b/locale/programs/3level.h -@@ -202,6 +202,42 @@ CONCAT(TABLE,_iterate) (struct TABLE *t, - } - } - } -+ -+/* GCC ATM seems to do a poor job with pointers to nested functions passed -+ to inlined functions. Help it a little bit with this hack. */ -+#define wchead_table_iterate(tp, fn) \ -+do \ -+ { \ -+ struct wchead_table *t = (tp); \ -+ uint32_t index1; \ -+ for (index1 = 0; index1 < t->level1_size; index1++) \ -+ { \ -+ uint32_t lookup1 = t->level1[index1]; \ -+ if (lookup1 != ((uint32_t) ~0)) \ -+ { \ -+ uint32_t lookup1_shifted = lookup1 << t->q; \ -+ uint32_t index2; \ -+ for (index2 = 0; index2 < (1 << t->q); index2++) \ -+ { \ -+ uint32_t lookup2 = t->level2[index2 + lookup1_shifted]; \ -+ if (lookup2 != ((uint32_t) ~0)) \ -+ { \ -+ uint32_t lookup2_shifted = lookup2 << t->p; \ -+ uint32_t index3; \ -+ for (index3 = 0; index3 < (1 << t->p); index3++) \ -+ { \ -+ struct element_t *lookup3 \ -+ = t->level3[index3 + lookup2_shifted]; \ -+ if (lookup3 != NULL) \ -+ fn ((((index1 << t->q) + index2) << t->p) + index3, \ -+ lookup3); \ -+ } \ -+ } \ -+ } \ -+ } \ -+ } \ -+ } while (0) -+ - #endif - - #ifndef NO_FINALIZE diff --git a/patches/glibc-2.16.0/0006-resolv-dynamic.patch b/patches/glibc-2.16.0/0006-resolv-dynamic.patch deleted file mode 100644 index 076ea83..0000000 --- a/patches/glibc-2.16.0/0006-resolv-dynamic.patch +++ /dev/null @@ -1,47 +0,0 @@ -From: unknown author -Date: Tue, 1 Nov 2011 18:58:26 +0100 -Subject: [PATCH] resolv dynamic - -ripped from SuSE - -if /etc/resolv.conf is updated, then make sure applications -already running get the updated information. 
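[Note on the queue-header patch dropped above: the *_FOREACH_SAFE iterators it imported from FreeBSD cache the successor element before the loop body runs, so the current element can be unlinked and freed during traversal. A minimal usage sketch, assuming a <sys/queue.h> that carries these macros (stock glibc only has them with that patch applied); the struct and function names are illustrative, not from glibc:

#include <stdlib.h>
#include <sys/queue.h>

struct node {
	int value;
	LIST_ENTRY(node) entries;
};

LIST_HEAD(node_list, node);

/* Drain the list, freeing every element.  Plain LIST_FOREACH would
   read np->entries.le_next after free(np); the _SAFE variant stashes
   the successor in tmp before the body runs. */
static void
free_all (struct node_list *head)
{
	struct node *np, *tmp;

	LIST_FOREACH_SAFE (np, head, entries, tmp) {
		LIST_REMOVE (np, entries);
		free (np);
	}
}
]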
- -http://bugs.gentoo.org/177416 ---- - resolv/res_libc.c | 15 +++++++++++++++ - 1 file changed, 15 insertions(+) - -diff --git a/resolv/res_libc.c b/resolv/res_libc.c -index 48d3200..a443345 100644 ---- a/resolv/res_libc.c -+++ b/resolv/res_libc.c -@@ -22,6 +22,7 @@ - #include - #include - #include -+#include - - - /* The following bit is copied from res_data.c (where it is #ifdef'ed -@@ -95,6 +96,20 @@ int - __res_maybe_init (res_state resp, int preinit) - { - if (resp->options & RES_INIT) { -+ static time_t last_mtime, last_check; -+ time_t now; -+ struct stat statbuf; -+ -+ time (&now); -+ if (now != last_check) { -+ last_check = now; -+ if (stat (_PATH_RESCONF, &statbuf) == 0 && last_mtime != statbuf.st_mtime) { -+ last_mtime = statbuf.st_mtime; -+ atomicinclock (lock); -+ atomicinc (__res_initstamp); -+ atomicincunlock (lock); -+ } -+ } - if (__res_initstamp != resp->_u._ext.initstamp) { - if (resp->nscount > 0) - __res_iclose (resp, true); diff --git a/patches/glibc-2.16.0/0007-section-comments.patch b/patches/glibc-2.16.0/0007-section-comments.patch deleted file mode 100644 index cad6f24..0000000 --- a/patches/glibc-2.16.0/0007-section-comments.patch +++ /dev/null @@ -1,34 +0,0 @@ -From: unknown author -Date: Tue, 1 Nov 2011 18:58:26 +0100 -Subject: [PATCH] section comments - -http://sources.redhat.com/ml/binutils/2004-04/msg00665.html - -fixes building on some architectures (like m68k/arm/cris/etc...) because -it does the right thing - -This patch was taken from gentoo. ---- - include/libc-symbols.h | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/include/libc-symbols.h b/include/libc-symbols.h -index 12b3639..40dde1d 100644 ---- a/include/libc-symbols.h -+++ b/include/libc-symbols.h -@@ -205,12 +205,12 @@ - # define __make_section_unallocated(section_string) - #endif - --/* Tacking on "\n\t#" to the section name makes gcc put it's bogus -+/* Tacking on "\n#APP\n\t#" to the section name makes gcc put it's bogus - section attributes on what looks like a comment to the assembler. */ - #ifdef HAVE_SECTION_QUOTES --# define __sec_comment "\"\n\t#\"" -+# define __sec_comment "\"\n#APP\n\t#\"" - #else --# define __sec_comment "\n\t#" -+# define __sec_comment "\n#APP\n\t#" - #endif - #define link_warning(symbol, msg) \ - __make_section_unallocated (".gnu.warning." 
#symbol) \ diff --git a/patches/glibc-2.16.0/0008-fpscr-values.patch b/patches/glibc-2.16.0/0008-fpscr-values.patch deleted file mode 100644 index 50ec4d4..0000000 --- a/patches/glibc-2.16.0/0008-fpscr-values.patch +++ /dev/null @@ -1,42 +0,0 @@ -From: unknown author -Date: Tue, 1 Nov 2011 18:58:26 +0100 -Subject: [PATCH] fpscr values - -http://rpm.sh-linux.org/rpm-2004/target/SRPMS/glibc-2.3.3-27.12.src.rpm - -http://bugs.gentoo.org/100696 ---- - sysdeps/unix/sysv/linux/sh/Versions | 1 + - sysdeps/unix/sysv/linux/sh/sysdep.S | 10 ++++++++++ - 2 files changed, 11 insertions(+) - -diff --git a/sysdeps/unix/sysv/linux/sh/Versions b/sysdeps/unix/sysv/linux/sh/Versions -index e0938c4..ca1d7da 100644 ---- a/sysdeps/unix/sysv/linux/sh/Versions -+++ b/sysdeps/unix/sysv/linux/sh/Versions -@@ -2,6 +2,7 @@ libc { - GLIBC_2.2 { - # functions used in other libraries - __xstat64; __fxstat64; __lxstat64; -+ __fpscr_values; - - # a* - alphasort64; -diff --git a/sysdeps/unix/sysv/linux/sh/sysdep.S b/sysdeps/unix/sysv/linux/sh/sysdep.S -index ba80b14..91e9008 100644 ---- a/sysdeps/unix/sysv/linux/sh/sysdep.S -+++ b/sysdeps/unix/sysv/linux/sh/sysdep.S -@@ -31,3 +31,13 @@ ENTRY (__syscall_error) - - #define __syscall_error __syscall_error_1 - #include -+ -+ .data -+ .align 3 -+ .globl ___fpscr_values -+ .type ___fpscr_values, @object -+ .size ___fpscr_values, 8 -+___fpscr_values: -+ .long 0 -+ .long 0x80000 -+weak_alias (___fpscr_values, __fpscr_values) diff --git a/patches/glibc-2.16.0/0009-optimized-string-functions-for-NEON-from-Linaro.patch b/patches/glibc-2.16.0/0009-optimized-string-functions-for-NEON-from-Linaro.patch deleted file mode 100644 index ef7dc02..0000000 --- a/patches/glibc-2.16.0/0009-optimized-string-functions-for-NEON-from-Linaro.patch +++ /dev/null @@ -1,1292 +0,0 @@ -From: Michael Olbrich -Date: Thu, 15 Sep 2011 16:50:56 +0200 -Subject: [PATCH] optimized string functions for NEON from Linaro - -Signed-off-by: Michael Olbrich ---- - cortex-strings/sysdeps/arm/armv7/memchr.S | 150 ++++++++++ - cortex-strings/sysdeps/arm/armv7/memcpy.S | 152 ++++++++++ - cortex-strings/sysdeps/arm/armv7/memset.S | 118 ++++++++ - cortex-strings/sysdeps/arm/armv7/strchr.S | 76 +++++ - cortex-strings/sysdeps/arm/armv7/strcmp.c | 449 +++++++++++++++++++++++++++++ - cortex-strings/sysdeps/arm/armv7/strcpy.c | 172 +++++++++++ - cortex-strings/sysdeps/arm/armv7/strlen.S | 111 +++++++ - 7 files changed, 1228 insertions(+) - create mode 100644 cortex-strings/sysdeps/arm/armv7/memchr.S - create mode 100644 cortex-strings/sysdeps/arm/armv7/memcpy.S - create mode 100644 cortex-strings/sysdeps/arm/armv7/memset.S - create mode 100644 cortex-strings/sysdeps/arm/armv7/strchr.S - create mode 100644 cortex-strings/sysdeps/arm/armv7/strcmp.c - create mode 100644 cortex-strings/sysdeps/arm/armv7/strcpy.c - create mode 100644 cortex-strings/sysdeps/arm/armv7/strlen.S - -diff --git a/cortex-strings/sysdeps/arm/armv7/memchr.S b/cortex-strings/sysdeps/arm/armv7/memchr.S -new file mode 100644 -index 0000000..8f5aaa9 ---- /dev/null -+++ b/cortex-strings/sysdeps/arm/armv7/memchr.S -@@ -0,0 +1,150 @@ -+/* Copyright (c) 2010-2011, Linaro Limited -+ All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. 
-+ -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ -+ * Neither the name of Linaro Limited nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ -+ Written by Dave Gilbert -+ -+ This memchr routine is optimised on a Cortex-A9 and should work on -+ all ARMv7 processors. It has a fast past for short sizes, and has -+ an optimised path for large data sets; the worst case is finding the -+ match early in a large data set. */ -+ -+@ 2011-02-07 david.gilbert@linaro.org -+@ Extracted from local git a5b438d861 -+@ 2011-07-14 david.gilbert@linaro.org -+@ Import endianness fix from local git ea786f1b -+ -+ .syntax unified -+ .arch armv7-a -+ -+@ this lets us check a flag in a 00/ff byte easily in either endianness -+#ifdef __ARMEB__ -+#define CHARTSTMASK(c) 1<<(31-(c*8)) -+#else -+#define CHARTSTMASK(c) 1<<(c*8) -+#endif -+ .text -+ .thumb -+ -+@ --------------------------------------------------------------------------- -+ .thumb_func -+ .align 2 -+ .p2align 4,,15 -+ .global memchr -+ .type memchr,%function -+memchr: -+ @ r0 = start of memory to scan -+ @ r1 = character to look for -+ @ r2 = length -+ @ returns r0 = pointer to character or NULL if not found -+ and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char -+ -+ cmp r2,#16 @ If it's short don't bother with anything clever -+ blt 20f -+ -+ tst r0, #7 @ If it's already aligned skip the next bit -+ beq 10f -+ -+ @ Work up to an aligned point -+5: -+ ldrb r3, [r0],#1 -+ subs r2, r2, #1 -+ cmp r3, r1 -+ beq 50f @ If it matches exit found -+ tst r0, #7 -+ cbz r2, 40f @ If we run off the end, exit not found -+ bne 5b @ If not aligned yet then do next byte -+ -+10: -+ @ At this point, we are aligned, we know we have at least 8 bytes to work with -+ push {r4,r5,r6,r7} -+ orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes -+ orr r1, r1, r1, lsl #16 -+ bic r4, r2, #7 @ Number of double words to work with -+ mvns r7, #0 @ all F's -+ movs r3, #0 -+ -+15: -+ ldmia r0!,{r5,r6} -+ subs r4, r4, #8 -+ eor r5,r5, r1 @ Get it so that r5,r6 have 00's where the bytes match the target -+ eor r6,r6, r1 -+ uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 -+ sel r5, r3, r7 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION -+ uadd8 r6, r6, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 -+ sel r6, r5, r7 @ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION -+ cbnz r6, 60f -+ bne 15b @ 
(Flags from the subs above) If not run out of bytes then go around again -+ -+ pop {r4,r5,r6,r7} -+ and r1,r1,#0xff @ Get r1 back to a single character from the expansion above -+ and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done -+ -+20: -+ cbz r2, 40f @ 0 length or hit the end already then not found -+ -+21: @ Post aligned section, or just a short call -+ ldrb r3,[r0],#1 -+ subs r2,r2,#1 -+ eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub -+ cbz r3, 50f -+ bne 21b @ on r2 flags -+ -+40: -+ movs r0,#0 @ not found -+ bx lr -+ -+50: -+ subs r0,r0,#1 @ found -+ bx lr -+ -+60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was -+ @ r0 points to the start of the double word after the one that was tested -+ @ r5 has the 00/ff pattern for the first word, r6 has the chained value -+ cmp r5, #0 -+ itte eq -+ moveq r5, r6 @ the end is in the 2nd word -+ subeq r0,r0,#3 @ Points to 2nd byte of 2nd word -+ subne r0,r0,#7 @ or 2nd byte of 1st word -+ -+ @ r0 currently points to the 3rd byte of the word containing the hit -+ tst r5, # CHARTSTMASK(0) @ 1st character -+ bne 61f -+ adds r0,r0,#1 -+ tst r5, # CHARTSTMASK(1) @ 2nd character -+ ittt eq -+ addeq r0,r0,#1 -+ tsteq r5, # (3<<15) @ 2nd & 3rd character -+ @ If not the 3rd must be the last one -+ addeq r0,r0,#1 -+ -+61: -+ pop {r4,r5,r6,r7} -+ subs r0,r0,#1 -+ bx lr -diff --git a/cortex-strings/sysdeps/arm/armv7/memcpy.S b/cortex-strings/sysdeps/arm/armv7/memcpy.S -new file mode 100644 -index 0000000..3be24ca ---- /dev/null -+++ b/cortex-strings/sysdeps/arm/armv7/memcpy.S -@@ -0,0 +1,152 @@ -+/* Copyright (c) 2010-2011, Linaro Limited -+ All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ -+ * Neither the name of Linaro Limited nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ -+ Written by Dave Gilbert -+ -+ This memcpy routine is optimised on a Cortex-A9 and should work on -+ all ARMv7 processors with NEON. 
*/ -+ -+@ 2011-09-01 david.gilbert@linaro.org -+@ Extracted from local git 2f11b436 -+ -+ .syntax unified -+ .arch armv7-a -+ -+@ this lets us check a flag in a 00/ff byte easily in either endianness -+#ifdef __ARMEB__ -+#define CHARTSTMASK(c) 1<<(31-(c*8)) -+#else -+#define CHARTSTMASK(c) 1<<(c*8) -+#endif -+ .text -+ .thumb -+ -+@ --------------------------------------------------------------------------- -+ .thumb_func -+ .align 2 -+ .p2align 4,,15 -+ .global memcpy -+ .type memcpy,%function -+memcpy: -+ @ r0 = dest -+ @ r1 = source -+ @ r2 = count -+ @ returns dest in r0 -+ @ Overlaps of source/dest not allowed according to spec -+ @ Note this routine relies on v7 misaligned loads/stores -+ pld [r1] -+ mov r12, r0 @ stash original r0 -+ cmp r2,#32 -+ blt 10f @ take the small copy case separately -+ -+ @ test for either source or destination being misaligned -+ @ (We only rely on word align) -+ tst r0,#3 -+ it eq -+ tsteq r1,#3 -+ bne 30f @ misaligned case -+ -+4: -+ @ at this point we are word (or better) aligned and have at least -+ @ 32 bytes to play with -+ -+ @ If it's a huge copy, try Neon -+ cmp r2, #128*1024 -+ bge 35f @ Sharing general non-aligned case here, aligned could be faster -+ -+ push {r3,r4,r5,r6,r7,r8,r10,r11} -+5: -+ ldmia r1!,{r3,r4,r5,r6,r7,r8,r10,r11} -+ sub r2,r2,#32 -+ pld [r1,#96] -+ cmp r2,#32 -+ stmia r0!,{r3,r4,r5,r6,r7,r8,r10,r11} -+ bge 5b -+ -+ pop {r3,r4,r5,r6,r7,r8,r10,r11} -+ @ We are now down to less than 32 bytes -+ cbz r2,15f @ quick exit for the case where we copied a multiple of 32 -+ -+10: @ small copies (not necessarily aligned - note might be slightly more than 32bytes) -+ cmp r2,#4 -+ blt 12f -+11: -+ sub r2,r2,#4 -+ cmp r2,#4 -+ ldr r3, [r1],#4 -+ str r3, [r0],#4 -+ bge 11b -+12: -+ tst r2,#2 -+ itt ne -+ ldrhne r3, [r1],#2 -+ strhne r3, [r0],#2 -+ -+ tst r2,#1 -+ itt ne -+ ldrbne r3, [r1],#1 -+ strbne r3, [r0],#1 -+ -+15: @ exit -+ mov r0,r12 @ restore r0 -+ bx lr -+ -+ .align 2 -+ .p2align 4,,15 -+30: @ non-aligned - at least 32 bytes to play with -+ @ Test for co-misalignment -+ eor r3, r0, r1 -+ tst r3,#3 -+ beq 50f -+ -+ @ Use Neon for misaligned -+35: -+ vld1.8 {d0,d1,d2,d3}, [r1]! -+ sub r2,r2,#32 -+ cmp r2,#32 -+ pld [r1,#96] -+ vst1.8 {d0,d1,d2,d3}, [r0]! -+ bge 35b -+ b 10b @ TODO: Probably a bad idea to switch to ARM at this point -+ -+ .align 2 -+ .p2align 4,,15 -+50: @ Co-misaligned -+ @ At this point we've got at least 32 bytes -+51: -+ ldrb r3,[r1],#1 -+ sub r2,r2,#1 -+ strb r3,[r0],#1 -+ tst r0,#7 -+ bne 51b -+ -+ cmp r2,#32 -+ blt 10b -+ b 4b -diff --git a/cortex-strings/sysdeps/arm/armv7/memset.S b/cortex-strings/sysdeps/arm/armv7/memset.S -new file mode 100644 -index 0000000..921cb75 ---- /dev/null -+++ b/cortex-strings/sysdeps/arm/armv7/memset.S -@@ -0,0 +1,118 @@ -+/* Copyright (c) 2010-2011, Linaro Limited -+ All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ -+ * Neither the name of Linaro Limited nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. 
-+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ -+ Written by Dave Gilbert -+ -+ This memset routine is optimised on a Cortex-A9 and should work on -+ all ARMv7 processors. */ -+ -+ .syntax unified -+ .arch armv7-a -+ -+@ 2011-08-30 david.gilbert@linaro.org -+@ Extracted from local git 2f11b436 -+ -+@ this lets us check a flag in a 00/ff byte easily in either endianness -+#ifdef __ARMEB__ -+#define CHARTSTMASK(c) 1<<(31-(c*8)) -+#else -+#define CHARTSTMASK(c) 1<<(c*8) -+#endif -+ .text -+ .thumb -+ -+@ --------------------------------------------------------------------------- -+ .thumb_func -+ .align 2 -+ .p2align 4,,15 -+ .global memset -+ .type memset,%function -+memset: -+ @ r0 = address -+ @ r1 = character -+ @ r2 = count -+ @ returns original address in r0 -+ -+ mov r3, r0 @ Leave r0 alone -+ cbz r2, 10f @ Exit if 0 length -+ -+ tst r0, #7 -+ beq 2f @ Already aligned -+ -+ @ Ok, so we're misaligned here -+1: -+ strb r1, [r3], #1 -+ subs r2,r2,#1 -+ tst r3, #7 -+ cbz r2, 10f @ Exit if we hit the end -+ bne 1b @ go round again if still misaligned -+ -+2: -+ @ OK, so we're aligned -+ push {r4,r5,r6,r7} -+ bics r4, r2, #15 @ if less than 16 bytes then need to finish it off -+ beq 5f -+ -+3: -+ @ POSIX says that ch is cast to an unsigned char. A uxtb is one -+ @ byte and takes two cycles, where an AND is four bytes but one -+ @ cycle. -+ and r1, #0xFF -+ orr r1, r1, r1, lsl#8 @ Same character into all bytes -+ orr r1, r1, r1, lsl#16 -+ mov r5,r1 -+ mov r6,r1 -+ mov r7,r1 -+ -+4: -+ subs r4,r4,#16 -+ stmia r3!,{r1,r5,r6,r7} -+ bne 4b -+ and r2,r2,#15 -+ -+ @ At this point we're still aligned and we have upto align-1 bytes left to right -+ @ we can avoid some of the byte-at-a time now by testing for some big chunks -+ tst r2,#8 -+ itt ne -+ subne r2,r2,#8 -+ stmiane r3!,{r1,r5} -+ -+5: -+ pop {r4,r5,r6,r7} -+ cbz r2, 10f -+ -+ @ Got to do any last < alignment bytes -+6: -+ subs r2,r2,#1 -+ strb r1,[r3],#1 -+ bne 6b -+ -+10: -+ bx lr @ goodbye -diff --git a/cortex-strings/sysdeps/arm/armv7/strchr.S b/cortex-strings/sysdeps/arm/armv7/strchr.S -new file mode 100644 -index 0000000..8875dbf ---- /dev/null -+++ b/cortex-strings/sysdeps/arm/armv7/strchr.S -@@ -0,0 +1,76 @@ -+/* Copyright (c) 2010-2011, Linaro Limited -+ All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. 
-+ -+ * Neither the name of Linaro Limited nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ -+ Written by Dave Gilbert -+ -+ A very simple strchr routine, from benchmarks on A9 it's a bit faster than -+ the current version in eglibc (2.12.1-0ubuntu14 package) -+ I don't think doing a word at a time version is worth it since a lot -+ of strchr cases are very short anyway */ -+ -+@ 2011-02-07 david.gilbert@linaro.org -+@ Extracted from local git a5b438d861 -+ -+ .syntax unified -+ .arch armv7-a -+ -+ .text -+ .thumb -+ -+@ --------------------------------------------------------------------------- -+ -+ .thumb_func -+ .align 2 -+ .p2align 4,,15 -+ .global strchr -+ .type strchr,%function -+strchr: -+ @ r0 = start of string -+ @ r1 = character to match -+ @ returns NULL for no match, or a pointer to the match -+ and r1,r1, #255 -+ -+1: -+ ldrb r2,[r0],#1 -+ cmp r2,r1 -+ cbz r2,10f -+ bne 1b -+ -+ @ We're here if it matched -+5: -+ subs r0,r0,#1 -+ bx lr -+ -+10: -+ @ We're here if we ran off the end -+ cmp r1, #0 @ Corner case - you're allowed to search for the nil and get a pointer to it -+ beq 5b @ A bit messy, if it's common we should branch at the start to a special loop -+ mov r0,#0 -+ bx lr -diff --git a/cortex-strings/sysdeps/arm/armv7/strcmp.c b/cortex-strings/sysdeps/arm/armv7/strcmp.c -new file mode 100644 -index 0000000..fb2280d ---- /dev/null -+++ b/cortex-strings/sysdeps/arm/armv7/strcmp.c -@@ -0,0 +1,449 @@ -+/* -+ * Copyright (c) 2008 ARM Ltd -+ * All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. The name of the company may not be used to endorse or promote -+ * products derived from this software without specific prior written -+ * permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED -+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+ -+#include -+#include -+ -+#undef strcmp -+ -+ -+#ifdef __ARMEB__ -+#define SHFT2LSB "lsl" -+#define SHFT2MSB "lsr" -+#define MSB "0x000000ff" -+#define LSB "0xff000000" -+#else -+#define SHFT2LSB "lsr" -+#define SHFT2MSB "lsl" -+#define MSB "0xff000000" -+#define LSB "0x000000ff" -+#endif -+ -+#ifdef __thumb2__ -+#define magic1(REG) "#0x01010101" -+#define magic2(REG) "#0x80808080" -+#else -+#define magic1(REG) #REG -+#define magic2(REG) #REG ", lsl #7" -+#endif -+ -+int -+__attribute__((naked)) strcmp (const char* s1, const char* s2) -+{ -+ asm( -+#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \ -+ (defined (__thumb__) && !defined (__thumb2__))) -+ "pld [r0, #0]\n\t" -+ "pld [r1, #0]\n\t" -+ "eor r2, r0, r1\n\t" -+ "tst r2, #3\n\t" -+ /* Strings not at same byte offset from a word boundary. */ -+ "bne strcmp_unaligned\n\t" -+ "ands r2, r0, #3\n\t" -+ "bic r0, r0, #3\n\t" -+ "bic r1, r1, #3\n\t" -+ "ldr ip, [r0], #4\n\t" -+ "it eq\n\t" -+ "ldreq r3, [r1], #4\n\t" -+ "beq 1f\n\t" -+ /* Although s1 and s2 have identical initial alignment, they are -+ not currently word aligned. Rather than comparing bytes, -+ make sure that any bytes fetched from before the addressed -+ bytes are forced to 0xff. Then they will always compare -+ equal. */ -+ "eor r2, r2, #3\n\t" -+ "lsl r2, r2, #3\n\t" -+ "mvn r3, #"MSB"\n\t" -+ SHFT2LSB" r2, r3, r2\n\t" -+ "ldr r3, [r1], #4\n\t" -+ "orr ip, ip, r2\n\t" -+ "orr r3, r3, r2\n" -+ "1:\n\t" -+#ifndef __thumb2__ -+ /* Load the 'magic' constant 0x01010101. */ -+ "str r4, [sp, #-4]!\n\t" -+ "mov r4, #1\n\t" -+ "orr r4, r4, r4, lsl #8\n\t" -+ "orr r4, r4, r4, lsl #16\n" -+#endif -+ ".p2align 2\n" -+ "4:\n\t" -+ "pld [r0, #8]\n\t" -+ "pld [r1, #8]\n\t" -+ "sub r2, ip, "magic1(r4)"\n\t" -+ "cmp ip, r3\n\t" -+ "itttt eq\n\t" -+ /* check for any zero bytes in first word */ -+ "biceq r2, r2, ip\n\t" -+ "tsteq r2, "magic2(r4)"\n\t" -+ "ldreq ip, [r0], #4\n\t" -+ "ldreq r3, [r1], #4\n\t" -+ "beq 4b\n" -+ "2:\n\t" -+ /* There's a zero or a different byte in the word */ -+ SHFT2MSB" r0, ip, #24\n\t" -+ SHFT2LSB" ip, ip, #8\n\t" -+ "cmp r0, #1\n\t" -+ "it cs\n\t" -+ "cmpcs r0, r3, "SHFT2MSB" #24\n\t" -+ "it eq\n\t" -+ SHFT2LSB"eq r3, r3, #8\n\t" -+ "beq 2b\n\t" -+ /* On a big-endian machine, r0 contains the desired byte in bits -+ 0-7; on a little-endian machine they are in bits 24-31. In -+ both cases the other bits in r0 are all zero. For r3 the -+ interesting byte is at the other end of the word, but the -+ other bits are not necessarily zero. We need a signed result -+ representing the differnece in the unsigned bytes, so for the -+ little-endian case we can't just shift the interesting bits -+ up. 
*/ -+#ifdef __ARMEB__ -+ "sub r0, r0, r3, lsr #24\n\t" -+#else -+ "and r3, r3, #255\n\t" -+#ifdef __thumb2__ -+ /* No RSB instruction in Thumb2 */ -+ "lsr r0, r0, #24\n\t" -+ "sub r0, r0, r3\n\t" -+#else -+ "rsb r0, r3, r0, lsr #24\n\t" -+#endif -+#endif -+#ifndef __thumb2__ -+ "ldr r4, [sp], #4\n\t" -+#endif -+ "BX LR" -+#elif (defined (__thumb__) && !defined (__thumb2__)) -+ "1:\n\t" -+ "ldrb r2, [r0]\n\t" -+ "ldrb r3, [r1]\n\t" -+ "add r0, r0, #1\n\t" -+ "add r1, r1, #1\n\t" -+ "cmp r2, #0\n\t" -+ "beq 2f\n\t" -+ "cmp r2, r3\n\t" -+ "beq 1b\n\t" -+ "2:\n\t" -+ "sub r0, r2, r3\n\t" -+ "bx lr" -+#else -+ "3:\n\t" -+ "ldrb r2, [r0], #1\n\t" -+ "ldrb r3, [r1], #1\n\t" -+ "cmp r2, #1\n\t" -+ "it cs\n\t" -+ "cmpcs r2, r3\n\t" -+ "beq 3b\n\t" -+ "sub r0, r2, r3\n\t" -+ "BX LR" -+#endif -+ ); -+} -+ -+#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \ -+ (defined (__thumb__) && !defined (__thumb2__))) -+static int __attribute__((naked, used)) -+strcmp_unaligned(const char* s1, const char* s2) -+{ -+#if 0 -+ /* The assembly code below is based on the following alogrithm. */ -+#ifdef __ARMEB__ -+#define RSHIFT << -+#define LSHIFT >> -+#else -+#define RSHIFT >> -+#define LSHIFT << -+#endif -+ -+#define body(shift) \ -+ mask = 0xffffffffU RSHIFT shift; \ -+ w1 = *wp1++; \ -+ w2 = *wp2++; \ -+ do \ -+ { \ -+ t1 = w1 & mask; \ -+ if (__builtin_expect(t1 != w2 RSHIFT shift, 0)) \ -+ { \ -+ w2 RSHIFT= shift; \ -+ break; \ -+ } \ -+ if (__builtin_expect(((w1 - b1) & ~w1) & (b1 << 7), 0)) \ -+ { \ -+ /* See comment in assembler below re syndrome on big-endian */\ -+ if ((((w1 - b1) & ~w1) & (b1 << 7)) & mask) \ -+ w2 RSHIFT= shift; \ -+ else \ -+ { \ -+ w2 = *wp2; \ -+ t1 = w1 RSHIFT (32 - shift); \ -+ w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \ -+ } \ -+ break; \ -+ } \ -+ w2 = *wp2++; \ -+ t1 ^= w1; \ -+ if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0)) \ -+ { \ -+ t1 = w1 >> (32 - shift); \ -+ w2 = (w2 << (32 - shift)) RSHIFT (32 - shift); \ -+ break; \ -+ } \ -+ w1 = *wp1++; \ -+ } while (1) -+ -+ const unsigned* wp1; -+ const unsigned* wp2; -+ unsigned w1, w2; -+ unsigned mask; -+ unsigned shift; -+ unsigned b1 = 0x01010101; -+ char c1, c2; -+ unsigned t1; -+ -+ while (((unsigned) s1) & 3) -+ { -+ c1 = *s1++; -+ c2 = *s2++; -+ if (c1 == 0 || c1 != c2) -+ return c1 - (int)c2; -+ } -+ wp1 = (unsigned*) (((unsigned)s1) & ~3); -+ wp2 = (unsigned*) (((unsigned)s2) & ~3); -+ t1 = ((unsigned) s2) & 3; -+ if (t1 == 1) -+ { -+ body(8); -+ } -+ else if (t1 == 2) -+ { -+ body(16); -+ } -+ else -+ { -+ body (24); -+ } -+ -+ do -+ { -+#ifdef __ARMEB__ -+ c1 = (char) t1 >> 24; -+ c2 = (char) w2 >> 24; -+#else -+ c1 = (char) t1; -+ c2 = (char) w2; -+#endif -+ t1 RSHIFT= 8; -+ w2 RSHIFT= 8; -+ } while (c1 != 0 && c1 == c2); -+ return c1 - c2; -+#endif -+ -+ asm("wp1 .req r0\n\t" -+ "wp2 .req r1\n\t" -+ "b1 .req r2\n\t" -+ "w1 .req r4\n\t" -+ "w2 .req r5\n\t" -+ "t1 .req ip\n\t" -+ "@ r3 is scratch\n" -+ -+ /* First of all, compare bytes until wp1(sp1) is word-aligned. 
*/ -+ "1:\n\t" -+ "tst wp1, #3\n\t" -+ "beq 2f\n\t" -+ "ldrb r2, [wp1], #1\n\t" -+ "ldrb r3, [wp2], #1\n\t" -+ "cmp r2, #1\n\t" -+ "it cs\n\t" -+ "cmpcs r2, r3\n\t" -+ "beq 1b\n\t" -+ "sub r0, r2, r3\n\t" -+ "BX LR\n" -+ -+ "2:\n\t" -+ "str r5, [sp, #-4]!\n\t" -+ "str r4, [sp, #-4]!\n\t" -+ // "stmfd sp!, {r4, r5}\n\t" -+ "mov b1, #1\n\t" -+ "orr b1, b1, b1, lsl #8\n\t" -+ "orr b1, b1, b1, lsl #16\n\t" -+ -+ "and t1, wp2, #3\n\t" -+ "bic wp2, wp2, #3\n\t" -+ "ldr w1, [wp1], #4\n\t" -+ "ldr w2, [wp2], #4\n\t" -+ "cmp t1, #2\n\t" -+ "beq 2f\n\t" -+ "bhi 3f\n" -+ -+ /* Critical inner Loop: Block with 3 bytes initial overlap */ -+ ".p2align 2\n" -+ "1:\n\t" -+ "bic t1, w1, #"MSB"\n\t" -+ "cmp t1, w2, "SHFT2LSB" #8\n\t" -+ "sub r3, w1, b1\n\t" -+ "bic r3, r3, w1\n\t" -+ "bne 4f\n\t" -+ "ands r3, r3, b1, lsl #7\n\t" -+ "it eq\n\t" -+ "ldreq w2, [wp2], #4\n\t" -+ "bne 5f\n\t" -+ "eor t1, t1, w1\n\t" -+ "cmp t1, w2, "SHFT2MSB" #24\n\t" -+ "bne 6f\n\t" -+ "ldr w1, [wp1], #4\n\t" -+ "b 1b\n" -+ "4:\n\t" -+ SHFT2LSB" w2, w2, #8\n\t" -+ "b 8f\n" -+ -+ "5:\n\t" -+#ifdef __ARMEB__ -+ /* The syndrome value may contain false ones if the string ends -+ with the bytes 0x01 0x00 */ -+ "tst w1, #0xff000000\n\t" -+ "itt ne\n\t" -+ "tstne w1, #0x00ff0000\n\t" -+ "tstne w1, #0x0000ff00\n\t" -+ "beq 7f\n\t" -+#else -+ "bics r3, r3, #0xff000000\n\t" -+ "bne 7f\n\t" -+#endif -+ "ldrb w2, [wp2]\n\t" -+ SHFT2LSB" t1, w1, #24\n\t" -+#ifdef __ARMEB__ -+ "lsl w2, w2, #24\n\t" -+#endif -+ "b 8f\n" -+ -+ "6:\n\t" -+ SHFT2LSB" t1, w1, #24\n\t" -+ "and w2, w2, #"LSB"\n\t" -+ "b 8f\n" -+ -+ /* Critical inner Loop: Block with 2 bytes initial overlap */ -+ ".p2align 2\n" -+ "2:\n\t" -+ SHFT2MSB" t1, w1, #16\n\t" -+ "sub r3, w1, b1\n\t" -+ SHFT2LSB" t1, t1, #16\n\t" -+ "bic r3, r3, w1\n\t" -+ "cmp t1, w2, "SHFT2LSB" #16\n\t" -+ "bne 4f\n\t" -+ "ands r3, r3, b1, lsl #7\n\t" -+ "it eq\n\t" -+ "ldreq w2, [wp2], #4\n\t" -+ "bne 5f\n\t" -+ "eor t1, t1, w1\n\t" -+ "cmp t1, w2, "SHFT2MSB" #16\n\t" -+ "bne 6f\n\t" -+ "ldr w1, [wp1], #4\n\t" -+ "b 2b\n" -+ -+ "5:\n\t" -+#ifdef __ARMEB__ -+ /* The syndrome value may contain false ones if the string ends -+ with the bytes 0x01 0x00 */ -+ "tst w1, #0xff000000\n\t" -+ "it ne\n\t" -+ "tstne w1, #0x00ff0000\n\t" -+ "beq 7f\n\t" -+#else -+ "lsls r3, r3, #16\n\t" -+ "bne 7f\n\t" -+#endif -+ "ldrh w2, [wp2]\n\t" -+ SHFT2LSB" t1, w1, #16\n\t" -+#ifdef __ARMEB__ -+ "lsl w2, w2, #16\n\t" -+#endif -+ "b 8f\n" -+ -+ "6:\n\t" -+ SHFT2MSB" w2, w2, #16\n\t" -+ SHFT2LSB" t1, w1, #16\n\t" -+ "4:\n\t" -+ SHFT2LSB" w2, w2, #16\n\t" -+ "b 8f\n\t" -+ -+ /* Critical inner Loop: Block with 1 byte initial overlap */ -+ ".p2align 2\n" -+ "3:\n\t" -+ "and t1, w1, #"LSB"\n\t" -+ "cmp t1, w2, "SHFT2LSB" #24\n\t" -+ "sub r3, w1, b1\n\t" -+ "bic r3, r3, w1\n\t" -+ "bne 4f\n\t" -+ "ands r3, r3, b1, lsl #7\n\t" -+ "it eq\n\t" -+ "ldreq w2, [wp2], #4\n\t" -+ "bne 5f\n\t" -+ "eor t1, t1, w1\n\t" -+ "cmp t1, w2, "SHFT2MSB" #8\n\t" -+ "bne 6f\n\t" -+ "ldr w1, [wp1], #4\n\t" -+ "b 3b\n" -+ "4:\n\t" -+ SHFT2LSB" w2, w2, #24\n\t" -+ "b 8f\n" -+ "5:\n\t" -+ /* The syndrome value may contain false ones if the string ends -+ with the bytes 0x01 0x00 */ -+ "tst w1, #"LSB"\n\t" -+ "beq 7f\n\t" -+ "ldr w2, [wp2], #4\n" -+ "6:\n\t" -+ SHFT2LSB" t1, w1, #8\n\t" -+ "bic w2, w2, #"MSB"\n\t" -+ "b 8f\n" -+ "7:\n\t" -+ "mov r0, #0\n\t" -+ // "ldmfd sp!, {r4, r5}\n\t" -+ "ldr r4, [sp], #4\n\t" -+ "ldr r5, [sp], #4\n\t" -+ "BX LR\n" -+ "8:\n\t" -+ "and r2, t1, #"LSB"\n\t" -+ "and r0, w2, #"LSB"\n\t" -+ "cmp r0, #1\n\t" -+ "it cs\n\t" -+ 
"cmpcs r0, r2\n\t" -+ "itt eq\n\t" -+ SHFT2LSB"eq t1, t1, #8\n\t" -+ SHFT2LSB"eq w2, w2, #8\n\t" -+ "beq 8b\n\t" -+ "sub r0, r2, r0\n\t" -+ // "ldmfd sp!, {r4, r5}\n\t" -+ "ldr r4, [sp], #4\n\t" -+ "ldr r5, [sp], #4\n\t" -+ "BX LR"); -+} -+ -+#endif -+ -+libc_hidden_builtin_def (strcmp) -diff --git a/cortex-strings/sysdeps/arm/armv7/strcpy.c b/cortex-strings/sysdeps/arm/armv7/strcpy.c -new file mode 100644 -index 0000000..aa8cb06 ---- /dev/null -+++ b/cortex-strings/sysdeps/arm/armv7/strcpy.c -@@ -0,0 +1,172 @@ -+/* -+ * Copyright (c) 2008 ARM Ltd -+ * All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. The name of the company may not be used to endorse or promote -+ * products derived from this software without specific prior written -+ * permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED -+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+ -+#include -+#include -+ -+#undef strcmp -+ -+ -+#ifdef __thumb2__ -+#define magic1(REG) "#0x01010101" -+#define magic2(REG) "#0x80808080" -+#else -+#define magic1(REG) #REG -+#define magic2(REG) #REG ", lsl #7" -+#endif -+ -+char* __attribute__((naked)) -+strcpy (char* dst, const char* src) -+{ -+ asm ( -+#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \ -+ (defined (__thumb__) && !defined (__thumb2__))) -+ "pld [r1, #0]\n\t" -+ "eor r2, r0, r1\n\t" -+ "mov ip, r0\n\t" -+ "tst r2, #3\n\t" -+ "bne 4f\n\t" -+ "tst r1, #3\n\t" -+ "bne 3f\n" -+ "5:\n\t" -+#ifndef __thumb2__ -+ "str r5, [sp, #-4]!\n\t" -+ "mov r5, #0x01\n\t" -+ "orr r5, r5, r5, lsl #8\n\t" -+ "orr r5, r5, r5, lsl #16\n\t" -+#endif -+ -+ "str r4, [sp, #-4]!\n\t" -+ "tst r1, #4\n\t" -+ "ldr r3, [r1], #4\n\t" -+ "beq 2f\n\t" -+ "sub r2, r3, "magic1(r5)"\n\t" -+ "bics r2, r2, r3\n\t" -+ "tst r2, "magic2(r5)"\n\t" -+ "itt eq\n\t" -+ "streq r3, [ip], #4\n\t" -+ "ldreq r3, [r1], #4\n" -+ "bne 1f\n\t" -+ /* Inner loop. We now know that r1 is 64-bit aligned, so we -+ can safely fetch up to two words. This allows us to avoid -+ load stalls. 
*/ -+ ".p2align 2\n" -+ "2:\n\t" -+ "pld [r1, #8]\n\t" -+ "ldr r4, [r1], #4\n\t" -+ "sub r2, r3, "magic1(r5)"\n\t" -+ "bics r2, r2, r3\n\t" -+ "tst r2, "magic2(r5)"\n\t" -+ "sub r2, r4, "magic1(r5)"\n\t" -+ "bne 1f\n\t" -+ "str r3, [ip], #4\n\t" -+ "bics r2, r2, r4\n\t" -+ "tst r2, "magic2(r5)"\n\t" -+ "itt eq\n\t" -+ "ldreq r3, [r1], #4\n\t" -+ "streq r4, [ip], #4\n\t" -+ "beq 2b\n\t" -+ "mov r3, r4\n" -+ "1:\n\t" -+#ifdef __ARMEB__ -+ "rors r3, r3, #24\n\t" -+#endif -+ "strb r3, [ip], #1\n\t" -+ "tst r3, #0xff\n\t" -+#ifdef __ARMEL__ -+ "ror r3, r3, #8\n\t" -+#endif -+ "bne 1b\n\t" -+ "ldr r4, [sp], #4\n\t" -+#ifndef __thumb2__ -+ "ldr r5, [sp], #4\n\t" -+#endif -+ "BX LR\n" -+ -+ /* Strings have the same offset from word alignment, but it's -+ not zero. */ -+ "3:\n\t" -+ "tst r1, #1\n\t" -+ "beq 1f\n\t" -+ "ldrb r2, [r1], #1\n\t" -+ "strb r2, [ip], #1\n\t" -+ "cmp r2, #0\n\t" -+ "it eq\n" -+ "BXEQ LR\n" -+ "1:\n\t" -+ "tst r1, #2\n\t" -+ "beq 5b\n\t" -+ "ldrh r2, [r1], #2\n\t" -+#ifdef __ARMEB__ -+ "tst r2, #0xff00\n\t" -+ "iteet ne\n\t" -+ "strneh r2, [ip], #2\n\t" -+ "lsreq r2, r2, #8\n\t" -+ "streqb r2, [ip]\n\t" -+ "tstne r2, #0xff\n\t" -+#else -+ "tst r2, #0xff\n\t" -+ "itet ne\n\t" -+ "strneh r2, [ip], #2\n\t" -+ "streqb r2, [ip]\n\t" -+ "tstne r2, #0xff00\n\t" -+#endif -+ "bne 5b\n\t" -+ "BX LR\n" -+ -+ /* src and dst do not have a common word-alignement. Fall back to -+ byte copying. */ -+ "4:\n\t" -+ "ldrb r2, [r1], #1\n\t" -+ "strb r2, [ip], #1\n\t" -+ "cmp r2, #0\n\t" -+ "bne 4b\n\t" -+ "BX LR" -+ -+#elif !defined (__thumb__) || defined (__thumb2__) -+ "mov r3, r0\n\t" -+ "1:\n\t" -+ "ldrb r2, [r1], #1\n\t" -+ "strb r2, [r3], #1\n\t" -+ "cmp r2, #0\n\t" -+ "bne 1b\n\t" -+ "BX LR" -+#else -+ "mov r3, r0\n\t" -+ "1:\n\t" -+ "ldrb r2, [r1]\n\t" -+ "add r1, r1, #1\n\t" -+ "strb r2, [r3]\n\t" -+ "add r3, r3, #1\n\t" -+ "cmp r2, #0\n\t" -+ "bne 1b\n\t" -+ "BX LR" -+#endif -+ ); -+} -+libc_hidden_builtin_def (strcpy) -diff --git a/cortex-strings/sysdeps/arm/armv7/strlen.S b/cortex-strings/sysdeps/arm/armv7/strlen.S -new file mode 100644 -index 0000000..125e92f ---- /dev/null -+++ b/cortex-strings/sysdeps/arm/armv7/strlen.S -@@ -0,0 +1,111 @@ -+/* Copyright (c) 2010-2011, Linaro Limited -+ All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ -+ * Neither the name of Linaro Limited nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ -+ Written by Dave Gilbert -+ -+ This strlen routine is optimised on a Cortex-A9 and should work on -+ all ARMv7 processors. This routine is reasonably fast for short -+ strings, but is probably slower than a simple implementation if all -+ your strings are very short */ -+ -+@ 2011-02-08 david.gilbert@linaro.org -+@ Extracted from local git 6848613a -+ -+ -+@ this lets us check a flag in a 00/ff byte easily in either endianness -+#ifdef __ARMEB__ -+#define CHARTSTMASK(c) 1<<(31-(c*8)) -+#else -+#define CHARTSTMASK(c) 1<<(c*8) -+#endif -+ -+@----------------------------------------------------------------------------------------------------------------------------- -+ .syntax unified -+ .arch armv7-a -+ -+ .thumb_func -+ .align 2 -+ .p2align 4,,15 -+ .global strlen -+ .type strlen,%function -+strlen: -+ @ r0 = string -+ @ returns count of bytes in string not including terminator -+ mov r1, r0 -+ push { r4,r6 } -+ mvns r6, #0 @ all F -+ movs r4, #0 -+ tst r0, #7 -+ beq 2f -+ -+1: -+ ldrb r2, [r1], #1 -+ tst r1, #7 @ Hit alignment yet? -+ cbz r2, 10f @ Exit if we found the 0 -+ bne 1b -+ -+ @ So we're now aligned -+2: -+ ldmia r1!,{r2,r3} -+ uadd8 r2, r2, r6 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 -+ sel r2, r4, r6 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION -+ uadd8 r3, r3, r6 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 -+ sel r3, r2, r6 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION -+ cmp r3, #0 -+ beq 2b -+ -+strlenendtmp: -+ @ One (or more) of the bytes we loaded was 0 - but which one? 
-+ @ r2 has the mask corresponding to the first loaded word -+ @ r3 has a combined mask of the two words - but if r2 was all-non 0 -+ @ then it's just the 2nd words -+ cmp r2, #0 -+ itte eq -+ moveq r2, r3 @ the end is in the 2nd word -+ subeq r1,r1,#3 -+ subne r1,r1,#7 -+ -+ @ r1 currently points to the 2nd byte of the word containing the 0 -+ tst r2, # CHARTSTMASK(0) @ 1st character -+ bne 10f -+ adds r1,r1,#1 -+ tst r2, # CHARTSTMASK(1) @ 2nd character -+ ittt eq -+ addeq r1,r1,#1 -+ tsteq r2, # (3<<15) @ 2nd & 3rd character -+ @ If not the 3rd must be the last one -+ addeq r1,r1,#1 -+ -+10: -+ @ r0 is still at the beginning, r1 is pointing 1 byte after the terminator -+ sub r0, r1, r0 -+ subs r0, r0, #1 -+ pop { r4, r6 } -+ bx lr diff --git a/patches/glibc-2.16.0/0010-add-libc_hidden_builtin_def-for-all-cortex-functions.patch b/patches/glibc-2.16.0/0010-add-libc_hidden_builtin_def-for-all-cortex-functions.patch deleted file mode 100644 index be6993d..0000000 --- a/patches/glibc-2.16.0/0010-add-libc_hidden_builtin_def-for-all-cortex-functions.patch +++ /dev/null @@ -1,76 +0,0 @@ -From: Michael Olbrich -Date: Thu, 15 Sep 2011 23:30:25 +0200 -Subject: [PATCH] add libc_hidden_builtin_def for all cortex functions - -Signed-off-by: Michael Olbrich ---- - cortex-strings/sysdeps/arm/armv7/memchr.S | 3 +++ - cortex-strings/sysdeps/arm/armv7/memcpy.S | 2 ++ - cortex-strings/sysdeps/arm/armv7/memset.S | 2 ++ - cortex-strings/sysdeps/arm/armv7/strchr.S | 3 +++ - cortex-strings/sysdeps/arm/armv7/strcpy.c | 1 + - cortex-strings/sysdeps/arm/armv7/strlen.S | 2 ++ - 6 files changed, 13 insertions(+) - -diff --git a/cortex-strings/sysdeps/arm/armv7/memchr.S b/cortex-strings/sysdeps/arm/armv7/memchr.S -index 8f5aaa9..6d497cb 100644 ---- a/cortex-strings/sysdeps/arm/armv7/memchr.S -+++ b/cortex-strings/sysdeps/arm/armv7/memchr.S -@@ -148,3 +148,6 @@ memchr: - pop {r4,r5,r6,r7} - subs r0,r0,#1 - bx lr -+ -+strong_alias (memchr, __memchr) -+libc_hidden_builtin_def (memchr) -diff --git a/cortex-strings/sysdeps/arm/armv7/memcpy.S b/cortex-strings/sysdeps/arm/armv7/memcpy.S -index 3be24ca..c274207 100644 ---- a/cortex-strings/sysdeps/arm/armv7/memcpy.S -+++ b/cortex-strings/sysdeps/arm/armv7/memcpy.S -@@ -150,3 +150,5 @@ memcpy: - cmp r2,#32 - blt 10b - b 4b -+ -+libc_hidden_builtin_def (memcpy) -diff --git a/cortex-strings/sysdeps/arm/armv7/memset.S b/cortex-strings/sysdeps/arm/armv7/memset.S -index 921cb75..d4c12a4 100644 ---- a/cortex-strings/sysdeps/arm/armv7/memset.S -+++ b/cortex-strings/sysdeps/arm/armv7/memset.S -@@ -116,3 +116,5 @@ memset: - - 10: - bx lr @ goodbye -+ -+libc_hidden_builtin_def (memset) -diff --git a/cortex-strings/sysdeps/arm/armv7/strchr.S b/cortex-strings/sysdeps/arm/armv7/strchr.S -index 8875dbf..05c832f 100644 ---- a/cortex-strings/sysdeps/arm/armv7/strchr.S -+++ b/cortex-strings/sysdeps/arm/armv7/strchr.S -@@ -74,3 +74,6 @@ strchr: - beq 5b @ A bit messy, if it's common we should branch at the start to a special loop - mov r0,#0 - bx lr -+ -+weak_alias (strchr, index) -+libc_hidden_builtin_def (strchr) -diff --git a/cortex-strings/sysdeps/arm/armv7/strcpy.c b/cortex-strings/sysdeps/arm/armv7/strcpy.c -index aa8cb06..3bbaa86 100644 ---- a/cortex-strings/sysdeps/arm/armv7/strcpy.c -+++ b/cortex-strings/sysdeps/arm/armv7/strcpy.c -@@ -169,4 +169,5 @@ strcpy (char* dst, const char* src) - #endif - ); - } -+ - libc_hidden_builtin_def (strcpy) -diff --git a/cortex-strings/sysdeps/arm/armv7/strlen.S b/cortex-strings/sysdeps/arm/armv7/strlen.S -index 125e92f..a1e02ad 100644 ---- 
a/cortex-strings/sysdeps/arm/armv7/strlen.S -+++ b/cortex-strings/sysdeps/arm/armv7/strlen.S -@@ -109,3 +109,5 @@ strlenendtmp: - subs r0, r0, #1 - pop { r4, r6 } - bx lr -+ -+libc_hidden_builtin_def (strlen) diff --git a/patches/glibc-2.16.0/0100-Fix-localedef-segfault-when-run-under-exec-shield-Pa.patch b/patches/glibc-2.16.0/0100-Fix-localedef-segfault-when-run-under-exec-shield-Pa.patch new file mode 100644 index 0000000..02865b3 --- /dev/null +++ b/patches/glibc-2.16.0/0100-Fix-localedef-segfault-when-run-under-exec-shield-Pa.patch @@ -0,0 +1,67 @@ +From: Jakub Jelinek +Date: Tue, 1 Nov 2011 18:58:26 +0100 +Subject: [PATCH] Fix localedef segfault when run under exec-shield, PaX or + similar + +# DP: Description: Fix localedef segfault when run under exec-shield, +# PaX or similar. (#231438, #198099) +# DP: Dpatch Author: James Troup +# DP: Patch Author: (probably) Jakub Jelinek +# DP: Upstream status: Unknown +# DP: Status Details: Unknown +# DP: Date: 2004-03-16 + +Taken from Debian. + +Signed-off-by: Michael Olbrich +--- + locale/programs/3level.h | 36 ++++++++++++++++++++++++++++++++++++ + 1 file changed, 36 insertions(+) + +diff --git a/locale/programs/3level.h b/locale/programs/3level.h +index 6297720..4c6f4e1 100644 +--- a/locale/programs/3level.h ++++ b/locale/programs/3level.h +@@ -202,6 +202,42 @@ CONCAT(TABLE,_iterate) (struct TABLE *t, + } + } + } ++ ++/* GCC ATM seems to do a poor job with pointers to nested functions passed ++ to inlined functions. Help it a little bit with this hack. */ ++#define wchead_table_iterate(tp, fn) \ ++do \ ++ { \ ++ struct wchead_table *t = (tp); \ ++ uint32_t index1; \ ++ for (index1 = 0; index1 < t->level1_size; index1++) \ ++ { \ ++ uint32_t lookup1 = t->level1[index1]; \ ++ if (lookup1 != ((uint32_t) ~0)) \ ++ { \ ++ uint32_t lookup1_shifted = lookup1 << t->q; \ ++ uint32_t index2; \ ++ for (index2 = 0; index2 < (1 << t->q); index2++) \ ++ { \ ++ uint32_t lookup2 = t->level2[index2 + lookup1_shifted]; \ ++ if (lookup2 != ((uint32_t) ~0)) \ ++ { \ ++ uint32_t lookup2_shifted = lookup2 << t->p; \ ++ uint32_t index3; \ ++ for (index3 = 0; index3 < (1 << t->p); index3++) \ ++ { \ ++ struct element_t *lookup3 \ ++ = t->level3[index3 + lookup2_shifted]; \ ++ if (lookup3 != NULL) \ ++ fn ((((index1 << t->q) + index2) << t->p) + index3, \ ++ lookup3); \ ++ } \ ++ } \ ++ } \ ++ } \ ++ } \ ++ } while (0) ++ + #endif + + #ifndef NO_FINALIZE diff --git a/patches/glibc-2.16.0/0200-resolv-dynamic.patch b/patches/glibc-2.16.0/0200-resolv-dynamic.patch new file mode 100644 index 0000000..076ea83 --- /dev/null +++ b/patches/glibc-2.16.0/0200-resolv-dynamic.patch @@ -0,0 +1,47 @@ +From: unknown author +Date: Tue, 1 Nov 2011 18:58:26 +0100 +Subject: [PATCH] resolv dynamic + +ripped from SuSE + +if /etc/resolv.conf is updated, then make sure applications +already running get the updated information. 
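[The mechanism in the res_libc.c hunk below is cheap polling: __res_maybe_init() stats _PATH_RESCONF at most once per second, and when the file's mtime changes it bumps the global __res_initstamp, so every resolver context re-reads the configuration on its next use. A standalone sketch of that rate-limited mtime check follows; the identifiers are illustrative, not glibc's, and it omits the locking the real patch does with atomicinclock/atomicincunlock:

#include <sys/stat.h>
#include <time.h>

#define RESCONF_PATH "/etc/resolv.conf"	/* stand-in for _PATH_RESCONF */

static time_t last_mtime, last_check;
static unsigned int config_generation;

/* Return a generation number that changes whenever the file's mtime
   does; stat() runs at most once per second, so hot paths stay cheap. */
unsigned int
resconf_generation (void)
{
	time_t now = time (NULL);
	struct stat st;

	if (now != last_check) {
		last_check = now;
		if (stat (RESCONF_PATH, &st) == 0
		    && st.st_mtime != last_mtime) {
			last_mtime = st.st_mtime;
			config_generation++;	/* callers re-init when this moves */
		}
	}
	return config_generation;
}
]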
+ +http://bugs.gentoo.org/177416 +--- + resolv/res_libc.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/resolv/res_libc.c b/resolv/res_libc.c +index 48d3200..a443345 100644 +--- a/resolv/res_libc.c ++++ b/resolv/res_libc.c +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + + + /* The following bit is copied from res_data.c (where it is #ifdef'ed +@@ -95,6 +96,20 @@ int + __res_maybe_init (res_state resp, int preinit) + { + if (resp->options & RES_INIT) { ++ static time_t last_mtime, last_check; ++ time_t now; ++ struct stat statbuf; ++ ++ time (&now); ++ if (now != last_check) { ++ last_check = now; ++ if (stat (_PATH_RESCONF, &statbuf) == 0 && last_mtime != statbuf.st_mtime) { ++ last_mtime = statbuf.st_mtime; ++ atomicinclock (lock); ++ atomicinc (__res_initstamp); ++ atomicincunlock (lock); ++ } ++ } + if (__res_initstamp != resp->_u._ext.initstamp) { + if (resp->nscount > 0) + __res_iclose (resp, true); diff --git a/patches/glibc-2.16.0/0300-optimized-string-functions-for-NEON-from-Linaro.patch b/patches/glibc-2.16.0/0300-optimized-string-functions-for-NEON-from-Linaro.patch new file mode 100644 index 0000000..ef7dc02 --- /dev/null +++ b/patches/glibc-2.16.0/0300-optimized-string-functions-for-NEON-from-Linaro.patch @@ -0,0 +1,1292 @@ +From: Michael Olbrich +Date: Thu, 15 Sep 2011 16:50:56 +0200 +Subject: [PATCH] optimized string functions for NEON from Linaro + +Signed-off-by: Michael Olbrich +--- + cortex-strings/sysdeps/arm/armv7/memchr.S | 150 ++++++++++ + cortex-strings/sysdeps/arm/armv7/memcpy.S | 152 ++++++++++ + cortex-strings/sysdeps/arm/armv7/memset.S | 118 ++++++++ + cortex-strings/sysdeps/arm/armv7/strchr.S | 76 +++++ + cortex-strings/sysdeps/arm/armv7/strcmp.c | 449 +++++++++++++++++++++++++++++ + cortex-strings/sysdeps/arm/armv7/strcpy.c | 172 +++++++++++ + cortex-strings/sysdeps/arm/armv7/strlen.S | 111 +++++++ + 7 files changed, 1228 insertions(+) + create mode 100644 cortex-strings/sysdeps/arm/armv7/memchr.S + create mode 100644 cortex-strings/sysdeps/arm/armv7/memcpy.S + create mode 100644 cortex-strings/sysdeps/arm/armv7/memset.S + create mode 100644 cortex-strings/sysdeps/arm/armv7/strchr.S + create mode 100644 cortex-strings/sysdeps/arm/armv7/strcmp.c + create mode 100644 cortex-strings/sysdeps/arm/armv7/strcpy.c + create mode 100644 cortex-strings/sysdeps/arm/armv7/strlen.S + +diff --git a/cortex-strings/sysdeps/arm/armv7/memchr.S b/cortex-strings/sysdeps/arm/armv7/memchr.S +new file mode 100644 +index 0000000..8f5aaa9 +--- /dev/null ++++ b/cortex-strings/sysdeps/arm/armv7/memchr.S +@@ -0,0 +1,150 @@ ++/* Copyright (c) 2010-2011, Linaro Limited ++ All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ ++ * Neither the name of Linaro Limited nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. 
++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++ Written by Dave Gilbert ++ ++ This memchr routine is optimised on a Cortex-A9 and should work on ++ all ARMv7 processors. It has a fast past for short sizes, and has ++ an optimised path for large data sets; the worst case is finding the ++ match early in a large data set. */ ++ ++@ 2011-02-07 david.gilbert@linaro.org ++@ Extracted from local git a5b438d861 ++@ 2011-07-14 david.gilbert@linaro.org ++@ Import endianness fix from local git ea786f1b ++ ++ .syntax unified ++ .arch armv7-a ++ ++@ this lets us check a flag in a 00/ff byte easily in either endianness ++#ifdef __ARMEB__ ++#define CHARTSTMASK(c) 1<<(31-(c*8)) ++#else ++#define CHARTSTMASK(c) 1<<(c*8) ++#endif ++ .text ++ .thumb ++ ++@ --------------------------------------------------------------------------- ++ .thumb_func ++ .align 2 ++ .p2align 4,,15 ++ .global memchr ++ .type memchr,%function ++memchr: ++ @ r0 = start of memory to scan ++ @ r1 = character to look for ++ @ r2 = length ++ @ returns r0 = pointer to character or NULL if not found ++ and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char ++ ++ cmp r2,#16 @ If it's short don't bother with anything clever ++ blt 20f ++ ++ tst r0, #7 @ If it's already aligned skip the next bit ++ beq 10f ++ ++ @ Work up to an aligned point ++5: ++ ldrb r3, [r0],#1 ++ subs r2, r2, #1 ++ cmp r3, r1 ++ beq 50f @ If it matches exit found ++ tst r0, #7 ++ cbz r2, 40f @ If we run off the end, exit not found ++ bne 5b @ If not aligned yet then do next byte ++ ++10: ++ @ At this point, we are aligned, we know we have at least 8 bytes to work with ++ push {r4,r5,r6,r7} ++ orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes ++ orr r1, r1, r1, lsl #16 ++ bic r4, r2, #7 @ Number of double words to work with ++ mvns r7, #0 @ all F's ++ movs r3, #0 ++ ++15: ++ ldmia r0!,{r5,r6} ++ subs r4, r4, #8 ++ eor r5,r5, r1 @ Get it so that r5,r6 have 00's where the bytes match the target ++ eor r6,r6, r1 ++ uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 ++ sel r5, r3, r7 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION ++ uadd8 r6, r6, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 ++ sel r6, r5, r7 @ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION ++ cbnz r6, 60f ++ bne 15b @ (Flags from the subs above) If not run out of bytes then go around again ++ ++ pop {r4,r5,r6,r7} ++ and r1,r1,#0xff @ Get r1 back to a single character from the expansion above ++ and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done ++ ++20: ++ cbz r2, 40f @ 0 length or hit the end already then not found ++ ++21: @ Post aligned section, or just a short call ++ ldrb 
r3,[r0],#1 ++ subs r2,r2,#1 ++ eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub ++ cbz r3, 50f ++ bne 21b @ on r2 flags ++ ++40: ++ movs r0,#0 @ not found ++ bx lr ++ ++50: ++ subs r0,r0,#1 @ found ++ bx lr ++ ++60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was ++ @ r0 points to the start of the double word after the one that was tested ++ @ r5 has the 00/ff pattern for the first word, r6 has the chained value ++ cmp r5, #0 ++ itte eq ++ moveq r5, r6 @ the end is in the 2nd word ++ subeq r0,r0,#3 @ Points to 2nd byte of 2nd word ++ subne r0,r0,#7 @ or 2nd byte of 1st word ++ ++ @ r0 currently points to the 3rd byte of the word containing the hit ++ tst r5, # CHARTSTMASK(0) @ 1st character ++ bne 61f ++ adds r0,r0,#1 ++ tst r5, # CHARTSTMASK(1) @ 2nd character ++ ittt eq ++ addeq r0,r0,#1 ++ tsteq r5, # (3<<15) @ 2nd & 3rd character ++ @ If not the 3rd must be the last one ++ addeq r0,r0,#1 ++ ++61: ++ pop {r4,r5,r6,r7} ++ subs r0,r0,#1 ++ bx lr +diff --git a/cortex-strings/sysdeps/arm/armv7/memcpy.S b/cortex-strings/sysdeps/arm/armv7/memcpy.S +new file mode 100644 +index 0000000..3be24ca +--- /dev/null ++++ b/cortex-strings/sysdeps/arm/armv7/memcpy.S +@@ -0,0 +1,152 @@ ++/* Copyright (c) 2010-2011, Linaro Limited ++ All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ ++ * Neither the name of Linaro Limited nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++ Written by Dave Gilbert ++ ++ This memcpy routine is optimised on a Cortex-A9 and should work on ++ all ARMv7 processors with NEON. 
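++
++   (Concretely: once both pointers are word aligned and at least 32
++   bytes remain, an eight-register ldmia/stmia loop does the work;
++   buffers whose alignments differ, and very large copies -- 128k and
++   up -- take the NEON vld1.8/vst1.8 path; short copies and tails fall
++   back to word, halfword and byte moves.)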
*/ ++ ++@ 2011-09-01 david.gilbert@linaro.org ++@ Extracted from local git 2f11b436 ++ ++ .syntax unified ++ .arch armv7-a ++ ++@ this lets us check a flag in a 00/ff byte easily in either endianness ++#ifdef __ARMEB__ ++#define CHARTSTMASK(c) 1<<(31-(c*8)) ++#else ++#define CHARTSTMASK(c) 1<<(c*8) ++#endif ++ .text ++ .thumb ++ ++@ --------------------------------------------------------------------------- ++ .thumb_func ++ .align 2 ++ .p2align 4,,15 ++ .global memcpy ++ .type memcpy,%function ++memcpy: ++ @ r0 = dest ++ @ r1 = source ++ @ r2 = count ++ @ returns dest in r0 ++ @ Overlaps of source/dest not allowed according to spec ++ @ Note this routine relies on v7 misaligned loads/stores ++ pld [r1] ++ mov r12, r0 @ stash original r0 ++ cmp r2,#32 ++ blt 10f @ take the small copy case separately ++ ++ @ test for either source or destination being misaligned ++ @ (We only rely on word align) ++ tst r0,#3 ++ it eq ++ tsteq r1,#3 ++ bne 30f @ misaligned case ++ ++4: ++ @ at this point we are word (or better) aligned and have at least ++ @ 32 bytes to play with ++ ++ @ If it's a huge copy, try Neon ++ cmp r2, #128*1024 ++ bge 35f @ Sharing general non-aligned case here, aligned could be faster ++ ++ push {r3,r4,r5,r6,r7,r8,r10,r11} ++5: ++ ldmia r1!,{r3,r4,r5,r6,r7,r8,r10,r11} ++ sub r2,r2,#32 ++ pld [r1,#96] ++ cmp r2,#32 ++ stmia r0!,{r3,r4,r5,r6,r7,r8,r10,r11} ++ bge 5b ++ ++ pop {r3,r4,r5,r6,r7,r8,r10,r11} ++ @ We are now down to less than 32 bytes ++ cbz r2,15f @ quick exit for the case where we copied a multiple of 32 ++ ++10: @ small copies (not necessarily aligned - note might be slightly more than 32bytes) ++ cmp r2,#4 ++ blt 12f ++11: ++ sub r2,r2,#4 ++ cmp r2,#4 ++ ldr r3, [r1],#4 ++ str r3, [r0],#4 ++ bge 11b ++12: ++ tst r2,#2 ++ itt ne ++ ldrhne r3, [r1],#2 ++ strhne r3, [r0],#2 ++ ++ tst r2,#1 ++ itt ne ++ ldrbne r3, [r1],#1 ++ strbne r3, [r0],#1 ++ ++15: @ exit ++ mov r0,r12 @ restore r0 ++ bx lr ++ ++ .align 2 ++ .p2align 4,,15 ++30: @ non-aligned - at least 32 bytes to play with ++ @ Test for co-misalignment ++ eor r3, r0, r1 ++ tst r3,#3 ++ beq 50f ++ ++ @ Use Neon for misaligned ++35: ++ vld1.8 {d0,d1,d2,d3}, [r1]! ++ sub r2,r2,#32 ++ cmp r2,#32 ++ pld [r1,#96] ++ vst1.8 {d0,d1,d2,d3}, [r0]! ++ bge 35b ++ b 10b @ TODO: Probably a bad idea to switch to ARM at this point ++ ++ .align 2 ++ .p2align 4,,15 ++50: @ Co-misaligned ++ @ At this point we've got at least 32 bytes ++51: ++ ldrb r3,[r1],#1 ++ sub r2,r2,#1 ++ strb r3,[r0],#1 ++ tst r0,#7 ++ bne 51b ++ ++ cmp r2,#32 ++ blt 10b ++ b 4b +diff --git a/cortex-strings/sysdeps/arm/armv7/memset.S b/cortex-strings/sysdeps/arm/armv7/memset.S +new file mode 100644 +index 0000000..921cb75 +--- /dev/null ++++ b/cortex-strings/sysdeps/arm/armv7/memset.S +@@ -0,0 +1,118 @@ ++/* Copyright (c) 2010-2011, Linaro Limited ++ All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ ++ * Neither the name of Linaro Limited nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. 
++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++ Written by Dave Gilbert ++ ++ This memset routine is optimised on a Cortex-A9 and should work on ++ all ARMv7 processors. */ ++ ++ .syntax unified ++ .arch armv7-a ++ ++@ 2011-08-30 david.gilbert@linaro.org ++@ Extracted from local git 2f11b436 ++ ++@ this lets us check a flag in a 00/ff byte easily in either endianness ++#ifdef __ARMEB__ ++#define CHARTSTMASK(c) 1<<(31-(c*8)) ++#else ++#define CHARTSTMASK(c) 1<<(c*8) ++#endif ++ .text ++ .thumb ++ ++@ --------------------------------------------------------------------------- ++ .thumb_func ++ .align 2 ++ .p2align 4,,15 ++ .global memset ++ .type memset,%function ++memset: ++ @ r0 = address ++ @ r1 = character ++ @ r2 = count ++ @ returns original address in r0 ++ ++ mov r3, r0 @ Leave r0 alone ++ cbz r2, 10f @ Exit if 0 length ++ ++ tst r0, #7 ++ beq 2f @ Already aligned ++ ++ @ Ok, so we're misaligned here ++1: ++ strb r1, [r3], #1 ++ subs r2,r2,#1 ++ tst r3, #7 ++ cbz r2, 10f @ Exit if we hit the end ++ bne 1b @ go round again if still misaligned ++ ++2: ++ @ OK, so we're aligned ++ push {r4,r5,r6,r7} ++ bics r4, r2, #15 @ if less than 16 bytes then need to finish it off ++ beq 5f ++ ++3: ++ @ POSIX says that ch is cast to an unsigned char. A uxtb is one ++ @ byte and takes two cycles, where an AND is four bytes but one ++ @ cycle. ++ and r1, #0xFF ++ orr r1, r1, r1, lsl#8 @ Same character into all bytes ++ orr r1, r1, r1, lsl#16 ++ mov r5,r1 ++ mov r6,r1 ++ mov r7,r1 ++ ++4: ++ subs r4,r4,#16 ++ stmia r3!,{r1,r5,r6,r7} ++ bne 4b ++ and r2,r2,#15 ++ ++ @ At this point we're still aligned and we have upto align-1 bytes left to right ++ @ we can avoid some of the byte-at-a time now by testing for some big chunks ++ tst r2,#8 ++ itt ne ++ subne r2,r2,#8 ++ stmiane r3!,{r1,r5} ++ ++5: ++ pop {r4,r5,r6,r7} ++ cbz r2, 10f ++ ++ @ Got to do any last < alignment bytes ++6: ++ subs r2,r2,#1 ++ strb r1,[r3],#1 ++ bne 6b ++ ++10: ++ bx lr @ goodbye +diff --git a/cortex-strings/sysdeps/arm/armv7/strchr.S b/cortex-strings/sysdeps/arm/armv7/strchr.S +new file mode 100644 +index 0000000..8875dbf +--- /dev/null ++++ b/cortex-strings/sysdeps/arm/armv7/strchr.S +@@ -0,0 +1,76 @@ ++/* Copyright (c) 2010-2011, Linaro Limited ++ All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. 
++ ++ * Neither the name of Linaro Limited nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++ Written by Dave Gilbert ++ ++ A very simple strchr routine, from benchmarks on A9 it's a bit faster than ++ the current version in eglibc (2.12.1-0ubuntu14 package) ++ I don't think doing a word at a time version is worth it since a lot ++ of strchr cases are very short anyway */ ++ ++@ 2011-02-07 david.gilbert@linaro.org ++@ Extracted from local git a5b438d861 ++ ++ .syntax unified ++ .arch armv7-a ++ ++ .text ++ .thumb ++ ++@ --------------------------------------------------------------------------- ++ ++ .thumb_func ++ .align 2 ++ .p2align 4,,15 ++ .global strchr ++ .type strchr,%function ++strchr: ++ @ r0 = start of string ++ @ r1 = character to match ++ @ returns NULL for no match, or a pointer to the match ++ and r1,r1, #255 ++ ++1: ++ ldrb r2,[r0],#1 ++ cmp r2,r1 ++ cbz r2,10f ++ bne 1b ++ ++ @ We're here if it matched ++5: ++ subs r0,r0,#1 ++ bx lr ++ ++10: ++ @ We're here if we ran off the end ++ cmp r1, #0 @ Corner case - you're allowed to search for the nil and get a pointer to it ++ beq 5b @ A bit messy, if it's common we should branch at the start to a special loop ++ mov r0,#0 ++ bx lr +diff --git a/cortex-strings/sysdeps/arm/armv7/strcmp.c b/cortex-strings/sysdeps/arm/armv7/strcmp.c +new file mode 100644 +index 0000000..fb2280d +--- /dev/null ++++ b/cortex-strings/sysdeps/arm/armv7/strcmp.c +@@ -0,0 +1,449 @@ ++/* ++ * Copyright (c) 2008 ARM Ltd ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. The name of the company may not be used to endorse or promote ++ * products derived from this software without specific prior written ++ * permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
++ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED ++ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#include ++#include ++ ++#undef strcmp ++ ++ ++#ifdef __ARMEB__ ++#define SHFT2LSB "lsl" ++#define SHFT2MSB "lsr" ++#define MSB "0x000000ff" ++#define LSB "0xff000000" ++#else ++#define SHFT2LSB "lsr" ++#define SHFT2MSB "lsl" ++#define MSB "0xff000000" ++#define LSB "0x000000ff" ++#endif ++ ++#ifdef __thumb2__ ++#define magic1(REG) "#0x01010101" ++#define magic2(REG) "#0x80808080" ++#else ++#define magic1(REG) #REG ++#define magic2(REG) #REG ", lsl #7" ++#endif ++ ++int ++__attribute__((naked)) strcmp (const char* s1, const char* s2) ++{ ++ asm( ++#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \ ++ (defined (__thumb__) && !defined (__thumb2__))) ++ "pld [r0, #0]\n\t" ++ "pld [r1, #0]\n\t" ++ "eor r2, r0, r1\n\t" ++ "tst r2, #3\n\t" ++ /* Strings not at same byte offset from a word boundary. */ ++ "bne strcmp_unaligned\n\t" ++ "ands r2, r0, #3\n\t" ++ "bic r0, r0, #3\n\t" ++ "bic r1, r1, #3\n\t" ++ "ldr ip, [r0], #4\n\t" ++ "it eq\n\t" ++ "ldreq r3, [r1], #4\n\t" ++ "beq 1f\n\t" ++ /* Although s1 and s2 have identical initial alignment, they are ++ not currently word aligned. Rather than comparing bytes, ++ make sure that any bytes fetched from before the addressed ++ bytes are forced to 0xff. Then they will always compare ++ equal. */ ++ "eor r2, r2, #3\n\t" ++ "lsl r2, r2, #3\n\t" ++ "mvn r3, #"MSB"\n\t" ++ SHFT2LSB" r2, r3, r2\n\t" ++ "ldr r3, [r1], #4\n\t" ++ "orr ip, ip, r2\n\t" ++ "orr r3, r3, r2\n" ++ "1:\n\t" ++#ifndef __thumb2__ ++ /* Load the 'magic' constant 0x01010101. */ ++ "str r4, [sp, #-4]!\n\t" ++ "mov r4, #1\n\t" ++ "orr r4, r4, r4, lsl #8\n\t" ++ "orr r4, r4, r4, lsl #16\n" ++#endif ++ ".p2align 2\n" ++ "4:\n\t" ++ "pld [r0, #8]\n\t" ++ "pld [r1, #8]\n\t" ++ "sub r2, ip, "magic1(r4)"\n\t" ++ "cmp ip, r3\n\t" ++ "itttt eq\n\t" ++ /* check for any zero bytes in first word */ ++ "biceq r2, r2, ip\n\t" ++ "tsteq r2, "magic2(r4)"\n\t" ++ "ldreq ip, [r0], #4\n\t" ++ "ldreq r3, [r1], #4\n\t" ++ "beq 4b\n" ++ "2:\n\t" ++ /* There's a zero or a different byte in the word */ ++ SHFT2MSB" r0, ip, #24\n\t" ++ SHFT2LSB" ip, ip, #8\n\t" ++ "cmp r0, #1\n\t" ++ "it cs\n\t" ++ "cmpcs r0, r3, "SHFT2MSB" #24\n\t" ++ "it eq\n\t" ++ SHFT2LSB"eq r3, r3, #8\n\t" ++ "beq 2b\n\t" ++ /* On a big-endian machine, r0 contains the desired byte in bits ++ 0-7; on a little-endian machine they are in bits 24-31. In ++ both cases the other bits in r0 are all zero. For r3 the ++ interesting byte is at the other end of the word, but the ++ other bits are not necessarily zero. We need a signed result ++ representing the differnece in the unsigned bytes, so for the ++ little-endian case we can't just shift the interesting bits ++ up. 
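++
++   (Only the sign of the result matters, but the comparison is defined
++   on unsigned chars: for bytes 0x80 vs 0x01 the result must come out
++   positive, which is why the interesting bytes are isolated to the
++   low bits before the final subtract.)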
*/ ++#ifdef __ARMEB__ ++ "sub r0, r0, r3, lsr #24\n\t" ++#else ++ "and r3, r3, #255\n\t" ++#ifdef __thumb2__ ++ /* No RSB instruction in Thumb2 */ ++ "lsr r0, r0, #24\n\t" ++ "sub r0, r0, r3\n\t" ++#else ++ "rsb r0, r3, r0, lsr #24\n\t" ++#endif ++#endif ++#ifndef __thumb2__ ++ "ldr r4, [sp], #4\n\t" ++#endif ++ "BX LR" ++#elif (defined (__thumb__) && !defined (__thumb2__)) ++ "1:\n\t" ++ "ldrb r2, [r0]\n\t" ++ "ldrb r3, [r1]\n\t" ++ "add r0, r0, #1\n\t" ++ "add r1, r1, #1\n\t" ++ "cmp r2, #0\n\t" ++ "beq 2f\n\t" ++ "cmp r2, r3\n\t" ++ "beq 1b\n\t" ++ "2:\n\t" ++ "sub r0, r2, r3\n\t" ++ "bx lr" ++#else ++ "3:\n\t" ++ "ldrb r2, [r0], #1\n\t" ++ "ldrb r3, [r1], #1\n\t" ++ "cmp r2, #1\n\t" ++ "it cs\n\t" ++ "cmpcs r2, r3\n\t" ++ "beq 3b\n\t" ++ "sub r0, r2, r3\n\t" ++ "BX LR" ++#endif ++ ); ++} ++ ++#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \ ++ (defined (__thumb__) && !defined (__thumb2__))) ++static int __attribute__((naked, used)) ++strcmp_unaligned(const char* s1, const char* s2) ++{ ++#if 0 ++ /* The assembly code below is based on the following alogrithm. */ ++#ifdef __ARMEB__ ++#define RSHIFT << ++#define LSHIFT >> ++#else ++#define RSHIFT >> ++#define LSHIFT << ++#endif ++ ++#define body(shift) \ ++ mask = 0xffffffffU RSHIFT shift; \ ++ w1 = *wp1++; \ ++ w2 = *wp2++; \ ++ do \ ++ { \ ++ t1 = w1 & mask; \ ++ if (__builtin_expect(t1 != w2 RSHIFT shift, 0)) \ ++ { \ ++ w2 RSHIFT= shift; \ ++ break; \ ++ } \ ++ if (__builtin_expect(((w1 - b1) & ~w1) & (b1 << 7), 0)) \ ++ { \ ++ /* See comment in assembler below re syndrome on big-endian */\ ++ if ((((w1 - b1) & ~w1) & (b1 << 7)) & mask) \ ++ w2 RSHIFT= shift; \ ++ else \ ++ { \ ++ w2 = *wp2; \ ++ t1 = w1 RSHIFT (32 - shift); \ ++ w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \ ++ } \ ++ break; \ ++ } \ ++ w2 = *wp2++; \ ++ t1 ^= w1; \ ++ if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0)) \ ++ { \ ++ t1 = w1 >> (32 - shift); \ ++ w2 = (w2 << (32 - shift)) RSHIFT (32 - shift); \ ++ break; \ ++ } \ ++ w1 = *wp1++; \ ++ } while (1) ++ ++ const unsigned* wp1; ++ const unsigned* wp2; ++ unsigned w1, w2; ++ unsigned mask; ++ unsigned shift; ++ unsigned b1 = 0x01010101; ++ char c1, c2; ++ unsigned t1; ++ ++ while (((unsigned) s1) & 3) ++ { ++ c1 = *s1++; ++ c2 = *s2++; ++ if (c1 == 0 || c1 != c2) ++ return c1 - (int)c2; ++ } ++ wp1 = (unsigned*) (((unsigned)s1) & ~3); ++ wp2 = (unsigned*) (((unsigned)s2) & ~3); ++ t1 = ((unsigned) s2) & 3; ++ if (t1 == 1) ++ { ++ body(8); ++ } ++ else if (t1 == 2) ++ { ++ body(16); ++ } ++ else ++ { ++ body (24); ++ } ++ ++ do ++ { ++#ifdef __ARMEB__ ++ c1 = (char) t1 >> 24; ++ c2 = (char) w2 >> 24; ++#else ++ c1 = (char) t1; ++ c2 = (char) w2; ++#endif ++ t1 RSHIFT= 8; ++ w2 RSHIFT= 8; ++ } while (c1 != 0 && c1 == c2); ++ return c1 - c2; ++#endif ++ ++ asm("wp1 .req r0\n\t" ++ "wp2 .req r1\n\t" ++ "b1 .req r2\n\t" ++ "w1 .req r4\n\t" ++ "w2 .req r5\n\t" ++ "t1 .req ip\n\t" ++ "@ r3 is scratch\n" ++ ++ /* First of all, compare bytes until wp1(sp1) is word-aligned. 
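++
++   (The cmp r2, #1 / it cs / cmpcs r2, r3 idiom below folds both exit
++   conditions into one test: carry is clear only when the s1 byte was
++   NUL, and the conditional compare then checks the two bytes for
++   equality, so a single beq keeps the loop running.)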
*/ ++ "1:\n\t" ++ "tst wp1, #3\n\t" ++ "beq 2f\n\t" ++ "ldrb r2, [wp1], #1\n\t" ++ "ldrb r3, [wp2], #1\n\t" ++ "cmp r2, #1\n\t" ++ "it cs\n\t" ++ "cmpcs r2, r3\n\t" ++ "beq 1b\n\t" ++ "sub r0, r2, r3\n\t" ++ "BX LR\n" ++ ++ "2:\n\t" ++ "str r5, [sp, #-4]!\n\t" ++ "str r4, [sp, #-4]!\n\t" ++ // "stmfd sp!, {r4, r5}\n\t" ++ "mov b1, #1\n\t" ++ "orr b1, b1, b1, lsl #8\n\t" ++ "orr b1, b1, b1, lsl #16\n\t" ++ ++ "and t1, wp2, #3\n\t" ++ "bic wp2, wp2, #3\n\t" ++ "ldr w1, [wp1], #4\n\t" ++ "ldr w2, [wp2], #4\n\t" ++ "cmp t1, #2\n\t" ++ "beq 2f\n\t" ++ "bhi 3f\n" ++ ++ /* Critical inner Loop: Block with 3 bytes initial overlap */ ++ ".p2align 2\n" ++ "1:\n\t" ++ "bic t1, w1, #"MSB"\n\t" ++ "cmp t1, w2, "SHFT2LSB" #8\n\t" ++ "sub r3, w1, b1\n\t" ++ "bic r3, r3, w1\n\t" ++ "bne 4f\n\t" ++ "ands r3, r3, b1, lsl #7\n\t" ++ "it eq\n\t" ++ "ldreq w2, [wp2], #4\n\t" ++ "bne 5f\n\t" ++ "eor t1, t1, w1\n\t" ++ "cmp t1, w2, "SHFT2MSB" #24\n\t" ++ "bne 6f\n\t" ++ "ldr w1, [wp1], #4\n\t" ++ "b 1b\n" ++ "4:\n\t" ++ SHFT2LSB" w2, w2, #8\n\t" ++ "b 8f\n" ++ ++ "5:\n\t" ++#ifdef __ARMEB__ ++ /* The syndrome value may contain false ones if the string ends ++ with the bytes 0x01 0x00 */ ++ "tst w1, #0xff000000\n\t" ++ "itt ne\n\t" ++ "tstne w1, #0x00ff0000\n\t" ++ "tstne w1, #0x0000ff00\n\t" ++ "beq 7f\n\t" ++#else ++ "bics r3, r3, #0xff000000\n\t" ++ "bne 7f\n\t" ++#endif ++ "ldrb w2, [wp2]\n\t" ++ SHFT2LSB" t1, w1, #24\n\t" ++#ifdef __ARMEB__ ++ "lsl w2, w2, #24\n\t" ++#endif ++ "b 8f\n" ++ ++ "6:\n\t" ++ SHFT2LSB" t1, w1, #24\n\t" ++ "and w2, w2, #"LSB"\n\t" ++ "b 8f\n" ++ ++ /* Critical inner Loop: Block with 2 bytes initial overlap */ ++ ".p2align 2\n" ++ "2:\n\t" ++ SHFT2MSB" t1, w1, #16\n\t" ++ "sub r3, w1, b1\n\t" ++ SHFT2LSB" t1, t1, #16\n\t" ++ "bic r3, r3, w1\n\t" ++ "cmp t1, w2, "SHFT2LSB" #16\n\t" ++ "bne 4f\n\t" ++ "ands r3, r3, b1, lsl #7\n\t" ++ "it eq\n\t" ++ "ldreq w2, [wp2], #4\n\t" ++ "bne 5f\n\t" ++ "eor t1, t1, w1\n\t" ++ "cmp t1, w2, "SHFT2MSB" #16\n\t" ++ "bne 6f\n\t" ++ "ldr w1, [wp1], #4\n\t" ++ "b 2b\n" ++ ++ "5:\n\t" ++#ifdef __ARMEB__ ++ /* The syndrome value may contain false ones if the string ends ++ with the bytes 0x01 0x00 */ ++ "tst w1, #0xff000000\n\t" ++ "it ne\n\t" ++ "tstne w1, #0x00ff0000\n\t" ++ "beq 7f\n\t" ++#else ++ "lsls r3, r3, #16\n\t" ++ "bne 7f\n\t" ++#endif ++ "ldrh w2, [wp2]\n\t" ++ SHFT2LSB" t1, w1, #16\n\t" ++#ifdef __ARMEB__ ++ "lsl w2, w2, #16\n\t" ++#endif ++ "b 8f\n" ++ ++ "6:\n\t" ++ SHFT2MSB" w2, w2, #16\n\t" ++ SHFT2LSB" t1, w1, #16\n\t" ++ "4:\n\t" ++ SHFT2LSB" w2, w2, #16\n\t" ++ "b 8f\n\t" ++ ++ /* Critical inner Loop: Block with 1 byte initial overlap */ ++ ".p2align 2\n" ++ "3:\n\t" ++ "and t1, w1, #"LSB"\n\t" ++ "cmp t1, w2, "SHFT2LSB" #24\n\t" ++ "sub r3, w1, b1\n\t" ++ "bic r3, r3, w1\n\t" ++ "bne 4f\n\t" ++ "ands r3, r3, b1, lsl #7\n\t" ++ "it eq\n\t" ++ "ldreq w2, [wp2], #4\n\t" ++ "bne 5f\n\t" ++ "eor t1, t1, w1\n\t" ++ "cmp t1, w2, "SHFT2MSB" #8\n\t" ++ "bne 6f\n\t" ++ "ldr w1, [wp1], #4\n\t" ++ "b 3b\n" ++ "4:\n\t" ++ SHFT2LSB" w2, w2, #24\n\t" ++ "b 8f\n" ++ "5:\n\t" ++ /* The syndrome value may contain false ones if the string ends ++ with the bytes 0x01 0x00 */ ++ "tst w1, #"LSB"\n\t" ++ "beq 7f\n\t" ++ "ldr w2, [wp2], #4\n" ++ "6:\n\t" ++ SHFT2LSB" t1, w1, #8\n\t" ++ "bic w2, w2, #"MSB"\n\t" ++ "b 8f\n" ++ "7:\n\t" ++ "mov r0, #0\n\t" ++ // "ldmfd sp!, {r4, r5}\n\t" ++ "ldr r4, [sp], #4\n\t" ++ "ldr r5, [sp], #4\n\t" ++ "BX LR\n" ++ "8:\n\t" ++ "and r2, t1, #"LSB"\n\t" ++ "and r0, w2, #"LSB"\n\t" ++ "cmp r0, #1\n\t" ++ "it cs\n\t" ++ 
"cmpcs r0, r2\n\t" ++ "itt eq\n\t" ++ SHFT2LSB"eq t1, t1, #8\n\t" ++ SHFT2LSB"eq w2, w2, #8\n\t" ++ "beq 8b\n\t" ++ "sub r0, r2, r0\n\t" ++ // "ldmfd sp!, {r4, r5}\n\t" ++ "ldr r4, [sp], #4\n\t" ++ "ldr r5, [sp], #4\n\t" ++ "BX LR"); ++} ++ ++#endif ++ ++libc_hidden_builtin_def (strcmp) +diff --git a/cortex-strings/sysdeps/arm/armv7/strcpy.c b/cortex-strings/sysdeps/arm/armv7/strcpy.c +new file mode 100644 +index 0000000..aa8cb06 +--- /dev/null ++++ b/cortex-strings/sysdeps/arm/armv7/strcpy.c +@@ -0,0 +1,172 @@ ++/* ++ * Copyright (c) 2008 ARM Ltd ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. The name of the company may not be used to endorse or promote ++ * products derived from this software without specific prior written ++ * permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. ++ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED ++ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#include ++#include ++ ++#undef strcmp ++ ++ ++#ifdef __thumb2__ ++#define magic1(REG) "#0x01010101" ++#define magic2(REG) "#0x80808080" ++#else ++#define magic1(REG) #REG ++#define magic2(REG) #REG ", lsl #7" ++#endif ++ ++char* __attribute__((naked)) ++strcpy (char* dst, const char* src) ++{ ++ asm ( ++#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \ ++ (defined (__thumb__) && !defined (__thumb2__))) ++ "pld [r1, #0]\n\t" ++ "eor r2, r0, r1\n\t" ++ "mov ip, r0\n\t" ++ "tst r2, #3\n\t" ++ "bne 4f\n\t" ++ "tst r1, #3\n\t" ++ "bne 3f\n" ++ "5:\n\t" ++#ifndef __thumb2__ ++ "str r5, [sp, #-4]!\n\t" ++ "mov r5, #0x01\n\t" ++ "orr r5, r5, r5, lsl #8\n\t" ++ "orr r5, r5, r5, lsl #16\n\t" ++#endif ++ ++ "str r4, [sp, #-4]!\n\t" ++ "tst r1, #4\n\t" ++ "ldr r3, [r1], #4\n\t" ++ "beq 2f\n\t" ++ "sub r2, r3, "magic1(r5)"\n\t" ++ "bics r2, r2, r3\n\t" ++ "tst r2, "magic2(r5)"\n\t" ++ "itt eq\n\t" ++ "streq r3, [ip], #4\n\t" ++ "ldreq r3, [r1], #4\n" ++ "bne 1f\n\t" ++ /* Inner loop. We now know that r1 is 64-bit aligned, so we ++ can safely fetch up to two words. This allows us to avoid ++ load stalls. 
*/ ++ ".p2align 2\n" ++ "2:\n\t" ++ "pld [r1, #8]\n\t" ++ "ldr r4, [r1], #4\n\t" ++ "sub r2, r3, "magic1(r5)"\n\t" ++ "bics r2, r2, r3\n\t" ++ "tst r2, "magic2(r5)"\n\t" ++ "sub r2, r4, "magic1(r5)"\n\t" ++ "bne 1f\n\t" ++ "str r3, [ip], #4\n\t" ++ "bics r2, r2, r4\n\t" ++ "tst r2, "magic2(r5)"\n\t" ++ "itt eq\n\t" ++ "ldreq r3, [r1], #4\n\t" ++ "streq r4, [ip], #4\n\t" ++ "beq 2b\n\t" ++ "mov r3, r4\n" ++ "1:\n\t" ++#ifdef __ARMEB__ ++ "rors r3, r3, #24\n\t" ++#endif ++ "strb r3, [ip], #1\n\t" ++ "tst r3, #0xff\n\t" ++#ifdef __ARMEL__ ++ "ror r3, r3, #8\n\t" ++#endif ++ "bne 1b\n\t" ++ "ldr r4, [sp], #4\n\t" ++#ifndef __thumb2__ ++ "ldr r5, [sp], #4\n\t" ++#endif ++ "BX LR\n" ++ ++ /* Strings have the same offset from word alignment, but it's ++ not zero. */ ++ "3:\n\t" ++ "tst r1, #1\n\t" ++ "beq 1f\n\t" ++ "ldrb r2, [r1], #1\n\t" ++ "strb r2, [ip], #1\n\t" ++ "cmp r2, #0\n\t" ++ "it eq\n" ++ "BXEQ LR\n" ++ "1:\n\t" ++ "tst r1, #2\n\t" ++ "beq 5b\n\t" ++ "ldrh r2, [r1], #2\n\t" ++#ifdef __ARMEB__ ++ "tst r2, #0xff00\n\t" ++ "iteet ne\n\t" ++ "strneh r2, [ip], #2\n\t" ++ "lsreq r2, r2, #8\n\t" ++ "streqb r2, [ip]\n\t" ++ "tstne r2, #0xff\n\t" ++#else ++ "tst r2, #0xff\n\t" ++ "itet ne\n\t" ++ "strneh r2, [ip], #2\n\t" ++ "streqb r2, [ip]\n\t" ++ "tstne r2, #0xff00\n\t" ++#endif ++ "bne 5b\n\t" ++ "BX LR\n" ++ ++ /* src and dst do not have a common word-alignement. Fall back to ++ byte copying. */ ++ "4:\n\t" ++ "ldrb r2, [r1], #1\n\t" ++ "strb r2, [ip], #1\n\t" ++ "cmp r2, #0\n\t" ++ "bne 4b\n\t" ++ "BX LR" ++ ++#elif !defined (__thumb__) || defined (__thumb2__) ++ "mov r3, r0\n\t" ++ "1:\n\t" ++ "ldrb r2, [r1], #1\n\t" ++ "strb r2, [r3], #1\n\t" ++ "cmp r2, #0\n\t" ++ "bne 1b\n\t" ++ "BX LR" ++#else ++ "mov r3, r0\n\t" ++ "1:\n\t" ++ "ldrb r2, [r1]\n\t" ++ "add r1, r1, #1\n\t" ++ "strb r2, [r3]\n\t" ++ "add r3, r3, #1\n\t" ++ "cmp r2, #0\n\t" ++ "bne 1b\n\t" ++ "BX LR" ++#endif ++ ); ++} ++libc_hidden_builtin_def (strcpy) +diff --git a/cortex-strings/sysdeps/arm/armv7/strlen.S b/cortex-strings/sysdeps/arm/armv7/strlen.S +new file mode 100644 +index 0000000..125e92f +--- /dev/null ++++ b/cortex-strings/sysdeps/arm/armv7/strlen.S +@@ -0,0 +1,111 @@ ++/* Copyright (c) 2010-2011, Linaro Limited ++ All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ ++ * Neither the name of Linaro Limited nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT ++ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++ Written by Dave Gilbert ++ ++ This strlen routine is optimised on a Cortex-A9 and should work on ++ all ARMv7 processors. This routine is reasonably fast for short ++ strings, but is probably slower than a simple implementation if all ++ your strings are very short */ ++ ++@ 2011-02-08 david.gilbert@linaro.org ++@ Extracted from local git 6848613a ++ ++ ++@ this lets us check a flag in a 00/ff byte easily in either endianness ++#ifdef __ARMEB__ ++#define CHARTSTMASK(c) 1<<(31-(c*8)) ++#else ++#define CHARTSTMASK(c) 1<<(c*8) ++#endif ++ ++@----------------------------------------------------------------------------------------------------------------------------- ++ .syntax unified ++ .arch armv7-a ++ ++ .thumb_func ++ .align 2 ++ .p2align 4,,15 ++ .global strlen ++ .type strlen,%function ++strlen: ++ @ r0 = string ++ @ returns count of bytes in string not including terminator ++ mov r1, r0 ++ push { r4,r6 } ++ mvns r6, #0 @ all F ++ movs r4, #0 ++ tst r0, #7 ++ beq 2f ++ ++1: ++ ldrb r2, [r1], #1 ++ tst r1, #7 @ Hit alignment yet? ++ cbz r2, 10f @ Exit if we found the 0 ++ bne 1b ++ ++ @ So we're now aligned ++2: ++ ldmia r1!,{r2,r3} ++ uadd8 r2, r2, r6 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 ++ sel r2, r4, r6 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION ++ uadd8 r3, r3, r6 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 ++ sel r3, r2, r6 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION ++ cmp r3, #0 ++ beq 2b ++ ++strlenendtmp: ++ @ One (or more) of the bytes we loaded was 0 - but which one? 
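++ @ (Recall how the masks were built: uadd8 adds 0xff to every byte and
++ @ sets that byte's GE flag only if the add carried, i.e. the byte was
++ @ non-zero; sel then picks 0x00 for flagged bytes and 0xff otherwise,
++ @ leaving 0xff exactly where the string had a 0 byte.)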
++ @ r2 has the mask corresponding to the first loaded word ++ @ r3 has a combined mask of the two words - but if r2 was all-non 0 ++ @ then it's just the 2nd words ++ cmp r2, #0 ++ itte eq ++ moveq r2, r3 @ the end is in the 2nd word ++ subeq r1,r1,#3 ++ subne r1,r1,#7 ++ ++ @ r1 currently points to the 2nd byte of the word containing the 0 ++ tst r2, # CHARTSTMASK(0) @ 1st character ++ bne 10f ++ adds r1,r1,#1 ++ tst r2, # CHARTSTMASK(1) @ 2nd character ++ ittt eq ++ addeq r1,r1,#1 ++ tsteq r2, # (3<<15) @ 2nd & 3rd character ++ @ If not the 3rd must be the last one ++ addeq r1,r1,#1 ++ ++10: ++ @ r0 is still at the beginning, r1 is pointing 1 byte after the terminator ++ sub r0, r1, r0 ++ subs r0, r0, #1 ++ pop { r4, r6 } ++ bx lr diff --git a/patches/glibc-2.16.0/0301-add-libc_hidden_builtin_def-for-all-cortex-functions.patch b/patches/glibc-2.16.0/0301-add-libc_hidden_builtin_def-for-all-cortex-functions.patch new file mode 100644 index 0000000..be6993d --- /dev/null +++ b/patches/glibc-2.16.0/0301-add-libc_hidden_builtin_def-for-all-cortex-functions.patch @@ -0,0 +1,76 @@ +From: Michael Olbrich +Date: Thu, 15 Sep 2011 23:30:25 +0200 +Subject: [PATCH] add libc_hidden_builtin_def for all cortex functions + +Signed-off-by: Michael Olbrich +--- + cortex-strings/sysdeps/arm/armv7/memchr.S | 3 +++ + cortex-strings/sysdeps/arm/armv7/memcpy.S | 2 ++ + cortex-strings/sysdeps/arm/armv7/memset.S | 2 ++ + cortex-strings/sysdeps/arm/armv7/strchr.S | 3 +++ + cortex-strings/sysdeps/arm/armv7/strcpy.c | 1 + + cortex-strings/sysdeps/arm/armv7/strlen.S | 2 ++ + 6 files changed, 13 insertions(+) + +diff --git a/cortex-strings/sysdeps/arm/armv7/memchr.S b/cortex-strings/sysdeps/arm/armv7/memchr.S +index 8f5aaa9..6d497cb 100644 +--- a/cortex-strings/sysdeps/arm/armv7/memchr.S ++++ b/cortex-strings/sysdeps/arm/armv7/memchr.S +@@ -148,3 +148,6 @@ memchr: + pop {r4,r5,r6,r7} + subs r0,r0,#1 + bx lr ++ ++strong_alias (memchr, __memchr) ++libc_hidden_builtin_def (memchr) +diff --git a/cortex-strings/sysdeps/arm/armv7/memcpy.S b/cortex-strings/sysdeps/arm/armv7/memcpy.S +index 3be24ca..c274207 100644 +--- a/cortex-strings/sysdeps/arm/armv7/memcpy.S ++++ b/cortex-strings/sysdeps/arm/armv7/memcpy.S +@@ -150,3 +150,5 @@ memcpy: + cmp r2,#32 + blt 10b + b 4b ++ ++libc_hidden_builtin_def (memcpy) +diff --git a/cortex-strings/sysdeps/arm/armv7/memset.S b/cortex-strings/sysdeps/arm/armv7/memset.S +index 921cb75..d4c12a4 100644 +--- a/cortex-strings/sysdeps/arm/armv7/memset.S ++++ b/cortex-strings/sysdeps/arm/armv7/memset.S +@@ -116,3 +116,5 @@ memset: + + 10: + bx lr @ goodbye ++ ++libc_hidden_builtin_def (memset) +diff --git a/cortex-strings/sysdeps/arm/armv7/strchr.S b/cortex-strings/sysdeps/arm/armv7/strchr.S +index 8875dbf..05c832f 100644 +--- a/cortex-strings/sysdeps/arm/armv7/strchr.S ++++ b/cortex-strings/sysdeps/arm/armv7/strchr.S +@@ -74,3 +74,6 @@ strchr: + beq 5b @ A bit messy, if it's common we should branch at the start to a special loop + mov r0,#0 + bx lr ++ ++weak_alias (strchr, index) ++libc_hidden_builtin_def (strchr) +diff --git a/cortex-strings/sysdeps/arm/armv7/strcpy.c b/cortex-strings/sysdeps/arm/armv7/strcpy.c +index aa8cb06..3bbaa86 100644 +--- a/cortex-strings/sysdeps/arm/armv7/strcpy.c ++++ b/cortex-strings/sysdeps/arm/armv7/strcpy.c +@@ -169,4 +169,5 @@ strcpy (char* dst, const char* src) + #endif + ); + } ++ + libc_hidden_builtin_def (strcpy) +diff --git a/cortex-strings/sysdeps/arm/armv7/strlen.S b/cortex-strings/sysdeps/arm/armv7/strlen.S +index 125e92f..a1e02ad 100644 +--- 
a/cortex-strings/sysdeps/arm/armv7/strlen.S
++++ b/cortex-strings/sysdeps/arm/armv7/strlen.S
+@@ -109,3 +109,5 @@ strlenendtmp:
+ subs	r0, r0, #1
+ pop	{ r4, r6 }
+ bx	lr
+++
+++libc_hidden_builtin_def (strlen)
diff --git a/patches/glibc-2.16.0/series b/patches/glibc-2.16.0/series
index bfa7e1f..b70db9b 100644
--- a/patches/glibc-2.16.0/series
+++ b/patches/glibc-2.16.0/series
@@ -1,13 +1,13 @@
 # generated by git-ptx-patches
 #tag:base --start-number 1
+#tag:build-system --start-number 1
 0001-add-install-lib-all-target.patch
 0002-don-t-regen-docs-if-perl-is-not-found.patch
-0003-i386-x86_64-revert-clone-cfi.patch
-0004-queue-header-updates.patch
-0005-localedef-fix-trampoline.patch
-0006-resolv-dynamic.patch
-0007-section-comments.patch
-0008-fpscr-values.patch
-0009-optimized-string-functions-for-NEON-from-Linaro.patch
-0010-add-libc_hidden_builtin_def-for-all-cortex-functions.patch
-# 836d22209095126bf71de80d57df7f6b - git-ptx-patches magic
+#tag:debian --start-number 100
+0100-Fix-localedef-segfault-when-run-under-exec-shield-Pa.patch
+#tag:gentoo --start-number 200
+0200-resolv-dynamic.patch
+#tag:linaro --start-number 300
+0300-optimized-string-functions-for-NEON-from-Linaro.patch
+0301-add-libc_hidden_builtin_def-for-all-cortex-functions.patch
+# a4ec378a1364c1c28fb1ae34aab4f545 - git-ptx-patches magic