summaryrefslogtreecommitdiffstats
path: root/patches/glibc-2.13/0024-optimized-string-functions-for-NEON-from-Linaro.patch
diff options
context:
space:
mode:
Diffstat (limited to 'patches/glibc-2.13/0024-optimized-string-functions-for-NEON-from-Linaro.patch')
-rw-r--r--patches/glibc-2.13/0024-optimized-string-functions-for-NEON-from-Linaro.patch1295
1 files changed, 0 insertions, 1295 deletions
diff --git a/patches/glibc-2.13/0024-optimized-string-functions-for-NEON-from-Linaro.patch b/patches/glibc-2.13/0024-optimized-string-functions-for-NEON-from-Linaro.patch
deleted file mode 100644
index 3e56ec8..0000000
--- a/patches/glibc-2.13/0024-optimized-string-functions-for-NEON-from-Linaro.patch
+++ /dev/null
@@ -1,1295 +0,0 @@
-From: Michael Olbrich <m.olbrich@pengutronix.de>
-Date: Thu, 15 Sep 2011 16:50:56 +0200
-Subject: [PATCH] optimized string functions for NEON from Linaro
-
-Signed-off-by: Michael Olbrich <m.olbrich@pengutronix.de>
----
- .../sysdeps/arm/eabi/arm/cortex-a8/memchr.S | 150 +++++++
- .../sysdeps/arm/eabi/arm/cortex-a8/memcpy.S | 152 +++++++
- .../sysdeps/arm/eabi/arm/cortex-a8/memset.S | 118 +++++
- .../sysdeps/arm/eabi/arm/cortex-a8/strchr.S | 76 ++++
- .../sysdeps/arm/eabi/arm/cortex-a8/strcmp.c | 449 ++++++++++++++++++++
- .../sysdeps/arm/eabi/arm/cortex-a8/strcpy.c | 172 ++++++++
- .../sysdeps/arm/eabi/arm/cortex-a8/strlen.S | 111 +++++
- 7 files changed, 1228 insertions(+), 0 deletions(-)
- create mode 100644 cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/memchr.S
- create mode 100644 cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/memcpy.S
- create mode 100644 cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/memset.S
- create mode 100644 cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/strchr.S
- create mode 100644 cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/strcmp.c
- create mode 100644 cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/strcpy.c
- create mode 100644 cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/strlen.S
-
-diff --git a/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/memchr.S b/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/memchr.S
-new file mode 100644
-index 0000000..8f5aaa9
---- /dev/null
-+++ b/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/memchr.S
-@@ -0,0 +1,150 @@
-+/* Copyright (c) 2010-2011, Linaro Limited
-+ All rights reserved.
-+
-+ Redistribution and use in source and binary forms, with or without
-+ modification, are permitted provided that the following conditions
-+ are met:
-+
-+ * Redistributions of source code must retain the above copyright
-+ notice, this list of conditions and the following disclaimer.
-+
-+ * Redistributions in binary form must reproduce the above copyright
-+ notice, this list of conditions and the following disclaimer in the
-+ documentation and/or other materials provided with the distribution.
-+
-+ * Neither the name of Linaro Limited nor the names of its
-+ contributors may be used to endorse or promote products derived
-+ from this software without specific prior written permission.
-+
-+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+
-+ Written by Dave Gilbert <david.gilbert@linaro.org>
-+
-+ This memchr routine is optimised on a Cortex-A9 and should work on
-+ all ARMv7 processors. It has a fast path for short sizes, and has
-+ an optimised path for large data sets; the worst case is finding the
-+ match early in a large data set. */
-+
-+@ 2011-02-07 david.gilbert@linaro.org
-+@ Extracted from local git a5b438d861
-+@ 2011-07-14 david.gilbert@linaro.org
-+@ Import endianness fix from local git ea786f1b
-+
-+ .syntax unified
-+ .arch armv7-a
-+
-+@ this lets us check a flag in a 00/ff byte easily in either endianness
-+#ifdef __ARMEB__
-+#define CHARTSTMASK(c) 1<<(31-(c*8))
-+#else
-+#define CHARTSTMASK(c) 1<<(c*8)
-+#endif
-+ .text
-+ .thumb
-+
-+@ ---------------------------------------------------------------------------
-+ .thumb_func
-+ .align 2
-+ .p2align 4,,15
-+ .global memchr
-+ .type memchr,%function
-+memchr:
-+ @ r0 = start of memory to scan
-+ @ r1 = character to look for (masked to its low 8 bits below)
-+ @ r2 = length in bytes
-+ @ returns r0 = pointer to character or NULL if not found
-+ and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char
-+
-+ cmp r2,#16 @ If it's short don't bother with anything clever
-+ blt 20f
-+
-+ tst r0, #7 @ If it's already aligned skip the next bit
-+ beq 10f
-+
-+ @ Work up to an aligned point
-+5:
-+ ldrb r3, [r0],#1
-+ subs r2, r2, #1
-+ cmp r3, r1
-+ beq 50f @ If it matches exit found
-+ tst r0, #7
-+ cbz r2, 40f @ If we run off the end, exit not found (cbz leaves the tst flags alone)
-+ bne 5b @ If not aligned yet then do next byte
-+
-+10:
-+ @ At this point, we are aligned, we know we have at least 8 bytes to work with
-+ push {r4,r5,r6,r7}
-+ orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes
-+ orr r1, r1, r1, lsl #16
-+ bic r4, r2, #7 @ Byte count covered by whole double words (r2 rounded down to a multiple of 8)
-+ mvns r7, #0 @ all F's
-+ movs r3, #0
-+
-+15:
-+ ldmia r0!,{r5,r6}
-+ subs r4, r4, #8 @ These flags are consumed by the bne at the bottom of the loop
-+ eor r5,r5, r1 @ Get it so that r5,r6 have 00's where the bytes match the target
-+ eor r6,r6, r1
-+ uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
-+ sel r5, r3, r7 @ bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
-+ uadd8 r6, r6, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
-+ sel r6, r5, r7 @ chained: takes r5's byte where the 2nd word didn't match, ff where it did - NOTE INVERSION
-+ cbnz r6, 60f @ Non-zero means a hit somewhere in either word
-+ bne 15b @ (Flags from the subs above) If not run out of bytes then go around again
-+
-+ pop {r4,r5,r6,r7}
-+ and r1,r1,#0xff @ Get r1 back to a single character from the expansion above
-+ and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done
-+
-+20:
-+ cbz r2, 40f @ 0 length or hit the end already then not found
-+
-+21: @ Post aligned section, or just a short call
-+ ldrb r3,[r0],#1
-+ subs r2,r2,#1
-+ eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub
-+ cbz r3, 50f
-+ bne 21b @ on r2 flags
-+
-+40:
-+ movs r0,#0 @ not found
-+ bx lr
-+
-+50:
-+ subs r0,r0,#1 @ found - undo the post-increment of the byte load
-+ bx lr
-+
-+60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was
-+ @ r0 points to the start of the double word after the one that was tested
-+ @ r5 has the 00/ff pattern for the first word, r6 has the chained value
-+ cmp r5, #0
-+ itte eq
-+ moveq r5, r6 @ the end is in the 2nd word
-+ subeq r0,r0,#3 @ Points to 2nd byte of 2nd word
-+ subne r0,r0,#7 @ or 2nd byte of 1st word
-+
-+ @ r0 currently points to the 2nd byte of the word containing the hit
-+ tst r5, # CHARTSTMASK(0) @ 1st character
-+ bne 61f
-+ adds r0,r0,#1
-+ tst r5, # CHARTSTMASK(1) @ 2nd character
-+ ittt eq
-+ addeq r0,r0,#1
-+ tsteq r5, # (3<<15) @ 2nd & 3rd character (2nd is already known clear here, so this tests the 3rd)
-+ @ If not the 3rd must be the last one
-+ addeq r0,r0,#1
-+
-+61:
-+ pop {r4,r5,r6,r7}
-+ subs r0,r0,#1 @ r0 is one past the hit at this point - step back onto it
-+ bx lr
-diff --git a/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/memcpy.S b/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/memcpy.S
-new file mode 100644
-index 0000000..3be24ca
---- /dev/null
-+++ b/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/memcpy.S
-@@ -0,0 +1,152 @@
-+/* Copyright (c) 2010-2011, Linaro Limited
-+ All rights reserved.
-+
-+ Redistribution and use in source and binary forms, with or without
-+ modification, are permitted provided that the following conditions
-+ are met:
-+
-+ * Redistributions of source code must retain the above copyright
-+ notice, this list of conditions and the following disclaimer.
-+
-+ * Redistributions in binary form must reproduce the above copyright
-+ notice, this list of conditions and the following disclaimer in the
-+ documentation and/or other materials provided with the distribution.
-+
-+ * Neither the name of Linaro Limited nor the names of its
-+ contributors may be used to endorse or promote products derived
-+ from this software without specific prior written permission.
-+
-+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+
-+ Written by Dave Gilbert <david.gilbert@linaro.org>
-+
-+ This memcpy routine is optimised on a Cortex-A9 and should work on
-+ all ARMv7 processors with NEON. */
-+
-+@ 2011-09-01 david.gilbert@linaro.org
-+@ Extracted from local git 2f11b436
-+
-+ .syntax unified
-+ .arch armv7-a
-+
-+@ this lets us check a flag in a 00/ff byte easily in either endianness
-+#ifdef __ARMEB__
-+#define CHARTSTMASK(c) 1<<(31-(c*8))
-+#else
-+#define CHARTSTMASK(c) 1<<(c*8)
-+#endif
-+ .text
-+ .thumb
-+
-+@ ---------------------------------------------------------------------------
-+ .thumb_func
-+ .align 2
-+ .p2align 4,,15
-+ .global memcpy
-+ .type memcpy,%function
-+memcpy:
-+ @ r0 = dest
-+ @ r1 = source
-+ @ r2 = count
-+ @ returns dest in r0
-+ @ Overlaps of source/dest not allowed according to spec
-+ @ Note this routine relies on v7 misaligned loads/stores
-+ pld [r1]
-+ mov r12, r0 @ stash original r0
-+ cmp r2,#32
-+ blt 10f @ take the small copy case separately
-+
-+ @ test for either source or destination being misaligned
-+ @ (We only rely on word align)
-+ tst r0,#3
-+ it eq
-+ tsteq r1,#3
-+ bne 30f @ misaligned case
-+
-+4:
-+ @ at this point we are word (or better) aligned and have at least
-+ @ 32 bytes to play with
-+
-+ @ If it's a huge copy, try Neon
-+ cmp r2, #128*1024 @ 128KiB threshold
-+ bge 35f @ Sharing general non-aligned case here, aligned could be faster
-+
-+ push {r3,r4,r5,r6,r7,r8,r10,r11}
-+5: @ Main loop: 32 bytes (8 words) per iteration
-+ ldmia r1!,{r3,r4,r5,r6,r7,r8,r10,r11}
-+ sub r2,r2,#32
-+ pld [r1,#96]
-+ cmp r2,#32 @ at least another 32 bytes left?
-+ stmia r0!,{r3,r4,r5,r6,r7,r8,r10,r11}
-+ bge 5b
-+
-+ pop {r3,r4,r5,r6,r7,r8,r10,r11}
-+ @ We are now down to less than 32 bytes
-+ cbz r2,15f @ quick exit for the case where we copied a multiple of 32
-+
-+10: @ small copies (not necessarily aligned - note might be slightly more than 32bytes)
-+ cmp r2,#4
-+ blt 12f
-+11: @ Word-at-a-time loop while at least 4 bytes remain
-+ sub r2,r2,#4
-+ cmp r2,#4
-+ ldr r3, [r1],#4
-+ str r3, [r0],#4
-+ bge 11b
-+12: @ 0..3 bytes left: bit 1 of the count selects a halfword, bit 0 a byte
-+ tst r2,#2
-+ itt ne
-+ ldrhne r3, [r1],#2
-+ strhne r3, [r0],#2
-+
-+ tst r2,#1
-+ itt ne
-+ ldrbne r3, [r1],#1
-+ strbne r3, [r0],#1
-+
-+15: @ exit
-+ mov r0,r12 @ restore r0
-+ bx lr
-+
-+ .align 2
-+ .p2align 4,,15
-+30: @ non-aligned - at least 32 bytes to play with
-+ @ Test for co-misalignment
-+ eor r3, r0, r1
-+ tst r3,#3 @ low two address bits equal => same offset within a word
-+ beq 50f
-+
-+ @ Use Neon for misaligned
-+35: @ Neon loop: 32 bytes per iteration
-+ vld1.8 {d0,d1,d2,d3}, [r1]!
-+ sub r2,r2,#32
-+ cmp r2,#32
-+ pld [r1,#96]
-+ vst1.8 {d0,d1,d2,d3}, [r0]!
-+ bge 35b
-+ b 10b @ TODO: Probably a bad idea to switch to ARM at this point
-+
-+ .align 2
-+ .p2align 4,,15
-+50: @ Co-misaligned
-+ @ At this point we've got at least 32 bytes
-+51: @ Byte copy until the destination is 8-byte aligned
-+ ldrb r3,[r1],#1
-+ sub r2,r2,#1
-+ strb r3,[r0],#1
-+ tst r0,#7
-+ bne 51b
-+
-+ cmp r2,#32
-+ blt 10b @ fewer than 32 bytes left - finish with the small copy code
-+ b 4b @ otherwise rejoin the aligned path
-diff --git a/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/memset.S b/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/memset.S
-new file mode 100644
-index 0000000..921cb75
---- /dev/null
-+++ b/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/memset.S
-@@ -0,0 +1,118 @@
-+/* Copyright (c) 2010-2011, Linaro Limited
-+ All rights reserved.
-+
-+ Redistribution and use in source and binary forms, with or without
-+ modification, are permitted provided that the following conditions
-+ are met:
-+
-+ * Redistributions of source code must retain the above copyright
-+ notice, this list of conditions and the following disclaimer.
-+
-+ * Redistributions in binary form must reproduce the above copyright
-+ notice, this list of conditions and the following disclaimer in the
-+ documentation and/or other materials provided with the distribution.
-+
-+ * Neither the name of Linaro Limited nor the names of its
-+ contributors may be used to endorse or promote products derived
-+ from this software without specific prior written permission.
-+
-+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+
-+ Written by Dave Gilbert <david.gilbert@linaro.org>
-+
-+ This memset routine is optimised on a Cortex-A9 and should work on
-+ all ARMv7 processors. */
-+
-+ .syntax unified
-+ .arch armv7-a
-+
-+@ 2011-08-30 david.gilbert@linaro.org
-+@ Extracted from local git 2f11b436
-+
-+@ this lets us check a flag in a 00/ff byte easily in either endianness
-+#ifdef __ARMEB__
-+#define CHARTSTMASK(c) 1<<(31-(c*8))
-+#else
-+#define CHARTSTMASK(c) 1<<(c*8)
-+#endif
-+ .text
-+ .thumb
-+
-+@ ---------------------------------------------------------------------------
-+ .thumb_func
-+ .align 2
-+ .p2align 4,,15
-+ .global memset
-+ .type memset,%function
-+memset:
-+ @ r0 = address
-+ @ r1 = character
-+ @ r2 = count
-+ @ returns original address in r0
-+
-+ mov r3, r0 @ Leave r0 alone - r3 is the working pointer
-+ cbz r2, 10f @ Exit if 0 length
-+
-+ tst r0, #7
-+ beq 2f @ Already aligned
-+
-+ @ Ok, so we're misaligned here
-+1:
-+ strb r1, [r3], #1
-+ subs r2,r2,#1
-+ tst r3, #7
-+ cbz r2, 10f @ Exit if we hit the end (cbz leaves the tst flags alone)
-+ bne 1b @ go round again if still misaligned
-+
-+2:
-+ @ OK, so we're aligned
-+ push {r4,r5,r6,r7}
-+ bics r4, r2, #15 @ if less than 16 bytes then need to finish it off
-+ beq 5f
-+
-+3:
-+ @ POSIX says that ch is cast to an unsigned char. A uxtb is one
-+ @ byte and takes two cycles, where an AND is four bytes but one
-+ @ cycle.
-+ and r1, #0xFF
-+ orr r1, r1, r1, lsl#8 @ Same character into all bytes
-+ orr r1, r1, r1, lsl#16
-+ mov r5,r1
-+ mov r6,r1
-+ mov r7,r1
-+
-+4: @ Main loop: 16 bytes per iteration, r4 = bytes left for this loop
-+ subs r4,r4,#16
-+ stmia r3!,{r1,r5,r6,r7}
-+ bne 4b
-+ and r2,r2,#15
-+
-+ @ At this point we're still aligned and we have up to 15 bytes left to write
-+ @ we can avoid some of the byte-at-a time now by testing for some big chunks
-+ tst r2,#8
-+ itt ne
-+ subne r2,r2,#8
-+ stmiane r3!,{r1,r5}
-+
-+5:
-+ pop {r4,r5,r6,r7}
-+ cbz r2, 10f
-+
-+ @ Got to do any last < alignment bytes
-+6:
-+ subs r2,r2,#1
-+ strb r1,[r3],#1 @ strb only stores the low byte, so an expanded r1 is fine here
-+ bne 6b
-+
-+10:
-+ bx lr @ goodbye
-diff --git a/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/strchr.S b/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/strchr.S
-new file mode 100644
-index 0000000..8875dbf
---- /dev/null
-+++ b/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/strchr.S
-@@ -0,0 +1,76 @@
-+/* Copyright (c) 2010-2011, Linaro Limited
-+ All rights reserved.
-+
-+ Redistribution and use in source and binary forms, with or without
-+ modification, are permitted provided that the following conditions
-+ are met:
-+
-+ * Redistributions of source code must retain the above copyright
-+ notice, this list of conditions and the following disclaimer.
-+
-+ * Redistributions in binary form must reproduce the above copyright
-+ notice, this list of conditions and the following disclaimer in the
-+ documentation and/or other materials provided with the distribution.
-+
-+ * Neither the name of Linaro Limited nor the names of its
-+ contributors may be used to endorse or promote products derived
-+ from this software without specific prior written permission.
-+
-+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+
-+ Written by Dave Gilbert <david.gilbert@linaro.org>
-+
-+ A very simple strchr routine, from benchmarks on A9 it's a bit faster than
-+ the current version in eglibc (2.12.1-0ubuntu14 package)
-+ I don't think doing a word at a time version is worth it since a lot
-+ of strchr cases are very short anyway */
-+
-+@ 2011-02-07 david.gilbert@linaro.org
-+@ Extracted from local git a5b438d861
-+
-+ .syntax unified
-+ .arch armv7-a
-+
-+ .text
-+ .thumb
-+
-+@ ---------------------------------------------------------------------------
-+
-+ .thumb_func
-+ .align 2
-+ .p2align 4,,15
-+ .global strchr
-+ .type strchr,%function
-+strchr:
-+ @ r0 = start of string
-+ @ r1 = character to match (masked to its low 8 bits below)
-+ @ returns NULL for no match, or a pointer to the match
-+ and r1,r1, #255
-+
-+1:
-+ ldrb r2,[r0],#1
-+ cmp r2,r1
-+ cbz r2,10f @ hit the terminator (cbz leaves the cmp flags alone)
-+ bne 1b @ on the cmp flags: no match yet, try the next byte
-+
-+ @ We're here if it matched
-+5:
-+ subs r0,r0,#1 @ undo the post-increment of the byte load
-+ bx lr
-+
-+10:
-+ @ We're here if we ran off the end
-+ cmp r1, #0 @ Corner case - you're allowed to search for the nil and get a pointer to it
-+ beq 5b @ A bit messy, if it's common we should branch at the start to a special loop
-+ mov r0,#0
-+ bx lr
-diff --git a/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/strcmp.c b/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/strcmp.c
-new file mode 100644
-index 0000000..fb2280d
---- /dev/null
-+++ b/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/strcmp.c
-@@ -0,0 +1,449 @@
-+/*
-+ * Copyright (c) 2008 ARM Ltd
-+ * All rights reserved.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. The name of the company may not be used to endorse or promote
-+ * products derived from this software without specific prior written
-+ * permission.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
-+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+ */
-+
-+#include <string.h>
-+#include <memcopy.h>
-+
-+#undef strcmp
-+
-+
-+#ifdef __ARMEB__
-+#define SHFT2LSB "lsl"
-+#define SHFT2MSB "lsr"
-+#define MSB "0x000000ff"
-+#define LSB "0xff000000"
-+#else
-+#define SHFT2LSB "lsr"
-+#define SHFT2MSB "lsl"
-+#define MSB "0xff000000"
-+#define LSB "0x000000ff"
-+#endif
-+
-+#ifdef __thumb2__
-+#define magic1(REG) "#0x01010101"
-+#define magic2(REG) "#0x80808080"
-+#else
-+#define magic1(REG) #REG
-+#define magic2(REG) #REG ", lsl #7"
-+#endif
-+
-+int
-+__attribute__((naked)) strcmp (const char* s1, const char* s2)
-+{
-+ asm(
-+#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
-+ (defined (__thumb__) && !defined (__thumb2__)))
-+ "pld [r0, #0]\n\t"
-+ "pld [r1, #0]\n\t"
-+ "eor r2, r0, r1\n\t"
-+ "tst r2, #3\n\t"
-+ /* Strings not at same byte offset from a word boundary. */
-+ "bne strcmp_unaligned\n\t"
-+ "ands r2, r0, #3\n\t"
-+ "bic r0, r0, #3\n\t"
-+ "bic r1, r1, #3\n\t"
-+ "ldr ip, [r0], #4\n\t"
-+ "it eq\n\t"
-+ "ldreq r3, [r1], #4\n\t"
-+ "beq 1f\n\t"
-+ /* Although s1 and s2 have identical initial alignment, they are
-+ not currently word aligned. Rather than comparing bytes,
-+ make sure that any bytes fetched from before the addressed
-+ bytes are forced to 0xff. Then they will always compare
-+ equal. */
-+ "eor r2, r2, #3\n\t"
-+ "lsl r2, r2, #3\n\t"
-+ "mvn r3, #"MSB"\n\t"
-+ SHFT2LSB" r2, r3, r2\n\t"
-+ "ldr r3, [r1], #4\n\t"
-+ "orr ip, ip, r2\n\t"
-+ "orr r3, r3, r2\n"
-+ "1:\n\t"
-+#ifndef __thumb2__
-+ /* Load the 'magic' constant 0x01010101. */
-+ "str r4, [sp, #-4]!\n\t" /* r4 is saved here and reloaded before the final BX LR */
-+ "mov r4, #1\n\t"
-+ "orr r4, r4, r4, lsl #8\n\t"
-+ "orr r4, r4, r4, lsl #16\n"
-+#endif
-+ ".p2align 2\n"
-+ "4:\n\t"
-+ "pld [r0, #8]\n\t"
-+ "pld [r1, #8]\n\t"
-+ "sub r2, ip, "magic1(r4)"\n\t"
-+ "cmp ip, r3\n\t"
-+ "itttt eq\n\t"
-+ /* check for any zero bytes in first word */
-+ "biceq r2, r2, ip\n\t"
-+ "tsteq r2, "magic2(r4)"\n\t"
-+ "ldreq ip, [r0], #4\n\t"
-+ "ldreq r3, [r1], #4\n\t"
-+ "beq 4b\n"
-+ "2:\n\t"
-+ /* There's a zero or a different byte in the word */
-+ SHFT2MSB" r0, ip, #24\n\t"
-+ SHFT2LSB" ip, ip, #8\n\t"
-+ "cmp r0, #1\n\t"
-+ "it cs\n\t"
-+ "cmpcs r0, r3, "SHFT2MSB" #24\n\t"
-+ "it eq\n\t"
-+ SHFT2LSB"eq r3, r3, #8\n\t"
-+ "beq 2b\n\t"
-+ /* On a big-endian machine, r0 contains the desired byte in bits
-+ 0-7; on a little-endian machine they are in bits 24-31. In
-+ both cases the other bits in r0 are all zero. For r3 the
-+ interesting byte is at the other end of the word, but the
-+ other bits are not necessarily zero. We need a signed result
-+ representing the difference in the unsigned bytes, so for the
-+ little-endian case we can't just shift the interesting bits
-+ up. */
-+#ifdef __ARMEB__
-+ "sub r0, r0, r3, lsr #24\n\t"
-+#else
-+ "and r3, r3, #255\n\t"
-+#ifdef __thumb2__
-+ /* No RSB instruction in Thumb2 */
-+ "lsr r0, r0, #24\n\t"
-+ "sub r0, r0, r3\n\t"
-+#else
-+ "rsb r0, r3, r0, lsr #24\n\t"
-+#endif
-+#endif
-+#ifndef __thumb2__
-+ "ldr r4, [sp], #4\n\t" /* restore the r4 saved when the magic constant was loaded */
-+#endif
-+ "BX LR"
-+#elif (defined (__thumb__) && !defined (__thumb2__))
-+ /* Thumb-1 variant: plain byte-at-a-time compare. */
-+ "1:\n\t"
-+ "ldrb r2, [r0]\n\t"
-+ "ldrb r3, [r1]\n\t"
-+ "add r0, r0, #1\n\t"
-+ "add r1, r1, #1\n\t"
-+ "cmp r2, #0\n\t"
-+ "beq 2f\n\t"
-+ "cmp r2, r3\n\t"
-+ "beq 1b\n\t"
-+ "2:\n\t"
-+ "sub r0, r2, r3\n\t"
-+ "bx lr"
-+#else
-+ /* Size-optimised variant: byte-at-a-time compare. */
-+ "3:\n\t"
-+ "ldrb r2, [r0], #1\n\t"
-+ "ldrb r3, [r1], #1\n\t"
-+ "cmp r2, #1\n\t"
-+ "it cs\n\t"
-+ "cmpcs r2, r3\n\t"
-+ "beq 3b\n\t"
-+ "sub r0, r2, r3\n\t"
-+ "BX LR"
-+#endif
-+ );
-+}
-+
-+#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
-+ (defined (__thumb__) && !defined (__thumb2__)))
-+static int __attribute__((naked, used))
-+strcmp_unaligned(const char* s1, const char* s2)
-+{
-+#if 0
-+ /* The assembly code below is based on the following algorithm. */
-+#ifdef __ARMEB__
-+#define RSHIFT <<
-+#define LSHIFT >>
-+#else
-+#define RSHIFT >>
-+#define LSHIFT <<
-+#endif
-+
-+#define body(shift) \
-+ mask = 0xffffffffU RSHIFT shift; \
-+ w1 = *wp1++; \
-+ w2 = *wp2++; \
-+ do \
-+ { \
-+ t1 = w1 & mask; \
-+ if (__builtin_expect(t1 != w2 RSHIFT shift, 0)) \
-+ { \
-+ w2 RSHIFT= shift; \
-+ break; \
-+ } \
-+ if (__builtin_expect(((w1 - b1) & ~w1) & (b1 << 7), 0)) \
-+ { \
-+ /* See comment in assembler below re syndrome on big-endian */\
-+ if ((((w1 - b1) & ~w1) & (b1 << 7)) & mask) \
-+ w2 RSHIFT= shift; \
-+ else \
-+ { \
-+ w2 = *wp2; \
-+ t1 = w1 RSHIFT (32 - shift); \
-+ w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \
-+ } \
-+ break; \
-+ } \
-+ w2 = *wp2++; \
-+ t1 ^= w1; \
-+ if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0)) \
-+ { \
-+ t1 = w1 >> (32 - shift); \
-+ w2 = (w2 << (32 - shift)) RSHIFT (32 - shift); \
-+ break; \
-+ } \
-+ w1 = *wp1++; \
-+ } while (1)
-+
-+ const unsigned* wp1;
-+ const unsigned* wp2;
-+ unsigned w1, w2;
-+ unsigned mask;
-+ unsigned shift;
-+ unsigned b1 = 0x01010101;
-+ char c1, c2;
-+ unsigned t1;
-+
-+ while (((unsigned) s1) & 3)
-+ {
-+ c1 = *s1++;
-+ c2 = *s2++;
-+ if (c1 == 0 || c1 != c2)
-+ return c1 - (int)c2;
-+ }
-+ wp1 = (unsigned*) (((unsigned)s1) & ~3);
-+ wp2 = (unsigned*) (((unsigned)s2) & ~3);
-+ t1 = ((unsigned) s2) & 3;
-+ if (t1 == 1)
-+ {
-+ body(8);
-+ }
-+ else if (t1 == 2)
-+ {
-+ body(16);
-+ }
-+ else
-+ {
-+ body (24);
-+ }
-+
-+ do
-+ {
-+#ifdef __ARMEB__
-+ c1 = (char) t1 >> 24;
-+ c2 = (char) w2 >> 24;
-+#else
-+ c1 = (char) t1;
-+ c2 = (char) w2;
-+#endif
-+ t1 RSHIFT= 8;
-+ w2 RSHIFT= 8;
-+ } while (c1 != 0 && c1 == c2);
-+ return c1 - c2;
-+#endif
-+
-+ asm("wp1 .req r0\n\t"
-+ "wp2 .req r1\n\t"
-+ "b1 .req r2\n\t"
-+ "w1 .req r4\n\t"
-+ "w2 .req r5\n\t"
-+ "t1 .req ip\n\t"
-+ "@ r3 is scratch\n"
-+
-+ /* First of all, compare bytes until wp1(sp1) is word-aligned. */
-+ "1:\n\t"
-+ "tst wp1, #3\n\t"
-+ "beq 2f\n\t"
-+ "ldrb r2, [wp1], #1\n\t"
-+ "ldrb r3, [wp2], #1\n\t"
-+ "cmp r2, #1\n\t"
-+ "it cs\n\t"
-+ "cmpcs r2, r3\n\t"
-+ "beq 1b\n\t"
-+ "sub r0, r2, r3\n\t"
-+ "BX LR\n"
-+
-+ "2:\n\t"
-+ /* Save r5 then r4 (aliased above as w2 and w1); both exits below reload them. */
-+ "str r5, [sp, #-4]!\n\t"
-+ "str r4, [sp, #-4]!\n\t"
-+ // "stmfd sp!, {r4, r5}\n\t"
-+ "mov b1, #1\n\t"
-+ "orr b1, b1, b1, lsl #8\n\t"
-+ "orr b1, b1, b1, lsl #16\n\t"
-+
-+ "and t1, wp2, #3\n\t"
-+ "bic wp2, wp2, #3\n\t"
-+ "ldr w1, [wp1], #4\n\t"
-+ "ldr w2, [wp2], #4\n\t"
-+ "cmp t1, #2\n\t"
-+ "beq 2f\n\t"
-+ "bhi 3f\n"
-+
-+ /* Critical inner Loop: Block with 3 bytes initial overlap */
-+ ".p2align 2\n"
-+ "1:\n\t"
-+ "bic t1, w1, #"MSB"\n\t"
-+ "cmp t1, w2, "SHFT2LSB" #8\n\t"
-+ "sub r3, w1, b1\n\t"
-+ "bic r3, r3, w1\n\t"
-+ "bne 4f\n\t"
-+ "ands r3, r3, b1, lsl #7\n\t"
-+ "it eq\n\t"
-+ "ldreq w2, [wp2], #4\n\t"
-+ "bne 5f\n\t"
-+ "eor t1, t1, w1\n\t"
-+ "cmp t1, w2, "SHFT2MSB" #24\n\t"
-+ "bne 6f\n\t"
-+ "ldr w1, [wp1], #4\n\t"
-+ "b 1b\n"
-+ "4:\n\t"
-+ SHFT2LSB" w2, w2, #8\n\t"
-+ "b 8f\n"
-+
-+ "5:\n\t"
-+#ifdef __ARMEB__
-+ /* The syndrome value may contain false ones if the string ends
-+ with the bytes 0x01 0x00 */
-+ "tst w1, #0xff000000\n\t"
-+ "itt ne\n\t"
-+ "tstne w1, #0x00ff0000\n\t"
-+ "tstne w1, #0x0000ff00\n\t"
-+ "beq 7f\n\t"
-+#else
-+ "bics r3, r3, #0xff000000\n\t"
-+ "bne 7f\n\t"
-+#endif
-+ "ldrb w2, [wp2]\n\t"
-+ SHFT2LSB" t1, w1, #24\n\t"
-+#ifdef __ARMEB__
-+ "lsl w2, w2, #24\n\t"
-+#endif
-+ "b 8f\n"
-+
-+ "6:\n\t"
-+ SHFT2LSB" t1, w1, #24\n\t"
-+ "and w2, w2, #"LSB"\n\t"
-+ "b 8f\n"
-+
-+ /* Critical inner Loop: Block with 2 bytes initial overlap */
-+ ".p2align 2\n"
-+ "2:\n\t"
-+ SHFT2MSB" t1, w1, #16\n\t"
-+ "sub r3, w1, b1\n\t"
-+ SHFT2LSB" t1, t1, #16\n\t"
-+ "bic r3, r3, w1\n\t"
-+ "cmp t1, w2, "SHFT2LSB" #16\n\t"
-+ "bne 4f\n\t"
-+ "ands r3, r3, b1, lsl #7\n\t"
-+ "it eq\n\t"
-+ "ldreq w2, [wp2], #4\n\t"
-+ "bne 5f\n\t"
-+ "eor t1, t1, w1\n\t"
-+ "cmp t1, w2, "SHFT2MSB" #16\n\t"
-+ "bne 6f\n\t"
-+ "ldr w1, [wp1], #4\n\t"
-+ "b 2b\n"
-+
-+ "5:\n\t"
-+#ifdef __ARMEB__
-+ /* The syndrome value may contain false ones if the string ends
-+ with the bytes 0x01 0x00 */
-+ "tst w1, #0xff000000\n\t"
-+ "it ne\n\t"
-+ "tstne w1, #0x00ff0000\n\t"
-+ "beq 7f\n\t"
-+#else
-+ "lsls r3, r3, #16\n\t"
-+ "bne 7f\n\t"
-+#endif
-+ "ldrh w2, [wp2]\n\t"
-+ SHFT2LSB" t1, w1, #16\n\t"
-+#ifdef __ARMEB__
-+ "lsl w2, w2, #16\n\t"
-+#endif
-+ "b 8f\n"
-+
-+ "6:\n\t"
-+ SHFT2MSB" w2, w2, #16\n\t"
-+ SHFT2LSB" t1, w1, #16\n\t"
-+ "4:\n\t"
-+ SHFT2LSB" w2, w2, #16\n\t"
-+ "b 8f\n\t"
-+
-+ /* Critical inner Loop: Block with 1 byte initial overlap */
-+ ".p2align 2\n"
-+ "3:\n\t"
-+ "and t1, w1, #"LSB"\n\t"
-+ "cmp t1, w2, "SHFT2LSB" #24\n\t"
-+ "sub r3, w1, b1\n\t"
-+ "bic r3, r3, w1\n\t"
-+ "bne 4f\n\t"
-+ "ands r3, r3, b1, lsl #7\n\t"
-+ "it eq\n\t"
-+ "ldreq w2, [wp2], #4\n\t"
-+ "bne 5f\n\t"
-+ "eor t1, t1, w1\n\t"
-+ "cmp t1, w2, "SHFT2MSB" #8\n\t"
-+ "bne 6f\n\t"
-+ "ldr w1, [wp1], #4\n\t"
-+ "b 3b\n"
-+ "4:\n\t"
-+ SHFT2LSB" w2, w2, #24\n\t"
-+ "b 8f\n"
-+ "5:\n\t"
-+ /* The syndrome value may contain false ones if the string ends
-+ with the bytes 0x01 0x00 */
-+ "tst w1, #"LSB"\n\t"
-+ "beq 7f\n\t"
-+ "ldr w2, [wp2], #4\n"
-+ "6:\n\t"
-+ SHFT2LSB" t1, w1, #8\n\t"
-+ "bic w2, w2, #"MSB"\n\t"
-+ "b 8f\n"
-+ "7:\n\t"
-+ "mov r0, #0\n\t" /* return 0 */
-+ // "ldmfd sp!, {r4, r5}\n\t"
-+ "ldr r4, [sp], #4\n\t"
-+ "ldr r5, [sp], #4\n\t"
-+ "BX LR\n"
-+ "8:\n\t"
-+ /* Compare t1 and w2 a byte at a time; the result is the difference of the bytes that stop the loop. */
-+ "and r2, t1, #"LSB"\n\t"
-+ "and r0, w2, #"LSB"\n\t"
-+ "cmp r0, #1\n\t"
-+ "it cs\n\t"
-+ "cmpcs r0, r2\n\t"
-+ "itt eq\n\t"
-+ SHFT2LSB"eq t1, t1, #8\n\t"
-+ SHFT2LSB"eq w2, w2, #8\n\t"
-+ "beq 8b\n\t"
-+ "sub r0, r2, r0\n\t"
-+ // "ldmfd sp!, {r4, r5}\n\t"
-+ "ldr r4, [sp], #4\n\t"
-+ "ldr r5, [sp], #4\n\t"
-+ "BX LR");
-+}
-+
-+#endif
-+
-+libc_hidden_builtin_def (strcmp)
-diff --git a/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/strcpy.c b/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/strcpy.c
-new file mode 100644
-index 0000000..aa8cb06
---- /dev/null
-+++ b/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/strcpy.c
-@@ -0,0 +1,172 @@
-+/*
-+ * Copyright (c) 2008 ARM Ltd
-+ * All rights reserved.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. The name of the company may not be used to endorse or promote
-+ * products derived from this software without specific prior written
-+ * permission.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
-+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+ */
-+
-+#include <string.h>
-+#include <memcopy.h>
-+/* string.h may provide strcpy as a macro; drop it so the definition below is a real function. */
-+#undef strcpy
-+
-+/* Operands for the zero-byte test: immediates on Thumb-2, otherwise REG, which must hold 0x01010101. */
-+#ifdef __thumb2__
-+#define magic1(REG) "#0x01010101"
-+#define magic2(REG) "#0x80808080"
-+#else
-+#define magic1(REG) #REG
-+#define magic2(REG) #REG ", lsl #7"
-+#endif
-+/* strcpy: copy the NUL-terminated string at src (r1) to dst (r0), terminator included; r0 is never written, so dst is what comes back. */
-+char* __attribute__((naked)) /* naked: every instruction, including the return, is in the asm below */
-+strcpy (char* dst, const char* src)
-+{
-+ asm (
-+#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
-+ (defined (__thumb__) && !defined (__thumb2__)))
-+ "pld [r1, #0]\n\t" /* prefetch the start of src */
-+ "eor r2, r0, r1\n\t" /* r2 = dst ^ src: low bits give the relative alignment */
-+ "mov ip, r0\n\t" /* ip = write pointer; r0 keeps dst */
-+ "tst r2, #3\n\t"
-+ "bne 4f\n\t" /* different offsets within a word: plain byte loop */
-+ "tst r1, #3\n\t"
-+ "bne 3f\n" /* same offset, but not word-aligned: align first */
-+ "5:\n\t" /* both pointers are word-aligned from here on */
-+#ifndef __thumb2__
-+ "str r5, [sp, #-4]!\n\t" /* save r5; it holds the constant for magic1/magic2 */
-+ "mov r5, #0x01\n\t"
-+ "orr r5, r5, r5, lsl #8\n\t"
-+ "orr r5, r5, r5, lsl #16\n\t" /* r5 = 0x01010101 */
-+#endif
-+
-+ "str r4, [sp, #-4]!\n\t" /* save r4 (second data word in the loop) */
-+ "tst r1, #4\n\t" /* Z set if src is already 8-byte aligned */
-+ "ldr r3, [r1], #4\n\t" /* first word; ldr leaves the flags from tst intact */
-+ "beq 2f\n\t"
-+ "sub r2, r3, "magic1(r5)"\n\t" /* (w - 0x01010101) & ~w & 0x80808080 is non-zero iff w has a zero byte */
-+ "bics r2, r2, r3\n\t"
-+ "tst r2, "magic2(r5)"\n\t"
-+ "itt eq\n\t"
-+ "streq r3, [ip], #4\n\t" /* no terminator: store the word ... */
-+ "ldreq r3, [r1], #4\n" /* ... and fetch the next one */
-+ "bne 1f\n\t" /* terminator is in r3: finish bytewise */
-+ /* Inner loop. We now know that r1 is 64-bit aligned, so we
-+ can safely fetch up to two words. This allows us to avoid
-+ load stalls. */
-+ ".p2align 2\n"
-+ "2:\n\t"
-+ "pld [r1, #8]\n\t"
-+ "ldr r4, [r1], #4\n\t" /* second word of the pair */
-+ "sub r2, r3, "magic1(r5)"\n\t" /* zero-byte test on r3 */
-+ "bics r2, r2, r3\n\t"
-+ "tst r2, "magic2(r5)"\n\t"
-+ "sub r2, r4, "magic1(r5)"\n\t" /* start the test on r4; sub does not touch the flags */
-+ "bne 1f\n\t" /* r3 contains the terminator */
-+ "str r3, [ip], #4\n\t"
-+ "bics r2, r2, r4\n\t" /* finish the zero-byte test on r4 */
-+ "tst r2, "magic2(r5)"\n\t"
-+ "itt eq\n\t"
-+ "ldreq r3, [r1], #4\n\t" /* r4 is clean: fetch the next first word ... */
-+ "streq r4, [ip], #4\n\t" /* ... and store r4 */
-+ "beq 2b\n\t"
-+ "mov r3, r4\n" /* terminator is in r4; the tail loop works on r3 */
-+ "1:\n\t" /* tail: emit r3 byte by byte in memory order, up to and including the NUL */
-+#ifdef __ARMEB__
-+ "rors r3, r3, #24\n\t" /* big-endian: rotate the next byte down to bits 7:0 */
-+#endif
-+ "strb r3, [ip], #1\n\t"
-+ "tst r3, #0xff\n\t" /* was the byte just stored the terminator? */
-+#ifdef __ARMEL__
-+ "ror r3, r3, #8\n\t" /* little-endian: next byte to bits 7:0; no S suffix, flags are kept */
-+#endif
-+ "bne 1b\n\t"
-+ "ldr r4, [sp], #4\n\t" /* restore in reverse order of the saves */
-+#ifndef __thumb2__
-+ "ldr r5, [sp], #4\n\t"
-+#endif
-+ "BX LR\n"
-+
-+ /* Strings have the same offset from word alignment, but it's
-+ not zero. */
-+ "3:\n\t"
-+ "tst r1, #1\n\t"
-+ "beq 1f\n\t"
-+ "ldrb r2, [r1], #1\n\t" /* copy one byte to reach halfword alignment */
-+ "strb r2, [ip], #1\n\t"
-+ "cmp r2, #0\n\t"
-+ "it eq\n"
-+ "BXEQ LR\n" /* that byte was the terminator: done (nothing was pushed yet) */
-+ "1:\n\t"
-+ "tst r1, #2\n\t"
-+ "beq 5b\n\t" /* already word-aligned */
-+ "ldrh r2, [r1], #2\n\t" /* otherwise copy one halfword to reach word alignment */
-+#ifdef __ARMEB__
-+ "tst r2, #0xff00\n\t" /* big-endian: the first byte in memory is bits 15:8 */
-+ "iteet ne\n\t"
-+ "strneh r2, [ip], #2\n\t" /* first byte non-zero: store both bytes */
-+ "lsreq r2, r2, #8\n\t" /* first byte is the NUL: move it to bits 7:0 ... */
-+ "streqb r2, [ip]\n\t" /* ... and store only the terminator */
-+ "tstne r2, #0xff\n\t" /* otherwise test the second byte */
-+#else
-+ "tst r2, #0xff\n\t" /* little-endian: the first byte in memory is bits 7:0 */
-+ "itet ne\n\t"
-+ "strneh r2, [ip], #2\n\t" /* first byte non-zero: store both bytes */
-+ "streqb r2, [ip]\n\t" /* first byte is the NUL: store only the terminator */
-+ "tstne r2, #0xff00\n\t" /* otherwise test the second byte */
-+#endif
-+ "bne 5b\n\t" /* both bytes non-zero: continue with the word-aligned copy */
-+ "BX LR\n"
-+
-+ /* src and dst do not have a common word-alignment. Fall back to
-+ byte copying. */
-+ "4:\n\t"
-+ "ldrb r2, [r1], #1\n\t"
-+ "strb r2, [ip], #1\n\t"
-+ "cmp r2, #0\n\t" /* stop once the terminator has been stored */
-+ "bne 4b\n\t"
-+ "BX LR"
-+
-+#elif !defined (__thumb__) || defined (__thumb2__) /* size-preferring build, ARM or Thumb-2 */
-+ "mov r3, r0\n\t" /* r3 = write pointer; r0 keeps dst */
-+ "1:\n\t"
-+ "ldrb r2, [r1], #1\n\t"
-+ "strb r2, [r3], #1\n\t"
-+ "cmp r2, #0\n\t" /* stop once the terminator has been stored */
-+ "bne 1b\n\t"
-+ "BX LR"
-+#else /* __thumb__ without __thumb2__: same byte loop with explicit pointer increments */
-+ "mov r3, r0\n\t" /* r3 = write pointer; r0 keeps dst */
-+ "1:\n\t"
-+ "ldrb r2, [r1]\n\t"
-+ "add r1, r1, #1\n\t"
-+ "strb r2, [r3]\n\t"
-+ "add r3, r3, #1\n\t"
-+ "cmp r2, #0\n\t" /* stop once the terminator has been stored */
-+ "bne 1b\n\t"
-+ "BX LR"
-+#endif
-+ );
-+}
-+libc_hidden_builtin_def (strcpy)
-diff --git a/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/strlen.S b/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/strlen.S
-new file mode 100644
-index 0000000..125e92f
---- /dev/null
-+++ b/cortex-strings/sysdeps/arm/eabi/arm/cortex-a8/strlen.S
-@@ -0,0 +1,111 @@
-+/* Copyright (c) 2010-2011, Linaro Limited
-+ All rights reserved.
-+
-+ Redistribution and use in source and binary forms, with or without
-+ modification, are permitted provided that the following conditions
-+ are met:
-+
-+ * Redistributions of source code must retain the above copyright
-+ notice, this list of conditions and the following disclaimer.
-+
-+ * Redistributions in binary form must reproduce the above copyright
-+ notice, this list of conditions and the following disclaimer in the
-+ documentation and/or other materials provided with the distribution.
-+
-+ * Neither the name of Linaro Limited nor the names of its
-+ contributors may be used to endorse or promote products derived
-+ from this software without specific prior written permission.
-+
-+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+
-+ Written by Dave Gilbert <david.gilbert@linaro.org>
-+
-+ This strlen routine is optimised on a Cortex-A9 and should work on
-+ all ARMv7 processors. This routine is reasonably fast for short
-+ strings, but is probably slower than a simple implementation if all
-+ your strings are very short */
-+
-+@ 2011-02-08 david.gilbert@linaro.org
-+@ Extracted from local git 6848613a
-+
-+
-+@ CHARTSTMASK(c): one bit inside byte c (memory order) of a loaded word, to test a 00/ff mask byte in either endianness
-+#ifdef __ARMEB__
-+#define CHARTSTMASK(c) (1<<(31-((c)*8)))
-+#else
-+#define CHARTSTMASK(c) (1<<((c)*8))
-+#endif
-+
-+@-----------------------------------------------------------------------------------------------------------------------------
-+ .syntax unified
-+ .arch armv7-a
-+
-+ .thumb_func @ strlen is a Thumb function (the body uses cbz)
-+ .align 2
-+ .p2align 4,,15 @ align the entry point to 16 bytes
-+ .global strlen
-+ .type strlen,%function
-+strlen:
-+ @ r0 = string; scratch r1-r3; r4 and r6 are saved here and restored at 10:
-+ @ returns count of bytes in string not including terminator
-+ mov r1, r0 @ r1 = read pointer; r0 keeps the start for the final subtraction
-+ push { r4,r6 }
-+ mvns r6, #0 @ all F
-+ movs r4, #0 @ all 0
-+ tst r0, #7 @ already 8-byte aligned?
-+ beq 2f
-+
-+1: @ byte loop until r1 is 8-byte aligned
-+ ldrb r2, [r1], #1
-+ tst r1, #7 @ Hit alignment yet?
-+ cbz r2, 10f @ Exit if we found the 0 (cbz leaves the flags from tst intact)
-+ bne 1b @ not aligned yet
-+
-+ @ So we're now aligned
-+2:
-+ ldmia r1!,{r2,r3} @ load two words, r1 += 8
-+ uadd8 r2, r2, r6 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
-+ sel r2, r4, r6 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
-+ uadd8 r3, r3, r6 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
-+ sel r3, r2, r6 @ per byte: the r2 mask byte where this byte was non-00, or ff where it was 00
-+ cmp r3, #0 @ zero only if neither word contained a 00 byte
-+ beq 2b
-+
-+strlenendtmp:
-+ @ One (or more) of the bytes we loaded was 0 - but which one?
-+ @ r2 has the mask corresponding to the first loaded word
-+ @ r3 has a combined mask of the two words - but if r2 was all-non 0
-+ @ then it's just the 2nd words
-+ cmp r2, #0
-+ itte eq
-+ moveq r2, r3 @ the end is in the 2nd word
-+ subeq r1,r1,#3 @ r1 was 8 past the 1st word: -3 gives the 2nd byte of the 2nd word
-+ subne r1,r1,#7 @ -7 gives the 2nd byte of the 1st word
-+
-+ @ r1 currently points to the 2nd byte of the word containing the 0
-+ tst r2, # CHARTSTMASK(0) @ 1st character
-+ bne 10f @ 1st character is the terminator; r1 is already one past it
-+ adds r1,r1,#1
-+ tst r2, # CHARTSTMASK(1) @ 2nd character
-+ ittt eq
-+ addeq r1,r1,#1
-+ tsteq r2, # (3<<15) @ 2nd & 3rd character (2nd is known non-0 here, so this tests the 3rd)
-+ @ If not the 3rd must be the last one
-+ addeq r1,r1,#1
-+
-+10:
-+ @ r0 is still at the beginning, r1 is pointing 1 byte after the terminator
-+ sub r0, r1, r0
-+ subs r0, r0, #1 @ do not count the terminator
-+ pop { r4, r6 }
-+ bx lr
---
-1.7.7
-