Merge "Add libc optimizations to bionic for MIPS"
This commit is contained in:
commit
9cb82a2c6b
15 changed files with 2628 additions and 1546 deletions
|
@ -632,7 +632,22 @@ cc_library_static {
|
|||
"upstream-openbsd/lib/libc/string/strncmp.c",
|
||||
],
|
||||
},
|
||||
|
||||
mips: {
|
||||
exclude_srcs: [
|
||||
"upstream-openbsd/lib/libc/string/memchr.c",
|
||||
"upstream-openbsd/lib/libc/string/memmove.c",
|
||||
"upstream-openbsd/lib/libc/string/strcpy.c",
|
||||
"upstream-openbsd/lib/libc/string/strncmp.c",
|
||||
],
|
||||
},
|
||||
mips64: {
|
||||
exclude_srcs: [
|
||||
"upstream-openbsd/lib/libc/string/memchr.c",
|
||||
"upstream-openbsd/lib/libc/string/memmove.c",
|
||||
"upstream-openbsd/lib/libc/string/strcpy.c",
|
||||
"upstream-openbsd/lib/libc/string/strncmp.c",
|
||||
],
|
||||
},
|
||||
x86: {
|
||||
exclude_srcs: [
|
||||
"upstream-openbsd/lib/libc/string/memchr.c",
|
||||
|
@ -1041,9 +1056,16 @@ cc_library_static {
|
|||
mips: {
|
||||
srcs: [
|
||||
"arch-mips/string/memcmp.c",
|
||||
"arch-mips/string/memcpy.S",
|
||||
"arch-mips/string/memcpy.c",
|
||||
"arch-mips/string/memset.S",
|
||||
"arch-mips/string/strcmp.S",
|
||||
"arch-mips/string/strncmp.S",
|
||||
"arch-mips/string/strlen.c",
|
||||
"arch-mips/string/strnlen.c",
|
||||
"arch-mips/string/strchr.c",
|
||||
"arch-mips/string/strcpy.c",
|
||||
"arch-mips/string/memchr.c",
|
||||
"arch-mips/string/memmove.c",
|
||||
|
||||
"arch-mips/bionic/__bionic_clone.S",
|
||||
"arch-mips/bionic/cacheflush.cpp",
|
||||
|
@ -1052,25 +1074,25 @@ cc_library_static {
|
|||
"arch-mips/bionic/setjmp.S",
|
||||
"arch-mips/bionic/syscall.S",
|
||||
"arch-mips/bionic/vfork.S",
|
||||
|
||||
"arch-mips/string/mips_strlen.c",
|
||||
],
|
||||
rev6: {
|
||||
srcs: [
|
||||
"arch-mips/string/strlen.c",
|
||||
],
|
||||
exclude_srcs: [
|
||||
"arch-mips/string/mips_strlen.c",
|
||||
],
|
||||
},
|
||||
exclude_srcs: [
|
||||
"bionic/strchr.cpp",
|
||||
"bionic/strnlen.c",
|
||||
],
|
||||
},
|
||||
mips64: {
|
||||
srcs: [
|
||||
"arch-mips/string/memcmp.c",
|
||||
"arch-mips/string/memcpy.S",
|
||||
"arch-mips/string/memcpy.c",
|
||||
"arch-mips/string/memset.S",
|
||||
"arch-mips/string/strcmp.S",
|
||||
"arch-mips/string/strncmp.S",
|
||||
"arch-mips/string/strlen.c",
|
||||
"arch-mips/string/strnlen.c",
|
||||
"arch-mips/string/strchr.c",
|
||||
"arch-mips/string/strcpy.c",
|
||||
"arch-mips/string/memchr.c",
|
||||
"arch-mips/string/memmove.c",
|
||||
|
||||
"arch-mips64/bionic/__bionic_clone.S",
|
||||
"arch-mips64/bionic/_exit_with_stack_teardown.S",
|
||||
|
@ -1079,6 +1101,10 @@ cc_library_static {
|
|||
"arch-mips64/bionic/vfork.S",
|
||||
"arch-mips64/bionic/stat.cpp",
|
||||
],
|
||||
exclude_srcs: [
|
||||
"bionic/strchr.cpp",
|
||||
"bionic/strnlen.c",
|
||||
],
|
||||
},
|
||||
|
||||
x86: {
|
||||
|
|
122
libc/NOTICE
122
libc/NOTICE
|
@ -4816,38 +4816,6 @@ Optimized by Bruce D. Evans.
|
|||
|
||||
-------------------------------------------------------------------
|
||||
|
||||
Copyright (c) 2010 MIPS Technologies, Inc.
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
* Neither the name of MIPS Technologies Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
|
||||
Copyright (c) 2010 The NetBSD Foundation, Inc.
|
||||
All rights reserved.
|
||||
|
||||
|
@ -5344,35 +5312,6 @@ Copyright (c) 2012-2013, Linaro Limited
|
|||
|
||||
-------------------------------------------------------------------
|
||||
|
||||
Copyright (c) 2012-2015
|
||||
MIPS Technologies, Inc., California.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. Neither the name of the MIPS Technologies, Inc., nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGE.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
|
||||
Copyright (c) 2013
|
||||
MIPS Technologies, Inc., California.
|
||||
|
||||
|
@ -5586,35 +5525,6 @@ Copyright (c) 2013-2015, Linaro Limited
|
|||
|
||||
-------------------------------------------------------------------
|
||||
|
||||
Copyright (c) 2014
|
||||
Imagination Technologies Limited.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. Neither the name of the MIPS Technologies, Inc., nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGE.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
|
||||
Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org>
|
||||
Copyright (c) 2014 Bob Beck <beck@obtuse.com>
|
||||
|
||||
|
@ -5750,6 +5660,38 @@ SUCH DAMAGE.
|
|||
|
||||
-------------------------------------------------------------------
|
||||
|
||||
Copyright (c) 2017 Imagination Technologies.
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
* Neither the name of Imagination Technologies nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
|
||||
Copyright (c)1999 Citrus Project,
|
||||
All rights reserved.
|
||||
|
||||
|
|
185
libc/arch-mips/string/memchr.c
Normal file
185
libc/arch-mips/string/memchr.c
Normal file
|
@ -0,0 +1,185 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Imagination Technologies.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with
|
||||
* the distribution.
|
||||
* * Neither the name of Imagination Technologies nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#include <string.h>
|
||||
|
||||
/* Set to 0 to compile the software prefetch hints out of memchr.  */
#define ENABLE_PREFETCH 1

/* Unit of the word-at-a-time scan.  A typedef rather than a macro so the
   name behaves like a type everywhere (declarators, casts, sizeof).  */
typedef unsigned long int op_t;

/* Size of one scan unit in bytes; parenthesised so it can be dropped into
   any arithmetic expression.  */
#define op_size (sizeof (op_t))

/* Read-prefetch hint for ADDR.  Expands to a single expression with no
   trailing semicolon, so `PREFETCH (p);` is exactly one statement and the
   macro is safe inside an unbraced if/else.  */
#if ENABLE_PREFETCH
#define PREFETCH(addr) __builtin_prefetch ((addr), 0, 1)
#else
#define PREFETCH(addr) ((void) 0)
#endif
|
||||
|
||||
#if __mips64 || __mips_isa_rev >= 2
|
||||
static inline void * __attribute__ ((always_inline))
|
||||
do_bytes (const op_t* w, op_t inval)
|
||||
{
|
||||
const unsigned char *p = (const unsigned char *) w;
|
||||
op_t outval = 0;
|
||||
#if __mips64
|
||||
__asm__ volatile (
|
||||
"dsbh %1, %0 \n\t"
|
||||
"dshd %0, %1 \n\t"
|
||||
"dclz %1, %0 \n\t"
|
||||
: "+r" (inval), "+r" (outval)
|
||||
);
|
||||
#else
|
||||
__asm__ volatile (
|
||||
"wsbh %1, %0 \n\t"
|
||||
"rotr %0, %1, 16 \n\t"
|
||||
"clz %1, %0 \n\t"
|
||||
: "+r" (inval), "+r" (outval)
|
||||
);
|
||||
#endif
|
||||
p += (outval >> 3);
|
||||
return (void *) p;
|
||||
}
|
||||
|
||||
#define DO_WORD(in, val) { \
|
||||
op_t tmp = ((val - mask_1) & ~val) & mask_128; \
|
||||
if (tmp != 0) \
|
||||
return do_bytes(in, tmp); \
|
||||
}
|
||||
#else
|
||||
static inline void * __attribute__ ((always_inline))
|
||||
do_bytes (const op_t* w, unsigned char ch)
|
||||
{
|
||||
const unsigned char *p = (const unsigned char *) w;
|
||||
for (; *p != ch; ++p);
|
||||
return (void *) p;
|
||||
}
|
||||
|
||||
#define DO_WORD(in, val) { \
|
||||
op_t tmp = ((val - mask_1) & ~val) & mask_128; \
|
||||
if (tmp != 0) \
|
||||
return do_bytes(in, ch); \
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Scan four consecutive words starting at W for the target byte.  All four
 * words are loaded and XORed with mask_c first, then tested in address
 * order with DO_WORD, which returns from the enclosing function on a hit.
 * Requires mask_c (and whatever DO_WORD needs) in the calling scope.
 *
 * W is parenthesised so that expression arguments such as `w + 4` are cast
 * as a whole, and the const qualifier of the source pointer is preserved.
 */
#define DO_WORDS(w) {                          \
  const op_t *w1 = (const op_t *) (w);         \
  op_t val0 = w1[0] ^ mask_c;                  \
  op_t val1 = w1[1] ^ mask_c;                  \
  op_t val2 = w1[2] ^ mask_c;                  \
  op_t val3 = w1[3] ^ mask_c;                  \
  DO_WORD(w1, val0)                            \
  DO_WORD(w1 + 1, val1)                        \
  DO_WORD(w1 + 2, val2)                        \
  DO_WORD(w1 + 3, val3)                        \
}
|
||||
|
||||
void *
|
||||
memchr (void const *s, int c_in, size_t n) __overloadable
|
||||
{
|
||||
if (n != 0) {
|
||||
const unsigned char *p = (const unsigned char *) s;
|
||||
const op_t *w;
|
||||
op_t mask_1, mask_128, mask_c;
|
||||
unsigned char ch = (unsigned char) c_in;
|
||||
|
||||
/*
|
||||
* Check bytewize till initial alignment
|
||||
*/
|
||||
for (; n > 0 && ((size_t) p % op_size) != 0; --n, ++p) {
|
||||
if (*p == ch)
|
||||
return (void *) p;
|
||||
}
|
||||
|
||||
w = (const op_t *) p;
|
||||
|
||||
mask_c = ch | (ch << 8);
|
||||
mask_c |= mask_c << 16;
|
||||
__asm__ volatile (
|
||||
"li %0, 0x01010101 \n\t"
|
||||
: "=r" (mask_1)
|
||||
);
|
||||
#if __mips64
|
||||
mask_1 |= mask_1 << 32;
|
||||
mask_c |= mask_c << 32;
|
||||
#endif
|
||||
mask_128 = mask_1 << 7;
|
||||
|
||||
/*
|
||||
* Check op_size byteswize after initial alignment
|
||||
*/
|
||||
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
|
||||
PREFETCH (w);
|
||||
PREFETCH (w + 8);
|
||||
while (n >= 24 * op_size) {
|
||||
PREFETCH(w + 16);
|
||||
DO_WORDS(w);
|
||||
DO_WORDS(w + 4);
|
||||
w += 8;
|
||||
n -= 8 * op_size;
|
||||
}
|
||||
while (n >= 8 * op_size) {
|
||||
DO_WORDS(w);
|
||||
DO_WORDS(w + 4);
|
||||
w += 8;
|
||||
n -= 8 * op_size;
|
||||
}
|
||||
#else
|
||||
PREFETCH (w);
|
||||
PREFETCH (w + 4);
|
||||
while (n >= 12 * op_size) {
|
||||
PREFETCH(w + 8);
|
||||
DO_WORDS(w);
|
||||
w += 4;
|
||||
n -= 4 * op_size;
|
||||
}
|
||||
while (n >= 4 * op_size) {
|
||||
DO_WORDS(w);
|
||||
w += 4;
|
||||
n -= 4 * op_size;
|
||||
}
|
||||
#endif
|
||||
|
||||
while (n >= op_size) {
|
||||
op_t val = *w ^ mask_c;
|
||||
DO_WORD(w, val);
|
||||
w++;
|
||||
n -= op_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check bytewize for remaining bytes
|
||||
*/
|
||||
p = (const unsigned char *) w;
|
||||
for (; n > 0; --n, ++p) {
|
||||
if (*p == ch)
|
||||
return (void *) p;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
|
@ -1,51 +1,352 @@
|
|||
/*
|
||||
* Copyright (C) 2008 The Android Open Source Project
|
||||
* Copyright (c) 2017 Imagination Technologies.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with
|
||||
* the distribution.
|
||||
* * Neither the name of Imagination Technologies nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
int memcmp(const void *s1, const void *s2, size_t n)
|
||||
#define ENABLE_PREFETCH 1
|
||||
|
||||
#define STRNG(X) #X
|
||||
#define PREFETCH(src_ptr, offset) \
|
||||
asm("pref 0, " STRNG(offset) "(%[src]) \n\t" : : [src] "r" (src_ptr));
|
||||
|
||||
#if !defined(UNALIGNED_INSTR_SUPPORT)
|
||||
/* does target have unaligned lw/ld/ualw/uald instructions? */
|
||||
#define UNALIGNED_INSTR_SUPPORT 0
|
||||
#if __mips_isa_rev < 6 && !__mips1
|
||||
#undef UNALIGNED_INSTR_SUPPORT
|
||||
#define UNALIGNED_INSTR_SUPPORT 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(HW_UNALIGNED_SUPPORT)
|
||||
/* Does target have hardware support for unaligned accesses? */
|
||||
#define HW_UNALIGNED_SUPPORT 0
|
||||
#if __mips_isa_rev >= 6
|
||||
#undef HW_UNALIGNED_SUPPORT
|
||||
#define HW_UNALIGNED_SUPPORT 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define SIZEOF_reg_t 4
|
||||
#if _MIPS_SIM == _ABIO32
|
||||
typedef unsigned long reg_t;
|
||||
typedef struct bits
|
||||
{
|
||||
const unsigned char* p1 = s1;
|
||||
const unsigned char* end1 = p1 + n;
|
||||
const unsigned char* p2 = s2;
|
||||
int d = 0;
|
||||
reg_t B0:8, B1:8, B2:8, B3:8;
|
||||
} bits_t;
|
||||
#else
|
||||
#undef SIZEOF_reg_t
|
||||
#define SIZEOF_reg_t 8
|
||||
typedef unsigned long long reg_t;
|
||||
typedef struct bits
|
||||
{
|
||||
reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
|
||||
} bits_t;
|
||||
#endif
|
||||
|
||||
for (;;) {
|
||||
if (d || p1 >= end1) break;
|
||||
d = (int)*p1++ - (int)*p2++;
|
||||
/* This union assumes that small structures can be in registers. If
|
||||
not, then memory accesses will be done - not optimal, but ok. */
|
||||
typedef union
|
||||
{
|
||||
reg_t v;
|
||||
bits_t b;
|
||||
} bitfields_t;
|
||||
|
||||
if (d || p1 >= end1) break;
|
||||
d = (int)*p1++ - (int)*p2++;
|
||||
#define do_bitfield(__i) \
|
||||
if (x.b.B##__i != y.b.B##__i) return x.b.B##__i - y.b.B##__i;
|
||||
|
||||
if (d || p1 >= end1) break;
|
||||
d = (int)*p1++ - (int)*p2++;
|
||||
|
||||
if (d || p1 >= end1) break;
|
||||
d = (int)*p1++ - (int)*p2++;
|
||||
}
|
||||
return d;
|
||||
/* pull apart the words to find the first differing unsigned byte. */
|
||||
static int __attribute__ ((noinline)) do_by_bitfields (reg_t a, reg_t b)
|
||||
{
|
||||
bitfields_t x, y;
|
||||
x.v = a;
|
||||
y.v = b;
|
||||
do_bitfield (0);
|
||||
do_bitfield (1);
|
||||
do_bitfield (2);
|
||||
#if SIZEOF_reg_t == 4
|
||||
return x.b.B3 - y.b.B3;
|
||||
#else
|
||||
do_bitfield (3);
|
||||
do_bitfield (4);
|
||||
do_bitfield (5);
|
||||
do_bitfield (6);
|
||||
return x.b.B7 - y.b.B7;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Byte-by-byte comparison.  Used while aligning a pointer, for the bytes
   left over after the word compares, and as the whole implementation when
   the architecture offers no form of unaligned access.

   a, b - the two regions to compare
   len  - number of bytes to compare; may be zero

   Returns 0 when the first len bytes are equal, otherwise the difference
   (as unsigned char values) of the first pair of bytes that differ.  */
static inline int __attribute__ ((always_inline))
do_bytes (const void *a, const void *b, unsigned long len)
{
  /* Keep the const qualifier: the regions are only ever read.  */
  const unsigned char *x = (const unsigned char *) a;
  const unsigned char *y = (const unsigned char *) b;

  /* 'len' might be zero here, so nothing is dereferenced before the
     length has been checked; preloading the first two values could touch
     unallocated memory.  */
  for (; len != 0; len--, x++, y++) {
    if (*x != *y)
      return *x - *y;
  }
  return 0;
}
|
||||
|
||||
#if !HW_UNALIGNED_SUPPORT
|
||||
#if UNALIGNED_INSTR_SUPPORT
|
||||
/* for MIPS GCC, there are no unaligned builtins - so this struct forces
|
||||
the compiler to treat the pointer access as unaligned. */
|
||||
struct ulw
|
||||
{
|
||||
reg_t uli;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
/* first pointer is not aligned while second pointer is. */
|
||||
static int unaligned_words (const struct ulw *a, const reg_t *b,
|
||||
unsigned long words, unsigned long bytes)
|
||||
{
|
||||
#if ENABLE_PREFETCH
|
||||
/* prefetch pointer aligned to 32 byte boundary */
|
||||
const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
|
||||
const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
|
||||
#endif
|
||||
for (; words >= 16; words -= 8) {
|
||||
#if ENABLE_PREFETCH
|
||||
pref_ptr += 8;
|
||||
PREFETCH(pref_ptr, 0);
|
||||
PREFETCH(pref_ptr, 32);
|
||||
|
||||
pref_ptr_a += 8;
|
||||
PREFETCH(pref_ptr_a, 0);
|
||||
PREFETCH(pref_ptr_a, 32);
|
||||
#endif
|
||||
reg_t x0 = a[0].uli, x1 = a[1].uli;
|
||||
reg_t x2 = a[2].uli, x3 = a[3].uli;
|
||||
reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
|
||||
if (x0 != y0)
|
||||
return do_by_bitfields (x0, y0);
|
||||
if (x1 != y1)
|
||||
return do_by_bitfields (x1, y1);
|
||||
if (x2 != y2)
|
||||
return do_by_bitfields (x2, y2);
|
||||
if (x3 != y3)
|
||||
return do_by_bitfields (x3, y3);
|
||||
|
||||
x0 = a[4].uli; x1 = a[5].uli;
|
||||
x2 = a[6].uli; x3 = a[7].uli;
|
||||
y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
|
||||
if (x0 != y0)
|
||||
return do_by_bitfields (x0, y0);
|
||||
if (x1 != y1)
|
||||
return do_by_bitfields (x1, y1);
|
||||
if (x2 != y2)
|
||||
return do_by_bitfields (x2, y2);
|
||||
if (x3 != y3)
|
||||
return do_by_bitfields (x3, y3);
|
||||
|
||||
a += 8;
|
||||
b += 8;
|
||||
}
|
||||
|
||||
for (; words >= 4; words -= 4) {
|
||||
reg_t x0 = a[0].uli, x1 = a[1].uli;
|
||||
reg_t x2 = a[2].uli, x3 = a[3].uli;
|
||||
reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
|
||||
if (x0 != y0)
|
||||
return do_by_bitfields (x0, y0);
|
||||
if (x1 != y1)
|
||||
return do_by_bitfields (x1, y1);
|
||||
if (x2 != y2)
|
||||
return do_by_bitfields (x2, y2);
|
||||
if (x3 != y3)
|
||||
return do_by_bitfields (x3, y3);
|
||||
a += 4;
|
||||
b += 4;
|
||||
}
|
||||
|
||||
/* do remaining words. */
|
||||
while (words--) {
|
||||
reg_t x0 = a->uli;
|
||||
reg_t y0 = *b;
|
||||
a += 1;
|
||||
b += 1;
|
||||
if (x0 != y0)
|
||||
return do_by_bitfields (x0, y0);
|
||||
}
|
||||
|
||||
/* mop up any remaining bytes. */
|
||||
return do_bytes (a, b, bytes);
|
||||
}
|
||||
#else
|
||||
/* no HW support or unaligned lw/ld/ualw/uald instructions. */
|
||||
static int unaligned_words (const reg_t *a, const reg_t *b,
|
||||
unsigned long words, unsigned long bytes)
|
||||
{
|
||||
return do_bytes (a, b, (sizeof (reg_t) * words) + bytes);
|
||||
}
|
||||
#endif /* UNALIGNED_INSTR_SUPPORT */
|
||||
#endif /* HW_UNALIGNED_SUPPORT */
|
||||
|
||||
/* both pointers are aligned, or first isn't and HW support for unaligned. */
|
||||
static int aligned_words (const reg_t *a, const reg_t *b,
|
||||
unsigned long words, unsigned long bytes)
|
||||
{
|
||||
#if ENABLE_PREFETCH
|
||||
/* prefetch pointer aligned to 32 byte boundary */
|
||||
const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
|
||||
const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
|
||||
#endif
|
||||
|
||||
for (; words >= 24; words -= 12) {
|
||||
#if ENABLE_PREFETCH
|
||||
pref_ptr += 12;
|
||||
PREFETCH(pref_ptr, 0);
|
||||
PREFETCH(pref_ptr, 32);
|
||||
PREFETCH(pref_ptr, 64);
|
||||
|
||||
pref_ptr_a += 12;
|
||||
PREFETCH(pref_ptr_a, 0);
|
||||
PREFETCH(pref_ptr_a, 32);
|
||||
PREFETCH(pref_ptr_a, 64);
|
||||
#endif
|
||||
reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
|
||||
reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
|
||||
if (x0 != y0)
|
||||
return do_by_bitfields (x0, y0);
|
||||
if (x1 != y1)
|
||||
return do_by_bitfields (x1, y1);
|
||||
if (x2 != y2)
|
||||
return do_by_bitfields (x2, y2);
|
||||
if (x3 != y3)
|
||||
return do_by_bitfields (x3, y3);
|
||||
|
||||
x0 = a[4]; x1 = a[5]; x2 = a[6]; x3 = a[7];
|
||||
y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
|
||||
if (x0 != y0)
|
||||
return do_by_bitfields (x0, y0);
|
||||
if (x1 != y1)
|
||||
return do_by_bitfields (x1, y1);
|
||||
if (x2 != y2)
|
||||
return do_by_bitfields (x2, y2);
|
||||
if (x3 != y3)
|
||||
return do_by_bitfields (x3, y3);
|
||||
|
||||
x0 = a[8]; x1 = a[9]; x2 = a[10]; x3 = a[11];
|
||||
y0 = b[8]; y1 = b[9]; y2 = b[10]; y3 = b[11];
|
||||
if (x0 != y0)
|
||||
return do_by_bitfields (x0, y0);
|
||||
if (x1 != y1)
|
||||
return do_by_bitfields (x1, y1);
|
||||
if (x2 != y2)
|
||||
return do_by_bitfields (x2, y2);
|
||||
if (x3 != y3)
|
||||
return do_by_bitfields (x3, y3);
|
||||
|
||||
a += 12;
|
||||
b += 12;
|
||||
}
|
||||
|
||||
for (; words >= 4; words -= 4) {
|
||||
reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
|
||||
reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
|
||||
if (x0 != y0)
|
||||
return do_by_bitfields (x0, y0);
|
||||
if (x1 != y1)
|
||||
return do_by_bitfields (x1, y1);
|
||||
if (x2 != y2)
|
||||
return do_by_bitfields (x2, y2);
|
||||
if (x3 != y3)
|
||||
return do_by_bitfields (x3, y3);
|
||||
a += 4;
|
||||
b += 4;
|
||||
}
|
||||
|
||||
/* do remaining words. */
|
||||
while (words--) {
|
||||
reg_t x0 = *a;
|
||||
reg_t y0 = *b;
|
||||
a += 1;
|
||||
b += 1;
|
||||
if (x0 != y0)
|
||||
return do_by_bitfields (x0, y0);
|
||||
}
|
||||
|
||||
/* mop up any remaining bytes. */
|
||||
return do_bytes (a, b, bytes);
|
||||
}
|
||||
|
||||
int memcmp (const void *a, const void *b, size_t len)
|
||||
{
|
||||
unsigned long bytes, words;
|
||||
|
||||
/* shouldn't hit that often. */
|
||||
if (len < sizeof (reg_t) * 4) {
|
||||
return do_bytes (a, b, len);
|
||||
}
|
||||
|
||||
/* Align the second pointer to word/dword alignment.
|
||||
Note that the pointer is only 32-bits for o32/n32 ABIs. For
|
||||
n32, loads are done as 64-bit while address remains 32-bit. */
|
||||
bytes = ((unsigned long) b) % sizeof (reg_t);
|
||||
if (bytes) {
|
||||
int res;
|
||||
bytes = sizeof (reg_t) - bytes;
|
||||
if (bytes > len)
|
||||
bytes = len;
|
||||
res = do_bytes (a, b, bytes);
|
||||
if (res || len == bytes)
|
||||
return res;
|
||||
len -= bytes;
|
||||
a = (const void *) (((unsigned char *) a) + bytes);
|
||||
b = (const void *) (((unsigned char *) b) + bytes);
|
||||
}
|
||||
|
||||
/* Second pointer now aligned. */
|
||||
words = len / sizeof (reg_t);
|
||||
bytes = len % sizeof (reg_t);
|
||||
|
||||
#if HW_UNALIGNED_SUPPORT
|
||||
/* treat possible unaligned first pointer as aligned. */
|
||||
return aligned_words (a, b, words, bytes);
|
||||
#else
|
||||
if (((unsigned long) a) % sizeof (reg_t) == 0) {
|
||||
return aligned_words (a, b, words, bytes);
|
||||
}
|
||||
/* need to use unaligned instructions on first pointer. */
|
||||
return unaligned_words (a, b, words, bytes);
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -1,852 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012-2015
|
||||
* MIPS Technologies, Inc., California.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef __ANDROID__
|
||||
# include <private/bionic_asm.h>
|
||||
# define USE_MEMMOVE_FOR_OVERLAP
|
||||
# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
|
||||
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
|
||||
#elif _LIBC
|
||||
# include <sysdep.h>
|
||||
# include <regdef.h>
|
||||
# include <sys/asm.h>
|
||||
# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
|
||||
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
|
||||
#elif _COMPILING_NEWLIB
|
||||
# include "machine/asm.h"
|
||||
# include "machine/regdef.h"
|
||||
# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
|
||||
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
|
||||
#else
|
||||
# include <regdef.h>
|
||||
# include <sys/asm.h>
|
||||
#endif
|
||||
|
||||
/* Check to see if the MIPS architecture we are compiling for supports
|
||||
* prefetching.
|
||||
*/
|
||||
|
||||
#if (__mips == 4) || (__mips == 5) || (__mips == 32) || (__mips == 64)
|
||||
# ifndef DISABLE_PREFETCH
|
||||
# define USE_PREFETCH
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(_MIPS_SIM) && ((_MIPS_SIM == _ABI64) || (_MIPS_SIM == _ABIN32))
|
||||
# ifndef DISABLE_DOUBLE
|
||||
# define USE_DOUBLE
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
#if __mips_isa_rev > 5
|
||||
# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
|
||||
# undef PREFETCH_STORE_HINT
|
||||
# define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED
|
||||
# endif
|
||||
# define R6_CODE
|
||||
#endif
|
||||
|
||||
/* Some asm.h files do not have the L macro definition. */
|
||||
#ifndef L
|
||||
# if _MIPS_SIM == _ABIO32
|
||||
# define L(label) $L ## label
|
||||
# else
|
||||
# define L(label) .L ## label
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Some asm.h files do not have the PTR_ADDIU macro definition. */
|
||||
#ifndef PTR_ADDIU
|
||||
# if _MIPS_SIM == _ABIO32
|
||||
# define PTR_ADDIU addiu
|
||||
# else
|
||||
# define PTR_ADDIU daddiu
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Some asm.h files do not have the PTR_SRA macro definition. */
|
||||
#ifndef PTR_SRA
|
||||
# if _MIPS_SIM == _ABIO32
|
||||
# define PTR_SRA sra
|
||||
# else
|
||||
# define PTR_SRA dsra
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* New R6 instructions that may not be in asm.h. */
|
||||
#ifndef PTR_LSA
|
||||
# if _MIPS_SIM == _ABIO32
|
||||
# define PTR_LSA lsa
|
||||
# else
|
||||
# define PTR_LSA dlsa
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Using PREFETCH_HINT_LOAD_STREAMED instead of PREFETCH_LOAD on load
|
||||
* prefetches appears to offer a slight performance advantage.
|
||||
*
|
||||
* Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE
|
||||
* or PREFETCH_STORE_STREAMED offers a large performance advantage
|
||||
* but PREPAREFORSTORE has some special restrictions to consider.
|
||||
*
|
||||
* Prefetch with the 'prepare for store' hint does not copy a memory
|
||||
* location into the cache, it just allocates a cache line and zeros
|
||||
* it out. This means that if you do not write to the entire cache
|
||||
* line before writing it out to memory some data will get zero'ed out
|
||||
* when the cache line is written back to memory and data will be lost.
|
||||
*
|
||||
* Also if you are using this memcpy to copy overlapping buffers it may
|
||||
* not behave correctly when using the 'prepare for store' hint. If you
|
||||
* use the 'prepare for store' prefetch on a memory area that is in the
|
||||
* memcpy source (as well as the memcpy destination), then you will get
|
||||
* some data zero'ed out before you have a chance to read it and data will
|
||||
* be lost.
|
||||
*
|
||||
* If you are going to use this memcpy routine with the 'prepare for store'
|
||||
* prefetch you may want to set USE_MEMMOVE_FOR_OVERLAP in order to avoid
|
||||
* the problem of running memcpy on overlapping buffers.
|
||||
*
|
||||
* There are ifdef'ed sections of this memcpy to make sure that it does not
|
||||
* do prefetches on cache lines that are not going to be completely written.
|
||||
* This code is only needed and only used when PREFETCH_STORE_HINT is set to
|
||||
* PREFETCH_HINT_PREPAREFORSTORE. This code assumes that cache lines are
|
||||
* 32 bytes and if the cache line is larger it will not work correctly.
|
||||
*/
|
||||
|
||||
#ifdef USE_PREFETCH
|
||||
# define PREFETCH_HINT_LOAD 0
|
||||
# define PREFETCH_HINT_STORE 1
|
||||
# define PREFETCH_HINT_LOAD_STREAMED 4
|
||||
# define PREFETCH_HINT_STORE_STREAMED 5
|
||||
# define PREFETCH_HINT_LOAD_RETAINED 6
|
||||
# define PREFETCH_HINT_STORE_RETAINED 7
|
||||
# define PREFETCH_HINT_WRITEBACK_INVAL 25
|
||||
# define PREFETCH_HINT_PREPAREFORSTORE 30
|
||||
|
||||
/*
|
||||
* If we have not picked out what hints to use at this point use the
|
||||
* standard load and store prefetch hints.
|
||||
*/
|
||||
# ifndef PREFETCH_STORE_HINT
|
||||
# define PREFETCH_STORE_HINT PREFETCH_HINT_STORE
|
||||
# endif
|
||||
# ifndef PREFETCH_LOAD_HINT
|
||||
# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD
|
||||
# endif
|
||||
|
||||
/*
|
||||
* We double everything when USE_DOUBLE is true so we do 2 prefetches to
|
||||
* get 64 bytes in that case. The assumption is that each individual
|
||||
* prefetch brings in 32 bytes.
|
||||
*/
|
||||
|
||||
# ifdef USE_DOUBLE
|
||||
# define PREFETCH_CHUNK 64
|
||||
# define PREFETCH_FOR_LOAD(chunk, reg) \
|
||||
pref PREFETCH_LOAD_HINT, (chunk)*64(reg); \
|
||||
pref PREFETCH_LOAD_HINT, ((chunk)*64)+32(reg)
|
||||
# define PREFETCH_FOR_STORE(chunk, reg) \
|
||||
pref PREFETCH_STORE_HINT, (chunk)*64(reg); \
|
||||
pref PREFETCH_STORE_HINT, ((chunk)*64)+32(reg)
|
||||
# else
|
||||
# define PREFETCH_CHUNK 32
|
||||
# define PREFETCH_FOR_LOAD(chunk, reg) \
|
||||
pref PREFETCH_LOAD_HINT, (chunk)*32(reg)
|
||||
# define PREFETCH_FOR_STORE(chunk, reg) \
|
||||
pref PREFETCH_STORE_HINT, (chunk)*32(reg)
|
||||
# endif
|
||||
/* MAX_PREFETCH_SIZE is the maximum size of a prefetch, it must not be less
|
||||
* than PREFETCH_CHUNK, the assumed size of each prefetch. If the real size
|
||||
* of a prefetch is greater than MAX_PREFETCH_SIZE and the PREPAREFORSTORE
|
||||
* hint is used, the code will not work correctly. If PREPAREFORSTORE is not
|
||||
* used then MAX_PREFETCH_SIZE does not matter. */
|
||||
# define MAX_PREFETCH_SIZE 128
|
||||
/* PREFETCH_LIMIT is set based on the fact that we never use an offset greater
|
||||
* than 5 on a STORE prefetch and that a single prefetch can never be larger
|
||||
* than MAX_PREFETCH_SIZE. We add the extra 32 when USE_DOUBLE is set because
|
||||
* we actually do two prefetches in that case, one 32 bytes after the other. */
|
||||
# ifdef USE_DOUBLE
|
||||
# define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + 32 + MAX_PREFETCH_SIZE
|
||||
# else
|
||||
# define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + MAX_PREFETCH_SIZE
|
||||
# endif
|
||||
# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) \
|
||||
&& ((PREFETCH_CHUNK * 4) < MAX_PREFETCH_SIZE)
|
||||
/* We cannot handle this because the initial prefetches may fetch bytes that
|
||||
* are before the buffer being copied. We start copies with an offset
|
||||
* of 4 so avoid this situation when using PREPAREFORSTORE. */
|
||||
#error "PREFETCH_CHUNK is too large and/or MAX_PREFETCH_SIZE is too small."
|
||||
# endif
|
||||
#else /* USE_PREFETCH not defined */
|
||||
# define PREFETCH_FOR_LOAD(offset, reg)
|
||||
# define PREFETCH_FOR_STORE(offset, reg)
|
||||
#endif
|
||||
|
||||
/* Allow the routine to be named something else if desired. */
|
||||
#ifndef MEMCPY_NAME
|
||||
# define MEMCPY_NAME memcpy
|
||||
#endif
|
||||
|
||||
/* We use these 32/64 bit registers as temporaries to do the copying. */
|
||||
#define REG0 t0
|
||||
#define REG1 t1
|
||||
#define REG2 t2
|
||||
#define REG3 t3
|
||||
#if defined(_MIPS_SIM) && (_MIPS_SIM == _ABIO32 || _MIPS_SIM == _ABIO64)
|
||||
# define REG4 t4
|
||||
# define REG5 t5
|
||||
# define REG6 t6
|
||||
# define REG7 t7
|
||||
#else
|
||||
# define REG4 ta0
|
||||
# define REG5 ta1
|
||||
# define REG6 ta2
|
||||
# define REG7 ta3
|
||||
#endif
|
||||
|
||||
/* We load/store 64 bits at a time when USE_DOUBLE is true.
|
||||
* The C_ prefix stands for CHUNK and is used to avoid macro name
|
||||
* conflicts with system header files. */
|
||||
|
||||
#ifdef USE_DOUBLE
|
||||
# define C_ST sd
|
||||
# define C_LD ld
|
||||
# if __MIPSEB
|
||||
# define C_LDHI ldl /* high part is left in big-endian */
|
||||
# define C_STHI sdl /* high part is left in big-endian */
|
||||
# define C_LDLO ldr /* low part is right in big-endian */
|
||||
# define C_STLO sdr /* low part is right in big-endian */
|
||||
# else
|
||||
# define C_LDHI ldr /* high part is right in little-endian */
|
||||
# define C_STHI sdr /* high part is right in little-endian */
|
||||
# define C_LDLO ldl /* low part is left in little-endian */
|
||||
# define C_STLO sdl /* low part is left in little-endian */
|
||||
# endif
|
||||
# define C_ALIGN dalign /* r6 align instruction */
|
||||
#else
|
||||
# define C_ST sw
|
||||
# define C_LD lw
|
||||
# if __MIPSEB
|
||||
# define C_LDHI lwl /* high part is left in big-endian */
|
||||
# define C_STHI swl /* high part is left in big-endian */
|
||||
# define C_LDLO lwr /* low part is right in big-endian */
|
||||
# define C_STLO swr /* low part is right in big-endian */
|
||||
# else
|
||||
# define C_LDHI lwr /* high part is right in little-endian */
|
||||
# define C_STHI swr /* high part is right in little-endian */
|
||||
# define C_LDLO lwl /* low part is left in little-endian */
|
||||
# define C_STLO swl /* low part is left in little-endian */
|
||||
# endif
|
||||
# define C_ALIGN align /* r6 align instruction */
|
||||
#endif
|
||||
|
||||
/* Bookkeeping values for 32 vs. 64 bit mode. */
|
||||
#ifdef USE_DOUBLE
|
||||
# define NSIZE 8
|
||||
# define NSIZEMASK 0x3f
|
||||
# define NSIZEDMASK 0x7f
|
||||
#else
|
||||
# define NSIZE 4
|
||||
# define NSIZEMASK 0x1f
|
||||
# define NSIZEDMASK 0x3f
|
||||
#endif
|
||||
#define UNIT(unit) ((unit)*NSIZE)
|
||||
#define UNITM1(unit) (((unit)*NSIZE)-1)
|
||||
|
||||
#ifdef __ANDROID__
|
||||
LEAF(MEMCPY_NAME, 0)
|
||||
#else
|
||||
LEAF(MEMCPY_NAME)
|
||||
#endif
|
||||
.set nomips16
|
||||
.set noreorder
|
||||
/*
|
||||
* Below we handle the case where memcpy is called with overlapping src and dst.
|
||||
* Although memcpy is not required to handle this case, some parts of Android
|
||||
* like Skia rely on such usage. We call memmove to handle such cases.
|
||||
*/
|
||||
#ifdef USE_MEMMOVE_FOR_OVERLAP
|
||||
PTR_SUBU t0,a0,a1
|
||||
PTR_SRA t2,t0,31
|
||||
xor t1,t0,t2
|
||||
PTR_SUBU t0,t1,t2
|
||||
sltu t2,t0,a2
|
||||
beq t2,zero,L(memcpy)
|
||||
nop
|
||||
#if defined(__LP64__)
|
||||
daddiu sp,sp,-8
|
||||
SETUP_GP64(0,MEMCPY_NAME)
|
||||
LA t9,memmove
|
||||
RESTORE_GP64
|
||||
jr t9
|
||||
daddiu sp,sp,8
|
||||
#else
|
||||
LA t9,memmove
|
||||
jr t9
|
||||
nop
|
||||
#endif
|
||||
L(memcpy):
|
||||
#endif
|
||||
/*
|
||||
* If the size is less than 2*NSIZE (8 or 16), go to L(lastb). Regardless of
|
||||
* size, copy dst pointer to v0 for the return value.
|
||||
*/
|
||||
slti t2,a2,(2 * NSIZE)
|
||||
bne t2,zero,L(lastb)
|
||||
#if defined(RETURN_FIRST_PREFETCH) || defined(RETURN_LAST_PREFETCH)
|
||||
move v0,zero
|
||||
#else
|
||||
move v0,a0
|
||||
#endif
|
||||
|
||||
#ifndef R6_CODE
|
||||
|
||||
/*
|
||||
* If src and dst have different alignments, go to L(unaligned), if they
|
||||
* have the same alignment (but are not actually aligned) do a partial
|
||||
* load/store to make them aligned. If they are both already aligned
|
||||
* we can start copying at L(aligned).
|
||||
*/
|
||||
xor t8,a1,a0
|
||||
andi t8,t8,(NSIZE-1) /* t8 is a0/a1 word-displacement */
|
||||
bne t8,zero,L(unaligned)
|
||||
PTR_SUBU a3, zero, a0
|
||||
|
||||
andi a3,a3,(NSIZE-1) /* copy a3 bytes to align a0/a1 */
|
||||
beq a3,zero,L(aligned) /* if a3=0, it is already aligned */
|
||||
PTR_SUBU a2,a2,a3 /* a2 is the remining bytes count */
|
||||
|
||||
C_LDHI t8,0(a1)
|
||||
PTR_ADDU a1,a1,a3
|
||||
C_STHI t8,0(a0)
|
||||
PTR_ADDU a0,a0,a3
|
||||
|
||||
#else /* R6_CODE */
|
||||
|
||||
/*
|
||||
* Align the destination and hope that the source gets aligned too. If it
|
||||
* doesn't we jump to L(r6_unaligned*) to do unaligned copies using the r6
|
||||
* align instruction.
|
||||
*/
|
||||
andi t8,a0,7
|
||||
lapc t9,L(atable)
|
||||
PTR_LSA t9,t8,t9,2
|
||||
jrc t9
|
||||
L(atable):
|
||||
bc L(lb0)
|
||||
bc L(lb7)
|
||||
bc L(lb6)
|
||||
bc L(lb5)
|
||||
bc L(lb4)
|
||||
bc L(lb3)
|
||||
bc L(lb2)
|
||||
bc L(lb1)
|
||||
L(lb7):
|
||||
lb a3, 6(a1)
|
||||
sb a3, 6(a0)
|
||||
L(lb6):
|
||||
lb a3, 5(a1)
|
||||
sb a3, 5(a0)
|
||||
L(lb5):
|
||||
lb a3, 4(a1)
|
||||
sb a3, 4(a0)
|
||||
L(lb4):
|
||||
lb a3, 3(a1)
|
||||
sb a3, 3(a0)
|
||||
L(lb3):
|
||||
lb a3, 2(a1)
|
||||
sb a3, 2(a0)
|
||||
L(lb2):
|
||||
lb a3, 1(a1)
|
||||
sb a3, 1(a0)
|
||||
L(lb1):
|
||||
lb a3, 0(a1)
|
||||
sb a3, 0(a0)
|
||||
|
||||
li t9,8
|
||||
subu t8,t9,t8
|
||||
PTR_SUBU a2,a2,t8
|
||||
PTR_ADDU a0,a0,t8
|
||||
PTR_ADDU a1,a1,t8
|
||||
L(lb0):
|
||||
|
||||
andi t8,a1,(NSIZE-1)
|
||||
lapc t9,L(jtable)
|
||||
PTR_LSA t9,t8,t9,2
|
||||
jrc t9
|
||||
L(jtable):
|
||||
bc L(aligned)
|
||||
bc L(r6_unaligned1)
|
||||
bc L(r6_unaligned2)
|
||||
bc L(r6_unaligned3)
|
||||
# ifdef USE_DOUBLE
|
||||
bc L(r6_unaligned4)
|
||||
bc L(r6_unaligned5)
|
||||
bc L(r6_unaligned6)
|
||||
bc L(r6_unaligned7)
|
||||
# endif
|
||||
#endif /* R6_CODE */
|
||||
|
||||
L(aligned):
|
||||
|
||||
/*
|
||||
* Now dst/src are both aligned to (word or double word) aligned addresses
|
||||
* Set a2 to count how many bytes we have to copy after all the 64/128 byte
|
||||
* chunks are copied and a3 to the dst pointer after all the 64/128 byte
|
||||
* chunks have been copied. We will loop, incrementing a0 and a1 until a0
|
||||
* equals a3.
|
||||
*/
|
||||
|
||||
andi t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
|
||||
beq a2,t8,L(chkw) /* if a2==t8, no 64-byte/128-byte chunks */
|
||||
PTR_SUBU a3,a2,t8 /* subtract from a2 the reminder */
|
||||
PTR_ADDU a3,a0,a3 /* Now a3 is the final dst after loop */
|
||||
|
||||
/* When in the loop we may prefetch with the 'prepare to store' hint,
|
||||
* in this case the a0+x should not be past the "t0-32" address. This
|
||||
* means: for x=128 the last "safe" a0 address is "t0-160". Alternatively,
|
||||
* for x=64 the last "safe" a0 address is "t0-96" In the current version we
|
||||
* will use "prefetch hint,128(a0)", so "t0-160" is the limit.
|
||||
*/
|
||||
#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
|
||||
PTR_ADDU t0,a0,a2 /* t0 is the "past the end" address */
|
||||
PTR_SUBU t9,t0,PREFETCH_LIMIT /* t9 is the "last safe pref" address */
|
||||
#endif
|
||||
PREFETCH_FOR_LOAD (0, a1)
|
||||
PREFETCH_FOR_LOAD (1, a1)
|
||||
PREFETCH_FOR_LOAD (2, a1)
|
||||
PREFETCH_FOR_LOAD (3, a1)
|
||||
#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
|
||||
PREFETCH_FOR_STORE (1, a0)
|
||||
PREFETCH_FOR_STORE (2, a0)
|
||||
PREFETCH_FOR_STORE (3, a0)
|
||||
#endif
|
||||
#if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH)
|
||||
# if PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE
|
||||
sltu v1,t9,a0
|
||||
bgtz v1,L(skip_set)
|
||||
nop
|
||||
PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4)
|
||||
L(skip_set):
|
||||
# else
|
||||
PTR_ADDIU v0,a0,(PREFETCH_CHUNK*1)
|
||||
# endif
|
||||
#endif
|
||||
#if defined(RETURN_LAST_PREFETCH) && defined(USE_PREFETCH) \
|
||||
&& (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
|
||||
PTR_ADDIU v0,a0,(PREFETCH_CHUNK*3)
|
||||
# ifdef USE_DOUBLE
|
||||
PTR_ADDIU v0,v0,32
|
||||
# endif
|
||||
#endif
|
||||
L(loop16w):
|
||||
C_LD t0,UNIT(0)(a1)
|
||||
#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
|
||||
sltu v1,t9,a0 /* If a0 > t9 don't use next prefetch */
|
||||
bgtz v1,L(skip_pref)
|
||||
#endif
|
||||
C_LD t1,UNIT(1)(a1)
|
||||
#ifndef R6_CODE
|
||||
PREFETCH_FOR_STORE (4, a0)
|
||||
PREFETCH_FOR_STORE (5, a0)
|
||||
#else
|
||||
PREFETCH_FOR_STORE (2, a0)
|
||||
#endif
|
||||
#if defined(RETURN_LAST_PREFETCH) && defined(USE_PREFETCH)
|
||||
PTR_ADDIU v0,a0,(PREFETCH_CHUNK*5)
|
||||
# ifdef USE_DOUBLE
|
||||
PTR_ADDIU v0,v0,32
|
||||
# endif
|
||||
#endif
|
||||
L(skip_pref):
|
||||
C_LD REG2,UNIT(2)(a1)
|
||||
C_LD REG3,UNIT(3)(a1)
|
||||
C_LD REG4,UNIT(4)(a1)
|
||||
C_LD REG5,UNIT(5)(a1)
|
||||
C_LD REG6,UNIT(6)(a1)
|
||||
C_LD REG7,UNIT(7)(a1)
|
||||
#ifndef R6_CODE
|
||||
PREFETCH_FOR_LOAD (4, a1)
|
||||
#else
|
||||
PREFETCH_FOR_LOAD (3, a1)
|
||||
#endif
|
||||
C_ST t0,UNIT(0)(a0)
|
||||
C_ST t1,UNIT(1)(a0)
|
||||
C_ST REG2,UNIT(2)(a0)
|
||||
C_ST REG3,UNIT(3)(a0)
|
||||
C_ST REG4,UNIT(4)(a0)
|
||||
C_ST REG5,UNIT(5)(a0)
|
||||
C_ST REG6,UNIT(6)(a0)
|
||||
C_ST REG7,UNIT(7)(a0)
|
||||
|
||||
C_LD t0,UNIT(8)(a1)
|
||||
C_LD t1,UNIT(9)(a1)
|
||||
C_LD REG2,UNIT(10)(a1)
|
||||
C_LD REG3,UNIT(11)(a1)
|
||||
C_LD REG4,UNIT(12)(a1)
|
||||
C_LD REG5,UNIT(13)(a1)
|
||||
C_LD REG6,UNIT(14)(a1)
|
||||
C_LD REG7,UNIT(15)(a1)
|
||||
#ifndef R6_CODE
|
||||
PREFETCH_FOR_LOAD (5, a1)
|
||||
#endif
|
||||
C_ST t0,UNIT(8)(a0)
|
||||
C_ST t1,UNIT(9)(a0)
|
||||
C_ST REG2,UNIT(10)(a0)
|
||||
C_ST REG3,UNIT(11)(a0)
|
||||
C_ST REG4,UNIT(12)(a0)
|
||||
C_ST REG5,UNIT(13)(a0)
|
||||
C_ST REG6,UNIT(14)(a0)
|
||||
C_ST REG7,UNIT(15)(a0)
|
||||
PTR_ADDIU a0,a0,UNIT(16) /* adding 64/128 to dest */
|
||||
bne a0,a3,L(loop16w)
|
||||
PTR_ADDIU a1,a1,UNIT(16) /* adding 64/128 to src */
|
||||
move a2,t8
|
||||
|
||||
/* Here we have src and dest word-aligned but less than 64-bytes or
|
||||
* 128 bytes to go. Check for a 32(64) byte chunk and copy it if there
|
||||
* is one. Otherwise jump down to L(chk1w) to handle the tail end of
|
||||
* the copy.
|
||||
*/
|
||||
|
||||
L(chkw):
|
||||
PREFETCH_FOR_LOAD (0, a1)
|
||||
andi t8,a2,NSIZEMASK /* Is there a 32-byte/64-byte chunk. */
|
||||
/* The t8 is the remainder count past 32-bytes */
|
||||
beq a2,t8,L(chk1w) /* When a2=t8, no 32-byte chunk */
|
||||
nop
|
||||
C_LD t0,UNIT(0)(a1)
|
||||
C_LD t1,UNIT(1)(a1)
|
||||
C_LD REG2,UNIT(2)(a1)
|
||||
C_LD REG3,UNIT(3)(a1)
|
||||
C_LD REG4,UNIT(4)(a1)
|
||||
C_LD REG5,UNIT(5)(a1)
|
||||
C_LD REG6,UNIT(6)(a1)
|
||||
C_LD REG7,UNIT(7)(a1)
|
||||
PTR_ADDIU a1,a1,UNIT(8)
|
||||
C_ST t0,UNIT(0)(a0)
|
||||
C_ST t1,UNIT(1)(a0)
|
||||
C_ST REG2,UNIT(2)(a0)
|
||||
C_ST REG3,UNIT(3)(a0)
|
||||
C_ST REG4,UNIT(4)(a0)
|
||||
C_ST REG5,UNIT(5)(a0)
|
||||
C_ST REG6,UNIT(6)(a0)
|
||||
C_ST REG7,UNIT(7)(a0)
|
||||
PTR_ADDIU a0,a0,UNIT(8)
|
||||
|
||||
/*
|
||||
* Here we have less than 32(64) bytes to copy. Set up for a loop to
|
||||
* copy one word (or double word) at a time. Set a2 to count how many
|
||||
* bytes we have to copy after all the word (or double word) chunks are
|
||||
* copied and a3 to the dst pointer after all the (d)word chunks have
|
||||
* been copied. We will loop, incrementing a0 and a1 until a0 equals a3.
|
||||
*/
|
||||
L(chk1w):
|
||||
andi a2,t8,(NSIZE-1) /* a2 is the reminder past one (d)word chunks */
|
||||
beq a2,t8,L(lastb)
|
||||
PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */
|
||||
PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */
|
||||
|
||||
/* copying in words (4-byte or 8-byte chunks) */
|
||||
L(wordCopy_loop):
|
||||
C_LD REG3,UNIT(0)(a1)
|
||||
PTR_ADDIU a0,a0,UNIT(1)
|
||||
PTR_ADDIU a1,a1,UNIT(1)
|
||||
bne a0,a3,L(wordCopy_loop)
|
||||
C_ST REG3,UNIT(-1)(a0)
|
||||
|
||||
/* Copy the last 8 (or 16) bytes */
|
||||
L(lastb):
|
||||
blez a2,L(leave)
|
||||
PTR_ADDU a3,a0,a2 /* a3 is the last dst address */
|
||||
L(lastbloop):
|
||||
lb v1,0(a1)
|
||||
PTR_ADDIU a0,a0,1
|
||||
PTR_ADDIU a1,a1,1
|
||||
bne a0,a3,L(lastbloop)
|
||||
sb v1,-1(a0)
|
||||
L(leave):
|
||||
j ra
|
||||
nop
|
||||
|
||||
#ifndef R6_CODE
|
||||
/*
|
||||
* UNALIGNED case, got here with a3 = "negu a0"
|
||||
* This code is nearly identical to the aligned code above
|
||||
* but only the destination (not the source) gets aligned
|
||||
* so we need to do partial loads of the source followed
|
||||
* by normal stores to the destination (once we have aligned
|
||||
* the destination).
|
||||
*/
|
||||
|
||||
L(unaligned):
|
||||
andi a3,a3,(NSIZE-1) /* copy a3 bytes to align a0/a1 */
|
||||
beqz a3,L(ua_chk16w) /* if a3=0, it is already aligned */
|
||||
PTR_SUBU a2,a2,a3 /* a2 is the remining bytes count */
|
||||
|
||||
C_LDHI v1,UNIT(0)(a1)
|
||||
C_LDLO v1,UNITM1(1)(a1)
|
||||
PTR_ADDU a1,a1,a3
|
||||
C_STHI v1,UNIT(0)(a0)
|
||||
PTR_ADDU a0,a0,a3
|
||||
|
||||
/*
|
||||
* Now the destination (but not the source) is aligned
|
||||
* Set a2 to count how many bytes we have to copy after all the 64/128 byte
|
||||
* chunks are copied and a3 to the dst pointer after all the 64/128 byte
|
||||
* chunks have been copied. We will loop, incrementing a0 and a1 until a0
|
||||
* equals a3.
|
||||
*/
|
||||
|
||||
L(ua_chk16w):
|
||||
andi t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
|
||||
beq a2,t8,L(ua_chkw) /* if a2==t8, no 64-byte/128-byte chunks */
|
||||
PTR_SUBU a3,a2,t8 /* subtract from a2 the reminder */
|
||||
PTR_ADDU a3,a0,a3 /* Now a3 is the final dst after loop */
|
||||
|
||||
# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
|
||||
PTR_ADDU t0,a0,a2 /* t0 is the "past the end" address */
|
||||
PTR_SUBU t9,t0,PREFETCH_LIMIT /* t9 is the "last safe pref" address */
|
||||
# endif
|
||||
PREFETCH_FOR_LOAD (0, a1)
|
||||
PREFETCH_FOR_LOAD (1, a1)
|
||||
PREFETCH_FOR_LOAD (2, a1)
|
||||
# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
|
||||
PREFETCH_FOR_STORE (1, a0)
|
||||
PREFETCH_FOR_STORE (2, a0)
|
||||
PREFETCH_FOR_STORE (3, a0)
|
||||
# endif
|
||||
# if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH)
|
||||
# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
|
||||
sltu v1,t9,a0
|
||||
bgtz v1,L(ua_skip_set)
|
||||
nop
|
||||
PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4)
|
||||
L(ua_skip_set):
|
||||
# else
|
||||
PTR_ADDIU v0,a0,(PREFETCH_CHUNK*1)
|
||||
# endif
|
||||
# endif
|
||||
L(ua_loop16w):
|
||||
PREFETCH_FOR_LOAD (3, a1)
|
||||
C_LDHI t0,UNIT(0)(a1)
|
||||
C_LDHI t1,UNIT(1)(a1)
|
||||
C_LDHI REG2,UNIT(2)(a1)
|
||||
# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
|
||||
sltu v1,t9,a0
|
||||
bgtz v1,L(ua_skip_pref)
|
||||
# endif
|
||||
C_LDHI REG3,UNIT(3)(a1)
|
||||
PREFETCH_FOR_STORE (4, a0)
|
||||
PREFETCH_FOR_STORE (5, a0)
|
||||
L(ua_skip_pref):
|
||||
C_LDHI REG4,UNIT(4)(a1)
|
||||
C_LDHI REG5,UNIT(5)(a1)
|
||||
C_LDHI REG6,UNIT(6)(a1)
|
||||
C_LDHI REG7,UNIT(7)(a1)
|
||||
C_LDLO t0,UNITM1(1)(a1)
|
||||
C_LDLO t1,UNITM1(2)(a1)
|
||||
C_LDLO REG2,UNITM1(3)(a1)
|
||||
C_LDLO REG3,UNITM1(4)(a1)
|
||||
C_LDLO REG4,UNITM1(5)(a1)
|
||||
C_LDLO REG5,UNITM1(6)(a1)
|
||||
C_LDLO REG6,UNITM1(7)(a1)
|
||||
C_LDLO REG7,UNITM1(8)(a1)
|
||||
PREFETCH_FOR_LOAD (4, a1)
|
||||
C_ST t0,UNIT(0)(a0)
|
||||
C_ST t1,UNIT(1)(a0)
|
||||
C_ST REG2,UNIT(2)(a0)
|
||||
C_ST REG3,UNIT(3)(a0)
|
||||
C_ST REG4,UNIT(4)(a0)
|
||||
C_ST REG5,UNIT(5)(a0)
|
||||
C_ST REG6,UNIT(6)(a0)
|
||||
C_ST REG7,UNIT(7)(a0)
|
||||
C_LDHI t0,UNIT(8)(a1)
|
||||
C_LDHI t1,UNIT(9)(a1)
|
||||
C_LDHI REG2,UNIT(10)(a1)
|
||||
C_LDHI REG3,UNIT(11)(a1)
|
||||
C_LDHI REG4,UNIT(12)(a1)
|
||||
C_LDHI REG5,UNIT(13)(a1)
|
||||
C_LDHI REG6,UNIT(14)(a1)
|
||||
C_LDHI REG7,UNIT(15)(a1)
|
||||
C_LDLO t0,UNITM1(9)(a1)
|
||||
C_LDLO t1,UNITM1(10)(a1)
|
||||
C_LDLO REG2,UNITM1(11)(a1)
|
||||
C_LDLO REG3,UNITM1(12)(a1)
|
||||
C_LDLO REG4,UNITM1(13)(a1)
|
||||
C_LDLO REG5,UNITM1(14)(a1)
|
||||
C_LDLO REG6,UNITM1(15)(a1)
|
||||
C_LDLO REG7,UNITM1(16)(a1)
|
||||
PREFETCH_FOR_LOAD (5, a1)
|
||||
C_ST t0,UNIT(8)(a0)
|
||||
C_ST t1,UNIT(9)(a0)
|
||||
C_ST REG2,UNIT(10)(a0)
|
||||
C_ST REG3,UNIT(11)(a0)
|
||||
C_ST REG4,UNIT(12)(a0)
|
||||
C_ST REG5,UNIT(13)(a0)
|
||||
C_ST REG6,UNIT(14)(a0)
|
||||
C_ST REG7,UNIT(15)(a0)
|
||||
PTR_ADDIU a0,a0,UNIT(16) /* adding 64/128 to dest */
|
||||
bne a0,a3,L(ua_loop16w)
|
||||
PTR_ADDIU a1,a1,UNIT(16) /* adding 64/128 to src */
|
||||
move a2,t8
|
||||
|
||||
/* Here we have src and dest word-aligned but less than 64-bytes or
|
||||
* 128 bytes to go. Check for a 32(64) byte chunk and copy it if there
|
||||
* is one. Otherwise jump down to L(ua_chk1w) to handle the tail end of
|
||||
* the copy. */
|
||||
|
||||
L(ua_chkw):
|
||||
PREFETCH_FOR_LOAD (0, a1)
|
||||
andi t8,a2,NSIZEMASK /* Is there a 32-byte/64-byte chunk. */
|
||||
/* t8 is the remainder count past 32-bytes */
|
||||
beq a2,t8,L(ua_chk1w) /* When a2=t8, no 32-byte chunk */
|
||||
nop
|
||||
C_LDHI t0,UNIT(0)(a1)
|
||||
C_LDHI t1,UNIT(1)(a1)
|
||||
C_LDHI REG2,UNIT(2)(a1)
|
||||
C_LDHI REG3,UNIT(3)(a1)
|
||||
C_LDHI REG4,UNIT(4)(a1)
|
||||
C_LDHI REG5,UNIT(5)(a1)
|
||||
C_LDHI REG6,UNIT(6)(a1)
|
||||
C_LDHI REG7,UNIT(7)(a1)
|
||||
C_LDLO t0,UNITM1(1)(a1)
|
||||
C_LDLO t1,UNITM1(2)(a1)
|
||||
C_LDLO REG2,UNITM1(3)(a1)
|
||||
C_LDLO REG3,UNITM1(4)(a1)
|
||||
C_LDLO REG4,UNITM1(5)(a1)
|
||||
C_LDLO REG5,UNITM1(6)(a1)
|
||||
C_LDLO REG6,UNITM1(7)(a1)
|
||||
C_LDLO REG7,UNITM1(8)(a1)
|
||||
PTR_ADDIU a1,a1,UNIT(8)
|
||||
C_ST t0,UNIT(0)(a0)
|
||||
C_ST t1,UNIT(1)(a0)
|
||||
C_ST REG2,UNIT(2)(a0)
|
||||
C_ST REG3,UNIT(3)(a0)
|
||||
C_ST REG4,UNIT(4)(a0)
|
||||
C_ST REG5,UNIT(5)(a0)
|
||||
C_ST REG6,UNIT(6)(a0)
|
||||
C_ST REG7,UNIT(7)(a0)
|
||||
PTR_ADDIU a0,a0,UNIT(8)
|
||||
/*
|
||||
* Here we have less than 32(64) bytes to copy. Set up for a loop to
|
||||
* copy one word (or double word) at a time.
|
||||
*/
|
||||
L(ua_chk1w):
|
||||
andi a2,t8,(NSIZE-1) /* a2 is the reminder past one (d)word chunks */
|
||||
beq a2,t8,L(ua_smallCopy)
|
||||
PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */
|
||||
PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */
|
||||
|
||||
/* copying in words (4-byte or 8-byte chunks) */
|
||||
L(ua_wordCopy_loop):
|
||||
C_LDHI v1,UNIT(0)(a1)
|
||||
C_LDLO v1,UNITM1(1)(a1)
|
||||
PTR_ADDIU a0,a0,UNIT(1)
|
||||
PTR_ADDIU a1,a1,UNIT(1)
|
||||
bne a0,a3,L(ua_wordCopy_loop)
|
||||
C_ST v1,UNIT(-1)(a0)
|
||||
|
||||
/* Copy the last 8 (or 16) bytes */
|
||||
L(ua_smallCopy):
|
||||
beqz a2,L(leave)
|
||||
PTR_ADDU a3,a0,a2 /* a3 is the last dst address */
|
||||
L(ua_smallCopy_loop):
|
||||
lb v1,0(a1)
|
||||
PTR_ADDIU a0,a0,1
|
||||
PTR_ADDIU a1,a1,1
|
||||
bne a0,a3,L(ua_smallCopy_loop)
|
||||
sb v1,-1(a0)
|
||||
|
||||
j ra
|
||||
nop
|
||||
|
||||
#else /* R6_CODE */
|
||||
|
||||
# if __MIPSEB
|
||||
# define SWAP_REGS(X,Y) X, Y
|
||||
# define ALIGN_OFFSET(N) (N)
|
||||
# else
|
||||
# define SWAP_REGS(X,Y) Y, X
|
||||
# define ALIGN_OFFSET(N) (NSIZE-N)
|
||||
# endif
|
||||
# define R6_UNALIGNED_WORD_COPY(BYTEOFFSET) \
|
||||
andi REG7, a2, (NSIZE-1);/* REG7 is # of bytes to by bytes. */ \
|
||||
beq REG7, a2, L(lastb); /* Check for bytes to copy by word */ \
|
||||
PTR_SUBU a3, a2, REG7; /* a3 is number of bytes to be copied in */ \
|
||||
/* (d)word chunks. */ \
|
||||
move a2, REG7; /* a2 is # of bytes to copy byte by byte */ \
|
||||
/* after word loop is finished. */ \
|
||||
PTR_ADDU REG6, a0, a3; /* REG6 is the dst address after loop. */ \
|
||||
PTR_SUBU REG2, a1, t8; /* REG2 is the aligned src address. */ \
|
||||
PTR_ADDU a1, a1, a3; /* a1 is addr of source after word loop. */ \
|
||||
C_LD t0, UNIT(0)(REG2); /* Load first part of source. */ \
|
||||
L(r6_ua_wordcopy##BYTEOFFSET): \
|
||||
C_LD t1, UNIT(1)(REG2); /* Load second part of source. */ \
|
||||
C_ALIGN REG3, SWAP_REGS(t1,t0), ALIGN_OFFSET(BYTEOFFSET); \
|
||||
PTR_ADDIU a0, a0, UNIT(1); /* Increment destination pointer. */ \
|
||||
PTR_ADDIU REG2, REG2, UNIT(1); /* Increment aligned source pointer.*/ \
|
||||
move t0, t1; /* Move second part of source to first. */ \
|
||||
bne a0, REG6,L(r6_ua_wordcopy##BYTEOFFSET); \
|
||||
C_ST REG3, UNIT(-1)(a0); \
|
||||
j L(lastb); \
|
||||
nop
|
||||
|
||||
/* We are generating R6 code, the destination is 4 byte aligned and
|
||||
the source is not 4 byte aligned. t8 is 1, 2, or 3 depending on the
|
||||
alignment of the source. */
|
||||
|
||||
L(r6_unaligned1):
|
||||
R6_UNALIGNED_WORD_COPY(1)
|
||||
L(r6_unaligned2):
|
||||
R6_UNALIGNED_WORD_COPY(2)
|
||||
L(r6_unaligned3):
|
||||
R6_UNALIGNED_WORD_COPY(3)
|
||||
# ifdef USE_DOUBLE
|
||||
L(r6_unaligned4):
|
||||
R6_UNALIGNED_WORD_COPY(4)
|
||||
L(r6_unaligned5):
|
||||
R6_UNALIGNED_WORD_COPY(5)
|
||||
L(r6_unaligned6):
|
||||
R6_UNALIGNED_WORD_COPY(6)
|
||||
L(r6_unaligned7):
|
||||
R6_UNALIGNED_WORD_COPY(7)
|
||||
# endif
|
||||
#endif /* R6_CODE */
|
||||
|
||||
.set at
|
||||
.set reorder
|
||||
END(MEMCPY_NAME)
|
||||
#ifndef __ANDROID__
|
||||
# ifdef _LIBC
|
||||
libc_hidden_builtin_def (MEMCPY_NAME)
|
||||
# endif
|
||||
#endif
|
328
libc/arch-mips/string/memcpy.c
Normal file
328
libc/arch-mips/string/memcpy.c
Normal file
|
@ -0,0 +1,328 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Imagination Technologies.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with
|
||||
* the distribution.
|
||||
* * Neither the name of Imagination Technologies nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(UNALIGNED_INSTR_SUPPORT)
|
||||
/* does target have unaligned lw/ld/ualw/uald instructions? */
|
||||
#define UNALIGNED_INSTR_SUPPORT 0
|
||||
#if __mips_isa_rev < 6 && !__mips1
|
||||
#undef UNALIGNED_INSTR_SUPPORT
|
||||
#define UNALIGNED_INSTR_SUPPORT 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(HW_UNALIGNED_SUPPORT)
|
||||
/* Does target have hardware support for unaligned accesses? */
|
||||
#define HW_UNALIGNED_SUPPORT 0
|
||||
#if __mips_isa_rev >= 6
|
||||
#undef HW_UNALIGNED_SUPPORT
|
||||
#define HW_UNALIGNED_SUPPORT 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define ENABLE_PREFETCH 1
|
||||
|
||||
#if ENABLE_PREFETCH
|
||||
#define PREFETCH(addr) __builtin_prefetch (addr, 0, 1);
|
||||
#else
|
||||
#define PREFETCH(addr)
|
||||
#endif
|
||||
|
||||
#if _MIPS_SIM == _ABIO32
|
||||
typedef unsigned long reg_t;
|
||||
typedef struct
|
||||
{
|
||||
reg_t B0:8, B1:8, B2:8, B3:8;
|
||||
} bits_t;
|
||||
#else
|
||||
typedef unsigned long long reg_t;
|
||||
typedef struct
|
||||
{
|
||||
reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
|
||||
} bits_t;
|
||||
#endif
|
||||
|
||||
typedef union
|
||||
{
|
||||
reg_t v;
|
||||
bits_t b;
|
||||
} bitfields_t;
|
||||
|
||||
/* Store byte field B<i> of the local 'bw' into a[i] and decrement the
   local 'len'.  When 'len' reaches zero the macro returns the local 'ret'
   from the *enclosing function*, so it can only be expanded where 'bw',
   'len' and 'ret' are all in scope.  The body is wrapped in
   do { } while (0) so that 'DO_BYTE (x, n);' is exactly one statement.  */
#define DO_BYTE(a, i)     \
  do {                    \
    (a)[i] = bw.b.B##i;   \
    len--;                \
    if (!len)             \
      return ret;         \
  } while (0)
|
||||
|
||||
/* This code is called when aligning a pointer, there are remaining bytes
|
||||
after doing word copies, or architecture does not have some form
|
||||
of unaligned support. */
|
||||
static inline void * __attribute__ ((always_inline))
|
||||
do_bytes (void *a, const void *b, unsigned long len, void *ret)
|
||||
{
|
||||
unsigned char *x = (unsigned char *) a;
|
||||
unsigned char *y = (unsigned char *) b;
|
||||
unsigned long i;
|
||||
|
||||
/* 'len' might be zero here, so preloading the first two values
|
||||
before the loop may access unallocated memory. */
|
||||
for (i = 0; i < len; i++) {
|
||||
*x = *y;
|
||||
x++;
|
||||
y++;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* This code is called to copy only remaining bytes within word or doubleword */
|
||||
static inline void * __attribute__ ((always_inline))
|
||||
do_bytes_remaining (void *a, const void *b, unsigned long len, void *ret)
|
||||
{
|
||||
unsigned char *x = (unsigned char *) a;
|
||||
|
||||
if(len > 0) {
|
||||
bitfields_t bw;
|
||||
bw.v = *((reg_t*) b);
|
||||
|
||||
#if __mips64
|
||||
DO_BYTE(x, 0);
|
||||
DO_BYTE(x, 1);
|
||||
DO_BYTE(x, 2);
|
||||
DO_BYTE(x, 3);
|
||||
DO_BYTE(x, 4);
|
||||
DO_BYTE(x, 5);
|
||||
DO_BYTE(x, 6);
|
||||
DO_BYTE(x, 7);
|
||||
#else
|
||||
DO_BYTE(x, 0);
|
||||
DO_BYTE(x, 1);
|
||||
DO_BYTE(x, 2);
|
||||
DO_BYTE(x, 3);
|
||||
#endif
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if !HW_UNALIGNED_SUPPORT
|
||||
#if UNALIGNED_INSTR_SUPPORT
|
||||
/* for MIPS GCC, there are no unaligned builtins - so this struct forces
|
||||
the compiler to treat the pointer access as unaligned. */
|
||||
struct ulw
|
||||
{
|
||||
reg_t uli;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
/* first pointer is not aligned while second pointer is. */
|
||||
static void *
|
||||
unaligned_words (struct ulw *a, const reg_t * b,
|
||||
unsigned long words, unsigned long bytes, void *ret)
|
||||
{
|
||||
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
|
||||
unsigned long i, words_by_8, words_by_1;
|
||||
words_by_1 = words % 8;
|
||||
words_by_8 = words >> 3;
|
||||
for (; words_by_8 > 0; words_by_8--) {
|
||||
if(words_by_8 != 1)
|
||||
PREFETCH (b + 8);
|
||||
reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
|
||||
reg_t y4 = b[4], y5 = b[5], y6 = b[6], y7 = b[7];
|
||||
a[0].uli = y0;
|
||||
a[1].uli = y1;
|
||||
a[2].uli = y2;
|
||||
a[3].uli = y3;
|
||||
a[4].uli = y4;
|
||||
a[5].uli = y5;
|
||||
a[6].uli = y6;
|
||||
a[7].uli = y7;
|
||||
a += 8;
|
||||
b += 8;
|
||||
}
|
||||
#else
|
||||
unsigned long i, words_by_4, words_by_1;
|
||||
words_by_1 = words % 4;
|
||||
words_by_4 = words >> 2;
|
||||
for (; words_by_4 > 0; words_by_4--) {
|
||||
if(words_by_4 != 1)
|
||||
PREFETCH (b + 4);
|
||||
reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
|
||||
a[0].uli = y0;
|
||||
a[1].uli = y1;
|
||||
a[2].uli = y2;
|
||||
a[3].uli = y3;
|
||||
a += 4;
|
||||
b += 4;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* do remaining words. */
|
||||
for (i = 0; i < words_by_1; i++) {
|
||||
a->uli = *b;
|
||||
a += 1;
|
||||
b += 1;
|
||||
}
|
||||
|
||||
/* mop up any remaining bytes. */
|
||||
return do_bytes_remaining (a, b, bytes, ret);
|
||||
}
|
||||
#else
|
||||
/* no HW support or unaligned lw/ld/ualw/uald instructions. */
|
||||
static void *
|
||||
unaligned_words (reg_t * a, const reg_t * b,
|
||||
unsigned long words, unsigned long bytes, void *ret)
|
||||
{
|
||||
unsigned long i;
|
||||
unsigned char *x = (unsigned char *) a;
|
||||
|
||||
for (i = 0; i < words; i++) {
|
||||
bitfields_t bw;
|
||||
bw.v = *((reg_t*) b);
|
||||
x = (unsigned char *) a;
|
||||
#if __mips64
|
||||
x[0] = bw.b.B0;
|
||||
x[1] = bw.b.B1;
|
||||
x[2] = bw.b.B2;
|
||||
x[3] = bw.b.B3;
|
||||
x[4] = bw.b.B4;
|
||||
x[5] = bw.b.B5;
|
||||
x[6] = bw.b.B6;
|
||||
x[7] = bw.b.B7;
|
||||
#else
|
||||
x[0] = bw.b.B0;
|
||||
x[1] = bw.b.B1;
|
||||
x[2] = bw.b.B2;
|
||||
x[3] = bw.b.B3;
|
||||
#endif
|
||||
a += 1;
|
||||
b += 1;
|
||||
}
|
||||
|
||||
/* mop up any remaining bytes */
|
||||
return do_bytes_remaining (a, b, bytes, ret);
|
||||
}
|
||||
#endif /* UNALIGNED_INSTR_SUPPORT */
|
||||
#endif /* HW_UNALIGNED_SUPPORT */
|
||||
|
||||
/* both pointers are aligned, or first isn't and HW support for unaligned. */
|
||||
static void *
|
||||
aligned_words (reg_t * a, const reg_t * b,
|
||||
unsigned long words, unsigned long bytes, void *ret)
|
||||
{
|
||||
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
|
||||
unsigned long i, words_by_8, words_by_1;
|
||||
words_by_1 = words % 8;
|
||||
words_by_8 = words >> 3;
|
||||
for (; words_by_8 > 0; words_by_8--) {
|
||||
if(words_by_8 != 1)
|
||||
PREFETCH (b + 8);
|
||||
reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];
|
||||
reg_t x4 = b[4], x5 = b[5], x6 = b[6], x7 = b[7];
|
||||
a[0] = x0;
|
||||
a[1] = x1;
|
||||
a[2] = x2;
|
||||
a[3] = x3;
|
||||
a[4] = x4;
|
||||
a[5] = x5;
|
||||
a[6] = x6;
|
||||
a[7] = x7;
|
||||
a += 8;
|
||||
b += 8;
|
||||
}
|
||||
#else
|
||||
unsigned long i, words_by_4, words_by_1;
|
||||
words_by_1 = words % 4;
|
||||
words_by_4 = words >> 2;
|
||||
for (; words_by_4 > 0; words_by_4--) {
|
||||
if(words_by_4 != 1)
|
||||
PREFETCH (b + 4);
|
||||
reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];
|
||||
a[0] = x0;
|
||||
a[1] = x1;
|
||||
a[2] = x2;
|
||||
a[3] = x3;
|
||||
a += 4;
|
||||
b += 4;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* do remaining words. */
|
||||
for (i = 0; i < words_by_1; i++) {
|
||||
*a = *b;
|
||||
a += 1;
|
||||
b += 1;
|
||||
}
|
||||
|
||||
/* mop up any remaining bytes. */
|
||||
return do_bytes_remaining (a, b, bytes, ret);
|
||||
}
|
||||
|
||||
void *
|
||||
memcpy (void *a, const void *b, size_t len) __overloadable
|
||||
{
|
||||
unsigned long bytes, words;
|
||||
void *ret = a;
|
||||
|
||||
/* shouldn't hit that often. */
|
||||
if (len < sizeof (reg_t) * 4) {
|
||||
return do_bytes (a, b, len, a);
|
||||
}
|
||||
|
||||
/* Align the second pointer to word/dword alignment.
|
||||
Note that the pointer is only 32-bits for o32/n32 ABIs. For
|
||||
n32, loads are done as 64-bit while address remains 32-bit. */
|
||||
bytes = ((unsigned long) b) % sizeof (reg_t);
|
||||
if (bytes) {
|
||||
bytes = sizeof (reg_t) - bytes;
|
||||
if (bytes > len)
|
||||
bytes = len;
|
||||
do_bytes (a, b, bytes, ret);
|
||||
if (len == bytes)
|
||||
return ret;
|
||||
len -= bytes;
|
||||
a = (void *) (((unsigned char *) a) + bytes);
|
||||
b = (const void *) (((unsigned char *) b) + bytes);
|
||||
}
|
||||
|
||||
/* Second pointer now aligned. */
|
||||
words = len / sizeof (reg_t);
|
||||
bytes = len % sizeof (reg_t);
|
||||
#if HW_UNALIGNED_SUPPORT
|
||||
/* treat possible unaligned first pointer as aligned. */
|
||||
return aligned_words (a, b, words, bytes, ret);
|
||||
#else
|
||||
if (((unsigned long) a) % sizeof (reg_t) == 0) {
|
||||
return aligned_words (a, b, words, bytes, ret);
|
||||
}
|
||||
/* need to use unaligned instructions on first pointer. */
|
||||
return unaligned_words (a, b, words, bytes, ret);
|
||||
#endif
|
||||
}
|
468
libc/arch-mips/string/memmove.c
Normal file
468
libc/arch-mips/string/memmove.c
Normal file
|
@ -0,0 +1,468 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Imagination Technologies.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with
|
||||
* the distribution.
|
||||
* * Neither the name of Imagination Technologies nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(UNALIGNED_INSTR_SUPPORT)
|
||||
/* does target have unaligned lw/ld/ualw/uald instructions? */
|
||||
#define UNALIGNED_INSTR_SUPPORT 0
|
||||
#if __mips_isa_rev < 6 && !__mips1
|
||||
#undef UNALIGNED_INSTR_SUPPORT
|
||||
#define UNALIGNED_INSTR_SUPPORT 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(HW_UNALIGNED_SUPPORT)
|
||||
/* Does target have hardware support for unaligned accesses? */
|
||||
#define HW_UNALIGNED_SUPPORT 0
|
||||
#if __mips_isa_rev >= 6
|
||||
#undef HW_UNALIGNED_SUPPORT
|
||||
#define HW_UNALIGNED_SUPPORT 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Set to 0 to build the copy loops without software prefetch hints.  */
#define ENABLE_PREFETCH 1

/* PREFETCH (addr) hints that ADDR is about to be read (rw = 0) with low
   temporal locality (locality = 1).  The expansion deliberately carries no
   trailing semicolon: the caller's own ';' ends the statement, so
   "if (c) PREFETCH (p); else ..." parses as a single if/else.  */
#if ENABLE_PREFETCH
#define PREFETCH(addr)  __builtin_prefetch ((addr), 0, 1)
#else
#define PREFETCH(addr)  ((void) 0)
#endif
|
||||
|
||||
#if _MIPS_SIM == _ABIO32
|
||||
typedef unsigned long reg_t;
|
||||
typedef struct
|
||||
{
|
||||
reg_t B0:8, B1:8, B2:8, B3:8;
|
||||
} bits_t;
|
||||
#else
|
||||
typedef unsigned long long reg_t;
|
||||
typedef struct
|
||||
{
|
||||
reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
|
||||
} bits_t;
|
||||
#endif
|
||||
|
||||
typedef union
|
||||
{
|
||||
reg_t v;
|
||||
bits_t b;
|
||||
} bitfields_t;
|
||||
|
||||
#define DO_BYTE(a, i) \
|
||||
a[i] = bw.b.B##i; \
|
||||
len--; \
|
||||
if(!len) return ret; \
|
||||
|
||||
/* Byte-at-a-time forward copy of LEN bytes from B to A, lowest address
   first.  memmove uses it for short forward copies and for the leading
   bytes that bring the source pointer up to register alignment.  LEN may
   be zero, in which case nothing is read or written.  Returns RET
   unchanged.  */
static inline void * __attribute__ ((always_inline))
do_bytes (void *a, const void *b, unsigned long len, void *ret)
{
  unsigned char *dst = a;
  /* Keep the source const-qualified instead of casting the qualifier
     away.  */
  const unsigned char *src = b;
  unsigned long i;

  /* Nothing is preloaded ahead of the loop: with LEN == 0 even the first
     source byte may lie in unallocated memory.  */
  for (i = 0; i < len; i++)
    dst[i] = src[i];
  return ret;
}
|
||||
|
||||
/* Byte-at-a-time backward copy.  A and B point one past the END of the
   destination and source regions; LEN bytes are copied from the highest
   address down, each pointer being decremented before its access.  LEN
   may be zero, in which case nothing is read or written.  Returns RET
   unchanged.  */
static inline void * __attribute__ ((always_inline))
do_bytes_backward (void *a, const void *b, unsigned long len, void *ret)
{
  unsigned char *dst = a;
  /* Keep the source const-qualified instead of casting the qualifier
     away.  */
  const unsigned char *src = b;
  unsigned long i;

  for (i = 0; i < len; i++)
    *--dst = *--src;
  return ret;
}
|
||||
|
||||
static inline void * __attribute__ ((always_inline))
|
||||
do_bytes_aligned (void *a, const void *b, unsigned long len, void *ret)
|
||||
{
|
||||
unsigned char *x = (unsigned char *) a;
|
||||
|
||||
if(len > 0) {
|
||||
bitfields_t bw;
|
||||
bw.v = *((reg_t*) b);
|
||||
|
||||
#if __mips64
|
||||
DO_BYTE(x, 0);
|
||||
DO_BYTE(x, 1);
|
||||
DO_BYTE(x, 2);
|
||||
DO_BYTE(x, 3);
|
||||
DO_BYTE(x, 4);
|
||||
DO_BYTE(x, 5);
|
||||
DO_BYTE(x, 6);
|
||||
DO_BYTE(x, 7);
|
||||
#else
|
||||
DO_BYTE(x, 0);
|
||||
DO_BYTE(x, 1);
|
||||
DO_BYTE(x, 2);
|
||||
DO_BYTE(x, 3);
|
||||
#endif
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if !HW_UNALIGNED_SUPPORT
|
||||
#if UNALIGNED_INSTR_SUPPORT
|
||||
/* for MIPS GCC, there are no unaligned builtins - so this struct forces
|
||||
the compiler to treat the pointer access as unaligned. */
|
||||
struct ulw
|
||||
{
|
||||
reg_t uli;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
/* STORE_UNALIGNED_8 (a, b): copy eight registers from the reg_t array B
   into the packed-struct array A.  Every load is performed before the
   first store.  The body is wrapped in do { } while (0) so the invocation
   plus its ';' is exactly one statement (safe in an unbraced if/else),
   and the arguments are parenthesized.  */
#define STORE_UNALIGNED_8(a, b)                                   \
  do {                                                            \
    reg_t y0 = (b)[0], y1 = (b)[1], y2 = (b)[2], y3 = (b)[3];     \
    reg_t y4 = (b)[4], y5 = (b)[5], y6 = (b)[6], y7 = (b)[7];     \
    (a)[0].uli = y0;                                              \
    (a)[1].uli = y1;                                              \
    (a)[2].uli = y2;                                              \
    (a)[3].uli = y3;                                              \
    (a)[4].uli = y4;                                              \
    (a)[5].uli = y5;                                              \
    (a)[6].uli = y6;                                              \
    (a)[7].uli = y7;                                              \
  } while (0)

/* STORE_UNALIGNED_4 (a, b): four-register form of STORE_UNALIGNED_8.  */
#define STORE_UNALIGNED_4(a, b)                                   \
  do {                                                            \
    reg_t y0 = (b)[0], y1 = (b)[1], y2 = (b)[2], y3 = (b)[3];     \
    (a)[0].uli = y0;                                              \
    (a)[1].uli = y1;                                              \
    (a)[2].uli = y2;                                              \
    (a)[3].uli = y3;                                              \
  } while (0)
|
||||
|
||||
/* first pointer is not aligned while second pointer is. */
|
||||
static void *
|
||||
unaligned_words_forward (struct ulw *a, const reg_t * b,
|
||||
unsigned long words, unsigned long bytes, void *ret)
|
||||
{
|
||||
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
|
||||
unsigned long i, words_by_8, words_by_1;
|
||||
words_by_1 = words % 8;
|
||||
words_by_8 = words >> 3;
|
||||
for (; words_by_8 > 0; words_by_8--) {
|
||||
if(words_by_8 != 1)
|
||||
PREFETCH (b + 8);
|
||||
STORE_UNALIGNED_8(a, b);
|
||||
a += 8;
|
||||
b += 8;
|
||||
}
|
||||
#else
|
||||
unsigned long i, words_by_4, words_by_1;
|
||||
words_by_1 = words % 4;
|
||||
words_by_4 = words >> 2;
|
||||
for (; words_by_4 > 0; words_by_4--) {
|
||||
if(words_by_4 != 1)
|
||||
PREFETCH (b + 4);
|
||||
STORE_UNALIGNED_4(a, b);
|
||||
a += 4;
|
||||
b += 4;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* do remaining words. */
|
||||
for (i = 0; i < words_by_1; i++) {
|
||||
a->uli = *b;
|
||||
a += 1;
|
||||
b += 1;
|
||||
}
|
||||
|
||||
/* mop up any remaining bytes. */
|
||||
return do_bytes_aligned (a, b, bytes, ret);
|
||||
}
|
||||
|
||||
static void *
|
||||
unaligned_words_backward (struct ulw *a, const reg_t * b,
|
||||
unsigned long words, unsigned long bytes, void *ret)
|
||||
{
|
||||
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
|
||||
unsigned long i, words_by_8, words_by_1;
|
||||
words_by_1 = words % 8;
|
||||
words_by_8 = words >> 3;
|
||||
for (; words_by_8 > 0; words_by_8--) {
|
||||
if(words_by_8 != 1)
|
||||
PREFETCH (b - 16);
|
||||
a -= 8;
|
||||
b -= 8;
|
||||
STORE_UNALIGNED_8(a, b);
|
||||
}
|
||||
#else
|
||||
unsigned long i, words_by_4, words_by_1;
|
||||
words_by_1 = words % 4;
|
||||
words_by_4 = words >> 2;
|
||||
for (; words_by_4 > 0; words_by_4--) {
|
||||
if(words_by_4 != 1)
|
||||
PREFETCH (b - 8);
|
||||
a -= 4;
|
||||
b -= 4;
|
||||
STORE_UNALIGNED_4(a, b);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* do remaining words. */
|
||||
for (i = 0; i < words_by_1; i++) {
|
||||
a -= 1;
|
||||
b -= 1;
|
||||
a->uli = *b;
|
||||
}
|
||||
|
||||
/* mop up any remaining bytes. */
|
||||
return do_bytes_backward (a, b, bytes, ret);
|
||||
}
|
||||
|
||||
#else
|
||||
/* no HW support or unaligned lw/ld/ualw/uald instructions. */
|
||||
static void *
|
||||
unaligned_words_forward (reg_t * a, const reg_t * b,
|
||||
unsigned long words, unsigned long bytes, void *ret)
|
||||
{
|
||||
return do_bytes_aligned (a, b, (sizeof (reg_t) * words) + bytes, ret);
|
||||
}
|
||||
|
||||
static void *
|
||||
unaligned_words_backward (reg_t * a, const reg_t * b,
|
||||
unsigned long words, unsigned long bytes, void *ret)
|
||||
{
|
||||
return do_bytes_backward (a, b, (sizeof (reg_t) * words) + bytes, ret);
|
||||
}
|
||||
|
||||
#endif /* UNALIGNED_INSTR_SUPPORT */
|
||||
#endif /* HW_UNALIGNED_SUPPORT */
|
||||
|
||||
/* both pointers are aligned, or first isn't and HW support for unaligned. */
|
||||
|
||||
/* STORE_ALIGNED_8 (a, b): copy eight registers from the reg_t array B to
   the reg_t array A.  Every load is performed before the first store.
   The body is wrapped in do { } while (0) so the invocation plus its ';'
   is exactly one statement (safe in an unbraced if/else), and the
   arguments are parenthesized.  */
#define STORE_ALIGNED_8(a, b)                                     \
  do {                                                            \
    reg_t x0 = (b)[0], x1 = (b)[1], x2 = (b)[2], x3 = (b)[3];     \
    reg_t x4 = (b)[4], x5 = (b)[5], x6 = (b)[6], x7 = (b)[7];     \
    (a)[0] = x0;                                                  \
    (a)[1] = x1;                                                  \
    (a)[2] = x2;                                                  \
    (a)[3] = x3;                                                  \
    (a)[4] = x4;                                                  \
    (a)[5] = x5;                                                  \
    (a)[6] = x6;                                                  \
    (a)[7] = x7;                                                  \
  } while (0)

/* STORE_ALIGNED_4 (a, b): four-register form of STORE_ALIGNED_8.  */
#define STORE_ALIGNED_4(a, b)                                     \
  do {                                                            \
    reg_t x0 = (b)[0], x1 = (b)[1], x2 = (b)[2], x3 = (b)[3];     \
    (a)[0] = x0;                                                  \
    (a)[1] = x1;                                                  \
    (a)[2] = x2;                                                  \
    (a)[3] = x3;                                                  \
  } while (0)
|
||||
|
||||
static void *
|
||||
aligned_words_forward (reg_t * a, const reg_t * b,
|
||||
unsigned long words, unsigned long bytes, void *ret)
|
||||
{
|
||||
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
|
||||
unsigned long i, words_by_8, words_by_1;
|
||||
words_by_1 = words % 8;
|
||||
words_by_8 = words >> 3;
|
||||
for (; words_by_8 > 0; words_by_8--) {
|
||||
if(words_by_8 != 1)
|
||||
PREFETCH (b + 8);
|
||||
STORE_ALIGNED_8(a, b);
|
||||
a += 8;
|
||||
b += 8;
|
||||
}
|
||||
#else
|
||||
unsigned long i, words_by_4, words_by_1;
|
||||
words_by_1 = words % 4;
|
||||
words_by_4 = words >> 2;
|
||||
for (; words_by_4 > 0; words_by_4--) {
|
||||
if(words_by_4 != 1)
|
||||
PREFETCH (b + 4);
|
||||
STORE_ALIGNED_4(a, b);
|
||||
a += 4;
|
||||
b += 4;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* do remaining words. */
|
||||
for (i = 0; i < words_by_1; i++) {
|
||||
*a = *b;
|
||||
a += 1;
|
||||
b += 1;
|
||||
}
|
||||
|
||||
/* mop up any remaining bytes. */
|
||||
return do_bytes_aligned (a, b, bytes, ret);
|
||||
}
|
||||
|
||||
|
||||
static void *
|
||||
aligned_words_backward (reg_t * a, const reg_t * b,
|
||||
unsigned long words, unsigned long bytes, void *ret)
|
||||
{
|
||||
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
|
||||
unsigned long i, words_by_8, words_by_1;
|
||||
words_by_1 = words % 8;
|
||||
words_by_8 = words >> 3;
|
||||
for (; words_by_8 > 0; words_by_8--) {
|
||||
if(words_by_8 != 1)
|
||||
PREFETCH (b - 16);
|
||||
a -= 8;
|
||||
b -= 8;
|
||||
STORE_ALIGNED_8(a, b);
|
||||
}
|
||||
#else
|
||||
unsigned long i, words_by_4, words_by_1;
|
||||
words_by_1 = words % 4;
|
||||
words_by_4 = words >> 2;
|
||||
for (; words_by_4 > 0; words_by_4--) {
|
||||
if(words_by_4 != 1)
|
||||
PREFETCH (b - 8);
|
||||
a -= 4;
|
||||
b -= 4;
|
||||
STORE_ALIGNED_4(a, b);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* do remaining words. */
|
||||
for (i = 0; i < words_by_1; i++) {
|
||||
a -= 1;
|
||||
b -= 1;
|
||||
*a = *b;
|
||||
}
|
||||
|
||||
/* mop up any remaining bytes. */
|
||||
return do_bytes_backward (a, b, bytes, ret);
|
||||
}
|
||||
|
||||
void *
|
||||
memmove (void *dst0, const void *src0, size_t length) __overloadable
|
||||
{
|
||||
unsigned long bytes, words;
|
||||
void *ret = dst0;
|
||||
|
||||
if (length == 0 || dst0 == src0) /* nothing to do */
|
||||
return dst0;
|
||||
|
||||
if ((unsigned long)dst0 < (unsigned long)src0) {
|
||||
/* Copy forwards. */
|
||||
/* This shouldn't hit that often. */
|
||||
if (length < sizeof (reg_t) * 4) {
|
||||
return do_bytes (dst0, src0, length, ret);
|
||||
}
|
||||
|
||||
/* Align the second pointer to word/dword alignment.
|
||||
Note that the pointer is only 32-bits for o32/n32 ABIs. For
|
||||
n32, loads are done as 64-bit while address remains 32-bit. */
|
||||
bytes = ((unsigned long) src0) % sizeof (reg_t);
|
||||
if (bytes) {
|
||||
bytes = sizeof (reg_t) - bytes;
|
||||
if (bytes > length)
|
||||
bytes = length;
|
||||
do_bytes (dst0, src0, bytes, ret);
|
||||
if (length == bytes)
|
||||
return ret;
|
||||
length -= bytes;
|
||||
dst0 = (void *) (((unsigned char *) dst0) + bytes);
|
||||
src0 = (const void *) (((unsigned char *) src0) + bytes);
|
||||
}
|
||||
|
||||
/* Second pointer now aligned. */
|
||||
words = length / sizeof (reg_t);
|
||||
bytes = length % sizeof (reg_t);
|
||||
#if HW_UNALIGNED_SUPPORT
|
||||
/* treat possible unaligned first pointer as aligned. */
|
||||
return aligned_words_forward (dst0, src0, words, bytes, ret);
|
||||
#else
|
||||
if (((unsigned long) dst0) % sizeof (reg_t) == 0) {
|
||||
return aligned_words_forward (dst0, src0, words, bytes, ret);
|
||||
}
|
||||
/* need to use unaligned instructions on first pointer. */
|
||||
return unaligned_words_forward (dst0, src0, words, bytes, ret);
|
||||
#endif
|
||||
} else {
|
||||
/* Copy backwards. */
|
||||
dst0 = (void *) (((unsigned char *) dst0) + length);
|
||||
src0 = (const void *) (((unsigned char *) src0) + length);
|
||||
|
||||
/* This shouldn't hit that often. */
|
||||
if (length < sizeof (reg_t) * 4) {
|
||||
return do_bytes_backward (dst0, src0, length, ret);
|
||||
}
|
||||
|
||||
/* Align the second pointer to word/dword alignment.
|
||||
Note that the pointer is only 32-bits for o32/n32 ABIs. For
|
||||
n32, loads are done as 64-bit while address remains 32-bit. */
|
||||
bytes = ((unsigned long) src0) % sizeof (reg_t);
|
||||
if (bytes) {
|
||||
if (bytes > length)
|
||||
bytes = length;
|
||||
do_bytes_backward (dst0, src0, bytes, ret);
|
||||
if (length == bytes)
|
||||
return ret;
|
||||
length -= bytes;
|
||||
dst0 = (void *) (((unsigned char *) dst0) - bytes);
|
||||
src0 = (const void *) (((unsigned char *) src0) - bytes);
|
||||
}
|
||||
|
||||
words = length / sizeof (reg_t);
|
||||
bytes = length % sizeof (reg_t);
|
||||
#if HW_UNALIGNED_SUPPORT
|
||||
/* treat possible unaligned first pointer as aligned. */
|
||||
return aligned_words_backward ((void *)dst0, (void *)src0, words, bytes, ret);
|
||||
#else
|
||||
if (((unsigned long) dst0) % sizeof (reg_t) == 0) {
|
||||
return aligned_words_backward (dst0, src0, words, bytes, ret);
|
||||
}
|
||||
/* need to use unaligned instructions on first pointer. */
|
||||
return unaligned_words_backward (dst0, src0, words, bytes, ret);
|
||||
#endif
|
||||
}
|
||||
}
|
|
@ -1,148 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 MIPS Technologies, Inc.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with
|
||||
* the distribution.
|
||||
* * Neither the name of MIPS Technologies Inc. nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __MIPS_STRING_OPS_H
|
||||
#define __MIPS_STRING_OPS_H
|
||||
/* This definition of the byte bitfields uses the
|
||||
assumption that the layout of the bitfields is
|
||||
equivalent to the layout in memory. Generally,
|
||||
for the MIPS ABIs, this is true. If you compile
|
||||
the strcmp.c file with -DSMOKE_TEST_NEW_STRCMP,
|
||||
this assumption will be tested.
|
||||
|
||||
Also, regardless of char signedness, ANSI C dictates that
|
||||
strcmp() treats each character as unsigned char. For
|
||||
strlen and the like, signedness doesn't matter.
|
||||
|
||||
Also, this code assumes that there are 8-bits per 'char'. */
|
||||
|
||||
#if __mips64
|
||||
typedef struct bits
|
||||
{
|
||||
unsigned B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
|
||||
} bits_t;
|
||||
#else
|
||||
typedef struct bits
|
||||
{
|
||||
unsigned B0:8, B1:8, B2:8, B3:8;
|
||||
} bits_t;
|
||||
#endif
|
||||
|
||||
#ifndef _ULW
|
||||
/* for MIPS GCC, there are no unaligned builtins - so this code forces
|
||||
the compiler to treat the pointer access as unaligned. */
|
||||
struct ulw
|
||||
{
|
||||
unsigned b;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
#define _ULW(__x) ((struct ulw *) ((char *)(&__x)))->b;
|
||||
#endif
|
||||
|
||||
/* This union assumes that small structures can be in registers. If
|
||||
not, then memory accesses will be done - not optimal, but ok. */
|
||||
typedef union
|
||||
{
|
||||
unsigned v;
|
||||
bits_t b;
|
||||
} bitfields_t;
|
||||
|
||||
#ifndef detect_zero
|
||||
/* __mips_dsp, __mips_dspr2, and __mips64 are predefined by
|
||||
the compiler, based on command line options. */
|
||||
#if (__mips_dsp || __mips_dspr2) && !__mips64
|
||||
#define __mips_using_dsp 1
|
||||
|
||||
/* DSP 4-lane (8 unsigned bits per line) subtract and saturate
|
||||
* Intrinsic operation. How this works:
|
||||
* Given a 4-byte string of "ABC\0", subtract this as
|
||||
* an unsigned integer from 0x01010101:
|
||||
* 0x01010101
|
||||
* - 0x41424300
|
||||
* -----------
|
||||
* 0xbfbebe01 <-- answer without saturation
|
||||
* 0x00000001 <-- answer with saturation
|
||||
* When this 4-lane vector is treated as an unsigned int value,
|
||||
* a non-zero answer indicates the presence of a zero in the
|
||||
* original 4-byte argument. */
|
||||
|
||||
typedef signed char v4i8 __attribute__ ((vector_size (4)));
|
||||
|
||||
#define detect_zero(__x,__y,__01s,__80s)\
|
||||
((unsigned) __builtin_mips_subu_s_qb((v4i8) __01s,(v4i8) __x))
|
||||
|
||||
/* sets all 4 lanes to requested byte. */
|
||||
#define set_byte_lanes(__x) ((unsigned) __builtin_mips_repl_qb(__x))
|
||||
|
||||
/* sets all 4 lanes to 0x01. */
|
||||
#define def_and_set_01(__x) unsigned __x = (unsigned) __builtin_mips_repl_qb(0x01)
|
||||
|
||||
/* sets all 4 lanes to 0x80. Not needed when subu_s.qb used. */
|
||||
#define def_and_set_80(__x) /* do nothing */
|
||||
|
||||
#else
|
||||
/* this version, originally published in the 80's, uses
|
||||
a reverse-carry-set like determination of the zero byte.
|
||||
The steps are, for __x = 0x31ff0001:
|
||||
__x - _01s = 0x30fdff00
|
||||
~__x = 0xce00fffe
|
||||
((__x - _01s) & ~__x) = 0x0000ff00
|
||||
x & _80s = 0x00008000 <- byte 3 was zero
|
||||
Some implementations naively assume that characters are
|
||||
always 7-bit unsigned ASCII. With that assumption, the
|
||||
"& ~x" is usually discarded. Since character strings
|
||||
are 8-bit, the and is needed to catch the case of
|
||||
a false positive when the byte is 0x80. */
|
||||
|
||||
#define detect_zero(__x,__y,_01s,_80s)\
|
||||
((unsigned) (((__x) - _01s) & ~(__x)) & _80s)
|
||||
|
||||
#if __mips64
|
||||
#define def_and_set_80(__x) unsigned __x = 0x8080808080808080ul
|
||||
#define def_and_set_01(__x) unsigned __x = 0x0101010101010101ul
|
||||
#else
|
||||
#define def_and_set_80(__x) unsigned __x = 0x80808080ul
|
||||
#define def_and_set_01(__x) unsigned __x = 0x01010101ul
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* dealing with 'void *' conversions without using extra variables. */
|
||||
#define get_byte(__x,__idx) (((unsigned char *) (__x))[__idx])
|
||||
#define set_byte(__x,__idx,__fill) ((unsigned char *) (__x))[__idx] = (__fill)
|
||||
#define get_word(__x,__idx) (((unsigned *) (__x))[__idx])
|
||||
#define set_word(__x,__idx,__fill) ((unsigned *) (__x))[__idx] = (__fill)
|
||||
#define inc_ptr_as(__type,__x,__inc) __x = (void *) (((__type) __x) + (__inc))
|
||||
#define cvt_ptr_to(__type,__x) ((__type) (__x))
|
||||
|
||||
#endif
|
|
@ -1,224 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 MIPS Technologies, Inc.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with
|
||||
* the distribution.
|
||||
* * Neither the name of MIPS Technologies Inc. nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "mips-string-ops.h"
|
||||
|
||||
#define do_strlen_word(__av) {\
|
||||
if (detect_zero(x,x,_01s,_80s)) break;\
|
||||
x = __av;\
|
||||
cnt += sizeof (unsigned);\
|
||||
}
|
||||
|
||||
#define do_strlen_byte(__x) {\
|
||||
if ((bx.b.B##__x) == 0) break;\
|
||||
++cnt;\
|
||||
}
|
||||
|
||||
#if SMOKE_TEST_MIPS_STRLEN
|
||||
#define strlen my_strlen
|
||||
#endif
|
||||
|
||||
size_t
|
||||
strlen (const char *_a) __overloadable
|
||||
{
|
||||
int cnt = 0;
|
||||
unsigned x;
|
||||
|
||||
/* align the string to word boundary so we can do word at a time. */
|
||||
if ((cvt_ptr_to (unsigned, _a) & (sizeof (unsigned) - 1)) != 0)
|
||||
{
|
||||
if ((cvt_ptr_to (unsigned, _a) & 1) != 0)
|
||||
{
|
||||
if (get_byte (_a, 0) == 0)
|
||||
return cnt;
|
||||
/* set bit 1 so 2-bytes are checked and incremented. */
|
||||
inc_ptr_as (char *, _a, 1);
|
||||
++cnt;
|
||||
}
|
||||
if ((cvt_ptr_to (unsigned, _a) & 2) != 0)
|
||||
{
|
||||
if (get_byte (_a, 0) == 0)
|
||||
return cnt + 0;
|
||||
if (get_byte (_a, 1) == 0)
|
||||
return cnt + 1;
|
||||
inc_ptr_as (char *, _a, 2);
|
||||
cnt += 2;
|
||||
}
|
||||
}
|
||||
|
||||
#if __mips64
|
||||
#error strlen: mips64 check for 4-byte alignment not implemented.
|
||||
#endif
|
||||
|
||||
if (1)
|
||||
{
|
||||
def_and_set_01 (_01s);
|
||||
def_and_set_80 (_80s);
|
||||
|
||||
/* as advantagous as it is to performance, this code cannot pre-load
|
||||
the following word, nor can it prefetch the next line at the start
|
||||
of the loop since the string can be at the end of a page with the
|
||||
following page unmapped. There are tests in the suite to catch
|
||||
any attempt to go beyond the current word. */
|
||||
x = get_word (_a, 0);
|
||||
while (1)
|
||||
{
|
||||
/* doing 8 words should cover most strings. */
|
||||
do_strlen_word (get_word (_a, 1));
|
||||
do_strlen_word (get_word (_a, 2));
|
||||
do_strlen_word (get_word (_a, 3));
|
||||
do_strlen_word (get_word (_a, 4));
|
||||
do_strlen_word (get_word (_a, 5));
|
||||
do_strlen_word (get_word (_a, 6));
|
||||
do_strlen_word (get_word (_a, 7));
|
||||
do_strlen_word (get_word (_a, 8));
|
||||
inc_ptr_as (unsigned *, _a, 8);
|
||||
}
|
||||
}
|
||||
while (1)
|
||||
{
|
||||
/* pull apart the last word processed and find the zero. */
|
||||
bitfields_t bx;
|
||||
bx.v = x;
|
||||
#if __mips64
|
||||
do_strlen_byte (0);
|
||||
do_strlen_byte (1);
|
||||
do_strlen_byte (2);
|
||||
do_strlen_byte (3);
|
||||
do_strlen_byte (4);
|
||||
do_strlen_byte (5);
|
||||
do_strlen_byte (6);
|
||||
#else
|
||||
do_strlen_byte (0);
|
||||
do_strlen_byte (1);
|
||||
do_strlen_byte (2);
|
||||
#endif
|
||||
/* last byte is zero */
|
||||
break;
|
||||
}
|
||||
return cnt;
|
||||
}
|
||||
|
||||
#undef do_strlen_byte
|
||||
#undef do_strlen_word
|
||||
|
||||
#if SMOKE_TEST_MIPS_STRLEN
|
||||
#include <stdio.h>
|
||||
char str1[] = "DHRYSTONE PROGRAM, 1'ST STRING";
|
||||
char str2[] = "DHRYSTONE PROGRAM, 2'ST STRING";
|
||||
|
||||
char str3[] = "another string";
|
||||
char str4[] = "another";
|
||||
|
||||
char str5[] = "somes tring";
|
||||
char str6[] = "somes_tring";
|
||||
|
||||
char str7[16], str8[16];
|
||||
|
||||
/* Compare the length computed by the routine under test (MINE) with the
   reference length (LIBS) and describe the outcome.

   Returns a pointer to a static buffer holding the formatted verdict; the
   buffer is overwritten by the next call, so the result must be consumed
   before chk is called again.  *ERRORS is incremented when the two lengths
   differ.  */
static char *
chk (unsigned mine, unsigned libs, int *errors)
{
  static char answer[1024];
  const int same = (mine == libs);

  /* The lengths are unsigned, so print them with %u; snprintf keeps the
     write bounded by the buffer size.  */
  snprintf (answer, sizeof answer, "new_strlen=%u: lib_strlen=%u: %s!",
            mine, libs, same ? "PASS" : "FAIL");
  if (!same)
    (*errors)++;
  return answer;
}
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
int errors = 0;
|
||||
/* set -1 in one position */
|
||||
str6[5] = 0xff;
|
||||
/* set zero in same position with junk in following 3 */
|
||||
str7[0] = str8[0] = 0;
|
||||
str7[1] = 0xff;
|
||||
str7[2] = 'a';
|
||||
str7[3] = 2;
|
||||
str8[1] = 's';
|
||||
str8[2] = -2;
|
||||
str8[3] = 0;
|
||||
|
||||
fprintf (stderr, "========== mips_strlen%s test...\n",
|
||||
argv[0] ? argv[0] : "unknown strlen");
|
||||
#define P(__x,__y) {\
|
||||
int a = my_strlen(__x + __y);\
|
||||
int b = (strlen)(__x + __y) /* library version */;\
|
||||
fprintf(stderr,"%s+%d: %s\n",#__x,__y,chk(a,b,&errors));\
|
||||
}
|
||||
|
||||
P (str1, 0);
|
||||
P (str1, 1);
|
||||
P (str1, 2);
|
||||
P (str1, 3);
|
||||
|
||||
P (str2, 0);
|
||||
P (str2, 1);
|
||||
P (str2, 2);
|
||||
P (str2, 3);
|
||||
|
||||
P (str3, 0);
|
||||
P (str3, 1);
|
||||
P (str3, 2);
|
||||
P (str3, 3);
|
||||
|
||||
P (str4, 0);
|
||||
P (str4, 1);
|
||||
P (str4, 2);
|
||||
P (str4, 3);
|
||||
|
||||
P (str5, 0);
|
||||
P (str5, 1);
|
||||
P (str5, 2);
|
||||
P (str5, 3);
|
||||
|
||||
P (str6, 0);
|
||||
P (str6, 1);
|
||||
P (str6, 2);
|
||||
P (str6, 3);
|
||||
|
||||
P (str7, 0);
|
||||
P (str7, 1);
|
||||
P (str7, 2);
|
||||
P (str7, 3);
|
||||
|
||||
P (str8, 0);
|
||||
P (str8, 1);
|
||||
P (str8, 2);
|
||||
P (str8, 3);
|
||||
|
||||
return errors;
|
||||
}
|
||||
#endif
|
138
libc/arch-mips/string/strchr.c
Normal file
138
libc/arch-mips/string/strchr.c
Normal file
|
@ -0,0 +1,138 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Imagination Technologies.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with
|
||||
* the distribution.
|
||||
* * Neither the name of Imagination Technologies nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#define op_t unsigned long int
|
||||
#define op_size sizeof (op_t)
|
||||
|
||||
#if __mips64
|
||||
typedef struct
|
||||
{
|
||||
op_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
|
||||
} bits_t;
|
||||
#else
|
||||
typedef struct
|
||||
{
|
||||
op_t B0:8, B1:8, B2:8, B3:8;
|
||||
} bits_t;
|
||||
#endif
|
||||
|
||||
typedef union
|
||||
{
|
||||
op_t v;
|
||||
bits_t b;
|
||||
} bitfields_t;
|
||||
|
||||
#define DO_BYTE(i) \
|
||||
if (a.b.B##i != ch) { \
|
||||
if(a.b.B##i == '\0') return 0; \
|
||||
p++; \
|
||||
} else \
|
||||
return (char *)p;
|
||||
|
||||
#define DO_WORD(w, cnt) { \
|
||||
op_t val = w[cnt] ^ mask_c; \
|
||||
if ((((w[cnt] - mask_1) & ~w[cnt]) & mask_128) || \
|
||||
(((val - mask_1) & ~val) & mask_128)) { \
|
||||
return do_bytes(w + cnt, ch); \
|
||||
} \
|
||||
}
|
||||
|
||||
static inline char * __attribute__ ((always_inline))
|
||||
do_bytes (const op_t* w, unsigned char ch)
|
||||
{
|
||||
bitfields_t a;
|
||||
unsigned char* p = (unsigned char *) w;
|
||||
a.v = *w;
|
||||
#if __mips64
|
||||
DO_BYTE(0)
|
||||
DO_BYTE(1)
|
||||
DO_BYTE(2)
|
||||
DO_BYTE(3)
|
||||
DO_BYTE(4)
|
||||
DO_BYTE(5)
|
||||
DO_BYTE(6)
|
||||
DO_BYTE(7)
|
||||
#else
|
||||
DO_BYTE(0)
|
||||
DO_BYTE(1)
|
||||
DO_BYTE(2)
|
||||
DO_BYTE(3)
|
||||
#endif
|
||||
return (char *)p;
|
||||
}
|
||||
|
||||
/*
 * Return a pointer to the first occurrence of (unsigned char) C in the
 * string S, or 0 if the terminator is reached first.  Searching for '\0'
 * yields a pointer to the terminator itself.
 */
char* strchr(const char* s, int c) __overloadable
{
  /* `ch' and the three masks are read by name inside DO_WORD. */
  const unsigned char ch = c;
  op_t mask_1, mask_128, mask_c;
  unsigned char* cursor = (unsigned char *) s;
  const op_t *words;

  /*
   * Check byte by byte until the cursor is word aligned or sits on a match.
   */
  while (*cursor != ch && ((size_t) cursor % op_size) != 0) {
    if (*cursor == '\0')
      return 0;
    cursor++;
  }

  if (*cursor == ch)
    return (char *)cursor;

  words = (const op_t *) cursor;

  /* Replicate the wanted character into every byte of mask_c. */
  mask_c = ch | (ch << 8);
  mask_c |= mask_c << 16;
  /* mask_1 holds 0x01 in every byte, mask_128 holds 0x80 in every byte. */
  __asm__ volatile (
    "li %0, 0x01010101 \n\t"
    : "=r" (mask_1)
  );
#if __mips64
  mask_1 |= mask_1 << 32;
  mask_c |= mask_c << 32;
#endif
  mask_128 = mask_1 << 7;

  /*
   * Check word/dword-wise after the initial alignment until a character
   * match or the end of the string; DO_WORD returns from this function as
   * soon as a word contains either.
   */
  for (;;) {
    DO_WORD(words, 0)
    DO_WORD(words, 1)
    DO_WORD(words, 2)
    DO_WORD(words, 3)
    words += 4;
  }
}
|
|
@ -1,30 +1,33 @@
|
|||
/*
|
||||
* Copyright (c) 2014
|
||||
* Imagination Technologies Limited.
|
||||
* Copyright (c) 2017 Imagination Technologies.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with
|
||||
* the distribution.
|
||||
* * Neither the name of Imagination Technologies nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef __ANDROID__
|
||||
|
@ -41,6 +44,22 @@
|
|||
# include <sys/asm.h>
|
||||
#endif
|
||||
|
||||
#if __mips64
|
||||
# define NSIZE 8
|
||||
# define LW ld
|
||||
# define EXT dext
|
||||
# define SRL dsrl
|
||||
# define SLL dsll
|
||||
# define SUBU dsubu
|
||||
#else
|
||||
# define NSIZE 4
|
||||
# define LW lw
|
||||
# define EXT ext
|
||||
# define SRL srl
|
||||
# define SLL sll
|
||||
# define SUBU subu
|
||||
#endif
|
||||
|
||||
/* Technically strcmp should not read past the end of the strings being
|
||||
compared. We will read a full word that may contain excess bits beyond
|
||||
the NULL string terminator but unless ENABLE_READAHEAD is set, we will not
|
||||
|
@ -77,6 +96,23 @@
|
|||
# endif
|
||||
#endif
|
||||
|
||||
/* It might seem better to do the 'beq' instruction between the two 'lbu'
|
||||
instructions so that the nop is not needed but testing showed that this
|
||||
code is actually faster (based on glibc strcmp test). */
|
||||
#define BYTECMP01(OFFSET) \
|
||||
lbu v0, OFFSET(a0); \
|
||||
lbu v1, OFFSET(a1); \
|
||||
beq v0, zero, L(bexit01); \
|
||||
nop; \
|
||||
bne v0, v1, L(bexit01)
|
||||
|
||||
#define BYTECMP89(OFFSET) \
|
||||
lbu t8, OFFSET(a0); \
|
||||
lbu t9, OFFSET(a1); \
|
||||
beq t8, zero, L(bexit89); \
|
||||
nop; \
|
||||
bne t8, t9, L(bexit89)
|
||||
|
||||
/* Allow the routine to be named something else if desired. */
|
||||
#ifndef STRCMP_NAME
|
||||
# define STRCMP_NAME strcmp
|
||||
|
@ -87,170 +123,236 @@ LEAF(STRCMP_NAME, 0)
|
|||
#else
|
||||
LEAF(STRCMP_NAME)
|
||||
#endif
|
||||
.set nomips16
|
||||
.set noreorder
|
||||
.set nomips16
|
||||
.set noreorder
|
||||
|
||||
or t0, a0, a1
|
||||
andi t0,0x3
|
||||
bne t0, zero, L(byteloop)
|
||||
andi t1, a1, (NSIZE - 1)
|
||||
beqz t1, L(exitalign)
|
||||
or t0, zero, NSIZE
|
||||
SUBU t1, t0, t1 #process (NSIZE - 1) bytes at max
|
||||
|
||||
/* Both strings are 4 byte aligned at this point. */
|
||||
L(alignloop): #do by bytes until a1 aligned
|
||||
BYTECMP01(0)
|
||||
SUBU t1, t1, 0x1
|
||||
PTR_ADDIU a0, a0, 0x1
|
||||
bnez t1, L(alignloop)
|
||||
PTR_ADDIU a1, a1, 0x1
|
||||
|
||||
lui t8, 0x0101
|
||||
ori t8, t8, 0x0101
|
||||
lui t9, 0x7f7f
|
||||
ori t9, 0x7f7f
|
||||
L(exitalign):
|
||||
|
||||
#define STRCMP32(OFFSET) \
|
||||
lw v0, OFFSET(a0); \
|
||||
lw v1, OFFSET(a1); \
|
||||
subu t0, v0, t8; \
|
||||
bne v0, v1, L(worddiff); \
|
||||
nor t1, v0, t9; \
|
||||
and t0, t0, t1; \
|
||||
bne t0, zero, L(returnzero)
|
||||
/* string a1 is NSIZE byte aligned at this point. */
|
||||
|
||||
lui t8, 0x0101
|
||||
ori t8, 0x0101
|
||||
lui t9, 0x7f7f
|
||||
ori t9, 0x7f7f
|
||||
#if __mips64
|
||||
dsll t1, t8, 32
|
||||
or t8, t1
|
||||
dsll t1, t9, 32
|
||||
or t9, t1
|
||||
#endif
|
||||
|
||||
andi t2, a0, (NSIZE - 1) #check if a0 aligned
|
||||
SUBU t3, t0, t2 #t3 will be used as shifter
|
||||
bnez t2, L(uloopenter)
|
||||
SUBU a2, a0, t2 #bring back a0 to aligned position
|
||||
|
||||
#define STRCMPW(OFFSET) \
|
||||
LW v0, OFFSET(a0); \
|
||||
LW v1, OFFSET(a1); \
|
||||
SUBU t0, v0, t8; \
|
||||
bne v0, v1, L(worddiff); \
|
||||
nor t1, v0, t9; \
|
||||
and t0, t0, t1; \
|
||||
bne t0, zero, L(returnzero);\
|
||||
|
||||
L(wordloop):
|
||||
STRCMP32(0)
|
||||
DELAY_READ
|
||||
STRCMP32(4)
|
||||
DELAY_READ
|
||||
STRCMP32(8)
|
||||
DELAY_READ
|
||||
STRCMP32(12)
|
||||
DELAY_READ
|
||||
STRCMP32(16)
|
||||
DELAY_READ
|
||||
STRCMP32(20)
|
||||
DELAY_READ
|
||||
STRCMP32(24)
|
||||
DELAY_READ
|
||||
STRCMP32(28)
|
||||
PTR_ADDIU a0, a0, 32
|
||||
b L(wordloop)
|
||||
PTR_ADDIU a1, a1, 32
|
||||
STRCMPW(0 * NSIZE)
|
||||
DELAY_READ
|
||||
STRCMPW(1 * NSIZE)
|
||||
DELAY_READ
|
||||
STRCMPW(2 * NSIZE)
|
||||
DELAY_READ
|
||||
STRCMPW(3 * NSIZE)
|
||||
DELAY_READ
|
||||
STRCMPW(4 * NSIZE)
|
||||
DELAY_READ
|
||||
STRCMPW(5 * NSIZE)
|
||||
DELAY_READ
|
||||
STRCMPW(6 * NSIZE)
|
||||
DELAY_READ
|
||||
STRCMPW(7 * NSIZE)
|
||||
PTR_ADDIU a0, a0, (8 * NSIZE)
|
||||
b L(wordloop)
|
||||
PTR_ADDIU a1, a1, (8 * NSIZE)
|
||||
|
||||
#define USTRCMPW(OFFSET) \
|
||||
LW v1, OFFSET(a1); \
|
||||
SUBU t0, v0, t8; \
|
||||
nor t1, v0, t9; \
|
||||
and t0, t0, t1; \
|
||||
bne t0, zero, L(worddiff); \
|
||||
SRL v0, t2; \
|
||||
LW a3, (OFFSET + NSIZE)(a2); \
|
||||
SUBU t0, v1, t8; \
|
||||
SLL t1, a3, t3; \
|
||||
or v0, v0, t1; \
|
||||
bne v0, v1, L(worddiff); \
|
||||
nor t1, v1, t9; \
|
||||
and t0, t0, t1; \
|
||||
bne t0, zero, L(returnzero); \
|
||||
move v0, a3;\
|
||||
|
||||
L(uloopenter):
|
||||
LW v0, 0(a2)
|
||||
SLL t2, 3 #multiply by 8
|
||||
SLL t3, 3 #multiply by 8
|
||||
li a3, -1 #all 1s
|
||||
SRL a3, t3
|
||||
or v0, a3 #replace with all 1s if zeros in unintented read
|
||||
|
||||
L(uwordloop):
|
||||
USTRCMPW(0 * NSIZE)
|
||||
USTRCMPW(1 * NSIZE)
|
||||
USTRCMPW(2 * NSIZE)
|
||||
USTRCMPW(3 * NSIZE)
|
||||
USTRCMPW(4 * NSIZE)
|
||||
USTRCMPW(5 * NSIZE)
|
||||
USTRCMPW(6 * NSIZE)
|
||||
USTRCMPW(7 * NSIZE)
|
||||
PTR_ADDIU a2, a2, (8 * NSIZE)
|
||||
b L(uwordloop)
|
||||
PTR_ADDIU a1, a1, (8 * NSIZE)
|
||||
|
||||
L(returnzero):
|
||||
j ra
|
||||
move v0, zero
|
||||
j ra
|
||||
move v0, zero
|
||||
|
||||
#if __mips_isa_rev > 1
|
||||
#define EXT_COMPARE01(POS) \
|
||||
EXT t0, v0, POS, 8; \
|
||||
beq t0, zero, L(wexit01); \
|
||||
EXT t1, v1, POS, 8; \
|
||||
bne t0, t1, L(wexit01)
|
||||
#define EXT_COMPARE89(POS) \
|
||||
EXT t8, v0, POS, 8; \
|
||||
beq t8, zero, L(wexit89); \
|
||||
EXT t9, v1, POS, 8; \
|
||||
bne t8, t9, L(wexit89)
|
||||
#else
|
||||
#define EXT_COMPARE01(POS) \
|
||||
SRL t0, v0, POS; \
|
||||
SRL t1, v1, POS; \
|
||||
andi t0, t0, 0xff; \
|
||||
beq t0, zero, L(wexit01); \
|
||||
andi t1, t1, 0xff; \
|
||||
bne t0, t1, L(wexit01)
|
||||
#define EXT_COMPARE89(POS) \
|
||||
SRL t8, v0, POS; \
|
||||
SRL t9, v1, POS; \
|
||||
andi t8, t8, 0xff; \
|
||||
beq t8, zero, L(wexit89); \
|
||||
andi t9, t9, 0xff; \
|
||||
bne t8, t9, L(wexit89)
|
||||
#endif
|
||||
|
||||
L(worddiff):
|
||||
#ifdef USE_CLZ
|
||||
subu t0, v0, t8
|
||||
nor t1, v0, t9
|
||||
and t1, t0, t1
|
||||
xor t0, v0, v1
|
||||
or t0, t0, t1
|
||||
SUBU t0, v0, t8
|
||||
nor t1, v0, t9
|
||||
and t1, t0, t1
|
||||
xor t0, v0, v1
|
||||
or t0, t0, t1
|
||||
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
wsbh t0, t0
|
||||
rotr t0, t0, 16
|
||||
wsbh t0, t0
|
||||
rotr t0, t0, 16
|
||||
# endif
|
||||
clz t1, t0
|
||||
and t1, 0xf8
|
||||
clz t1, t0
|
||||
and t1, 0xf8
|
||||
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
neg t1
|
||||
addu t1, 24
|
||||
neg t1
|
||||
addu t1, 24
|
||||
# endif
|
||||
rotrv v0, v0, t1
|
||||
rotrv v1, v1, t1
|
||||
and v0, v0, 0xff
|
||||
and v1, v1, 0xff
|
||||
j ra
|
||||
subu v0, v0, v1
|
||||
rotrv v0, v0, t1
|
||||
rotrv v1, v1, t1
|
||||
and v0, v0, 0xff
|
||||
and v1, v1, 0xff
|
||||
j ra
|
||||
SUBU v0, v0, v1
|
||||
#else /* USE_CLZ */
|
||||
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
andi t0, v0, 0xff
|
||||
beq t0, zero, L(wexit01)
|
||||
andi t1, v1, 0xff
|
||||
bne t0, t1, L(wexit01)
|
||||
andi t0, v0, 0xff
|
||||
beq t0, zero, L(wexit01)
|
||||
andi t1, v1, 0xff
|
||||
bne t0, t1, L(wexit01)
|
||||
EXT_COMPARE89(8)
|
||||
EXT_COMPARE01(16)
|
||||
#ifndef __mips64
|
||||
SRL t8, v0, 24
|
||||
SRL t9, v1, 24
|
||||
#else
|
||||
EXT_COMPARE89(24)
|
||||
EXT_COMPARE01(32)
|
||||
EXT_COMPARE89(40)
|
||||
EXT_COMPARE01(48)
|
||||
SRL t8, v0, 56
|
||||
SRL t9, v1, 56
|
||||
#endif
|
||||
|
||||
srl t8, v0, 8
|
||||
srl t9, v1, 8
|
||||
andi t8, t8, 0xff
|
||||
beq t8, zero, L(wexit89)
|
||||
andi t9, t9, 0xff
|
||||
bne t8, t9, L(wexit89)
|
||||
|
||||
srl t0, v0, 16
|
||||
srl t1, v1, 16
|
||||
andi t0, t0, 0xff
|
||||
beq t0, zero, L(wexit01)
|
||||
andi t1, t1, 0xff
|
||||
bne t0, t1, L(wexit01)
|
||||
|
||||
srl t8, v0, 24
|
||||
srl t9, v1, 24
|
||||
# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
|
||||
srl t0, v0, 24
|
||||
beq t0, zero, L(wexit01)
|
||||
srl t1, v1, 24
|
||||
bne t0, t1, L(wexit01)
|
||||
#ifdef __mips64
|
||||
SRL t0, v0, 56
|
||||
beq t0, zero, L(wexit01)
|
||||
SRL t1, v1, 56
|
||||
bne t0, t1, L(wexit01)
|
||||
EXT_COMPARE89(48)
|
||||
EXT_COMPARE01(40)
|
||||
EXT_COMPARE89(32)
|
||||
EXT_COMPARE01(24)
|
||||
#else
|
||||
SRL t0, v0, 24
|
||||
beq t0, zero, L(wexit01)
|
||||
SRL t1, v1, 24
|
||||
bne t0, t1, L(wexit01)
|
||||
#endif
|
||||
EXT_COMPARE89(16)
|
||||
EXT_COMPARE01(8)
|
||||
|
||||
srl t8, v0, 16
|
||||
srl t9, v1, 16
|
||||
andi t8, t8, 0xff
|
||||
beq t8, zero, L(wexit89)
|
||||
andi t9, t9, 0xff
|
||||
bne t8, t9, L(wexit89)
|
||||
|
||||
srl t0, v0, 8
|
||||
srl t1, v1, 8
|
||||
andi t0, t0, 0xff
|
||||
beq t0, zero, L(wexit01)
|
||||
andi t1, t1, 0xff
|
||||
bne t0, t1, L(wexit01)
|
||||
|
||||
andi t8, v0, 0xff
|
||||
andi t9, v1, 0xff
|
||||
andi t8, v0, 0xff
|
||||
andi t9, v1, 0xff
|
||||
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
|
||||
|
||||
L(wexit89):
|
||||
j ra
|
||||
subu v0, t8, t9
|
||||
j ra
|
||||
SUBU v0, t8, t9
|
||||
L(wexit01):
|
||||
j ra
|
||||
subu v0, t0, t1
|
||||
j ra
|
||||
SUBU v0, t0, t1
|
||||
#endif /* USE_CLZ */
|
||||
|
||||
/* It might seem better to do the 'beq' instruction between the two 'lbu'
|
||||
instructions so that the nop is not needed but testing showed that this
|
||||
code is actually faster (based on glibc strcmp test). */
|
||||
#define BYTECMP01(OFFSET) \
|
||||
lbu v0, OFFSET(a0); \
|
||||
lbu v1, OFFSET(a1); \
|
||||
beq v0, zero, L(bexit01); \
|
||||
nop; \
|
||||
bne v0, v1, L(bexit01)
|
||||
|
||||
#define BYTECMP89(OFFSET) \
|
||||
lbu t8, OFFSET(a0); \
|
||||
lbu t9, OFFSET(a1); \
|
||||
beq t8, zero, L(bexit89); \
|
||||
nop; \
|
||||
bne t8, t9, L(bexit89)
|
||||
|
||||
L(byteloop):
|
||||
BYTECMP01(0)
|
||||
BYTECMP89(1)
|
||||
BYTECMP01(2)
|
||||
BYTECMP89(3)
|
||||
BYTECMP01(4)
|
||||
BYTECMP89(5)
|
||||
BYTECMP01(6)
|
||||
BYTECMP89(7)
|
||||
PTR_ADDIU a0, a0, 8
|
||||
b L(byteloop)
|
||||
PTR_ADDIU a1, a1, 8
|
||||
BYTECMP01(0)
|
||||
BYTECMP89(1)
|
||||
BYTECMP01(2)
|
||||
BYTECMP89(3)
|
||||
BYTECMP01(4)
|
||||
BYTECMP89(5)
|
||||
BYTECMP01(6)
|
||||
BYTECMP89(7)
|
||||
PTR_ADDIU a0, a0, 8
|
||||
b L(byteloop)
|
||||
PTR_ADDIU a1, a1, 8
|
||||
|
||||
L(bexit01):
|
||||
j ra
|
||||
subu v0, v0, v1
|
||||
j ra
|
||||
SUBU v0, v0, v1
|
||||
L(bexit89):
|
||||
j ra
|
||||
subu v0, t8, t9
|
||||
j ra
|
||||
SUBU v0, t8, t9
|
||||
|
||||
.set at
|
||||
.set reorder
|
||||
.set at
|
||||
.set reorder
|
||||
|
||||
END(STRCMP_NAME)
|
||||
#ifndef __ANDROID__
|
||||
|
|
204
libc/arch-mips/string/strcpy.c
Normal file
204
libc/arch-mips/string/strcpy.c
Normal file
|
@ -0,0 +1,204 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Imagination Technologies.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with
|
||||
* the distribution.
|
||||
* * Neither the name of Imagination Technologies nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#define op_t unsigned long int
|
||||
|
||||
#if !defined(UNALIGNED_INSTR_SUPPORT)
|
||||
/* does target have unaligned lw/ld/ualw/uald instructions? */
|
||||
#define UNALIGNED_INSTR_SUPPORT 0
|
||||
#if __mips_isa_rev < 6 && !__mips1
|
||||
#undef UNALIGNED_INSTR_SUPPORT
|
||||
#define UNALIGNED_INSTR_SUPPORT 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(HW_UNALIGNED_SUPPORT)
|
||||
/* Does target have hardware support for unaligned accesses? */
|
||||
#define HW_UNALIGNED_SUPPORT 0
|
||||
#if __mips_isa_rev >= 6
|
||||
#undef HW_UNALIGNED_SUPPORT
|
||||
#define HW_UNALIGNED_SUPPORT 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if __mips64
|
||||
typedef struct
|
||||
{
|
||||
op_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
|
||||
} bits_t;
|
||||
#else
|
||||
typedef struct
|
||||
{
|
||||
op_t B0:8, B1:8, B2:8, B3:8;
|
||||
} bits_t;
|
||||
#endif
|
||||
|
||||
typedef union
|
||||
{
|
||||
op_t v;
|
||||
bits_t b;
|
||||
} bitfields_t;
|
||||
|
||||
#if !HW_UNALIGNED_SUPPORT && UNALIGNED_INSTR_SUPPORT
|
||||
/* for MIPS GCC, there are no unaligned builtins - so this struct forces
|
||||
the compiler to treat the pointer access as unaligned. */
|
||||
struct ulw
|
||||
{
|
||||
op_t uli;
|
||||
} __attribute__ ((packed));
|
||||
#endif /* !HW_UNALIGNED_SUPPORT && UNALIGNED_INSTR_SUPPORT */
|
||||
|
||||
#define DO_BYTE(i, ptdst) { \
|
||||
*(ptdst+i) = a.b.B##i; \
|
||||
if(a.b.B##i == '\0') \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
#if __mips64
|
||||
#define DO_BYTES(val, dst) { \
|
||||
bitfields_t a; \
|
||||
char *tdst = (char *)(dst); \
|
||||
a.v = val; \
|
||||
DO_BYTE(0, tdst) \
|
||||
DO_BYTE(1, tdst) \
|
||||
DO_BYTE(2, tdst) \
|
||||
DO_BYTE(3, tdst) \
|
||||
DO_BYTE(4, tdst) \
|
||||
DO_BYTE(5, tdst) \
|
||||
DO_BYTE(6, tdst) \
|
||||
DO_BYTE(7, tdst) \
|
||||
}
|
||||
#else
|
||||
#define DO_BYTES(val, dst) { \
|
||||
bitfields_t a; \
|
||||
char *tdst = (char *)(dst); \
|
||||
a.v = val; \
|
||||
DO_BYTE(0, tdst) \
|
||||
DO_BYTE(1, tdst) \
|
||||
DO_BYTE(2, tdst) \
|
||||
DO_BYTE(3, tdst) \
|
||||
}
|
||||
#endif
|
||||
|
||||
#define DO_WORD_ALIGNED(dst, src) { \
|
||||
op_t val = *(src); \
|
||||
if ((((val - mask_1) & ~val) & mask_128) != 0) { \
|
||||
DO_BYTES(val, dst); \
|
||||
} else *(dst) = val; \
|
||||
}
|
||||
|
||||
#if !HW_UNALIGNED_SUPPORT
|
||||
#if UNALIGNED_INSTR_SUPPORT
|
||||
#define DO_WORD_UNALIGNED(dst, src) { \
|
||||
op_t val = *(src); \
|
||||
if ((((val - mask_1) & ~val) & mask_128) != 0) { \
|
||||
DO_BYTES(val, dst); \
|
||||
} else { \
|
||||
struct ulw *a = (struct ulw *)(dst); \
|
||||
a->uli = val; \
|
||||
} \
|
||||
}
|
||||
#else
|
||||
#define DO_WORD_UNALIGNED(dst, src) { \
|
||||
op_t val = *(src); \
|
||||
if ((((val - mask_1) & ~val) & mask_128) != 0) { \
|
||||
DO_BYTES(val, dst); \
|
||||
} else { \
|
||||
char *pdst = (char *) dst; \
|
||||
const char *psrc = (const char *) src; \
|
||||
for (; (*pdst = *psrc) != '\0'; ++psrc, ++pdst); \
|
||||
return ret; \
|
||||
} \
|
||||
}
|
||||
#endif /* UNALIGNED_INSTR_SUPPORT */
|
||||
|
||||
#define PROCESS_UNALIGNED_WORDS(a, b) { \
|
||||
while (1) { \
|
||||
DO_WORD_UNALIGNED(a, b); \
|
||||
DO_WORD_UNALIGNED(a + 1, b + 1); \
|
||||
DO_WORD_UNALIGNED(a + 2, b + 2); \
|
||||
DO_WORD_UNALIGNED(a + 3, b + 3); \
|
||||
a += 4; \
|
||||
b += 4; \
|
||||
} \
|
||||
}
|
||||
#endif /* HW_UNALIGNED_SUPPORT */
|
||||
|
||||
#define PROCESS_ALIGNED_WORDS(a, b) { \
|
||||
while (1) { \
|
||||
DO_WORD_ALIGNED(a, b); \
|
||||
DO_WORD_ALIGNED(a + 1, b + 1); \
|
||||
DO_WORD_ALIGNED(a + 2, b + 2); \
|
||||
DO_WORD_ALIGNED(a + 3, b + 3); \
|
||||
a += 4; \
|
||||
b += 4; \
|
||||
} \
|
||||
}
|
||||
|
||||
char *
|
||||
strcpy (char *to, const char *from) __overloadable
|
||||
{
|
||||
char *ret = to;
|
||||
op_t mask_1, mask_128;
|
||||
const op_t *src;
|
||||
op_t *dst;
|
||||
|
||||
for (; (*to = *from) != '\0' && ((size_t) from % sizeof (op_t)) != 0; ++from, ++to);
|
||||
|
||||
if(*to != '\0') {
|
||||
__asm__ volatile (
|
||||
"li %0, 0x01010101 \n\t"
|
||||
: "=r" (mask_1)
|
||||
);
|
||||
#if __mips64
|
||||
mask_1 |= mask_1 << 32;
|
||||
#endif
|
||||
mask_128 = mask_1 << 7;
|
||||
|
||||
src = (const op_t *) from;
|
||||
dst = (op_t *) to;
|
||||
|
||||
#if HW_UNALIGNED_SUPPORT
|
||||
PROCESS_ALIGNED_WORDS(dst, src);
|
||||
#else
|
||||
if (((unsigned long) dst) % sizeof (op_t) == 0) {
|
||||
PROCESS_ALIGNED_WORDS(dst, src);
|
||||
} else {
|
||||
PROCESS_UNALIGNED_WORDS(dst, src);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
|
@ -1,43 +1,115 @@
|
|||
/* $OpenBSD: strlen.c,v 1.8 2014/06/10 04:17:37 deraadt Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1990, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
/*
|
||||
* Copyright (c) 2017 Imagination Technologies.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with
|
||||
* the distribution.
|
||||
* * Neither the name of Imagination Technologies nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
size_t
|
||||
strlen(const char *str) __overloadable
|
||||
{
|
||||
const char *s;
|
||||
#define op_t unsigned long int
|
||||
#define op_size sizeof (op_t)
|
||||
|
||||
for (s = str; *s; ++s)
|
||||
;
|
||||
return (s - str);
|
||||
#if __mips64 || __mips_isa_rev >= 2
|
||||
static inline size_t __attribute__ ((always_inline))
|
||||
do_bytes (const char *base, const char *p, op_t inval)
|
||||
{
|
||||
op_t outval = 0;
|
||||
#if __mips64
|
||||
__asm__ volatile (
|
||||
"dsbh %1, %0 \n\t"
|
||||
"dshd %0, %1 \n\t"
|
||||
"dclz %1, %0 \n\t"
|
||||
: "+r" (inval), "+r" (outval)
|
||||
);
|
||||
#else
|
||||
__asm__ volatile (
|
||||
"wsbh %1, %0 \n\t"
|
||||
"rotr %0, %1, 16 \n\t"
|
||||
"clz %1, %0 \n\t"
|
||||
: "+r" (inval), "+r" (outval)
|
||||
);
|
||||
#endif
|
||||
p += (outval >> 3);
|
||||
return (size_t) (p - base);
|
||||
}
|
||||
|
||||
#define DO_WORD(w, cnt) { \
|
||||
op_t val = ((w[cnt] - mask_1) & ~w[cnt]) & mask_128; \
|
||||
if (val) \
|
||||
return do_bytes(str, (const char *)(w + cnt), val); \
|
||||
}
|
||||
#else
|
||||
/*
 * Byte-wise fallback: advance P to the terminating '\0' and return its
 * distance from BASE.
 */
static inline size_t __attribute__ ((always_inline))
do_bytes (const char *base, const char *p)
{
  while (*p != '\0')
    p++;

  return (size_t) (p - base);
}
|
||||
|
||||
#define DO_WORD(w, cnt) { \
|
||||
if (((w[cnt] - mask_1) & ~w[cnt]) & mask_128) \
|
||||
return do_bytes(str, (const char *)(w + cnt)); \
|
||||
}
|
||||
#endif
|
||||
|
||||
size_t
|
||||
strlen (const char *str) __overloadable
|
||||
{
|
||||
if (*str) {
|
||||
const char *p = (const char *) str;
|
||||
const op_t *w;
|
||||
op_t mask_1, mask_128;
|
||||
|
||||
while ((size_t) p % sizeof (op_t)) {
|
||||
if (!(*p))
|
||||
return (p - str);
|
||||
p++;
|
||||
}
|
||||
|
||||
__asm__ volatile (
|
||||
"li %0, 0x01010101 \n\t"
|
||||
: "=r" (mask_1)
|
||||
);
|
||||
#if __mips64
|
||||
mask_1 |= mask_1 << 32;
|
||||
#endif
|
||||
mask_128 = mask_1 << 7;
|
||||
|
||||
w = (const op_t *) p;
|
||||
|
||||
while (1) {
|
||||
DO_WORD(w, 0);
|
||||
DO_WORD(w, 1);
|
||||
DO_WORD(w, 2);
|
||||
DO_WORD(w, 3);
|
||||
w += 4;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
401
libc/arch-mips/string/strncmp.S
Normal file
401
libc/arch-mips/string/strncmp.S
Normal file
|
@ -0,0 +1,401 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Imagination Technologies.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with
|
||||
* the distribution.
|
||||
* * Neither the name of Imagination Technologies nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef __ANDROID__
|
||||
# include <private/bionic_asm.h>
|
||||
#elif _LIBC
|
||||
# include <sysdep.h>
|
||||
# include <regdef.h>
|
||||
# include <sys/asm.h>
|
||||
#elif _COMPILING_NEWLIB
|
||||
# include "machine/asm.h"
|
||||
# include "machine/regdef.h"
|
||||
#else
|
||||
# include <regdef.h>
|
||||
# include <sys/asm.h>
|
||||
#endif
|
||||
|
||||
#if __mips64
|
||||
# define NSIZE 8
|
||||
# define LW ld
|
||||
# define LWR ldr
|
||||
# define LWL ldl
|
||||
# define EXT dext
|
||||
# define SRL dsrl
|
||||
# define SUBU dsubu
|
||||
#else
|
||||
# define NSIZE 4
|
||||
# define LW lw
|
||||
# define LWR lwr
|
||||
# define LWL lwl
|
||||
# define EXT ext
|
||||
# define SRL srl
|
||||
# define SUBU subu
|
||||
#endif
|
||||
|
||||
/* Technically strcmp should not read past the end of the strings being
|
||||
compared. We will read a full word that may contain excess bits beyond
|
||||
the NULL string terminator but unless ENABLE_READAHEAD is set, we will not
|
||||
read the next word after the end of string. Setting ENABLE_READAHEAD will
|
||||
improve performance but is technically illegal based on the definition of
|
||||
strcmp. */
|
||||
#ifdef ENABLE_READAHEAD
|
||||
# define DELAY_READ
|
||||
#else
|
||||
# define DELAY_READ nop
|
||||
#endif
|
||||
|
||||
/* Testing on a little endian machine showed using CLZ was a
|
||||
performance loss, so we are not turning it on by default. */
|
||||
#if defined(ENABLE_CLZ) && (__mips_isa_rev > 1) && (!__mips64)
|
||||
# define USE_CLZ
|
||||
#endif
|
||||
|
||||
/* Some asm.h files do not have the L macro definition. */
|
||||
#ifndef L
|
||||
# if _MIPS_SIM == _ABIO32
|
||||
# define L(label) $L ## label
|
||||
# else
|
||||
# define L(label) .L ## label
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Some asm.h files do not have the PTR_ADDIU macro definition. */
|
||||
#ifndef PTR_ADDIU
|
||||
# if _MIPS_SIM == _ABIO32
|
||||
# define PTR_ADDIU addiu
|
||||
# else
|
||||
# define PTR_ADDIU daddiu
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* It might seem better to do the 'beq' instruction between the two 'lbu'
|
||||
instructions so that the nop is not needed but testing showed that this
|
||||
code is actually faster (based on glibc strcmp test). */
|
||||
#define BYTECMP01(OFFSET) \
|
||||
lbu v0, OFFSET(a0); \
|
||||
lbu v1, OFFSET(a1); \
|
||||
beq v0, zero, L(bexit01); \
|
||||
nop; \
|
||||
bne v0, v1, L(bexit01)
|
||||
|
||||
#define BYTECMP89(OFFSET) \
|
||||
lbu t8, OFFSET(a0); \
|
||||
lbu t9, OFFSET(a1); \
|
||||
beq t8, zero, L(bexit89); \
|
||||
nop; \
|
||||
bne t8, t9, L(bexit89)
|
||||
|
||||
/* Allow the routine to be named something else if desired. */
|
||||
#ifndef STRNCMP_NAME
|
||||
# define STRNCMP_NAME strncmp
|
||||
#endif
|
||||
|
||||
#ifdef __ANDROID__
LEAF(STRNCMP_NAME, 0)
#else
LEAF(STRNCMP_NAME)
#endif
	.set	nomips16
	.set	noreorder

	/*
	 * The code below reads the strings through a0 and a1 and uses a2 as
	 * the remaining byte count.
	 *
	 * Counts below 2 * NSIZE are handled entirely by the byte loop.  The
	 * width-aware SRL (dsrl on mips64) is used so that the full 64-bit
	 * count is tested; the 32-bit srl only looks at the low word of a2.
	 */
	SRL	t0, a2, (2 + NSIZE / 4)
	beqz	t0, L(byteloop)
	andi	t1, a1, (NSIZE - 1)	/* delay slot: misalignment of a1 */
	beqz	t1, L(exitalign)
	or	t0, zero, NSIZE		/* delay slot */
	SUBU	t1, t0, t1		/* bytes to compare until a1 is aligned (at most NSIZE - 1) */
	SUBU	a2, a2, t1		/* take them off the remaining count */

	/* Compare byte by byte until a1 is NSIZE-aligned. */
L(alignloop):
	BYTECMP01(0)
	SUBU	t1, t1, 0x1		/* delay slot of the bne that ends BYTECMP01 */
	PTR_ADDIU a0, a0, 0x1
	bne	t1, zero, L(alignloop)
	PTR_ADDIU a1, a1, 0x1		/* delay slot */
|
||||
|
||||
L(exitalign):
|
||||
|
||||
/* string a1 is NSIZE byte aligned at this point. */
|
||||
/* Load the zero-byte detection constants used by the word loops:
   t8 = 0x01 in every byte, t9 = 0x7f in every byte (widened to 64 bits
   on mips64). */
/* NOTE(review): under __mips1 these constants are not loaded, yet an aligned
   a0 still falls through to L(wordloop), which reads t8/t9 -- confirm. */
#ifndef __mips1
|
||||
lui t8, 0x0101
|
||||
ori t8, 0x0101
|
||||
lui t9, 0x7f7f
|
||||
ori t9, 0x7f7f
|
||||
#if __mips64
|
||||
dsll t0, t8, 32
|
||||
or t8, t0
|
||||
dsll t1, t9, 32
|
||||
or t9, t1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Dispatch on the alignment of a0.  mips1: an unaligned a0 is handled by the byte loop.
|
||||
rev6: hardware supports unaligned word loads, so no dispatch is needed.
|
||||
Remaining revisions: an unaligned a0 is handled by L(uwordloop), which uses unaligned load instructions. */
|
||||
|
||||
/* With .set noreorder in effect, the instruction that follows each bne below
   (the first instruction of L(wordloop)) executes in its delay slot. */
#if __mips1
|
||||
andi t0, a0, (NSIZE - 1)
|
||||
bne t0, zero, L(byteloop)
|
||||
#elif __mips_isa_rev < 6
|
||||
andi t0, a0, (NSIZE - 1)
|
||||
bne t0, zero, L(uwordloop)
|
||||
#endif
|
||||
|
||||
/*
 * Compare one word from each string at OFFSET.
 *   - words differ                     -> L(worddiff), with v0/v1 holding them
 *   - words equal and contain a NUL    -> L(returnzero)
 * The zero-byte test is (v0 - t8) & ~(v0 | t9).  The final bne leaves its
 * delay slot to whatever instruction follows the macro.
 */
#define STRCMPW(OFFSET) \
	LW   v0, (OFFSET)(a0); \
	LW   v1, (OFFSET)(a1); \
	SUBU t0, v0, t8; \
	bne  v0, v1, L(worddiff); \
	nor  t1, v0, t9; \
	and  t0, t0, t1; \
	bne  t0, zero, L(returnzero);

	/*
	 * Main loop: eight words per iteration while at least 8 * NSIZE bytes
	 * of the count remain.  The first instruction must stay a side-effect
	 * free computation of t1: it also executes in the delay slot of the
	 * alignment-dispatch branch that precedes this label.
	 */
L(wordloop):
	SUBU	t1, a2, (8 * NSIZE)
	bltz	t1, L(onewords)
	nop			/* keep the first LW of STRCMPW out of the delay slot:
				   when the branch is taken the count may already be
				   exhausted and a0 may point past the compared range */
	STRCMPW(0 * NSIZE)
	DELAY_READ
	STRCMPW(1 * NSIZE)
	DELAY_READ
	STRCMPW(2 * NSIZE)
	DELAY_READ
	STRCMPW(3 * NSIZE)
	DELAY_READ
	STRCMPW(4 * NSIZE)
	DELAY_READ
	STRCMPW(5 * NSIZE)
	DELAY_READ
	STRCMPW(6 * NSIZE)
	DELAY_READ
	STRCMPW(7 * NSIZE)
	SUBU	a2, a2, (8 * NSIZE)	/* delay slot of the last bne in STRCMPW */
	PTR_ADDIU a0, a0, (8 * NSIZE)
	b	L(wordloop)
	PTR_ADDIU a1, a1, (8 * NSIZE)	/* delay slot */

	/* One word per iteration while at least NSIZE bytes of the count remain. */
L(onewords):
	SUBU	t1, a2, NSIZE
	bltz	t1, L(byteloop)
	nop			/* as above: no word load once fewer than NSIZE bytes remain */
	STRCMPW(0)
	SUBU	a2, a2, NSIZE		/* delay slot of the last bne in STRCMPW */
	PTR_ADDIU a0, a0, NSIZE
	b	L(onewords)
	PTR_ADDIU a1, a1, NSIZE		/* delay slot */
|
||||
|
||||
/* Word loops for an a0 that is not NSIZE-aligned (a1 is aligned).  Only built
   for revisions below 6 other than mips1. */
#if __mips_isa_rev < 6 && !__mips1
|
||||
/* USTRCMPW: same test as STRCMPW, except that the a0 word is assembled with an
   LWR/LWL pair; the a1 word is read with a plain LW.  Branches to L(worddiff)
   when the words differ and to L(returnzero) when they are equal and
   (v0 - t8) & ~(v0 | t9) is non-zero.  The final bne leaves its delay slot to
   the instruction that follows the macro. */
#define USTRCMPW(OFFSET) \
|
||||
LWR v0, (OFFSET)(a0); \
|
||||
LWL v0, (OFFSET + NSIZE - 1)(a0); \
|
||||
LW v1, (OFFSET)(a1); \
|
||||
SUBU t0, v0, t8; \
|
||||
bne v0, v1, L(worddiff); \
|
||||
nor t1, v0, t9; \
|
||||
and t0, t0, t1; \
|
||||
bne t0, zero, L(returnzero);\
|
||||
|
||||
/* Eight words per iteration while at least 8 * NSIZE bytes of the count
   remain; otherwise continue at L(uonewords).  The LWR that starts the first
   USTRCMPW sits in the delay slot of the bltz. */
L(uwordloop):
|
||||
SUBU t1, a2, (8 * NSIZE)
|
||||
bltz t1, L(uonewords)
|
||||
USTRCMPW(0 * NSIZE)
|
||||
DELAY_READ
|
||||
USTRCMPW(1 * NSIZE)
|
||||
DELAY_READ
|
||||
USTRCMPW(2 * NSIZE)
|
||||
DELAY_READ
|
||||
USTRCMPW(3 * NSIZE)
|
||||
DELAY_READ
|
||||
USTRCMPW(4 * NSIZE)
|
||||
DELAY_READ
|
||||
USTRCMPW(5 * NSIZE)
|
||||
DELAY_READ
|
||||
USTRCMPW(6 * NSIZE)
|
||||
DELAY_READ
|
||||
USTRCMPW(7 * NSIZE)
|
||||
SUBU a2, a2, (8 * NSIZE)
|
||||
PTR_ADDIU a0, a0, (8 * NSIZE)
|
||||
b L(uwordloop)
|
||||
PTR_ADDIU a1, a1, (8 * NSIZE)
|
||||
|
||||
/* One word per iteration while at least NSIZE bytes of the count remain;
   the rest is left to L(byteloop). */
L(uonewords):
|
||||
SUBU t1, a2, NSIZE
|
||||
bltz t1, L(byteloop)
|
||||
USTRCMPW(0)
|
||||
SUBU a2, a2, NSIZE
|
||||
PTR_ADDIU a0, a0, NSIZE
|
||||
b L(uonewords)
|
||||
PTR_ADDIU a1, a1, NSIZE
|
||||
|
||||
#endif
|
||||
|
||||
/* Common exit for "strings compare equal": return 0.  The move executes in
   the delay slot of the jump. */
L(returnzero):
|
||||
j ra
|
||||
move v0, zero
|
||||
|
||||
/* EXT_COMPARE01(POS) / EXT_COMPARE89(POS): extract the 8-bit field starting at
   bit POS from v0 and v1 into t0/t1 (resp. t8/t9) and branch to L(wexit01)
   (resp. L(wexit89)) when the v0 byte is zero or the two bytes differ.  The
   final bne has no delay-slot instruction of its own; the instruction that
   follows the macro fills it.  Revisions above 1 use the EXT bit-field
   extract, older ones a shift followed by a mask. */
#if __mips_isa_rev > 1
|
||||
#define EXT_COMPARE01(POS) \
|
||||
EXT t0, v0, POS, 8; \
|
||||
beq t0, zero, L(wexit01); \
|
||||
EXT t1, v1, POS, 8; \
|
||||
bne t0, t1, L(wexit01)
|
||||
#define EXT_COMPARE89(POS) \
|
||||
EXT t8, v0, POS, 8; \
|
||||
beq t8, zero, L(wexit89); \
|
||||
EXT t9, v1, POS, 8; \
|
||||
bne t8, t9, L(wexit89)
|
||||
#else
|
||||
#define EXT_COMPARE01(POS) \
|
||||
SRL t0, v0, POS; \
|
||||
SRL t1, v1, POS; \
|
||||
andi t0, t0, 0xff; \
|
||||
beq t0, zero, L(wexit01); \
|
||||
andi t1, t1, 0xff; \
|
||||
bne t0, t1, L(wexit01)
|
||||
#define EXT_COMPARE89(POS) \
|
||||
SRL t8, v0, POS; \
|
||||
SRL t9, v1, POS; \
|
||||
andi t8, t8, 0xff; \
|
||||
beq t8, zero, L(wexit89); \
|
||||
andi t9, t9, 0xff; \
|
||||
bne t8, t9, L(wexit89)
|
||||
#endif
|
||||
|
||||
/* Reached from the word loops when the loaded words differ (v0 from a0, v1
   from a1).  Returns the difference between the first pair of bytes at which
   the v0 byte is zero or the two bytes are unequal. */
L(worddiff):
|
||||
/* CLZ variant, built only when USE_CLZ is defined: combine the zero-byte
   flags of v0 with v0 ^ v1, locate the first marked byte with clz, rotate
   that byte of v0 and v1 down to bits 7..0 and subtract. */
#ifdef USE_CLZ
|
||||
SUBU t0, v0, t8
|
||||
nor t1, v0, t9
|
||||
and t1, t0, t1
|
||||
xor t0, v0, v1
|
||||
or t0, t0, t1
|
||||
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
wsbh t0, t0
|
||||
rotr t0, t0, 16
|
||||
# endif
|
||||
clz t1, t0
|
||||
and t1, 0xf8
|
||||
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
neg t1
|
||||
addu t1, 24
|
||||
# endif
|
||||
rotrv v0, v0, t1
|
||||
rotrv v1, v1, t1
|
||||
and v0, v0, 0xff
|
||||
and v1, v1, 0xff
|
||||
j ra
|
||||
SUBU v0, v0, v1
|
||||
/* Default variant: test the bytes one at a time, alternating between the
   t0/t1 and t8/t9 register pairs so that the extract sitting in a branch
   delay slot never overwrites the pair the taken exit subtracts. */
#else /* USE_CLZ */
|
||||
/* Little-endian: start at bits 7..0 and move towards the top byte. */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
andi t0, v0, 0xff
|
||||
beq t0, zero, L(wexit01)
|
||||
andi t1, v1, 0xff
|
||||
bne t0, t1, L(wexit01)
|
||||
EXT_COMPARE89(8)
|
||||
EXT_COMPARE01(16)
|
||||
/* The top byte is only extracted, not tested: control falls through to
   L(wexit89), which returns its difference. */
#ifndef __mips64
|
||||
SRL t8, v0, 24
|
||||
SRL t9, v1, 24
|
||||
#else
|
||||
EXT_COMPARE89(24)
|
||||
EXT_COMPARE01(32)
|
||||
EXT_COMPARE89(40)
|
||||
EXT_COMPARE01(48)
|
||||
SRL t8, v0, 56
|
||||
SRL t9, v1, 56
|
||||
#endif
|
||||
|
||||
/* Big-endian: start at the top byte and move towards bits 7..0. */
# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
|
||||
#ifdef __mips64
|
||||
SRL t0, v0, 56
|
||||
beq t0, zero, L(wexit01)
|
||||
SRL t1, v1, 56
|
||||
bne t0, t1, L(wexit01)
|
||||
EXT_COMPARE89(48)
|
||||
EXT_COMPARE01(40)
|
||||
EXT_COMPARE89(32)
|
||||
EXT_COMPARE01(24)
|
||||
#else
|
||||
SRL t0, v0, 24
|
||||
beq t0, zero, L(wexit01)
|
||||
SRL t1, v1, 24
|
||||
bne t0, t1, L(wexit01)
|
||||
#endif
|
||||
EXT_COMPARE89(16)
|
||||
EXT_COMPARE01(8)
|
||||
|
||||
/* The low byte is only extracted, not tested: control falls through to
   L(wexit89). */
andi t8, v0, 0xff
|
||||
andi t9, v1, 0xff
|
||||
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
|
||||
|
||||
/* Exits: return the byte difference held in t8/t9 or in t0/t1.  The SUBU
   executes in the delay slot of the jump. */
L(wexit89):
|
||||
j ra
|
||||
SUBU v0, t8, t9
|
||||
L(wexit01):
|
||||
j ra
|
||||
SUBU v0, t0, t1
|
||||
#endif /* USE_CLZ */
|
||||
|
||||
/* Byte-at-a-time comparison, four bytes per iteration.  Before each byte the
   remaining count a2 is tested (exit through L(returnzero) when it is zero)
   and decremented in the branch delay slot.  BYTECMP01/BYTECMP89 leave the
   loop through L(bexit01)/L(bexit89) when the a0 byte is zero or the bytes
   differ; the instruction after each macro fills the delay slot of its
   final bne. */
L(byteloop):
|
||||
beq a2, zero, L(returnzero)
|
||||
SUBU a2, a2, 1
|
||||
BYTECMP01(0)
|
||||
nop
|
||||
beq a2, zero, L(returnzero)
|
||||
SUBU a2, a2, 1
|
||||
BYTECMP89(1)
|
||||
nop
|
||||
beq a2, zero, L(returnzero)
|
||||
SUBU a2, a2, 1
|
||||
BYTECMP01(2)
|
||||
nop
|
||||
beq a2, zero, L(returnzero)
|
||||
SUBU a2, a2, 1
|
||||
BYTECMP89(3)
|
||||
PTR_ADDIU a0, a0, 4
|
||||
b L(byteloop)
|
||||
PTR_ADDIU a1, a1, 4
|
||||
|
||||
/* Exits: return the difference of the two bytes loaded by BYTECMP01
   (v0/v1) or by BYTECMP89 (t8/t9). */
L(bexit01):
|
||||
j ra
|
||||
SUBU v0, v0, v1
|
||||
L(bexit89):
|
||||
j ra
|
||||
SUBU v0, t8, t9
|
||||
|
||||
/* Restore the assembler modes and close the function. */
.set at
|
||||
.set reorder
|
||||
|
||||
END(STRNCMP_NAME)
|
||||
#ifndef __ANDROID__
|
||||
# ifdef _LIBC
|
||||
libc_hidden_builtin_def (STRNCMP_NAME)
|
||||
# endif
|
||||
#endif
|
139
libc/arch-mips/string/strnlen.c
Normal file
139
libc/arch-mips/string/strnlen.c
Normal file
|
@ -0,0 +1,139 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Imagination Technologies.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with
|
||||
* the distribution.
|
||||
* * Neither the name of Imagination Technologies nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#define op_t unsigned long int
|
||||
#define op_size sizeof (op_t)
|
||||
|
||||
#if __mips64 || __mips_isa_rev >= 2
/*
 * Convert a non-zero NUL-detection mask into the final string length.
 *
 *   base  - first character of the string being measured
 *   p     - address of the word the mask was computed from
 *   inval - ((word - mask_1) & ~word) & mask_128 for that word (non-zero)
 *
 * The instruction sequence reverses the byte order of the mask and then
 * counts its leading zero bits; dividing that count by eight yields the
 * offset of the flagged byte within the word.
 * NOTE(review): the byte reversal presumes a little-endian target -- confirm.
 */
static inline size_t __attribute__ ((always_inline))
do_bytes (const char *base, const char *p, op_t inval)
{
  op_t zero_bits = 0;
#if __mips64
  __asm__ volatile (
    "dsbh %1, %0 \n\t"
    "dshd %0, %1 \n\t"
    "dclz %1, %0 \n\t"
    : "+r" (inval), "+r" (zero_bits)
  );
#else
  __asm__ volatile (
    "wsbh %1, %0 \n\t"
    "rotr %0, %1, 16 \n\t"
    "clz %1, %0 \n\t"
    : "+r" (inval), "+r" (zero_bits)
  );
#endif
  return (size_t) ((p + (zero_bits >> 3)) - base);
}

/*
 * Test the word value `val`, which was read from address `in`, for a zero
 * byte and, if one is present, return the string length from the enclosing
 * function.  Relies on `str`, `mask_1` and `mask_128` being in scope.
 */
#define DO_WORD(in, val) {                                     \
  const op_t word_ = (val);                                    \
  const op_t flags_ = ((word_ - mask_1) & ~word_) & mask_128;  \
  if (flags_ != 0)                                             \
    return do_bytes(str, (const char *)(in), flags_);          \
}
#else
/*
 * Portable fallback: walk forward from p to the terminating NUL and return
 * its distance from base.
 */
static inline size_t __attribute__ ((always_inline))
do_bytes (const char *base, const char *p)
{
  while (*p != '\0')
    p++;
  return (size_t) (p - base);
}

/*
 * Test the word value `val`, which was read from address `in`, for a zero
 * byte and, if one is present, return the string length from the enclosing
 * function.  Relies on `str`, `mask_1` and `mask_128` being in scope.
 */
#define DO_WORD(in, val) {                              \
  const op_t word_ = (val);                             \
  if ((((word_ - mask_1) & ~word_) & mask_128) != 0) {  \
    return do_bytes(str, (const char *)(in));           \
  }                                                     \
}
#endif
|
||||
|
||||
size_t strnlen (const char *str, size_t n) {
|
||||
if (n != 0) {
|
||||
const char *p = (const char *) str;
|
||||
const op_t *w;
|
||||
op_t mask_1, mask_128;
|
||||
|
||||
for (; n > 0 && ((size_t) p % op_size) != 0; --n, ++p) {
|
||||
if (!(*p))
|
||||
return (p - str);
|
||||
}
|
||||
|
||||
w = (const op_t *) p;
|
||||
|
||||
__asm__ volatile (
|
||||
"li %0, 0x01010101 \n\t"
|
||||
: "=r" (mask_1)
|
||||
);
|
||||
#if __mips64
|
||||
mask_1 |= mask_1 << 32;
|
||||
#endif
|
||||
mask_128 = mask_1 << 7;
|
||||
|
||||
/*
|
||||
* Check op_size byteswize after initial alignment
|
||||
*/
|
||||
while (n >= 4 * op_size) {
|
||||
const op_t w0 = w[0];
|
||||
const op_t w1 = w[1];
|
||||
const op_t w2 = w[2];
|
||||
const op_t w3 = w[3];
|
||||
DO_WORD(w + 0, w0)
|
||||
DO_WORD(w + 1, w1)
|
||||
DO_WORD(w + 2, w2)
|
||||
DO_WORD(w + 3, w3)
|
||||
w += 4;
|
||||
n -= 4 * op_size;
|
||||
}
|
||||
|
||||
while (n >= op_size) {
|
||||
DO_WORD(w, w[0]);
|
||||
w++;
|
||||
n -= op_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check bytewize for remaining bytes
|
||||
*/
|
||||
p = (const char *) w;
|
||||
for (; n > 0; --n, ++p) {
|
||||
if (!(*p))
|
||||
return (p - str);
|
||||
}
|
||||
|
||||
return (p - str);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in a new issue