Merge "Add libc optimizations to bionic for MIPS"

Treehugger Robot 2017-03-17 15:29:13 +00:00 committed by Gerrit Code Review
commit 9cb82a2c6b
15 changed files with 2628 additions and 1546 deletions

View file: libc/Android.bp

@@ -632,7 +632,22 @@ cc_library_static {
"upstream-openbsd/lib/libc/string/strncmp.c",
],
},
mips: {
exclude_srcs: [
"upstream-openbsd/lib/libc/string/memchr.c",
"upstream-openbsd/lib/libc/string/memmove.c",
"upstream-openbsd/lib/libc/string/strcpy.c",
"upstream-openbsd/lib/libc/string/strncmp.c",
],
},
mips64: {
exclude_srcs: [
"upstream-openbsd/lib/libc/string/memchr.c",
"upstream-openbsd/lib/libc/string/memmove.c",
"upstream-openbsd/lib/libc/string/strcpy.c",
"upstream-openbsd/lib/libc/string/strncmp.c",
],
},
x86: {
exclude_srcs: [
"upstream-openbsd/lib/libc/string/memchr.c",
@@ -1041,9 +1056,16 @@ cc_library_static {
mips: {
srcs: [
"arch-mips/string/memcmp.c",
"arch-mips/string/memcpy.S",
"arch-mips/string/memcpy.c",
"arch-mips/string/memset.S",
"arch-mips/string/strcmp.S",
"arch-mips/string/strncmp.S",
"arch-mips/string/strlen.c",
"arch-mips/string/strnlen.c",
"arch-mips/string/strchr.c",
"arch-mips/string/strcpy.c",
"arch-mips/string/memchr.c",
"arch-mips/string/memmove.c",
"arch-mips/bionic/__bionic_clone.S",
"arch-mips/bionic/cacheflush.cpp",
@@ -1052,25 +1074,25 @@ cc_library_static {
"arch-mips/bionic/setjmp.S",
"arch-mips/bionic/syscall.S",
"arch-mips/bionic/vfork.S",
"arch-mips/string/mips_strlen.c",
],
rev6: {
srcs: [
"arch-mips/string/strlen.c",
],
exclude_srcs: [
"arch-mips/string/mips_strlen.c",
],
},
exclude_srcs: [
"bionic/strchr.cpp",
"bionic/strnlen.c",
],
},
mips64: {
srcs: [
"arch-mips/string/memcmp.c",
"arch-mips/string/memcpy.S",
"arch-mips/string/memcpy.c",
"arch-mips/string/memset.S",
"arch-mips/string/strcmp.S",
"arch-mips/string/strncmp.S",
"arch-mips/string/strlen.c",
"arch-mips/string/strnlen.c",
"arch-mips/string/strchr.c",
"arch-mips/string/strcpy.c",
"arch-mips/string/memchr.c",
"arch-mips/string/memmove.c",
"arch-mips64/bionic/__bionic_clone.S",
"arch-mips64/bionic/_exit_with_stack_teardown.S",
@@ -1079,6 +1101,10 @@ cc_library_static {
"arch-mips64/bionic/vfork.S",
"arch-mips64/bionic/stat.cpp",
],
exclude_srcs: [
"bionic/strchr.cpp",
"bionic/strnlen.c",
],
},
x86: {

View file: libc/NOTICE

@@ -4816,38 +4816,6 @@ Optimized by Bruce D. Evans.
-------------------------------------------------------------------
Copyright (c) 2010 MIPS Technologies, Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with
the distribution.
* Neither the name of MIPS Technologies Inc. nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2010 The NetBSD Foundation, Inc.
All rights reserved.
@@ -5344,35 +5312,6 @@ Copyright (c) 2012-2013, Linaro Limited
-------------------------------------------------------------------
Copyright (c) 2012-2015
MIPS Technologies, Inc., California.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the MIPS Technologies, Inc., nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2013
MIPS Technologies, Inc., California.
@@ -5586,35 +5525,6 @@ Copyright (c) 2013-2015, Linaro Limited
-------------------------------------------------------------------
Copyright (c) 2014
Imagination Technologies Limited.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the MIPS Technologies, Inc., nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org>
Copyright (c) 2014 Bob Beck <beck@obtuse.com>
@@ -5750,6 +5660,38 @@ SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2017 Imagination Technologies.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with
the distribution.
* Neither the name of Imagination Technologies nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c)1999 Citrus Project,
All rights reserved.

View file: libc/arch-mips/string/memchr.c

@@ -0,0 +1,185 @@
/*
* Copyright (c) 2017 Imagination Technologies.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with
* the distribution.
* * Neither the name of Imagination Technologies nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#define ENABLE_PREFETCH 1
#define op_t unsigned long int
#define op_size sizeof (op_t)
#if ENABLE_PREFETCH
#define PREFETCH(addr) __builtin_prefetch (addr, 0, 1);
#else
#define PREFETCH(addr)
#endif
#if __mips64 || __mips_isa_rev >= 2
static inline void * __attribute__ ((always_inline))
do_bytes (const op_t* w, op_t inval)
{
const unsigned char *p = (const unsigned char *) w;
op_t outval = 0;
#if __mips64
__asm__ volatile (
"dsbh %1, %0 \n\t"
"dshd %0, %1 \n\t"
"dclz %1, %0 \n\t"
: "+r" (inval), "+r" (outval)
);
#else
__asm__ volatile (
"wsbh %1, %0 \n\t"
"rotr %0, %1, 16 \n\t"
"clz %1, %0 \n\t"
: "+r" (inval), "+r" (outval)
);
#endif
p += (outval >> 3);
return (void *) p;
}
#define DO_WORD(in, val) { \
op_t tmp = ((val - mask_1) & ~val) & mask_128; \
if (tmp != 0) \
return do_bytes(in, tmp); \
}
#else
static inline void * __attribute__ ((always_inline))
do_bytes (const op_t* w, unsigned char ch)
{
const unsigned char *p = (const unsigned char *) w;
for (; *p != ch; ++p);
return (void *) p;
}
#define DO_WORD(in, val) { \
op_t tmp = ((val - mask_1) & ~val) & mask_128; \
if (tmp != 0) \
return do_bytes(in, ch); \
}
#endif
#define DO_WORDS(w) { \
op_t* w1 = (op_t*) w; \
op_t val0 = w1[0] ^ mask_c; \
op_t val1 = w1[1] ^ mask_c; \
op_t val2 = w1[2] ^ mask_c; \
op_t val3 = w1[3] ^ mask_c; \
DO_WORD(w1, val0) \
DO_WORD(w1 + 1, val1) \
DO_WORD(w1 + 2, val2) \
DO_WORD(w1 + 3, val3) \
}
void *
memchr (void const *s, int c_in, size_t n) __overloadable
{
if (n != 0) {
const unsigned char *p = (const unsigned char *) s;
const op_t *w;
op_t mask_1, mask_128, mask_c;
unsigned char ch = (unsigned char) c_in;
/*
* Check bytewise until initial alignment is reached
*/
for (; n > 0 && ((size_t) p % op_size) != 0; --n, ++p) {
if (*p == ch)
return (void *) p;
}
w = (const op_t *) p;
mask_c = ch | (ch << 8);
mask_c |= mask_c << 16;
__asm__ volatile (
"li %0, 0x01010101 \n\t"
: "=r" (mask_1)
);
#if __mips64
mask_1 |= mask_1 << 32;
mask_c |= mask_c << 32;
#endif
mask_128 = mask_1 << 7;
/*
* Check op_size bytes at a time after initial alignment
*/
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
PREFETCH (w);
PREFETCH (w + 8);
while (n >= 24 * op_size) {
PREFETCH(w + 16);
DO_WORDS(w);
DO_WORDS(w + 4);
w += 8;
n -= 8 * op_size;
}
while (n >= 8 * op_size) {
DO_WORDS(w);
DO_WORDS(w + 4);
w += 8;
n -= 8 * op_size;
}
#else
PREFETCH (w);
PREFETCH (w + 4);
while (n >= 12 * op_size) {
PREFETCH(w + 8);
DO_WORDS(w);
w += 4;
n -= 4 * op_size;
}
while (n >= 4 * op_size) {
DO_WORDS(w);
w += 4;
n -= 4 * op_size;
}
#endif
while (n >= op_size) {
op_t val = *w ^ mask_c;
DO_WORD(w, val);
w++;
n -= op_size;
}
/*
* Check the remaining bytes bytewise
*/
p = (const unsigned char *) w;
for (; n > 0; --n, ++p) {
if (*p == ch)
return (void *) p;
}
}
return NULL;
}
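The DO_WORD test above relies on the classic zero-byte detection identity: after XOR-ing a word with mask_c (the target character replicated into every byte), the value (v - mask_1) & ~v & mask_128 is nonzero exactly when some byte of v is zero, i.e. when some byte of the original word matched the character. A minimal stand-alone sketch of the 32-bit case (illustrative names, not part of the commit):

#include <assert.h>
#include <stdint.h>

/* Nonzero iff some byte of v is 0x00 (32-bit form of the DO_WORD test). */
static uint32_t has_zero_byte(uint32_t v)
{
  return (v - 0x01010101u) & ~v & 0x80808080u;
}

int main(void)
{
  uint32_t w = 0x41424344u;                 /* "ABCD": no zero byte */
  assert(!has_zero_byte(w));
  assert(has_zero_byte(w ^ 0x43434343u));   /* XOR with repeated 'C' zeroes one byte */
  return 0;
}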

View file: libc/arch-mips/string/memcmp.c

@@ -1,51 +1,352 @@
/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 2017 Imagination Technologies.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      * Redistributions of source code must retain the above copyright
 *        notice, this list of conditions and the following disclaimer.
 *      * Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimer
 *        in the documentation and/or other materials provided with
 *        the distribution.
 *      * Neither the name of Imagination Technologies nor the names of its
 *        contributors may be used to endorse or promote products derived
 *        from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <string.h>
#include <stdint.h>
int memcmp(const void *s1, const void *s2, size_t n)
{
  const unsigned char* p1 = s1;
  const unsigned char* end1 = p1 + n;
  const unsigned char* p2 = s2;
  int d = 0;
  for (;;) {
    if (d || p1 >= end1) break;
    d = (int)*p1++ - (int)*p2++;
    if (d || p1 >= end1) break;
    d = (int)*p1++ - (int)*p2++;
    if (d || p1 >= end1) break;
    d = (int)*p1++ - (int)*p2++;
    if (d || p1 >= end1) break;
    d = (int)*p1++ - (int)*p2++;
  }
  return d;
}
#define ENABLE_PREFETCH 1
#define STRNG(X) #X
#define PREFETCH(src_ptr, offset) \
  asm("pref 0, " STRNG(offset) "(%[src]) \n\t" : : [src] "r" (src_ptr));
#if !defined(UNALIGNED_INSTR_SUPPORT)
/* does target have unaligned lw/ld/ualw/uald instructions? */
#define UNALIGNED_INSTR_SUPPORT 0
#if __mips_isa_rev < 6 && !__mips1
#undef UNALIGNED_INSTR_SUPPORT
#define UNALIGNED_INSTR_SUPPORT 1
#endif
#endif
#if !defined(HW_UNALIGNED_SUPPORT)
/* Does target have hardware support for unaligned accesses? */
#define HW_UNALIGNED_SUPPORT 0
#if __mips_isa_rev >= 6
#undef HW_UNALIGNED_SUPPORT
#define HW_UNALIGNED_SUPPORT 1
#endif
#endif
#define SIZEOF_reg_t 4
#if _MIPS_SIM == _ABIO32
typedef unsigned long reg_t;
typedef struct bits
{
  reg_t B0:8, B1:8, B2:8, B3:8;
} bits_t;
#else
#undef SIZEOF_reg_t
#define SIZEOF_reg_t 8
typedef unsigned long long reg_t;
typedef struct bits
{
  reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
} bits_t;
#endif
/* This union assumes that small structures can be in registers.  If
   not, then memory accesses will be done - not optimal, but ok. */
typedef union
{
  reg_t v;
  bits_t b;
} bitfields_t;
#define do_bitfield(__i) \
  if (x.b.B##__i != y.b.B##__i) return x.b.B##__i - y.b.B##__i;
/* pull apart the words to find the first differing unsigned byte. */
static int __attribute__ ((noinline)) do_by_bitfields (reg_t a, reg_t b)
{
bitfields_t x, y;
x.v = a;
y.v = b;
do_bitfield (0);
do_bitfield (1);
do_bitfield (2);
#if SIZEOF_reg_t == 4
return x.b.B3 - y.b.B3;
#else
do_bitfield (3);
do_bitfield (4);
do_bitfield (5);
do_bitfield (6);
return x.b.B7 - y.b.B7;
#endif
}
/* This code is called when aligning a pointer, when there are remaining
   bytes after doing word compares, or when the architecture does not have
   some form of unaligned support. */
static inline int __attribute__ ((always_inline))
do_bytes (const void *a, const void *b, unsigned long len)
{
unsigned char *x = (unsigned char *) a;
unsigned char *y = (unsigned char *) b;
unsigned long i;
/* 'len' might be zero here, so preloading the first two values
before the loop may access unallocated memory. */
for (i = 0; i < len; i++) {
if (*x != *y)
return *x - *y;
x++;
y++;
}
return 0;
}
#if !HW_UNALIGNED_SUPPORT
#if UNALIGNED_INSTR_SUPPORT
/* for MIPS GCC, there are no unaligned builtins - so this struct forces
the compiler to treat the pointer access as unaligned. */
struct ulw
{
reg_t uli;
} __attribute__ ((packed));
/* first pointer is not aligned while second pointer is. */
static int unaligned_words (const struct ulw *a, const reg_t *b,
unsigned long words, unsigned long bytes)
{
#if ENABLE_PREFETCH
/* prefetch pointer aligned to 32 byte boundary */
const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
#endif
for (; words >= 16; words -= 8) {
#if ENABLE_PREFETCH
pref_ptr += 8;
PREFETCH(pref_ptr, 0);
PREFETCH(pref_ptr, 32);
pref_ptr_a += 8;
PREFETCH(pref_ptr_a, 0);
PREFETCH(pref_ptr_a, 32);
#endif
reg_t x0 = a[0].uli, x1 = a[1].uli;
reg_t x2 = a[2].uli, x3 = a[3].uli;
reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
if (x0 != y0)
return do_by_bitfields (x0, y0);
if (x1 != y1)
return do_by_bitfields (x1, y1);
if (x2 != y2)
return do_by_bitfields (x2, y2);
if (x3 != y3)
return do_by_bitfields (x3, y3);
x0 = a[4].uli; x1 = a[5].uli;
x2 = a[6].uli; x3 = a[7].uli;
y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
if (x0 != y0)
return do_by_bitfields (x0, y0);
if (x1 != y1)
return do_by_bitfields (x1, y1);
if (x2 != y2)
return do_by_bitfields (x2, y2);
if (x3 != y3)
return do_by_bitfields (x3, y3);
a += 8;
b += 8;
}
for (; words >= 4; words -= 4) {
reg_t x0 = a[0].uli, x1 = a[1].uli;
reg_t x2 = a[2].uli, x3 = a[3].uli;
reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
if (x0 != y0)
return do_by_bitfields (x0, y0);
if (x1 != y1)
return do_by_bitfields (x1, y1);
if (x2 != y2)
return do_by_bitfields (x2, y2);
if (x3 != y3)
return do_by_bitfields (x3, y3);
a += 4;
b += 4;
}
/* do remaining words. */
while (words--) {
reg_t x0 = a->uli;
reg_t y0 = *b;
a += 1;
b += 1;
if (x0 != y0)
return do_by_bitfields (x0, y0);
}
/* mop up any remaining bytes. */
return do_bytes (a, b, bytes);
}
#else
/* no HW support or unaligned lw/ld/ualw/uald instructions. */
static int unaligned_words (const reg_t *a, const reg_t *b,
unsigned long words, unsigned long bytes)
{
return do_bytes (a, b, (sizeof (reg_t) * words) + bytes);
}
#endif /* UNALIGNED_INSTR_SUPPORT */
#endif /* HW_UNALIGNED_SUPPORT */
/* both pointers are aligned, or first isn't and HW support for unaligned. */
static int aligned_words (const reg_t *a, const reg_t *b,
unsigned long words, unsigned long bytes)
{
#if ENABLE_PREFETCH
/* prefetch pointer aligned to 32 byte boundary */
const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
#endif
for (; words >= 24; words -= 12) {
#if ENABLE_PREFETCH
pref_ptr += 12;
PREFETCH(pref_ptr, 0);
PREFETCH(pref_ptr, 32);
PREFETCH(pref_ptr, 64);
pref_ptr_a += 12;
PREFETCH(pref_ptr_a, 0);
PREFETCH(pref_ptr_a, 32);
PREFETCH(pref_ptr_a, 64);
#endif
reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
if (x0 != y0)
return do_by_bitfields (x0, y0);
if (x1 != y1)
return do_by_bitfields (x1, y1);
if (x2 != y2)
return do_by_bitfields (x2, y2);
if (x3 != y3)
return do_by_bitfields (x3, y3);
x0 = a[4]; x1 = a[5]; x2 = a[6]; x3 = a[7];
y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
if (x0 != y0)
return do_by_bitfields (x0, y0);
if (x1 != y1)
return do_by_bitfields (x1, y1);
if (x2 != y2)
return do_by_bitfields (x2, y2);
if (x3 != y3)
return do_by_bitfields (x3, y3);
x0 = a[8]; x1 = a[9]; x2 = a[10]; x3 = a[11];
y0 = b[8]; y1 = b[9]; y2 = b[10]; y3 = b[11];
if (x0 != y0)
return do_by_bitfields (x0, y0);
if (x1 != y1)
return do_by_bitfields (x1, y1);
if (x2 != y2)
return do_by_bitfields (x2, y2);
if (x3 != y3)
return do_by_bitfields (x3, y3);
a += 12;
b += 12;
}
for (; words >= 4; words -= 4) {
reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
if (x0 != y0)
return do_by_bitfields (x0, y0);
if (x1 != y1)
return do_by_bitfields (x1, y1);
if (x2 != y2)
return do_by_bitfields (x2, y2);
if (x3 != y3)
return do_by_bitfields (x3, y3);
a += 4;
b += 4;
}
/* do remaining words. */
while (words--) {
reg_t x0 = *a;
reg_t y0 = *b;
a += 1;
b += 1;
if (x0 != y0)
return do_by_bitfields (x0, y0);
}
/* mop up any remaining bytes. */
return do_bytes (a, b, bytes);
}
int memcmp (const void *a, const void *b, size_t len)
{
unsigned long bytes, words;
/* shouldn't hit that often. */
if (len < sizeof (reg_t) * 4) {
return do_bytes (a, b, len);
}
/* Align the second pointer to word/dword alignment.
Note that the pointer is only 32-bits for o32/n32 ABIs. For
n32, loads are done as 64-bit while address remains 32-bit. */
bytes = ((unsigned long) b) % sizeof (reg_t);
if (bytes) {
int res;
bytes = sizeof (reg_t) - bytes;
if (bytes > len)
bytes = len;
res = do_bytes (a, b, bytes);
if (res || len == bytes)
return res;
len -= bytes;
a = (const void *) (((unsigned char *) a) + bytes);
b = (const void *) (((unsigned char *) b) + bytes);
}
/* Second pointer now aligned. */
words = len / sizeof (reg_t);
bytes = len % sizeof (reg_t);
#if HW_UNALIGNED_SUPPORT
/* treat possible unaligned first pointer as aligned. */
return aligned_words (a, b, words, bytes);
#else
if (((unsigned long) a) % sizeof (reg_t) == 0) {
return aligned_words (a, b, words, bytes);
}
/* need to use unaligned instructions on first pointer. */
return unaligned_words (a, b, words, bytes);
#endif
}
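The struct ulw wrapper above is the usual GCC idiom for expressing an unaligned load: packing a one-member struct drops its alignment requirement, so reading through it makes the compiler emit unaligned-safe accesses (ulw/uld sequences on pre-R6 MIPS) instead of a plain lw/ld that would trap. A minimal illustration of the same idiom (hypothetical names, plain C):

#include <stdio.h>
#include <string.h>

struct unaligned_word
{
  unsigned long w;
} __attribute__ ((packed));

/* Read an unsigned long from a possibly misaligned address. */
static unsigned long load_unaligned(const void *p)
{
  return ((const struct unaligned_word *) p)->w;
}

int main(void)
{
  unsigned char buf[sizeof (unsigned long) + 1];
  unsigned long v = 0x11223344UL;
  memcpy(buf + 1, &v, sizeof v);    /* buf + 1 is deliberately misaligned */
  printf("%lx\n", load_unaligned(buf + 1));
  return 0;
}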

View file: libc/arch-mips/string/memcpy.S (deleted)

@@ -1,852 +0,0 @@
/*
* Copyright (c) 2012-2015
* MIPS Technologies, Inc., California.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifdef __ANDROID__
# include <private/bionic_asm.h>
# define USE_MEMMOVE_FOR_OVERLAP
# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
#elif _LIBC
# include <sysdep.h>
# include <regdef.h>
# include <sys/asm.h>
# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
#elif _COMPILING_NEWLIB
# include "machine/asm.h"
# include "machine/regdef.h"
# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
#else
# include <regdef.h>
# include <sys/asm.h>
#endif
/* Check to see if the MIPS architecture we are compiling for supports
* prefetching.
*/
#if (__mips == 4) || (__mips == 5) || (__mips == 32) || (__mips == 64)
# ifndef DISABLE_PREFETCH
# define USE_PREFETCH
# endif
#endif
#if defined(_MIPS_SIM) && ((_MIPS_SIM == _ABI64) || (_MIPS_SIM == _ABIN32))
# ifndef DISABLE_DOUBLE
# define USE_DOUBLE
# endif
#endif
#if __mips_isa_rev > 5
# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
# undef PREFETCH_STORE_HINT
# define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED
# endif
# define R6_CODE
#endif
/* Some asm.h files do not have the L macro definition. */
#ifndef L
# if _MIPS_SIM == _ABIO32
# define L(label) $L ## label
# else
# define L(label) .L ## label
# endif
#endif
/* Some asm.h files do not have the PTR_ADDIU macro definition. */
#ifndef PTR_ADDIU
# if _MIPS_SIM == _ABIO32
# define PTR_ADDIU addiu
# else
# define PTR_ADDIU daddiu
# endif
#endif
/* Some asm.h files do not have the PTR_SRA macro definition. */
#ifndef PTR_SRA
# if _MIPS_SIM == _ABIO32
# define PTR_SRA sra
# else
# define PTR_SRA dsra
# endif
#endif
/* New R6 instructions that may not be in asm.h. */
#ifndef PTR_LSA
# if _MIPS_SIM == _ABIO32
# define PTR_LSA lsa
# else
# define PTR_LSA dlsa
# endif
#endif
/*
* Using PREFETCH_HINT_LOAD_STREAMED instead of PREFETCH_LOAD on load
* prefetches appears to offer a slight performance advantage.
*
* Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE
* or PREFETCH_STORE_STREAMED offers a large performance advantage
* but PREPAREFORSTORE has some special restrictions to consider.
*
* Prefetch with the 'prepare for store' hint does not copy a memory
* location into the cache, it just allocates a cache line and zeros
* it out. This means that if you do not write to the entire cache
* line before writing it out to memory, some data will get zeroed out
* when the cache line is written back to memory and data will be lost.
*
* Also if you are using this memcpy to copy overlapping buffers it may
* not behave correctly when using the 'prepare for store' hint. If you
* use the 'prepare for store' prefetch on a memory area that is in the
* memcpy source (as well as the memcpy destination), then you will get
* some data zero'ed out before you have a chance to read it and data will
* be lost.
*
* If you are going to use this memcpy routine with the 'prepare for store'
* prefetch you may want to set USE_MEMMOVE_FOR_OVERLAP in order to avoid
* the problem of running memcpy on overlapping buffers.
*
* There are ifdef'ed sections of this memcpy to make sure that it does not
* do prefetches on cache lines that are not going to be completely written.
* This code is only needed and only used when PREFETCH_STORE_HINT is set to
* PREFETCH_HINT_PREPAREFORSTORE. This code assumes that cache lines are
* 32 bytes and if the cache line is larger it will not work correctly.
*/
#ifdef USE_PREFETCH
# define PREFETCH_HINT_LOAD 0
# define PREFETCH_HINT_STORE 1
# define PREFETCH_HINT_LOAD_STREAMED 4
# define PREFETCH_HINT_STORE_STREAMED 5
# define PREFETCH_HINT_LOAD_RETAINED 6
# define PREFETCH_HINT_STORE_RETAINED 7
# define PREFETCH_HINT_WRITEBACK_INVAL 25
# define PREFETCH_HINT_PREPAREFORSTORE 30
/*
* If we have not picked out what hints to use at this point use the
* standard load and store prefetch hints.
*/
# ifndef PREFETCH_STORE_HINT
# define PREFETCH_STORE_HINT PREFETCH_HINT_STORE
# endif
# ifndef PREFETCH_LOAD_HINT
# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD
# endif
/*
* We double everything when USE_DOUBLE is true so we do 2 prefetches to
* get 64 bytes in that case. The assumption is that each individual
* prefetch brings in 32 bytes.
*/
# ifdef USE_DOUBLE
# define PREFETCH_CHUNK 64
# define PREFETCH_FOR_LOAD(chunk, reg) \
pref PREFETCH_LOAD_HINT, (chunk)*64(reg); \
pref PREFETCH_LOAD_HINT, ((chunk)*64)+32(reg)
# define PREFETCH_FOR_STORE(chunk, reg) \
pref PREFETCH_STORE_HINT, (chunk)*64(reg); \
pref PREFETCH_STORE_HINT, ((chunk)*64)+32(reg)
# else
# define PREFETCH_CHUNK 32
# define PREFETCH_FOR_LOAD(chunk, reg) \
pref PREFETCH_LOAD_HINT, (chunk)*32(reg)
# define PREFETCH_FOR_STORE(chunk, reg) \
pref PREFETCH_STORE_HINT, (chunk)*32(reg)
# endif
/* MAX_PREFETCH_SIZE is the maximum size of a prefetch, it must not be less
* than PREFETCH_CHUNK, the assumed size of each prefetch. If the real size
* of a prefetch is greater than MAX_PREFETCH_SIZE and the PREPAREFORSTORE
* hint is used, the code will not work correctly. If PREPAREFORSTORE is not
* used then MAX_PREFETCH_SIZE does not matter. */
# define MAX_PREFETCH_SIZE 128
/* PREFETCH_LIMIT is set based on the fact that we never use an offset greater
* than 5 on a STORE prefetch and that a single prefetch can never be larger
* than MAX_PREFETCH_SIZE. We add the extra 32 when USE_DOUBLE is set because
* we actually do two prefetches in that case, one 32 bytes after the other. */
# ifdef USE_DOUBLE
# define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + 32 + MAX_PREFETCH_SIZE
# else
# define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + MAX_PREFETCH_SIZE
# endif
# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) \
&& ((PREFETCH_CHUNK * 4) < MAX_PREFETCH_SIZE)
/* We cannot handle this because the initial prefetches may fetch bytes that
* are before the buffer being copied. We start copies with an offset
* of 4 so avoid this situation when using PREPAREFORSTORE. */
#error "PREFETCH_CHUNK is too large and/or MAX_PREFETCH_SIZE is too small."
# endif
#else /* USE_PREFETCH not defined */
# define PREFETCH_FOR_LOAD(offset, reg)
# define PREFETCH_FOR_STORE(offset, reg)
#endif
/* Allow the routine to be named something else if desired. */
#ifndef MEMCPY_NAME
# define MEMCPY_NAME memcpy
#endif
/* We use these 32/64 bit registers as temporaries to do the copying. */
#define REG0 t0
#define REG1 t1
#define REG2 t2
#define REG3 t3
#if defined(_MIPS_SIM) && (_MIPS_SIM == _ABIO32 || _MIPS_SIM == _ABIO64)
# define REG4 t4
# define REG5 t5
# define REG6 t6
# define REG7 t7
#else
# define REG4 ta0
# define REG5 ta1
# define REG6 ta2
# define REG7 ta3
#endif
/* We load/store 64 bits at a time when USE_DOUBLE is true.
* The C_ prefix stands for CHUNK and is used to avoid macro name
* conflicts with system header files. */
#ifdef USE_DOUBLE
# define C_ST sd
# define C_LD ld
# if __MIPSEB
# define C_LDHI ldl /* high part is left in big-endian */
# define C_STHI sdl /* high part is left in big-endian */
# define C_LDLO ldr /* low part is right in big-endian */
# define C_STLO sdr /* low part is right in big-endian */
# else
# define C_LDHI ldr /* high part is right in little-endian */
# define C_STHI sdr /* high part is right in little-endian */
# define C_LDLO ldl /* low part is left in little-endian */
# define C_STLO sdl /* low part is left in little-endian */
# endif
# define C_ALIGN dalign /* r6 align instruction */
#else
# define C_ST sw
# define C_LD lw
# if __MIPSEB
# define C_LDHI lwl /* high part is left in big-endian */
# define C_STHI swl /* high part is left in big-endian */
# define C_LDLO lwr /* low part is right in big-endian */
# define C_STLO swr /* low part is right in big-endian */
# else
# define C_LDHI lwr /* high part is right in little-endian */
# define C_STHI swr /* high part is right in little-endian */
# define C_LDLO lwl /* low part is left in little-endian */
# define C_STLO swl /* low part is left in little-endian */
# endif
# define C_ALIGN align /* r6 align instruction */
#endif
/* Bookkeeping values for 32 vs. 64 bit mode. */
#ifdef USE_DOUBLE
# define NSIZE 8
# define NSIZEMASK 0x3f
# define NSIZEDMASK 0x7f
#else
# define NSIZE 4
# define NSIZEMASK 0x1f
# define NSIZEDMASK 0x3f
#endif
#define UNIT(unit) ((unit)*NSIZE)
#define UNITM1(unit) (((unit)*NSIZE)-1)
#ifdef __ANDROID__
LEAF(MEMCPY_NAME, 0)
#else
LEAF(MEMCPY_NAME)
#endif
.set nomips16
.set noreorder
/*
* Below we handle the case where memcpy is called with overlapping src and dst.
* Although memcpy is not required to handle this case, some parts of Android
* like Skia rely on such usage. We call memmove to handle such cases.
*/
#ifdef USE_MEMMOVE_FOR_OVERLAP
PTR_SUBU t0,a0,a1
PTR_SRA t2,t0,31
xor t1,t0,t2
PTR_SUBU t0,t1,t2
sltu t2,t0,a2
beq t2,zero,L(memcpy)
nop
#if defined(__LP64__)
daddiu sp,sp,-8
SETUP_GP64(0,MEMCPY_NAME)
LA t9,memmove
RESTORE_GP64
jr t9
daddiu sp,sp,8
#else
LA t9,memmove
jr t9
nop
#endif
L(memcpy):
#endif
/*
* If the size is less than 2*NSIZE (8 or 16), go to L(lastb). Regardless of
* size, copy dst pointer to v0 for the return value.
*/
slti t2,a2,(2 * NSIZE)
bne t2,zero,L(lastb)
#if defined(RETURN_FIRST_PREFETCH) || defined(RETURN_LAST_PREFETCH)
move v0,zero
#else
move v0,a0
#endif
#ifndef R6_CODE
/*
* If src and dst have different alignments, go to L(unaligned), if they
* have the same alignment (but are not actually aligned) do a partial
* load/store to make them aligned. If they are both already aligned
* we can start copying at L(aligned).
*/
xor t8,a1,a0
andi t8,t8,(NSIZE-1) /* t8 is a0/a1 word-displacement */
bne t8,zero,L(unaligned)
PTR_SUBU a3, zero, a0
andi a3,a3,(NSIZE-1) /* copy a3 bytes to align a0/a1 */
beq a3,zero,L(aligned) /* if a3=0, it is already aligned */
PTR_SUBU a2,a2,a3 /* a2 is the remaining bytes count */
C_LDHI t8,0(a1)
PTR_ADDU a1,a1,a3
C_STHI t8,0(a0)
PTR_ADDU a0,a0,a3
#else /* R6_CODE */
/*
* Align the destination and hope that the source gets aligned too. If it
* doesn't we jump to L(r6_unaligned*) to do unaligned copies using the r6
* align instruction.
*/
andi t8,a0,7
lapc t9,L(atable)
PTR_LSA t9,t8,t9,2
jrc t9
L(atable):
bc L(lb0)
bc L(lb7)
bc L(lb6)
bc L(lb5)
bc L(lb4)
bc L(lb3)
bc L(lb2)
bc L(lb1)
L(lb7):
lb a3, 6(a1)
sb a3, 6(a0)
L(lb6):
lb a3, 5(a1)
sb a3, 5(a0)
L(lb5):
lb a3, 4(a1)
sb a3, 4(a0)
L(lb4):
lb a3, 3(a1)
sb a3, 3(a0)
L(lb3):
lb a3, 2(a1)
sb a3, 2(a0)
L(lb2):
lb a3, 1(a1)
sb a3, 1(a0)
L(lb1):
lb a3, 0(a1)
sb a3, 0(a0)
li t9,8
subu t8,t9,t8
PTR_SUBU a2,a2,t8
PTR_ADDU a0,a0,t8
PTR_ADDU a1,a1,t8
L(lb0):
andi t8,a1,(NSIZE-1)
lapc t9,L(jtable)
PTR_LSA t9,t8,t9,2
jrc t9
L(jtable):
bc L(aligned)
bc L(r6_unaligned1)
bc L(r6_unaligned2)
bc L(r6_unaligned3)
# ifdef USE_DOUBLE
bc L(r6_unaligned4)
bc L(r6_unaligned5)
bc L(r6_unaligned6)
bc L(r6_unaligned7)
# endif
#endif /* R6_CODE */
L(aligned):
/*
* Now dst/src are both aligned to (word or double word) aligned addresses
* Set a2 to count how many bytes we have to copy after all the 64/128 byte
* chunks are copied and a3 to the dst pointer after all the 64/128 byte
* chunks have been copied. We will loop, incrementing a0 and a1 until a0
* equals a3.
*/
andi t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
beq a2,t8,L(chkw) /* if a2==t8, no 64-byte/128-byte chunks */
PTR_SUBU a3,a2,t8 /* subtract from a2 the remainder */
PTR_ADDU a3,a0,a3 /* Now a3 is the final dst after loop */
/* When in the loop we may prefetch with the 'prepare to store' hint,
* in this case the a0+x should not be past the "t0-32" address. This
* means: for x=128 the last "safe" a0 address is "t0-160". Alternatively,
* for x=64 the last "safe" a0 address is "t0-96". In the current version we
* will use "prefetch hint,128(a0)", so "t0-160" is the limit.
*/
#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
PTR_ADDU t0,a0,a2 /* t0 is the "past the end" address */
PTR_SUBU t9,t0,PREFETCH_LIMIT /* t9 is the "last safe pref" address */
#endif
PREFETCH_FOR_LOAD (0, a1)
PREFETCH_FOR_LOAD (1, a1)
PREFETCH_FOR_LOAD (2, a1)
PREFETCH_FOR_LOAD (3, a1)
#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
PREFETCH_FOR_STORE (1, a0)
PREFETCH_FOR_STORE (2, a0)
PREFETCH_FOR_STORE (3, a0)
#endif
#if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH)
# if PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE
sltu v1,t9,a0
bgtz v1,L(skip_set)
nop
PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4)
L(skip_set):
# else
PTR_ADDIU v0,a0,(PREFETCH_CHUNK*1)
# endif
#endif
#if defined(RETURN_LAST_PREFETCH) && defined(USE_PREFETCH) \
&& (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
PTR_ADDIU v0,a0,(PREFETCH_CHUNK*3)
# ifdef USE_DOUBLE
PTR_ADDIU v0,v0,32
# endif
#endif
L(loop16w):
C_LD t0,UNIT(0)(a1)
#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
sltu v1,t9,a0 /* If a0 > t9 don't use next prefetch */
bgtz v1,L(skip_pref)
#endif
C_LD t1,UNIT(1)(a1)
#ifndef R6_CODE
PREFETCH_FOR_STORE (4, a0)
PREFETCH_FOR_STORE (5, a0)
#else
PREFETCH_FOR_STORE (2, a0)
#endif
#if defined(RETURN_LAST_PREFETCH) && defined(USE_PREFETCH)
PTR_ADDIU v0,a0,(PREFETCH_CHUNK*5)
# ifdef USE_DOUBLE
PTR_ADDIU v0,v0,32
# endif
#endif
L(skip_pref):
C_LD REG2,UNIT(2)(a1)
C_LD REG3,UNIT(3)(a1)
C_LD REG4,UNIT(4)(a1)
C_LD REG5,UNIT(5)(a1)
C_LD REG6,UNIT(6)(a1)
C_LD REG7,UNIT(7)(a1)
#ifndef R6_CODE
PREFETCH_FOR_LOAD (4, a1)
#else
PREFETCH_FOR_LOAD (3, a1)
#endif
C_ST t0,UNIT(0)(a0)
C_ST t1,UNIT(1)(a0)
C_ST REG2,UNIT(2)(a0)
C_ST REG3,UNIT(3)(a0)
C_ST REG4,UNIT(4)(a0)
C_ST REG5,UNIT(5)(a0)
C_ST REG6,UNIT(6)(a0)
C_ST REG7,UNIT(7)(a0)
C_LD t0,UNIT(8)(a1)
C_LD t1,UNIT(9)(a1)
C_LD REG2,UNIT(10)(a1)
C_LD REG3,UNIT(11)(a1)
C_LD REG4,UNIT(12)(a1)
C_LD REG5,UNIT(13)(a1)
C_LD REG6,UNIT(14)(a1)
C_LD REG7,UNIT(15)(a1)
#ifndef R6_CODE
PREFETCH_FOR_LOAD (5, a1)
#endif
C_ST t0,UNIT(8)(a0)
C_ST t1,UNIT(9)(a0)
C_ST REG2,UNIT(10)(a0)
C_ST REG3,UNIT(11)(a0)
C_ST REG4,UNIT(12)(a0)
C_ST REG5,UNIT(13)(a0)
C_ST REG6,UNIT(14)(a0)
C_ST REG7,UNIT(15)(a0)
PTR_ADDIU a0,a0,UNIT(16) /* adding 64/128 to dest */
bne a0,a3,L(loop16w)
PTR_ADDIU a1,a1,UNIT(16) /* adding 64/128 to src */
move a2,t8
/* Here we have src and dest word-aligned but less than 64-bytes or
* 128 bytes to go. Check for a 32(64) byte chunk and copy it if there
* is one. Otherwise jump down to L(chk1w) to handle the tail end of
* the copy.
*/
L(chkw):
PREFETCH_FOR_LOAD (0, a1)
andi t8,a2,NSIZEMASK /* Is there a 32-byte/64-byte chunk. */
/* t8 is the remainder count past 32 bytes */
beq a2,t8,L(chk1w) /* When a2=t8, no 32-byte chunk */
nop
C_LD t0,UNIT(0)(a1)
C_LD t1,UNIT(1)(a1)
C_LD REG2,UNIT(2)(a1)
C_LD REG3,UNIT(3)(a1)
C_LD REG4,UNIT(4)(a1)
C_LD REG5,UNIT(5)(a1)
C_LD REG6,UNIT(6)(a1)
C_LD REG7,UNIT(7)(a1)
PTR_ADDIU a1,a1,UNIT(8)
C_ST t0,UNIT(0)(a0)
C_ST t1,UNIT(1)(a0)
C_ST REG2,UNIT(2)(a0)
C_ST REG3,UNIT(3)(a0)
C_ST REG4,UNIT(4)(a0)
C_ST REG5,UNIT(5)(a0)
C_ST REG6,UNIT(6)(a0)
C_ST REG7,UNIT(7)(a0)
PTR_ADDIU a0,a0,UNIT(8)
/*
* Here we have less than 32(64) bytes to copy. Set up for a loop to
* copy one word (or double word) at a time. Set a2 to count how many
* bytes we have to copy after all the word (or double word) chunks are
* copied and a3 to the dst pointer after all the (d)word chunks have
* been copied. We will loop, incrementing a0 and a1 until a0 equals a3.
*/
L(chk1w):
andi a2,t8,(NSIZE-1) /* a2 is the remainder past one (d)word chunks */
beq a2,t8,L(lastb)
PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */
PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */
/* copying in words (4-byte or 8-byte chunks) */
L(wordCopy_loop):
C_LD REG3,UNIT(0)(a1)
PTR_ADDIU a0,a0,UNIT(1)
PTR_ADDIU a1,a1,UNIT(1)
bne a0,a3,L(wordCopy_loop)
C_ST REG3,UNIT(-1)(a0)
/* Copy the last 8 (or 16) bytes */
L(lastb):
blez a2,L(leave)
PTR_ADDU a3,a0,a2 /* a3 is the last dst address */
L(lastbloop):
lb v1,0(a1)
PTR_ADDIU a0,a0,1
PTR_ADDIU a1,a1,1
bne a0,a3,L(lastbloop)
sb v1,-1(a0)
L(leave):
j ra
nop
#ifndef R6_CODE
/*
* UNALIGNED case, got here with a3 = "negu a0"
* This code is nearly identical to the aligned code above
* but only the destination (not the source) gets aligned
* so we need to do partial loads of the source followed
* by normal stores to the destination (once we have aligned
* the destination).
*/
L(unaligned):
andi a3,a3,(NSIZE-1) /* copy a3 bytes to align a0/a1 */
beqz a3,L(ua_chk16w) /* if a3=0, it is already aligned */
PTR_SUBU a2,a2,a3 /* a2 is the remaining bytes count */
C_LDHI v1,UNIT(0)(a1)
C_LDLO v1,UNITM1(1)(a1)
PTR_ADDU a1,a1,a3
C_STHI v1,UNIT(0)(a0)
PTR_ADDU a0,a0,a3
/*
* Now the destination (but not the source) is aligned
* Set a2 to count how many bytes we have to copy after all the 64/128 byte
* chunks are copied and a3 to the dst pointer after all the 64/128 byte
* chunks have been copied. We will loop, incrementing a0 and a1 until a0
* equals a3.
*/
L(ua_chk16w):
andi t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
beq a2,t8,L(ua_chkw) /* if a2==t8, no 64-byte/128-byte chunks */
PTR_SUBU a3,a2,t8 /* subtract from a2 the remainder */
PTR_ADDU a3,a0,a3 /* Now a3 is the final dst after loop */
# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
PTR_ADDU t0,a0,a2 /* t0 is the "past the end" address */
PTR_SUBU t9,t0,PREFETCH_LIMIT /* t9 is the "last safe pref" address */
# endif
PREFETCH_FOR_LOAD (0, a1)
PREFETCH_FOR_LOAD (1, a1)
PREFETCH_FOR_LOAD (2, a1)
# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
PREFETCH_FOR_STORE (1, a0)
PREFETCH_FOR_STORE (2, a0)
PREFETCH_FOR_STORE (3, a0)
# endif
# if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH)
# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
sltu v1,t9,a0
bgtz v1,L(ua_skip_set)
nop
PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4)
L(ua_skip_set):
# else
PTR_ADDIU v0,a0,(PREFETCH_CHUNK*1)
# endif
# endif
L(ua_loop16w):
PREFETCH_FOR_LOAD (3, a1)
C_LDHI t0,UNIT(0)(a1)
C_LDHI t1,UNIT(1)(a1)
C_LDHI REG2,UNIT(2)(a1)
# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
sltu v1,t9,a0
bgtz v1,L(ua_skip_pref)
# endif
C_LDHI REG3,UNIT(3)(a1)
PREFETCH_FOR_STORE (4, a0)
PREFETCH_FOR_STORE (5, a0)
L(ua_skip_pref):
C_LDHI REG4,UNIT(4)(a1)
C_LDHI REG5,UNIT(5)(a1)
C_LDHI REG6,UNIT(6)(a1)
C_LDHI REG7,UNIT(7)(a1)
C_LDLO t0,UNITM1(1)(a1)
C_LDLO t1,UNITM1(2)(a1)
C_LDLO REG2,UNITM1(3)(a1)
C_LDLO REG3,UNITM1(4)(a1)
C_LDLO REG4,UNITM1(5)(a1)
C_LDLO REG5,UNITM1(6)(a1)
C_LDLO REG6,UNITM1(7)(a1)
C_LDLO REG7,UNITM1(8)(a1)
PREFETCH_FOR_LOAD (4, a1)
C_ST t0,UNIT(0)(a0)
C_ST t1,UNIT(1)(a0)
C_ST REG2,UNIT(2)(a0)
C_ST REG3,UNIT(3)(a0)
C_ST REG4,UNIT(4)(a0)
C_ST REG5,UNIT(5)(a0)
C_ST REG6,UNIT(6)(a0)
C_ST REG7,UNIT(7)(a0)
C_LDHI t0,UNIT(8)(a1)
C_LDHI t1,UNIT(9)(a1)
C_LDHI REG2,UNIT(10)(a1)
C_LDHI REG3,UNIT(11)(a1)
C_LDHI REG4,UNIT(12)(a1)
C_LDHI REG5,UNIT(13)(a1)
C_LDHI REG6,UNIT(14)(a1)
C_LDHI REG7,UNIT(15)(a1)
C_LDLO t0,UNITM1(9)(a1)
C_LDLO t1,UNITM1(10)(a1)
C_LDLO REG2,UNITM1(11)(a1)
C_LDLO REG3,UNITM1(12)(a1)
C_LDLO REG4,UNITM1(13)(a1)
C_LDLO REG5,UNITM1(14)(a1)
C_LDLO REG6,UNITM1(15)(a1)
C_LDLO REG7,UNITM1(16)(a1)
PREFETCH_FOR_LOAD (5, a1)
C_ST t0,UNIT(8)(a0)
C_ST t1,UNIT(9)(a0)
C_ST REG2,UNIT(10)(a0)
C_ST REG3,UNIT(11)(a0)
C_ST REG4,UNIT(12)(a0)
C_ST REG5,UNIT(13)(a0)
C_ST REG6,UNIT(14)(a0)
C_ST REG7,UNIT(15)(a0)
PTR_ADDIU a0,a0,UNIT(16) /* adding 64/128 to dest */
bne a0,a3,L(ua_loop16w)
PTR_ADDIU a1,a1,UNIT(16) /* adding 64/128 to src */
move a2,t8
/* Here we have src and dest word-aligned but less than 64-bytes or
* 128 bytes to go. Check for a 32(64) byte chunk and copy it if there
* is one. Otherwise jump down to L(ua_chk1w) to handle the tail end of
* the copy. */
L(ua_chkw):
PREFETCH_FOR_LOAD (0, a1)
andi t8,a2,NSIZEMASK /* Is there a 32-byte/64-byte chunk. */
/* t8 is the remainder count past 32 bytes */
beq a2,t8,L(ua_chk1w) /* When a2=t8, no 32-byte chunk */
nop
C_LDHI t0,UNIT(0)(a1)
C_LDHI t1,UNIT(1)(a1)
C_LDHI REG2,UNIT(2)(a1)
C_LDHI REG3,UNIT(3)(a1)
C_LDHI REG4,UNIT(4)(a1)
C_LDHI REG5,UNIT(5)(a1)
C_LDHI REG6,UNIT(6)(a1)
C_LDHI REG7,UNIT(7)(a1)
C_LDLO t0,UNITM1(1)(a1)
C_LDLO t1,UNITM1(2)(a1)
C_LDLO REG2,UNITM1(3)(a1)
C_LDLO REG3,UNITM1(4)(a1)
C_LDLO REG4,UNITM1(5)(a1)
C_LDLO REG5,UNITM1(6)(a1)
C_LDLO REG6,UNITM1(7)(a1)
C_LDLO REG7,UNITM1(8)(a1)
PTR_ADDIU a1,a1,UNIT(8)
C_ST t0,UNIT(0)(a0)
C_ST t1,UNIT(1)(a0)
C_ST REG2,UNIT(2)(a0)
C_ST REG3,UNIT(3)(a0)
C_ST REG4,UNIT(4)(a0)
C_ST REG5,UNIT(5)(a0)
C_ST REG6,UNIT(6)(a0)
C_ST REG7,UNIT(7)(a0)
PTR_ADDIU a0,a0,UNIT(8)
/*
* Here we have less than 32(64) bytes to copy. Set up for a loop to
* copy one word (or double word) at a time.
*/
L(ua_chk1w):
andi a2,t8,(NSIZE-1) /* a2 is the remainder past one (d)word chunks */
beq a2,t8,L(ua_smallCopy)
PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */
PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */
/* copying in words (4-byte or 8-byte chunks) */
L(ua_wordCopy_loop):
C_LDHI v1,UNIT(0)(a1)
C_LDLO v1,UNITM1(1)(a1)
PTR_ADDIU a0,a0,UNIT(1)
PTR_ADDIU a1,a1,UNIT(1)
bne a0,a3,L(ua_wordCopy_loop)
C_ST v1,UNIT(-1)(a0)
/* Copy the last 8 (or 16) bytes */
L(ua_smallCopy):
beqz a2,L(leave)
PTR_ADDU a3,a0,a2 /* a3 is the last dst address */
L(ua_smallCopy_loop):
lb v1,0(a1)
PTR_ADDIU a0,a0,1
PTR_ADDIU a1,a1,1
bne a0,a3,L(ua_smallCopy_loop)
sb v1,-1(a0)
j ra
nop
#else /* R6_CODE */
# if __MIPSEB
# define SWAP_REGS(X,Y) X, Y
# define ALIGN_OFFSET(N) (N)
# else
# define SWAP_REGS(X,Y) Y, X
# define ALIGN_OFFSET(N) (NSIZE-N)
# endif
# define R6_UNALIGNED_WORD_COPY(BYTEOFFSET) \
andi REG7, a2, (NSIZE-1);/* REG7 is # of bytes to copy byte by byte. */ \
beq REG7, a2, L(lastb); /* Check for bytes to copy by word */ \
PTR_SUBU a3, a2, REG7; /* a3 is number of bytes to be copied in */ \
/* (d)word chunks. */ \
move a2, REG7; /* a2 is # of bytes to copy byte by byte */ \
/* after word loop is finished. */ \
PTR_ADDU REG6, a0, a3; /* REG6 is the dst address after loop. */ \
PTR_SUBU REG2, a1, t8; /* REG2 is the aligned src address. */ \
PTR_ADDU a1, a1, a3; /* a1 is addr of source after word loop. */ \
C_LD t0, UNIT(0)(REG2); /* Load first part of source. */ \
L(r6_ua_wordcopy##BYTEOFFSET): \
C_LD t1, UNIT(1)(REG2); /* Load second part of source. */ \
C_ALIGN REG3, SWAP_REGS(t1,t0), ALIGN_OFFSET(BYTEOFFSET); \
PTR_ADDIU a0, a0, UNIT(1); /* Increment destination pointer. */ \
PTR_ADDIU REG2, REG2, UNIT(1); /* Increment aligned source pointer.*/ \
move t0, t1; /* Move second part of source to first. */ \
bne a0, REG6,L(r6_ua_wordcopy##BYTEOFFSET); \
C_ST REG3, UNIT(-1)(a0); \
j L(lastb); \
nop
/* We are generating R6 code, the destination is 4 byte aligned and
the source is not 4 byte aligned. t8 is 1, 2, or 3 depending on the
alignment of the source. */
L(r6_unaligned1):
R6_UNALIGNED_WORD_COPY(1)
L(r6_unaligned2):
R6_UNALIGNED_WORD_COPY(2)
L(r6_unaligned3):
R6_UNALIGNED_WORD_COPY(3)
# ifdef USE_DOUBLE
L(r6_unaligned4):
R6_UNALIGNED_WORD_COPY(4)
L(r6_unaligned5):
R6_UNALIGNED_WORD_COPY(5)
L(r6_unaligned6):
R6_UNALIGNED_WORD_COPY(6)
L(r6_unaligned7):
R6_UNALIGNED_WORD_COPY(7)
# endif
#endif /* R6_CODE */
.set at
.set reorder
END(MEMCPY_NAME)
#ifndef __ANDROID__
# ifdef _LIBC
libc_hidden_builtin_def (MEMCPY_NAME)
# endif
#endif
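The USE_MEMMOVE_FOR_OVERLAP prologue of the file removed above (the PTR_SUBU/PTR_SRA/xor/PTR_SUBU/sltu sequence) computes |dst - src| without a branch and tail-calls memmove whenever that distance is smaller than the byte count, because callers such as Skia pass overlapping buffers to memcpy. A C rendering of the same dispatch, as a sketch only; memcpy_impl is a hypothetical stand-in for the real copy body:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical byte-copy body standing in for the assembly routine. */
static void *memcpy_impl(void *dst, const void *src, size_t n)
{
  unsigned char *d = dst;
  const unsigned char *s = src;
  while (n--) *d++ = *s++;
  return dst;
}

void *memcpy_with_overlap_check(void *dst, const void *src, size_t n)
{
  intptr_t diff = (intptr_t) dst - (intptr_t) src;
  intptr_t sign = diff >> (sizeof diff * 8 - 1);       /* 0 or -1, like PTR_SRA */
  uintptr_t dist = (uintptr_t) ((diff ^ sign) - sign); /* |dst - src| */
  if (dist < n)                    /* buffers overlap: defer to memmove */
    return memmove(dst, src, n);
  return memcpy_impl(dst, src, n);
}

int main(void)
{
  char buf[16] = "abcdefghijklmno";
  memcpy_with_overlap_check(buf + 2, buf, 8);  /* overlapping: memmove path */
  return 0;
}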

View file: libc/arch-mips/string/memcpy.c

@@ -0,0 +1,328 @@
/*
* Copyright (c) 2017 Imagination Technologies.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with
* the distribution.
* * Neither the name of Imagination Technologies nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#if !defined(UNALIGNED_INSTR_SUPPORT)
/* does target have unaligned lw/ld/ualw/uald instructions? */
#define UNALIGNED_INSTR_SUPPORT 0
#if __mips_isa_rev < 6 && !__mips1
#undef UNALIGNED_INSTR_SUPPORT
#define UNALIGNED_INSTR_SUPPORT 1
#endif
#endif
#if !defined(HW_UNALIGNED_SUPPORT)
/* Does target have hardware support for unaligned accesses? */
#define HW_UNALIGNED_SUPPORT 0
#if __mips_isa_rev >= 6
#undef HW_UNALIGNED_SUPPORT
#define HW_UNALIGNED_SUPPORT 1
#endif
#endif
#define ENABLE_PREFETCH 1
#if ENABLE_PREFETCH
#define PREFETCH(addr) __builtin_prefetch (addr, 0, 1);
#else
#define PREFETCH(addr)
#endif
#if _MIPS_SIM == _ABIO32
typedef unsigned long reg_t;
typedef struct
{
reg_t B0:8, B1:8, B2:8, B3:8;
} bits_t;
#else
typedef unsigned long long reg_t;
typedef struct
{
reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
} bits_t;
#endif
typedef union
{
reg_t v;
bits_t b;
} bitfields_t;
#define DO_BYTE(a, i) \
a[i] = bw.b.B##i; \
len--; \
if(!len) return ret; \
/* This code is called when aligning a pointer, when there are remaining
   bytes after doing word copies, or when the architecture does not have
   some form of unaligned support. */
static inline void * __attribute__ ((always_inline))
do_bytes (void *a, const void *b, unsigned long len, void *ret)
{
unsigned char *x = (unsigned char *) a;
unsigned char *y = (unsigned char *) b;
unsigned long i;
/* 'len' might be zero here, so preloading the first two values
before the loop may access unallocated memory. */
for (i = 0; i < len; i++) {
*x = *y;
x++;
y++;
}
return ret;
}
/* This code is called to copy the remaining bytes within a word or doubleword */
static inline void * __attribute__ ((always_inline))
do_bytes_remaining (void *a, const void *b, unsigned long len, void *ret)
{
unsigned char *x = (unsigned char *) a;
if(len > 0) {
bitfields_t bw;
bw.v = *((reg_t*) b);
#if __mips64
DO_BYTE(x, 0);
DO_BYTE(x, 1);
DO_BYTE(x, 2);
DO_BYTE(x, 3);
DO_BYTE(x, 4);
DO_BYTE(x, 5);
DO_BYTE(x, 6);
DO_BYTE(x, 7);
#else
DO_BYTE(x, 0);
DO_BYTE(x, 1);
DO_BYTE(x, 2);
DO_BYTE(x, 3);
#endif
}
return ret;
}
#if !HW_UNALIGNED_SUPPORT
#if UNALIGNED_INSTR_SUPPORT
/* for MIPS GCC, there are no unaligned builtins - so this struct forces
the compiler to treat the pointer access as unaligned. */
struct ulw
{
reg_t uli;
} __attribute__ ((packed));
/* first pointer is not aligned while second pointer is. */
static void *
unaligned_words (struct ulw *a, const reg_t * b,
unsigned long words, unsigned long bytes, void *ret)
{
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
unsigned long i, words_by_8, words_by_1;
words_by_1 = words % 8;
words_by_8 = words >> 3;
for (; words_by_8 > 0; words_by_8--) {
if(words_by_8 != 1)
PREFETCH (b + 8);
reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
reg_t y4 = b[4], y5 = b[5], y6 = b[6], y7 = b[7];
a[0].uli = y0;
a[1].uli = y1;
a[2].uli = y2;
a[3].uli = y3;
a[4].uli = y4;
a[5].uli = y5;
a[6].uli = y6;
a[7].uli = y7;
a += 8;
b += 8;
}
#else
unsigned long i, words_by_4, words_by_1;
words_by_1 = words % 4;
words_by_4 = words >> 2;
for (; words_by_4 > 0; words_by_4--) {
if(words_by_4 != 1)
PREFETCH (b + 4);
reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
a[0].uli = y0;
a[1].uli = y1;
a[2].uli = y2;
a[3].uli = y3;
a += 4;
b += 4;
}
#endif
/* do remaining words. */
for (i = 0; i < words_by_1; i++) {
a->uli = *b;
a += 1;
b += 1;
}
/* mop up any remaining bytes. */
return do_bytes_remaining (a, b, bytes, ret);
}
#else
/* no HW support or unaligned lw/ld/ualw/uald instructions. */
static void *
unaligned_words (reg_t * a, const reg_t * b,
unsigned long words, unsigned long bytes, void *ret)
{
unsigned long i;
unsigned char *x = (unsigned char *) a;
for (i = 0; i < words; i++) {
bitfields_t bw;
bw.v = *((reg_t*) b);
x = (unsigned char *) a;
#if __mips64
x[0] = bw.b.B0;
x[1] = bw.b.B1;
x[2] = bw.b.B2;
x[3] = bw.b.B3;
x[4] = bw.b.B4;
x[5] = bw.b.B5;
x[6] = bw.b.B6;
x[7] = bw.b.B7;
#else
x[0] = bw.b.B0;
x[1] = bw.b.B1;
x[2] = bw.b.B2;
x[3] = bw.b.B3;
#endif
a += 1;
b += 1;
}
/* mop up any remaining bytes */
return do_bytes_remaining (a, b, bytes, ret);
}
#endif /* UNALIGNED_INSTR_SUPPORT */
#endif /* HW_UNALIGNED_SUPPORT */
/* both pointers are aligned, or first isn't and HW support for unaligned. */
static void *
aligned_words (reg_t * a, const reg_t * b,
unsigned long words, unsigned long bytes, void *ret)
{
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
unsigned long i, words_by_8, words_by_1;
words_by_1 = words % 8;
words_by_8 = words >> 3;
for (; words_by_8 > 0; words_by_8--) {
if(words_by_8 != 1)
PREFETCH (b + 8);
reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];
reg_t x4 = b[4], x5 = b[5], x6 = b[6], x7 = b[7];
a[0] = x0;
a[1] = x1;
a[2] = x2;
a[3] = x3;
a[4] = x4;
a[5] = x5;
a[6] = x6;
a[7] = x7;
a += 8;
b += 8;
}
#else
unsigned long i, words_by_4, words_by_1;
words_by_1 = words % 4;
words_by_4 = words >> 2;
for (; words_by_4 > 0; words_by_4--) {
if(words_by_4 != 1)
PREFETCH (b + 4);
reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];
a[0] = x0;
a[1] = x1;
a[2] = x2;
a[3] = x3;
a += 4;
b += 4;
}
#endif
/* do remaining words. */
for (i = 0; i < words_by_1; i++) {
*a = *b;
a += 1;
b += 1;
}
/* mop up any remaining bytes. */
return do_bytes_remaining (a, b, bytes, ret);
}
void *
memcpy (void *a, const void *b, size_t len) __overloadable
{
unsigned long bytes, words;
void *ret = a;
/* shouldn't hit that often. */
if (len < sizeof (reg_t) * 4) {
return do_bytes (a, b, len, a);
}
/* Align the second pointer to word/dword alignment.
Note that the pointer is only 32-bits for o32/n32 ABIs. For
n32, loads are done as 64-bit while address remains 32-bit. */
bytes = ((unsigned long) b) % sizeof (reg_t);
if (bytes) {
bytes = sizeof (reg_t) - bytes;
if (bytes > len)
bytes = len;
do_bytes (a, b, bytes, ret);
if (len == bytes)
return ret;
len -= bytes;
a = (void *) (((unsigned char *) a) + bytes);
b = (const void *) (((unsigned char *) b) + bytes);
}
/* Second pointer now aligned. */
words = len / sizeof (reg_t);
bytes = len % sizeof (reg_t);
#if HW_UNALIGNED_SUPPORT
/* treat possible unaligned first pointer as aligned. */
return aligned_words (a, b, words, bytes, ret);
#else
if (((unsigned long) a) % sizeof (reg_t) == 0) {
return aligned_words (a, b, words, bytes, ret);
}
/* need to use unaligned instructions on first pointer. */
return unaligned_words (a, b, words, bytes, ret);
#endif
}
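
A minimal host-runnable sketch (illustrative only, not part of the change) of the source-alignment prologue used by the memcpy above: only the second pointer is aligned up front, with reg_t standing in for the word type.

#include <stdio.h>
#include <stdint.h>

typedef unsigned long reg_t;

int main(void) {
    char buf[64];
    const char *b = buf + 3;                  /* deliberately misaligned source */
    unsigned long head = ((uintptr_t) b) % sizeof(reg_t);
    if (head)
        head = sizeof(reg_t) - head;          /* bytes copied singly before the word loop */
    printf("%lu head byte(s), then word-at-a-time copies\n", head);
    return 0;
}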


@@ -0,0 +1,468 @@
/*
* Copyright (c) 2017 Imagination Technologies.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with
* the distribution.
* * Neither the name of Imagination Technologies nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#if !defined(UNALIGNED_INSTR_SUPPORT)
/* does target have unaligned lw/ld/ualw/uald instructions? */
#define UNALIGNED_INSTR_SUPPORT 0
#if __mips_isa_rev < 6 && !__mips1
#undef UNALIGNED_INSTR_SUPPORT
#define UNALIGNED_INSTR_SUPPORT 1
#endif
#endif
#if !defined(HW_UNALIGNED_SUPPORT)
/* Does target have hardware support for unaligned accesses? */
#define HW_UNALIGNED_SUPPORT 0
#if __mips_isa_rev >= 6
#undef HW_UNALIGNED_SUPPORT
#define HW_UNALIGNED_SUPPORT 1
#endif
#endif
#define ENABLE_PREFETCH 1
#if ENABLE_PREFETCH
#define PREFETCH(addr) __builtin_prefetch (addr, 0, 1);
#else
#define PREFETCH(addr)
#endif
#if _MIPS_SIM == _ABIO32
typedef unsigned long reg_t;
typedef struct
{
reg_t B0:8, B1:8, B2:8, B3:8;
} bits_t;
#else
typedef unsigned long long reg_t;
typedef struct
{
reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
} bits_t;
#endif
typedef union
{
reg_t v;
bits_t b;
} bitfields_t;
#define DO_BYTE(a, i) \
a[i] = bw.b.B##i; \
len--; \
if(!len) return ret; \
/* This code is used when aligning a pointer, when there are remaining bytes
   after doing word copies, or when the architecture does not have some form
   of unaligned support. */
static inline void * __attribute__ ((always_inline))
do_bytes (void *a, const void *b, unsigned long len, void *ret)
{
unsigned char *x = (unsigned char *) a;
unsigned char *y = (unsigned char *) b;
unsigned long i;
/* 'len' might be zero here, so preloading the first two values
before the loop may access unallocated memory. */
for (i = 0; i < len; i++)
{
*x = *y;
x++;
y++;
}
return ret;
}
static inline void * __attribute__ ((always_inline))
do_bytes_backward (void *a, const void *b, unsigned long len, void *ret)
{
unsigned char *x = (unsigned char *) a;
unsigned char *y = (unsigned char *) b;
unsigned long i;
/* 'len' might be zero here, so preloading the first two values
before the loop may access unallocated memory. */
for (i = 0; i < len; i++) {
*--x = *--y;
}
return ret;
}
static inline void * __attribute__ ((always_inline))
do_bytes_aligned (void *a, const void *b, unsigned long len, void *ret)
{
unsigned char *x = (unsigned char *) a;
if(len > 0) {
bitfields_t bw;
bw.v = *((reg_t*) b);
#if __mips64
DO_BYTE(x, 0);
DO_BYTE(x, 1);
DO_BYTE(x, 2);
DO_BYTE(x, 3);
DO_BYTE(x, 4);
DO_BYTE(x, 5);
DO_BYTE(x, 6);
DO_BYTE(x, 7);
#else
DO_BYTE(x, 0);
DO_BYTE(x, 1);
DO_BYTE(x, 2);
DO_BYTE(x, 3);
#endif
}
return ret;
}
#if !HW_UNALIGNED_SUPPORT
#if UNALIGNED_INSTR_SUPPORT
/* for MIPS GCC, there are no unaligned builtins - so this struct forces
the compiler to treat the pointer access as unaligned. */
struct ulw
{
reg_t uli;
} __attribute__ ((packed));
#define STORE_UNALIGNED_8(a, b) \
{ \
reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3]; \
reg_t y4 = b[4], y5 = b[5], y6 = b[6], y7 = b[7]; \
a[0].uli = y0; \
a[1].uli = y1; \
a[2].uli = y2; \
a[3].uli = y3; \
a[4].uli = y4; \
a[5].uli = y5; \
a[6].uli = y6; \
a[7].uli = y7; \
}
#define STORE_UNALIGNED_4(a, b) \
{ \
reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3]; \
a[0].uli = y0; \
a[1].uli = y1; \
a[2].uli = y2; \
a[3].uli = y3; \
}
/* first pointer is not aligned while second pointer is. */
static void *
unaligned_words_forward (struct ulw *a, const reg_t * b,
unsigned long words, unsigned long bytes, void *ret)
{
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
unsigned long i, words_by_8, words_by_1;
words_by_1 = words % 8;
words_by_8 = words >> 3;
for (; words_by_8 > 0; words_by_8--) {
if(words_by_8 != 1)
PREFETCH (b + 8);
STORE_UNALIGNED_8(a, b);
a += 8;
b += 8;
}
#else
unsigned long i, words_by_4, words_by_1;
words_by_1 = words % 4;
words_by_4 = words >> 2;
for (; words_by_4 > 0; words_by_4--) {
if(words_by_4 != 1)
PREFETCH (b + 4);
STORE_UNALIGNED_4(a, b);
a += 4;
b += 4;
}
#endif
/* do remaining words. */
for (i = 0; i < words_by_1; i++) {
a->uli = *b;
a += 1;
b += 1;
}
/* mop up any remaining bytes. */
return do_bytes_aligned (a, b, bytes, ret);
}
static void *
unaligned_words_backward (struct ulw *a, const reg_t * b,
unsigned long words, unsigned long bytes, void *ret)
{
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
unsigned long i, words_by_8, words_by_1;
words_by_1 = words % 8;
words_by_8 = words >> 3;
for (; words_by_8 > 0; words_by_8--) {
if(words_by_8 != 1)
PREFETCH (b - 16);
a -= 8;
b -= 8;
STORE_UNALIGNED_8(a, b);
}
#else
unsigned long i, words_by_4, words_by_1;
words_by_1 = words % 4;
words_by_4 = words >> 2;
for (; words_by_4 > 0; words_by_4--) {
if(words_by_4 != 1)
PREFETCH (b - 8);
a -= 4;
b -= 4;
STORE_UNALIGNED_4(a, b);
}
#endif
/* do remaining words. */
for (i = 0; i < words_by_1; i++) {
a -= 1;
b -= 1;
a->uli = *b;
}
/* mop up any remaining bytes. */
return do_bytes_backward (a, b, bytes, ret);
}
#else
/* neither HW support nor unaligned lw/ld/ualw/uald instructions. */
static void *
unaligned_words_forward (reg_t * a, const reg_t * b,
unsigned long words, unsigned long bytes, void *ret)
{
return do_bytes_aligned (a, b, (sizeof (reg_t) * words) + bytes, ret);
}
static void *
unaligned_words_backward (reg_t * a, const reg_t * b,
unsigned long words, unsigned long bytes, void *ret)
{
return do_bytes_backward (a, b, (sizeof (reg_t) * words) + bytes, ret);
}
#endif /* UNALIGNED_INSTR_SUPPORT */
#endif /* HW_UNALIGNED_SUPPORT */
/* both pointers are aligned, or the first isn't and the HW supports unaligned accesses. */
#define STORE_ALIGNED_8(a, b) \
{ \
reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3]; \
reg_t x4 = b[4], x5 = b[5], x6 = b[6], x7 = b[7]; \
a[0] = x0; \
a[1] = x1; \
a[2] = x2; \
a[3] = x3; \
a[4] = x4; \
a[5] = x5; \
a[6] = x6; \
a[7] = x7; \
}
#define STORE_ALIGNED_4(a, b) \
{ \
reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3]; \
a[0] = x0; \
a[1] = x1; \
a[2] = x2; \
a[3] = x3; \
}
static void *
aligned_words_forward (reg_t * a, const reg_t * b,
unsigned long words, unsigned long bytes, void *ret)
{
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
unsigned long i, words_by_8, words_by_1;
words_by_1 = words % 8;
words_by_8 = words >> 3;
for (; words_by_8 > 0; words_by_8--) {
if(words_by_8 != 1)
PREFETCH (b + 8);
STORE_ALIGNED_8(a, b);
a += 8;
b += 8;
}
#else
unsigned long i, words_by_4, words_by_1;
words_by_1 = words % 4;
words_by_4 = words >> 2;
for (; words_by_4 > 0; words_by_4--) {
if(words_by_4 != 1)
PREFETCH (b + 4);
STORE_ALIGNED_4(a, b);
a += 4;
b += 4;
}
#endif
/* do remaining words. */
for (i = 0; i < words_by_1; i++) {
*a = *b;
a += 1;
b += 1;
}
/* mop up any remaining bytes. */
return do_bytes_aligned (a, b, bytes, ret);
}
static void *
aligned_words_backward (reg_t * a, const reg_t * b,
unsigned long words, unsigned long bytes, void *ret)
{
#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
unsigned long i, words_by_8, words_by_1;
words_by_1 = words % 8;
words_by_8 = words >> 3;
for (; words_by_8 > 0; words_by_8--) {
if(words_by_8 != 1)
PREFETCH (b - 16);
a -= 8;
b -= 8;
STORE_ALIGNED_8(a, b);
}
#else
unsigned long i, words_by_4, words_by_1;
words_by_1 = words % 4;
words_by_4 = words >> 2;
for (; words_by_4 > 0; words_by_4--) {
if(words_by_4 != 1)
PREFETCH (b - 8);
a -= 4;
b -= 4;
STORE_ALIGNED_4(a, b);
}
#endif
/* do remaining words. */
for (i = 0; i < words_by_1; i++) {
a -= 1;
b -= 1;
*a = *b;
}
/* mop up any remaining bytes. */
return do_bytes_backward (a, b, bytes, ret);
}
void *
memmove (void *dst0, const void *src0, size_t length) __overloadable
{
unsigned long bytes, words;
void *ret = dst0;
if (length == 0 || dst0 == src0) /* nothing to do */
return dst0;
if ((unsigned long)dst0 < (unsigned long)src0) {
/* Copy forwards. */
/* This shouldn't hit that often. */
if (length < sizeof (reg_t) * 4) {
return do_bytes (dst0, src0, length, ret);
}
/* Align the second pointer to word/dword alignment.
Note that the pointer is only 32-bits for o32/n32 ABIs. For
n32, loads are done as 64-bit while address remains 32-bit. */
bytes = ((unsigned long) src0) % sizeof (reg_t);
if (bytes) {
bytes = sizeof (reg_t) - bytes;
if (bytes > length)
bytes = length;
do_bytes (dst0, src0, bytes, ret);
if (length == bytes)
return ret;
length -= bytes;
dst0 = (void *) (((unsigned char *) dst0) + bytes);
src0 = (const void *) (((unsigned char *) src0) + bytes);
}
/* Second pointer now aligned. */
words = length / sizeof (reg_t);
bytes = length % sizeof (reg_t);
#if HW_UNALIGNED_SUPPORT
/* treat possible unaligned first pointer as aligned. */
return aligned_words_forward (dst0, src0, words, bytes, ret);
#else
if (((unsigned long) dst0) % sizeof (reg_t) == 0) {
return aligned_words_forward (dst0, src0, words, bytes, ret);
}
/* need to use unaligned instructions on first pointer. */
return unaligned_words_forward (dst0, src0, words, bytes, ret);
#endif
} else {
/* Copy backwards. */
dst0 = (void *) (((unsigned char *) dst0) + length);
src0 = (const void *) (((unsigned char *) src0) + length);
/* This shouldn't hit that often. */
if (length < sizeof (reg_t) * 4) {
return do_bytes_backward (dst0, src0, length, ret);
}
/* Align the second pointer to word/dword alignment.
Note that the pointer is only 32-bits for o32/n32 ABIs. For
n32, loads are done as 64-bit while address remains 32-bit. */
bytes = ((unsigned long) src0) % sizeof (reg_t);
if (bytes) {
if (bytes > length)
bytes = length;
do_bytes_backward (dst0, src0, bytes, ret);
if (length == bytes)
return ret;
length -= bytes;
dst0 = (void *) (((unsigned char *) dst0) - bytes);
src0 = (const void *) (((unsigned char *) src0) - bytes);
}
words = length / sizeof (reg_t);
bytes = length % sizeof (reg_t);
#if HW_UNALIGNED_SUPPORT
/* treat possible unaligned first pointer as aligned. */
return aligned_words_backward ((void *)dst0, (void *)src0, words, bytes, ret);
#else
if (((unsigned long) dst0) % sizeof (reg_t) == 0) {
return aligned_words_backward (dst0, src0, words, bytes, ret);
}
/* need to use unaligned instructions on first pointer. */
return unaligned_words_backward (dst0, src0, words, bytes, ret);
#endif
}
}
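
A small usage sketch (illustrative only) of the overlap rule the routine above implements: copying forwards is safe when dst is below src; otherwise both pointers are moved to the end and the copy runs backwards.

#include <stdio.h>
#include <string.h>

int main(void) {
    char buf[16] = "abcdefgh";
    /* overlapping shift right by two: dst > src, so a backward copy is required */
    memmove(buf + 2, buf, 6);
    printf("%s\n", buf);                      /* prints "ababcdef" */
    return 0;
}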


@@ -1,148 +0,0 @@
/*
* Copyright (c) 2010 MIPS Technologies, Inc.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with
* the distribution.
* * Neither the name of MIPS Technologies Inc. nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __MIPS_STRING_OPS_H
#define __MIPS_STRING_OPS_H
/* This definition of the byte bitfields uses the
assumption that the layout of the bitfields is
equivalent to the layout in memory. Generally,
for the MIPS ABIs, this is true. If you compile
the strcmp.c file with -DSMOKE_TEST_NEW_STRCMP,
this assumption will be tested.
Also, regardless of char signedness, ANSI C dictates that
strcmp() treats each character as unsigned char. For
strlen and the like, signedness doesn't matter.
Also, this code assumes that there are 8-bits per 'char'. */
#if __mips64
typedef struct bits
{
unsigned B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
} bits_t;
#else
typedef struct bits
{
unsigned B0:8, B1:8, B2:8, B3:8;
} bits_t;
#endif
#ifndef _ULW
/* for MIPS GCC, there are no unaligned builtins - so this code forces
the compiler to treat the pointer access as unaligned. */
struct ulw
{
unsigned b;
} __attribute__ ((packed));
#define _ULW(__x) ((struct ulw *) ((char *)(&__x)))->b;
#endif
/* This union assumes that small structures can be in registers. If
not, then memory accesses will be done - not optimal, but ok. */
typedef union
{
unsigned v;
bits_t b;
} bitfields_t;
#ifndef detect_zero
/* __mips_dsp, __mips_dspr2, and __mips64 are predefined by
the compiler, based on command line options. */
#if (__mips_dsp || __mips_dspr2) && !__mips64
#define __mips_using_dsp 1
/* DSP 4-lane (8 unsigned bits per line) subtract and saturate
* Intrinsic operation. How this works:
* Given a 4-byte string of "ABC\0", subtract this as
* an unsigned integer from 0x01010101:
* 0x01010101
* - 0x41424300
* -----------
* 0xbfbebe01 <-- answer without saturation
* 0x00000001 <-- answer with saturation
* When this 4-lane vector is treated as an unsigned int value,
* a non-zero answer indicates the presence of a zero in the
* original 4-byte argument. */
typedef signed char v4i8 __attribute__ ((vector_size (4)));
#define detect_zero(__x,__y,__01s,__80s)\
((unsigned) __builtin_mips_subu_s_qb((v4i8) __01s,(v4i8) __x))
/* sets all 4 lanes to requested byte. */
#define set_byte_lanes(__x) ((unsigned) __builtin_mips_repl_qb(__x))
/* sets all 4 lanes to 0x01. */
#define def_and_set_01(__x) unsigned __x = (unsigned) __builtin_mips_repl_qb(0x01)
/* sets all 4 lanes to 0x80. Not needed when subu_s.qb used. */
#define def_and_set_80(__x) /* do nothing */
#else
/* this version, originally published in the 80's, uses
a reverse-carry-set like determination of the zero byte.
The steps are, for __x = 0x31ff0001:
__x - _01s = 0x30fdff00
~__x = 0xce00fffe
((__x - _01s) & ~__x) = 0x0000ff00
x & _80s = 0x00008000 <- byte 3 was zero
Some implementations naively assume that characters are
always 7-bit unsigned ASCII. With that assumption, the
"& ~x" is usually discarded. Since character strings
are 8-bit, the and is needed to catch the case of
a false positive when the byte is 0x80. */
#define detect_zero(__x,__y,_01s,_80s)\
((unsigned) (((__x) - _01s) & ~(__x)) & _80s)
#if __mips64
#define def_and_set_80(__x) unsigned __x = 0x8080808080808080ul
#define def_and_set_01(__x) unsigned __x = 0x0101010101010101ul
#else
#define def_and_set_80(__x) unsigned __x = 0x80808080ul
#define def_and_set_01(__x) unsigned __x = 0x01010101ul
#endif
#endif
#endif
/* dealing with 'void *' conversions without using extra variables. */
#define get_byte(__x,__idx) (((unsigned char *) (__x))[__idx])
#define set_byte(__x,__idx,__fill) ((unsigned char *) (__x))[__idx] = (__fill)
#define get_word(__x,__idx) (((unsigned *) (__x))[__idx])
#define set_word(__x,__idx,__fill) ((unsigned *) (__x))[__idx] = (__fill)
#define inc_ptr_as(__type,__x,__inc) __x = (void *) (((__type) __x) + (__inc))
#define cvt_ptr_to(__type,__x) ((__type) (__x))
#endif
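
The detect_zero trick documented in the header above is the same test the new C implementations in this change rely on; a host-runnable sketch (illustrative only):

#include <stdio.h>
#include <stdint.h>

/* a word holds a zero byte iff ((w - 0x01010101) & ~w & 0x80808080) != 0;
   the '& ~w' term avoids a false positive on 0x80 bytes. */
static int has_zero_byte(uint32_t w) {
    return (((w - 0x01010101u) & ~w) & 0x80808080u) != 0;
}

int main(void) {
    printf("%d\n", has_zero_byte(0x01020304u)); /* 0: no zero byte */
    printf("%d\n", has_zero_byte(0x31ff0001u)); /* 1: byte 1 is zero */
    printf("%d\n", has_zero_byte(0x80808080u)); /* 0: 0x80 bytes, no false hit */
    return 0;
}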


@@ -1,224 +0,0 @@
/*
* Copyright (c) 2010 MIPS Technologies, Inc.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with
* the distribution.
* * Neither the name of MIPS Technologies Inc. nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#include "mips-string-ops.h"
#define do_strlen_word(__av) {\
if (detect_zero(x,x,_01s,_80s)) break;\
x = __av;\
cnt += sizeof (unsigned);\
}
#define do_strlen_byte(__x) {\
if ((bx.b.B##__x) == 0) break;\
++cnt;\
}
#if SMOKE_TEST_MIPS_STRLEN
#define strlen my_strlen
#endif
size_t
strlen (const char *_a) __overloadable
{
int cnt = 0;
unsigned x;
/* align the string to word boundary so we can do word at a time. */
if ((cvt_ptr_to (unsigned, _a) & (sizeof (unsigned) - 1)) != 0)
{
if ((cvt_ptr_to (unsigned, _a) & 1) != 0)
{
if (get_byte (_a, 0) == 0)
return cnt;
/* set bit 1 so 2-bytes are checked and incremented. */
inc_ptr_as (char *, _a, 1);
++cnt;
}
if ((cvt_ptr_to (unsigned, _a) & 2) != 0)
{
if (get_byte (_a, 0) == 0)
return cnt + 0;
if (get_byte (_a, 1) == 0)
return cnt + 1;
inc_ptr_as (char *, _a, 2);
cnt += 2;
}
}
#if __mips64
#error strlen: mips64 check for 4-byte alignment not implemented.
#endif
if (1)
{
def_and_set_01 (_01s);
def_and_set_80 (_80s);
/* as advantageous as it is to performance, this code cannot pre-load
the following word, nor can it prefetch the next line at the start
of the loop since the string can be at the end of a page with the
following page unmapped. There are tests in the suite to catch
any attempt to go beyond the current word. */
x = get_word (_a, 0);
while (1)
{
/* doing 8 words should cover most strings. */
do_strlen_word (get_word (_a, 1));
do_strlen_word (get_word (_a, 2));
do_strlen_word (get_word (_a, 3));
do_strlen_word (get_word (_a, 4));
do_strlen_word (get_word (_a, 5));
do_strlen_word (get_word (_a, 6));
do_strlen_word (get_word (_a, 7));
do_strlen_word (get_word (_a, 8));
inc_ptr_as (unsigned *, _a, 8);
}
}
while (1)
{
/* pull apart the last word processed and find the zero. */
bitfields_t bx;
bx.v = x;
#if __mips64
do_strlen_byte (0);
do_strlen_byte (1);
do_strlen_byte (2);
do_strlen_byte (3);
do_strlen_byte (4);
do_strlen_byte (5);
do_strlen_byte (6);
#else
do_strlen_byte (0);
do_strlen_byte (1);
do_strlen_byte (2);
#endif
/* last byte is zero */
break;
}
return cnt;
}
#undef do_strlen_byte
#undef do_strlen_word
#if SMOKE_TEST_MIPS_STRLEN
#include <stdio.h>
char str1[] = "DHRYSTONE PROGRAM, 1'ST STRING";
char str2[] = "DHRYSTONE PROGRAM, 2'ST STRING";
char str3[] = "another string";
char str4[] = "another";
char str5[] = "somes tring";
char str6[] = "somes_tring";
char str7[16], str8[16];
static char *
chk (unsigned mine, unsigned libs, int *errors)
{
static char answer[1024];
char *result = mine == libs ? "PASS" : "FAIL";
sprintf (answer, "new_strlen=%d: lib_strlen=%d: %s!", mine, libs, result);
if (mine != libs)
(*errors)++;
return answer;
}
int
main (int argc, char **argv)
{
int errors = 0;
/* set -1 in one position */
str6[5] = 0xff;
/* set zero in same position with junk in following 3 */
str7[0] = str8[0] = 0;
str7[1] = 0xff;
str7[2] = 'a';
str7[3] = 2;
str8[1] = 's';
str8[2] = -2;
str8[3] = 0;
fprintf (stderr, "========== mips_strlen%s test...\n",
argv[0] ? argv[0] : "unknown strlen");
#define P(__x,__y) {\
int a = my_strlen(__x + __y);\
int b = (strlen)(__x + __y) /* library version */;\
fprintf(stderr,"%s+%d: %s\n",#__x,__y,chk(a,b,&errors));\
}
P (str1, 0);
P (str1, 1);
P (str1, 2);
P (str1, 3);
P (str2, 0);
P (str2, 1);
P (str2, 2);
P (str2, 3);
P (str3, 0);
P (str3, 1);
P (str3, 2);
P (str3, 3);
P (str4, 0);
P (str4, 1);
P (str4, 2);
P (str4, 3);
P (str5, 0);
P (str5, 1);
P (str5, 2);
P (str5, 3);
P (str6, 0);
P (str6, 1);
P (str6, 2);
P (str6, 3);
P (str7, 0);
P (str7, 1);
P (str7, 2);
P (str7, 3);
P (str8, 0);
P (str8, 1);
P (str8, 2);
P (str8, 3);
return errors;
}
#endif


@@ -0,0 +1,138 @@
/*
* Copyright (c) 2017 Imagination Technologies.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with
* the distribution.
* * Neither the name of Imagination Technologies nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#define op_t unsigned long int
#define op_size sizeof (op_t)
#if __mips64
typedef struct
{
op_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
} bits_t;
#else
typedef struct
{
op_t B0:8, B1:8, B2:8, B3:8;
} bits_t;
#endif
typedef union
{
op_t v;
bits_t b;
} bitfields_t;
#define DO_BYTE(i) \
if (a.b.B##i != ch) { \
if(a.b.B##i == '\0') return 0; \
p++; \
} else \
return (char *)p;
#define DO_WORD(w, cnt) { \
op_t val = w[cnt] ^ mask_c; \
if ((((w[cnt] - mask_1) & ~w[cnt]) & mask_128) || \
(((val - mask_1) & ~val) & mask_128)) { \
return do_bytes(w + cnt, ch); \
} \
}
static inline char * __attribute__ ((always_inline))
do_bytes (const op_t* w, unsigned char ch)
{
bitfields_t a;
unsigned char* p = (unsigned char *) w;
a.v = *w;
#if __mips64
DO_BYTE(0)
DO_BYTE(1)
DO_BYTE(2)
DO_BYTE(3)
DO_BYTE(4)
DO_BYTE(5)
DO_BYTE(6)
DO_BYTE(7)
#else
DO_BYTE(0)
DO_BYTE(1)
DO_BYTE(2)
DO_BYTE(3)
#endif
return (char *)p;
}
char* strchr(const char* s, int c) __overloadable
{
const op_t *w;
op_t mask_1, mask_128, mask_c;
const unsigned char ch = c;
unsigned char* p = (unsigned char *) s;
/*
* Check byte by byte till initial alignment
*/
for ( ; *p != ch && ((size_t) p % op_size) != 0; p++)
if (*p == '\0')
return 0;
if (*p != ch) {
w = (const op_t *) p;
mask_c = ch | (ch << 8);
mask_c |= mask_c << 16;
__asm__ volatile (
"li %0, 0x01010101 \n\t"
: "=r" (mask_1)
);
#if __mips64
mask_1 |= mask_1 << 32;
mask_c |= mask_c << 32;
#endif
mask_128 = mask_1 << 7;
/*
 * Check word/dword-wise after initial alignment until a character match
 * or the end of the string
*/
while (1) {
DO_WORD(w, 0)
DO_WORD(w, 1)
DO_WORD(w, 2)
DO_WORD(w, 3)
w += 4;
}
}
return (char *)p;
}
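
A host-runnable sketch (illustrative only) of the word test used above: XOR-ing a word with a byte-replicated copy of the target turns matching bytes into zero, so one pass of the zero-byte test detects either a NUL or the character. The multiply below is equivalent to the shift/or replication in the code.

#include <stdio.h>
#include <stdint.h>

static int word_has_char_or_nul(uint32_t w, unsigned char c) {
    uint32_t mask_c = c * 0x01010101u;                       /* replicate c into every byte */
    uint32_t v = w ^ mask_c;                                 /* matching bytes become zero */
    uint32_t z = ((w - 0x01010101u) & ~w) & 0x80808080u;     /* NUL present in w */
    uint32_t m = ((v - 0x01010101u) & ~v) & 0x80808080u;     /* c present in w */
    return (z | m) != 0;
}

int main(void) {
    uint32_t w = 0x64636261u;                                /* "abcd" on little-endian */
    printf("%d\n", word_has_char_or_nul(w, 'c'));            /* 1 */
    printf("%d\n", word_has_char_or_nul(w, 'x'));            /* 0 */
    return 0;
}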


@@ -1,30 +1,33 @@
/*
* Copyright (c) 2014
* Imagination Technologies Limited.
* Copyright (c) 2017 Imagination Technologies.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with
* the distribution.
* * Neither the name of Imagination Technologies nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef __ANDROID__
@@ -41,6 +44,22 @@
# include <sys/asm.h>
#endif
#if __mips64
# define NSIZE 8
# define LW ld
# define EXT dext
# define SRL dsrl
# define SLL dsll
# define SUBU dsubu
#else
# define NSIZE 4
# define LW lw
# define EXT ext
# define SRL srl
# define SLL sll
# define SUBU subu
#endif
/* Technically strcmp should not read past the end of the strings being
compared. We will read a full word that may contain excess bits beyond
the NULL string terminator but unless ENABLE_READAHEAD is set, we will not
@@ -77,6 +96,23 @@
# endif
#endif
/* It might seem better to do the 'beq' instruction between the two 'lbu'
instructions so that the nop is not needed but testing showed that this
code is actually faster (based on glibc strcmp test). */
#define BYTECMP01(OFFSET) \
lbu v0, OFFSET(a0); \
lbu v1, OFFSET(a1); \
beq v0, zero, L(bexit01); \
nop; \
bne v0, v1, L(bexit01)
#define BYTECMP89(OFFSET) \
lbu t8, OFFSET(a0); \
lbu t9, OFFSET(a1); \
beq t8, zero, L(bexit89); \
nop; \
bne t8, t9, L(bexit89)
/* Allow the routine to be named something else if desired. */
#ifndef STRCMP_NAME
# define STRCMP_NAME strcmp
@@ -87,170 +123,236 @@ LEAF(STRCMP_NAME, 0)
#else
LEAF(STRCMP_NAME)
#endif
.set nomips16
.set noreorder
.set nomips16
.set noreorder
or t0, a0, a1
andi t0,0x3
bne t0, zero, L(byteloop)
andi t1, a1, (NSIZE - 1)
beqz t1, L(exitalign)
or t0, zero, NSIZE
SUBU t1, t0, t1 #process (NSIZE - 1) bytes at max
/* Both strings are 4 byte aligned at this point. */
L(alignloop): #do by bytes until a1 aligned
BYTECMP01(0)
SUBU t1, t1, 0x1
PTR_ADDIU a0, a0, 0x1
bnez t1, L(alignloop)
PTR_ADDIU a1, a1, 0x1
lui t8, 0x0101
ori t8, t8, 0x0101
lui t9, 0x7f7f
ori t9, 0x7f7f
L(exitalign):
#define STRCMP32(OFFSET) \
lw v0, OFFSET(a0); \
lw v1, OFFSET(a1); \
subu t0, v0, t8; \
bne v0, v1, L(worddiff); \
nor t1, v0, t9; \
and t0, t0, t1; \
bne t0, zero, L(returnzero)
/* string a1 is NSIZE byte aligned at this point. */
lui t8, 0x0101
ori t8, 0x0101
lui t9, 0x7f7f
ori t9, 0x7f7f
#if __mips64
dsll t1, t8, 32
or t8, t1
dsll t1, t9, 32
or t9, t1
#endif
andi t2, a0, (NSIZE - 1) #check if a0 aligned
SUBU t3, t0, t2 #t3 will be used as shifter
bnez t2, L(uloopenter)
SUBU a2, a0, t2 #bring back a0 to aligned position
#define STRCMPW(OFFSET) \
LW v0, OFFSET(a0); \
LW v1, OFFSET(a1); \
SUBU t0, v0, t8; \
bne v0, v1, L(worddiff); \
nor t1, v0, t9; \
and t0, t0, t1; \
bne t0, zero, L(returnzero);\
L(wordloop):
STRCMP32(0)
DELAY_READ
STRCMP32(4)
DELAY_READ
STRCMP32(8)
DELAY_READ
STRCMP32(12)
DELAY_READ
STRCMP32(16)
DELAY_READ
STRCMP32(20)
DELAY_READ
STRCMP32(24)
DELAY_READ
STRCMP32(28)
PTR_ADDIU a0, a0, 32
b L(wordloop)
PTR_ADDIU a1, a1, 32
STRCMPW(0 * NSIZE)
DELAY_READ
STRCMPW(1 * NSIZE)
DELAY_READ
STRCMPW(2 * NSIZE)
DELAY_READ
STRCMPW(3 * NSIZE)
DELAY_READ
STRCMPW(4 * NSIZE)
DELAY_READ
STRCMPW(5 * NSIZE)
DELAY_READ
STRCMPW(6 * NSIZE)
DELAY_READ
STRCMPW(7 * NSIZE)
PTR_ADDIU a0, a0, (8 * NSIZE)
b L(wordloop)
PTR_ADDIU a1, a1, (8 * NSIZE)
#define USTRCMPW(OFFSET) \
LW v1, OFFSET(a1); \
SUBU t0, v0, t8; \
nor t1, v0, t9; \
and t0, t0, t1; \
bne t0, zero, L(worddiff); \
SRL v0, t2; \
LW a3, (OFFSET + NSIZE)(a2); \
SUBU t0, v1, t8; \
SLL t1, a3, t3; \
or v0, v0, t1; \
bne v0, v1, L(worddiff); \
nor t1, v1, t9; \
and t0, t0, t1; \
bne t0, zero, L(returnzero); \
move v0, a3;\
L(uloopenter):
LW v0, 0(a2)
SLL t2, 3 #multiply by 8
SLL t3, 3 #multiply by 8
li a3, -1 #all 1s
SRL a3, t3
or v0, a3 #replace with all 1s if zeros in unintended read
L(uwordloop):
USTRCMPW(0 * NSIZE)
USTRCMPW(1 * NSIZE)
USTRCMPW(2 * NSIZE)
USTRCMPW(3 * NSIZE)
USTRCMPW(4 * NSIZE)
USTRCMPW(5 * NSIZE)
USTRCMPW(6 * NSIZE)
USTRCMPW(7 * NSIZE)
PTR_ADDIU a2, a2, (8 * NSIZE)
b L(uwordloop)
PTR_ADDIU a1, a1, (8 * NSIZE)
L(returnzero):
j ra
move v0, zero
j ra
move v0, zero
#if __mips_isa_rev > 1
#define EXT_COMPARE01(POS) \
EXT t0, v0, POS, 8; \
beq t0, zero, L(wexit01); \
EXT t1, v1, POS, 8; \
bne t0, t1, L(wexit01)
#define EXT_COMPARE89(POS) \
EXT t8, v0, POS, 8; \
beq t8, zero, L(wexit89); \
EXT t9, v1, POS, 8; \
bne t8, t9, L(wexit89)
#else
#define EXT_COMPARE01(POS) \
SRL t0, v0, POS; \
SRL t1, v1, POS; \
andi t0, t0, 0xff; \
beq t0, zero, L(wexit01); \
andi t1, t1, 0xff; \
bne t0, t1, L(wexit01)
#define EXT_COMPARE89(POS) \
SRL t8, v0, POS; \
SRL t9, v1, POS; \
andi t8, t8, 0xff; \
beq t8, zero, L(wexit89); \
andi t9, t9, 0xff; \
bne t8, t9, L(wexit89)
#endif
L(worddiff):
#ifdef USE_CLZ
subu t0, v0, t8
nor t1, v0, t9
and t1, t0, t1
xor t0, v0, v1
or t0, t0, t1
SUBU t0, v0, t8
nor t1, v0, t9
and t1, t0, t1
xor t0, v0, v1
or t0, t0, t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
wsbh t0, t0
rotr t0, t0, 16
wsbh t0, t0
rotr t0, t0, 16
# endif
clz t1, t0
and t1, 0xf8
clz t1, t0
and t1, 0xf8
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
neg t1
addu t1, 24
neg t1
addu t1, 24
# endif
rotrv v0, v0, t1
rotrv v1, v1, t1
and v0, v0, 0xff
and v1, v1, 0xff
j ra
subu v0, v0, v1
rotrv v0, v0, t1
rotrv v1, v1, t1
and v0, v0, 0xff
and v1, v1, 0xff
j ra
SUBU v0, v0, v1
#else /* USE_CLZ */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
andi t0, v0, 0xff
beq t0, zero, L(wexit01)
andi t1, v1, 0xff
bne t0, t1, L(wexit01)
andi t0, v0, 0xff
beq t0, zero, L(wexit01)
andi t1, v1, 0xff
bne t0, t1, L(wexit01)
EXT_COMPARE89(8)
EXT_COMPARE01(16)
#ifndef __mips64
SRL t8, v0, 24
SRL t9, v1, 24
#else
EXT_COMPARE89(24)
EXT_COMPARE01(32)
EXT_COMPARE89(40)
EXT_COMPARE01(48)
SRL t8, v0, 56
SRL t9, v1, 56
#endif
srl t8, v0, 8
srl t9, v1, 8
andi t8, t8, 0xff
beq t8, zero, L(wexit89)
andi t9, t9, 0xff
bne t8, t9, L(wexit89)
srl t0, v0, 16
srl t1, v1, 16
andi t0, t0, 0xff
beq t0, zero, L(wexit01)
andi t1, t1, 0xff
bne t0, t1, L(wexit01)
srl t8, v0, 24
srl t9, v1, 24
# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
srl t0, v0, 24
beq t0, zero, L(wexit01)
srl t1, v1, 24
bne t0, t1, L(wexit01)
#ifdef __mips64
SRL t0, v0, 56
beq t0, zero, L(wexit01)
SRL t1, v1, 56
bne t0, t1, L(wexit01)
EXT_COMPARE89(48)
EXT_COMPARE01(40)
EXT_COMPARE89(32)
EXT_COMPARE01(24)
#else
SRL t0, v0, 24
beq t0, zero, L(wexit01)
SRL t1, v1, 24
bne t0, t1, L(wexit01)
#endif
EXT_COMPARE89(16)
EXT_COMPARE01(8)
srl t8, v0, 16
srl t9, v1, 16
andi t8, t8, 0xff
beq t8, zero, L(wexit89)
andi t9, t9, 0xff
bne t8, t9, L(wexit89)
srl t0, v0, 8
srl t1, v1, 8
andi t0, t0, 0xff
beq t0, zero, L(wexit01)
andi t1, t1, 0xff
bne t0, t1, L(wexit01)
andi t8, v0, 0xff
andi t9, v1, 0xff
andi t8, v0, 0xff
andi t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
L(wexit89):
j ra
subu v0, t8, t9
j ra
SUBU v0, t8, t9
L(wexit01):
j ra
subu v0, t0, t1
j ra
SUBU v0, t0, t1
#endif /* USE_CLZ */
/* It might seem better to do the 'beq' instruction between the two 'lbu'
instructions so that the nop is not needed but testing showed that this
code is actually faster (based on glibc strcmp test). */
#define BYTECMP01(OFFSET) \
lbu v0, OFFSET(a0); \
lbu v1, OFFSET(a1); \
beq v0, zero, L(bexit01); \
nop; \
bne v0, v1, L(bexit01)
#define BYTECMP89(OFFSET) \
lbu t8, OFFSET(a0); \
lbu t9, OFFSET(a1); \
beq t8, zero, L(bexit89); \
nop; \
bne t8, t9, L(bexit89)
L(byteloop):
BYTECMP01(0)
BYTECMP89(1)
BYTECMP01(2)
BYTECMP89(3)
BYTECMP01(4)
BYTECMP89(5)
BYTECMP01(6)
BYTECMP89(7)
PTR_ADDIU a0, a0, 8
b L(byteloop)
PTR_ADDIU a1, a1, 8
BYTECMP01(0)
BYTECMP89(1)
BYTECMP01(2)
BYTECMP89(3)
BYTECMP01(4)
BYTECMP89(5)
BYTECMP01(6)
BYTECMP89(7)
PTR_ADDIU a0, a0, 8
b L(byteloop)
PTR_ADDIU a1, a1, 8
L(bexit01):
j ra
subu v0, v0, v1
j ra
SUBU v0, v0, v1
L(bexit89):
j ra
subu v0, t8, t9
j ra
SUBU v0, t8, t9
.set at
.set reorder
.set at
.set reorder
END(STRCMP_NAME)
#ifndef __ANDROID__


@@ -0,0 +1,204 @@
/*
* Copyright (c) 2017 Imagination Technologies.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with
* the distribution.
* * Neither the name of Imagination Technologies nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#define op_t unsigned long int
#if !defined(UNALIGNED_INSTR_SUPPORT)
/* does target have unaligned lw/ld/ualw/uald instructions? */
#define UNALIGNED_INSTR_SUPPORT 0
#if __mips_isa_rev < 6 && !__mips1
#undef UNALIGNED_INSTR_SUPPORT
#define UNALIGNED_INSTR_SUPPORT 1
#endif
#endif
#if !defined(HW_UNALIGNED_SUPPORT)
/* Does target have hardware support for unaligned accesses? */
#define HW_UNALIGNED_SUPPORT 0
#if __mips_isa_rev >= 6
#undef HW_UNALIGNED_SUPPORT
#define HW_UNALIGNED_SUPPORT 1
#endif
#endif
#if __mips64
typedef struct
{
op_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
} bits_t;
#else
typedef struct
{
op_t B0:8, B1:8, B2:8, B3:8;
} bits_t;
#endif
typedef union
{
op_t v;
bits_t b;
} bitfields_t;
#if !HW_UNALIGNED_SUPPORT && UNALIGNED_INSTR_SUPPORT
/* for MIPS GCC, there are no unaligned builtins - so this struct forces
the compiler to treat the pointer access as unaligned. */
struct ulw
{
op_t uli;
} __attribute__ ((packed));
#endif /* !HW_UNALIGNED_SUPPORT && UNALIGNED_INSTR_SUPPORT */
#define DO_BYTE(i, ptdst) { \
*(ptdst+i) = a.b.B##i; \
if(a.b.B##i == '\0') \
return ret; \
}
#if __mips64
#define DO_BYTES(val, dst) { \
bitfields_t a; \
char *tdst = (char *)(dst); \
a.v = val; \
DO_BYTE(0, tdst) \
DO_BYTE(1, tdst) \
DO_BYTE(2, tdst) \
DO_BYTE(3, tdst) \
DO_BYTE(4, tdst) \
DO_BYTE(5, tdst) \
DO_BYTE(6, tdst) \
DO_BYTE(7, tdst) \
}
#else
#define DO_BYTES(val, dst) { \
bitfields_t a; \
char *tdst = (char *)(dst); \
a.v = val; \
DO_BYTE(0, tdst) \
DO_BYTE(1, tdst) \
DO_BYTE(2, tdst) \
DO_BYTE(3, tdst) \
}
#endif
#define DO_WORD_ALIGNED(dst, src) { \
op_t val = *(src); \
if ((((val - mask_1) & ~val) & mask_128) != 0) { \
DO_BYTES(val, dst); \
} else *(dst) = val; \
}
#if !HW_UNALIGNED_SUPPORT
#if UNALIGNED_INSTR_SUPPORT
#define DO_WORD_UNALIGNED(dst, src) { \
op_t val = *(src); \
if ((((val - mask_1) & ~val) & mask_128) != 0) { \
DO_BYTES(val, dst); \
} else { \
struct ulw *a = (struct ulw *)(dst); \
a->uli = val; \
} \
}
#else
#define DO_WORD_UNALIGNED(dst, src) { \
op_t val = *(src); \
if ((((val - mask_1) & ~val) & mask_128) != 0) { \
DO_BYTES(val, dst); \
} else { \
char *pdst = (char *) dst; \
const char *psrc = (const char *) src; \
for (; (*pdst = *psrc) != '\0'; ++psrc, ++pdst); \
return ret; \
} \
}
#endif /* UNALIGNED_INSTR_SUPPORT */
#define PROCESS_UNALIGNED_WORDS(a, b) { \
while (1) { \
DO_WORD_UNALIGNED(a, b); \
DO_WORD_UNALIGNED(a + 1, b + 1); \
DO_WORD_UNALIGNED(a + 2, b + 2); \
DO_WORD_UNALIGNED(a + 3, b + 3); \
a += 4; \
b += 4; \
} \
}
#endif /* HW_UNALIGNED_SUPPORT */
#define PROCESS_ALIGNED_WORDS(a, b) { \
while (1) { \
DO_WORD_ALIGNED(a, b); \
DO_WORD_ALIGNED(a + 1, b + 1); \
DO_WORD_ALIGNED(a + 2, b + 2); \
DO_WORD_ALIGNED(a + 3, b + 3); \
a += 4; \
b += 4; \
} \
}
char *
strcpy (char *to, const char *from) __overloadable
{
char *ret = to;
op_t mask_1, mask_128;
const op_t *src;
op_t *dst;
for (; (*to = *from) != '\0' && ((size_t) from % sizeof (op_t)) != 0; ++from, ++to);
if(*to != '\0') {
__asm__ volatile (
"li %0, 0x01010101 \n\t"
: "=r" (mask_1)
);
#if __mips64
mask_1 |= mask_1 << 32;
#endif
mask_128 = mask_1 << 7;
src = (const op_t *) from;
dst = (op_t *) to;
#if HW_UNALIGNED_SUPPORT
PROCESS_ALIGNED_WORDS(dst, src);
#else
if (((unsigned long) dst) % sizeof (op_t) == 0) {
PROCESS_ALIGNED_WORDS(dst, src);
} else {
PROCESS_UNALIGNED_WORDS(dst, src);
}
#endif
}
return ret;
}
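
A sketch (illustrative only; little-endian literals assumed) of the per-word decision above: a whole word is stored only when the zero-byte test proves it holds no terminator; otherwise the tail is copied byte by byte so the copy stops exactly at the NUL.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static char *copy_word_or_tail(char *dst, const uint32_t *src) {
    uint32_t w = *src;
    if ((((w - 0x01010101u) & ~w) & 0x80808080u) == 0) {
        memcpy(dst, &w, 4);                   /* no NUL: store the whole word */
        return dst + 4;
    }
    const char *p = (const char *) src;       /* NUL inside: finish byte by byte */
    while ((*dst++ = *p++) != '\0');
    return NULL;                              /* copy complete */
}

int main(void) {
    char out[16];
    uint32_t words[2] = { 0x64636261u /* "abcd" */, 0x00000065u /* "e\0\0\0" */ };
    char *d = copy_word_or_tail(out, &words[0]);
    copy_word_or_tail(d, &words[1]);
    printf("%s\n", out);                      /* prints "abcde" */
    return 0;
}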


@@ -1,43 +1,115 @@
/* $OpenBSD: strlen.c,v 1.8 2014/06/10 04:17:37 deraadt Exp $ */
/*-
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
/*
* Copyright (c) 2017 Imagination Technologies.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with
* the distribution.
* * Neither the name of Imagination Technologies nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
size_t
strlen(const char *str) __overloadable
{
const char *s;
#define op_t unsigned long int
#define op_size sizeof (op_t)
for (s = str; *s; ++s)
;
return (s - str);
#if __mips64 || __mips_isa_rev >= 2
static inline size_t __attribute__ ((always_inline))
do_bytes (const char *base, const char *p, op_t inval)
{
op_t outval = 0;
#if __mips64
__asm__ volatile (
"dsbh %1, %0 \n\t"
"dshd %0, %1 \n\t"
"dclz %1, %0 \n\t"
: "+r" (inval), "+r" (outval)
);
#else
__asm__ volatile (
"wsbh %1, %0 \n\t"
"rotr %0, %1, 16 \n\t"
"clz %1, %0 \n\t"
: "+r" (inval), "+r" (outval)
);
#endif
p += (outval >> 3);
return (size_t) (p - base);
}
#define DO_WORD(w, cnt) { \
op_t val = ((w[cnt] - mask_1) & ~w[cnt]) & mask_128; \
if (val) \
return do_bytes(str, (const char *)(w + cnt), val); \
}
#else
static inline size_t __attribute__ ((always_inline))
do_bytes (const char *base, const char *p)
{
for (; *p; ++p);
return (size_t) (p - base);
}
#define DO_WORD(w, cnt) { \
if (((w[cnt] - mask_1) & ~w[cnt]) & mask_128) \
return do_bytes(str, (const char *)(w + cnt)); \
}
#endif
size_t
strlen (const char *str) __overloadable
{
if (*str) {
const char *p = (const char *) str;
const op_t *w;
op_t mask_1, mask_128;
while ((size_t) p % sizeof (op_t)) {
if (!(*p))
return (p - str);
p++;
}
__asm__ volatile (
"li %0, 0x01010101 \n\t"
: "=r" (mask_1)
);
#if __mips64
mask_1 |= mask_1 << 32;
#endif
mask_128 = mask_1 << 7;
w = (const op_t *) p;
while (1) {
DO_WORD(w, 0);
DO_WORD(w, 1);
DO_WORD(w, 2);
DO_WORD(w, 3);
w += 4;
}
}
return 0;
}
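
A sketch (illustrative only; little-endian host, with GCC builtins standing in for the wsbh/rotr/clz or dsbh/dshd/dclz sequence) of how the CLZ path above turns the zero-byte test result into a byte index: byte-reverse the marker word, count leading zeros, divide by eight.

#include <stdio.h>
#include <stdint.h>

/* w must contain a zero byte, so the marker value below is non-zero. */
static unsigned first_zero_byte_index(uint32_t w) {
    uint32_t val = ((w - 0x01010101u) & ~w) & 0x80808080u;   /* 0x80 marks each zero byte */
    val = __builtin_bswap32(val);                            /* lowest-address byte to MSB */
    return (unsigned) __builtin_clz(val) >> 3;
}

int main(void) {
    /* "ab\0d" as a little-endian word: 0x64006261 */
    printf("%u\n", first_zero_byte_index(0x64006261u));      /* prints 2 */
    return 0;
}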


@@ -0,0 +1,401 @@
/*
* Copyright (c) 2017 Imagination Technologies.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with
* the distribution.
* * Neither the name of Imagination Technologies nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef __ANDROID__
# include <private/bionic_asm.h>
#elif _LIBC
# include <sysdep.h>
# include <regdef.h>
# include <sys/asm.h>
#elif _COMPILING_NEWLIB
# include "machine/asm.h"
# include "machine/regdef.h"
#else
# include <regdef.h>
# include <sys/asm.h>
#endif
#if __mips64
# define NSIZE 8
# define LW ld
# define LWR ldr
# define LWL ldl
# define EXT dext
# define SRL dsrl
# define SUBU dsubu
#else
# define NSIZE 4
# define LW lw
# define LWR lwr
# define LWL lwl
# define EXT ext
# define SRL srl
# define SUBU subu
#endif
/* Technically strcmp should not read past the end of the strings being
compared. We will read a full word that may contain excess bits beyond
the NULL string terminator but unless ENABLE_READAHEAD is set, we will not
read the next word after the end of string. Setting ENABLE_READAHEAD will
improve performance but is technically illegal based on the definition of
strcmp. */
#ifdef ENABLE_READAHEAD
# define DELAY_READ
#else
# define DELAY_READ nop
#endif
/* Testing on a little endian machine showed using CLZ was a
performance loss, so we are not turning it on by default. */
#if defined(ENABLE_CLZ) && (__mips_isa_rev > 1) && (!__mips64)
# define USE_CLZ
#endif
/* Some asm.h files do not have the L macro definition. */
#ifndef L
# if _MIPS_SIM == _ABIO32
# define L(label) $L ## label
# else
# define L(label) .L ## label
# endif
#endif
/* Some asm.h files do not have the PTR_ADDIU macro definition. */
#ifndef PTR_ADDIU
# if _MIPS_SIM == _ABIO32
# define PTR_ADDIU addiu
# else
# define PTR_ADDIU daddiu
# endif
#endif
/* It might seem better to do the 'beq' instruction between the two 'lbu'
instructions so that the nop is not needed but testing showed that this
code is actually faster (based on glibc strcmp test). */
#define BYTECMP01(OFFSET) \
lbu v0, OFFSET(a0); \
lbu v1, OFFSET(a1); \
beq v0, zero, L(bexit01); \
nop; \
bne v0, v1, L(bexit01)
#define BYTECMP89(OFFSET) \
lbu t8, OFFSET(a0); \
lbu t9, OFFSET(a1); \
beq t8, zero, L(bexit89); \
nop; \
bne t8, t9, L(bexit89)
/* Allow the routine to be named something else if desired. */
#ifndef STRNCMP_NAME
# define STRNCMP_NAME strncmp
#endif
#ifdef __ANDROID__
LEAF(STRNCMP_NAME, 0)
#else
LEAF(STRNCMP_NAME)
#endif
.set nomips16
.set noreorder
srl t0, a2, (2 + NSIZE / 4)
beqz t0, L(byteloop) #process by bytes if less than (2 * NSIZE)
andi t1, a1, (NSIZE - 1)
beqz t1, L(exitalign)
or t0, zero, NSIZE
SUBU t1, t0, t1 #process (NSIZE - 1) bytes at max
SUBU a2, a2, t1 #dec count by t1
L(alignloop): #do by bytes until a1 aligned
BYTECMP01(0)
SUBU t1, t1, 0x1
PTR_ADDIU a0, a0, 0x1
bne t1, zero, L(alignloop)
PTR_ADDIU a1, a1, 0x1
L(exitalign):
/* string a1 is NSIZE byte aligned at this point. */
#ifndef __mips1
lui t8, 0x0101
ori t8, 0x0101
lui t9, 0x7f7f
ori t9, 0x7f7f
#if __mips64
dsll t0, t8, 32
or t8, t0
dsll t1, t9, 32
or t9, t1
#endif
#endif
/* mips1 has neither hardware nor software unaligned support;
   rev6 archs have h/w unaligned support;
   the remaining archs are handled with unaligned instructions. */
#if __mips1
andi t0, a0, (NSIZE - 1)
bne t0, zero, L(byteloop)
#elif __mips_isa_rev < 6
andi t0, a0, (NSIZE - 1)
bne t0, zero, L(uwordloop)
#endif
#define STRCMPW(OFFSET) \
LW v0, (OFFSET)(a0); \
LW v1, (OFFSET)(a1); \
SUBU t0, v0, t8; \
bne v0, v1, L(worddiff); \
nor t1, v0, t9; \
and t0, t0, t1; \
bne t0, zero, L(returnzero);\
L(wordloop):
SUBU t1, a2, (8 * NSIZE)
bltz t1, L(onewords)
STRCMPW(0 * NSIZE)
DELAY_READ
STRCMPW(1 * NSIZE)
DELAY_READ
STRCMPW(2 * NSIZE)
DELAY_READ
STRCMPW(3 * NSIZE)
DELAY_READ
STRCMPW(4 * NSIZE)
DELAY_READ
STRCMPW(5 * NSIZE)
DELAY_READ
STRCMPW(6 * NSIZE)
DELAY_READ
STRCMPW(7 * NSIZE)
SUBU a2, a2, (8 * NSIZE)
PTR_ADDIU a0, a0, (8 * NSIZE)
b L(wordloop)
PTR_ADDIU a1, a1, (8 * NSIZE)
L(onewords):
SUBU t1, a2, NSIZE
bltz t1, L(byteloop)
STRCMPW(0)
SUBU a2, a2, NSIZE
PTR_ADDIU a0, a0, NSIZE
b L(onewords)
PTR_ADDIU a1, a1, NSIZE
#if __mips_isa_rev < 6 && !__mips1
#define USTRCMPW(OFFSET) \
LWR v0, (OFFSET)(a0); \
LWL v0, (OFFSET + NSIZE - 1)(a0); \
LW v1, (OFFSET)(a1); \
SUBU t0, v0, t8; \
bne v0, v1, L(worddiff); \
nor t1, v0, t9; \
and t0, t0, t1; \
bne t0, zero, L(returnzero);\
L(uwordloop):
SUBU t1, a2, (8 * NSIZE)
bltz t1, L(uonewords)
USTRCMPW(0 * NSIZE)
DELAY_READ
USTRCMPW(1 * NSIZE)
DELAY_READ
USTRCMPW(2 * NSIZE)
DELAY_READ
USTRCMPW(3 * NSIZE)
DELAY_READ
USTRCMPW(4 * NSIZE)
DELAY_READ
USTRCMPW(5 * NSIZE)
DELAY_READ
USTRCMPW(6 * NSIZE)
DELAY_READ
USTRCMPW(7 * NSIZE)
SUBU a2, a2, (8 * NSIZE)
PTR_ADDIU a0, a0, (8 * NSIZE)
b L(uwordloop)
PTR_ADDIU a1, a1, (8 * NSIZE)
L(uonewords):
SUBU t1, a2, NSIZE
bltz t1, L(byteloop)
USTRCMPW(0)
SUBU a2, a2, NSIZE
PTR_ADDIU a0, a0, NSIZE
b L(uonewords)
PTR_ADDIU a1, a1, NSIZE
#endif
L(returnzero):
j ra
move v0, zero
#if __mips_isa_rev > 1
#define EXT_COMPARE01(POS) \
EXT t0, v0, POS, 8; \
beq t0, zero, L(wexit01); \
EXT t1, v1, POS, 8; \
bne t0, t1, L(wexit01)
#define EXT_COMPARE89(POS) \
EXT t8, v0, POS, 8; \
beq t8, zero, L(wexit89); \
EXT t9, v1, POS, 8; \
bne t8, t9, L(wexit89)
#else
#define EXT_COMPARE01(POS) \
SRL t0, v0, POS; \
SRL t1, v1, POS; \
andi t0, t0, 0xff; \
beq t0, zero, L(wexit01); \
andi t1, t1, 0xff; \
bne t0, t1, L(wexit01)
#define EXT_COMPARE89(POS) \
SRL t8, v0, POS; \
SRL t9, v1, POS; \
andi t8, t8, 0xff; \
beq t8, zero, L(wexit89); \
andi t9, t9, 0xff; \
bne t8, t9, L(wexit89)
#endif
L(worddiff):
#ifdef USE_CLZ
SUBU t0, v0, t8
nor t1, v0, t9
and t1, t0, t1
xor t0, v0, v1
or t0, t0, t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
wsbh t0, t0
rotr t0, t0, 16
# endif
clz t1, t0
and t1, 0xf8
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
neg t1
addu t1, 24
# endif
rotrv v0, v0, t1
rotrv v1, v1, t1
and v0, v0, 0xff
and v1, v1, 0xff
j ra
SUBU v0, v0, v1
#else /* USE_CLZ */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
andi t0, v0, 0xff
beq t0, zero, L(wexit01)
andi t1, v1, 0xff
bne t0, t1, L(wexit01)
EXT_COMPARE89(8)
EXT_COMPARE01(16)
#ifndef __mips64
SRL t8, v0, 24
SRL t9, v1, 24
#else
EXT_COMPARE89(24)
EXT_COMPARE01(32)
EXT_COMPARE89(40)
EXT_COMPARE01(48)
SRL t8, v0, 56
SRL t9, v1, 56
#endif
# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
#ifdef __mips64
SRL t0, v0, 56
beq t0, zero, L(wexit01)
SRL t1, v1, 56
bne t0, t1, L(wexit01)
EXT_COMPARE89(48)
EXT_COMPARE01(40)
EXT_COMPARE89(32)
EXT_COMPARE01(24)
#else
SRL t0, v0, 24
beq t0, zero, L(wexit01)
SRL t1, v1, 24
bne t0, t1, L(wexit01)
#endif
EXT_COMPARE89(16)
EXT_COMPARE01(8)
andi t8, v0, 0xff
andi t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
L(wexit89):
j ra
SUBU v0, t8, t9
L(wexit01):
j ra
SUBU v0, t0, t1
#endif /* USE_CLZ */
L(byteloop):
beq a2, zero, L(returnzero)
SUBU a2, a2, 1
BYTECMP01(0)
nop
beq a2, zero, L(returnzero)
SUBU a2, a2, 1
BYTECMP89(1)
nop
beq a2, zero, L(returnzero)
SUBU a2, a2, 1
BYTECMP01(2)
nop
beq a2, zero, L(returnzero)
SUBU a2, a2, 1
BYTECMP89(3)
PTR_ADDIU a0, a0, 4
b L(byteloop)
PTR_ADDIU a1, a1, 4
L(bexit01):
j ra
SUBU v0, v0, v1
L(bexit89):
j ra
SUBU v0, t8, t9
.set at
.set reorder
END(STRNCMP_NAME)
#ifndef __ANDROID__
# ifdef _LIBC
libc_hidden_builtin_def (STRNCMP_NAME)
# endif
#endif
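
A sketch (illustrative only) checking the entry dispatch above: shifting the count right by (2 + NSIZE / 4) is n / (2 * NSIZE) for NSIZE of 4 or 8, so lengths below two words go straight to the byte loop.

#include <stdio.h>

int main(void) {
    for (unsigned nsize = 4; nsize <= 8; nsize += 4) {
        unsigned shift = 2 + nsize / 4;       /* 3 for NSIZE=4, 4 for NSIZE=8 */
        for (unsigned n = 0; n <= 3 * nsize; n++) {
            int word_path = (n >> shift) != 0;
            if (word_path != (n >= 2 * nsize))
                printf("mismatch: nsize=%u n=%u\n", nsize, n);
        }
    }
    printf("dispatch threshold checked\n");
    return 0;
}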


@@ -0,0 +1,139 @@
/*
* Copyright (c) 2017 Imagination Technologies.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with
* the distribution.
* * Neither the name of Imagination Technologies nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#define op_t unsigned long int
#define op_size sizeof (op_t)
#if __mips64 || __mips_isa_rev >= 2
static inline size_t __attribute__ ((always_inline))
do_bytes (const char *base, const char *p, op_t inval)
{
op_t outval = 0;
#if __mips64
__asm__ volatile (
"dsbh %1, %0 \n\t"
"dshd %0, %1 \n\t"
"dclz %1, %0 \n\t"
: "+r" (inval), "+r" (outval)
);
#else
__asm__ volatile (
"wsbh %1, %0 \n\t"
"rotr %0, %1, 16 \n\t"
"clz %1, %0 \n\t"
: "+r" (inval), "+r" (outval)
);
#endif
p += (outval >> 3);
return (size_t) (p - base);
}
#define DO_WORD(in, val) { \
op_t tmp = ((val - mask_1) & ~val) & mask_128; \
if (tmp) \
return do_bytes(str, (const char *)(in), tmp); \
}
#else
static inline size_t __attribute__ ((always_inline))
do_bytes (const char *base, const char *p)
{
for (; *p; ++p);
return (size_t) (p - base);
}
#define DO_WORD(in, val) { \
if (((val - mask_1) & ~val) & mask_128) { \
return do_bytes(str, (const char *)(in)); \
} \
}
#endif
size_t
strnlen (const char *str, size_t n)
{
if (n != 0) {
const char *p = (const char *) str;
const op_t *w;
op_t mask_1, mask_128;
for (; n > 0 && ((size_t) p % op_size) != 0; --n, ++p) {
if (!(*p))
return (p - str);
}
w = (const op_t *) p;
__asm__ volatile (
"li %0, 0x01010101 \n\t"
: "=r" (mask_1)
);
#if __mips64
mask_1 |= mask_1 << 32;
#endif
mask_128 = mask_1 << 7;
/*
* Check op_size bytes at a time after the initial alignment
*/
while (n >= 4 * op_size) {
const op_t w0 = w[0];
const op_t w1 = w[1];
const op_t w2 = w[2];
const op_t w3 = w[3];
DO_WORD(w + 0, w0)
DO_WORD(w + 1, w1)
DO_WORD(w + 2, w2)
DO_WORD(w + 3, w3)
w += 4;
n -= 4 * op_size;
}
while (n >= op_size) {
DO_WORD(w, w[0]);
w++;
n -= op_size;
}
/*
* Check the remaining bytes one at a time
*/
p = (const char *) w;
for (; n > 0; --n, ++p) {
if (!(*p))
return (p - str);
}
return (p - str);
}
return 0;
}
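
A quick equivalence sketch (illustrative only): the word loop above must agree with a naive byte loop for every count, including counts that stop mid-word.

#include <stdio.h>
#include <string.h>

static size_t naive_strnlen(const char *s, size_t n) {
    size_t i = 0;
    while (i < n && s[i]) i++;
    return i;
}

int main(void) {
    const char *s = "hello, world";
    for (size_t n = 0; n <= 16; n++)
        if (strnlen(s, n) != naive_strnlen(s, n))
            printf("mismatch at n=%zu\n", n);
    printf("strnlen agrees with the byte loop\n");
    return 0;
}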