Merge "avx2 implementation for memset." am: 61a42bf1ee
Original change: https://android-review.googlesource.com/c/platform/bionic/+/2121016 Change-Id: I24758003d6ba62d2ebd1234cfbab2aa8c783f1ed Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
This commit is contained in:
commit
414b7ed2f9
11 changed files with 257 additions and 369 deletions
|
@ -376,7 +376,6 @@ cc_library_static {
|
|||
"upstream-freebsd/lib/libc/string/wcscat.c",
|
||||
"upstream-freebsd/lib/libc/string/wcscpy.c",
|
||||
"upstream-freebsd/lib/libc/string/wmemcmp.c",
|
||||
"upstream-freebsd/lib/libc/string/wmemset.c",
|
||||
],
|
||||
},
|
||||
},
|
||||
|
@ -927,7 +926,6 @@ cc_library_static {
|
|||
"arch-x86/generic/string/wcscat.c",
|
||||
"arch-x86/generic/string/wcscpy.c",
|
||||
"arch-x86/generic/string/wmemcmp.c",
|
||||
"arch-x86/generic/string/wmemset.c",
|
||||
|
||||
"arch-x86/atom/string/sse2-memchr-atom.S",
|
||||
"arch-x86/atom/string/sse2-memrchr-atom.S",
|
||||
|
@ -977,9 +975,6 @@ cc_library_static {
|
|||
"arch-x86/atom/string/ssse3-strcpy-atom.S",
|
||||
"arch-x86/atom/string/ssse3-strncpy-atom.S",
|
||||
"arch-x86/atom/string/ssse3-wmemcmp-atom.S",
|
||||
|
||||
// avx2 functions
|
||||
"arch-x86/kabylake/string/avx2-wmemset-kbl.S",
|
||||
],
|
||||
|
||||
exclude_srcs: [
|
||||
|
@ -990,6 +985,7 @@ cc_library_static {
|
|||
},
|
||||
x86_64: {
|
||||
srcs: [
|
||||
"arch-x86_64/string/avx2-memset-kbl.S",
|
||||
"arch-x86_64/string/sse2-memmove-slm.S",
|
||||
"arch-x86_64/string/sse2-memset-slm.S",
|
||||
"arch-x86_64/string/sse2-stpcpy-slm.S",
|
||||
|
@ -1002,7 +998,6 @@ cc_library_static {
|
|||
"arch-x86_64/string/sse4-memcmp-slm.S",
|
||||
"arch-x86_64/string/ssse3-strcmp-slm.S",
|
||||
"arch-x86_64/string/ssse3-strncmp-slm.S",
|
||||
"arch-x86_64/string/avx2-wmemset-kbl.S",
|
||||
|
||||
"arch-x86_64/bionic/__bionic_clone.S",
|
||||
"arch-x86_64/bionic/_exit_with_stack_teardown.S",
|
||||
|
@ -1515,6 +1510,9 @@ cc_library_static {
|
|||
name: "libc_static_dispatch",
|
||||
|
||||
arch: {
|
||||
x86_64: {
|
||||
srcs: ["arch-x86_64/static_function_dispatch.S"],
|
||||
},
|
||||
x86: {
|
||||
srcs: ["arch-x86/static_function_dispatch.S"],
|
||||
},
|
||||
|
@ -1540,6 +1538,9 @@ cc_library_static {
|
|||
"-fno-jump-tables",
|
||||
],
|
||||
arch: {
|
||||
x86_64: {
|
||||
srcs: ["arch-x86_64/dynamic_function_dispatch.cpp"],
|
||||
},
|
||||
x86: {
|
||||
srcs: ["arch-x86/dynamic_function_dispatch.cpp"],
|
||||
},
|
||||
|
|
44
libc/NOTICE
44
libc/NOTICE
|
@ -782,22 +782,6 @@ SUCH DAMAGE.
|
|||
|
||||
-------------------------------------------------------------------
|
||||
|
||||
Copyright (C) 2019 The Android Open Source Project
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
|
||||
Copyright (C) 2019 The Android Open Source Project
|
||||
All rights reserved.
|
||||
|
||||
|
@ -826,34 +810,6 @@ SUCH DAMAGE.
|
|||
|
||||
-------------------------------------------------------------------
|
||||
|
||||
Copyright (C) 2019 The Android Open Source Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGE.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
|
||||
Copyright (C) 2020 The Android Open Source Project
|
||||
All rights reserved.
|
||||
|
||||
|
|
|
@ -95,13 +95,6 @@ DEFINE_IFUNC_FOR(wmemcmp) {
|
|||
RETURN_FUNC(wmemcmp_func, wmemcmp_freebsd);
|
||||
}
|
||||
|
||||
typedef int wmemset_func(const wchar_t* __lhs, const wchar_t* __rhs, size_t __n);
|
||||
DEFINE_IFUNC_FOR(wmemset) {
|
||||
__builtin_cpu_init();
|
||||
if (__builtin_cpu_supports("avx2")) RETURN_FUNC(wmemset_func, wmemset_avx2);
|
||||
RETURN_FUNC(wmemset_func, wmemset_freebsd);
|
||||
}
|
||||
|
||||
typedef int strcmp_func(const char* __lhs, const char* __rhs);
|
||||
DEFINE_IFUNC_FOR(strcmp) {
|
||||
__builtin_cpu_init();
|
||||
|
|
|
@ -1,19 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2019 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/* Rename wmemset to wmemset_freebsd while the FreeBSD source below is compiled, */
/* so the function defined by that source is emitted under the name wmemset_freebsd. */
#define wmemset wmemset_freebsd
|
||||
|
||||
#include <upstream-freebsd/lib/libc/string/wmemset.c>
|
|
@ -1,148 +0,0 @@
|
|||
/*
|
||||
Copyright (C) 2019 The Android Open Source Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <private/bionic_asm.h>
|
||||
|
||||
/* Name of the exported symbol; may be overridden by defining WMEMSET first. */
#ifndef WMEMSET
#define WMEMSET wmemset_avx2
#endif

/*
 * wmemset for 32-bit x86 using AVX2 stores (GAS, AT&T syntax).
 *
 * Arguments are read from the stack. After the five pushes below they sit at
 *   24(%esp) = destination, 28(%esp) = 32-bit fill value, 32(%esp) = count
 * where count is the number of 4-byte elements to store.
 * Returns the destination pointer in %eax.
 *
 * Layout of the work:
 *   1. 1024-byte blocks (32 x 32-byte stores) while at least 8 chunks remain,
 *   2. up to 7 remaining 128-byte chunks (4 x 32-byte stores each),
 *   3. a scalar loop for the last (count % 32) elements.
 */
ENTRY(WMEMSET)
	pushl	%ebp
	pushl	%ebx
	pushl	%edi
	pushl	%esi
	pushl	%eax			# reserves the 4-byte spill slot at (%esp)
	movl	32(%esp), %ecx		# ecx = count
	movl	24(%esp), %eax		# eax = destination (also the return value)
	testl	%ecx, %ecx
	je	.Lwms_done		# count == 0: nothing to store

	movl	28(%esp), %edx		# edx = fill value
	xorl	%edi, %edi		# edi = elements already handled by vector code
	movl	%eax, %esi		# esi = scalar store cursor
	cmpl	$32, %ecx
	jb	.Lwms_tail_setup	# fewer than 32 elements: scalar loop only

	movl	%ecx, %eax
	andl	$-32, %eax		# eax = count rounded down to a multiple of 32
	vmovd	%edx, %xmm0
	vpbroadcastd	%xmm0, %ymm0	# ymm0 = fill value in all 8 lanes
	movl	%eax, (%esp)		# spill the rounded count for later
	leal	-32(%eax), %esi
	movl	%esi, %eax
	shrl	$5, %eax		# eax = number of 128-byte chunks - 1
	leal	1(%eax), %edi
	andl	$7, %edi		# edi = chunk count modulo 8
	xorl	%ebx, %ebx		# ebx = element index reached by the block loop
	cmpl	$224, %esi
	jb	.Lwms_rem_chunks	# fewer than 8 chunks: skip the block loop

	movl	24(%esp), %esi
	leal	992(%esi), %ebp		# ebp = destination + 992 (bias for the offsets below)
	leal	-1(%edi), %esi
	subl	%eax, %esi		# esi = -(chunks rounded down to a multiple of 8)
	xorl	%ebx, %ebx
	.p2align 4, 0x90
.Lwms_block_loop:			# one iteration = 32 stores = 1024 bytes
	vmovdqu	%ymm0, -992(%ebp,%ebx,4)
	vmovdqu	%ymm0, -960(%ebp,%ebx,4)
	vmovdqu	%ymm0, -928(%ebp,%ebx,4)
	vmovdqu	%ymm0, -896(%ebp,%ebx,4)
	vmovdqu	%ymm0, -864(%ebp,%ebx,4)
	vmovdqu	%ymm0, -832(%ebp,%ebx,4)
	vmovdqu	%ymm0, -800(%ebp,%ebx,4)
	vmovdqu	%ymm0, -768(%ebp,%ebx,4)
	vmovdqu	%ymm0, -736(%ebp,%ebx,4)
	vmovdqu	%ymm0, -704(%ebp,%ebx,4)
	vmovdqu	%ymm0, -672(%ebp,%ebx,4)
	vmovdqu	%ymm0, -640(%ebp,%ebx,4)
	vmovdqu	%ymm0, -608(%ebp,%ebx,4)
	vmovdqu	%ymm0, -576(%ebp,%ebx,4)
	vmovdqu	%ymm0, -544(%ebp,%ebx,4)
	vmovdqu	%ymm0, -512(%ebp,%ebx,4)
	vmovdqu	%ymm0, -480(%ebp,%ebx,4)
	vmovdqu	%ymm0, -448(%ebp,%ebx,4)
	vmovdqu	%ymm0, -416(%ebp,%ebx,4)
	vmovdqu	%ymm0, -384(%ebp,%ebx,4)
	vmovdqu	%ymm0, -352(%ebp,%ebx,4)
	vmovdqu	%ymm0, -320(%ebp,%ebx,4)
	vmovdqu	%ymm0, -288(%ebp,%ebx,4)
	vmovdqu	%ymm0, -256(%ebp,%ebx,4)
	vmovdqu	%ymm0, -224(%ebp,%ebx,4)
	vmovdqu	%ymm0, -192(%ebp,%ebx,4)
	vmovdqu	%ymm0, -160(%ebp,%ebx,4)
	vmovdqu	%ymm0, -128(%ebp,%ebx,4)
	vmovdqu	%ymm0, -96(%ebp,%ebx,4)
	vmovdqu	%ymm0, -64(%ebp,%ebx,4)
	vmovdqu	%ymm0, -32(%ebp,%ebx,4)
	vmovdqu	%ymm0, (%ebp,%ebx,4)
	addl	$256, %ebx		# imm = 0x100 (elements per block)
	addl	$8, %esi		# 8 chunks consumed; reaches zero when done
	jne	.Lwms_block_loop

.Lwms_rem_chunks:
	testl	%edi, %edi
	movl	24(%esp), %eax		# restore eax = destination (movl leaves flags intact)
	je	.Lwms_after_vec		# no leftover 128-byte chunks
	leal	(%eax,%ebx,4), %esi
	addl	$96, %esi		# esi = chunk base + 96 (bias for the offsets below)
	negl	%edi			# count up from -(leftover chunks) to zero
	.p2align 4, 0x90
.Lwms_chunk_loop:			# one iteration = 4 stores = 128 bytes
	vmovdqu	%ymm0, -96(%esi)
	vmovdqu	%ymm0, -64(%esi)
	vmovdqu	%ymm0, -32(%esi)
	vmovdqu	%ymm0, (%esi)
	subl	$-128, %esi		# advance by 128 bytes
	addl	$1, %edi
	jne	.Lwms_chunk_loop

.Lwms_after_vec:
	movl	(%esp), %edi		# reload the rounded count
	cmpl	%ecx, %edi
	je	.Lwms_done		# count was a multiple of 32: no scalar tail
	leal	(%eax,%edi,4), %esi	# esi = first element not yet written

.Lwms_tail_setup:
	subl	%edi, %ecx		# ecx = elements left for the scalar loop
	.p2align 4, 0x90
.Lwms_tail_loop:
	movl	%edx, (%esi)
	addl	$4, %esi
	addl	$-1, %ecx
	jne	.Lwms_tail_loop

.Lwms_done:
	addl	$4, %esp		# drop the spill slot
	popl	%esi
	popl	%edi
	popl	%ebx
	popl	%ebp
	vzeroupper
	retl
END(WMEMSET)
|
|
@ -45,7 +45,6 @@ FUNCTION_DELEGATE(strcmp, strcmp_generic)
|
|||
FUNCTION_DELEGATE(strncmp, strncmp_generic)
|
||||
FUNCTION_DELEGATE(strcat, strcat_generic)
|
||||
FUNCTION_DELEGATE(wmemcmp, wmemcmp_freebsd)
|
||||
FUNCTION_DELEGATE(wmemset, wmemset_freebsd)
|
||||
FUNCTION_DELEGATE(wcscat, wcscat_freebsd)
|
||||
FUNCTION_DELEGATE(strncat, strncat_openbsd)
|
||||
FUNCTION_DELEGATE(strlcat, strlcat_openbsd)
|
||||
|
|
49
libc/arch-x86_64/dynamic_function_dispatch.cpp
Normal file
49
libc/arch-x86_64/dynamic_function_dispatch.cpp
Normal file
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <private/bionic_ifuncs.h>
|
||||
|
||||
extern "C" {
|
||||
|
||||
typedef int memset_func(void* __dst, int __ch, size_t __n);
|
||||
DEFINE_IFUNC_FOR(memset) {
|
||||
__builtin_cpu_init();
|
||||
if (__builtin_cpu_supports("avx2")) RETURN_FUNC(memset_func, memset_avx2);
|
||||
RETURN_FUNC(memset_func, memset_generic);
|
||||
}
|
||||
|
||||
typedef void* __memset_chk_func(void* s, int c, size_t n, size_t n2);
|
||||
DEFINE_IFUNC_FOR(__memset_chk) {
|
||||
__builtin_cpu_init();
|
||||
if (__builtin_cpu_supports("avx2")) RETURN_FUNC(__memset_chk_func, __memset_chk_avx2);
|
||||
RETURN_FUNC(__memset_chk_func, __memset_chk_generic);
|
||||
}
|
||||
|
||||
} // extern "C"
|
37
libc/arch-x86_64/static_function_dispatch.S
Normal file
37
libc/arch-x86_64/static_function_dispatch.S
Normal file
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
* Copyright (C) 2022 The Android Open Source Project
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <private/bionic_asm.h>
|
||||
|
||||
/*
 * FUNCTION_DELEGATE(symbol, target)
 * Defines the function `symbol` whose whole body is a jump to `target`,
 * so `target` runs with the caller's arguments and returns to the caller.
 */
#define FUNCTION_DELEGATE(symbol, target) \
  ENTRY(symbol);                          \
  jmp target;                             \
  END(symbol)

/* Route memset and __memset_chk straight to the generic implementations. */
FUNCTION_DELEGATE(memset, memset_generic)
FUNCTION_DELEGATE(__memset_chk, __memset_chk_generic)
|
160
libc/arch-x86_64/string/avx2-memset-kbl.S
Normal file
160
libc/arch-x86_64/string/avx2-memset-kbl.S
Normal file
|
@ -0,0 +1,160 @@
|
|||
/*
|
||||
Copyright (c) 2014, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <private/bionic_asm.h>
|
||||
|
||||
#include "cache.h"
|
||||
|
||||
/* L(label): spell a label as .L<label>; only defined when L is not already defined. */
#ifndef L
|
||||
# define L(label) .L##label
|
||||
#endif
|
||||
|
||||
/* ALIGN(n): emit a .p2align n directive; only defined when ALIGN is not already defined. */
#ifndef ALIGN
|
||||
# define ALIGN(n) .p2align n
|
||||
#endif
|
||||
|
||||
.section .text.avx2,"ax",@progbits
|
||||
|
||||
/*
 * __memset_chk_avx2: bounds-checked entry point.
 * In:  %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len
 * Jumps to __memset_chk_fail when n > dst_len (unsigned compare); otherwise
 * execution falls through into memset_avx2, which must stay directly below.
 */
ENTRY(__memset_chk_avx2)
	// %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len
	cmp	%rcx, %rdx
	ja	__memset_chk_fail
	// Fall through to memset...
END(__memset_chk_avx2)

/*
 * memset_avx2 (GAS, AT&T syntax)
 * In:   %rdi = dst, %rsi = byte (only the low 8 bits are used), %rdx = n
 * Out:  %rax = dst
 * Uses: %rcx, %rdx, %r8, %xmm0/%ymm0, flags; no stack.
 *
 * Strategy: sizes up to 256 bytes are covered with overlapping stores
 * anchored at both ends of the buffer. Larger sizes write the first and last
 * 128 bytes that way and fill the 128-byte-aligned middle with a loop, using
 * non-temporal stores when n exceeds the shared cache size.
 *
 * Every path that has written %ymm0 executes vzeroupper before it returns or
 * before it runs legacy SSE instructions again, so neither this function nor
 * its caller runs SSE code with dirty upper YMM state. vzeroupper leaves the
 * low 128 bits (%xmm0) and the flags unchanged.
 */
ENTRY(memset_avx2)
	movq	%rdi, %rax			// return value = dst
	and	$0xff, %rsi
	mov	$0x0101010101010101, %rcx
	imul	%rsi, %rcx			// rcx = fill byte replicated 8 times
	cmpq	$16, %rdx
	jae	L(16bytesormore)
	// n < 16: dispatch on the bits of n; each case uses two stores that may overlap.
	testb	$8, %dl
	jnz	L(8_15bytes)
	testb	$4, %dl
	jnz	L(4_7bytes)
	testb	$2, %dl
	jnz	L(2_3bytes)
	testb	$1, %dl
	jz	L(return)			// n == 0
	movb	%cl, (%rdi)			// n == 1
L(return):
	ret

L(8_15bytes):
	movq	%rcx, (%rdi)
	movq	%rcx, -8(%rdi, %rdx)
	ret

L(4_7bytes):
	movl	%ecx, (%rdi)
	movl	%ecx, -4(%rdi, %rdx)
	ret

L(2_3bytes):
	movw	%cx, (%rdi)
	movw	%cx, -2(%rdi, %rdx)
	ret

	ALIGN (4)
L(16bytesormore):
	movd	%rcx, %xmm0
	pshufd	$0, %xmm0, %xmm0		// xmm0 = fill byte in all 16 lanes
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm0, -16(%rdi, %rdx)
	cmpq	$32, %rdx
	jbe	L(32bytesless)
	movdqu	%xmm0, 16(%rdi)
	movdqu	%xmm0, -32(%rdi, %rdx)
	cmpq	$64, %rdx
	jbe	L(64bytesless)
	movdqu	%xmm0, 32(%rdi)
	movdqu	%xmm0, 48(%rdi)
	movdqu	%xmm0, -64(%rdi, %rdx)
	movdqu	%xmm0, -48(%rdi, %rdx)
	cmpq	$128, %rdx
	jbe	L(128bytesless)
	vpbroadcastb %xmm0, %ymm0		// ymm0 = fill byte in all 32 lanes
	vmovdqu	%ymm0, 64(%rdi)
	vmovdqu	%ymm0, 96(%rdi)
	vmovdqu	%ymm0, -128(%rdi, %rdx)
	vmovdqu	%ymm0, -96(%rdi, %rdx)
	cmpq	$256, %rdx
	ja	L(256bytesmore)
	// 128 < n <= 256: ymm0 was written, so clear the upper state before
	// returning. The SSE-only paths enter below this instruction.
	vzeroupper
L(32bytesless):
L(64bytesless):
L(128bytesless):
	ret

	ALIGN (4)
L(256bytesmore):
	// The first and last 128 bytes are already written. Fill the
	// 128-byte-aligned range [rcx, rdx) that lies between them.
	leaq	128(%rdi), %rcx
	andq	$-128, %rcx			// rcx = (dst + 128) rounded down to 128
	movq	%rdx, %r8			// r8 = n, kept for the cache-size test
	addq	%rdi, %rdx
	andq	$-128, %rdx			// rdx = (dst + n) rounded down to 128
	cmpq	%rcx, %rdx
	je	L(return_vzeroupper)		// empty middle range

#ifdef SHARED_CACHE_SIZE
	cmp	$SHARED_CACHE_SIZE, %r8
#else
	cmp	__x86_64_shared_cache_size(%rip), %r8
#endif
	ja	L(256bytesmore_nt)

	ALIGN (4)
L(256bytesmore_normal):
	vmovdqa	%ymm0, (%rcx)
	vmovdqa	%ymm0, 32(%rcx)
	vmovdqa	%ymm0, 64(%rcx)
	vmovdqa	%ymm0, 96(%rcx)
	addq	$128, %rcx
	cmpq	%rcx, %rdx
	jne	L(256bytesmore_normal)
L(return_vzeroupper):
	vzeroupper
	ret

L(256bytesmore_nt):
	// The loop below uses legacy SSE non-temporal stores; clear the upper
	// YMM state first. xmm0 still holds the fill pattern afterwards.
	vzeroupper
	ALIGN (4)
L(256bytesmore_nt_loop):
	movntdq	%xmm0, (%rcx)
	movntdq	%xmm0, 16(%rcx)
	movntdq	%xmm0, 32(%rcx)
	movntdq	%xmm0, 48(%rcx)
	movntdq	%xmm0, 64(%rcx)
	movntdq	%xmm0, 80(%rcx)
	movntdq	%xmm0, 96(%rcx)
	movntdq	%xmm0, 112(%rcx)
	leaq	128(%rcx), %rcx
	cmpq	%rcx, %rdx
	jne	L(256bytesmore_nt_loop)
	sfence					// order the non-temporal stores before returning
	ret

END(memset_avx2)
|
|
@ -1,140 +0,0 @@
|
|||
/*
|
||||
Copyright (C) 2019 The Android Open Source Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <private/bionic_asm.h>
|
||||
|
||||
/* Name of the exported symbol; may be overridden by defining WMEMSET first. */
#ifndef WMEMSET
#define WMEMSET wmemset_avx2
#endif

        .section .text.avx2,"ax",@progbits

/*
 * wmemset for x86-64 using AVX2 stores (GAS, AT&T syntax).
 *
 * In:   %rdi = destination, %esi = 32-bit fill value,
 *       %rdx = count of 4-byte elements to store
 * Out:  %rax = destination
 * Uses: %rcx, %rdx, %r8, %r9, %r10, %ymm0, flags; no stack.
 *
 * Layout of the work:
 *   1. 1024-byte blocks (32 x 32-byte stores) while at least 8 chunks remain,
 *   2. up to 7 remaining 128-byte chunks (4 x 32-byte stores each),
 *   3. a scalar loop for the last (count % 32) elements.
 */
ENTRY(WMEMSET)
	testq	%rdx, %rdx
	je	.Lwms_done		# count == 0: nothing to store
	cmpq	$32, %rdx
	jae	.Lwms_vector

	# Fewer than 32 elements: scalar loop only.
	xorl	%r8d, %r8d		# r8 = elements handled by vector code (none)
	movq	%rdi, %rax		# rax = scalar store cursor
	jmp	.Lwms_tail_setup

.Lwms_vector:
	movq	%rdx, %r8
	andq	$-32, %r8		# r8 = count rounded down to a multiple of 32
	vmovd	%esi, %xmm0
	vpbroadcastd	%xmm0, %ymm0	# ymm0 = fill value in all 8 lanes
	leaq	-32(%r8), %rcx
	movq	%rcx, %rax
	shrq	$5, %rax		# rax = number of 128-byte chunks - 1
	leal	1(%rax), %r9d
	andl	$7, %r9d		# r9 = chunk count modulo 8
	cmpq	$224, %rcx
	jae	.Lwms_blocks		# at least 8 chunks: run the block loop

	xorl	%eax, %eax		# rax = element index reached so far (0)
	testq	%r9, %r9
	jne	.Lwms_chunks
	jmp	.Lwms_after_vec

.Lwms_blocks:
	leaq	992(%rdi), %rcx		# rcx = destination + 992 (bias for the offsets below)
	leaq	-1(%r9), %r10
	subq	%rax, %r10		# r10 = -(chunks rounded down to a multiple of 8)
	xorl	%eax, %eax		# rax = element index
	.p2align 4, 0x90
.Lwms_block_loop:			# one iteration = 32 stores = 1024 bytes
	vmovdqu	%ymm0, -992(%rcx,%rax,4)
	vmovdqu	%ymm0, -960(%rcx,%rax,4)
	vmovdqu	%ymm0, -928(%rcx,%rax,4)
	vmovdqu	%ymm0, -896(%rcx,%rax,4)
	vmovdqu	%ymm0, -864(%rcx,%rax,4)
	vmovdqu	%ymm0, -832(%rcx,%rax,4)
	vmovdqu	%ymm0, -800(%rcx,%rax,4)
	vmovdqu	%ymm0, -768(%rcx,%rax,4)
	vmovdqu	%ymm0, -736(%rcx,%rax,4)
	vmovdqu	%ymm0, -704(%rcx,%rax,4)
	vmovdqu	%ymm0, -672(%rcx,%rax,4)
	vmovdqu	%ymm0, -640(%rcx,%rax,4)
	vmovdqu	%ymm0, -608(%rcx,%rax,4)
	vmovdqu	%ymm0, -576(%rcx,%rax,4)
	vmovdqu	%ymm0, -544(%rcx,%rax,4)
	vmovdqu	%ymm0, -512(%rcx,%rax,4)
	vmovdqu	%ymm0, -480(%rcx,%rax,4)
	vmovdqu	%ymm0, -448(%rcx,%rax,4)
	vmovdqu	%ymm0, -416(%rcx,%rax,4)
	vmovdqu	%ymm0, -384(%rcx,%rax,4)
	vmovdqu	%ymm0, -352(%rcx,%rax,4)
	vmovdqu	%ymm0, -320(%rcx,%rax,4)
	vmovdqu	%ymm0, -288(%rcx,%rax,4)
	vmovdqu	%ymm0, -256(%rcx,%rax,4)
	vmovdqu	%ymm0, -224(%rcx,%rax,4)
	vmovdqu	%ymm0, -192(%rcx,%rax,4)
	vmovdqu	%ymm0, -160(%rcx,%rax,4)
	vmovdqu	%ymm0, -128(%rcx,%rax,4)
	vmovdqu	%ymm0, -96(%rcx,%rax,4)
	vmovdqu	%ymm0, -64(%rcx,%rax,4)
	vmovdqu	%ymm0, -32(%rcx,%rax,4)
	vmovdqu	%ymm0, (%rcx,%rax,4)
	addq	$256, %rax		# imm = 0x100 (elements per block)
	addq	$8, %r10		# 8 chunks consumed; reaches zero when done
	jne	.Lwms_block_loop

	testq	%r9, %r9
	je	.Lwms_after_vec		# no leftover 128-byte chunks

.Lwms_chunks:
	leaq	(%rdi,%rax,4), %rax
	addq	$96, %rax		# rax = chunk base + 96 (bias for the offsets below)
	negq	%r9			# count up from -(leftover chunks) to zero
	.p2align 4, 0x90
.Lwms_chunk_loop:			# one iteration = 4 stores = 128 bytes
	vmovdqu	%ymm0, -96(%rax)
	vmovdqu	%ymm0, -64(%rax)
	vmovdqu	%ymm0, -32(%rax)
	vmovdqu	%ymm0, (%rax)
	subq	$-128, %rax		# advance by 128 bytes
	addq	$1, %r9
	jne	.Lwms_chunk_loop

.Lwms_after_vec:
	cmpq	%rdx, %r8
	je	.Lwms_done		# count was a multiple of 32: no scalar tail
	leaq	(%rdi,%r8,4), %rax	# rax = first element not yet written

.Lwms_tail_setup:
	subq	%r8, %rdx		# rdx = elements left for the scalar loop
	.p2align 4, 0x90
.Lwms_tail_loop:
	movl	%esi, (%rax)
	addq	$4, %rax
	addq	$-1, %rdx
	jne	.Lwms_tail_loop

.Lwms_done:
	movq	%rdi, %rax		# return the destination pointer
	vzeroupper
	retq
END(WMEMSET)
|
|
@ -41,16 +41,16 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
|
||||
ENTRY(__memset_chk)
|
||||
ENTRY(__memset_chk_generic)
|
||||
# %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len
|
||||
cmp %rcx, %rdx
|
||||
ja __memset_chk_fail
|
||||
// Fall through to memset...
|
||||
END(__memset_chk)
|
||||
END(__memset_chk_generic)
|
||||
|
||||
|
||||
.section .text.sse2,"ax",@progbits
|
||||
ENTRY(memset)
|
||||
ENTRY(memset_generic)
|
||||
movq %rdi, %rax
|
||||
and $0xff, %rsi
|
||||
mov $0x0101010101010101, %rcx
|
||||
|
@ -146,4 +146,4 @@ L(128bytesmore_nt):
|
|||
sfence
|
||||
ret
|
||||
|
||||
END(memset)
|
||||
END(memset_generic)
|
||||
|
|
Loading…
Reference in a new issue