Add avx2 version of wmemset in binoic
Test: ./tests/run-on-host.sh 64 Change-Id: Id2f696cc60a10c01846ca3fe0d3a5d513020afe3 Signed-off-by: Shalini Salomi Bodapati <shalini.salomi.bodapati@intel.com>
This commit is contained in:
parent
cfede4fdd1
commit
4ed2f475d8
6 changed files with 319 additions and 0 deletions
|
@ -271,6 +271,7 @@ cc_library_static {
|
|||
"upstream-freebsd/lib/libc/string/wcscat.c",
|
||||
"upstream-freebsd/lib/libc/string/wcscpy.c",
|
||||
"upstream-freebsd/lib/libc/string/wmemcmp.c",
|
||||
"upstream-freebsd/lib/libc/string/wmemset.c",
|
||||
],
|
||||
},
|
||||
},
|
||||
|
@ -899,6 +900,7 @@ cc_library_static {
|
|||
"arch-x86/generic/string/wcscat.c",
|
||||
"arch-x86/generic/string/wcscpy.c",
|
||||
"arch-x86/generic/string/wmemcmp.c",
|
||||
"arch-x86/generic/string/wmemset.c",
|
||||
|
||||
"arch-x86/atom/string/sse2-memchr-atom.S",
|
||||
"arch-x86/atom/string/sse2-memrchr-atom.S",
|
||||
|
@ -947,6 +949,9 @@ cc_library_static {
|
|||
"arch-x86/atom/string/ssse3-strcpy-atom.S",
|
||||
"arch-x86/atom/string/ssse3-strncpy-atom.S",
|
||||
"arch-x86/atom/string/ssse3-wmemcmp-atom.S",
|
||||
|
||||
// avx2 functions
|
||||
"arch-x86/kabylake/string/avx2-wmemset-kbl.S",
|
||||
],
|
||||
|
||||
exclude_srcs: [
|
||||
|
@ -969,6 +974,7 @@ cc_library_static {
|
|||
"arch-x86_64/string/sse4-memcmp-slm.S",
|
||||
"arch-x86_64/string/ssse3-strcmp-slm.S",
|
||||
"arch-x86_64/string/ssse3-strncmp-slm.S",
|
||||
"arch-x86_64/string/avx2-wmemset-kbl.S",
|
||||
|
||||
"arch-x86_64/bionic/__bionic_clone.S",
|
||||
"arch-x86_64/bionic/_exit_with_stack_teardown.S",
|
||||
|
|
|
@ -107,6 +107,13 @@ DEFINE_IFUNC_FOR(wmemcmp) {
|
|||
RETURN_FUNC(wmemcmp_func, wmemcmp_freebsd);
|
||||
}
|
||||
|
||||
typedef int wmemset_func(const wchar_t* __lhs, const wchar_t* __rhs, size_t __n);
|
||||
DEFINE_IFUNC_FOR(wmemset) {
|
||||
__builtin_cpu_init();
|
||||
if (__builtin_cpu_supports("avx2")) RETURN_FUNC(wmemset_func, wmemset_avx2);
|
||||
RETURN_FUNC(wmemset_func, wmemset_freebsd);
|
||||
}
|
||||
|
||||
typedef int strcmp_func(const char* __lhs, const char* __rhs);
|
||||
DEFINE_IFUNC_FOR(strcmp) {
|
||||
__builtin_cpu_init();
|
||||
|
|
17
libc/arch-x86/generic/string/wmemset.c
Normal file
17
libc/arch-x86/generic/string/wmemset.c
Normal file
|
@ -0,0 +1,17 @@
|
|||
// Copyright (C) 2019 The Android Open Source Project
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#define wmemset wmemset_freebsd
|
||||
|
||||
#include <upstream-freebsd/lib/libc/string/wmemset.c>
|
148
libc/arch-x86/kabylake/string/avx2-wmemset-kbl.S
Normal file
148
libc/arch-x86/kabylake/string/avx2-wmemset-kbl.S
Normal file
|
@ -0,0 +1,148 @@
|
|||
/*
|
||||
Copyright (C) 2019 The Android Open Source Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <private/bionic_asm.h>
|
||||
|
||||
#ifndef WMEMSET
|
||||
#define WMEMSET wmemset_avx2
|
||||
#endif
|
||||
|
||||
ENTRY(WMEMSET)
|
||||
# BB#0:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
pushl %eax
|
||||
movl 32(%esp), %ecx
|
||||
movl 24(%esp), %eax
|
||||
testl %ecx, %ecx
|
||||
je .LBB0_12
|
||||
# BB#1:
|
||||
movl 28(%esp), %edx
|
||||
xorl %edi, %edi
|
||||
movl %eax, %esi
|
||||
cmpl $32, %ecx
|
||||
jb .LBB0_10
|
||||
# BB#2:
|
||||
movl %ecx, %eax
|
||||
andl $-32, %eax
|
||||
vmovd %edx, %xmm0
|
||||
vpbroadcastd %xmm0, %ymm0
|
||||
movl %eax, (%esp) # 4-byte Spill
|
||||
leal -32(%eax), %esi
|
||||
movl %esi, %eax
|
||||
shrl $5, %eax
|
||||
leal 1(%eax), %edi
|
||||
andl $7, %edi
|
||||
xorl %ebx, %ebx
|
||||
cmpl $224, %esi
|
||||
jb .LBB0_5
|
||||
# BB#3:
|
||||
movl 24(%esp), %esi
|
||||
leal 992(%esi), %ebp
|
||||
leal -1(%edi), %esi
|
||||
subl %eax, %esi
|
||||
xorl %ebx, %ebx
|
||||
.p2align 4, 0x90
|
||||
.LBB0_4: # =>This Inner Loop Header: Depth=1
|
||||
vmovdqu %ymm0, -992(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -960(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -928(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -896(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -864(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -832(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -800(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -768(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -736(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -704(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -672(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -640(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -608(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -576(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -544(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -512(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -480(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -448(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -416(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -384(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -352(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -320(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -288(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -256(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -224(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -192(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -160(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -128(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -96(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -64(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, -32(%ebp,%ebx,4)
|
||||
vmovdqu %ymm0, (%ebp,%ebx,4)
|
||||
addl $256, %ebx # imm = 0x100
|
||||
addl $8, %esi
|
||||
jne .LBB0_4
|
||||
.LBB0_5:
|
||||
testl %edi, %edi
|
||||
movl 24(%esp), %eax
|
||||
je .LBB0_8
|
||||
# BB#6:
|
||||
leal (%eax,%ebx,4), %esi
|
||||
addl $96, %esi
|
||||
negl %edi
|
||||
.p2align 4, 0x90
|
||||
.LBB0_7: # =>This Inner Loop Header: Depth=1
|
||||
vmovdqu %ymm0, -96(%esi)
|
||||
vmovdqu %ymm0, -64(%esi)
|
||||
vmovdqu %ymm0, -32(%esi)
|
||||
vmovdqu %ymm0, (%esi)
|
||||
subl $-128, %esi
|
||||
addl $1, %edi
|
||||
jne .LBB0_7
|
||||
.LBB0_8:
|
||||
movl (%esp), %edi # 4-byte Reload
|
||||
cmpl %ecx, %edi
|
||||
je .LBB0_12
|
||||
# BB#9:
|
||||
leal (%eax,%edi,4), %esi
|
||||
.LBB0_10:
|
||||
subl %edi, %ecx
|
||||
.p2align 4, 0x90
|
||||
.LBB0_11: # =>This Inner Loop Header: Depth=1
|
||||
movl %edx, (%esi)
|
||||
addl $4, %esi
|
||||
addl $-1, %ecx
|
||||
jne .LBB0_11
|
||||
.LBB0_12:
|
||||
addl $4, %esp
|
||||
popl %esi
|
||||
popl %edi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
vzeroupper
|
||||
retl
|
||||
END(WMEMSET)
|
|
@ -45,6 +45,7 @@ FUNCTION_DELEGATE(strcmp, strcmp_generic)
|
|||
FUNCTION_DELEGATE(strncmp, strncmp_generic)
|
||||
FUNCTION_DELEGATE(strcat, strcat_generic)
|
||||
FUNCTION_DELEGATE(wmemcmp, wmemcmp_freebsd)
|
||||
FUNCTION_DELEGATE(wmemset, wmemset_freebsd)
|
||||
FUNCTION_DELEGATE(wcscat, wcscat_freebsd)
|
||||
FUNCTION_DELEGATE(strncat, strncat_openbsd)
|
||||
FUNCTION_DELEGATE(strlcat, strlcat_openbsd)
|
||||
|
|
140
libc/arch-x86_64/string/avx2-wmemset-kbl.S
Normal file
140
libc/arch-x86_64/string/avx2-wmemset-kbl.S
Normal file
|
@ -0,0 +1,140 @@
|
|||
/*
|
||||
Copyright (C) 2019 The Android Open Source Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <private/bionic_asm.h>
|
||||
|
||||
#ifndef WMEMSET
|
||||
#define WMEMSET wmemset_avx2
|
||||
#endif
|
||||
|
||||
.section .text.avx2,"ax",@progbits
|
||||
|
||||
ENTRY (WMEMSET)
|
||||
# BB#0:
|
||||
testq %rdx, %rdx
|
||||
je .LBB0_14
|
||||
# BB#1:
|
||||
cmpq $32, %rdx
|
||||
jae .LBB0_3
|
||||
# BB#2:
|
||||
xorl %r8d, %r8d
|
||||
movq %rdi, %rax
|
||||
jmp .LBB0_12
|
||||
.LBB0_3:
|
||||
movq %rdx, %r8
|
||||
andq $-32, %r8
|
||||
vmovd %esi, %xmm0
|
||||
vpbroadcastd %xmm0, %ymm0
|
||||
leaq -32(%r8), %rcx
|
||||
movq %rcx, %rax
|
||||
shrq $5, %rax
|
||||
leal 1(%rax), %r9d
|
||||
andl $7, %r9d
|
||||
cmpq $224, %rcx
|
||||
jae .LBB0_5
|
||||
# BB#4:
|
||||
xorl %eax, %eax
|
||||
testq %r9, %r9
|
||||
jne .LBB0_8
|
||||
jmp .LBB0_10
|
||||
.LBB0_5:
|
||||
leaq 992(%rdi), %rcx
|
||||
leaq -1(%r9), %r10
|
||||
subq %rax, %r10
|
||||
xorl %eax, %eax
|
||||
.p2align 4, 0x90
|
||||
.LBB0_6: # =>This Inner Loop Header: Depth=1
|
||||
vmovdqu %ymm0, -992(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -960(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -928(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -896(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -864(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -832(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -800(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -768(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -736(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -704(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -672(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -640(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -608(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -576(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -544(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -512(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -480(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -448(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -416(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -384(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -352(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -320(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -288(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -256(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -224(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -192(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -160(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -128(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -96(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -64(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, -32(%rcx,%rax,4)
|
||||
vmovdqu %ymm0, (%rcx,%rax,4)
|
||||
addq $256, %rax # imm = 0x100
|
||||
addq $8, %r10
|
||||
jne .LBB0_6
|
||||
# BB#7:
|
||||
testq %r9, %r9
|
||||
je .LBB0_10
|
||||
.LBB0_8:
|
||||
leaq (%rdi,%rax,4), %rax
|
||||
addq $96, %rax
|
||||
negq %r9
|
||||
.p2align 4, 0x90
|
||||
.LBB0_9: # =>This Inner Loop Header: Depth=1
|
||||
vmovdqu %ymm0, -96(%rax)
|
||||
vmovdqu %ymm0, -64(%rax)
|
||||
vmovdqu %ymm0, -32(%rax)
|
||||
vmovdqu %ymm0, (%rax)
|
||||
subq $-128, %rax
|
||||
addq $1, %r9
|
||||
jne .LBB0_9
|
||||
.LBB0_10:
|
||||
cmpq %rdx, %r8
|
||||
je .LBB0_14
|
||||
# BB#11:
|
||||
leaq (%rdi,%r8,4), %rax
|
||||
.LBB0_12:
|
||||
subq %r8, %rdx
|
||||
.p2align 4, 0x90
|
||||
.LBB0_13: # =>This Inner Loop Header: Depth=1
|
||||
movl %esi, (%rax)
|
||||
addq $4, %rax
|
||||
addq $-1, %rdx
|
||||
jne .LBB0_13
|
||||
.LBB0_14:
|
||||
movq %rdi, %rax
|
||||
vzeroupper
|
||||
retq
|
||||
END(WMEMSET)
|
Loading…
Reference in a new issue