ea9957a72a
Previous change was reverted in 9690b121e3.
This change adds an .arch directive to kryo/ to avoid an invalid-instruction error.
Test: Run bionic unit test.
Test: Use gdb to make sure the right function is selected.
Test: Build previously failed target: make PRODUCT-sdk_phone_arm64-sdk
Change-Id: I14de41851121fc1a0b38c98fda5eb844b6a9695c
193 lines
4.8 KiB
ArmAsm
193 lines
4.8 KiB
ArmAsm
/***************************************************************************
|
|
Copyright (c) 2009-2013 The Linux Foundation. All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are met:
|
|
* Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
* Neither the name of The Linux Foundation nor the names of its contributors may
|
|
be used to endorse or promote products derived from this software
|
|
without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
POSSIBILITY OF SUCH DAMAGE.
|
|
***************************************************************************/
|
|
|
|
/* Assumes neon instructions and a cache line size of 64 bytes. */
|
|
|
|
// Prefetch look-ahead, counted in PLDSIZE-byte lines. It is used below as
// the byte offset PLDOFFS*PLDSIZE added to the read pointer r1.
#define PLDOFFS (10)
|
|
// Copies of at most this many 64-byte blocks go straight to the loop that
// does no prefetching (.L_neon_copy_64_loop_nopld).
#define PLDTHRESH (PLDOFFS)
|
|
// Copies of at most this many 64-byte blocks (4096 bytes) skip the
// look-ahead distance calculation and use .L_neon_prime_pump instead.
#define BBTHRESH (4096/64)
|
|
// Size in bytes of one prefetched line; matches the 64-byte cache line
// assumed by this file.
#define PLDSIZE (64)
|
|
|
|
// (PLDOFFS-1)*PLDSIZE is used as a pld offset below, so the look-ahead must
// be at least one line.
#if (PLDOFFS < 1)
|
|
#error Routine does not support offsets less than 1
|
|
#endif
|
|
|
|
// .L_neon_prime_pump subtracts PLDOFFS from a block count that has already
// been checked to exceed PLDTHRESH; this guard keeps that result positive.
#if (PLDTHRESH < PLDOFFS)
|
|
#error PLD threshold must be greater than or equal to the PLD offset
|
|
#endif
|
|
|
|
// Emit what follows into the code section.
.text
|
|
// The conditional mnemonics below (e.g. rsbsgt, ldrhcs) and the IT
// instructions are written in unified syntax.
.syntax unified
|
|
// Allow the NEON load/store instructions (vld1/vst1/vld4/vst4) used below.
.fpu neon
|
|
|
|
// To avoid warning about deprecated instructions, add an explicit
|
|
// arch. The code generated is exactly the same.
|
|
.arch armv7-a
|
|
|
|
// Forward copy core. As used below: r0 is the write pointer, r1 the read
// pointer and r2 the byte count. r3, r12 and lr are scratch; r9 and r10 are
// saved on the stack only on the prefetching paths.
// NOTE(review): the entry point that includes this fragment is not visible
// here. lr is overwritten below and the code returns with `pop {r0, pc}`,
// so the caller presumably pushed {r0, lr} first - confirm.
.L_memcpy_base:
|
|
// Size dispatch (signed compares): fewer than 4, 16, 32 or 64 bytes each
// jump straight to the matching tail handler.
cmp r2, #4
|
|
blt .L_neon_lt4
|
|
cmp r2, #16
|
|
blt .L_neon_lt16
|
|
// The flags of this compare are reused by the bpl at .L_neon_16.
cmp r2, #32
|
|
blt .L_neon_16
|
|
cmp r2, #64
|
|
blt .L_neon_copy_32_a
|
|
|
|
// r12 = number of whole 64-byte blocks to copy.
mov r12, r2, lsr #6
|
|
// Few blocks: copy them without any prefetching.
cmp r12, #PLDTHRESH
|
|
ble .L_neon_copy_64_loop_nopld
|
|
|
|
// The prefetching paths need two more registers; the CFI directives record
// the 8 bytes pushed and where r9/r10 were stored.
push {r9, r10}
|
|
.cfi_adjust_cfa_offset 8
|
|
.cfi_rel_offset r9, 0
|
|
.cfi_rel_offset r10, 4
|
|
|
|
// At most BBTHRESH blocks: use the fixed PLDOFFS-line look-ahead.
cmp r12, #BBTHRESH
|
|
ble .L_neon_prime_pump
|
|
|
|
// Compute a look-ahead distance in bytes in lr:
//   lr = (((r0 + 0x400) - (r1 + PLDOFFS*PLDSIZE)) & 0x7ff) + PLDOFFS*PLDSIZE
// The lsl/lsr #21 pair keeps only the low 11 bits of the difference.
add lr, r0, #0x400
|
|
add r9, r1, #(PLDOFFS*PLDSIZE)
|
|
sub lr, lr, r9
|
|
lsl lr, lr, #21
|
|
lsr lr, lr, #21
|
|
add lr, lr, #(PLDOFFS*PLDSIZE)
|
|
// If the copy is no longer than the look-ahead (in blocks), fall back to
// the fixed look-ahead.
cmp r12, lr, lsr #6
|
|
ble .L_neon_prime_pump
|
|
|
|
// r9 = (lr >> 6) - PLDOFFS, setting flags. The ble above already removed
// the le case, so both gt-conditional instructions execute. A result of
// zero or less also falls back to the fixed look-ahead.
itt gt
|
|
movgt r9, #(PLDOFFS)
|
|
rsbsgt r9, r9, lr, lsr #6
|
|
ble .L_neon_prime_pump
|
|
|
|
// r10 = address lr bytes ahead of the read pointer, rounded down to a
// 64-byte boundary; the copy loops touch or prefetch memory at r10.
add r10, r1, lr
|
|
bic r10, #0x3F
|
|
|
|
// Hold back lr >> 6 blocks; .L_neon_pop_before_nopld hands them to the
// loop without prefetch at the end.
sub r12, r12, lr, lsr #6
|
|
|
|
// Split the remaining blocks: r9 = min(r9, r12) iterations for the
// double-prefetch loop, r12 = whatever is left for the loop after it.
cmp r9, r12
|
|
itee le
|
|
suble r12, r12, r9
|
|
movgt r9, r12
|
|
movgt r12, #0
|
|
|
|
// Prefetch the line just before the first one the double-prefetch loop
// will request.
pld [r1, #((PLDOFFS-1)*PLDSIZE)]
|
|
// Copies r9 blocks of 64 bytes. Each iteration prefetches PLDOFFS lines
// ahead of r1 and also loads a word from the far address r10. On entry
// r9 is the iteration count, r10 the far address and r12 the number of
// blocks left for the loop that follows.
.L_neon_copy_64_loop_outer_doublepld:
|
|
pld [r1, #((PLDOFFS)*PLDSIZE)]
|
|
// Load 64 bytes from r1, advancing r1.
vld1.32 {q0, q1}, [r1]!
|
|
vld1.32 {q2, q3}, [r1]!
|
|
// Touch the far line; the value loaded into r3 is not used by the visible
// code.
ldr r3, [r10]
|
|
// Count down here; the bne below consumes these flags after the stores.
subs r9, r9, #1
|
|
// Store the 64 bytes to r0, advancing r0.
vst1.32 {q0, q1}, [r0]!
|
|
vst1.32 {q2, q3}, [r0]!
|
|
// Advance the far address by one 64-byte line.
add r10, #64
|
|
bne .L_neon_copy_64_loop_outer_doublepld
|
|
// Nothing left for a second loop: restore registers and finish.
cmp r12, #0
|
|
beq .L_neon_pop_before_nopld
|
|
|
|
// Fewer than 512*1024/64 blocks left (under 512 KiB): continue with the
// loop that touches the far line with ldr. Otherwise fall through to
// .L_neon_copy_64_loop_ddr.
cmp r12, #(512*1024/64)
|
|
blt .L_neon_copy_64_loop_outer
|
|
|
|
// Copies r12 blocks of 64 bytes, issuing a pld on the far address r10 each
// iteration. Reached by fall-through when at least 512*1024/64 blocks
// remain after the double-prefetch loop.
.L_neon_copy_64_loop_ddr:
|
|
// Load 64 bytes from r1, advancing r1.
vld1.32 {q0, q1}, [r1]!
|
|
vld1.32 {q2, q3}, [r1]!
|
|
// Prefetch hint for the far line (no register is written).
pld [r10]
|
|
// Count down here; the bne below consumes these flags after the stores.
subs r12, r12, #1
|
|
// Store the 64 bytes to r0, advancing r0.
vst1.32 {q0, q1}, [r0]!
|
|
vst1.32 {q2, q3}, [r0]!
|
|
// Advance the far address by one 64-byte line.
add r10, #64
|
|
bne .L_neon_copy_64_loop_ddr
|
|
// Done: restore r9/r10 and copy the blocks that were held back.
b .L_neon_pop_before_nopld
|
|
|
|
// Set-up for the fixed look-ahead path: the far address runs PLDOFFS lines
// ahead of the read pointer, and PLDOFFS blocks are held back for the loop
// without prefetch.
.L_neon_prime_pump:
|
|
// lr = look-ahead in bytes; .L_neon_pop_before_nopld turns it back into a
// block count (lr >> 6).
mov lr, #(PLDOFFS*PLDSIZE)
|
|
// r10 = r1 + look-ahead, rounded down to a 64-byte boundary.
add r10, r1, #(PLDOFFS*PLDSIZE)
|
|
bic r10, #0x3F
|
|
// Hold back PLDOFFS blocks from the count for the loop below.
sub r12, r12, #PLDOFFS
|
|
// Touch the line just before r10; the value loaded into r3 is not used.
ldr r3, [r10, #(-1*PLDSIZE)]
|
|
|
|
// Copies r12 blocks of 64 bytes, loading one word from the far address r10
// each iteration. Entered by fall-through from .L_neon_prime_pump or by
// branch after the double-prefetch loop.
.L_neon_copy_64_loop_outer:
|
|
// Load 64 bytes from r1, advancing r1.
vld1.32 {q0, q1}, [r1]!
|
|
vld1.32 {q2, q3}, [r1]!
|
|
// Touch the far line; the value loaded into r3 is not used by the visible
// code.
ldr r3, [r10]
|
|
// Count down here; the bne below consumes these flags after the stores.
subs r12, r12, #1
|
|
// Store the 64 bytes to r0, advancing r0.
vst1.32 {q0, q1}, [r0]!
|
|
vst1.32 {q2, q3}, [r0]!
|
|
// Advance the far address by one 64-byte line.
add r10, #64
|
|
bne .L_neon_copy_64_loop_outer
|
|
|
|
// Common exit of the prefetching loops: reload the block counter with the
// blocks that were held back as look-ahead, restore r9/r10, and fall into
// the loop without prefetch.
.L_neon_pop_before_nopld:
|
|
// r12 = look-ahead distance in 64-byte blocks (lr holds it in bytes).
mov r12, lr, lsr #6
|
|
// Undo the push {r9, r10} and the matching unwind annotations.
pop {r9, r10}
|
|
.cfi_adjust_cfa_offset -8
|
|
.cfi_restore r9
|
|
.cfi_restore r10
|
|
|
|
// Copies r12 blocks of 64 bytes with no prefetching, then reduces r2 to the
// bytes left over after the whole blocks.
.L_neon_copy_64_loop_nopld:
|
|
// Load 64 bytes from r1, advancing r1.
vld1.32 {q8, q9}, [r1]!
|
|
vld1.32 {q10, q11}, [r1]!
|
|
// Count down here; the bne below consumes these flags after the stores.
subs r12, r12, #1
|
|
// Store the 64 bytes to r0, advancing r0.
vst1.32 {q8, q9}, [r0]!
|
|
vst1.32 {q10, q11}, [r0]!
|
|
bne .L_neon_copy_64_loop_nopld
|
|
// r2 = byte count modulo 64; if nothing remains the copy is complete.
// Otherwise fall through to the tail handlers.
ands r2, r2, #0x3f
|
|
beq .L_neon_exit
|
|
|
|
// Tail of fewer than 64 bytes: copy one 32-byte chunk if bit 5 of r2 is set.
.L_neon_copy_32_a:
|
|
// Shift bit 5 of r2 into the carry flag and bit 4 into N (bit 31 of the
// result). N is consumed by the bpl at .L_neon_16.
movs r3, r2, lsl #27
|
|
// Carry clear: no 32-byte chunk to copy.
bcc .L_neon_16
|
|
// Copy 32 bytes, advancing both pointers (flags are left untouched).
vld1.32 {q0,q1}, [r1]!
|
|
vst1.32 {q0,q1}, [r0]!
|
|
|
|
// Copy one 16-byte chunk when the N flag is set on arrival. N comes either
// from the movs in .L_neon_copy_32_a (bit 4 of r2) or from the
// `cmp r2, #32` of the size dispatch, whose blt branches here.
.L_neon_16:
|
|
// N clear: no 16-byte chunk, go on to the chunks below 16 bytes.
bpl .L_neon_lt16
|
|
// Copy 16 bytes, advancing both pointers.
vld1.32 {q8}, [r1]!
|
|
vst1.32 {q8}, [r0]!
|
|
// r2 = byte count modulo 16; if nothing remains the copy is complete.
ands r2, r2, #0x0f
|
|
beq .L_neon_exit
|
|
|
|
// Tail of fewer than 16 bytes: copy an 8-byte chunk if bit 3 of r2 is set,
// then a 4-byte chunk if bit 2 is set.
.L_neon_lt16:
|
|
// Shift bit 3 of r2 into the carry flag and bit 2 into N.
movs r3, r2, lsl #29
|
|
// Carry clear: no 8-byte chunk.
bcc 1f
|
|
// Copy 8 bytes, advancing both pointers (flags are left untouched).
vld1.8 {d0}, [r1]!
|
|
vst1.8 {d0}, [r0]!
|
|
1:
|
|
// Skip the 4-byte chunk when bit 2 of r2 is clear.
// NOTE(review): bge tests N == V, and the movs above does not write V, so
// this relies on V being clear on every path that reaches here - confirm.
bge .L_neon_lt4
|
|
// Copy 4 bytes: one byte into lane 0 of each of d0-d3, then store those
// four lanes, advancing both pointers.
vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
|
|
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]!
|
|
|
|
// Tail of fewer than 4 bytes: copy a halfword if bit 1 of r2 is set, then a
// single byte if bit 0 is set.
.L_neon_lt4:
|
|
// Shift bit 1 of r2 into the carry flag and bit 0 into N (bit 31 of the
// result). r2 is overwritten; the count is not needed afterwards.
movs r2, r2, lsl #31
|
|
// Carry set: copy 2 bytes through r3, post-incrementing both pointers.
itt cs
|
|
ldrhcs r3, [r1], #2
|
|
strhcs r3, [r0], #2
|
|
// N set: copy the final byte (the pointers are not advanced).
itt mi
|
|
ldrbmi r3, [r1]
|
|
strbmi r3, [r0]
|
|
|
|
// Common return point: pop r0 and load pc from the stack to return.
// NOTE(review): this presumably pairs with a push {r0, lr} in the entry
// point that includes this fragment, which is not visible here - confirm.
.L_neon_exit:
|
|
pop {r0, pc}
|