Remove pushes from memsets (krait/cortex-a9).
On the path that only uses r0 in both the krait and cortex-a9 memset, remove the push and use r3 instead. In addition, for cortex-a9, remove the artificial function, which is no longer needed now that DWARF unwinding is supported on arm. Change-Id: Ia4ed1cc435b03627a7193215e76c8ea3335f949a
This commit is contained in:
parent
adc5795fb7
commit
8264cbba7e
2 changed files with 19 additions and 28 deletions
|
@ -69,12 +69,9 @@ END(bzero)
|
||||||
ENTRY(memset)
|
ENTRY(memset)
|
||||||
// The neon memset only wins for less than 132.
|
// The neon memset only wins for less than 132.
|
||||||
cmp r2, #132
|
cmp r2, #132
|
||||||
bhi __memset_large_copy
|
bhi .L_memset_large_copy
|
||||||
|
|
||||||
stmfd sp!, {r0}
|
|
||||||
.cfi_def_cfa_offset 4
|
|
||||||
.cfi_rel_offset r0, 0
|
|
||||||
|
|
||||||
|
mov r3, r0
|
||||||
vdup.8 q0, r1
|
vdup.8 q0, r1
|
||||||
|
|
||||||
/* make sure we have at least 32 bytes to write */
|
/* make sure we have at least 32 bytes to write */
|
||||||
|
@ -84,7 +81,7 @@ ENTRY(memset)
|
||||||
|
|
||||||
1: /* The main loop writes 32 bytes at a time */
|
1: /* The main loop writes 32 bytes at a time */
|
||||||
subs r2, r2, #32
|
subs r2, r2, #32
|
||||||
vst1.8 {d0 - d3}, [r0]!
|
vst1.8 {d0 - d3}, [r3]!
|
||||||
bhs 1b
|
bhs 1b
|
||||||
|
|
||||||
2: /* less than 32 left */
|
2: /* less than 32 left */
|
||||||
|
@ -93,22 +90,20 @@ ENTRY(memset)
|
||||||
beq 3f
|
beq 3f
|
||||||
|
|
||||||
// writes 16 bytes, 128-bits aligned
|
// writes 16 bytes, 128-bits aligned
|
||||||
vst1.8 {d0, d1}, [r0]!
|
vst1.8 {d0, d1}, [r3]!
|
||||||
3: /* write up to 15-bytes (count in r2) */
|
3: /* write up to 15-bytes (count in r2) */
|
||||||
movs ip, r2, lsl #29
|
movs ip, r2, lsl #29
|
||||||
bcc 1f
|
bcc 1f
|
||||||
vst1.8 {d0}, [r0]!
|
vst1.8 {d0}, [r3]!
|
||||||
1: bge 2f
|
1: bge 2f
|
||||||
vst1.32 {d0[0]}, [r0]!
|
vst1.32 {d0[0]}, [r3]!
|
||||||
2: movs ip, r2, lsl #31
|
2: movs ip, r2, lsl #31
|
||||||
strbmi r1, [r0], #1
|
strbmi r1, [r3], #1
|
||||||
strbcs r1, [r0], #1
|
strbcs r1, [r3], #1
|
||||||
strbcs r1, [r0], #1
|
strbcs r1, [r3], #1
|
||||||
ldmfd sp!, {r0}
|
|
||||||
bx lr
|
bx lr
|
||||||
END(memset)
|
|
||||||
|
|
||||||
ENTRY_PRIVATE(__memset_large_copy)
|
.L_memset_large_copy:
|
||||||
/* compute the offset to align the destination
|
/* compute the offset to align the destination
|
||||||
* offset = (4-(src&3))&3 = -src & 3
|
* offset = (4-(src&3))&3 = -src & 3
|
||||||
*/
|
*/
|
||||||
|
@ -180,7 +175,7 @@ ENTRY_PRIVATE(__memset_large_copy)
|
||||||
movs r2, r2, lsl #2
|
movs r2, r2, lsl #2
|
||||||
strbcs r1, [r0]
|
strbcs r1, [r0]
|
||||||
ldmfd sp!, {r0, r4-r7, pc}
|
ldmfd sp!, {r0, r4-r7, pc}
|
||||||
END(__memset_large_copy)
|
END(memset)
|
||||||
|
|
||||||
.data
|
.data
|
||||||
error_string:
|
error_string:
|
||||||
|
|
|
@ -69,10 +69,7 @@ END(bzero)
|
||||||
|
|
||||||
/* memset() returns its first argument. */
|
/* memset() returns its first argument. */
|
||||||
ENTRY(memset)
|
ENTRY(memset)
|
||||||
stmfd sp!, {r0}
|
mov r3, r0
|
||||||
.cfi_def_cfa_offset 4
|
|
||||||
.cfi_rel_offset r0, 0
|
|
||||||
|
|
||||||
vdup.8 q0, r1
|
vdup.8 q0, r1
|
||||||
|
|
||||||
/* make sure we have at least 32 bytes to write */
|
/* make sure we have at least 32 bytes to write */
|
||||||
|
@ -82,7 +79,7 @@ ENTRY(memset)
|
||||||
|
|
||||||
1: /* The main loop writes 32 bytes at a time */
|
1: /* The main loop writes 32 bytes at a time */
|
||||||
subs r2, r2, #32
|
subs r2, r2, #32
|
||||||
vst1.8 {d0 - d3}, [r0]!
|
vst1.8 {d0 - d3}, [r3]!
|
||||||
bhs 1b
|
bhs 1b
|
||||||
|
|
||||||
2: /* less than 32 left */
|
2: /* less than 32 left */
|
||||||
|
@ -91,18 +88,17 @@ ENTRY(memset)
|
||||||
beq 3f
|
beq 3f
|
||||||
|
|
||||||
// writes 16 bytes, 128-bits aligned
|
// writes 16 bytes, 128-bits aligned
|
||||||
vst1.8 {d0, d1}, [r0]!
|
vst1.8 {d0, d1}, [r3]!
|
||||||
3: /* write up to 15-bytes (count in r2) */
|
3: /* write up to 15-bytes (count in r2) */
|
||||||
movs ip, r2, lsl #29
|
movs ip, r2, lsl #29
|
||||||
bcc 1f
|
bcc 1f
|
||||||
vst1.8 {d0}, [r0]!
|
vst1.8 {d0}, [r3]!
|
||||||
1: bge 2f
|
1: bge 2f
|
||||||
vst1.32 {d0[0]}, [r0]!
|
vst1.32 {d0[0]}, [r3]!
|
||||||
2: movs ip, r2, lsl #31
|
2: movs ip, r2, lsl #31
|
||||||
strbmi r1, [r0], #1
|
strbmi r1, [r3], #1
|
||||||
strbcs r1, [r0], #1
|
strbcs r1, [r3], #1
|
||||||
strbcs r1, [r0], #1
|
strbcs r1, [r3], #1
|
||||||
ldmfd sp!, {r0}
|
|
||||||
bx lr
|
bx lr
|
||||||
END(memset)
|
END(memset)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue