Remove pushes from memsets (krait/cortex-a9).

On the path that only uses r0 in both the krait and cortex-a9
memset, remove the push and use r3 instead.

In addition, for cortex-a9, remove the artificial function since
it's no longer needed now that dwarf unwinding is supported on arm.

Change-Id: Ia4ed1cc435b03627a7193215e76c8ea3335f949a
This commit is contained in:
Christopher Ferris 2015-07-27 13:51:31 -07:00
parent adc5795fb7
commit 8264cbba7e
2 changed files with 19 additions and 28 deletions

View file

@@ -69,12 +69,9 @@ END(bzero)
ENTRY(memset) ENTRY(memset)
// The neon memset only wins for less than 132. // The neon memset only wins for less than 132.
cmp r2, #132 cmp r2, #132
bhi __memset_large_copy bhi .L_memset_large_copy
stmfd sp!, {r0}
.cfi_def_cfa_offset 4
.cfi_rel_offset r0, 0
mov r3, r0
vdup.8 q0, r1 vdup.8 q0, r1
/* make sure we have at least 32 bytes to write */ /* make sure we have at least 32 bytes to write */
@@ -84,7 +81,7 @@ ENTRY(memset)
1: /* The main loop writes 32 bytes at a time */ 1: /* The main loop writes 32 bytes at a time */
subs r2, r2, #32 subs r2, r2, #32
vst1.8 {d0 - d3}, [r0]! vst1.8 {d0 - d3}, [r3]!
bhs 1b bhs 1b
2: /* less than 32 left */ 2: /* less than 32 left */
@@ -93,22 +90,20 @@ ENTRY(memset)
beq 3f beq 3f
// writes 16 bytes, 128-bits aligned // writes 16 bytes, 128-bits aligned
vst1.8 {d0, d1}, [r0]! vst1.8 {d0, d1}, [r3]!
3: /* write up to 15-bytes (count in r2) */ 3: /* write up to 15-bytes (count in r2) */
movs ip, r2, lsl #29 movs ip, r2, lsl #29
bcc 1f bcc 1f
vst1.8 {d0}, [r0]! vst1.8 {d0}, [r3]!
1: bge 2f 1: bge 2f
vst1.32 {d0[0]}, [r0]! vst1.32 {d0[0]}, [r3]!
2: movs ip, r2, lsl #31 2: movs ip, r2, lsl #31
strbmi r1, [r0], #1 strbmi r1, [r3], #1
strbcs r1, [r0], #1 strbcs r1, [r3], #1
strbcs r1, [r0], #1 strbcs r1, [r3], #1
ldmfd sp!, {r0}
bx lr bx lr
END(memset)
ENTRY_PRIVATE(__memset_large_copy) .L_memset_large_copy:
/* compute the offset to align the destination /* compute the offset to align the destination
* offset = (4-(src&3))&3 = -src & 3 * offset = (4-(src&3))&3 = -src & 3
*/ */
@@ -180,7 +175,7 @@ ENTRY_PRIVATE(__memset_large_copy)
movs r2, r2, lsl #2 movs r2, r2, lsl #2
strbcs r1, [r0] strbcs r1, [r0]
ldmfd sp!, {r0, r4-r7, pc} ldmfd sp!, {r0, r4-r7, pc}
END(__memset_large_copy) END(memset)
.data .data
error_string: error_string:

View file

@@ -69,10 +69,7 @@ END(bzero)
/* memset() returns its first argument. */ /* memset() returns its first argument. */
ENTRY(memset) ENTRY(memset)
stmfd sp!, {r0} mov r3, r0
.cfi_def_cfa_offset 4
.cfi_rel_offset r0, 0
vdup.8 q0, r1 vdup.8 q0, r1
/* make sure we have at least 32 bytes to write */ /* make sure we have at least 32 bytes to write */
@@ -82,7 +79,7 @@ ENTRY(memset)
1: /* The main loop writes 32 bytes at a time */ 1: /* The main loop writes 32 bytes at a time */
subs r2, r2, #32 subs r2, r2, #32
vst1.8 {d0 - d3}, [r0]! vst1.8 {d0 - d3}, [r3]!
bhs 1b bhs 1b
2: /* less than 32 left */ 2: /* less than 32 left */
@@ -91,18 +88,17 @@ ENTRY(memset)
beq 3f beq 3f
// writes 16 bytes, 128-bits aligned // writes 16 bytes, 128-bits aligned
vst1.8 {d0, d1}, [r0]! vst1.8 {d0, d1}, [r3]!
3: /* write up to 15-bytes (count in r2) */ 3: /* write up to 15-bytes (count in r2) */
movs ip, r2, lsl #29 movs ip, r2, lsl #29
bcc 1f bcc 1f
vst1.8 {d0}, [r0]! vst1.8 {d0}, [r3]!
1: bge 2f 1: bge 2f
vst1.32 {d0[0]}, [r0]! vst1.32 {d0[0]}, [r3]!
2: movs ip, r2, lsl #31 2: movs ip, r2, lsl #31
strbmi r1, [r0], #1 strbmi r1, [r3], #1
strbcs r1, [r0], #1 strbcs r1, [r3], #1
strbcs r1, [r0], #1 strbcs r1, [r3], #1
ldmfd sp!, {r0}
bx lr bx lr
END(memset) END(memset)