/* platform_bionic/libc/arch-arm/generic/bionic/strcmp.S */

/*
 * Copyright (c) 2011 The Android Open Source Project
 * Copyright (c) 2008 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <private/bionic_asm.h>

	.text

/*
 * Endianness helpers.
 *
 * In these names "LSB" / "MSB" describe the position of a byte in MEMORY
 * order within a word loaded with ldr, not its numeric significance:
 *   LSB - mask selecting the byte that came from the lowest address
 *   MSB - mask selecting the byte that came from the highest address
 *   SHFT2LSB / SHFT2MSB - shift mnemonics that move data towards the
 *         lowest-address / highest-address byte position
 *   ...EQ variants - the same shifts, executed only when the Z flag is set
 *
 * __ARMEB__ selects the big-endian definitions; otherwise the
 * little-endian ones are used.
 */
#ifdef __ARMEB__
#define SHFT2LSB lsl
#define SHFT2LSBEQ lsleq
#define SHFT2MSB lsr
#define SHFT2MSBEQ lsreq
#define MSB 0x000000ff
#define LSB 0xff000000
#else
#define SHFT2LSB lsr
#define SHFT2LSBEQ lsreq
#define SHFT2MSB lsl
#define SHFT2MSBEQ lsleq
#define MSB 0xff000000
#define LSB 0x000000ff
#endif

/*
 * Operand forms for the zero-byte test in the aligned loop.  REG is the
 * register holding 0x01010101 (r4 in strcmp below):
 *   magic1(REG) expands to REG           -> 0x01010101
 *   magic2(REG) expands to REG, lsl #7   -> 0x80808080
 */
#define magic1(REG) REG
#define magic2(REG) REG, lsl #7

/*
 * strcmp - compare two NUL-terminated byte strings.
 * (Presumably the standard C  int strcmp(const char *s1, const char *s2);
 * the register roles below are what the code itself uses.)
 *
 * In:    r0 = s1, r1 = s2
 * Out:   r0 = 0 if the strings are equal, otherwise (byte of s1) minus
 *             (byte of s2) at the first position where they differ, both
 *             bytes taken as unsigned values
 * Clobb: r2, r3, ip, flags.  r4 (and r5 on the unaligned word path) are
 *        saved on the stack and restored before returning.
 * Stack: at most 8 bytes.
 *
 * Structure:
 *   1. If s1 and s2 have the same offset within a word, compare a word at
 *      a time (labels 1/4/2 directly below).
 *   2. Otherwise (.Lstrcmp_unaligned) compare bytes until s1 is word
 *      aligned, then run one of three word loops chosen by the offset of
 *      s2 within its word.
 *
 * Note that both word paths read whole aligned words, so they may read
 * bytes that lie before the start or after the terminator of a string,
 * within the same aligned word.
 */
ENTRY(strcmp)
	/* Preload hints for the start of both strings. */
	pld	[r0, #0]
	pld	[r1, #0]
	/* r2 = s1 ^ s2: the low two bits are non-zero iff the pointers sit
	 * at different byte offsets within a word.
	 */
	eor	r2, r0, r1
	tst	r2, #3

	/* Strings not at same byte offset from a word boundary.  */
	bne	.Lstrcmp_unaligned
	/* r2 = byte offset within the word; Z is set if already aligned.
	 * The bic/ldr instructions that follow leave the flags untouched, so
	 * the "eq" conditions below still refer to this ands.
	 */
	ands	r2, r0, #3
	bic	r0, r0, #3
	bic	r1, r1, #3
	ldr	ip, [r0], #4
	it	eq
	ldreq	r3, [r1], #4
	beq	1f

	/* Although s1 and s2 have identical initial alignment, they are
	 * not currently word aligned.  Rather than comparing bytes,
	 * make sure that any bytes fetched from before the addressed
	 * bytes are forced to 0xff.  Then they will always compare
	 * equal.
	 */
	/* r2 = 8 * (3 - offset) is the shift that turns ~MSB into a mask
	 * covering exactly the "offset" bytes that precede the strings.
	 */
	eor	r2, r2, #3
	lsl	r2, r2, #3
	mvn	r3, #MSB
	SHFT2LSB	r2, r3, r2
	ldr	r3, [r1], #4
	orr	ip, ip, r2
	orr	r3, r3, r2
1:
	/* Load the 'magic' constant 0x01010101. */
	/* r4 is saved first; it is restored just before the return below. */
	str	r4, [sp, #-4]!
	mov	r4, #1
	orr	r4, r4, r4, lsl #8
	orr	r4, r4, r4, lsl #16
	.p2align	2
4:
	/* Aligned word loop: ip = current word of s1, r3 = current word of
	 * s2.  Keep looping while the words are equal and ip contains no
	 * zero byte, tested as ((ip - 0x01010101) & ~ip & 0x80808080) == 0.
	 */
	pld	[r0, #8]
	pld	[r1, #8]
	sub	r2, ip, magic1(r4)
	cmp	ip, r3
	itttt	eq

	/* check for any zero bytes in first word */
	biceq	r2, r2, ip
	tsteq	r2, magic2(r4)
	/* The two loads use the flags written by tsteq: they run only if the
	 * words matched and no zero byte was flagged.
	 */
	ldreq	ip, [r0], #4
	ldreq	r3, [r1], #4
	beq	4b
2:
	/* There's a zero or a different byte in the word */
	/* Walk the word in memory order.  r0 = next byte of s1 moved to the
	 * highest-address byte position, with all other bits zero.
	 * "cmp r0, #1" clears both C and Z when that byte is NUL, so the loop
	 * ends either on a NUL in s1 or on the first mismatch.
	 */
	SHFT2MSB	r0, ip, #24
	SHFT2LSB	ip, ip, #8
	cmp	r0, #1
	it	cs
	cmpcs	r0, r3, SHFT2MSB #24
	it	eq
	SHFT2LSBEQ r3, r3, #8
	beq	2b
	/* On a big-endian machine, r0 contains the desired byte in bits
	 * 0-7; on a little-endian machine they are in bits 24-31.  In
	 * both cases the other bits in r0 are all zero.  For r3 the
	 * interesting byte is at the other end of the word, but the
	 * other bits are not necessarily zero.  We need a signed result
	 * representing the difference in the unsigned bytes, so for the
	 * little-endian case we can't just shift the interesting bits up.
	 */
#ifdef __ARMEB__
	sub	r0, r0, r3, lsr #24
#else
	and	r3, r3, #255
	/* No RSB instruction in Thumb2 */
#ifdef __thumb2__
	lsr	r0, r0, #24
	sub	r0, r0, r3
#else
	rsb	r0, r3, r0, lsr #24
#endif
#endif
	/* Restore r4 and return the byte difference in r0. */
	ldr	r4, [sp], #4
	bx	lr

.Lstrcmp_unaligned:
	/* Register aliases for the mutually-misaligned path:
	 *   wp1/wp2 - running pointers into s1/s2
	 *   b1      - 0x01010101 (set up at label 2 below)
	 *   w1/w2   - current words loaded through wp1/wp2
	 *   t1      - temporary
	 */
	wp1 .req r0
	wp2 .req r1
	b1  .req r2
	w1  .req r4
	w2  .req r5
	t1  .req ip
	@ r3 is scratch

	/* First of all, compare bytes until wp1(sp1) is word-aligned. */
	/* r4/r5 have not been touched yet, so the early return from this
	 * byte loop needs no stack cleanup.
	 */
1:
	tst	wp1, #3
	beq	2f
	ldrb	r2, [wp1], #1
	ldrb	r3, [wp2], #1
	/* Continue only while the s1 byte is non-zero and equals the s2 byte. */
	cmp	r2, #1
	it	cs
	cmpcs	r2, r3
	beq	1b
	sub	r0, r2, r3
	bx	lr

2:
	/* wp1 is now word aligned.  Save r5 then r4 (popped in the reverse
	 * order on exit) and build the 0x01010101 constant in b1.
	 */
	str	r5, [sp, #-4]!
	str	r4, [sp, #-4]!
	mov	b1, #1
	orr	b1, b1, b1, lsl #8
	orr	b1, b1, b1, lsl #16

	/* t1 = offset of wp2 within its word.  It cannot be 0 here: the two
	 * pointers started with different low bits, both were advanced by the
	 * same amount, and wp1 is now aligned.
	 *   offset 1 -> fall through (3 bytes of w2 line up with w1)
	 *   offset 2 -> 2f           (2 bytes line up)
	 *   offset 3 -> 3f           (1 byte lines up)
	 */
	and	t1, wp2, #3
	bic	wp2, wp2, #3
	ldr	w1, [wp1], #4
	ldr	w2, [wp2], #4
	cmp	t1, #2
	beq	2f
	bhi	3f

	/* Critical inner Loop: Block with 3 bytes initial overlap */
	/* Each iteration compares the first three bytes (memory order) of w1
	 * with the last three bytes of w2, then the last byte of w1 with the
	 * first byte of the next w2.  r3 receives the zero-byte syndrome of
	 * w1, (w1 - b1) & ~w1, which is masked with 0x80808080 below.
	 */
	.p2align	2
1:
	bic	t1, w1, #MSB
	cmp	t1, w2, SHFT2LSB #8
	sub	r3, w1, b1
	bic	r3, r3, w1
	bne	4f
	ands	r3, r3, b1, lsl #7
	it	eq
	ldreq	w2, [wp2], #4
	bne	5f
	/* t1 ^ w1 leaves only the last (highest-address) byte of w1. */
	eor	t1, t1, w1
	cmp	t1, w2, SHFT2MSB #24
	bne	6f
	ldr	w1, [wp1], #4
	b	1b
4:
	/* Mismatch inside the three overlapping bytes: line w2 up with t1 and
	 * let the byte loop at 8 find the exact position.
	 */
	SHFT2LSB	w2, w2, #8
	b	8f

5:
	/* w1 contains a NUL and its first three bytes matched s2.  If the NUL
	 * is among those three bytes the strings are equal (7f); otherwise it
	 * is the last byte of w1, which is compared against the next byte of
	 * s2.
	 */
#ifdef __ARMEB__
	/* The syndrome value may contain false ones if the string ends
	 * with the bytes 0x01 0x00
	 */
	tst	w1, #0xff000000
	itt	ne
	tstne	w1, #0x00ff0000
	tstne	w1, #0x0000ff00
	beq	7f
#else
	bics	r3, r3, #0xff000000
	bne	7f
#endif
	ldrb	w2, [wp2]
	SHFT2LSB	t1, w1, #24
#ifdef __ARMEB__
	lsl	w2, w2, #24
#endif
	b	8f

6:
	/* Last byte of w1 differs from the first byte of the new w2. */
	SHFT2LSB	t1, w1, #24
	and	w2, w2, #LSB
	b	8f

	/* Critical inner Loop: Block with 2 bytes initial overlap */
	/* Same scheme with halfwords: the first two bytes of w1 are compared
	 * with the last two bytes of w2, then the last two bytes of w1 with
	 * the first two bytes of the next w2.
	 */
	.p2align	2
2:
	SHFT2MSB	t1, w1, #16
	sub	r3, w1, b1
	SHFT2LSB	t1, t1, #16
	bic	r3, r3, w1
	cmp	t1, w2, SHFT2LSB #16
	bne	4f
	ands	r3, r3, b1, lsl #7
	it	eq
	ldreq	w2, [wp2], #4
	bne	5f
	eor	t1, t1, w1
	cmp	t1, w2, SHFT2MSB #16
	bne	6f
	ldr	w1, [wp1], #4
	b	2b

5:
	/* w1 contains a NUL and its first two bytes matched s2.  A NUL within
	 * those two bytes means the strings are equal (7f); otherwise compare
	 * the last two bytes of w1 with the next halfword of s2.
	 */
#ifdef __ARMEB__
	/* The syndrome value may contain false ones if the string ends
	 * with the bytes 0x01 0x00
	 */
	tst	w1, #0xff000000
	it	ne
	tstne	w1, #0x00ff0000
	beq	7f
#else
	lsls	r3, r3, #16
	bne	7f
#endif
	ldrh	w2, [wp2]
	SHFT2LSB	t1, w1, #16
#ifdef __ARMEB__
	lsl	w2, w2, #16
#endif
	b	8f

6:
	/* Last two bytes of w1 differ from the first two bytes of the new w2.
	 * The shift pair (here and at 4 below) keeps only those two bytes of
	 * w2.
	 */
	SHFT2MSB	w2, w2, #16
	SHFT2LSB	t1, w1, #16
4:
	SHFT2LSB	w2, w2, #16
	b	8f

	/* Critical inner Loop: Block with 1 byte initial overlap */
	/* The first byte of w1 is compared with the last byte of w2, then the
	 * remaining three bytes of w1 with the first three bytes of the next
	 * w2.
	 */
	.p2align	2
3:
	and	t1, w1, #LSB
	cmp	t1, w2, SHFT2LSB #24
	sub	r3, w1, b1
	bic	r3, r3, w1
	bne	4f
	ands	r3, r3, b1, lsl #7
	it	eq
	ldreq	w2, [wp2], #4
	bne	5f
	eor	t1, t1, w1
	cmp	t1, w2, SHFT2MSB #8
	bne	6f
	ldr	w1, [wp1], #4
	b	3b
4:
	/* Mismatch in the single overlapping byte. */
	SHFT2LSB	w2, w2, #24
	b	8f
5:
	/* The syndrome value may contain false ones if the string ends
	 * with the bytes 0x01 0x00
	 */
	/* If the first byte of w1 (already matched) is the NUL, the strings
	 * are equal; otherwise fetch the next w2 and compare the remaining
	 * three bytes.
	 */
	tst	w1, #LSB
	beq	7f
	ldr	w2, [wp2], #4
6:
	SHFT2LSB	t1, w1, #8
	bic	w2, w2, #MSB
	b	8f
7:
	/* Strings are equal: return 0 after restoring r4 and r5. */
	mov	r0, #0
	ldr	r4, [sp], #4
	ldr	r5, [sp], #4
	bx	lr

8:
	/* Final byte-by-byte compare.  t1 holds the remaining bytes of s1 and
	 * w2 the matching bytes of s2, both starting at the lowest-address
	 * byte position.  The loop stops at a NUL in the s2 byte or at the
	 * first difference, and returns (s1 byte) - (s2 byte).
	 */
	and	r2, t1, #LSB
	and	r0, w2, #LSB
	cmp	r0, #1
	it	cs
	cmpcs	r0, r2
	itt	eq
	SHFT2LSBEQ	t1, t1, #8
	SHFT2LSBEQ	w2, w2, #8
	beq	8b
	sub	r0, r2, r0
	ldr	r4, [sp], #4
	ldr	r5, [sp], #4
	bx	lr
END(strcmp)
Create arch specific versions of strcmp. This uses the new strcmp.a15.S code as the basis for new versions of strcmp.S. The cortex-a15 code is the performance optimized version of strcmp.a15.S taken with only the addition of a few pld instructions. The cortex-a9 code is the same as the cortex-a15 code except that the unaligned strcmp code was taken from the original strcmp.S. The krait code is the same as the cortex-a15 code except that one path in the unaligned strcmp code was taken from the original strcmp.S code (the 2 byte overlap case). The generic code is the original unmodified strmp.S from the bionic subdirectory. All three new versions underwent these test cases: Strings the same, all same size: - Both pointers double word aligned. - One pointer double word aligned, one pointer word aligned. - Both pointers word aligned. - One pointer double word aligned, one pointer 1 off a word alignment. - One pointer double word aligned, one pointer 2 off a word alignment. - One pointer double word aligned, one pointer 3 off a word alignment. - One pointer word aligned, one pointer 1 off a word alignment. - One pointer word aligned, one pointer 2 off a word alignment. - One pointer word aligned, one pointer 3 off a word alignment. For all cases where it made sense, the two pointers were also tested swapped. Different strings, all same size: - Single difference at double word boundary. - Single difference at word boudary. - Single difference at 1 off a word alignment. - Single difference at 2 off a word alignment. - Single difference at 3 off a word alignment. Different sized strings, strings the same until the end: - Shorter string ends on a double word boundary. - Shorter string ends on word boundary. - Shorter string ends at 1 off a word boundary. - Shorter string ends at 2 off a word boundary. - Shorter string ends at 3 off a word boundary. For all different cases, run them through the same pointer alignment cases when the strings are the same size. 
For all cases the two pointers were also tested swapped. Bug: 8005082 Merge from internal master. (cherry-picked from commit a9a5870d166f8060a8182cd61e5536b0becea74e) Change-Id: I4c2b98f8a50804fb98ab67f75e9d660f1315a144 2013-03-09 01:50:31 +01:00			`/*`
			`* Copyright (c) 2011 The Android Open Source Project`
			`* Copyright (c) 2008 ARM Ltd`
			`* All rights reserved.`
			`*`
			`* Redistribution and use in source and binary forms, with or without`
			`* modification, are permitted provided that the following conditions`
			`* are met:`
			`* 1. Redistributions of source code must retain the above copyright`
			`* notice, this list of conditions and the following disclaimer.`
			`* 2. Redistributions in binary form must reproduce the above copyright`
			`* notice, this list of conditions and the following disclaimer in the`
			`* documentation and/or other materials provided with the distribution.`
			`* 3. The name of the company may not be used to endorse or promote`
			`* products derived from this software without specific prior written`
			`* permission.`
			`*`
			* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
			`* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF`
			`* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.`
			`* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,`
			`* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED`
			`* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR`
			`* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF`
			`* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING`
			`* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS`
			`* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
			`*/`

Unify our assembler macros. Our <machine/asm.h> files were modified from upstream, to the extent that no architecture was actually using the upstream ENTRY or END macros, assuming that architecture even had such a macro upstream. This patch moves everyone to the same macros, with just a few tweaks remaining in the <machine/asm.h> files, which no one should now use directly. I've removed most of the unused cruft from the <machine/asm.h> files, though there's still rather a lot in the mips/mips64 ones. Bug: 12229603 Change-Id: I2fff287dc571ac1087abe9070362fb9420d85d6d 2014-02-20 01:53:20 +01:00			`#include <private/bionic_asm.h>`
Create arch specific versions of strcmp. This uses the new strcmp.a15.S code as the basis for new versions of strcmp.S. The cortex-a15 code is the performance optimized version of strcmp.a15.S taken with only the addition of a few pld instructions. The cortex-a9 code is the same as the cortex-a15 code except that the unaligned strcmp code was taken from the original strcmp.S. The krait code is the same as the cortex-a15 code except that one path in the unaligned strcmp code was taken from the original strcmp.S code (the 2 byte overlap case). The generic code is the original unmodified strmp.S from the bionic subdirectory. All three new versions underwent these test cases: Strings the same, all same size: - Both pointers double word aligned. - One pointer double word aligned, one pointer word aligned. - Both pointers word aligned. - One pointer double word aligned, one pointer 1 off a word alignment. - One pointer double word aligned, one pointer 2 off a word alignment. - One pointer double word aligned, one pointer 3 off a word alignment. - One pointer word aligned, one pointer 1 off a word alignment. - One pointer word aligned, one pointer 2 off a word alignment. - One pointer word aligned, one pointer 3 off a word alignment. For all cases where it made sense, the two pointers were also tested swapped. Different strings, all same size: - Single difference at double word boundary. - Single difference at word boudary. - Single difference at 1 off a word alignment. - Single difference at 2 off a word alignment. - Single difference at 3 off a word alignment. Different sized strings, strings the same until the end: - Shorter string ends on a double word boundary. - Shorter string ends on word boundary. - Shorter string ends at 1 off a word boundary. - Shorter string ends at 2 off a word boundary. - Shorter string ends at 3 off a word boundary. For all different cases, run them through the same pointer alignment cases when the strings are the same size. 
For all cases the two pointers were also tested swapped. Bug: 8005082 Merge from internal master. (cherry-picked from commit a9a5870d166f8060a8182cd61e5536b0becea74e) Change-Id: I4c2b98f8a50804fb98ab67f75e9d660f1315a144 2013-03-09 01:50:31 +01:00
			`.text`

			`#ifdef __ARMEB__`
			`#define SHFT2LSB lsl`
			`#define SHFT2LSBEQ lsleq`
			`#define SHFT2MSB lsr`
			`#define SHFT2MSBEQ lsreq`
			`#define MSB 0x000000ff`
			`#define LSB 0xff000000`
			`#else`
			`#define SHFT2LSB lsr`
			`#define SHFT2LSBEQ lsreq`
			`#define SHFT2MSB lsl`
			`#define SHFT2MSBEQ lsleq`
			`#define MSB 0xff000000`
			`#define LSB 0x000000ff`
			`#endif`

			`#define magic1(REG) REG`
			`#define magic2(REG) REG, lsl #7`

			`ENTRY(strcmp)`
Clean up some ARMv4/ARMv5 cruft. Change-Id: I29e836fea4b53901e29f96c6888869c35f6726be 2013-12-13 21:17:13 +01:00			`pld [r0, #0]`
			`pld [r1, #0]`
Create arch specific versions of strcmp. This uses the new strcmp.a15.S code as the basis for new versions of strcmp.S. The cortex-a15 code is the performance optimized version of strcmp.a15.S taken with only the addition of a few pld instructions. The cortex-a9 code is the same as the cortex-a15 code except that the unaligned strcmp code was taken from the original strcmp.S. The krait code is the same as the cortex-a15 code except that one path in the unaligned strcmp code was taken from the original strcmp.S code (the 2 byte overlap case). The generic code is the original unmodified strmp.S from the bionic subdirectory. All three new versions underwent these test cases: Strings the same, all same size: - Both pointers double word aligned. - One pointer double word aligned, one pointer word aligned. - Both pointers word aligned. - One pointer double word aligned, one pointer 1 off a word alignment. - One pointer double word aligned, one pointer 2 off a word alignment. - One pointer double word aligned, one pointer 3 off a word alignment. - One pointer word aligned, one pointer 1 off a word alignment. - One pointer word aligned, one pointer 2 off a word alignment. - One pointer word aligned, one pointer 3 off a word alignment. For all cases where it made sense, the two pointers were also tested swapped. Different strings, all same size: - Single difference at double word boundary. - Single difference at word boudary. - Single difference at 1 off a word alignment. - Single difference at 2 off a word alignment. - Single difference at 3 off a word alignment. Different sized strings, strings the same until the end: - Shorter string ends on a double word boundary. - Shorter string ends on word boundary. - Shorter string ends at 1 off a word boundary. - Shorter string ends at 2 off a word boundary. - Shorter string ends at 3 off a word boundary. For all different cases, run them through the same pointer alignment cases when the strings are the same size. 
For all cases the two pointers were also tested swapped. Bug: 8005082 Merge from internal master. (cherry-picked from commit a9a5870d166f8060a8182cd61e5536b0becea74e) Change-Id: I4c2b98f8a50804fb98ab67f75e9d660f1315a144 2013-03-09 01:50:31 +01:00			`eor r2, r0, r1`
			`tst r2, #3`

			`/* Strings not at same byte offset from a word boundary. */`
			`bne .Lstrcmp_unaligned`
			`ands r2, r0, #3`
			`bic r0, r0, #3`
			`bic r1, r1, #3`
			`ldr ip, [r0], #4`
			`it eq`
			`ldreq r3, [r1], #4`
			`beq 1f`

			`/* Although s1 and s2 have identical initial alignment, they are`
			`* not currently word aligned. Rather than comparing bytes,`
			`* make sure that any bytes fetched from before the addressed`
			`* bytes are forced to 0xff. Then they will always compare`
			`* equal.`
			`*/`
			`eor r2, r2, #3`
			`lsl r2, r2, #3`
			`mvn r3, #MSB`
			`SHFT2LSB r2, r3, r2`
			`ldr r3, [r1], #4`
			`orr ip, ip, r2`
			`orr r3, r3, r2`
			`1:`
			`/* Load the 'magic' constant 0x01010101. */`
			`str r4, [sp, #-4]!`
			`mov r4, #1`
			`orr r4, r4, r4, lsl #8`
			`orr r4, r4, r4, lsl #16`
			`.p2align 2`
			`4:`
Clean up some ARMv4/ARMv5 cruft. Change-Id: I29e836fea4b53901e29f96c6888869c35f6726be 2013-12-13 21:17:13 +01:00			`pld [r0, #8]`
			`pld [r1, #8]`
Create arch specific versions of strcmp. This uses the new strcmp.a15.S code as the basis for new versions of strcmp.S. The cortex-a15 code is the performance optimized version of strcmp.a15.S taken with only the addition of a few pld instructions. The cortex-a9 code is the same as the cortex-a15 code except that the unaligned strcmp code was taken from the original strcmp.S. The krait code is the same as the cortex-a15 code except that one path in the unaligned strcmp code was taken from the original strcmp.S code (the 2 byte overlap case). The generic code is the original unmodified strmp.S from the bionic subdirectory. All three new versions underwent these test cases: Strings the same, all same size: - Both pointers double word aligned. - One pointer double word aligned, one pointer word aligned. - Both pointers word aligned. - One pointer double word aligned, one pointer 1 off a word alignment. - One pointer double word aligned, one pointer 2 off a word alignment. - One pointer double word aligned, one pointer 3 off a word alignment. - One pointer word aligned, one pointer 1 off a word alignment. - One pointer word aligned, one pointer 2 off a word alignment. - One pointer word aligned, one pointer 3 off a word alignment. For all cases where it made sense, the two pointers were also tested swapped. Different strings, all same size: - Single difference at double word boundary. - Single difference at word boudary. - Single difference at 1 off a word alignment. - Single difference at 2 off a word alignment. - Single difference at 3 off a word alignment. Different sized strings, strings the same until the end: - Shorter string ends on a double word boundary. - Shorter string ends on word boundary. - Shorter string ends at 1 off a word boundary. - Shorter string ends at 2 off a word boundary. - Shorter string ends at 3 off a word boundary. For all different cases, run them through the same pointer alignment cases when the strings are the same size. 
For all cases the two pointers were also tested swapped. Bug: 8005082 Merge from internal master. (cherry-picked from commit a9a5870d166f8060a8182cd61e5536b0becea74e) Change-Id: I4c2b98f8a50804fb98ab67f75e9d660f1315a144 2013-03-09 01:50:31 +01:00			`sub r2, ip, magic1(r4)`
			`cmp ip, r3`
			`itttt eq`

			`/* check for any zero bytes in first word */`
			`biceq r2, r2, ip`
			`tsteq r2, magic2(r4)`
			`ldreq ip, [r0], #4`
			`ldreq r3, [r1], #4`
			`beq 4b`
			`2:`
			`/* There's a zero or a different byte in the word */`
			`SHFT2MSB r0, ip, #24`
			`SHFT2LSB ip, ip, #8`
			`cmp r0, #1`
			`it cs`
			`cmpcs r0, r3, SHFT2MSB #24`
			`it eq`
			`SHFT2LSBEQ r3, r3, #8`
			`beq 2b`
			`/* On a big-endian machine, r0 contains the desired byte in bits`
			`* 0-7; on a little-endian machine they are in bits 24-31. In`
			`* both cases the other bits in r0 are all zero. For r3 the`
			`* interesting byte is at the other end of the word, but the`
			`* other bits are not necessarily zero. We need a signed result`
			`* representing the difference in the unsigned bytes, so for the`
			`* little-endian case we can't just shift the interesting bits up.`
			`*/`
			`#ifdef __ARMEB__`
			`sub r0, r0, r3, lsr #24`
			`#else`
			`and r3, r3, #255`
			`/* No RSB instruction in Thumb2 */`
			`#ifdef __thumb2__`
			`lsr r0, r0, #24`
			`sub r0, r0, r3`
			`#else`
			`rsb r0, r3, r0, lsr #24`
			`#endif`
			`#endif`
			`ldr r4, [sp], #4`
			`bx lr`

			`.Lstrcmp_unaligned:`
			`wp1 .req r0`
			`wp2 .req r1`
			`b1 .req r2`
			`w1 .req r4`
			`w2 .req r5`
			`t1 .req ip`
			`@ r3 is scratch`

			`/* First of all, compare bytes until wp1(sp1) is word-aligned. */`
			`1:`
			`tst wp1, #3`
			`beq 2f`
			`ldrb r2, [wp1], #1`
			`ldrb r3, [wp2], #1`
			`cmp r2, #1`
			`it cs`
			`cmpcs r2, r3`
			`beq 1b`
			`sub r0, r2, r3`
			`bx lr`

			`2:`
			`str r5, [sp, #-4]!`
			`str r4, [sp, #-4]!`
			`mov b1, #1`
			`orr b1, b1, b1, lsl #8`
			`orr b1, b1, b1, lsl #16`

			`and t1, wp2, #3`
			`bic wp2, wp2, #3`
			`ldr w1, [wp1], #4`
			`ldr w2, [wp2], #4`
			`cmp t1, #2`
			`beq 2f`
			`bhi 3f`

			`/* Critical inner Loop: Block with 3 bytes initial overlap */`
			`.p2align 2`
			`1:`
			`bic t1, w1, #MSB`
			`cmp t1, w2, SHFT2LSB #8`
			`sub r3, w1, b1`
			`bic r3, r3, w1`
			`bne 4f`
			`ands r3, r3, b1, lsl #7`
			`it eq`
			`ldreq w2, [wp2], #4`
			`bne 5f`
			`eor t1, t1, w1`
			`cmp t1, w2, SHFT2MSB #24`
			`bne 6f`
			`ldr w1, [wp1], #4`
			`b 1b`
			`4:`
			`SHFT2LSB w2, w2, #8`
			`b 8f`

			`5:`
			`#ifdef __ARMEB__`
			`/* The syndrome value may contain false ones if the string ends`
			`* with the bytes 0x01 0x00`
			`*/`
			`tst w1, #0xff000000`
			`itt ne`
			`tstne w1, #0x00ff0000`
			`tstne w1, #0x0000ff00`
			`beq 7f`
			`#else`
			`bics r3, r3, #0xff000000`
			`bne 7f`
			`#endif`
			`ldrb w2, [wp2]`
			`SHFT2LSB t1, w1, #24`
			`#ifdef __ARMEB__`
			`lsl w2, w2, #24`
			`#endif`
			`b 8f`

			`6:`
			`SHFT2LSB t1, w1, #24`
			`and w2, w2, #LSB`
			`b 8f`

			`/* Critical inner Loop: Block with 2 bytes initial overlap */`
			`.p2align 2`
			`2:`
			`SHFT2MSB t1, w1, #16`
			`sub r3, w1, b1`
			`SHFT2LSB t1, t1, #16`
			`bic r3, r3, w1`
			`cmp t1, w2, SHFT2LSB #16`
			`bne 4f`
			`ands r3, r3, b1, lsl #7`
			`it eq`
			`ldreq w2, [wp2], #4`
			`bne 5f`
			`eor t1, t1, w1`
			`cmp t1, w2, SHFT2MSB #16`
			`bne 6f`
			`ldr w1, [wp1], #4`
			`b 2b`

			`5:`
			`#ifdef __ARMEB__`
			`/* The syndrome value may contain false ones if the string ends`
			`* with the bytes 0x01 0x00`
			`*/`
			`tst w1, #0xff000000`
			`it ne`
			`tstne w1, #0x00ff0000`
			`beq 7f`
			`#else`
			`lsls r3, r3, #16`
			`bne 7f`
			`#endif`
			`ldrh w2, [wp2]`
			`SHFT2LSB t1, w1, #16`
			`#ifdef __ARMEB__`
			`lsl w2, w2, #16`
			`#endif`
			`b 8f`

			`6:`
			`SHFT2MSB w2, w2, #16`
			`SHFT2LSB t1, w1, #16`
			`4:`
			`SHFT2LSB w2, w2, #16`
			`b 8f`

			`/* Critical inner Loop: Block with 1 byte initial overlap */`
			`.p2align 2`
			`3:`
			`and t1, w1, #LSB`
			`cmp t1, w2, SHFT2LSB #24`
			`sub r3, w1, b1`
			`bic r3, r3, w1`
			`bne 4f`
			`ands r3, r3, b1, lsl #7`
			`it eq`
			`ldreq w2, [wp2], #4`
			`bne 5f`
			`eor t1, t1, w1`
			`cmp t1, w2, SHFT2MSB #8`
			`bne 6f`
			`ldr w1, [wp1], #4`
			`b 3b`
			`4:`
			`SHFT2LSB w2, w2, #24`
			`b 8f`
			`5:`
			`/* The syndrome value may contain false ones if the string ends`
			`* with the bytes 0x01 0x00`
			`*/`
			`tst w1, #LSB`
			`beq 7f`
			`ldr w2, [wp2], #4`
			`6:`
			`SHFT2LSB t1, w1, #8`
			`bic w2, w2, #MSB`
			`b 8f`
			`7:`
			`mov r0, #0`
			`ldr r4, [sp], #4`
			`ldr r5, [sp], #4`
			`bx lr`

			`8:`
			`and r2, t1, #LSB`
			`and r0, w2, #LSB`
			`cmp r0, #1`
			`it cs`
			`cmpcs r0, r2`
			`itt eq`
			`SHFT2LSBEQ t1, t1, #8`
			`SHFT2LSBEQ w2, w2, #8`
			`beq 8b`
			`sub r0, r2, r0`
			`ldr r4, [sp], #4`
			`ldr r5, [sp], #4`
			`bx lr`
			`END(strcmp)`