65a6211f4d
This patch was originally written by Siddhesh Poyarekar and pushed on cortex-strings [1]. The mutually misaligned inputs on aarch64 are compared with a simple byte copy, which is not very efficient. This patch enhances the comparison similar to strcmp by loading a double-word at a time. Comparison on the default bionic and proposed optimized routines shows the following performance improvements on A54 (using the new proposed memcmp input data from test_strncmp.xml): - No noticeable change on aligned inputs or with same alignment. - Large improvements on unaligned inputs from sizes larger than 16 bytes. Benchmark Time CPU Time Old Time New CPU Old CPU New -------------------------------------------------------------------------------------------------------------------- BM_string_strncmp/1/0/0 -0.0954 -0.0954 19 17 19 17 BM_string_strncmp/2/0/0 -0.0344 -0.0344 19 18 19 18 BM_string_strncmp/3/0/0 +0.1768 +0.1768 15 18 15 18 BM_string_strncmp/4/0/0 -0.0344 -0.0344 19 18 19 18 BM_string_strncmp/5/0/0 -0.0344 -0.0344 19 18 19 18 BM_string_strncmp/6/0/0 +0.1589 +0.1589 15 18 15 18 BM_string_strncmp/7/0/0 -0.0344 -0.0344 19 18 19 18 BM_string_strncmp/8/0/0 -0.0998 -0.0998 19 17 19 17 BM_string_strncmp/9/0/0 -0.0277 -0.0277 23 22 23 22 BM_string_strncmp/10/0/0 -0.0270 -0.0270 23 22 23 22 BM_string_strncmp/11/0/0 -0.0331 -0.0331 23 22 23 22 BM_string_strncmp/12/0/0 -0.0270 -0.0270 23 22 23 22 BM_string_strncmp/13/0/0 -0.0284 -0.0284 23 22 23 22 BM_string_strncmp/14/0/0 +0.1042 +0.1042 20 22 20 22 BM_string_strncmp/15/0/0 -0.0277 -0.0277 23 22 23 22 BM_string_strncmp/16/0/0 +0.0214 +0.0215 22 22 22 22 BM_string_strncmp/24/0/0 -0.1291 -0.1291 24 21 24 21 BM_string_strncmp/32/0/0 -0.0470 -0.0470 27 26 27 26 BM_string_strncmp/40/0/0 -0.0433 -0.0433 29 28 29 28 BM_string_strncmp/48/0/0 -0.0301 -0.0301 31 30 31 30 BM_string_strncmp/56/0/0 -0.0800 -0.0800 33 31 33 31 BM_string_strncmp/64/0/0 +0.0188 +0.0188 34 34 34 34 BM_string_strncmp/72/0/0 -0.0334 -0.0334 38 37 38 37 BM_string_strncmp/80/0/0 -0.0000 -0.0000 40 40 40 40 BM_string_strncmp/88/0/0 +0.0413 +0.0413 61 64 61 64 BM_string_strncmp/96/0/0 -0.0215 -0.0216 69 67 69 67 BM_string_strncmp/104/0/0 -0.0208 -0.0208 72 70 72 70 BM_string_strncmp/112/0/0 -0.0173 -0.0173 75 74 75 74 BM_string_strncmp/120/0/0 -0.0166 -0.0166 78 77 78 77 BM_string_strncmp/128/0/0 -0.0158 -0.0158 81 80 81 80 BM_string_strncmp/136/0/0 -0.0149 -0.0149 84 83 84 83 BM_string_strncmp/144/0/0 -0.0201 -0.0201 88 86 88 86 BM_string_strncmp/160/0/0 -0.0136 -0.0136 94 93 94 93 BM_string_strncmp/176/0/0 +0.0224 +0.0224 96 98 96 98 BM_string_strncmp/192/0/0 +0.0289 +0.0289 102 105 102 105 BM_string_strncmp/208/0/0 +0.0101 +0.0101 111 112 111 112 BM_string_strncmp/224/0/0 -0.0107 -0.0107 119 118 119 118 BM_string_strncmp/240/0/0 -0.0088 -0.0088 126 125 126 125 BM_string_strncmp/256/0/0 -0.0101 -0.0101 132 131 132 131 BM_string_strncmp/512/0/0 -0.0056 -0.0056 235 233 235 233 BM_string_strncmp/1024/0/0 -0.0030 -0.0030 439 437 439 437 BM_string_strncmp/8192/0/0 -0.0431 -0.0431 3799 3635 3799 3635 BM_string_strncmp/16384/0/0 -0.0069 -0.0069 6778 6732 6779 6732 BM_string_strncmp/32768/0/0 -0.0001 -0.0002 13405 13403 13405 13403 BM_string_strncmp/65536/0/0 +0.0005 +0.0005 26968 26981 26968 26981 BM_string_strncmp/131072/0/0 -0.0057 -0.0057 53959 53650 53958 53650 BM_string_strncmp/1/4/0 -0.1352 -0.1352 12 10 12 10 BM_string_strncmp/2/4/0 +0.0020 +0.0020 15 15 15 15 BM_string_strncmp/3/4/0 -0.1560 -0.1560 20 17 20 17 BM_string_strncmp/4/4/0 +0.0296 +0.0296 22 22 22 22 BM_string_strncmp/5/4/0 +0.0573 +0.0573 22 23 22 23 BM_string_strncmp/6/4/0 -0.0340 -0.0340 25 24 25 24 BM_string_strncmp/7/4/0 +0.0185 +0.0185 26 26 26 26 BM_string_strncmp/8/4/0 -0.0050 -0.0050 27 27 27 27 BM_string_strncmp/9/4/0 -0.1294 -0.1294 28 24 28 24 BM_string_strncmp/10/4/0 +0.0109 +0.0109 29 29 29 29 BM_string_strncmp/11/4/0 -0.0000 -0.0001 30 30 30 30 BM_string_strncmp/12/4/0 +0.0055 +0.0055 50 50 50 50 BM_string_strncmp/13/4/0 -0.0249 -0.0249 51 50 51 50 BM_string_strncmp/14/4/0 -0.0289 -0.0289 53 52 53 52 BM_string_strncmp/15/4/0 -0.0205 -0.0205 55 54 55 54 BM_string_strncmp/16/4/0 -0.4616 -0.4616 57 31 57 31 BM_string_strncmp/24/4/0 -0.4871 -0.4871 72 37 72 37 BM_string_strncmp/32/4/0 -0.5549 -0.5549 87 39 87 39 BM_string_strncmp/40/4/0 -0.5964 -0.5964 103 42 103 42 BM_string_strncmp/48/4/0 -0.6647 -0.6647 118 40 118 40 BM_string_strncmp/56/4/0 -0.6551 -0.6551 134 46 134 46 BM_string_strncmp/64/4/0 -0.6609 -0.6609 145 49 145 49 BM_string_strncmp/72/4/0 -0.5709 -0.5710 164 70 164 70 BM_string_strncmp/80/4/0 -0.5929 -0.5929 180 73 180 73 BM_string_strncmp/88/4/0 -0.6051 -0.6051 195 77 195 77 BM_string_strncmp/96/4/0 -0.6160 -0.6160 210 81 210 81 BM_string_strncmp/104/4/0 -0.6199 -0.6199 223 85 223 85 BM_string_strncmp/112/4/0 -0.6293 -0.6293 240 89 240 89 BM_string_strncmp/120/4/0 -0.6439 -0.6439 255 91 255 91 BM_string_strncmp/128/4/0 -0.6493 -0.6493 271 95 271 95 BM_string_strncmp/136/4/0 -0.6704 -0.6704 287 95 287 95 BM_string_strncmp/144/4/0 -0.6744 -0.6744 302 98 302 98 BM_string_strncmp/160/4/0 -0.6700 -0.6700 333 110 333 110 BM_string_strncmp/176/4/0 -0.6821 -0.6821 364 116 364 116 BM_string_strncmp/192/4/0 -0.6887 -0.6887 394 123 394 123 BM_string_strncmp/208/4/0 -0.6949 -0.6949 425 130 425 130 BM_string_strncmp/224/4/0 -0.7069 -0.7069 456 134 456 134 BM_string_strncmp/240/4/0 -0.7042 -0.7042 486 144 486 144 BM_string_strncmp/256/4/0 -0.7043 -0.7043 514 152 514 152 BM_string_strncmp/1/0/4 +0.0227 +0.0227 14 14 14 14 BM_string_strncmp/2/0/4 +0.0442 +0.0442 15 16 15 16 BM_string_strncmp/3/0/4 +0.5829 +0.5829 17 27 17 27 BM_string_strncmp/4/0/4 -0.1593 -0.1593 22 19 22 19 BM_string_strncmp/5/0/4 -0.0516 -0.0516 23 22 23 22 BM_string_strncmp/6/0/4 -0.1684 -0.1684 25 20 25 20 BM_string_strncmp/7/0/4 +0.0170 +0.0170 26 26 26 26 BM_string_strncmp/8/0/4 +0.0006 +0.0006 27 27 27 27 BM_string_strncmp/9/0/4 +0.1272 +0.1272 25 28 25 28 BM_string_strncmp/10/0/4 +0.0108 +0.0108 29 29 29 29 BM_string_strncmp/11/0/4 -0.0001 -0.0001 30 30 30 30 BM_string_strncmp/12/0/4 -0.3557 -0.3557 50 32 50 32 BM_string_strncmp/13/0/4 -0.3370 -0.3370 51 34 51 34 BM_string_strncmp/14/0/4 -0.3444 -0.3444 53 35 53 35 BM_string_strncmp/15/0/4 +0.0946 +0.0946 51 56 51 56 BM_string_strncmp/16/0/4 -0.5203 -0.5203 53 25 53 25 BM_string_strncmp/24/0/4 -0.6109 -0.6109 72 28 72 28 BM_string_strncmp/32/0/4 -0.6934 -0.6934 88 27 88 27 BM_string_strncmp/40/0/4 -0.6833 -0.6833 103 33 103 33 BM_string_strncmp/48/0/4 -0.6973 -0.6973 118 36 118 36 BM_string_strncmp/56/0/4 -0.7116 -0.7116 134 39 134 39 BM_string_strncmp/64/0/4 -0.6017 -0.6018 149 59 149 59 BM_string_strncmp/72/0/4 -0.6268 -0.6268 164 61 164 61 BM_string_strncmp/80/0/4 -0.6409 -0.6409 179 64 179 64 BM_string_strncmp/88/0/4 -0.6465 -0.6465 195 69 195 69 BM_string_strncmp/96/0/4 -0.6551 -0.6551 210 72 210 72 BM_string_strncmp/104/0/4 -0.6662 -0.6662 227 76 227 76 BM_string_strncmp/112/0/4 -0.6700 -0.6700 240 79 240 79 BM_string_strncmp/120/0/4 -0.6740 -0.6740 256 83 256 83 BM_string_strncmp/128/0/4 -0.6862 -0.6862 271 85 271 85 BM_string_strncmp/136/0/4 -0.6883 -0.6883 287 89 287 89 BM_string_strncmp/144/0/4 -0.7031 -0.7031 297 88 297 88 BM_string_strncmp/160/0/4 -0.6985 -0.6985 333 100 333 100 BM_string_strncmp/176/0/4 -0.7082 -0.7082 364 106 364 106 BM_string_strncmp/192/0/4 -0.7223 -0.7223 396 110 396 110 BM_string_strncmp/208/0/4 -0.7135 -0.7135 421 121 421 121 BM_string_strncmp/224/0/4 -0.7194 -0.7194 455 128 455 128 BM_string_strncmp/240/0/4 -0.7233 -0.7233 487 135 487 135 BM_string_strncmp/256/0/4 -0.7239 -0.7239 516 143 516 143 BM_string_strncmp/1/4/4 +0.0224 +0.0225 21 22 21 22 BM_string_strncmp/2/4/4 -0.0001 -0.0001 22 22 22 22 BM_string_strncmp/3/4/4 -0.0001 -0.0001 22 22 22 22 BM_string_strncmp/4/4/4 -0.0435 -0.0435 22 21 22 21 BM_string_strncmp/5/4/4 -0.0118 -0.0118 27 27 27 27 BM_string_strncmp/6/4/4 -0.0118 -0.0118 27 27 27 27 BM_string_strncmp/7/4/4 -0.0117 -0.0117 27 27 27 27 BM_string_strncmp/8/4/4 -0.0118 -0.0118 27 27 27 27 BM_string_strncmp/9/4/4 -0.0117 -0.0117 27 27 27 27 BM_string_strncmp/10/4/4 +0.1447 +0.1447 23 27 23 27 BM_string_strncmp/11/4/4 -0.0062 -0.0062 27 27 27 27 BM_string_strncmp/12/4/4 -0.0454 -0.0454 28 27 28 27 BM_string_strncmp/13/4/4 -0.1507 -0.1507 29 24 29 24 BM_string_strncmp/14/4/4 -0.0003 -0.0003 29 29 29 29 BM_string_strncmp/15/4/4 -0.0002 -0.0003 29 29 29 29 BM_string_strncmp/16/4/4 +0.0047 +0.0047 29 29 29 29 BM_string_strncmp/24/4/4 -0.0104 -0.0104 31 30 31 30 BM_string_strncmp/32/4/4 -0.0290 -0.0290 33 32 33 32 BM_string_strncmp/40/4/4 -0.0189 -0.0189 34 33 34 33 BM_string_strncmp/48/4/4 -0.0059 -0.0059 36 36 36 36 BM_string_strncmp/56/4/4 +0.0000 +0.0000 39 39 39 39 BM_string_strncmp/64/4/4 +0.0000 +0.0000 42 42 42 42 BM_string_strncmp/72/4/4 +0.0000 +0.0000 45 45 45 45 BM_string_strncmp/80/4/4 +0.0391 +0.0392 65 68 65 68 BM_string_strncmp/88/4/4 -0.0090 -0.0090 71 70 71 70 BM_string_strncmp/96/4/4 -0.0034 -0.0034 74 74 74 74 BM_string_strncmp/104/4/4 -0.0482 -0.0482 77 73 77 73 BM_string_strncmp/112/4/4 +0.0387 +0.0387 77 80 77 80 BM_string_strncmp/120/4/4 -0.0072 -0.0073 84 83 84 83 BM_string_strncmp/128/4/4 -0.0071 -0.0071 87 86 87 86 BM_string_strncmp/136/4/4 +0.0366 +0.0366 86 89 86 89 BM_string_strncmp/144/4/4 -0.0068 -0.0068 93 93 93 93 BM_string_strncmp/160/4/4 -0.0064 -0.0064 100 99 100 99 BM_string_strncmp/176/4/4 -0.0063 -0.0063 106 105 106 105 BM_string_strncmp/192/4/4 -0.0012 -0.0012 112 112 112 112 BM_string_strncmp/208/4/4 -0.0098 -0.0098 119 118 119 118 BM_string_strncmp/224/4/4 -0.0050 -0.0050 125 125 125 125 BM_string_strncmp/240/4/4 -0.0060 -0.0060 132 131 132 131 BM_string_strncmp/256/4/4 -0.0046 -0.0046 138 137 138 137 [1] Commit id: 26cc4faec37a55529e5d0a39949f7b6ec81008f9 Test: bionic tests and benchmarks on aarch64. Change-Id: Ied579d2044b4092fc95fad486af6541d1eb71dc3 |
||
---|---|---|
.. | ||
arch-arm | ||
arch-arm64 | ||
arch-common/bionic | ||
arch-mips | ||
arch-mips64 | ||
arch-x86 | ||
arch-x86_64 | ||
async_safe | ||
bionic | ||
dns | ||
include | ||
kernel | ||
malloc_debug | ||
malloc_hooks | ||
private | ||
seccomp | ||
stdio | ||
stdlib | ||
system_properties | ||
tools | ||
tzcode | ||
upstream-freebsd | ||
upstream-netbsd | ||
upstream-openbsd | ||
versioner-dependencies | ||
Android.bp | ||
fs_config_generator.py | ||
libc.arm.map | ||
libc.arm64.map | ||
libc.map.txt | ||
libc.mips.map | ||
libc.mips64.map | ||
libc.x86.map | ||
libc.x86_64.map | ||
libstdc++.arm.map | ||
libstdc++.arm64.map | ||
libstdc++.map.txt | ||
libstdc++.mips.map | ||
libstdc++.mips64.map | ||
libstdc++.x86.map | ||
libstdc++.x86_64.map | ||
MODULE_LICENSE_BSD | ||
NOTICE | ||
SECCOMP_BLACKLIST_APP.TXT | ||
SECCOMP_BLACKLIST_COMMON.TXT | ||
SECCOMP_WHITELIST_APP.TXT | ||
SECCOMP_WHITELIST_COMMON.TXT | ||
SECCOMP_WHITELIST_GLOBAL.TXT | ||
SECCOMP_WHITELIST_SYSTEM.TXT | ||
SYSCALLS.TXT | ||
version_script.txt |