7244cf2aa5
* Explicitly specify default .align 0. * Use standard ldmfdlo instruction. * Before and after gas outputs are identical, with align 0 sections. * Objdump showed .text/.data/.bss section alignment attributes are 2^0 from gas and 2^2 from llvm assembler. These .S files might be working when compiled by gas, but llvm assembler's output should be more correct or conservative. Change-Id: I4e578dbc8155c0d06d1bbc1c33ec4cc851a18479
213 lines
5.8 KiB
ArmAsm
213 lines
5.8 KiB
ArmAsm
/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

.text
|
|
.align 0
|
|
|
|
.global scanline_t32cb16blend_arm64
|
|
|
|
/*
 * .macro pixel
 *
 * Alpha-blends one 32-bit source pixel over one of the two RGB565
 * destination pixels held in \DREG and places the result in the matching
 * half of \FB.
 *
 * \DREG is a 32-bit register containing *two* original destination RGB565
 *       pixels, with the even one in the low 16 bits and the odd one in
 *       the high 16 bits.  The macro does not write to it.
 *
 * \SRC  is a 32-bit 0xAABBGGRR pixel value, with pre-multiplied colors.
 *       The macro does not write to it.
 *
 * \FB   is the target register that will contain the blended pixel values.
 *       With ODD=0 it is overwritten: the blended even pixel lands in
 *       bits 15:0 and bits 31:16 end up zero.  With ODD=1 the blended odd
 *       pixel is ORed into bits 31:16 of the value already in \FB, so
 *       those bits must be zero beforehand.  To blend a pair, expand
 *       ODD=0 first and ODD=1 second on the same \FB.
 *
 * \ODD  is either 0 or 1 and indicates if we're blending the lower or
 *       upper 16-bit pixel in DREG into FB.
 *
 * For every channel c (5-bit red, 6-bit green, 5-bit blue):
 *
 *       f   = 0x100 - (sA + (sA >> 7))         sA = \SRC >> 24
 *       out = top bits of src_c + ((dst_c * f) >> 8)
 *
 * and out is clamped to the channel maximum (0x1F or 0x3F).
 *
 * clobbered: w6, w7, w9, w16, w17 and the NZCV flags.  None of the three
 * register arguments may be one of those registers.
 *
 * x18 is intentionally not used as scratch: AAPCS64 sets it aside as the
 * platform register, and platforms may keep live state in it (a shadow
 * call stack pointer, for instance), so overwriting it is not safe.
 */

.macro pixel,   DREG, SRC, FB, ODD

    // SRC = 0xAABBGGRR
    // w7 = 0x100 - (sA + (sA >> 7)): weight of the destination, 0..0x100.
    // Adding sA >> 7 stretches 0xFF to 0x100 so opaque means weight 0.
    lsr     w7, \SRC, #24                   // sA
    add     w7, w7, w7, lsr #7              // sA + (sA >> 7)
    mov     w6, #0x100
    sub     w7, w6, w7                      // 0x100 - (sA + (sA >> 7))

.if \ODD // Blending odd pixel present in top 16 bits of DREG register

    // red: destination bits 31:27, source bits 7:3
    lsr     w16, \DREG, #(16 + 11)          // dR
    mul     w16, w7, w16                    // dR * f
    lsr     w6, \SRC, #3
    and     w6, w6, #0x1F                   // sR
    add     w16, w6, w16, lsr #8            // sR + ((dR * f) >> 8)
    cmp     w16, #0x1F
    orr     w17, \FB, #(0x1F<<(16 + 11))    // candidate: saturated
    orr     w9, \FB, w16, lsl #(16 + 11)    // candidate: computed value
    csel    \FB, w17, w9, hi

    // green: destination bits 26:21, source bits 15:10
    and     w6, \DREG, #(0x3F<<(16 + 5))
    lsr     w17, w6, #(16 + 5)              // dG
    mul     w6, w7, w17                     // dG * f
    lsr     w16, \SRC, #(8 + 2)
    and     w16, w16, #0x3F                 // sG
    add     w6, w16, w6, lsr #8             // sG + ((dG * f) >> 8)
    cmp     w6, #0x3F
    orr     w17, \FB, #(0x3F<<(16 + 5))
    orr     w9, \FB, w6, lsl #(16 + 5)
    csel    \FB, w17, w9, hi

    // blue: destination bits 20:16, source bits 23:19
    and     w16, \DREG, #(0x1F << 16)
    lsr     w17, w16, #16                   // dB
    mul     w16, w7, w17                    // dB * f
    lsr     w6, \SRC, #(8 + 8 + 3)
    and     w6, w6, #0x1F                   // sB
    add     w16, w6, w16, lsr #8            // sB + ((dB * f) >> 8)
    cmp     w16, #0x1F
    orr     w17, \FB, #(0x1F << 16)
    orr     w9, \FB, w16, lsl #16
    csel    \FB, w17, w9, hi

.else // Blending even pixel present in bottom 16 bits of DREG register

    // red: destination bits 15:11, source bits 7:3.
    // This step does not read \FB, so it also resets whatever \FB held.
    lsr     w16, \DREG, #11
    and     w16, w16, #0x1F                 // dR
    mul     w16, w7, w16                    // dR * f
    lsr     w6, \SRC, #3
    and     w6, w6, #0x1F                   // sR
    add     w16, w6, w16, lsr #8            // sR + ((dR * f) >> 8)
    cmp     w16, #0x1F
    mov     w17, #(0x1F<<11)                // candidate: saturated
    lsl     w9, w16, #11                    // candidate: computed value
    csel    \FB, w17, w9, hi

    // green: destination bits 10:5, source bits 15:10.
    // dG stays in place (still shifted left by 5) through the multiply;
    // the extra 5 is folded into the final shift.
    and     w6, \DREG, #(0x3F<<5)
    mul     w6, w7, w6                      // (dG << 5) * f
    lsr     w16, \SRC, #(8 + 2)
    and     w16, w16, #0x3F                 // sG
    add     w6, w16, w6, lsr #(5 + 8)       // sG + ((dG * f) >> 8)
    cmp     w6, #0x3F
    orr     w17, \FB, #(0x3F<<5)
    orr     w9, \FB, w6, lsl #5
    csel    \FB, w17, w9, hi

    // blue: destination bits 4:0, source bits 23:19
    and     w16, \DREG, #0x1F               // dB
    mul     w16, w7, w16                    // dB * f
    lsr     w6, \SRC, #(8 + 8 + 3)
    and     w6, w6, #0x1F                   // sB
    add     w16, w6, w16, lsr #8            // sB + ((dB * f) >> 8)
    cmp     w16, #0x1F
    orr     w17, \FB, #0x1F
    orr     w9, \FB, w16
    csel    \FB, w17, w9, hi

.endif // End of blending even pixel

.endm // End of pixel macro

// scanline_t32cb16blend_arm64
//
// Blends a run of 32-bit source pixels over a run of 16-bit destination
// pixels in place, using the pixel macro for every pixel.
//
// In:
//   x0 = dst ptr (16-bit pixels; handled correctly when only 2-byte
//        aligned -- a misaligned start is peeled off as a single pixel)
//   x1 = src ptr (32-bit pixels)
//   w2 = count, in pixels; compared as an unsigned value throughout
//   NOTE(review): presumably called from C as
//   (uint16_t *dst, const uint32_t *src, size_t count) -- confirm against
//   the caller; only the low 32 bits of the count are looked at.
//
// Working registers:
//   x0, x1 = advanced past the pixels already handled
//   w2     = pixels remaining, biased by the pending subtraction
//   w3     = destination pixel(s) loaded from dst
//   w4     = source pixel for the even (low halfword) destination pixel
//   w5     = source pixel for the odd (high halfword) destination pixel
//   w12    = blended result written back to dst
//   plus every register the pixel macro documents as clobbered
//
// This routine itself touches no stack and makes no calls.

scanline_t32cb16blend_arm64:

    // align DST to 32 bits: if dst is only halfword aligned, blend one
    // pixel on its own so the main loop can load/store dst as words.
    tst     x0, #0x3
    b.eq    .Lt32cb16_aligned
    subs    w2, w2, #1
    b.lo    .Lt32cb16_done              // count was 0

    // Blend exactly one pixel (used for the leading and trailing pixel).
.Lt32cb16_single:
    ldr     w4, [x1], #4
    ldrh    w3, [x0]
    pixel   w3, w4, w12, 0
    strh    w12, [x0], #2

.Lt32cb16_aligned:
    subs    w2, w2, #2                  // w2 = remaining - 2
    b.lo    .Lt32cb16_tail              // fewer than two pixels left

    // The main loop is unrolled twice and processes 4 pixels.
    // Invariant at the top: at least two pixels remain and w2 already has
    // those two subtracted.
.Lt32cb16_loop:
    ldp     w4, w5, [x1], #8
    add     x0, x0, #4
    // both source pixels are all zero: skip the pair, dst stays as it is
    orr     w3, w4, w5
    cbz     w3, .Lt32cb16_next

    // load the destination pair, blend both halves, store it back
    ldr     w3, [x0, #-4]
    pixel   w3, w4, w12, 0
    pixel   w3, w5, w12, 1
    str     w12, [x0, #-4]

    // 2nd copy of the loop body.  The count is unsigned, so the exit test
    // uses LO like every other test on w2 (a signed LT would stop early
    // for counts of 2^31 and above).
    subs    w2, w2, #2
    b.lo    .Lt32cb16_tail
    ldp     w4, w5, [x1], #8
    add     x0, x0, #4
    orr     w3, w4, w5
    cbz     w3, .Lt32cb16_next
    ldr     w3, [x0, #-4]
    pixel   w3, w4, w12, 0
    pixel   w3, w5, w12, 1
    str     w12, [x0, #-4]

.Lt32cb16_next:
    subs    w2, w2, #2
    b.hs    .Lt32cb16_loop

    // Here w2 is -2 (nothing left) or -1 (one pixel left).  Adding 1
    // carries out only for -1, leaving w2 = 0 for the single-pixel path,
    // which reloads its source pixel from x1.
.Lt32cb16_tail:
    adds    w2, w2, #1
    b.hs    .Lt32cb16_single

.Lt32cb16_done:
    ret