52cf0a800d
tested by ajacoutot@, krw@ and on a bulk ports build by landry@.
331 lines
6.8 KiB
ArmAsm
331 lines
6.8 KiB
ArmAsm
/*
|
|
* Copyright © 2008 Mozilla Corporation
|
|
*
|
|
* Permission to use, copy, modify, distribute, and sell this software and its
|
|
* documentation for any purpose is hereby granted without fee, provided that
|
|
* the above copyright notice appear in all copies and that both that
|
|
* copyright notice and this permission notice appear in supporting
|
|
* documentation, and that the name of Mozilla Corporation not be used in
|
|
* advertising or publicity pertaining to distribution of the software without
|
|
* specific, written prior permission. Mozilla Corporation makes no
|
|
* representations about the suitability of this software for any purpose. It
|
|
* is provided "as is" without express or implied warranty.
|
|
*
|
|
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
|
|
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
|
|
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
|
|
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
|
|
* SOFTWARE.
|
|
*
|
|
* Author: Jeff Muizelaar (jeff@infidigm.net)
|
|
*
|
|
*/
|
|
|
|
/* Prevent the stack from becoming executable */
|
|
#if defined(__linux__) && defined(__ELF__)
|
|
.section .note.GNU-stack,"",%progbits
|
|
#endif
|
|
|
|
/* Assembler setup for the routines that follow. */
.text                  /* emit into the code section */
|
|
.arch armv6            /* accept ARMv6 instructions; uqadd8, uxtb16 and uxtab16 are used below */
|
|
.object_arch armv4     /* per the GNU as manual: overrides the architecture recorded in the object attributes */
|
|
.arm                   /* ARM (not Thumb) instruction encoding */
|
|
.altmacro              /* alternate macro syntax; NOTE(review): presumably needed because pixman_asm_function uses its parameter without a backslash */
|
|
|
|
/* Supplementary macro for setting function attributes */
|
|
/*
 * pixman_asm_function <name>
 *
 * Opens the definition of a function whose symbol is the macro argument:
 *   - starts a .func scope for it,
 *   - makes the symbol global,
 *   - when __ELF__ is defined, also gives it hidden visibility and the
 *     %function symbol type,
 *   - finally places the label itself.
 * The instructions of the function follow the macro invocation; every
 * user in this file closes the scope with .endfunc.
 *
 * NOTE(review): the parameter is referenced without a leading backslash,
 * which presumably relies on the .altmacro mode selected earlier in this
 * file - confirm before changing the macro mode.
 */
.macro pixman_asm_function fname
|
|
.func fname
|
|
.global fname
|
|
#ifdef __ELF__
|
|
.hidden fname
|
|
.type fname, %function
|
|
#endif
|
|
fname:
|
|
.endm
|
|
|
|
/*
|
|
* The code below was generated by gcc 4.3.4 from the commented out
|
|
* functions in 'pixman-arm-simd.c' file with the following optimization
|
|
* options: "-O3 -mcpu=arm1136jf-s -fomit-frame-pointer"
|
|
*
|
|
* TODO: replace gcc generated code with hand tuned versions because
|
|
* the code quality is not very good, introduce symbolic register
|
|
* aliases for better readability and maintainability.
|
|
*/
|
|
|
|
/*
 * pixman_composite_add_8_8_asm_armv6
 *
 * Adds one byte plane into another with per-byte unsigned saturation
 * (uqadd8), row by row.
 *
 * Arguments as consumed by the code:
 *   r0             number of bytes processed per row
 *   r1             number of rows
 *   r2             pointer to the plane that is read, added to and written back
 *   r3             byte offset added to the r2 pointer after every row
 *   1st stack arg  pointer to the plane that is only read
 *   2nd stack arg  byte offset added to that pointer after every row
 * NOTE(review): presumably (width, height, dst, dst_stride, src, src_stride);
 * confirm the names against the C prototype.
 *
 * A row starts in a byte-at-a-time loop. As soon as both cursors are
 * 4-byte aligned the code switches to a word loop (four bytes per uqadd8)
 * and then finishes the last 0-3 bytes of the row in a second byte loop.
 *
 * r4-r11 are saved on entry and restored on exit; r0-r3 and r12 are
 * overwritten.
 */
pixman_asm_function pixman_composite_add_8_8_asm_armv6
|
|
push {r4, r5, r6, r7, r8, r9, r10, r11}   /* 32 bytes of saved registers */
|
|
mov r10, r1            /* r10 = row counter */
|
|
sub sp, sp, #4         /* one word of local storage */
|
|
subs r10, r10, #1      /* pre-decrement; carry is clear only if the row count was 0 */
|
|
mov r11, r0            /* r11 = bytes per row */
|
|
mov r8, r2             /* r8 = current row of the read/write plane */
|
|
str r3, [sp]           /* [sp] = per-row offset for r8 */
|
|
ldr r7, [sp, #36]      /* 1st stack argument (4 local + 32 saved bytes above sp): read-only plane */
|
|
bcc 0f                 /* zero rows: nothing to do (flags still from the subs above) */
|
|
 6: cmp r11, #0        /* --- start of a row --- */
|
|
beq 1f                 /* empty row: only advance the row pointers */
|
|
orr r3, r8, r7         /* combine both addresses to test their alignment at once */
|
|
tst r3, #3
|
|
beq 2f                 /* both 4-byte aligned: go straight to the word path */
|
|
mov r1, r8             /* r1 = read/write cursor */
|
|
mov r0, r7             /* r0 = read-only cursor */
|
|
mov r12, r11           /* r12 = bytes left in this row */
|
|
b 3f
|
|
 5: tst r3, #3         /* r3 = r1 | r0 from the previous iteration */
|
|
beq 4f                 /* both cursors became aligned: switch to the word path */
|
|
 3: ldrb r2, [r0], #1  /* byte loop: load one byte, advance read-only cursor */
|
|
subs r12, r12, #1      /* flags are consumed by the bne at the end of the loop */
|
|
ldrb r3, [r1]
|
|
uqadd8 r3, r2, r3      /* saturating add; only the low byte is stored */
|
|
strb r3, [r1], #1
|
|
orr r3, r1, r0         /* alignment information for the test at 5: */
|
|
bne 5b                 /* more bytes left in this row */
|
|
 1: ldr r3, [sp]       /* --- row done: step both row pointers --- */
|
|
add r8, r8, r3
|
|
ldr r3, [sp, #40]      /* 2nd stack argument: per-row offset for r7 */
|
|
add r7, r7, r3
|
|
 10: subs r10, r10, #1
|
|
bcs 6b                 /* loop until the row counter borrows */
|
|
 0: add sp, sp, #4     /* epilogue */
|
|
pop {r4, r5, r6, r7, r8, r9, r10, r11}
|
|
bx lr
|
|
 2: mov r12, r11       /* word path entered at the start of a row: set up cursors and count */
|
|
mov r1, r8
|
|
mov r0, r7
|
|
 4: cmp r12, #3        /* r12 = bytes left; the gt instructions run only if more than 3 */
|
|
subgt r6, r12, #4      /* r6 = count - 4 */
|
|
movgt r9, r12          /* r9 = count on entry to the word loop */
|
|
lsrgt r5, r6, #2       /* r5 = (count - 4) / 4 */
|
|
addgt r3, r5, #1       /* r3 = number of whole words */
|
|
movgt r12, #0          /* r12 becomes the byte offset inside the word loop */
|
|
lslgt r4, r3, #2       /* r4 = byte offset at which the word loop stops */
|
|
ble 7f                 /* 3 bytes or fewer: tail byte loop only */
|
|
 8: ldr r3, [r0, r12]  /* word loop: four bytes per iteration */
|
|
ldr r2, [r1, r12]
|
|
uqadd8 r3, r3, r2
|
|
str r3, [r1, r12]
|
|
add r12, r12, #4
|
|
cmp r12, r4
|
|
bne 8b
|
|
sub r3, r9, #4         /* r3 = ((count - 4) & ~3) + 4 = bytes handled by the word loop */
|
|
bic r3, r3, #3
|
|
add r3, r3, #4
|
|
subs r12, r6, r5, lsl #2   /* r12 = leftover bytes (0-3); sets Z when there are none */
|
|
add r1, r1, r3         /* move both cursors past the words just processed */
|
|
add r0, r0, r3
|
|
beq 1b                 /* no leftover bytes: row done (flags from the subs above) */
|
|
 7: mov r4, #0         /* tail byte loop: r4 = index, r12 = byte count */
|
|
 9: ldrb r3, [r1, r4]
|
|
ldrb r2, [r0, r4]
|
|
uqadd8 r3, r2, r3
|
|
strb r3, [r1, r4]
|
|
add r4, r4, #1
|
|
cmp r4, r12
|
|
bne 9b
|
|
ldr r3, [sp]           /* same row-pointer update as at 1: */
|
|
add r8, r8, r3
|
|
ldr r3, [sp, #40]
|
|
add r7, r7, r3
|
|
b 10b
|
|
.endfunc
|
|
|
|
/*
 * pixman_composite_over_8888_8888_asm_armv6
 *
 * For every 32-bit pixel computes
 *     d = saturating_add8(s, d * (255 - (s >> 24)) / 255)
 * where s is the word loaded through the read-only pointer, d is the word
 * loaded from and stored back through the read/write pointer, and the
 * multiply/divide is applied to each of the four bytes of d separately.
 *
 * Arguments as consumed by the code:
 *   r0             pixels per row
 *   r1             number of rows
 *   r2             read/write pixel pointer
 *   r3             row step of that pointer in 32-bit units (shifted left by 2 here)
 *   1st stack arg  read-only pixel pointer
 *   2nd stack arg  row step of that pointer in 32-bit units (shifted left by 2 here)
 * NOTE(review): presumably (width, height, dst, dst_stride, src, src_stride);
 * confirm the names against the C prototype.
 *
 * Stack frame (20 bytes):
 *   [sp]      read/write row step in bytes
 *   [sp, #4]  read/write pointer for the next row
 *   [sp, #8]  read-only row step in bytes
 *   [sp, #12] number of rows
 *   [sp, #16] pixels per row
 *
 * r4-r11 are saved on entry and restored on exit; r0-r3 and r12 are
 * overwritten.
 */
pixman_asm_function pixman_composite_over_8888_8888_asm_armv6
|
|
push {r4, r5, r6, r7, r8, r9, r10, r11}   /* 32 bytes of saved registers */
|
|
sub sp, sp, #20
|
|
cmp r1, #0             /* flags are consumed by the beq below; nothing in between changes them */
|
|
mov r12, r2            /* r12 = read/write pointer for the current row */
|
|
str r1, [sp, #12]
|
|
str r0, [sp, #16]
|
|
ldr r2, [sp, #52]      /* 1st stack argument (20 local + 32 saved bytes above sp) */
|
|
beq 0f                 /* zero rows: nothing to do */
|
|
lsl r3, r3, #2         /* row step: 32-bit units -> bytes */
|
|
str r3, [sp]
|
|
ldr r3, [sp, #56]      /* 2nd stack argument */
|
|
mov r10, #0            /* r10 = index of the current row */
|
|
lsl r3, r3, #2         /* row step: 32-bit units -> bytes */
|
|
str r3, [sp, #8]
|
|
mov r11, r3
|
|
b 1f
|
|
 6: ldr r11, [sp, #8]  /* reload the read-only row step; r11 held 255 during the pixel loop */
|
|
 1: ldr r9, [sp]       /* --- start of a row --- */
|
|
mov r0, r12            /* r0 = read/write cursor */
|
|
add r12, r12, r9
|
|
mov r1, r2             /* r1 = read-only cursor */
|
|
str r12, [sp, #4]      /* remember the read/write pointer of the next row */
|
|
add r2, r2, r11        /* r2 = read-only pointer of the next row (untouched by the pixel loop) */
|
|
ldr r12, [sp, #16]     /* r12 = pixels left in this row */
|
|
ldr r3, =0x00800080    /* 0x80 added to each 16-bit lane before the divide, for rounding */
|
|
ldr r9, =0xff00ff00    /* mask selecting the high byte of each 16-bit lane */
|
|
mov r11, #255
|
|
cmp r12, #0
|
|
beq 4f                 /* empty row */
|
|
 5: ldr r5, [r1], #4   /* r5 = s, advance read-only cursor */
|
|
ldr r4, [r0]           /* r4 = d */
|
|
sub r8, r11, r5, lsr #24   /* r8 = 255 - top byte of s */
|
|
uxtb16 r6, r4          /* r6 = bytes 0 and 2 of d, one per 16-bit lane */
|
|
uxtb16 r7, r4, ror #8  /* r7 = bytes 1 and 3 of d, one per 16-bit lane */
|
|
mla r6, r6, r8, r3     /* t = byte * r8 + 0x80 per lane; at most 255*255+128, so lanes cannot overflow */
|
|
mla r7, r7, r8, r3
|
|
uxtab16 r6, r6, r6, ror #8   /* t += t >> 8 per lane; the high byte of each lane is now about t / 255 */
|
|
uxtab16 r7, r7, r7, ror #8
|
|
and r7, r7, r9         /* keep the results for bytes 1 and 3, already in their final positions */
|
|
uxtab16 r6, r7, r6, ror #8   /* drop the results for bytes 0 and 2 into the empty low bytes */
|
|
uqadd8 r5, r6, r5      /* per-byte saturating add of s */
|
|
str r5, [r0], #4       /* store the pixel, advance read/write cursor */
|
|
subs r12, r12, #1
|
|
bne 5b
|
|
 4: ldr r3, [sp, #12]  /* --- row done --- */
|
|
add r10, r10, #1
|
|
cmp r10, r3            /* flags are consumed by the bne below; the ldr does not change them */
|
|
ldr r12, [sp, #4]      /* r12 = read/write pointer of the next row */
|
|
bne 6b
|
|
 0: add sp, sp, #20    /* epilogue */
|
|
pop {r4, r5, r6, r7, r8, r9, r10, r11}
|
|
bx lr
|
|
.endfunc
|
|
|
|
/*
 * pixman_composite_over_8888_n_8888_asm_armv6
 *
 * For every 32-bit pixel computes, byte by byte,
 *     s' = s * m / 255
 *     d  = saturating_add8(s', d * (255 - (s' >> 24)) / 255)
 * where s is the word loaded through the read-only pointer, d is the word
 * loaded from and stored back through the read/write pointer, and m is a
 * single byte taken once from the 3rd stack argument.
 *
 * Arguments as consumed by the code:
 *   r0             pixels per row
 *   r1             number of rows
 *   r2             read/write pixel pointer
 *   r3             row step of that pointer in 32-bit units (shifted left by 2 here)
 *   1st stack arg  read-only pixel pointer
 *   2nd stack arg  row step of that pointer in 32-bit units (shifted left by 2 here)
 *   3rd stack arg  only the byte at offset 3 is read (bits 31:24 on a
 *                  little-endian target)
 * NOTE(review): presumably (width, height, dst, dst_stride, src, src_stride,
 * mask) with the mask alpha in the top byte; confirm against the C prototype.
 *
 * Stack frame (28 bytes):
 *   [sp]      read/write pointer for the next row
 *   [sp, #4]  read-only pointer for the next row
 *   [sp, #8]  read-only row step in bytes
 *   [sp, #12] number of rows
 *   [sp, #16] pixels per row
 *   [sp, #20] read/write row step in bytes
 *   [sp, #24] the multiplier byte m
 *
 * r4-r11 are saved on entry and restored on exit; r0-r3 and r12 are
 * overwritten.
 */
pixman_asm_function pixman_composite_over_8888_n_8888_asm_armv6
|
|
push {r4, r5, r6, r7, r8, r9, r10, r11}   /* 32 bytes of saved registers */
|
|
sub sp, sp, #28
|
|
cmp r1, #0             /* flags are consumed by the beq below; nothing in between changes them */
|
|
str r1, [sp, #12]
|
|
ldrb r1, [sp, #71]     /* byte 3 of the 3rd stack argument (it starts at sp + 68) */
|
|
mov r12, r2            /* r12 = read/write pointer for the current row */
|
|
str r0, [sp, #16]
|
|
ldr r2, [sp, #60]      /* 1st stack argument (28 local + 32 saved bytes above sp) */
|
|
str r1, [sp, #24]      /* keep m for the pixel loop */
|
|
beq 0f                 /* zero rows: nothing to do */
|
|
lsl r3, r3, #2         /* row step: 32-bit units -> bytes */
|
|
str r3, [sp, #20]
|
|
ldr r3, [sp, #64]      /* 2nd stack argument */
|
|
mov r10, #0            /* r10 = index of the current row */
|
|
lsl r3, r3, #2         /* row step: 32-bit units -> bytes */
|
|
str r3, [sp, #8]
|
|
mov r11, r3
|
|
b 1f
|
|
 5: ldr r11, [sp, #8]  /* reload the read-only row step; r11 held 255 during the pixel loop */
|
|
 1: ldr r4, [sp, #20]  /* --- start of a row --- */
|
|
mov r0, r12            /* r0 = read/write cursor */
|
|
mov r1, r2             /* r1 = read-only cursor */
|
|
add r12, r12, r4
|
|
add r2, r2, r11
|
|
str r12, [sp]          /* remember both next-row pointers; r12 and r2 are reused below */
|
|
str r2, [sp, #4]
|
|
ldr r12, [sp, #16]     /* r12 = pixels left in this row */
|
|
ldr r2, =0x00800080    /* 0x80 added to each 16-bit lane before the divide, for rounding */
|
|
ldr r3, [sp, #24]      /* r3 = m */
|
|
mov r11, #255
|
|
cmp r12, #0
|
|
beq 3f                 /* empty row */
|
|
 4: ldr r5, [r1], #4   /* r5 = s, advance read-only cursor */
|
|
ldr r4, [r0]           /* r4 = d */
|
|
uxtb16 r6, r5          /* r6 = bytes 0 and 2 of s, one per 16-bit lane */
|
|
uxtb16 r7, r5, ror #8  /* r7 = bytes 1 and 3 of s, one per 16-bit lane */
|
|
mla r6, r6, r3, r2     /* t = byte * m + 0x80 per lane */
|
|
mla r7, r7, r3, r2
|
|
uxtab16 r6, r6, r6, ror #8   /* t += t >> 8 per lane */
|
|
uxtab16 r7, r7, r7, ror #8
|
|
uxtb16 r6, r6, ror #8  /* take the high byte of each lane: about t / 255 */
|
|
uxtb16 r7, r7, ror #8
|
|
orr r5, r6, r7, lsl #8 /* r5 = s', the four scaled bytes re-interleaved */
|
|
uxtb16 r6, r4          /* same split for d */
|
|
uxtb16 r7, r4, ror #8
|
|
sub r8, r11, r5, lsr #24   /* r8 = 255 - top byte of s' */
|
|
mla r6, r6, r8, r2     /* t = byte * r8 + 0x80 per lane */
|
|
mla r7, r7, r8, r2
|
|
uxtab16 r6, r6, r6, ror #8   /* t += t >> 8 per lane */
|
|
uxtab16 r7, r7, r7, ror #8
|
|
uxtb16 r6, r6, ror #8  /* take the high byte of each lane */
|
|
uxtb16 r7, r7, ror #8
|
|
orr r6, r6, r7, lsl #8 /* r6 = scaled d */
|
|
uqadd8 r5, r6, r5      /* per-byte saturating add of s' */
|
|
str r5, [r0], #4       /* store the pixel, advance read/write cursor */
|
|
subs r12, r12, #1
|
|
bne 4b
|
|
 3: ldr r1, [sp, #12]  /* --- row done --- */
|
|
add r10, r10, #1
|
|
cmp r10, r1            /* flags are consumed by the bne below; the loads do not change them */
|
|
ldr r12, [sp]          /* restore both next-row pointers */
|
|
ldr r2, [sp, #4]
|
|
bne 5b
|
|
 0: add sp, sp, #28    /* epilogue */
|
|
pop {r4, r5, r6, r7, r8, r9, r10, r11}
|
|
bx lr
|
|
.endfunc
|
|
|
|
/*
 * pixman_composite_over_n_8_8888_asm_armv6
 *
 * For every pixel computes, byte by byte,
 *     c' = c * m / 255
 *     d  = saturating_add8(c', d * (255 - (c' >> 24)) / 255)
 * where c is a constant 32-bit word taken once from the 1st stack
 * argument, m is a byte loaded through the byte pointer, and d is the
 * 32-bit word loaded from and stored back through the read/write pointer.
 *
 * Arguments as consumed by the code:
 *   r0             pixels per row
 *   r1             number of rows
 *   r2             read/write pixel pointer
 *   r3             row step of that pointer in 32-bit units (shifted left by 2 here)
 *   1st stack arg  the constant word c
 *   2nd stack arg  not read by this function
 *   3rd stack arg  byte pointer (one byte consumed per pixel)
 *   4th stack arg  row step of the byte pointer, added unscaled
 * NOTE(review): presumably (width, height, dst, dst_stride, src colour,
 * unused, mask, mask_stride); confirm against the C prototype.
 *
 * Stack frame (28 bytes):
 *   [sp]      byte pointer for the next row
 *   [sp, #4]  bytes 1 and 3 of c, one per 16-bit lane
 *   [sp, #8]  read/write pointer for the next row
 *   [sp, #12] number of rows
 *   [sp, #16] pixels per row
 *   [sp, #20] bytes 0 and 2 of c, one per 16-bit lane
 *   [sp, #24] read/write row step in bytes
 *
 * r4-r11 are saved on entry and restored on exit; r0-r3 and r12 are
 * overwritten.
 */
pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
|
|
push {r4, r5, r6, r7, r8, r9, r10, r11}   /* 32 bytes of saved registers */
|
|
sub sp, sp, #28
|
|
cmp r1, #0             /* flags are consumed by the beq below; nothing in between changes them */
|
|
ldr r9, [sp, #60]      /* 1st stack argument (28 local + 32 saved bytes above sp): c */
|
|
str r1, [sp, #12]
|
|
bic r1, r9, #-16777216 /* clear bits 31:24 (-16777216 is 0xff000000) */
|
|
str r1, [sp, #20]
|
|
mov r12, r2            /* r12 = read/write pointer for the current row */
|
|
lsr r1, r9, #8         /* bring bytes 1 and 3 of c down to the lane positions */
|
|
ldr r2, [sp, #20]
|
|
bic r1, r1, #-16777216 /* bits 31:24 are already zero after the shift above */
|
|
bic r2, r2, #65280     /* clear bits 15:8 (65280 is 0xff00): r2 = c & 0x00ff00ff */
|
|
bic r1, r1, #65280     /* r1 = (c >> 8) & 0x00ff00ff */
|
|
str r2, [sp, #20]
|
|
str r0, [sp, #16]
|
|
str r1, [sp, #4]
|
|
ldr r2, [sp, #68]      /* 3rd stack argument: byte pointer */
|
|
beq 0f                 /* zero rows: nothing to do */
|
|
lsl r3, r3, #2         /* row step: 32-bit units -> bytes */
|
|
str r3, [sp, #24]
|
|
mov r0, #0             /* r0 = index of the current row */
|
|
b 1f
|
|
 5: ldr r3, [sp, #24]  /* reload the read/write row step; r3 was reused in the pixel loop */
|
|
 1: ldr r4, [sp, #72]  /* --- start of a row --- 4th stack argument: byte-pointer row step */
|
|
mov r10, r12           /* r10 = read/write cursor */
|
|
mov r1, r2             /* r1 = byte cursor */
|
|
add r12, r12, r3
|
|
add r2, r2, r4
|
|
str r12, [sp, #8]      /* remember both next-row pointers; r12 and r2 are reused below */
|
|
str r2, [sp]
|
|
ldr r12, [sp, #16]     /* r12 = pixels left in this row */
|
|
ldr r11, =0x00800080   /* 0x80 added to each 16-bit lane before the divide, for rounding */
|
|
ldr r2, [sp, #4]       /* r2 = bytes 1 and 3 of c */
|
|
ldr r3, [sp, #20]      /* r3 = bytes 0 and 2 of c */
|
|
cmp r12, #0
|
|
beq 3f                 /* empty row */
|
|
 4: ldrb r5, [r1], #1  /* r5 = m, advance byte cursor */
|
|
ldr r4, [r10]          /* r4 = d */
|
|
mla r6, r3, r5, r11    /* t = byte * m + 0x80 per lane */
|
|
mla r7, r2, r5, r11
|
|
uxtab16 r6, r6, r6, ror #8   /* t += t >> 8 per lane */
|
|
uxtab16 r7, r7, r7, ror #8
|
|
uxtb16 r6, r6, ror #8  /* take the high byte of each lane: about t / 255 */
|
|
uxtb16 r7, r7, ror #8
|
|
orr r5, r6, r7, lsl #8 /* r5 = c', the four scaled bytes re-interleaved */
|
|
uxtb16 r6, r4          /* r6 = bytes 0 and 2 of d, one per 16-bit lane */
|
|
uxtb16 r7, r4, ror #8  /* r7 = bytes 1 and 3 of d, one per 16-bit lane */
|
|
mvn r8, r5
|
|
lsr r8, r8, #24        /* r8 = 255 - top byte of c' */
|
|
mla r6, r6, r8, r11    /* t = byte * r8 + 0x80 per lane */
|
|
mla r7, r7, r8, r11
|
|
uxtab16 r6, r6, r6, ror #8   /* t += t >> 8 per lane */
|
|
uxtab16 r7, r7, r7, ror #8
|
|
uxtb16 r6, r6, ror #8  /* take the high byte of each lane */
|
|
uxtb16 r7, r7, ror #8
|
|
orr r6, r6, r7, lsl #8 /* r6 = scaled d */
|
|
uqadd8 r5, r6, r5      /* per-byte saturating add of c' */
|
|
str r5, [r10], #4      /* store the pixel, advance read/write cursor */
|
|
subs r12, r12, #1
|
|
bne 4b
|
|
 3: ldr r4, [sp, #12]  /* --- row done --- */
|
|
add r0, r0, #1
|
|
cmp r0, r4             /* flags are consumed by the bne below; the loads do not change them */
|
|
ldr r12, [sp, #8]      /* restore both next-row pointers */
|
|
ldr r2, [sp]
|
|
bne 5b
|
|
 0: add sp, sp, #28    /* epilogue */
|
|
pop {r4, r5, r6, r7, r8, r9, r10, r11}
|
|
bx lr
|
|
.endfunc
|