xenocara/lib/pixman/pixman/pixman-arm-simd-asm.S
matthieu 52cf0a800d Update to pixman 0.20.0.
tested by ajacoutot@, krw@ and on a bulk ports build by landry@.
2010-11-14 13:42:49 +00:00

331 lines
6.8 KiB
ArmAsm

/*
* Copyright © 2008 Mozilla Corporation
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Mozilla Corporation not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Mozilla Corporation makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
* Author: Jeff Muizelaar (jeff@infidigm.net)
*
*/
/* Prevent the stack from becoming executable */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
.text
/*
 * Assemble for ARMv6: the routines in this file use the ARMv6 media
 * instructions uqadd8, uxtb16 and uxtab16.
 */
.arch armv6
/*
 * Override the architecture that is recorded in the object file.
 * NOTE(review): presumably this allows the object to be linked into builds
 * for older cores that only call these routines after a run-time CPU
 * check - confirm against the build system and the callers.
 */
.object_arch armv4
/* ARM (not Thumb) instruction encoding. */
.arm
/*
 * Alternate macro mode: the pixman_asm_function macro in this file refers
 * to its parameter as bare 'fname' rather than with a backslash prefix.
 */
.altmacro
/*
 * Supplementary macro for setting function attributes.
 *
 * Usage: pixman_asm_function fname
 *
 * Opens a function called 'fname': starts a .func debug region, makes the
 * symbol global and defines the 'fname' label at the current location.
 * When __ELF__ is defined the symbol is additionally given hidden
 * visibility and the %function type.
 *
 * The macro does not close the .func region; every use in this file is
 * followed by an explicit .endfunc after the function body.
 */
.macro pixman_asm_function fname
.func fname
.global fname
#ifdef __ELF__
.hidden fname
.type fname, %function
#endif
fname:
.endm
/*
* The code below was generated by gcc 4.3.4 from the commented-out
* functions in the 'pixman-arm-simd.c' file, using the following
* optimization options: "-O3 -mcpu=arm1136jf-s -fomit-frame-pointer"
*
* TODO: replace the gcc-generated code with hand-tuned versions, because
* the code quality is not very good, and introduce symbolic register
* aliases for better readability and maintainability.
*/
/*
 * pixman_composite_add_8_8_asm_armv6
 *
 * Adds a rectangle of source bytes into a rectangle of destination bytes
 * with per-byte unsigned saturation: dst[i] = uqadd8(src[i], dst[i]).
 *
 * Arguments as consumed by the code:
 *   r0                     bytes per row
 *   r1                     number of rows
 *   r2                     destination pointer (read and written)
 *   r3                     byte offset added to the destination pointer
 *                          after each row
 *   first stack argument   source pointer (only read)
 *   second stack argument  byte offset added to the source pointer after
 *                          each row
 *
 * r4-r11 are saved and restored; r0-r3, r12 and the flags are overwritten.
 *
 * Register roles in the row loop:
 *   r11 = bytes per row, r10 = rows left minus one,
 *   r8  = destination row start, r7 = source row start,
 *   r1 / r0 = destination / source cursor inside the row,
 *   r12 = bytes left in the row (or byte offset inside the word loop).
 *
 * Stack after the prologue (32 bytes pushed + 4 bytes of locals):
 *   [sp, #0]  copy of r3, [sp, #36] first and [sp, #40] second stack
 *   argument.
 */
pixman_asm_function pixman_composite_add_8_8_asm_armv6
push {r4, r5, r6, r7, r8, r9, r10, r11}
mov r10, r1
sub sp, sp, #4
/*
 * r10 = rows - 1. The flags produced here are consumed by the bcc below;
 * the mov/str/ldr in between do not modify them. A borrow (carry clear)
 * means the row count was 0, so the function returns immediately.
 */
subs r10, r10, #1
mov r11, r0
mov r8, r2
str r3, [sp]
ldr r7, [sp, #36]
bcc 0f
/* 6: start of a row. An empty row goes straight to the row advance at 1. */
6: cmp r11, #0
beq 1f
/* If both row pointers are word aligned, use the word path at 2. */
orr r3, r8, r7
tst r3, #3
beq 2f
/* Otherwise process single bytes first. */
mov r1, r8
mov r0, r7
mov r12, r11
b 3f
/*
 * 5: reached after each leading byte while bytes remain; r3 holds
 * (dst cursor | src cursor). Once both cursors are word aligned the rest
 * of the row is handed to 4.
 */
5: tst r3, #3
beq 4f
/*
 * 3: one byte, dst = uqadd8(src, dst). The subs result is tested by the
 * bne at the end; none of the instructions in between change the flags.
 */
3: ldrb r2, [r0], #1
subs r12, r12, #1
ldrb r3, [r1]
uqadd8 r3, r2, r3
strb r3, [r1], #1
orr r3, r1, r0
bne 5b
/* 1: row finished - advance both row pointers by their row offsets. */
1: ldr r3, [sp]
add r8, r8, r3
ldr r3, [sp, #40]
add r7, r7, r3
/* 10: one row fewer; continue until the decrement borrows. */
10: subs r10, r10, #1
bcs 6b
/* 0: epilogue. */
0: add sp, sp, #4
pop {r4, r5, r6, r7, r8, r9, r10, r11}
bx lr
/* 2: aligned row entry - cursors start at the row pointers. */
2: mov r12, r11
mov r1, r8
mov r0, r7
/*
 * 4: r12 = bytes left, cursors word aligned. With more than 3 bytes left
 * (signed compare) set up the word loop:
 *   r9 = bytes left, r6 = bytes left - 4, r5 = r6 / 4,
 *   r4 = (r5 + 1) * 4 = number of bytes handled as whole words,
 *   r12 = 0 = running byte offset.
 * The conditional instructions do not change the flags, so the ble still
 * tests the cmp; 3 or fewer bytes go to the byte tail at 7.
 */
4: cmp r12, #3
subgt r6, r12, #4
movgt r9, r12
lsrgt r5, r6, #2
addgt r3, r5, #1
movgt r12, #0
lslgt r4, r3, #2
ble 7f
/* 8: four bytes per iteration, dst = uqadd8(src, dst) on each byte lane. */
8: ldr r3, [r0, r12]
ldr r2, [r1, r12]
uqadd8 r3, r3, r2
str r3, [r1, r12]
add r12, r12, #4
cmp r12, r4
bne 8b
/*
 * r3 = ((r9 - 4) & ~3) + 4 = bytes consumed by the word loop;
 * r12 = r6 - r5 * 4 = bytes still left (0..3). Advance both cursors and
 * finish the row at 1 if nothing is left.
 */
sub r3, r9, #4
bic r3, r3, #3
add r3, r3, #4
subs r12, r6, r5, lsl #2
add r1, r1, r3
add r0, r0, r3
beq 1b
/* 7: byte tail; r12 is 1..3 here and r4 counts the byte offset up to it. */
7: mov r4, #0
9: ldrb r3, [r1, r4]
ldrb r2, [r0, r4]
uqadd8 r3, r2, r3
strb r3, [r1, r4]
add r4, r4, #1
cmp r4, r12
bne 9b
/* Same row advance as at label 1, then rejoin the row counter at 10. */
ldr r3, [sp]
add r8, r8, r3
ldr r3, [sp, #40]
add r7, r7, r3
b 10b
.endfunc
/*
 * pixman_composite_over_8888_8888_asm_armv6
 *
 * For every 32-bit pixel:
 *   dst = uqadd8(src, scale(dst, 255 - (src >> 24)))
 * where scale(p, a) replaces each byte c of p by (t + (t >> 8)) >> 8 with
 * t = c * a + 0x80.
 *
 * Arguments as consumed by the code:
 *   r0                     pixels per row
 *   r1                     number of rows
 *   r2                     destination pointer (read and written)
 *   r3                     destination row advance in 32-bit units
 *                          (converted to bytes with lsl #2)
 *   first stack argument   source pointer (only read)
 *   second stack argument  source row advance in 32-bit units
 *
 * r4-r11 are saved and restored; r0-r3, r12 and the flags are overwritten.
 *
 * Stack after the prologue (32 bytes pushed + 20 bytes of locals):
 *   [sp, #0]   destination row advance in bytes
 *   [sp, #4]   destination pointer of the next row
 *   [sp, #8]   source row advance in bytes
 *   [sp, #12]  number of rows
 *   [sp, #16]  pixels per row
 *   [sp, #52]  first stack argument, [sp, #56] second stack argument
 */
pixman_asm_function pixman_composite_over_8888_8888_asm_armv6
push {r4, r5, r6, r7, r8, r9, r10, r11}
sub sp, sp, #20
/*
 * The flags of this compare are used by the beq further down; the
 * mov/str/ldr in between leave them untouched. Zero rows -> return.
 */
cmp r1, #0
mov r12, r2
str r1, [sp, #12]
str r0, [sp, #16]
ldr r2, [sp, #52]
beq 0f
/* Convert both row advances to bytes; r10 counts finished rows from 0. */
lsl r3, r3, #2
str r3, [sp]
ldr r3, [sp, #56]
mov r10, #0
lsl r3, r3, #2
str r3, [sp, #8]
mov r11, r3
b 1f
/* 6: later rows reload the source advance, since r11 is reused as 255. */
6: ldr r11, [sp, #8]
/*
 * 1: row setup. r0 / r1 = destination / source cursor for this row;
 * r12 / r2 are advanced to the next row (r12 is parked at [sp, #4]
 * because it becomes the pixel counter).
 * Constants: r3 = 0x00800080 (0x80 bias per 16-bit lane),
 * r9 = 0xff00ff00 (bytes 1 and 3), r11 = 255.
 */
1: ldr r9, [sp]
mov r0, r12
add r12, r12, r9
mov r1, r2
str r12, [sp, #4]
add r2, r2, r11
ldr r12, [sp, #16]
ldr r3, =0x00800080
ldr r9, =0xff00ff00
mov r11, #255
/* A row of zero pixels skips the pixel loop. */
cmp r12, #0
beq 4f
/* 5: one pixel. r5 = source pixel, r4 = destination pixel. */
5: ldr r5, [r1], #4
ldr r4, [r0]
/* r8 = 255 - source alpha (top byte of the source pixel). */
sub r8, r11, r5, lsr #24
/* r6 = destination bytes 0 and 2, r7 = bytes 1 and 3, as 16-bit lanes. */
uxtb16 r6, r4
uxtb16 r7, r4, ror #8
/* Per lane: t = c * r8 + 0x80. */
mla r6, r6, r8, r3
mla r7, r7, r8, r3
/* Per lane: t += t >> 8; the scaled byte is now the high byte of the lane. */
uxtab16 r6, r6, r6, ror #8
uxtab16 r7, r7, r7, ror #8
/*
 * Repack: r7 keeps its high bytes in place (bytes 1 and 3), and the high
 * bytes of r6 are moved down into bytes 0 and 2 and added in.
 */
and r7, r7, r9
uxtab16 r6, r7, r6, ror #8
/* Saturating per-byte add of the source, then store and advance. */
uqadd8 r5, r6, r5
str r5, [r0], #4
subs r12, r12, #1
bne 5b
/* 4: row done. Count it and restore the next-row destination pointer. */
4: ldr r3, [sp, #12]
add r10, r10, #1
cmp r10, r3
ldr r12, [sp, #4]
bne 6b
/* 0: epilogue. */
0: add sp, sp, #20
pop {r4, r5, r6, r7, r8, r9, r10, r11}
bx lr
.endfunc
/*
 * pixman_composite_over_8888_n_8888_asm_armv6
 *
 * For every 32-bit pixel:
 *   s   = scale(src, mask >> 24)
 *   dst = uqadd8(s, scale(dst, 255 - (s >> 24)))
 * where scale(p, a) replaces each byte c of p by (t + (t >> 8)) >> 8 with
 * t = c * a + 0x80.
 *
 * Arguments as consumed by the code:
 *   r0                     pixels per row
 *   r1                     number of rows
 *   r2                     destination pointer (read and written)
 *   r3                     destination row advance in 32-bit units
 *                          (converted to bytes with lsl #2)
 *   first stack argument   source pointer (only read)
 *   second stack argument  source row advance in 32-bit units
 *   third stack argument   32-bit mask; only bits 31:24 are used
 *
 * r4-r11 are saved and restored; r0-r3, r12 and the flags are overwritten.
 *
 * Stack after the prologue (32 bytes pushed + 28 bytes of locals):
 *   [sp, #0]   destination pointer of the next row
 *   [sp, #4]   source pointer of the next row
 *   [sp, #8]   source row advance in bytes
 *   [sp, #12]  number of rows
 *   [sp, #16]  pixels per row
 *   [sp, #20]  destination row advance in bytes
 *   [sp, #24]  mask alpha (mask >> 24)
 *   [sp, #60], [sp, #64], [sp, #68]  first, second, third stack argument
 */
pixman_asm_function pixman_composite_over_8888_n_8888_asm_armv6
push {r4, r5, r6, r7, r8, r9, r10, r11}
sub sp, sp, #28
/*
 * The flags of this compare are used by the beq further down; none of
 * the instructions in between (str/ldr/lsr/mov) modify them.
 */
cmp r1, #0
str r1, [sp, #12]
/*
 * r1 = mask >> 24. The whole word is loaded and shifted instead of
 * reading the single byte at [sp, #71]: the byte address of bits 31:24
 * depends on the data endianness, the shift does not.
 */
ldr r1, [sp, #68]
lsr r1, r1, #24
mov r12, r2
str r0, [sp, #16]
ldr r2, [sp, #60]
str r1, [sp, #24]
/* Zero rows -> return. */
beq 0f
/* Convert both row advances to bytes; r10 counts finished rows from 0. */
lsl r3, r3, #2
str r3, [sp, #20]
ldr r3, [sp, #64]
mov r10, #0
lsl r3, r3, #2
str r3, [sp, #8]
mov r11, r3
b 1f
/* 5: later rows reload the source advance, since r11 is reused as 255. */
5: ldr r11, [sp, #8]
/*
 * 1: row setup. r0 / r1 = destination / source cursor for this row; the
 * next-row pointers are parked at [sp, #0] and [sp, #4] because r12 and
 * r2 are reused inside the pixel loop.
 * Constants: r2 = 0x00800080 (0x80 bias per 16-bit lane),
 * r3 = mask alpha, r11 = 255. r12 = pixels left in the row.
 */
1: ldr r4, [sp, #20]
mov r0, r12
mov r1, r2
add r12, r12, r4
add r2, r2, r11
str r12, [sp]
str r2, [sp, #4]
ldr r12, [sp, #16]
ldr r2, =0x00800080
ldr r3, [sp, #24]
mov r11, #255
/* A row of zero pixels skips the pixel loop. */
cmp r12, #0
beq 3f
/* 4: one pixel. r5 = source pixel, r4 = destination pixel. */
4: ldr r5, [r1], #4
ldr r4, [r0]
/* Source bytes 0,2 -> r6 and 1,3 -> r7 as 16-bit lanes, times mask alpha. */
uxtb16 r6, r5
uxtb16 r7, r5, ror #8
mla r6, r6, r3, r2
mla r7, r7, r3, r2
/* Per lane: t += t >> 8, then keep the high byte of each lane. */
uxtab16 r6, r6, r6, ror #8
uxtab16 r7, r7, r7, ror #8
uxtb16 r6, r6, ror #8
uxtb16 r7, r7, ror #8
/* r5 = masked source pixel. */
orr r5, r6, r7, lsl #8
/* Destination bytes as lanes, times r8 = 255 - masked source alpha. */
uxtb16 r6, r4
uxtb16 r7, r4, ror #8
sub r8, r11, r5, lsr #24
mla r6, r6, r8, r2
mla r7, r7, r8, r2
uxtab16 r6, r6, r6, ror #8
uxtab16 r7, r7, r7, ror #8
uxtb16 r6, r6, ror #8
uxtb16 r7, r7, ror #8
orr r6, r6, r7, lsl #8
/* Saturating per-byte add of the masked source, then store and advance. */
uqadd8 r5, r6, r5
str r5, [r0], #4
subs r12, r12, #1
bne 4b
/* 3: row done. Count it and restore both next-row pointers. */
3: ldr r1, [sp, #12]
add r10, r10, #1
cmp r10, r1
ldr r12, [sp]
ldr r2, [sp, #4]
bne 5b
/* 0: epilogue. */
0: add sp, sp, #28
pop {r4, r5, r6, r7, r8, r9, r10, r11}
bx lr
.endfunc
/*
 * pixman_composite_over_n_8_8888_asm_armv6
 *
 * For every 32-bit destination pixel and its 8-bit mask value m:
 *   s   = scale(src, m)
 *   dst = uqadd8(s, scale(dst, (~s) >> 24))
 * where src is one constant 32-bit value and scale(p, a) replaces each
 * byte c of p by (t + (t >> 8)) >> 8 with t = c * a + 0x80.
 *
 * Arguments as consumed by the code:
 *   r0                     pixels per row
 *   r1                     number of rows
 *   r2                     destination pointer (read and written)
 *   r3                     destination row advance in 32-bit units
 *                          (converted to bytes with lsl #2)
 *   first stack argument   constant 32-bit source value
 *   second stack argument  not read by this routine
 *   third stack argument   mask pointer (one byte per pixel, only read)
 *   fourth stack argument  mask row advance in bytes (added unshifted)
 *
 * r4-r11 are saved and restored; r0-r3, r12 and the flags are overwritten.
 *
 * Stack after the prologue (32 bytes pushed + 28 bytes of locals):
 *   [sp, #0]   mask pointer of the next row
 *   [sp, #4]   source bytes 1 and 3 as 16-bit lanes
 *   [sp, #8]   destination pointer of the next row
 *   [sp, #12]  number of rows
 *   [sp, #16]  pixels per row
 *   [sp, #20]  source bytes 0 and 2 as 16-bit lanes
 *   [sp, #24]  destination row advance in bytes
 *   [sp, #60], [sp, #68], [sp, #72]  first, third, fourth stack argument
 */
pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
push {r4, r5, r6, r7, r8, r9, r10, r11}
sub sp, sp, #28
/*
 * The flags of this compare are used by the beq at the end of the setup;
 * none of the ldr/str/bic/mov/lsr instructions in between modify them.
 */
cmp r1, #0
ldr r9, [sp, #60]
str r1, [sp, #12]
/*
 * Split the constant source into two lane words:
 *   [sp, #20] = src        & 0x00ff00ff  (bytes 0 and 2)
 *   [sp, #4]  = (src >> 8) & 0x00ff00ff  (bytes 1 and 3)
 * The masking is done with two bic steps each (#-16777216 is 0xff000000,
 * #65280 is 0x0000ff00).
 */
bic r1, r9, #-16777216
str r1, [sp, #20]
mov r12, r2
lsr r1, r9, #8
ldr r2, [sp, #20]
bic r1, r1, #-16777216
bic r2, r2, #65280
bic r1, r1, #65280
str r2, [sp, #20]
str r0, [sp, #16]
str r1, [sp, #4]
ldr r2, [sp, #68]
/* Zero rows -> return. */
beq 0f
/* Destination row advance in bytes; r0 counts finished rows from 0. */
lsl r3, r3, #2
str r3, [sp, #24]
mov r0, #0
b 1f
/* 5: later rows reload the destination advance, since r3 is reused. */
5: ldr r3, [sp, #24]
/*
 * 1: row setup. r10 / r1 = destination / mask cursor for this row; the
 * next-row pointers are parked at [sp, #8] and [sp, #0] because r12 and
 * r2 are reused inside the pixel loop.
 * Constants: r11 = 0x00800080 (0x80 bias per 16-bit lane),
 * r2 = source bytes 1 and 3, r3 = source bytes 0 and 2.
 * r12 = pixels left in the row.
 */
1: ldr r4, [sp, #72]
mov r10, r12
mov r1, r2
add r12, r12, r3
add r2, r2, r4
str r12, [sp, #8]
str r2, [sp]
ldr r12, [sp, #16]
ldr r11, =0x00800080
ldr r2, [sp, #4]
ldr r3, [sp, #20]
/* A row of zero pixels skips the pixel loop. */
cmp r12, #0
beq 3f
/* 4: one pixel. r5 = mask byte, r4 = destination pixel. */
4: ldrb r5, [r1], #1
ldr r4, [r10]
/* Per lane: t = source byte * mask byte + 0x80. */
mla r6, r3, r5, r11
mla r7, r2, r5, r11
/* Per lane: t += t >> 8, then keep the high byte of each lane. */
uxtab16 r6, r6, r6, ror #8
uxtab16 r7, r7, r7, ror #8
uxtb16 r6, r6, ror #8
uxtb16 r7, r7, ror #8
/* r5 = masked source pixel. */
orr r5, r6, r7, lsl #8
/* Destination bytes as lanes, times r8 = top byte of ~r5. */
uxtb16 r6, r4
uxtb16 r7, r4, ror #8
mvn r8, r5
lsr r8, r8, #24
mla r6, r6, r8, r11
mla r7, r7, r8, r11
uxtab16 r6, r6, r6, ror #8
uxtab16 r7, r7, r7, ror #8
uxtb16 r6, r6, ror #8
uxtb16 r7, r7, ror #8
orr r6, r6, r7, lsl #8
/* Saturating per-byte add of the masked source, then store and advance. */
uqadd8 r5, r6, r5
str r5, [r10], #4
subs r12, r12, #1
bne 4b
/* 3: row done. Count it and restore both next-row pointers. */
3: ldr r4, [sp, #12]
add r0, r0, #1
cmp r0, r4
ldr r12, [sp, #8]
ldr r2, [sp]
bne 5b
/* 0: epilogue. */
0: add sp, sp, #28
pop {r4, r5, r6, r7, r8, r9, r10, r11}
bx lr
.endfunc