/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 *
 */

/*
 * omxVCM4P10_DequantTransformResidualFromPairAndAdd
 *
 * Syntax : GNU as, ARM (A32) state, NEON.
 * Calls  : armVCM4P10_UnpackBlock4x4 (external).
 * Reads  : armVCM4P10_QPModuloTable, armVCM4P10_QPDivTable,
 *          armVCM4P10_VMatrixU16 (external, addressed PC-relatively).
 *
 * Arguments as the code uses them.  The names in parentheses are those of
 * the OpenMAX DL prototype, which is not visible in this file -- TODO
 * confirm against the header:
 *   r0          forwarded untouched as the first argument of
 *               armVCM4P10_UnpackBlock4x4                      (ppSrc)
 *   r1          base of four 32-bit row loads of 8-bit samples  (pPred)
 *   r2          pointer to one signed halfword; may be 0 on the
 *               full path, always dereferenced on the DC-only
 *               path                                            (pDC)
 *   r3          base of four 32-bit row stores                  (pDst)
 *   stack arg 5 byte step between the rows read through r1      (predStep)
 *   stack arg 6 byte step between the rows written through r3   (dstStep)
 *   stack arg 7 index into the two QP tables                    (QP)
 *   stack arg 8 0 selects the DC-only path, non-zero the full
 *               dequantise + transform path                     (AC)
 *
 * Returns r0 = 0.
 *
 * Frame: 10 core registers (40 bytes) + d8-d9 (16 bytes) + a 32-byte
 * scratch block are pushed, so the first stack argument sits at sp + 0x58
 * once the prologue has run.  d8/d9 are saved because q4 (d8:d9) and d8
 * are used as temporaries.
 */

        @ EABI build attributes 24 and 25; presumably the 8-byte stack
        @ alignment needed/preserved tags -- TODO confirm against the
        @ ARM EABI addenda.
        .eabi_attribute 24, 1
        .eabi_attribute 25, 1

        .arm
        .fpu    neon
        .text

        @ Stack layout after the prologue (offsets from sp).
        .equ    DTR_SCRATCH_BYTES,  0x20    @ 16 halfwords of coefficients
        .equ    DTR_STK_PRED_STEP,  0x58    @ stack argument 5
        .equ    DTR_STK_DST_STEP,   0x5c    @ stack argument 6
        .equ    DTR_STK_QP,         0x60    @ stack argument 7
        .equ    DTR_STK_AC,         0x64    @ stack argument 8

        .global omxVCM4P10_DequantTransformResidualFromPairAndAdd
        .func   omxVCM4P10_DequantTransformResidualFromPairAndAdd
omxVCM4P10_DequantTransformResidualFromPairAndAdd:
        push    {r4-r12, lr}
        vpush   {d8-d9}
        sub     sp, sp, #DTR_SCRATCH_BYTES
        add     r4, sp, #0                  @ r4 = scratch block
        ldr     r5, [sp, #DTR_STK_AC]
        mov     r7, r1                      @ r7-r9 are callee-saved, so the
        mov     r8, r2                      @ three pointers survive the
        mov     r9, r3                      @ helper call below
        cmp     r5, #0
        beq     .Ldtr_dc_only               @ flag is 0: skip unpack/dequant

        @ ---- Full path: unpack, dequantise, 2-D transform --------------
        mov     r1, r4                      @ 2nd helper argument = scratch
        @ Presumably expands the packed coefficients into 16 halfwords at
        @ the scratch block (helper not visible here); the 32 bytes at r4
        @ are read back as d0-d3 further down.
        bl      armVCM4P10_UnpackBlock4x4

        ldr     r1, [sp, #DTR_STK_QP]

        @ Position-independent table addresses: each literal holds
        @ table - (label + 8), and pc reads as label + 8 in ARM state.
        ldr     r11, .Ldtr_off_QPModuloTable
.Ldtr_pic_mod:
        add     r11, pc
        ldr     r10, .Ldtr_off_QPDivTable
.Ldtr_pic_div:
        add     r10, pc
        ldr     r2, .Ldtr_off_VMatrixU16
.Ldtr_pic_vmat:
        add     r2, pc

        ldrsb   r12, [r11, r1]              @ r12 = QPModuloTable[QP]
        ldrsb   lr, [r10, r1]               @ lr  = QPDivTable[QP]
        ldr     r10, =0x3020504             @ byte indices 4,5,2,3
        ldr     r1, =0x5040100              @ byte indices 0,1,4,5
        add     r2, r2, r12                 @ modulo entry is used as a byte
                                            @ offset into VMatrixU16
        vdup.32 d7, r1
        vdup.32 d9, r10
        vdup.16 d5, lr                      @ shift count in every lane
        vld1.8  {d6}, [r2]                  @ halfwords h0,h1,h2,h3
        vtbl.8  d8, {d6}, d7                @ d8 = h0,h2,h0,h2
        vtbl.8  d4, {d6}, d9                @ d4 = h2,h1,h2,h1
        cmp     r8, #0                      @ flags stay live until the
                                            @ vmovne below: nothing in
                                            @ between writes the APSR
        vld1.16 {d0, d1, d2, d3}, [r4]      @ four rows of four halfwords
        vshl.u16 d8, d8, d5                 @ scale factors shifted left by
        vshl.u16 d4, d4, d5                 @ the QPDivTable entry
        beq     .Ldtr_scale
        ldrsh   r10, [r8, #0]               @ halfword supplied through r2
.Ldtr_scale:
        vmul.i16 d0, d0, d8                 @ rows 0 and 2 use d8,
        vmul.i16 d1, d1, d4                 @ rows 1 and 3 use d4
        vmul.i16 d2, d2, d8
        vmul.i16 d3, d3, d4
        vmovne.16 d0[0], r10                @ r2 non-zero: element 0 is
                                            @ replaced by the loaded
                                            @ halfword, unscaled

        @ 4x4 transpose of the halfword matrix held in d0-d3.
        vtrn.16 d0, d1
        vtrn.16 d2, d3
        vtrn.32 q0, q1

        @ First butterfly pass; d4 = 0 turns vhadd into an arithmetic
        @ shift right by one.
        vmov.i16 d4, #0
        vadd.i16 d5, d0, d2                 @ d5 = x0 + x2
        vsub.i16 d6, d0, d2                 @ d6 = x0 - x2
        vhadd.s16 d7, d1, d4                @ d7 = x1 >> 1
        vhadd.s16 d8, d3, d4                @ d8 = x3 >> 1
        vsub.i16 d7, d7, d3                 @ d7 = (x1 >> 1) - x3
        vadd.i16 d8, d1, d8                 @ d8 = x1 + (x3 >> 1)
        vadd.i16 d0, d5, d8
        vadd.i16 d1, d6, d7
        vsub.i16 d2, d6, d7
        vsub.i16 d3, d5, d8

        @ Transpose again and repeat the butterfly on the other axis.
        vtrn.16 d0, d1
        vtrn.16 d2, d3
        vtrn.32 q0, q1
        vadd.i16 d5, d0, d2
        vsub.i16 d6, d0, d2
        vhadd.s16 d7, d1, d4
        vhadd.s16 d8, d3, d4
        vsub.i16 d7, d7, d3
        vadd.i16 d8, d1, d8
        vadd.i16 d0, d5, d8
        vadd.i16 d1, d6, d7
        vsub.i16 d2, d6, d7
        vsub.i16 d3, d5, d8

        @ Rounding arithmetic shift: (x + 32) >> 6 per lane.
        vrshr.s16 d0, d0, #6
        vrshr.s16 d1, d1, #6
        vrshr.s16 d2, d2, #6
        vrshr.s16 d3, d3, #6
        b       .Ldtr_add_store

        @ ---- DC-only path: every residual is (dc + 32) >> 6 -------------
        @ NOTE(review): r8 is dereferenced here without the null check the
        @ full path performs -- callers must pass a valid pointer in r2
        @ whenever stack argument 8 is 0.
.Ldtr_dc_only:
        ldrsh   r10, [r8, #0]
        add     r10, r10, #0x20
        asr     r10, r10, #6
        vdup.16 d0, r10
        vdup.16 d1, r10
        vdup.16 d2, r10
        vdup.16 d3, r10

        @ ---- Add the residual to the 8-bit rows and store ---------------
.Ldtr_add_store:
        ldr     r1, [sp, #DTR_STK_PRED_STEP]
        ldr     r10, [sp, #DTR_STK_DST_STEP]
        ldr     r3, [r7], r1                @ row 0, then advance by step
        ldr     r5, [r7], r1                @ row 1
        vmov    d4, r3, r5                  @ d4 = rows 0,1 (8 bytes)
        ldr     r3, [r7], r1                @ row 2
        ldr     r5, [r7, #0]                @ row 3, no write-back
        vmov    d5, r3, r5                  @ d5 = rows 2,3
        vaddw.u8 q3, q0, d4                 @ residual + zero-extended u8
        vaddw.u8 q4, q1, d5
        vqmovun.s16 d0, q3                  @ saturate signed 16 to
        vqmovun.s16 d1, q4                  @ unsigned 8
        vst1.32 {d0[0]}, [r9], r10          @ four bytes per output row
        vst1.32 {d0[1]}, [r9], r10
        vst1.32 {d1[0]}, [r9], r10
        vst1.32 {d1[1]}, [r9]

        mov     r0, #0                      @ return value
        add     sp, sp, #DTR_SCRATCH_BYTES
        vpop    {d8-d9}
        pop     {r4-r12, pc}
        .endfunc

        @ PC-relative offsets consumed by the ldr / add-pc pairs above.
.Ldtr_off_QPModuloTable:
        .word   armVCM4P10_QPModuloTable-(.Ldtr_pic_mod+8)
.Ldtr_off_QPDivTable:
        .word   armVCM4P10_QPDivTable-(.Ldtr_pic_div+8)
.Ldtr_off_VMatrixU16:
        .word   armVCM4P10_VMatrixU16-(.Ldtr_pic_vmat+8)

        .end