/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 * 8x8 intra prediction (DC / horizontal / vertical / plane) for ARM + NEON.
 *
 * Toolchain assumptions: GNU assembler directives, ARM (A32) instruction
 * set (.arm), NEON register file (.fpu neon), ELF/EABI object format.
 */

        .eabi_attribute 24, 1           @ Tag_ABI_align_needed
        .eabi_attribute 25, 1           @ Tag_ABI_align_preserved

        .arm
        .fpu    neon
        .text
        .align  4

/*
 * Stack layout after the prologue.  PUSH {r4-r10,lr} stores 8 words and
 * VPUSH {d8-d15} stores 8 doublewords, so the first stack-passed argument
 * sits SAVE_AREA_SIZE bytes above sp.
 */
        .equ    SAVE_AREA_SIZE, (8 * 4 + 8 * 8)
        .equ    ARG_LEFT_STEP,  SAVE_AREA_SIZE + 0
        .equ    ARG_DST_STEP,   SAVE_AREA_SIZE + 4
        .equ    ARG_PRED_MODE,  SAVE_AREA_SIZE + 8
        .equ    ARG_AVAIL,      SAVE_AREA_SIZE + 12

        .equ    PRED_MODE_MAX,  3       @ last valid index of the jump table
        .equ    AVAIL_UPPER,    1       @ availability bit gating reads through r1
        .equ    AVAIL_LEFT,     2       @ availability bit gating reads through r0

/*
 * Jump table indexed by predMode.  Each entry is the distance from the
 * value pc holds while "P0: ADD pc,r8" executes (address of P0 plus 8 in
 * ARM state) to the handler for that mode.
 */
armVCM4P10_pIndexTable8x8:
        .word   OMX_VC_CHROMA_DC-(P0+8),   OMX_VC_CHROMA_HOR-(P0+8)
        .word   OMX_VC_CHROMA_VERT-(P0+8), OMX_VC_CHROMA_PLANE-(P0+8)

/*
 * Plane-mode constants.  The first four halfwords weight the neighbour
 * differences that form the two gradients; the following eight are the
 * per-column / per-row offsets -3..4 applied to the scaled gradients.
 */
armVCM4P10_MultiplierTableChroma8x8:
        .hword   3,  2,  1, 4
        .hword  -3, -2, -1, 0
        .hword   1,  2,  3, 4

/*
 * omxVCM4P10_PredictIntraChroma_8x8
 *
 * Writes an 8x8 block of predicted bytes to the destination, built from
 * the samples to the left of, above, and above-left of the block.
 *
 * Calling convention: first four arguments in r0-r3, remainder on the
 * stack.  Argument names below are descriptive only.
 * NOTE(review): confirm names and types against the project prototype.
 *
 * In:
 *   r0        left-neighbour column; 8 bytes read, leftStep bytes apart
 *   r1        row above; 8 contiguous bytes
 *   r2        above-left sample; 1 byte (read in plane mode only)
 *   r3        destination; 8 rows of 8 bytes, dstStep bytes apart
 *   [sp,#0]   leftStep
 *   [sp,#4]   dstStep
 *   [sp,#8]   predMode: 0 = DC, 1 = horizontal, 2 = vertical, 3 = plane
 *   [sp,#12]  availability mask, consulted in DC mode only:
 *             AVAIL_UPPER -> row above is read, AVAIL_LEFT -> left column
 *             is read.  Other modes read their neighbours unconditionally.
 * Out:
 *   r0 = 0 on success
 *   r0 = -5 when predMode is outside 0..PRED_MODE_MAX; nothing is written.
 *        NOTE(review): -5 is intended to be OMX_Sts_BadArgErr - confirm
 *        the value against the project omxtypes.h.
 * Preserved: r4-r10, d8-d15 (saved and restored here).
 * Clobbers:  r0, r2, r3, d0-d7, d16-d19, condition flags.
 */
        .global omxVCM4P10_PredictIntraChroma_8x8
        .type   omxVCM4P10_PredictIntraChroma_8x8, %function
        .func   omxVCM4P10_PredictIntraChroma_8x8
omxVCM4P10_PredictIntraChroma_8x8:
        PUSH     {r4-r10,lr}
        VPUSH    {d8-d15}
        LDR      r6,[sp,#ARG_PRED_MODE]
        CMP      r6,#PRED_MODE_MAX
        BHI      .Lbad_pred_mode        @ unsigned compare also rejects negative values
        ADR      r8, armVCM4P10_pIndexTable8x8
        LDR      r4,[sp,#ARG_LEFT_STEP]
        LDR      r5,[sp,#ARG_DST_STEP]
        LDR      r7,[sp,#ARG_AVAIL]
        LDR      r8,[r8,r6,LSL #2]      @ r8 = handler offset for this mode
P0:     ADD      pc,r8                  @ pc reads as P0+8, matching the table bias

/* ---- mode 0: DC ------------------------------------------------------ */
OMX_VC_CHROMA_DC:
        TST      r7,#AVAIL_LEFT
        BEQ      .Ldc_no_left
        ADD      r9,r0,r4               @ r9 walks the odd left rows
        ADD      r10,r4,r4              @ both pointers advance two rows at a time
        VLD1.8   {d1[0]},[r0],r10       @ gather left[0..7] into d1 lanes 0..7
        VLD1.8   {d1[1]},[r9],r10
        VLD1.8   {d1[2]},[r0],r10
        VLD1.8   {d1[3]},[r9],r10
        VLD1.8   {d1[4]},[r0],r10
        VLD1.8   {d1[5]},[r9],r10
        VLD1.8   {d1[6]},[r0],r10
        VLD1.8   {d1[7]},[r9]
        TST      r7,#AVAIL_UPPER
        BEQ      .Ldc_left_only

        /* Both neighbours available. */
        VLD1.8   {d0},[r1]              @ d0 = above[0..7]
        MOV      r0,#0                  @ return value; r0 is finished as a pointer
        VPADDL.U8  d2,d0
        VPADDL.U16 d3,d2                @ d3 = {sum above[0..3], sum above[4..7]}
        VPADDL.U8  d2,d1
        VPADDL.U16 d1,d2                @ d1 = {sum left[0..3],  sum left[4..7]}
        VADD.I32 d2,d3,d1               @ d2 = above + left sums, per half
        VRSHR.U32 d2,d2,#3              @ rounded mean over 8 samples
        VRSHR.U32 d3,d3,#2              @ rounded mean over 4 above samples
        VRSHR.U32 d1,d1,#2              @ rounded mean over 4 left samples
        VMOV.I8  d5,#0xc
        VMOV.I8  d6,#0x4
        VSHL.I64 d5,d5,#32              @ d5 = {0,0,0,0,12,12,12,12}
        VSHR.U64 d6,d6,#32              @ d6 = {4,4,4,4,0,0,0,0}
        VADD.I8  d6,d6,d5               @ d6 = {4,4,4,4,12,12,12,12}
        VTBL.8   d0,{d2-d3},d5          @ rows 0-3: cols 0-3 = d2 byte 0, cols 4-7 = d3 byte 4
        VTBL.8   d4,{d1-d2},d6          @ rows 4-7: cols 0-3 = d1 byte 4, cols 4-7 = d2 byte 4

        /* Store d0 to rows 0-3 and d4 to rows 4-7. */
.Lstore_dc_halves:
        ADD      r9,r3,r5               @ r9 walks the odd destination rows
        ADD      r10,r5,r5
        VST1.8   {d0},[r3],r10
        VST1.8   {d0},[r9],r10
        VST1.8   {d0},[r3],r10
        VST1.8   {d0},[r9],r10
        VST1.8   {d4},[r3],r10
        VST1.8   {d4},[r9],r10
        VST1.8   {d4},[r3],r10
        VST1.8   {d4},[r9]
        VPOP     {d8-d15}
        POP      {r4-r10,pc}

        /* Only the left column is available. */
.Ldc_left_only:
        MOV      r0,#0
        VPADDL.U8  d2,d1
        VPADDL.U16 d1,d2                @ d1 = {sum left[0..3], sum left[4..7]}
        VRSHR.U32 d1,d1,#2
        VDUP.8   d0,d1[0]               @ rows 0-3 <- mean of left[0..3]
        VDUP.8   d4,d1[4]               @ rows 4-7 <- mean of left[4..7]
        B        .Lstore_dc_halves

        /* Left column not available. */
.Ldc_no_left:
        TST      r7,#AVAIL_UPPER
        BEQ      .Ldc_none
        VLD1.8   {d0},[r1]              @ d0 = above[0..7]
        MOV      r0,#0
        VPADDL.U8  d2,d0
        VPADDL.U16 d3,d2                @ d3 = {sum above[0..3], sum above[4..7]}
        VRSHR.U32 d3,d3,#2
        VMOV.I8  d5,#0x4
        VSHL.I64 d5,d5,#32              @ d5 = {0,0,0,0,4,4,4,4}
        VTBL.8   d0,{d3},d5             @ cols 0-3 = d3 byte 0, cols 4-7 = d3 byte 4
        B        .Lstore_same_row

        /* Neither neighbour available: fill with 0x80. */
.Ldc_none:
        VMOV.I8  d0,#0x80
        MOV      r0,#0

        /* Store d0 to all eight rows. */
.Lstore_same_row:
        ADD      r9,r3,r5
        ADD      r10,r5,r5
        VST1.8   {d0},[r3],r10
        VST1.8   {d0},[r9],r10
        VST1.8   {d0},[r3],r10
        VST1.8   {d0},[r9],r10
        VST1.8   {d0},[r3],r10
        VST1.8   {d0},[r9],r10
        VST1.8   {d0},[r3],r10
        VST1.8   {d0},[r9]
        VPOP     {d8-d15}
        POP      {r4-r10,pc}

/* ---- mode 2: vertical (every row is a copy of the row above) --------- */
OMX_VC_CHROMA_VERT:
        VLD1.8   {d0},[r1]
        MOV      r0,#0
        B        .Lstore_same_row

/* ---- mode 1: horizontal (row y is filled with left[y]) --------------- */
OMX_VC_CHROMA_HOR:
        ADD      r9,r0,r4
        ADD      r10,r4,r4
        VLD1.8   {d0[]},[r0],r10        @ load one byte and replicate across the row
        VLD1.8   {d1[]},[r9],r10
        VLD1.8   {d2[]},[r0],r10
        VLD1.8   {d3[]},[r9],r10
        VLD1.8   {d4[]},[r0],r10
        VLD1.8   {d5[]},[r9],r10
        VLD1.8   {d6[]},[r0],r10
        VLD1.8   {d7[]},[r9]
        B        .Lstore_rows

/* ---- mode 3: plane ----------------------------------------------------
 * With AL = above-left sample:
 *   H = 3*(above[6]-above[0]) + 2*(above[5]-above[1])
 *     + 1*(above[4]-above[2]) + 4*(above[7]-AL)
 *   V = same expression on the left column
 *   b = (17*H + 16) >> 5,  c = (17*V + 16) >> 5
 *   a = 16 * (above[7] + left[7])
 *   dst[y][x] = saturate_u8((a + b*(x-3) + c*(y-3) + 16) >> 5)
 */
OMX_VC_CHROMA_PLANE:
        ADD      r9,r0,r4
        ADD      r10,r4,r4
        VLD1.8   {d0},[r1]              @ d0 = above[0..7]
        VLD1.8   {d2[0]},[r2]           @ d2 lane 0 = AL (other lanes unused)
        VLD1.8   {d1[0]},[r0],r10       @ d1 = left[0..7]
        VLD1.8   {d1[1]},[r9],r10
        VLD1.8   {d1[2]},[r0],r10
        VLD1.8   {d1[3]},[r9],r10
        VLD1.8   {d1[4]},[r0],r10
        VLD1.8   {d1[5]},[r9],r10
        VLD1.8   {d1[6]},[r0],r10
        VLD1.8   {d1[7]},[r9]
        VREV64.8 d3,d0                  @ d3 = above[7..0]
        VSUBL.U8 q3,d3,d2               @ d6 lane 0 = above[7] - AL
        VSHR.U64 d3,d3,#8               @ d3 = above[6..0],0
        VSUBL.U8 q2,d3,d0               @ d4 = {a6-a0, a5-a1, a4-a2, 0}
        VREV64.8 d3,d1                  @ same sequence for the left column
        VSUBL.U8 q7,d3,d2               @ d14 lane 0 = left[7] - AL
        VSHR.U64 d3,d3,#8
        VSUBL.U8 q6,d3,d1               @ d12 = {l6-l0, l5-l1, l4-l2, 0}
        ADR      r2, armVCM4P10_MultiplierTableChroma8x8   @ r2 is free once AL is loaded
        VSHL.I64 d4,d4,#16              @ drop the zero lane, make room at the top
        VEXT.8   d9,d4,d6,#2            @ d9 = {a6-a0, a5-a1, a4-a2, a7-AL}
        VLD1.16  {d10},[r2]!            @ d10 = {3,2,1,4}; r2 -> offsets -3..4
        VSHL.I64 d12,d12,#16
        VEXT.8   d16,d12,d14,#2         @ d16 = {l6-l0, l5-l1, l4-l2, l7-AL}
        VMUL.I16 d11,d9,d10
        VMUL.I16 d3,d16,d10
        VPADD.I16  d3,d11,d3
        VPADDL.S16 d3,d3                @ d3 = {H, V} as signed 32-bit
        VSHL.I32 d2,d3,#4
        VADD.I32 d3,d3,d2               @ d3 = 17 * {H, V}
        VLD1.16  {d10,d11},[r2]         @ q5 = {-3,-2,-1,0,1,2,3,4}
        VRSHR.S32 d3,d3,#5              @ d3 = {b, c}
        VADDL.U8 q0,d0,d1               @ q0 lane i = above[i] + left[i]
        VDUP.16  q0,d1[3]               @ d1 lane 3 is q0 lane 7: above[7] + left[7]
        VSHL.I16 q0,q0,#4               @ q0 = a in every lane
        VDUP.16  q2,d3[0]               @ low halfword of b
        VDUP.16  q3,d3[2]               @ low halfword of c
        VMUL.I16 q2,q2,q5               @ q2 lane x = b*(x-3)
        VMUL.I16 q3,q3,q5               @ q3 lane y = c*(y-3)
        VADD.I16 q2,q2,q0               @ q2 lane x = a + b*(x-3)
        VDUP.16  q0,d6[0]               @ broadcast c*(y-3) for y = 0..7
        VDUP.16  q1,d6[1]
        VDUP.16  q4,d6[2]
        VDUP.16  q5,d6[3]               @ q5 coefficients are no longer needed
        VDUP.16  q6,d7[0]
        VDUP.16  q7,d7[1]
        VDUP.16  q8,d7[2]
        VDUP.16  q9,d7[3]
        VADD.I16 q0,q2,q0
        VADD.I16 q1,q2,q1
        VADD.I16 q4,q2,q4
        VADD.I16 q5,q2,q5
        VADD.I16 q6,q2,q6
        VADD.I16 q7,q2,q7
        VADD.I16 q8,q2,q8
        VADD.I16 q9,q2,q9
        VQRSHRUN.S16 d0,q0,#5           @ round, shift by 5, saturate to unsigned 8-bit
        VQRSHRUN.S16 d1,q1,#5
        VQRSHRUN.S16 d2,q4,#5
        VQRSHRUN.S16 d3,q5,#5
        VQRSHRUN.S16 d4,q6,#5
        VQRSHRUN.S16 d5,q7,#5
        VQRSHRUN.S16 d6,q8,#5
        VQRSHRUN.S16 d7,q9,#5

        /* Store d0..d7 to rows 0..7. */
.Lstore_rows:
        ADD      r9,r3,r5
        ADD      r10,r5,r5
        VST1.8   {d0},[r3],r10
        VST1.8   {d1},[r9],r10
        VST1.8   {d2},[r3],r10
        VST1.8   {d3},[r9],r10
        VST1.8   {d4},[r3],r10
        VST1.8   {d5},[r9],r10
        VST1.8   {d6},[r3],r10
        VST1.8   {d7},[r9]
        MOV      r0,#0
        VPOP     {d8-d15}
        POP      {r4-r10,pc}

        /* predMode outside the jump table: fail without touching memory. */
.Lbad_pred_mode:
        MVN      r0,#4                  @ r0 = ~4 = -5
        VPOP     {d8-d15}
        POP      {r4-r10,pc}
        .endfunc
        .size   omxVCM4P10_PredictIntraChroma_8x8, .-omxVCM4P10_PredictIntraChroma_8x8

        .end