/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 * omxVCM4P10_PredictIntra_4x4 -- 4x4 intra prediction, ARM state, NEON.
 *
 * Syntax / target: GNU as, unified-style mnemonics, AArch32 ARM state
 * (.arm), NEON (.fpu neon), AAPCS calling convention.
 *
 * Presumed C prototype (taken from the OpenMAX DL API, it is NOT visible
 * in this file -- confirm against the project headers):
 *
 *   OMXResult omxVCM4P10_PredictIntra_4x4(
 *       const OMX_U8 *pSrcLeft, const OMX_U8 *pSrcAbove,
 *       const OMX_U8 *pSrcAboveLeft, OMX_U8 *pDst,
 *       OMX_INT leftStep, OMX_INT dstStep,
 *       OMXVCM4P10Intra4x4PredMode predMode, OMX_S32 availability);
 *
 * Register roles as established by the code below:
 *   r0  pointer to the left column; samples are read at r0 + k*r4, k=0..3
 *   r1  pointer to the row above; 4 or 8 consecutive bytes are read
 *   r2  pointer to the single above-left sample
 *   r3  destination; row k is written at r3 + k*r5 (4 bytes per row)
 *   r4  5th argument (first stack word)  = byte step for the left column
 *   r5  6th argument                     = byte step for the destination
 *   r6  7th argument                     = mode, index into the jump table
 *   r7  8th argument                     = availability bit mask
 *
 * Availability bits tested here: value 1 gates the 4-byte read through r1,
 * value 2 gates the reads through r0, value 0x40 gates the 8-byte read
 * through r1.  These presumably are OMX_VC_UPPER, OMX_VC_LEFT and
 * OMX_VC_UPPER_RIGHT -- confirm against the OpenMAX DL headers.
 *
 * Returns: r0 = 0 on every path.
 * Clobbers: r1, r4, r6, r8-r12 are used as scratch but r4-r12 are restored
 * from the stack; d0-d7 are clobbered; d8-d12 are saved and restored.
 *
 * NOTE(review): the mode in r6 is not range-checked before the table
 * lookup; a value above 8 loads an arbitrary word and jumps through it.
 *
 * Notation used in the comments:
 *   A..H      bytes 0..7 read through r1
 *   M         byte read through r2
 *   I,J,K,L   bytes read at r0, r0+r4, r0+2*r4, r0+3*r4
 *   avg(x,y)  = (x + y + 1) >> 1
 *   f(x,y,z)  = (x + 2*y + z + 2) >> 2, computed as VHADD(x,z) followed by
 *               VRHADD with y, which yields the same value for 8-bit input
 */

    .eabi_attribute 24, 1               @ build attribute: 8-byte alignment needed
    .eabi_attribute 25, 1               @ build attribute: 8-byte alignment preserved

    .arm
    .fpu neon

    .text
    .align 4                            @ power-of-two form on ARM: 16-byte boundary

/*
 * Jump table, one word per prediction mode (index = r6).  Each entry is the
 * distance from (P0 + 8) to the handler.  In ARM state a read of pc returns
 * the address of the current instruction plus 8, so the ADD at label P0
 * lands exactly on the selected handler.
 */
armVCM4P10_pSwitchTable4x4:
    .word   OMX_VC_4x4_VERT-(P0+8),    OMX_VC_4x4_HOR-(P0+8)        @ index 0, 1
    .word   OMX_VC_4x4_DC-(P0+8),      OMX_VC_4x4_DIAG_DL-(P0+8)    @ index 2, 3
    .word   OMX_VC_4x4_DIAG_DR-(P0+8), OMX_VC_4x4_VR-(P0+8)         @ index 4, 5
    .word   OMX_VC_4x4_HD-(P0+8),      OMX_VC_4x4_VL-(P0+8)         @ index 6, 7
    .word   OMX_VC_4x4_HU-(P0+8)                                    @ index 8

    .global omxVCM4P10_PredictIntra_4x4
    .type   omxVCM4P10_PredictIntra_4x4, %function   @ typed symbol so the linker can interwork calls from Thumb
omxVCM4P10_PredictIntra_4x4:
    PUSH    {r4-r12,lr}                 @ 10 words  = 0x28 bytes
    VPUSH   {d8-d12}                    @ 5 d-regs  = 0x28 bytes, stack args now at sp+0x50
    ADR     r8, armVCM4P10_pSwitchTable4x4
    LDRD    r6,r7,[sp,#0x58]            @ r6 = 7th arg (mode), r7 = 8th arg (availability)
    LDRD    r4,r5,[sp,#0x50]            @ r4 = 5th arg (left step), r5 = 6th arg (dst step)
    LDR     r8,[r8,r6,LSL #2]           @ r8 = table[mode]
P0: ADD     pc, r8                      @ pc = (P0 + 8) + table[mode]

/* Index 1: every row k is filled with the left sample of that row. */
OMX_VC_4x4_HOR:
    ADD     r9,r0,r4                    @ r9  = address of J (odd rows)
    ADD     r10,r4,r4                   @ r10 = two left steps
    VLD1.8  {d0[]},[r0],r10             @ d0 = I in all lanes
    VLD1.8  {d1[]},[r9],r10             @ d1 = J in all lanes
    VLD1.8  {d2[]},[r0]                 @ d2 = K in all lanes
    VLD1.8  {d3[]},[r9]                 @ d3 = L in all lanes
    ADD     r11,r3,r5                   @ r11 = row 1
    ADD     r12,r5,r5                   @ r12 = two dst steps
    VST1.32 {d0[0]},[r3],r12            @ row 0
    VST1.32 {d1[0]},[r11],r12           @ row 1
    VST1.32 {d2[0]},[r3]                @ row 2
    VST1.32 {d3[0]},[r11]               @ row 3
    B       .Lp4x4_return

/* Index 0: every row is a copy of A,B,C,D. */
OMX_VC_4x4_VERT:
    VLD1.32 {d0[0]},[r1]                @ d0 low word = A,B,C,D
    ADD     r11,r3,r5                   @ r11 = row 1
    ADD     r12,r5,r5                   @ r12 = two dst steps

/*
 * Shared tail: write the low word of d0 to all four rows.
 * Expects r11 = r3 + r5 and r12 = 2*r5.
 */
.Lp4x4_store_rows:
    VST1.32 {d0[0]},[r3],r12            @ row 0
    VST1.32 {d0[0]},[r11],r12           @ row 1
    VST1.32 {d0[0]},[r3]                @ row 2
    VST1.32 {d0[0]},[r11]               @ row 3
    B       .Lp4x4_return

/*
 * Index 2: all 16 samples get one value, chosen by the availability bits:
 *   bits 2 and 1 set : (A+B+C+D+I+J+K+L + 4) >> 3
 *   bit 2 only       : (I+J+K+L + 2) >> 2
 *   bit 1 only       : (A+B+C+D + 2) >> 2
 *   neither          : 0x80
 */
OMX_VC_4x4_DC:
    TST     r7,#2
    BEQ     .Lp4x4_dc_no_left
    ADD     r9,r0,r4
    ADD     r10,r4,r4
    VLD1.8  {d0[0]},[r0],r10            @ d0 bytes 0..3 = I,J,K,L
    VLD1.8  {d0[1]},[r9],r10
    VLD1.8  {d0[2]},[r0]
    VLD1.8  {d0[3]},[r9]
    TST     r7,#1
    BEQ     .Lp4x4_dc_left_only
    VLD1.32 {d0[1]},[r1]                @ d0 bytes 4..7 = A,B,C,D
    VPADDL.U8 d1,d0                     @ pairwise widen-and-add: 8 x u8 -> 4 x u16
    VPADDL.U16 d1,d1                    @ -> 2 x u32
    VPADDL.U32 d1,d1                    @ -> 1 x u64 = sum of all 8 bytes
    VRSHR.U64 d1,d1,#3                  @ (sum + 4) >> 3
    ADD     r11,r3,r5
    ADD     r12,r5,r5
    VDUP.8  d0,d1[0]                    @ broadcast the result byte
    B       .Lp4x4_store_rows
.Lp4x4_dc_left_only:
    VPADDL.U8 d1,d0
    VPADDL.U16 d1,d1                    @ u32 lane 0 = I+J+K+L (lane 1 is not used)
    VRSHR.U32 d1,d1,#2                  @ (sum + 2) >> 2
    ADD     r11,r3,r5
    ADD     r12,r5,r5
    VDUP.8  d0,d1[0]
    B       .Lp4x4_store_rows
.Lp4x4_dc_no_left:
    TST     r7,#1
    BEQ     .Lp4x4_dc_none
    VLD1.32 {d0[0]},[r1]                @ d0 bytes 0..3 = A,B,C,D
    VPADDL.U8 d1,d0
    VPADDL.U16 d1,d1                    @ u32 lane 0 = A+B+C+D (lane 1 is not used)
    VRSHR.U32 d1,d1,#2                  @ (sum + 2) >> 2
    ADD     r11,r3,r5
    ADD     r12,r5,r5
    VDUP.8  d0,d1[0]
    B       .Lp4x4_store_rows
.Lp4x4_dc_none:
    VMOV.I8 d0,#0x80                    @ no neighbours read: fill with 0x80
    ADD     r11,r3,r5
    ADD     r12,r5,r5
    B       .Lp4x4_store_rows

/*
 * Index 3: d6[i] = f(s[i], s[i+1], s[i+2]) over the above row s; row k is
 * d6 bytes k..k+3.  With bit 0x40 clear only A..D are read and D is
 * replicated in place of E..H.
 */
OMX_VC_4x4_DIAG_DL:
    TST     r7,#0x40
    BEQ     .Lp4x4_dl_no_above_right
    VLD1.8  {d3},[r1]                   @ d3 = A..H
    VDUP.8  d2,d3[7]                    @ d2 = H in all lanes (padding)
    VEXT.8  d4,d3,d2,#1                 @ d4 = B..H,H
    VEXT.8  d5,d3,d2,#2                 @ d5 = C..H,H,H
    B       .Lp4x4_dl_filter
.Lp4x4_dl_no_above_right:
    VLD1.32 {d0[1]},[r1]                @ d0 bytes 4..7 = A,B,C,D
    VDUP.8  d2,d0[7]                    @ d2 = D in all lanes (padding)
    VEXT.8  d3,d0,d2,#4                 @ d3 = A,B,C,D,D,D,D,D
    VEXT.8  d4,d0,d2,#5                 @ d4 = B,C,D,D,D,D,D,D
    VEXT.8  d5,d0,d2,#6                 @ d5 = C,D,D,D,D,D,D,D
.Lp4x4_dl_filter:
    VHADD.U8 d6,d3,d5
    VRHADD.U8 d6,d6,d4                  @ d6[i] = f(d3[i], d4[i], d5[i])
    VST1.32 {d6[0]},[r3],r5             @ row 0 = d6 bytes 0..3
    VEXT.8  d6,d6,d6,#1                 @ rotate down by one byte
    VST1.32 {d6[0]},[r3],r5             @ row 1 = original bytes 1..4
    VEXT.8  d6,d6,d6,#1
    VST1.32 {d6[0]},[r3],r5             @ row 2 = original bytes 2..5
    VEXT.8  d6,d6,d6,#1
    VST1.32 {d6[0]},[r3]                @ row 3 = original bytes 3..6
    B       .Lp4x4_return

/*
 * Index 4: d6[i] = f over the sequence L,K,J,I,M,A,B,C,D; row 3 is
 * d6 bytes 0..3 and each row above it starts one byte later.
 */
OMX_VC_4x4_DIAG_DR:
    VLD1.32 {d0[0]},[r1]                @ d0 bytes 0..3 = A,B,C,D
    VLD1.8  {d1[7]},[r2]                @ d1 byte 7 = M
    ADD     r9,r0,r4
    ADD     r10,r4,r4
    ADD     r1,r3,r5                    @ r1 is free now: reuse as row 1 address
    VLD1.8  {d1[6]},[r0],r10            @ d1 byte 6 = I
    VLD1.8  {d1[5]},[r9],r10            @ d1 byte 5 = J
    VLD1.8  {d1[4]},[r0]                @ d1 byte 4 = K
    VLD1.8  {d1[3]},[r9]                @ d1 byte 3 = L
    VEXT.8  d3,d1,d0,#3                 @ d3 = L,K,J,I,M,A,B,C
    ADD     r4,r1,r5                    @ r4 = row 2 address (left step no longer needed)
    VEXT.8  d4,d1,d0,#4                 @ d4 = K,J,I,M,A,B,C,D
    ADD     r6,r4,r5                    @ r6 = row 3 address (mode no longer needed)
    VEXT.8  d5,d1,d0,#5                 @ d5 = J,I,M,A,B,C,D,(unused)
    VHADD.U8 d6,d3,d5
    VRHADD.U8 d6,d6,d4                  @ d6[i] = f(d3[i], d4[i], d5[i])
    VST1.32 {d6[0]},[r6]                @ row 3 = d6 bytes 0..3
    VEXT.8  d6,d6,d6,#1
    VST1.32 {d6[0]},[r4]                @ row 2 = original bytes 1..4
    VEXT.8  d6,d6,d6,#1
    VST1.32 {d6[0]},[r1]                @ row 1 = original bytes 2..5
    VEXT.8  d6,d6,d6,#1
    VST1.32 {d6[0]},[r3]                @ row 0 = original bytes 3..6
    B       .Lp4x4_return

/*
 * Index 5.  Result layout:
 *   d8 = f(J,I,M), avg(M,A), avg(A,B), avg(B,C), avg(C,D), ...
 *   d7 = f(K,J,I), f(I,M,A), f(M,A,B), f(A,B,C), f(B,C,D), ...
 *   row 0 = d8 bytes 1..4, row 1 = d7 bytes 1..4,
 *   row 2 = d8 bytes 0..3, row 3 = d7 bytes 0..3.
 * The 2-tap averages fall out of the 3-tap filter because d6 and d12 hold
 * the same sample in lanes 1 and up: f(x,y,x) = avg(x,y).
 */
OMX_VC_4x4_VR:
    VLD1.32 {d0[0]},[r1]                @ d0 bytes 0..3 = A,B,C,D
    VLD1.8  {d0[7]},[r2]                @ d0 byte 7 = M
    VLD1.8  {d1[7]},[r0],r4             @ d1 byte 7 = I
    VLD1.8  {d2[7]},[r0],r4             @ d2 byte 7 = J
    VLD1.8  {d1[6]},[r0]                @ d1 byte 6 = K (the fourth left sample is not read)
    VEXT.8  d12,d0,d0,#7                @ d12 = M,A,B,C,D,...
    VEXT.8  d3,d1,d12,#6                @ d3  = K,I,M,A,B,C,...
    VEXT.8  d4,d2,d12,#7                @ d4  = J,M,A,B,C,D,...
    VEXT.8  d5,d1,d0,#7                 @ d5  = I,A,B,C,D,...
    VEXT.8  d6,d2,d0,#7                 @ d6  = J,A,B,C,D,...
    VEXT.8  d11,d1,d12,#7               @ d11 = I,M,A,B,C,D,...
    VHADD.U8 d8,d6,d12
    VRHADD.U8 d8,d8,d11                 @ d8[i] = f(d6[i], d11[i], d12[i])
    VHADD.U8 d7,d3,d5
    VRHADD.U8 d7,d7,d4                  @ d7[i] = f(d3[i], d4[i], d5[i])
    VEXT.8  d10,d8,d8,#1                @ d10 low word = d8 bytes 1..4
    ADD     r11,r3,r5
    ADD     r12,r5,r5
    VEXT.8  d9,d7,d7,#1                 @ d9 low word = d7 bytes 1..4
    VST1.32 {d10[0]},[r3],r12           @ row 0
    VST1.32 {d9[0]},[r11],r12           @ row 1
    VST1.32 {d8[0]},[r3],r12            @ row 2 (post-increment result is not used)
    VST1.32 {d7[0]},[r11]               @ row 3
    B       .Lp4x4_return

/*
 * Index 6.  After the shuffles:
 *   d6 = avg(L,K), f(J,K,L), avg(K,J), f(I,J,K),
 *        avg(J,I), f(M,I,J), avg(I,M), f(A,M,I)
 *   d8 = d6 bytes 2..7 followed by f(B,A,M), f(C,B,A)
 *   row 0 = d8 bytes 4..7, row 1 = d6 bytes 4..7,
 *   row 2 = d8 bytes 0..3, row 3 = d6 bytes 0..3.
 * NOTE(review): 8 bytes are loaded through r1 although only A,B,C reach
 * the stored result.
 */
OMX_VC_4x4_HD:
    VLD1.8  {d0},[r1]                   @ d0 = A..H (only A,B,C are consumed)
    VLD1.8  {d1[7]},[r2]                @ d1 byte 7 = M
    ADD     r9,r0,r4
    ADD     r10,r4,r4
    VLD1.8  {d1[6]},[r0],r10            @ d1 byte 6 = I
    VLD1.8  {d1[5]},[r9],r10            @ d1 byte 5 = J
    VLD1.8  {d1[4]},[r0]                @ d1 byte 4 = K
    VLD1.8  {d1[3]},[r9]                @ d1 byte 3 = L
    VEXT.8  d3,d1,d0,#3                 @ d3 = L,K,J,I,M,A,B,C
    VEXT.8  d4,d1,d0,#2                 @ d4 = x,L,K,J,I,M,A,B
    VEXT.8  d5,d1,d0,#1                 @ d5 = x,x,L,K,J,I,M,A
    VHADD.U8 d7,d3,d5
    VRHADD.U8 d7,d7,d4                  @ d7[i] = f(d3[i], d4[i], d5[i])
    VRHADD.U8 d8,d4,d3                  @ d8[i] = avg(d4[i], d3[i])
    VSHL.I64 d8,d8,#24                  @ move averages up 3 bytes so lanes 4..7 hold bytes 1..4
    VSHL.I64 d6,d7,#16                  @ move filtered values up 2 bytes so lanes 4..7 hold bytes 2..5
    VZIP.8  d8,d6                       @ interleave; d6 receives the upper halves
    VEXT.8  d7,d7,d7,#6                 @ d7 bytes 0..1 = f(B,A,M), f(C,B,A)
    VEXT.8  d8,d6,d7,#2                 @ d8 = d6 bytes 2..7, d7 bytes 0..1
    ADD     r11,r3,r5
    ADD     r12,r5,r5
    VST1.32 {d8[1]},[r3],r12            @ row 0
    VST1.32 {d6[1]},[r11],r12           @ row 1
    VST1.32 {d8[0]},[r3]                @ row 2
    VST1.32 {d6[0]},[r11]               @ row 3
    B       .Lp4x4_return

/*
 * Index 7: d7[i] = avg(s[i], s[i+1]), d10[i] = f(s[i], s[i+1], s[i+2]) over
 * the above row s.  row 0 = d7 bytes 0..3, row 1 = d10 bytes 0..3,
 * row 2 = d7 bytes 1..4, row 3 = d10 bytes 1..4.  With bit 0x40 clear only
 * A..D are read and D is replicated in place of E..H.
 */
OMX_VC_4x4_VL:
    TST     r7,#0x40
    BEQ     .Lp4x4_vl_no_above_right
    VLD1.8  {d3},[r1]                   @ d3 = A..H
    VEXT.8  d4,d3,d3,#1                 @ d4 = B..H,A (wrapped lane is not stored)
    VEXT.8  d5,d4,d4,#1                 @ d5 = C..H,A,B
    B       .Lp4x4_vl_filter
.Lp4x4_vl_no_above_right:
    VLD1.32 {d0[1]},[r1]                @ d0 bytes 4..7 = A,B,C,D
    VDUP.8  d2,d0[7]                    @ d2 = D in all lanes (padding)
    VEXT.8  d3,d0,d2,#4                 @ d3 = A,B,C,D,D,D,D,D
    VEXT.8  d4,d0,d2,#5                 @ d4 = B,C,D,D,D,D,D,D
    VEXT.8  d5,d0,d2,#6                 @ d5 = C,D,D,D,D,D,D,D
.Lp4x4_vl_filter:
    VRHADD.U8 d7,d4,d3                  @ d7[i] = avg(d3[i], d4[i])
    VHADD.U8 d10,d3,d5
    VRHADD.U8 d10,d10,d4                @ d10[i] = f(d3[i], d4[i], d5[i])
    VEXT.8  d8,d7,d7,#1                 @ d8 low word = d7 bytes 1..4
    ADD     r11,r3,r5
    ADD     r12,r5,r5
    VEXT.8  d9,d10,d8,#1                @ d9 low word = d10 bytes 1..4
    VST1.32 {d7[0]},[r3],r12            @ row 0
    VST1.32 {d10[0]},[r11],r12          @ row 1
    VST1.32 {d8[0]},[r3]                @ row 2
    VST1.32 {d9[0]},[r11]               @ row 3
    B       .Lp4x4_return

/*
 * Index 8: with s = I,J,K,L,L,L,L,L the interleaved vector is
 *   avg(s0,s1), f(s0,s1,s2), avg(s1,s2), f(s1,s2,s3), ...
 * Row k starts 2*k bytes into it; row 3 is therefore L,L,L,L.
 * Falls through into the common exit.
 */
OMX_VC_4x4_HU:
    ADD     r9,r0,r4
    ADD     r10,r4,r4
    VLD1.8  {d1[4]},[r0],r10            @ d1 byte 4 = I
    VLD1.8  {d1[5]},[r9],r10            @ d1 byte 5 = J
    VLD1.8  {d1[6]},[r0]                @ d1 byte 6 = K
    VLD1.8  {d1[7]},[r9]                @ d1 byte 7 = L
    VDUP.8  d2,d1[7]                    @ d2 = L in all lanes (padding)
    VEXT.8  d3,d1,d2,#4                 @ d3 = I,J,K,L,L,L,L,L
    VEXT.8  d4,d1,d2,#5                 @ d4 = J,K,L,L,L,L,L,L
    VEXT.8  d5,d1,d2,#6                 @ d5 = K,L,L,L,L,L,L,L
    VHADD.U8 d7,d3,d5
    VRHADD.U8 d7,d7,d4                  @ d7[i] = f(d3[i], d4[i], d5[i])
    VRHADD.U8 d8,d4,d3                  @ d8[i] = avg(d3[i], d4[i])
    VZIP.8  d8,d7                       @ d8 = interleaved lanes 0..3, d7 = lanes 4..7
    VST1.32 {d8[0]},[r3],r5             @ row 0 = interleaved bytes 0..3
    VEXT.8  d8,d8,d8,#2
    VST1.32 {d8[0]},[r3],r5             @ row 1 = interleaved bytes 2..5
    VEXT.8  d8,d8,d8,#2
    VST1.32 {d8[0]},[r3],r5             @ row 2 = interleaved bytes 4..7
    VST1.32 {d7[0]},[r3]                @ row 3 = interleaved bytes 8..11

/* Common exit: return 0 and restore the callee-saved registers. */
.Lp4x4_return:
    MOV     r0,#0
    VPOP    {d8-d12}
    POP     {r4-r12,pc}
    .size   omxVCM4P10_PredictIntra_4x4, .-omxVCM4P10_PredictIntra_4x4

    .end