/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 *
 */

/*
 * Syntax : GNU as, ARM (A32) instruction set, NEON.
 * ABI    : AAPCS (32-bit). r4-r11, lr and d8-d15 are saved and restored here.
 *
 * armVCM4P10_Interpolate_Chroma
 *
 * Produces a block of 8-bit samples from a source block using a 2x2 weighted
 * average whose weights come from two fractional offsets in eighths, or a
 * plain copy when both offsets are zero.
 *
 * With fx = first fractional offset and fy = second fractional offset:
 *
 *     A = (8 - fx) * (8 - fy)      B = fx * (8 - fy)
 *     C = (8 - fx) * fy            D = fx * fy
 *
 *     dst[y][x] = sat_u8((A * src[y][x]     + B * src[y][x + 1] +
 *                         C * src[y + 1][x] + D * src[y + 1][x + 1] + 32) >> 6)
 *
 * Register arguments:
 *     r0 = source address (read with byte loads, 8 bytes at a time)
 *     r1 = source row stride in bytes
 *     r2 = destination address
 *     r3 = destination row stride in bytes
 *
 * Stack arguments (caller's [sp, #0] onwards; roles read from the code and
 * label names -- confirm the names against the C prototype):
 *     [sp, #0]  -> r4 : block width selector, expected to be 2, 4 or 8
 *     [sp, #4]  -> r5 : number of rows to produce
 *     [sp, #8]  -> r6 : fx
 *     [sp, #12] -> r7 : fy
 *
 * Returns:
 *     r0 = 0 on every path (presumably the "no error" status -- confirm).
 *
 * NOTE(review), behaviour kept exactly as found:
 *   - Every load fetches 8 source bytes per row regardless of the width, and
 *     the weighted paths also fetch 8 bytes starting at column 1.
 *   - Every loop is do-while shaped and stores a whole group of rows before
 *     testing the counter (4 rows for width 8 with non-zero offsets, 2 rows
 *     everywhere else), so the row count is assumed to be a positive
 *     multiple of the group size.
 *   - The width-4 and width-2 zero-offset loops prefetch one source row
 *     beyond the last row they store.
 *   - The width selector is not range checked. Values other than 2, 4 and 8
 *     either hit a filler table slot or read outside the table.
 */

        .eabi_attribute 24, 1           /* Tag_ABI_align_needed    */
        .eabi_attribute 25, 1           /* Tag_ABI_align_preserved */

        .arm
        .fpu neon

/* Bytes occupied by PUSH {r4-r12,lr} (10 words) and VPUSH {d8-d15} (8 dwords). */
        .equ    SAVED_REGS_SIZE, (10 * 4) + (8 * 8)
/* Offsets from sp, after the prologue, of the caller's stacked arguments. */
        .equ    ARG_SIZE_OFS, SAVED_REGS_SIZE           /* width, row count : 0x68 */
        .equ    ARG_FRAC_OFS, SAVED_REGS_SIZE + 8       /* fx, fy           : 0x70 */

/* Common exit sequence: r0 = 0, undo the prologue and return through pc. */
        .macro  M_RETURN_ZERO
        MOV      r0,#0
        VPOP     {d8-d15}
        POP      {r4-r12,pc}
        .endm

        .text
        .align 4

/*
 * Dispatch tables. Each word is the distance from the value pc reads as at
 * P0 (P0 + 8 in ARM state) to a width-specific loop. The table is indexed
 * with (width << 1) as a byte offset, so width 2 selects word 1, width 4
 * selects word 2 and width 8 selects word 4. Words 0 and 3 are fillers that
 * repeat their neighbour.
 */
armVCM4P10_WidthBranchTableMVIsNotZero:
        .word   WidthIs2MVIsNotZero-(P0+8), WidthIs2MVIsNotZero-(P0+8)
        .word   WidthIs4MVIsNotZero-(P0+8), WidthIs4MVIsNotZero-(P0+8)
        .word   WidthIs8MVIsNotZero-(P0+8)

armVCM4P10_WidthBranchTableMVIsZero:
        .word   WidthIs2MVIsZero-(P0+8), WidthIs2MVIsZero-(P0+8)
        .word   WidthIs4MVIsZero-(P0+8), WidthIs4MVIsZero-(P0+8)
        .word   WidthIs8MVIsZero-(P0+8)

        .global armVCM4P10_Interpolate_Chroma
        .type   armVCM4P10_Interpolate_Chroma, %function
armVCM4P10_Interpolate_Chroma:
        PUSH     {r4-r12,lr}
        VPUSH    {d8-d15}                       /* d8, d9, d12-d15 are written below */
        LDRD     r6,r7,[sp,#ARG_FRAC_OFS]       /* r6 = fx, r7 = fy */
        LDRD     r4,r5,[sp,#ARG_SIZE_OFS]       /* r4 = width, r5 = row count */
        RSB      r8,r6,#8                       /* r8 = 8 - fx */
        RSB      r9,r7,#8                       /* r9 = 8 - fy */
        CMN      r6,r7                          /* Z set when fx + fy == 0 */
        MOV      r10,#1                         /* r10 = step from column 0 to column 1 */
        ADREQ    r11, armVCM4P10_WidthBranchTableMVIsZero
        SUB      lr,r1,r10                      /* lr = stride - 1: column 1 -> next row, column 0 */
        ADRNE    r11, armVCM4P10_WidthBranchTableMVIsNotZero
        VLD1.8   {d0},[r0],r10                  /* d0 = row 0, columns 0..7 */
        SMULBB   r12,r8,r9                      /* r12 = A = (8 - fx) * (8 - fy) */
        SMULBB   r9,r6,r9                       /* r9  = B = fx * (8 - fy) */
        VLD1.8   {d1},[r0],lr                   /* d1 = row 0, columns 1..8; r0 -> row 1 */
        SMULBB   r8,r8,r7                       /* r8  = C = (8 - fx) * fy */
        SMULBB   r6,r6,r7                       /* r6  = D = fx * fy */
        VDUP.8   d12,r12                        /* d12 = A in every byte lane */
        VDUP.8   d13,r9                         /* d13 = B in every byte lane */
        VDUP.8   d14,r8                         /* d14 = C in every byte lane */
        VDUP.8   d15,r6                         /* d15 = D in every byte lane */
        LDR      r11,[r11, r4, lsl #1]          /* table word at byte offset width * 2 */
P0:     ADD      pc,r11                         /* pc reads as P0 + 8, matching the table bias */

/*
 * Width 8, non-zero offsets: four output rows per pass.
 * On entry and at every loop-back d0/d1 hold the current top row at columns
 * 0..7 and 1..8. Row k of the group lives in:
 *     k = 0: d0/d1    k = 1: d2/d3    k = 2: d16/d17    k = 3: d18/d19
 * and d0/d1 are reloaded with row 4, which is the top row of the next pass.
 * Accumulators: q2, q3, q11, q12 hold output rows 0..3 of the group.
 * Loads and multiplies are interleaved; keep the order when editing.
 */
WidthIs8MVIsNotZero:
        VLD1.8   {d2},[r0],r10
        VMULL.U8 q2,d0,d12                      /* out0  = A * row0[x]     */
        VLD1.8   {d3},[r0],lr
        VMULL.U8 q3,d2,d12                      /* out1  = A * row1[x]     */
        VLD1.8   {d16},[r0],r10
        VMLAL.U8 q2,d1,d13                      /* out0 += B * row0[x + 1] */
        VLD1.8   {d17},[r0],lr
        VMULL.U8 q11,d16,d12                    /* out2  = A * row2[x]     */
        VMLAL.U8 q3,d3,d13                      /* out1 += B * row1[x + 1] */
        VLD1.8   {d18},[r0],r10
        VMLAL.U8 q2,d2,d14                      /* out0 += C * row1[x]     */
        VMLAL.U8 q11,d17,d13                    /* out2 += B * row2[x + 1] */
        VMULL.U8 q12,d18,d12                    /* out3  = A * row3[x]     */
        VLD1.8   {d19},[r0],lr
        VMLAL.U8 q3,d16,d14                     /* out1 += C * row2[x]     */
        VLD1.8   {d0},[r0],r10                  /* row 4 becomes the next top row */
        VMLAL.U8 q12,d19,d13                    /* out3 += B * row3[x + 1] */
        VMLAL.U8 q11,d18,d14                    /* out2 += C * row3[x]     */
        VMLAL.U8 q2,d3,d15                      /* out0 += D * row1[x + 1] */
        VLD1.8   {d1},[r0],lr
        VMLAL.U8 q12,d0,d14                     /* out3 += C * row4[x]     */
        VMLAL.U8 q3,d17,d15                     /* out1 += D * row2[x + 1] */
        VMLAL.U8 q11,d19,d15                    /* out2 += D * row3[x + 1] */
        SUBS     r5,r5,#4                       /* four rows done; flags feed BGT */
        VMLAL.U8 q12,d1,d15                     /* out3 += D * row4[x + 1] */
        VQRSHRN.U16 d8,q2,#6                    /* (sum + 32) >> 6, saturated to u8 */
        VQRSHRN.U16 d9,q3,#6
        VQRSHRN.U16 d20,q11,#6
        VST1.64  {d8},[r2],r3
        VQRSHRN.U16 d21,q12,#6
        VST1.64  {d9},[r2],r3
        VST1.64  {d20},[r2],r3
        VST1.64  {d21},[r2],r3
        BGT      WidthIs8MVIsNotZero
        M_RETURN_ZERO

/*
 * Width 4, non-zero offsets: two output rows per pass.
 * d0/d1 = top row, d2/d3 = middle row, reloaded d0/d1 = bottom row, which is
 * also the top row of the next pass. All eight lanes are computed; only the
 * low 32 bits (four samples) of each result are stored.
 */
WidthIs4MVIsNotZero:
        VLD1.8   {d2},[r0],r10
        VMULL.U8 q2,d0,d12                      /* out0  = A * row0[x]     */
        VMULL.U8 q3,d2,d12                      /* out1  = A * row1[x]     */
        VLD1.8   {d3},[r0],lr
        VMLAL.U8 q2,d1,d13                      /* out0 += B * row0[x + 1] */
        VMLAL.U8 q3,d3,d13                      /* out1 += B * row1[x + 1] */
        VLD1.8   {d0},[r0],r10
        VMLAL.U8 q2,d2,d14                      /* out0 += C * row1[x]     */
        VMLAL.U8 q3,d0,d14                      /* out1 += C * row2[x]     */
        VLD1.8   {d1},[r0],lr
        SUBS     r5,r5,#2                       /* two rows done; flags feed BGT */
        VMLAL.U8 q3,d1,d15                      /* out1 += D * row2[x + 1] */
        VMLAL.U8 q2,d3,d15                      /* out0 += D * row1[x + 1] */
        VQRSHRN.U16 d9,q3,#6
        VQRSHRN.U16 d8,q2,#6
        VST1.32  {d8[0]},[r2],r3
        VST1.32  {d9[0]},[r2],r3
        BGT      WidthIs4MVIsNotZero
        M_RETURN_ZERO

/*
 * Width 2, non-zero offsets: same arithmetic as the width-4 loop, but only
 * the low 16 bits (two samples) of each result are stored.
 */
WidthIs2MVIsNotZero:
        VLD1.8   {d2},[r0],r10
        VMULL.U8 q2,d0,d12                      /* out0  = A * row0[x]     */
        VMULL.U8 q3,d2,d12                      /* out1  = A * row1[x]     */
        VLD1.8   {d3},[r0],lr
        VMLAL.U8 q2,d1,d13                      /* out0 += B * row0[x + 1] */
        VMLAL.U8 q3,d3,d13                      /* out1 += B * row1[x + 1] */
        VLD1.8   {d0},[r0],r10
        VMLAL.U8 q2,d2,d14                      /* out0 += C * row1[x]     */
        VMLAL.U8 q3,d0,d14                      /* out1 += C * row2[x]     */
        VLD1.8   {d1},[r0],lr
        SUBS     r5,r5,#2                       /* two rows done; flags feed BGT */
        VMLAL.U8 q3,d1,d15                      /* out1 += D * row2[x + 1] */
        VMLAL.U8 q2,d3,d15                      /* out0 += D * row1[x + 1] */
        VQRSHRN.U16 d9,q3,#6
        VQRSHRN.U16 d8,q2,#6
        VST1.16  {d8[0]},[r2],r3
        VST1.16  {d9[0]},[r2],r3
        BGT      WidthIs2MVIsNotZero
        M_RETURN_ZERO

/*
 * Width 8, both offsets zero: straight copy, two rows per pass.
 * The shared preamble left r0 at row 1, so step back one stride and reload
 * from row 0 inside the loop.
 */
WidthIs8MVIsZero:
        SUB      r0,r0,r1
WidthIs8LoopMVIsZero:
        VLD1.8   {d0},[r0],r1
        SUBS     r5,r5,#2
        VLD1.8   {d1},[r0],r1
        VST1.64  {d0},[r2],r3
        VST1.64  {d1},[r2],r3
        BGT      WidthIs8LoopMVIsZero
        M_RETURN_ZERO

/*
 * Width 4, both offsets zero: copy, two rows per pass, loads run one row
 * ahead of the stores. d0 already holds row 0 from the preamble; each pass
 * stores d0, then d1, and refills d0 for the following pass.
 * NOTE(review): the refill on the final pass reads a source row that is
 * never stored -- confirm the caller guarantees that row is readable.
 */
WidthIs4MVIsZero:
        VLD1.8   {d1},[r0],r1
        SUBS     r5,r5,#2
        VST1.32  {d0[0]},[r2],r3
        VLD1.8   {d0},[r0],r1
        VST1.32  {d1[0]},[r2],r3
        BGT      WidthIs4MVIsZero
        M_RETURN_ZERO

/*
 * Width 2, both offsets zero: same shape as the width-4 copy, storing the low
 * 16 bits of each row. The same final-pass source over-read applies.
 */
WidthIs2MVIsZero:
        VLD1.8   {d1},[r0],r1
        SUBS     r5,r5,#2
        VST1.16  {d0[0]},[r2],r3
        VLD1.8   {d0},[r0],r1
        VST1.16  {d1[0]},[r2],r3
        BGT      WidthIs2MVIsZero
        M_RETURN_ZERO

        .size   armVCM4P10_Interpolate_Chroma, . - armVCM4P10_Interpolate_Chroma

        .end