;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe

DEBUG_ON    SETL {FALSE}

    IF ARM1136JS

;// Declare input registers
pSrc            RN 0        ;// Source pointer, any byte alignment
srcStep         RN 1        ;// Byte distance between consecutive source rows
pDst            RN 8        ;// Destination buffer pointer (word aligned)
iHeight         RN 9        ;// Number of rows to copy

;// Declare inner loop registers
;// x and x0 deliberately share r7: x (the byte offset of pSrc within its
;// word) is only needed by M_SWITCH, after which r7 is reused for row data.
x               RN 7
x0              RN 7
x1              RN 10
x2              RN 11
Scratch         RN 12

;// Function:
;//     armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
;//
;// Implements copy from an arbitrarily aligned source memory location (pSrc)
;// to a 4 byte aligned destination pointed to by (pDst) for horizontal
;// interpolation.
;// This function needs to copy 9 bytes in the horizontal direction.
;//
;// Each row is read as three words from pSrc rounded down to a word boundary,
;// shifted right by (pSrc & 3) bytes across the three words, and written as
;// three words (12 bytes) to pDst.
;// NOTE(review): the shift directions used to realign the data presume
;// little-endian byte order - confirm for the target configuration.
;//
;// The row loops test the counter after the copy, so one row is always
;// copied, even if iHeight is not positive on entry.
;//
;// Registers used as input for this function
;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the
;// number of rows to copy
;//
;// Registers preserved for top level function
;// r2,r3,r4,r5,r6
;//
;// Registers modified by the function
;// r7,r8,r9,r10,r11,r12
;//
;// Output registers
;// r0 - pointer to the new aligned location which will be used as pSrc
;//      (the value pDst had on entry)
;// r1 - step size to this aligned location (12 bytes per row)

        ;// Function header
        M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe

        ;// Copy pDst to scratch: pDst is advanced by the stores below and
        ;// its entry value is returned in pSrc at CopyEnd
        MOV     Scratch, pDst

StartAlignedStackCopy
        AND     x, pSrc, #3                 ;// x = byte offset of pSrc within its word
        BIC     pSrc, pSrc, #3              ;// Round pSrc down to a word boundary

        ;// Dispatch on the byte offset (0..3)
        M_SWITCH x
        M_CASE   Copy0toAligned
        M_CASE   Copy1toAligned
        M_CASE   Copy2toAligned
        M_CASE   Copy3toAligned
        M_ENDSWITCH

        ;// Offset 0: source is already word aligned, plain 3 word copy
Copy0toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1        ;// Flags consumed by the BGT below
        ADD     pSrc, pSrc, srcStep         ;// Next source row

        ;// One cycle stall

        STM     pDst!, {x0, x1, x2}         ;// Store aligned output row
        BGT     Copy0toAligned
        B       CopyEnd

        ;// Offset 1: drop the low byte of each word and pull in the low
        ;// byte of the following word
Copy1toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1        ;// No instruction below sets flags before BGT
        ADD     pSrc, pSrc, srcStep         ;// Next source row

        ;// One cycle stall

        MOV     x0, x0, LSR #8
        ORR     x0, x0, x1, LSL #24
        MOV     x1, x1, LSR #8
        ORR     x1, x1, x2, LSL #24
        MOV     x2, x2, LSR #8
        STM     pDst!, {x0, x1, x2}         ;// Store aligned output row
        BGT     Copy1toAligned
        B       CopyEnd

        ;// Offset 2: drop the low halfword of each word and pull in the low
        ;// halfword of the following word
Copy2toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1        ;// No instruction below sets flags before BGT
        ADD     pSrc, pSrc, srcStep         ;// Next source row

        ;// One cycle stall

        MOV     x0, x0, LSR #16
        ORR     x0, x0, x1, LSL #16
        MOV     x1, x1, LSR #16
        ORR     x1, x1, x2, LSL #16
        MOV     x2, x2, LSR #16
        STM     pDst!, {x0, x1, x2}         ;// Store aligned output row
        BGT     Copy2toAligned
        B       CopyEnd

        ;// Offset 3: keep only the top byte of each word and pull in the low
        ;// three bytes of the following word
Copy3toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1        ;// No instruction below sets flags before BGT
        ADD     pSrc, pSrc, srcStep         ;// Next source row

        ;// One cycle stall

        MOV     x0, x0, LSR #24
        ORR     x0, x0, x1, LSL #8
        MOV     x1, x1, LSR #24
        ORR     x1, x1, x2, LSL #8
        MOV     x2, x2, LSR #24
        STM     pDst!, {x0, x1, x2}         ;// Store aligned output row
        BGT     Copy3toAligned

CopyEnd

        ;// Hand the aligned copy back to the caller as the new source
        MOV     pSrc, Scratch               ;// pSrc = start of the aligned buffer
        MOV     srcStep, #12                ;// Three words are stored per row

        M_END


;// Function:
;//     armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
;//
;// Implements copy from an arbitrarily aligned source memory location (pSrc)
;// to an aligned destination pointed to by (pDst) for vertical interpolation.
;// This function needs to copy 4 bytes in the horizontal direction.
;//
;// Each row is written as one word (4 bytes) to pDst. For a non-zero byte
;// offset the word is assembled from the two words that straddle the source
;// position.
;// NOTE(review): the shift directions used to realign the data presume
;// little-endian byte order - confirm for the target configuration.
;//
;// The row loops test the counter after the copy, so one row is always
;// copied, even if iHeight is not positive on entry.
;//
;// Registers used as input for this function
;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the
;// number of rows to copy
;//
;// Registers preserved for top level function
;// r2,r3,r4,r5,r6
;//
;// Registers modified by the function
;// r7,r8,r9,r10,r11,r12
;//
;// Output registers
;// r0 - pointer to the new aligned location which will be used as pSrc
;//      (final pDst minus 28 bytes, see CopyVEnd)
;// r1 - step size to this aligned location (4 bytes per row)

        ;// Function header
        M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe

        ;// Copy rows from pSrc into the aligned buffer at pDst
StartVAlignedStackCopy
        AND     x, pSrc, #3                 ;// x = byte offset of pSrc within its word
        BIC     pSrc, pSrc, #3              ;// Round pSrc down to a word boundary


        ;// Dispatch on the byte offset (0..3)
        M_SWITCH x
        M_CASE   Copy0toVAligned
        M_CASE   Copy1toVAligned
        M_CASE   Copy2toVAligned
        M_CASE   Copy3toVAligned
        M_ENDSWITCH

        ;// NOTE(review): M_LDR is a macro from armCOMM_s.h; it is used below
        ;// as a load from [pSrc] that then advances pSrc by srcStep, which is
        ;// why the [pSrc, #4] load is issued first - confirm against the
        ;// macro definition.

        ;// Offset 0: source is already word aligned, plain word copy
Copy0toVAligned
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1        ;// Flags consumed by the BGT below

        ;// One cycle stall

        STR     x0, [pDst], #4              ;// Store aligned output row
        BGT     Copy0toVAligned
        B       CopyVEnd

        ;// Offset 1: upper three bytes of the first word, low byte of the second
Copy1toVAligned
        LDR     x1, [pSrc, #4]
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1        ;// No instruction below sets flags before BGT

        ;// One cycle stall

        MOV     x1, x1, LSL #24
        ORR     x0, x1, x0, LSR #8
        STR     x0, [pDst], #4              ;// Store aligned output row
        BGT     Copy1toVAligned
        B       CopyVEnd

        ;// Offset 2: upper halfword of the first word, low halfword of the second
Copy2toVAligned
        LDR     x1, [pSrc, #4]
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1        ;// No instruction below sets flags before BGT

        ;// One cycle stall

        MOV     x1, x1, LSL #16
        ORR     x0, x1, x0, LSR #16
        STR     x0, [pDst], #4              ;// Store aligned output row
        BGT     Copy2toVAligned
        B       CopyVEnd

        ;// Offset 3: top byte of the first word, low three bytes of the second
Copy3toVAligned
        LDR     x1, [pSrc, #4]
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1        ;// No instruction below sets flags before BGT

        ;// One cycle stall

        MOV     x1, x1, LSL #8
        ORR     x0, x1, x0, LSR #24
        STR     x0, [pDst], #4              ;// Store aligned output row
        BGT     Copy3toVAligned

CopyVEnd

        ;// pDst has advanced 4 bytes per copied row; step back a fixed
        ;// 28 bytes to form the new source pointer.
        ;// NOTE(review): the constant is independent of iHeight; with 9 rows
        ;// copied (36 bytes) it would leave pSrc on the third row of the
        ;// buffer - confirm the row count the callers pass in.
        SUB     pSrc, pDst, #28
        MOV     srcStep, #4                 ;// One word is stored per row

        M_END


    ENDIF

        END