;//
;//
;// File Name: armVCM4P10_InterpolateLuma_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision: 9641
;// Date: Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe

DEBUG_ON    SETL {FALSE}

    IF ARM1136JS

;//-----------------------------------------------------------------------
;// Register roles shared by both routines in this file
;//-----------------------------------------------------------------------

;// Interface registers (set up by the caller)
pSrc        RN 0        ;// in : source pointer, any byte alignment
                        ;// out: pointer into the realigned copy
srcStep     RN 1        ;// in : byte distance between source rows
                        ;// out: byte distance between realigned rows
pDst        RN 8        ;// in : destination buffer (word accesses are used on it)
iHeight     RN 9        ;// in : number of rows to copy

;// Working registers
byteOff     RN 7        ;// pSrc & 3; only live until the M_SWITCH dispatch
word0       RN 7        ;// shares r7 with byteOff, which is dead by then
word1       RN 10
word2       RN 11
pDstBase    RN 12       ;// start of the destination buffer (horizontal routine only)

;// Layout constants for the realigned copies
HOR_ALIGNED_STEP    EQU 12      ;// bytes stored per row by HorAlign9x (three words)
VER_ALIGNED_STEP    EQU 4       ;// bytes stored per row by VerAlign4x (one word)
VER_SRC_REWIND      EQU 28      ;// VerAlign4x returns pSrc = (final pDst) - 28

;//-----------------------------------------------------------------------
;// M_HOR_REALIGN_ROWS
;//
;// Row loop of HorAlign9x for a non-zero byte offset. Each iteration loads
;// three words from the word-aligned pSrc, advances pSrc by srcStep, shifts
;// the 96-bit value right by $rsh bits (the top $rsh bits of the third word
;// are zero-filled) and stores the three result words at pDst with
;// write-back. $label is the loop head. The loop repeats while iHeight,
;// after being decremented, is still greater than zero, so at least one row
;// is always processed.
;//
;//   $rsh : 8 * (pSrc & 3), i.e. 8, 16 or 24
;//
;// NOTE(review): dropping the low-order bits to skip leading bytes assumes
;// little-endian data layout - confirm for the target configuration.
;//-----------------------------------------------------------------------
        MACRO
$label  M_HOR_REALIGN_ROWS $rsh
$label
        LDM     pSrc, {word0, word1, word2}
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below
        ADD     pSrc, pSrc, srcStep             ;// next source row

        ;// One cycle stall (note carried over from the original code)

        MOV     word0, word0, LSR #$rsh
        ORR     word0, word0, word1, LSL #(32 - $rsh)
        MOV     word1, word1, LSR #$rsh
        ORR     word1, word1, word2, LSL #(32 - $rsh)
        MOV     word2, word2, LSR #$rsh
        STM     pDst!, {word0, word1, word2}    ;// Store aligned output row
        BGT     $label
        MEND

;//-----------------------------------------------------------------------
;// M_VER_REALIGN_ROWS
;//
;// Row loop of VerAlign4x for a non-zero byte offset. Each iteration loads
;// the word at pSrc+4 and the word at pSrc (post-incrementing pSrc by
;// srcStep), merges them into one word shifted right by $rsh bits, and
;// stores it at pDst, post-incrementing pDst by VER_ALIGNED_STEP. $label is
;// the loop head. The loop repeats while iHeight, after being decremented,
;// is still greater than zero, so at least one row is always processed.
;//
;//   $rsh : 8 * (pSrc & 3), i.e. 8, 16 or 24
;//-----------------------------------------------------------------------
        MACRO
$label  M_VER_REALIGN_ROWS $rsh
$label
        LDR     word1, [pSrc, #4]
        M_LDR   word0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below

        ;// One cycle stall (note carried over from the original code)

        MOV     word1, word1, LSL #(32 - $rsh)
        ORR     word0, word1, word0, LSR #$rsh
        STR     word0, [pDst], #VER_ALIGNED_STEP ;// Store aligned output row
        BGT     $label
        MEND

;//-----------------------------------------------------------------------
;// Function:
;//     armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
;//
;// Copies iHeight rows from an arbitrarily aligned source (pSrc) into the
;// buffer at pDst so that each copied row starts on a word boundary, for
;// use by horizontal interpolation. The horizontal filter needs 9 bytes per
;// row; three whole words (12 bytes) are loaded and stored per row.
;//
;// Inputs
;//     r0 (pSrc)    - source pointer, any alignment
;//     r1 (srcStep) - source row stride in bytes
;//     r8 (pDst)    - destination buffer, expected to be 4-byte aligned
;//     r9 (iHeight) - number of rows to copy (one row is copied even if
;//                    this is zero or negative, see the loop structure)
;//
;// Outputs
;//     r0 - start of the destination buffer (the pDst value on entry)
;//     r1 - HOR_ALIGNED_STEP (12), the row stride of the copied data
;//
;// Registers written by this code
;//     r0, r1, r7, r8, r9, r10, r11, r12 and the condition flags
;//
;// Registers not referenced by this code
;//     r2, r3, r4, r5, r6
;//-----------------------------------------------------------------------

        ;// Function header
        M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe

        ;// Remember where the copy starts; pDst is advanced by the stores
        MOV     pDstBase, pDst

        ;// Split pSrc into a byte offset (0..3) and a word-aligned base
        AND     byteOff, pSrc, #3
        BIC     pSrc, pSrc, #3

        ;// One specialised row loop per byte offset
        M_SWITCH byteOff
            M_CASE  HorCopyOffset0
            M_CASE  HorCopyOffset1
            M_CASE  HorCopyOffset2
            M_CASE  HorCopyOffset3
        M_ENDSWITCH

        ;// Offset 0: source already word aligned, plain three-word copy
HorCopyOffset0
        LDM     pSrc, {word0, word1, word2}
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below
        ADD     pSrc, pSrc, srcStep             ;// next source row

        ;// One cycle stall (note carried over from the original code)

        STM     pDst!, {word0, word1, word2}    ;// Store aligned output row
        BGT     HorCopyOffset0
        B       HorCopyDone

        ;// Offset 1: discard the low 8 bits of the loaded data
HorCopyOffset1 M_HOR_REALIGN_ROWS 8
        B       HorCopyDone

        ;// Offset 2: discard the low 16 bits of the loaded data
HorCopyOffset2 M_HOR_REALIGN_ROWS 16
        B       HorCopyDone

        ;// Offset 3: discard the low 24 bits of the loaded data,
        ;// then fall through to the common exit
HorCopyOffset3 M_HOR_REALIGN_ROWS 24

HorCopyDone

        ;// Hand the realigned copy back through the source registers
        MOV     pSrc, pDstBase
        MOV     srcStep, #HOR_ALIGNED_STEP

        M_END


;//-----------------------------------------------------------------------
;// Function:
;//     armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
;//
;// Copies iHeight rows from an arbitrarily aligned source (pSrc) into the
;// buffer at pDst for use by vertical interpolation. Each row contributes
;// 4 bytes, written as a single word, so the copied rows are packed
;// 4 bytes apart.
;//
;// Inputs
;//     r0 (pSrc)    - source pointer, any alignment
;//     r1 (srcStep) - source row stride in bytes
;//     r8 (pDst)    - destination buffer, expected to be 4-byte aligned
;//     r9 (iHeight) - number of rows to copy (one row is copied even if
;//                    this is zero or negative, see the loop structure)
;//
;// Outputs
;//     r0 - (pDst after the last store) - VER_SRC_REWIND (28 bytes,
;//          i.e. seven stored rows back from the end of the copy)
;//          NOTE(review): if callers always copy 9 rows this is the third
;//          row of the copied data - confirm against the callers.
;//     r1 - VER_ALIGNED_STEP (4), the row stride of the copied data
;//
;// Registers written by this code
;//     r0, r1, r7, r8, r9, r10 and the condition flags
;//
;// Registers not referenced by this code
;//     r2, r3, r4, r5, r6
;//-----------------------------------------------------------------------

        ;// Function header
        M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe

        ;// Split pSrc into a byte offset (0..3) and a word-aligned base
        AND     byteOff, pSrc, #3
        BIC     pSrc, pSrc, #3

        ;// One specialised row loop per byte offset
        M_SWITCH byteOff
            M_CASE  VerCopyOffset0
            M_CASE  VerCopyOffset1
            M_CASE  VerCopyOffset2
            M_CASE  VerCopyOffset3
        M_ENDSWITCH

        ;// Offset 0: source already word aligned, plain one-word copy
VerCopyOffset0
        M_LDR   word0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below

        ;// One cycle stall (note carried over from the original code)

        STR     word0, [pDst], #VER_ALIGNED_STEP ;// Store aligned output row
        BGT     VerCopyOffset0
        B       VerCopyDone

        ;// Offset 1: upper 24 bits of the first word + low 8 bits of the second
VerCopyOffset1 M_VER_REALIGN_ROWS 8
        B       VerCopyDone

        ;// Offset 2: upper 16 bits of the first word + low 16 bits of the second
VerCopyOffset2 M_VER_REALIGN_ROWS 16
        B       VerCopyDone

        ;// Offset 3: upper 8 bits of the first word + low 24 bits of the second,
        ;// then fall through to the common exit
VerCopyOffset3 M_VER_REALIGN_ROWS 24

VerCopyDone

        ;// Hand the realigned copy back through the source registers
        SUB     pSrc, pDst, #VER_SRC_REWIND
        MOV     srcStep, #VER_ALIGNED_STEP

        M_END


    ENDIF

        END