;//
;// 
;// File Name:  armVCM4P10_Average_4x_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;// 
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;// 
;// 
;//

;// Functions:
;//     armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe
;//
;// Implements an average of 4x4 blocks with the equation c = (a+b+1)>>1.
;// The first operand is at offset ALIGNMENT from an aligned address.
;// The second operand is at an aligned location and is also used as the
;// output destination (pDst) for vertical interpolation.
;// Each row requires copying 4 bytes in the horizontal direction.
;//
;// Registers used as input for this function
;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 the step size
;//
;// Registers preserved for the top level function
;// r4,r5,r6,r8,r9,r14
;//
;// Registers modified by the function
;// r7,r10,r11,r12
;//
;// Output registers
;// r2 - pointer to the aligned location
;// r3 - step size to this aligned location
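;//
;// Reference sketch (comment only, not part of the build): a minimal C model
;// of the operation implemented by the three functions below, assuming
;// little-endian byte order; the helper name and prototype are hypothetical.
;// The assembly reaches the same per-pixel result four bytes at a time with
;// MVN/UHSUB8/EOR, using (a+b+1)/2 = (a-(255-b))/2 + 128 (mod 256), where
;// MVN supplies 255-b and the EOR with 0x80 restores the 128 bias.
;//
;//     #include <stdint.h>
;//
;//     /* Hypothetical reference model: average two 4x4 blocks with rounding.
;//      * pSrc0 may be unaligned; pSrc1 is the aligned operand, which the
;//      * assembly versions also reuse as the output (pDst). */
;//     static void armVCM4P10_Average_4x4_ref(const uint8_t *pSrc0, int iSrc0Step,
;//                                            const uint8_t *pSrc1, int iSrc1Step,
;//                                            uint8_t *pDst, int iDstStep)
;//     {
;//         for (int y = 0; y < 4; y++)
;//         {
;//             for (int x = 0; x < 4; x++)
;//             {
;//                 int a = pSrc0[y * iSrc0Step + x];
;//                 int b = pSrc1[y * iSrc1Step + x];
;//                 pDst[y * iDstStep + x] = (uint8_t)((a + b + 1) >> 1);
;//             }
;//         }
;//     }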

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_Average_4x4_Align0_unsafe
        EXPORT armVCM4P10_Average_4x4_Align2_unsafe
        EXPORT armVCM4P10_Average_4x4_Align3_unsafe

DEBUG_ON    SETL {FALSE}

;// Declare input registers
pPred0          RN 0
iPredStep0      RN 1
pPred1          RN 2
iPredStep1      RN 3
pDstPred        RN 2
iDstStep        RN 3

;// Declare other intermediate registers
iPredA0         RN 10
iPredA1         RN 11
iPredB0         RN 12
iPredB1         RN 14
Temp1           RN 4
Temp2           RN 5
ResultA         RN 5
ResultB         RN 4
r0x80808080     RN 7

    IF ARM1136JS

        ;// This function calculates the average of a 4x4 block
        ;// pPred0 is at alignment offset 0 and pPred1 is word (4-byte) aligned

        ;// Function header
        M_START armVCM4P10_Average_4x4_Align0_unsafe, r6

        ;// Code start
        LDR     r0x80808080, =0x80808080

        ;// 1st load
        M_LDR   iPredB0, [pPred1]
        M_LDR   iPredA0, [pPred0], iPredStep0
        M_LDR   iPredB1, [pPred1, iPredStep1]
        M_LDR   iPredA1, [pPred0], iPredStep0

        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
        ;// MVN forms 255-b, UHSUB8 halves each byte lane, EOR adds back the 128 bias
        MVN     iPredB0, iPredB0
        MVN     iPredB1, iPredB1
        UHSUB8  ResultA, iPredA0, iPredB0
        UHSUB8  ResultB, iPredA1, iPredB1
        EOR     ResultA, ResultA, r0x80808080
        M_STR   ResultA, [pDstPred], iDstStep
        EOR     ResultB, ResultB, r0x80808080
        M_STR   ResultB, [pDstPred], iDstStep

        ;// 2nd load
        M_LDR   iPredA0, [pPred0], iPredStep0
        M_LDR   iPredB0, [pPred1]
        M_LDR   iPredA1, [pPred0], iPredStep0
        M_LDR   iPredB1, [pPred1, iPredStep1]

        MVN     iPredB0, iPredB0
        UHSUB8  ResultA, iPredA0, iPredB0
        MVN     iPredB1, iPredB1
        UHSUB8  ResultB, iPredA1, iPredB1
        EOR     ResultA, ResultA, r0x80808080
        M_STR   ResultA, [pDstPred], iDstStep
        EOR     ResultB, ResultB, r0x80808080
        M_STR   ResultB, [pDstPred], iDstStep
End0
        M_END

        ;// This function calculates the average of a 4x4 block
        ;// pPred0 is at alignment offset 2 and pPred1 is word (4-byte) aligned

        ;// Function header
        M_START armVCM4P10_Average_4x4_Align2_unsafe, r6

        ;// Code start
        LDR     r0x80808080, =0x80808080

        ;// 1st load
        LDR     Temp1, [pPred0, #4]
        M_LDR   iPredA0, [pPred0], iPredStep0
        M_LDR   iPredB0, [pPred1]
        M_LDR   iPredB1, [pPred1, iPredStep1]
        M_LDR   Temp2, [pPred0, #4]
        M_LDR   iPredA1, [pPred0], iPredStep0
        MVN     iPredB0, iPredB0
        MVN     iPredB1, iPredB1
        ;// Re-assemble each misaligned row (little-endian): the word at byte
        ;// offset 2 is the top half of iPredAx ORed with the bottom half of Tempx
        MOV     iPredA0, iPredA0, LSR #16
        ORR     iPredA0, iPredA0, Temp1, LSL #16
        MOV     iPredA1, iPredA1, LSR #16
        ORR     iPredA1, iPredA1, Temp2, LSL #16

        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
        UHSUB8  ResultA, iPredA0, iPredB0
        UHSUB8  ResultB, iPredA1, iPredB1
        EOR     ResultA, ResultA, r0x80808080
        M_STR   ResultA, [pDstPred], iDstStep
        EOR     ResultB, ResultB, r0x80808080
        M_STR   ResultB, [pDstPred], iDstStep

        ;// 2nd load
        LDR     Temp1, [pPred0, #4]
        M_LDR   iPredA0, [pPred0], iPredStep0
        LDR     iPredB0, [pPred1]
        LDR     iPredB1, [pPred1, iPredStep1]
        LDR     Temp2, [pPred0, #4]
        M_LDR   iPredA1, [pPred0], iPredStep0
        MVN     iPredB0, iPredB0
        MVN     iPredB1, iPredB1
        MOV     iPredA0, iPredA0, LSR #16
        ORR     iPredA0, iPredA0, Temp1, LSL #16
        MOV     iPredA1, iPredA1, LSR #16
        ORR     iPredA1, iPredA1, Temp2, LSL #16

        UHSUB8  ResultA, iPredA0, iPredB0
        UHSUB8  ResultB, iPredA1, iPredB1
        EOR     ResultA, ResultA, r0x80808080
        M_STR   ResultA, [pDstPred], iDstStep
        EOR     ResultB, ResultB, r0x80808080
        M_STR   ResultB, [pDstPred], iDstStep
End2
        M_END


        ;// This function calculates the average of a 4x4 block
        ;// pPred0 is at alignment offset 3 and pPred1 is word (4-byte) aligned

        ;// Function header
        M_START armVCM4P10_Average_4x4_Align3_unsafe, r6

        ;// Code start
        LDR     r0x80808080, =0x80808080

        ;// 1st load
        LDR     Temp1, [pPred0, #4]
        M_LDR   iPredA0, [pPred0], iPredStep0
        LDR     iPredB0, [pPred1]
        LDR     iPredB1, [pPred1, iPredStep1]
        LDR     Temp2, [pPred0, #4]
        M_LDR   iPredA1, [pPred0], iPredStep0

        MVN     iPredB0, iPredB0
        MVN     iPredB1, iPredB1
        ;// Re-assemble each misaligned row (little-endian): the word at byte
        ;// offset 3 is the top byte of iPredAx ORed with the bottom three bytes of Tempx
        MOV     iPredA0, iPredA0, LSR #24
        ORR     iPredA0, iPredA0, Temp1, LSL #8
        MOV     iPredA1, iPredA1, LSR #24
        ORR     iPredA1, iPredA1, Temp2, LSL #8
        UHSUB8  ResultA, iPredA0, iPredB0
        UHSUB8  ResultB, iPredA1, iPredB1
        EOR     ResultA, ResultA, r0x80808080
        M_STR   ResultA, [pDstPred], iDstStep
        EOR     ResultB, ResultB, r0x80808080
        M_STR   ResultB, [pDstPred], iDstStep

        ;// 2nd load
        LDR     Temp1, [pPred0, #4]
        M_LDR   iPredA0, [pPred0], iPredStep0
        LDR     iPredB0, [pPred1]
        LDR     iPredB1, [pPred1, iPredStep1]
        LDR     Temp2, [pPred0, #4]
        M_LDR   iPredA1, [pPred0], iPredStep0

        MVN     iPredB0, iPredB0
        MVN     iPredB1, iPredB1
        MOV     iPredA0, iPredA0, LSR #24
        ORR     iPredA0, iPredA0, Temp1, LSL #8
        MOV     iPredA1, iPredA1, LSR #24
        ORR     iPredA1, iPredA1, Temp2, LSL #8

        UHSUB8  ResultA, iPredA0, iPredB0
        UHSUB8  ResultB, iPredA1, iPredB1
        EOR     ResultA, ResultA, r0x80808080
        M_STR   ResultA, [pDstPred], iDstStep
        EOR     ResultB, ResultB, r0x80808080
        M_STR   ResultB, [pDstPred], iDstStep
End3
        M_END

    ENDIF

    END