;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
        EXPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe

;// Functions:
;//     armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe and
;//     armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
;//
;// Implements re-arrangement of data from the temporary buffer to the buffer
;// pointed to by pBuf.  The data is converted from 16 bit to 8 bit; the
;// conversion also removes the offset and checks for saturation.
;//
;// Registers used as input for these functions
;//     r0 - pSrc0    : input pointer
;//     r1 - srcStep0 : step added to r0 after each row of input is loaded
;//     r7 - pBuf     : output pointer
;//
;// Registers preserved for top level function
;//     r4,r5,r6,r8,r9,r14
;//     NOTE(review): r4-r6 and r14 are written inside both functions, so
;//     their preservation relies on the save/restore emitted by
;//     M_START ..., r6 / M_END (defined in armCOMM_s.h) - confirm there.
;//
;// Registers modified by the functions
;//     r7,r10,r11,r12 (r0 and r1 are overwritten with the outputs below)
;//
;// Output registers
;//     r0 - pointer to the destination location
;//     r1 - step size to this destination location


DEBUG_ON    SETL {FALSE}

;// Mask is used to implement (a+b+1)/2.
;// It is not referenced by the code in this file.
MASK        EQU 0x80808080

;// Layout constants shared by the two functions
DIAGCOPY_BUF_BYTES  EQU 32      ;// total advance of pBuf in either function
DIAGCOPY_HOR_ROWS   EQU 4       ;// HorDiagCopy: loop iterations (one row each)
DIAGCOPY_HOR_STEP   EQU 8       ;// HorDiagCopy: bytes stored per row / output step
DIAGCOPY_VER_PAIRS  EQU 2       ;// VerDiagCopy: loop iterations (two rows each)
DIAGCOPY_VER_SKIP   EQU 8       ;// VerDiagCopy: returned pointer is this far past
                                ;//              the first byte written
DIAGCOPY_VER_STEP   EQU 4       ;// VerDiagCopy: output step returned in r1

;// Declare input registers

pSrc0           RN 0
srcStep0        RN 1

;// Declare other intermediate registers
;// ValueA0/ValueA1 deliberately share r10/r11 with Temp3/Temp4.
Temp1           RN 4
Temp2           RN 5
Temp3           RN 10
Temp4           RN 11
pBuf            RN 7
r0x0fe00fe0     RN 6
r0x00ff00ff     RN 12
Count           RN 14
ValueA0         RN 10
ValueA1         RN 11

;// M_DIAGCOPY_NARROW_ROW
;//
;// Loads one row of eight 16-bit values (four words) from pSrc0, advances
;// pSrc0 by srcStep0 and narrows every value to 8 bits.
;//
;// In:    pSrc0, srcStep0, r0x0fe00fe0 (= 0x0fe00fe0), r0x00ff00ff (= 0x00ff00ff)
;// Out:   ValueA0 = narrowed word 0 in byte lanes 0 and 2,
;//                  narrowed word 1 in byte lanes 1 and 3
;//        ValueA1 = same interleave for words 2 and 3
;//        pSrc0   = pSrc0 + srcStep0
;// Clobb: Temp1, Temp2 (Temp3/Temp4 are ValueA0/ValueA1)
;//
;// The instruction order is exactly that of the original hand-written
;// sequence; the macro only removes the three textual copies.
        MACRO
        M_DIAGCOPY_NARROW_ROW
        LDR     Temp4, [pSrc0, #12]
        LDR     Temp3, [pSrc0, #8]
        LDR     Temp2, [pSrc0, #4]
        M_LDR   Temp1, [pSrc0], srcStep0

        ;// Remove the 0x0fe0 offset from every halfword (unsigned, floors at 0)
        UQSUB16 Temp4, Temp4, r0x0fe00fe0
        UQSUB16 Temp3, Temp3, r0x0fe00fe0
        UQSUB16 Temp2, Temp2, r0x0fe00fe0
        UQSUB16 Temp1, Temp1, r0x0fe00fe0

        ;// Saturate every halfword to 13 bits (0..0x1fff)
        USAT16  Temp4, #13, Temp4
        USAT16  Temp3, #13, Temp3
        USAT16  Temp2, #13, Temp2
        USAT16  Temp1, #13, Temp1

        ;// Keep bits [12:5] of every halfword as an 8-bit value
        AND     Temp4, r0x00ff00ff, Temp4, LSR #5
        AND     Temp3, r0x00ff00ff, Temp3, LSR #5
        AND     Temp2, r0x00ff00ff, Temp2, LSR #5
        AND     Temp1, r0x00ff00ff, Temp1, LSR #5

        ;// ValueA1 must be formed first: writing ValueA0 (r10) destroys Temp3
        ORR     ValueA1, Temp3, Temp4, LSL #8
        ORR     ValueA0, Temp1, Temp2, LSL #8
        MEND

    IF ARM1136JS


        ;// armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
        ;//
        ;// Narrows DIAGCOPY_HOR_ROWS rows of input and stores each one as
        ;// eight consecutive bytes at pBuf.
        ;// On exit r0 points at the first byte written and r1 = 8.

        ;// Function header
        M_START armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe, r6

        ;// Code start
        MOV     Count, #DIAGCOPY_HOR_ROWS
        LDR     r0x0fe00fe0, =0x0fe00fe0
        LDR     r0x00ff00ff, =0x00ff00ff
LoopStart1
        M_DIAGCOPY_NARROW_ROW
        SUBS    Count, Count, #1
        ;// Store ValueA0/ValueA1 as one doubleword; flags from SUBS survive
        STRD    ValueA0, [pBuf], #DIAGCOPY_HOR_STEP
        BGT     LoopStart1
End1
        ;// pBuf advanced by 4 * 8 = 32 bytes; hand back the start of the data
        SUB     pSrc0, pBuf, #DIAGCOPY_BUF_BYTES
        MOV     srcStep0, #DIAGCOPY_HOR_STEP

        M_END


        ;// armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
        ;//
        ;// Narrows 2 * DIAGCOPY_VER_PAIRS rows of input.  Every input row is
        ;// split into two 4-byte words that are stored 8 bytes apart; the
        ;// second row of each pair fills the gaps, so each loop iteration
        ;// writes 16 consecutive bytes at pBuf.
        ;// On exit r0 = (first byte written) + 8 and r1 = 4.

        ;// Function header
        M_START armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe, r6

        ;// Code start
        LDR     r0x0fe00fe0, =0x0fe00fe0
        LDR     r0x00ff00ff, =0x00ff00ff
        MOV     Count, #DIAGCOPY_VER_PAIRS

LoopStart
        ;// First row of the pair:
        ;//     ValueA1 = [d2 c2 d0 c0], ValueA0 = [b2 a2 b0 a0]
        M_DIAGCOPY_NARROW_ROW

        PKHBT   Temp1, ValueA0, ValueA1, LSL #16    ;// [d0 c0 b0 a0]

        STR     Temp1, [pBuf], #8                   ;// written at +0
        PKHTB   Temp2, ValueA1, ValueA0, ASR #16    ;// [d2 c2 b2 a2]
        STR     Temp2, [pBuf], #-4                  ;// written at +8, pBuf -> +4

        ;// Second row of the pair:
        ;//     ValueA1 = [d2 c2 d0 c0], ValueA0 = [b2 a2 b0 a0]
        M_DIAGCOPY_NARROW_ROW

        PKHBT   Temp1, ValueA0, ValueA1, LSL #16    ;// [d0 c0 b0 a0]
        SUBS    Count, Count, #1                    ;// flags consumed by BGT below
        STR     Temp1, [pBuf], #8                   ;// written at +4
        PKHTB   Temp2, ValueA1, ValueA0, ASR #16    ;// [d2 c2 b2 a2]
        STR     Temp2, [pBuf], #4                   ;// written at +12, pBuf -> +16

        BGT     LoopStart
End2
        ;// pBuf advanced by 2 * 16 = 32 bytes; return start + 8
        SUB     pSrc0, pBuf, #DIAGCOPY_BUF_BYTES-DIAGCOPY_VER_SKIP
        MOV     srcStep0, #DIAGCOPY_VER_STEP

        M_END

    ENDIF

        END
