;//
;//
;// File Name:  omxVCM4P10_TransformDequantChromaDCFromPair_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

;//---------------------------------------------------------------------------
;// omxVCM4P10_TransformDequantChromaDCFromPair
;//
;// Syntax : ARM armasm (RN/DN/QN aliases, M_START/M_END from armCOMM_s.h)
;// Target : CortexA8 variant only (ARMv6 SIMD + NEON instructions)
;//
;// In     : r0 = ppSrc  address of the pointer to the packed coefficient
;//                      stream; the pointer is advanced past the bytes read
;//          r1 = pDst   buffer receiving four 16-bit coefficients
;//          r2 = QP     index into the two imported byte tables
;// Out    : r0 = OMX_Sts_NoErr (always)
;//
;// Steps  : 1. Clear the four halfwords at pDst.
;//          2. Unpack (prefix, value) pairs from the stream into pDst until
;//             a prefix byte with bit 0x20 set has been processed.
;//          3. Run a 2x2 add/subtract butterfly on the four coefficients.
;//          4. Multiply every result by
;//                 armVCM4P10_VMatrixQPModTable[QP] << armVCM4P10_QPDivTable[QP]
;//             shift right by one, narrow to 16 bits and store back to pDst.
;//
;// Stream : each prefix byte is interpreted as
;//             bits 3..0  halfword index into pDst
;//             bit  4     set   -> value is two bytes, low byte first
;//                        clear -> value is one signed byte
;//             bit  5     set   -> this is the last pair
;//
;// NOTE(review): the index field is 4 bits wide, so a pair can be stored up
;// to 30 bytes past pDst, while only 8 bytes are cleared here. Presumably the
;// producer of the stream only emits indices 0..3 - confirm against caller.
;//---------------------------------------------------------------------------


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        IMPORT  armVCM4P10_QPDivTable
        IMPORT  armVCM4P10_VMatrixQPModTable

        M_VARIANTS CortexA8


    IF CortexA8

;//---------------------------------------------------------------------------
;// ARM register allocation
;//
;// Several physical registers carry different aliases in different phases;
;// each later alias is only written after the earlier one is dead:
;//     r0 : ppSrc   -> status
;//     r2 : QP      -> qpScale   (qpShift must be loaded before qpScale)
;//     r4 : coef    -> row1
;//     r5 : coefHi  -> pDivTab -> sumRows
;//     r6 : prefix  -> pModTab -> diffRows
;//     r9 : pPair   -> qpShift   (pPair is written back before qpShift loads)
;//---------------------------------------------------------------------------

;// Arguments
ppSrc       RN 0
pDst        RN 1
QP          RN 2

;// Unpack phase
coef        RN 4                ;// value of the current pair
coefHi      RN 5                ;// signed high byte of a two-byte value
prefix      RN 6                ;// prefix byte of the current pair
dstOffset   RN 7                ;// byte offset of the target halfword
offsetMask  RN 8                ;// constant 31, masks (prefix << 1)
pPair       RN 9                ;// running stream pointer

;// Butterfly and scaling phase
row0        RN 3                ;// |c1|c0|
row1        RN 4                ;// |c3|c2|
sumRows     RN 5
diffRows    RN 6
pDivTab     RN 5
pModTab     RN 6
qpShift     RN 9
qpScale     RN 2
status      RN 0


;//---------------------------------------------------------------------------
;// NEON register allocation
;//     D0 serves first as the zero source, then as the butterfly output.
;//     D2 is the low half of Q1, so the narrowing shift writes over the low
;//     half of its own source register.
;//---------------------------------------------------------------------------

dClear      DN D0.U16
dXform      DN D0.S16
dMul        DN D1.S16
qWide       QN Q1.S32
dOut        DN D2.S16


        ;// Function entry; r9 names the highest ARM register used below
        ;// (save/restore itself is done by the M_START/M_END macros).
        M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9

        LDR      pPair, [ppSrc]                      ;// pPair = *ppSrc
        VMOV     dClear, #0
        MOV      offsetMask, #31                     ;// Mask for ((prefix & 15) << 1)

        ;//-------------------------------------------------------------------
        ;// Step 1: clear the destination so that positions not named by any
        ;//         pair stay zero
        ;//-------------------------------------------------------------------

        VST1     dClear,[pDst]                       ;// pDst[0..3] = 0
        LDRB     prefix, [pPair], #1                 ;// First prefix byte, fetched ahead of the loop


        ;//-------------------------------------------------------------------
        ;// Step 2: unpack one pair per iteration.
        ;// On entry <prefix> holds the prefix byte of the pair to decode and
        ;// <pPair> points at its value byte(s).
        ;//-------------------------------------------------------------------
nextPair
        TST      prefix, #0x10                       ;// NE: two-byte value, EQ: one-byte value
        LDRSBNE  coefHi,[pPair,#1]                   ;// Two-byte case: high byte, sign-extended
        LDRBNE   coef, [pPair], #2                   ;// Two-byte case: low byte; bytewise so alignment is irrelevant
        AND      dstOffset, offsetMask, prefix, LSL #1  ;// dstOffset = (prefix & 15) << 1 (flags untouched)
        LDRSBEQ  coef, [pPair], #1                   ;// One-byte case: sign-extended value
        ORRNE    coef,coef,coefHi, LSL #8            ;// Two-byte case: coef = low | (high << 8)

        TST      prefix, #0x20                       ;// EQ: more pairs follow, NE: this was the last one
        LDRBEQ   prefix, [pPair], #1                 ;// Fetch the next prefix byte only if continuing
        STRH     coef, [pDst, dstOffset]             ;// Low 16 bits of coef -> pDst + dstOffset
        BEQ      nextPair                            ;// Flags still come from the TST above

        ;//-------------------------------------------------------------------
        ;// Step 3: 2x2 butterfly on the unpacked coefficients, done with
        ;//         ARMv6 packed-halfword arithmetic. The table lookups are
        ;//         interleaved with the arithmetic.
        ;//-------------------------------------------------------------------

        LDMIA    pDst, {row0, row1}                  ;// row0 = |c1|c0|, row1 = |c3|c2|

        STR      pPair, [ppSrc]                      ;// *ppSrc = first byte after the last pair (r9 is reused below)

        LDR      pDivTab, =armVCM4P10_QPDivTable          ;// Base of the shift table
        LDR      pModTab, =armVCM4P10_VMatrixQPModTable   ;// Base of the multiplier table

        SADDSUBX row0, row0, row0                    ;// [ c00+c01, c00-c01 ]
        SADDSUBX row1, row1, row1                    ;// [ c10+c11, c10-c11 ]

        LDRSB    qpShift, [pDivTab, QP]              ;// qpShift = QPDivTable[QP]; reads QP, so must precede the next load
        LDRSB    qpScale, [pModTab, QP]              ;// qpScale = VMatrixQPModTable[QP]; overwrites QP (same register)

        SADD16   sumRows, row0, row1                 ;// [ d00+d10, d01+d11 ]
        SSUB16   diffRows, row0, row1                ;// [ d00-d10, d01-d11 ]

        ;//-------------------------------------------------------------------
        ;// Step 4: scale all four results at once in NEON
        ;//-------------------------------------------------------------------

        LSL      qpScale, qpScale, qpShift           ;// qpScale <<= qpShift
        VMOV     dXform, sumRows, diffRows           ;// Low word = sumRows, high word = diffRows
        VREV32   dXform,dXform                       ;// Swap the two halfwords inside each word
        VDUP     dMul,qpScale                        ;// Low 16 bits of qpScale replicated to all four lanes

        VMULL    qWide,dXform,dMul                   ;// 16x16 -> 32-bit signed products
        VSHRN    dOut,qWide,#1                       ;// >> 1, keep the low 16 bits of each lane


        VST1     dOut,[pDst]                         ;// Write the four scaled coefficients in one store

        MOV      status, #OMX_Sts_NoErr
        M_END

    ENDIF ;// CortexA8


        END