;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  omxVCM4P10_TransformDequantChromaDCFromPair_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;//
;//
;//

;//-----------------------------------------------------------------------
;// omxVCM4P10_TransformDequantChromaDCFromPair
;//
;// Purpose (as performed by the code below):
;//   1. Unpack a byte stream of (flag, value) entries into four 16-bit
;//      coefficients stored at pDst.
;//   2. Apply a 2x2 sum/difference butterfly to those four coefficients.
;//   3. Multiply every result by (VMatrixQPModTable[QP] LSL QPDivTable[QP]),
;//      shift right by one, and write the four 16-bit results back to pDst.
;//
;// In:    r0 = ppSrc  pointer to the stream pointer; updated on exit so it
;//                    points just past the last byte consumed
;//        r1 = pDst   destination for four 16-bit coefficients
;//        r2 = QP     index into both look-up tables
;// Out:   r0 = OMX_Sts_NoErr (always)
;//
;// Stream entry layout, as decoded by the unpack loop:
;//   Flag byte : bits 0-3 = destination halfword index
;//               bit  4   = 1 -> 16-bit value follows (low byte first)
;//                          0 -> 8-bit signed value follows
;//               bit  5   = 1 -> this is the last entry
;//
;// NOTE(review): M_START/M_END come from armCOMM_s.h; the "r9" argument
;// presumably makes the prologue/epilogue preserve r4-r9 - confirm
;// against the macro definition.
;// NOTE(review): pDst is written with STRD/LDMIA/STMIA, so it presumably
;// must be suitably aligned for those accesses - confirm with callers.
;//-----------------------------------------------------------------------

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        IMPORT armVCM4P10_QPDivTable
        IMPORT armVCM4P10_VMatrixQPModTable

        M_VARIANTS ARM1136JS


        IF ARM1136JS

;//--------------------------------------
;// Declare input registers
;//--------------------------------------
ppSrc       RN 0
pDst        RN 1
QP          RN 2

;//--------------------------------
;// Scratch variable for Unpack2x2
;//--------------------------------
pSrc        RN 9
Value       RN 4
Value2      RN 5
Flag        RN 6
strOffset   RN 7
cstOffset   RN 8

;//--------------------------------
;// Scratch variable
;//--------------------------------
;// Physical registers are shared between several names, so the
;// instruction order below matters:
;//   r0 : ppSrc, return
;//   r2 : QP, Scale
;//   r3 : r0w0, Temp1
;//   r4 : Value, r0w1, Temp2
;//   r5 : Value2, c0w0, pQPDivTable
;//   r6 : Flag, c1w0, pQPModTable
;//   r7 : strOffset, Temp3
;//   r8 : cstOffset, Temp4
;//   r9 : pSrc, Shift
r0w0        RN 3
r0w1        RN 4

c0w0        RN 5
c1w0        RN 6

return      RN 0
pQPDivTable RN 5
pQPModTable RN 6
Shift       RN 9
Scale       RN 2

Temp1       RN 3
Temp2       RN 4
Temp3       RN 7
Temp4       RN 8

        ;// Write function header
        M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9


        LDR      pSrc, [ppSrc]                       ;// pSrc = *ppSrc
        MOV      cstOffset, #31                      ;// Mask used in the loop to form the store offset

        ;//-----------------------------------------------------------------------
        ;// Firstly, fill all the coefficient values on the pDst buffer by zero
        ;//-----------------------------------------------------------------------

        MOV      Value,  #0                          ;// Initialize the zero value
        MOV      Value2, #0                          ;// Initialize the zero value
        LDRB     Flag,  [pSrc], #1                   ;// Preload Flag before entering unpackLoop
        STRD     Value, [pDst, #0]                   ;// pDst[0] = pDst[1] = pDst[2] = pDst[3] = 0 (8 bytes from the Value/Value2 pair)

        ;// NOTE(review): the store offset computed in the loop is (Flag & 15) * 2,
        ;// i.e. up to 30 bytes, while only the first 8 bytes of pDst are cleared
        ;// above. Confirm that the producer of the stream never emits an index
        ;// above 3, or that pDst is large enough for such entries.

unpackLoop
        TST      Flag,  #0x10                        ;// NE -> 16-bit value, EQ -> 8-bit value
        LDRSBNE  Value2,[pSrc,#1]                    ;// 16-bit: high byte, sign-extended
        LDRBNE   Value, [pSrc], #2                   ;// 16-bit: low byte; load byte wise to avoid unaligned access
        AND      strOffset, cstOffset, Flag, LSL #1  ;// strOffset = (Flag & 15) << 1 (byte offset; flags untouched)
        LDRSBEQ  Value, [pSrc], #1                   ;// 8-bit: Value = sign-extended *pSrc++
        ORRNE    Value,Value,Value2, LSL #8          ;// 16-bit: Value = low | (high << 8)

        TST      Flag,  #0x20                        ;// Computing (Flag & 0x20) to check, if we're done
        LDRBEQ   Flag,  [pSrc], #1                   ;// Not done: Flag = *pSrc++ for the next iteration
        STRH     Value, [pDst, strOffset]            ;// Store Value at byte offset strOffset (uses the old Flag's index)
        BEQ      unpackLoop                          ;// Flags still hold the 0x20 test: loop while not done

        LDMIA    pDst, {r0w0, r0w1}                  ;// r0w0 = |c1|c0| & r0w1 = |c3|c2|


        STR      pSrc, [ppSrc]                       ;// Update the bitstream pointer (must precede the Shift load: same register)

        LDR      pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer
        LDR      pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer

        SADDSUBX r0w0, r0w0,  r0w0                   ;// [ c00+c01, c00-c01 ]  (top = sum, bottom = difference)
        SADDSUBX r0w1, r0w1,  r0w1                   ;// [ c10+c11, c10-c11 ]

        LDRSB    Shift, [pQPDivTable, QP]            ;// Shift = pQPDivTable[QP]
        LDRSB    Scale, [pQPModTable, QP]            ;// Scale = pQPModTable[QP] (overwrites QP: same register)

        SADD16   c0w0, r0w0, r0w1                    ;// [ d00+d10, d01+d11 ]  (table pointers are dead from here)
        SSUB16   c1w0, r0w0, r0w1                    ;// [ d00-d10, d01-d11 ]

        LSL      Scale, Scale, Shift                 ;// Scale = Scale << Shift

        SMULTB   Temp2, c0w0, Scale                  ;// Temp2 = T(c0w0) * Scale
        SMULTB   Temp4, c1w0, Scale                  ;// Temp4 = T(c1w0) * Scale
        SMULBB   Temp1, c0w0, Scale                  ;// Temp1 = B(c0w0) * Scale
        SMULBB   Temp3, c1w0, Scale                  ;// Temp3 = B(c1w0) * Scale
        MOV      Temp2, Temp2, ASR #1                ;// Temp2 = Temp2 >> 1 (Temp1 gets its >> 1 via the PKHBT shift below)
        MOV      Temp4, Temp4, ASR #1                ;// Temp4 = Temp4 >> 1 (Temp3 gets its >> 1 via the PKHBT shift below)
        PKHBT    c0w0,  Temp2, Temp1, LSL #15        ;// c0w0 = | Temp1 >> 1 | Temp2 |  (LSL #15 places bits 16..1 of Temp1 in the top half)
        PKHBT    c1w0,  Temp4, Temp3, LSL #15        ;// c1w0 = | Temp3 >> 1 | Temp4 |
        STMIA    pDst, {c0w0, c1w0}                  ;// Storing all the coefficients at once
        MOV      return, #OMX_Sts_NoErr
        M_END

        ENDIF ;// ARM1136JS




        END