1 /* 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <assert.h> 12 #include <stdio.h> 13 14 #include "./vpx_config.h" 15 #include "./vp9_rtcd.h" 16 #include "vp9/common/vp9_common.h" 17 #include "vp9/common/vp9_blockd.h" 18 #include "vp9/common/vp9_idct.h" 19 #include "vpx_dsp/mips/inv_txfm_dspr2.h" 20 #include "vpx_dsp/txfm_common.h" 21 #include "vpx_ports/mem.h" 22 23 #if HAVE_DSPR2 24 void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, int stride, 25 int tx_type) { 26 int i, j; 27 DECLARE_ALIGNED(32, int16_t, out[4 * 4]); 28 int16_t *outptr = out; 29 int16_t temp_in[4 * 4], temp_out[4]; 30 uint32_t pos = 45; 31 32 /* bit positon for extract from acc */ 33 __asm__ __volatile__("wrdsp %[pos], 1 \n\t" 34 : 35 : [pos] "r"(pos)); 36 37 switch (tx_type) { 38 case DCT_DCT: // DCT in both horizontal and vertical 39 vpx_idct4_rows_dspr2(input, outptr); 40 vpx_idct4_columns_add_blk_dspr2(&out[0], dest, stride); 41 break; 42 case ADST_DCT: // ADST in vertical, DCT in horizontal 43 vpx_idct4_rows_dspr2(input, outptr); 44 45 outptr = out; 46 47 for (i = 0; i < 4; ++i) { 48 iadst4_dspr2(outptr, temp_out); 49 50 for (j = 0; j < 4; ++j) 51 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) + 52 dest[j * stride + i]); 53 54 outptr += 4; 55 } 56 break; 57 case DCT_ADST: // DCT in vertical, ADST in horizontal 58 for (i = 0; i < 4; ++i) { 59 iadst4_dspr2(input, outptr); 60 input += 4; 61 outptr += 4; 62 } 63 64 for (i = 0; i < 4; ++i) { 65 for (j = 0; j < 4; ++j) { 66 temp_in[i * 4 + j] = out[j * 4 + i]; 67 } 68 } 69 vpx_idct4_columns_add_blk_dspr2(&temp_in[0], dest, stride); 70 break; 71 case ADST_ADST: // ADST in both directions 72 for (i = 0; i < 4; ++i) { 73 iadst4_dspr2(input, outptr); 74 input += 4; 75 outptr += 4; 76 } 77 78 for (i = 0; i < 4; ++i) { 79 for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; 80 iadst4_dspr2(temp_in, temp_out); 81 82 for (j = 0; j < 4; ++j) 83 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) + 84 dest[j * stride + i]); 85 } 86 break; 87 default: printf("vp9_short_iht4x4_add_dspr2 : Invalid tx_type\n"); break; 88 } 89 } 90 #endif // #if HAVE_DSPR2 91