1 /* 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <assert.h> 12 #include <stdio.h> 13 14 #include "./vpx_config.h" 15 #include "./vp9_rtcd.h" 16 #include "vp9/common/vp9_common.h" 17 #include "vp9/common/vp9_blockd.h" 18 #include "vpx_dsp/mips/inv_txfm_dspr2.h" 19 #include "vpx_dsp/txfm_common.h" 20 #include "vpx_ports/mem.h" 21 22 #if HAVE_DSPR2 23 void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, int stride, 24 int tx_type) { 25 int i, j; 26 DECLARE_ALIGNED(32, int16_t, out[8 * 8]); 27 int16_t *outptr = out; 28 int16_t temp_in[8 * 8], temp_out[8]; 29 uint32_t pos = 45; 30 31 /* bit positon for extract from acc */ 32 __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); 33 34 switch (tx_type) { 35 case DCT_DCT: // DCT in both horizontal and vertical 36 idct8_rows_dspr2(input, outptr, 8); 37 idct8_columns_add_blk_dspr2(&out[0], dest, stride); 38 break; 39 case ADST_DCT: // ADST in vertical, DCT in horizontal 40 idct8_rows_dspr2(input, outptr, 8); 41 42 for (i = 0; i < 8; ++i) { 43 iadst8_dspr2(&out[i * 8], temp_out); 44 45 for (j = 0; j < 8; ++j) 46 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) + 47 dest[j * stride + i]); 48 } 49 break; 50 case DCT_ADST: // DCT in vertical, ADST in horizontal 51 for (i = 0; i < 8; ++i) { 52 iadst8_dspr2(input, outptr); 53 input += 8; 54 outptr += 8; 55 } 56 57 for (i = 0; i < 8; ++i) { 58 for (j = 0; j < 8; ++j) { 59 temp_in[i * 8 + j] = out[j * 8 + i]; 60 } 61 } 62 idct8_columns_add_blk_dspr2(&temp_in[0], dest, stride); 63 break; 64 case ADST_ADST: // ADST in both directions 65 for (i = 0; i < 8; ++i) { 66 iadst8_dspr2(input, outptr); 67 input += 8; 68 outptr += 8; 69 } 70 71 for (i = 0; i < 8; ++i) { 72 for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; 73 74 iadst8_dspr2(temp_in, temp_out); 75 76 for (j = 0; j < 8; ++j) 77 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) + 78 dest[j * stride + i]); 79 } 80 break; 81 default: printf("vp9_short_iht8x8_add_dspr2 : Invalid tx_type\n"); break; 82 } 83 } 84 #endif // #if HAVE_DSPR2 85