1 /* 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <assert.h> 12 #include <stdio.h> 13 14 #include "./vpx_config.h" 15 #include "./vp9_rtcd.h" 16 #include "vp9/common/vp9_common.h" 17 #include "vp9/common/vp9_blockd.h" 18 #include "vp9/common/vp9_idct.h" 19 #include "vpx_dsp/mips/inv_txfm_dspr2.h" 20 #include "vpx_dsp/txfm_common.h" 21 #include "vpx_ports/mem.h" 22 23 #if HAVE_DSPR2 24 void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, 25 int pitch, int tx_type) { 26 int i, j; 27 DECLARE_ALIGNED(32, int16_t, out[16 * 16]); 28 int16_t *outptr = out; 29 int16_t temp_out[16]; 30 uint32_t pos = 45; 31 32 /* bit positon for extract from acc */ 33 __asm__ __volatile__ ( 34 "wrdsp %[pos], 1 \n\t" 35 : 36 : [pos] "r" (pos) 37 ); 38 39 switch (tx_type) { 40 case DCT_DCT: // DCT in both horizontal and vertical 41 idct16_rows_dspr2(input, outptr, 16); 42 idct16_cols_add_blk_dspr2(out, dest, pitch); 43 break; 44 case ADST_DCT: // ADST in vertical, DCT in horizontal 45 idct16_rows_dspr2(input, outptr, 16); 46 47 outptr = out; 48 49 for (i = 0; i < 16; ++i) { 50 iadst16_dspr2(outptr, temp_out); 51 52 for (j = 0; j < 16; ++j) 53 dest[j * pitch + i] = 54 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) 55 + dest[j * pitch + i]); 56 outptr += 16; 57 } 58 break; 59 case DCT_ADST: // DCT in vertical, ADST in horizontal 60 { 61 int16_t temp_in[16 * 16]; 62 63 for (i = 0; i < 16; ++i) { 64 /* prefetch row */ 65 prefetch_load((const uint8_t *)(input + 16)); 66 67 iadst16_dspr2(input, outptr); 68 input += 16; 69 outptr += 16; 70 } 71 72 for (i = 0; i < 16; ++i) 73 for (j = 0; j < 16; ++j) 74 temp_in[j * 16 + i] = out[i * 16 + j]; 75 76 idct16_cols_add_blk_dspr2(temp_in, dest, pitch); 77 } 78 break; 79 case ADST_ADST: // ADST in both directions 80 { 81 int16_t temp_in[16]; 82 83 for (i = 0; i < 16; ++i) { 84 /* prefetch row */ 85 prefetch_load((const uint8_t *)(input + 16)); 86 87 iadst16_dspr2(input, outptr); 88 input += 16; 89 outptr += 16; 90 } 91 92 for (i = 0; i < 16; ++i) { 93 for (j = 0; j < 16; ++j) 94 temp_in[j] = out[j * 16 + i]; 95 iadst16_dspr2(temp_in, temp_out); 96 for (j = 0; j < 16; ++j) 97 dest[j * pitch + i] = 98 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) 99 + dest[j * pitch + i]); 100 } 101 } 102 break; 103 default: 104 printf("vp9_short_iht16x16_add_dspr2 : Invalid tx_type\n"); 105 break; 106 } 107 } 108 #endif // #if HAVE_DSPR2 109