Home | History | Annotate | Download | only in dspr2
      1 /*
      2  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <assert.h>
     12 #include <stdio.h>
     13 
     14 #include "./vpx_config.h"
     15 #include "./vp9_rtcd.h"
     16 #include "vp9/common/vp9_common.h"
     17 #include "vp9/common/vp9_blockd.h"
     18 #include "vp9/common/vp9_idct.h"
     19 #include "vpx_dsp/mips/inv_txfm_dspr2.h"
     20 #include "vpx_dsp/txfm_common.h"
     21 #include "vpx_ports/mem.h"
     22 
     23 #if HAVE_DSPR2
     24 void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, int stride,
     25                              int tx_type) {
     26   int i, j;
     27   DECLARE_ALIGNED(32, int16_t, out[4 * 4]);
     28   int16_t *outptr = out;
     29   int16_t temp_in[4 * 4], temp_out[4];
     30   uint32_t pos = 45;
     31 
     32   /* bit positon for extract from acc */
     33   __asm__ __volatile__("wrdsp      %[pos],     1           \n\t"
     34                        :
     35                        : [pos] "r"(pos));
     36 
     37   switch (tx_type) {
     38     case DCT_DCT:  // DCT in both horizontal and vertical
     39       vpx_idct4_rows_dspr2(input, outptr);
     40       vpx_idct4_columns_add_blk_dspr2(&out[0], dest, stride);
     41       break;
     42     case ADST_DCT:  // ADST in vertical, DCT in horizontal
     43       vpx_idct4_rows_dspr2(input, outptr);
     44 
     45       outptr = out;
     46 
     47       for (i = 0; i < 4; ++i) {
     48         iadst4_dspr2(outptr, temp_out);
     49 
     50         for (j = 0; j < 4; ++j)
     51           dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) +
     52                                             dest[j * stride + i]);
     53 
     54         outptr += 4;
     55       }
     56       break;
     57     case DCT_ADST:  // DCT in vertical, ADST in horizontal
     58       for (i = 0; i < 4; ++i) {
     59         iadst4_dspr2(input, outptr);
     60         input += 4;
     61         outptr += 4;
     62       }
     63 
     64       for (i = 0; i < 4; ++i) {
     65         for (j = 0; j < 4; ++j) {
     66           temp_in[i * 4 + j] = out[j * 4 + i];
     67         }
     68       }
     69       vpx_idct4_columns_add_blk_dspr2(&temp_in[0], dest, stride);
     70       break;
     71     case ADST_ADST:  // ADST in both directions
     72       for (i = 0; i < 4; ++i) {
     73         iadst4_dspr2(input, outptr);
     74         input += 4;
     75         outptr += 4;
     76       }
     77 
     78       for (i = 0; i < 4; ++i) {
     79         for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
     80         iadst4_dspr2(temp_in, temp_out);
     81 
     82         for (j = 0; j < 4; ++j)
     83           dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) +
     84                                             dest[j * stride + i]);
     85       }
     86       break;
     87     default: printf("vp9_short_iht4x4_add_dspr2 : Invalid tx_type\n"); break;
     88   }
     89 }
     90 #endif  // #if HAVE_DSPR2
     91