Home | History | Annotate | Download | only in armv8
      1 //******************************************************************************
      2 //*
      3 //* Copyright (C) 2015 The Android Open Source Project
      4 //*
      5 //* Licensed under the Apache License, Version 2.0 (the "License");
      6 //* you may not use this file except in compliance with the License.
      7 //* You may obtain a copy of the License at:
      8 //*
      9 //* http://www.apache.org/licenses/LICENSE-2.0
     10 //*
     11 //* Unless required by applicable law or agreed to in writing, software
     12 //* distributed under the License is distributed on an "AS IS" BASIS,
     13 //* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 //* See the License for the specific language governing permissions and
     15 //* limitations under the License.
     16 //*
     17 //*****************************************************************************
     18 //* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 //*/
     20 
     21 //******************************************************************************
     22 //*
     23 //*
     24 //* @brief
     25 //*  This file contains definitions of routines for variance calculation
     26 //*
     27 //* @author
     28 //*  Ittiam
     29 //*
     30 //* @par List of Functions:
     31 //*  - icv_variance_8x4_av8()
     32 //*
     33 //* @remarks
     34 //*  None
     35 //*
     36 //*******************************************************************************
     37 
     38 
     39 //******************************************************************************
     40 //*
     41 //*  @brief computes variance of a 8x4  block
     42 //*
     43 //*
     44 //*  @par   Description
     45 //*   This functions computes variance of a 8x4  block
     46 //*
     47 //* @param[in] pu1_src
     48 //*  UWORD8 pointer to the source
     49 //*
     50 //* @param[in] src_strd
     51 //*  integer source stride
     52 //*
     53 //* @param[in] wd
     54 //*  Width (assumed to be 8)
     55 //*
     56 //* @param[in] ht
     57 //*  Height (assumed to be 4)
     58 //*
     59 //* @returns
     60 //*     variance value in x0
     61 //*
     62 //* @remarks
     63 //*
     64 //******************************************************************************
     65 
     66     .global icv_variance_8x4_av8
     67 
     68 icv_variance_8x4_av8:
     69 
     70     // Load 8x4 source
     71     ld1     {v0.8b},    [x0],     x1
     72     ld1     {v1.8b},    [x0],     x1
     73     ld1     {v2.8b},    [x0],     x1
     74     ld1     {v3.8b},    [x0],     x1
     75 
     76     // Calculate Sum(values)
     77     uaddl   v4.8h,  v0.8b,  v1.8b
     78     uaddl   v6.8h,  v2.8b,  v3.8b
     79     add     v4.8h,  v4.8h,  v6.8h
     80 
     81     addp    v4.8h,  v4.8h,  v4.8h
     82     addp    v4.4h,  v4.4h,  v4.4h
     83     addp    v4.4h,  v4.4h,  v4.4h
     84 
     85     // Calculate SumOfSquares
     86     umull   v20.8h, v0.8b,  v0.8b
     87     umull   v22.8h, v1.8b,  v1.8b
     88     umull   v24.8h, v2.8b,  v2.8b
     89     umull   v26.8h, v3.8b,  v3.8b
     90 
     91     uaddl   v21.4s,    v20.4h,    v22.4h
     92     uaddl   v25.4s,    v24.4h,    v26.4h
     93     uaddl2  v20.4s,    v20.8h,    v22.8h
     94     uaddl2  v24.4s,    v24.8h,    v26.8h
     95 
     96     add     v20.4s,     v20.4s,  v21.4s
     97     add     v22.4s,     v24.4s,  v25.4s
     98     add     v20.4s,     v20.4s,  v22.4s
     99     addp    v20.4s,     v20.4s,  v20.4s
    100     addp    v20.2s,     v20.2s,  v20.2s
    101 
    102     // Sum(values)
    103     smov    x0,     v4.h[0]
    104 
    105     // SumOfSquares
    106     smov    x1,     v20.s[0]
    107 
    108     // SquareOfSums
    109     mul     x3,     x0,     x0
    110 
    111     // SumOfSquares * 8 * 4 - SquareOfSums
    112     sub     x1,     x3,     x1,        LSL #5
    113     neg     x0,     x1
    114 
    115     // Divide by 32 * 32
    116 
    117     ASR     x0,     x0,     #10
    118     ret
    119