Home | History | Annotate | Download | only in aom_dsp
      1 /*
      2  * Copyright (c) 2017, Alliance for Open Media. All rights reserved
      3  *
      4  * This source code is subject to the terms of the BSD 2 Clause License and
      5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6  * was not distributed with this source code in the LICENSE file, you can
      7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8  * Media Patent License 1.0 was not distributed with this source code in the
      9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10  */
     11 
     12 #include <math.h>
     13 #include <stdio.h>
     14 #include <stdlib.h>
     15 #include <string.h>
     16 
     17 #include "aom_dsp/aom_dsp_common.h"
     18 #include "aom_dsp/noise_model.h"
     19 #include "aom_dsp/noise_util.h"
     20 #include "aom_mem/aom_mem.h"
     21 #include "av1/common/common.h"
     22 #include "av1/encoder/mathutils.h"
     23 
     24 #define kLowPolyNumParams 3
     25 
     26 static const int kMaxLag = 4;
     27 
     28 // Defines a function that can be used to obtain the mean of a block for the
     29 // provided data type (uint8_t, or uint16_t)
     30 #define GET_BLOCK_MEAN(INT_TYPE, suffix)                                    \
     31   static double get_block_mean_##suffix(const INT_TYPE *data, int w, int h, \
     32                                         int stride, int x_o, int y_o,       \
     33                                         int block_size) {                   \
     34     const int max_h = AOMMIN(h - y_o, block_size);                          \
     35     const int max_w = AOMMIN(w - x_o, block_size);                          \
     36     double block_mean = 0;                                                  \
     37     for (int y = 0; y < max_h; ++y) {                                       \
     38       for (int x = 0; x < max_w; ++x) {                                     \
     39         block_mean += data[(y_o + y) * stride + x_o + x];                   \
     40       }                                                                     \
     41     }                                                                       \
     42     return block_mean / (max_w * max_h);                                    \
     43   }
     44 
     45 GET_BLOCK_MEAN(uint8_t, lowbd);
     46 GET_BLOCK_MEAN(uint16_t, highbd);
     47 
     48 static INLINE double get_block_mean(const uint8_t *data, int w, int h,
     49                                     int stride, int x_o, int y_o,
     50                                     int block_size, int use_highbd) {
     51   if (use_highbd)
     52     return get_block_mean_highbd((const uint16_t *)data, w, h, stride, x_o, y_o,
     53                                  block_size);
     54   return get_block_mean_lowbd(data, w, h, stride, x_o, y_o, block_size);
     55 }
     56 
     57 // Defines a function that can be used to obtain the variance of a block
     58 // for the provided data type (uint8_t, or uint16_t)
     59 #define GET_NOISE_VAR(INT_TYPE, suffix)                                  \
     60   static double get_noise_var_##suffix(                                  \
     61       const INT_TYPE *data, const INT_TYPE *denoised, int stride, int w, \
     62       int h, int x_o, int y_o, int block_size_x, int block_size_y) {     \
     63     const int max_h = AOMMIN(h - y_o, block_size_y);                     \
     64     const int max_w = AOMMIN(w - x_o, block_size_x);                     \
     65     double noise_var = 0;                                                \
     66     double noise_mean = 0;                                               \
     67     for (int y = 0; y < max_h; ++y) {                                    \
     68       for (int x = 0; x < max_w; ++x) {                                  \
     69         double noise = (double)data[(y_o + y) * stride + x_o + x] -      \
     70                        denoised[(y_o + y) * stride + x_o + x];           \
     71         noise_mean += noise;                                             \
     72         noise_var += noise * noise;                                      \
     73       }                                                                  \
     74     }                                                                    \
     75     noise_mean /= (max_w * max_h);                                       \
     76     return noise_var / (max_w * max_h) - noise_mean * noise_mean;        \
     77   }
     78 
     79 GET_NOISE_VAR(uint8_t, lowbd);
     80 GET_NOISE_VAR(uint16_t, highbd);
     81 
     82 static INLINE double get_noise_var(const uint8_t *data, const uint8_t *denoised,
     83                                    int w, int h, int stride, int x_o, int y_o,
     84                                    int block_size_x, int block_size_y,
     85                                    int use_highbd) {
     86   if (use_highbd)
     87     return get_noise_var_highbd((const uint16_t *)data,
     88                                 (const uint16_t *)denoised, w, h, stride, x_o,
     89                                 y_o, block_size_x, block_size_y);
     90   return get_noise_var_lowbd(data, denoised, w, h, stride, x_o, y_o,
     91                              block_size_x, block_size_y);
     92 }
     93 
     94 static void equation_system_clear(aom_equation_system_t *eqns) {
     95   const int n = eqns->n;
     96   memset(eqns->A, 0, sizeof(*eqns->A) * n * n);
     97   memset(eqns->x, 0, sizeof(*eqns->x) * n);
     98   memset(eqns->b, 0, sizeof(*eqns->b) * n);
     99 }
    100 
    101 static void equation_system_copy(aom_equation_system_t *dst,
    102                                  const aom_equation_system_t *src) {
    103   const int n = dst->n;
    104   memcpy(dst->A, src->A, sizeof(*dst->A) * n * n);
    105   memcpy(dst->x, src->x, sizeof(*dst->x) * n);
    106   memcpy(dst->b, src->b, sizeof(*dst->b) * n);
    107 }
    108 
    109 static int equation_system_init(aom_equation_system_t *eqns, int n) {
    110   eqns->A = (double *)aom_malloc(sizeof(*eqns->A) * n * n);
    111   eqns->b = (double *)aom_malloc(sizeof(*eqns->b) * n);
    112   eqns->x = (double *)aom_malloc(sizeof(*eqns->x) * n);
    113   eqns->n = n;
    114   if (!eqns->A || !eqns->b || !eqns->x) {
    115     fprintf(stderr, "Failed to allocate system of equations of size %d\n", n);
    116     aom_free(eqns->A);
    117     aom_free(eqns->b);
    118     aom_free(eqns->x);
    119     memset(eqns, 0, sizeof(*eqns));
    120     return 0;
    121   }
    122   equation_system_clear(eqns);
    123   return 1;
    124 }
    125 
    126 static int equation_system_solve(aom_equation_system_t *eqns) {
    127   const int n = eqns->n;
    128   double *b = (double *)aom_malloc(sizeof(*b) * n);
    129   double *A = (double *)aom_malloc(sizeof(*A) * n * n);
    130   int ret = 0;
    131   if (A == NULL || b == NULL) {
    132     fprintf(stderr, "Unable to allocate temp values of size %dx%d\n", n, n);
    133     aom_free(b);
    134     aom_free(A);
    135     return 0;
    136   }
    137   memcpy(A, eqns->A, sizeof(*eqns->A) * n * n);
    138   memcpy(b, eqns->b, sizeof(*eqns->b) * n);
    139   ret = linsolve(n, A, eqns->n, b, eqns->x);
    140   aom_free(b);
    141   aom_free(A);
    142 
    143   if (ret == 0) {
    144     return 0;
    145   }
    146   return 1;
    147 }
    148 
    149 static void equation_system_add(aom_equation_system_t *dest,
    150                                 aom_equation_system_t *src) {
    151   const int n = dest->n;
    152   int i, j;
    153   for (i = 0; i < n; ++i) {
    154     for (j = 0; j < n; ++j) {
    155       dest->A[i * n + j] += src->A[i * n + j];
    156     }
    157     dest->b[i] += src->b[i];
    158   }
    159 }
    160 
    161 static void equation_system_free(aom_equation_system_t *eqns) {
    162   if (!eqns) return;
    163   aom_free(eqns->A);
    164   aom_free(eqns->b);
    165   aom_free(eqns->x);
    166   memset(eqns, 0, sizeof(*eqns));
    167 }
    168 
    169 static void noise_strength_solver_clear(aom_noise_strength_solver_t *solver) {
    170   equation_system_clear(&solver->eqns);
    171   solver->num_equations = 0;
    172   solver->total = 0;
    173 }
    174 
    175 static void noise_strength_solver_add(aom_noise_strength_solver_t *dest,
    176                                       aom_noise_strength_solver_t *src) {
    177   equation_system_add(&dest->eqns, &src->eqns);
    178   dest->num_equations += src->num_equations;
    179   dest->total += src->total;
    180 }
    181 
    182 // Return the number of coefficients required for the given parameters
    183 static int num_coeffs(const aom_noise_model_params_t params) {
    184   const int n = 2 * params.lag + 1;
    185   switch (params.shape) {
    186     case AOM_NOISE_SHAPE_DIAMOND: return params.lag * (params.lag + 1);
    187     case AOM_NOISE_SHAPE_SQUARE: return (n * n) / 2;
    188   }
    189   return 0;
    190 }
    191 
    192 static int noise_state_init(aom_noise_state_t *state, int n, int bit_depth) {
    193   const int kNumBins = 20;
    194   if (!equation_system_init(&state->eqns, n)) {
    195     fprintf(stderr, "Failed initialization noise state with size %d\n", n);
    196     return 0;
    197   }
    198   state->ar_gain = 1.0;
    199   state->num_observations = 0;
    200   return aom_noise_strength_solver_init(&state->strength_solver, kNumBins,
    201                                         bit_depth);
    202 }
    203 
    204 static void set_chroma_coefficient_fallback_soln(aom_equation_system_t *eqns) {
    205   const double kTolerance = 1e-6;
    206   const int last = eqns->n - 1;
    207   // Set all of the AR coefficients to zero, but try to solve for correlation
    208   // with the luma channel
    209   memset(eqns->x, 0, sizeof(*eqns->x) * eqns->n);
    210   if (fabs(eqns->A[last * eqns->n + last]) > kTolerance) {
    211     eqns->x[last] = eqns->b[last] / eqns->A[last * eqns->n + last];
    212   }
    213 }
    214 
    215 int aom_noise_strength_lut_init(aom_noise_strength_lut_t *lut, int num_points) {
    216   if (!lut) return 0;
    217   lut->points = (double(*)[2])aom_malloc(num_points * sizeof(*lut->points));
    218   if (!lut->points) return 0;
    219   lut->num_points = num_points;
    220   memset(lut->points, 0, sizeof(*lut->points) * num_points);
    221   return 1;
    222 }
    223 
    224 void aom_noise_strength_lut_free(aom_noise_strength_lut_t *lut) {
    225   if (!lut) return;
    226   aom_free(lut->points);
    227   memset(lut, 0, sizeof(*lut));
    228 }
    229 
    230 double aom_noise_strength_lut_eval(const aom_noise_strength_lut_t *lut,
    231                                    double x) {
    232   int i = 0;
    233   // Constant extrapolation for x <  x_0.
    234   if (x < lut->points[0][0]) return lut->points[0][1];
    235   for (i = 0; i < lut->num_points - 1; ++i) {
    236     if (x >= lut->points[i][0] && x <= lut->points[i + 1][0]) {
    237       const double a =
    238           (x - lut->points[i][0]) / (lut->points[i + 1][0] - lut->points[i][0]);
    239       return lut->points[i + 1][1] * a + lut->points[i][1] * (1.0 - a);
    240     }
    241   }
    242   // Constant extrapolation for x > x_{n-1}
    243   return lut->points[lut->num_points - 1][1];
    244 }
    245 
    246 static double noise_strength_solver_get_bin_index(
    247     const aom_noise_strength_solver_t *solver, double value) {
    248   const double val =
    249       fclamp(value, solver->min_intensity, solver->max_intensity);
    250   const double range = solver->max_intensity - solver->min_intensity;
    251   return (solver->num_bins - 1) * (val - solver->min_intensity) / range;
    252 }
    253 
    254 static double noise_strength_solver_get_value(
    255     const aom_noise_strength_solver_t *solver, double x) {
    256   const double bin = noise_strength_solver_get_bin_index(solver, x);
    257   const int bin_i0 = (int)floor(bin);
    258   const int bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
    259   const double a = bin - bin_i0;
    260   return (1.0 - a) * solver->eqns.x[bin_i0] + a * solver->eqns.x[bin_i1];
    261 }
    262 
    263 void aom_noise_strength_solver_add_measurement(
    264     aom_noise_strength_solver_t *solver, double block_mean, double noise_std) {
    265   const double bin = noise_strength_solver_get_bin_index(solver, block_mean);
    266   const int bin_i0 = (int)floor(bin);
    267   const int bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
    268   const double a = bin - bin_i0;
    269   const int n = solver->num_bins;
    270   solver->eqns.A[bin_i0 * n + bin_i0] += (1.0 - a) * (1.0 - a);
    271   solver->eqns.A[bin_i1 * n + bin_i0] += a * (1.0 - a);
    272   solver->eqns.A[bin_i1 * n + bin_i1] += a * a;
    273   solver->eqns.A[bin_i0 * n + bin_i1] += a * (1.0 - a);
    274   solver->eqns.b[bin_i0] += (1.0 - a) * noise_std;
    275   solver->eqns.b[bin_i1] += a * noise_std;
    276   solver->total += noise_std;
    277   solver->num_equations++;
    278 }
    279 
    280 int aom_noise_strength_solver_solve(aom_noise_strength_solver_t *solver) {
    281   // Add regularization proportional to the number of constraints
    282   const int n = solver->num_bins;
    283   const double kAlpha = 2.0 * (double)(solver->num_equations) / n;
    284   int result = 0;
    285   double mean = 0;
    286 
    287   // Do this in a non-destructive manner so it is not confusing to the caller
    288   double *old_A = solver->eqns.A;
    289   double *A = (double *)aom_malloc(sizeof(*A) * n * n);
    290   if (!A) {
    291     fprintf(stderr, "Unable to allocate copy of A\n");
    292     return 0;
    293   }
    294   memcpy(A, old_A, sizeof(*A) * n * n);
    295 
    296   for (int i = 0; i < n; ++i) {
    297     const int i_lo = AOMMAX(0, i - 1);
    298     const int i_hi = AOMMIN(n - 1, i + 1);
    299     A[i * n + i_lo] -= kAlpha;
    300     A[i * n + i] += 2 * kAlpha;
    301     A[i * n + i_hi] -= kAlpha;
    302   }
    303 
    304   // Small regularization to give average noise strength
    305   mean = solver->total / solver->num_equations;
    306   for (int i = 0; i < n; ++i) {
    307     A[i * n + i] += 1.0 / 8192.;
    308     solver->eqns.b[i] += mean / 8192.;
    309   }
    310   solver->eqns.A = A;
    311   result = equation_system_solve(&solver->eqns);
    312   solver->eqns.A = old_A;
    313 
    314   aom_free(A);
    315   return result;
    316 }
    317 
    318 int aom_noise_strength_solver_init(aom_noise_strength_solver_t *solver,
    319                                    int num_bins, int bit_depth) {
    320   if (!solver) return 0;
    321   memset(solver, 0, sizeof(*solver));
    322   solver->num_bins = num_bins;
    323   solver->min_intensity = 0;
    324   solver->max_intensity = (1 << bit_depth) - 1;
    325   solver->total = 0;
    326   solver->num_equations = 0;
    327   return equation_system_init(&solver->eqns, num_bins);
    328 }
    329 
    330 void aom_noise_strength_solver_free(aom_noise_strength_solver_t *solver) {
    331   if (!solver) return;
    332   equation_system_free(&solver->eqns);
    333 }
    334 
    335 double aom_noise_strength_solver_get_center(
    336     const aom_noise_strength_solver_t *solver, int i) {
    337   const double range = solver->max_intensity - solver->min_intensity;
    338   const int n = solver->num_bins;
    339   return ((double)i) / (n - 1) * range + solver->min_intensity;
    340 }
    341 
    342 // Computes the residual if a point were to be removed from the lut. This is
    343 // calculated as the area between the output of the solver and the line segment
    344 // that would be formed between [x_{i - 1}, x_{i + 1}).
    345 static void update_piecewise_linear_residual(
    346     const aom_noise_strength_solver_t *solver,
    347     const aom_noise_strength_lut_t *lut, double *residual, int start, int end) {
    348   const double dx = 255. / solver->num_bins;
    349   for (int i = AOMMAX(start, 1); i < AOMMIN(end, lut->num_points - 1); ++i) {
    350     const int lower = AOMMAX(0, (int)floor(noise_strength_solver_get_bin_index(
    351                                     solver, lut->points[i - 1][0])));
    352     const int upper = AOMMIN(solver->num_bins - 1,
    353                              (int)ceil(noise_strength_solver_get_bin_index(
    354                                  solver, lut->points[i + 1][0])));
    355     double r = 0;
    356     for (int j = lower; j <= upper; ++j) {
    357       const double x = aom_noise_strength_solver_get_center(solver, j);
    358       if (x < lut->points[i - 1][0]) continue;
    359       if (x >= lut->points[i + 1][0]) continue;
    360       const double y = solver->eqns.x[j];
    361       const double a = (x - lut->points[i - 1][0]) /
    362                        (lut->points[i + 1][0] - lut->points[i - 1][0]);
    363       const double estimate_y =
    364           lut->points[i - 1][1] * (1.0 - a) + lut->points[i + 1][1] * a;
    365       r += fabs(y - estimate_y);
    366     }
    367     residual[i] = r * dx;
    368   }
    369 }
    370 
    371 int aom_noise_strength_solver_fit_piecewise(
    372     const aom_noise_strength_solver_t *solver, int max_output_points,
    373     aom_noise_strength_lut_t *lut) {
    374   // The tolerance is normalized to be give consistent results between
    375   // different bit-depths.
    376   const double kTolerance = solver->max_intensity * 0.00625 / 255.0;
    377   if (!aom_noise_strength_lut_init(lut, solver->num_bins)) {
    378     fprintf(stderr, "Failed to init lut\n");
    379     return 0;
    380   }
    381   for (int i = 0; i < solver->num_bins; ++i) {
    382     lut->points[i][0] = aom_noise_strength_solver_get_center(solver, i);
    383     lut->points[i][1] = solver->eqns.x[i];
    384   }
    385   if (max_output_points < 0) {
    386     max_output_points = solver->num_bins;
    387   }
    388 
    389   double *residual = aom_malloc(solver->num_bins * sizeof(*residual));
    390   memset(residual, 0, sizeof(*residual) * solver->num_bins);
    391 
    392   update_piecewise_linear_residual(solver, lut, residual, 0, solver->num_bins);
    393 
    394   // Greedily remove points if there are too many or if it doesn't hurt local
    395   // approximation (never remove the end points)
    396   while (lut->num_points > 2) {
    397     int min_index = 1;
    398     for (int j = 1; j < lut->num_points - 1; ++j) {
    399       if (residual[j] < residual[min_index]) {
    400         min_index = j;
    401       }
    402     }
    403     const double dx =
    404         lut->points[min_index + 1][0] - lut->points[min_index - 1][0];
    405     const double avg_residual = residual[min_index] / dx;
    406     if (lut->num_points <= max_output_points && avg_residual > kTolerance) {
    407       break;
    408     }
    409 
    410     const int num_remaining = lut->num_points - min_index - 1;
    411     memmove(lut->points + min_index, lut->points + min_index + 1,
    412             sizeof(lut->points[0]) * num_remaining);
    413     lut->num_points--;
    414 
    415     update_piecewise_linear_residual(solver, lut, residual, min_index - 1,
    416                                      min_index + 1);
    417   }
    418   aom_free(residual);
    419   return 1;
    420 }
    421 
    422 int aom_flat_block_finder_init(aom_flat_block_finder_t *block_finder,
    423                                int block_size, int bit_depth, int use_highbd) {
    424   const int n = block_size * block_size;
    425   aom_equation_system_t eqns;
    426   double *AtA_inv = 0;
    427   double *A = 0;
    428   int x = 0, y = 0, i = 0, j = 0;
    429   if (!equation_system_init(&eqns, kLowPolyNumParams)) {
    430     fprintf(stderr, "Failed to init equation system for block_size=%d\n",
    431             block_size);
    432     return 0;
    433   }
    434 
    435   AtA_inv = (double *)aom_malloc(kLowPolyNumParams * kLowPolyNumParams *
    436                                  sizeof(*AtA_inv));
    437   A = (double *)aom_malloc(kLowPolyNumParams * n * sizeof(*A));
    438   if (AtA_inv == NULL || A == NULL) {
    439     fprintf(stderr, "Failed to alloc A or AtA_inv for block_size=%d\n",
    440             block_size);
    441     aom_free(AtA_inv);
    442     aom_free(A);
    443     equation_system_free(&eqns);
    444     return 0;
    445   }
    446 
    447   block_finder->A = A;
    448   block_finder->AtA_inv = AtA_inv;
    449   block_finder->block_size = block_size;
    450   block_finder->normalization = (1 << bit_depth) - 1;
    451   block_finder->use_highbd = use_highbd;
    452 
    453   for (y = 0; y < block_size; ++y) {
    454     const double yd = ((double)y - block_size / 2.) / (block_size / 2.);
    455     for (x = 0; x < block_size; ++x) {
    456       const double xd = ((double)x - block_size / 2.) / (block_size / 2.);
    457       const double coords[3] = { yd, xd, 1 };
    458       const int row = y * block_size + x;
    459       A[kLowPolyNumParams * row + 0] = yd;
    460       A[kLowPolyNumParams * row + 1] = xd;
    461       A[kLowPolyNumParams * row + 2] = 1;
    462 
    463       for (i = 0; i < kLowPolyNumParams; ++i) {
    464         for (j = 0; j < kLowPolyNumParams; ++j) {
    465           eqns.A[kLowPolyNumParams * i + j] += coords[i] * coords[j];
    466         }
    467       }
    468     }
    469   }
    470 
    471   // Lazy inverse using existing equation solver.
    472   for (i = 0; i < kLowPolyNumParams; ++i) {
    473     memset(eqns.b, 0, sizeof(*eqns.b) * kLowPolyNumParams);
    474     eqns.b[i] = 1;
    475     equation_system_solve(&eqns);
    476 
    477     for (j = 0; j < kLowPolyNumParams; ++j) {
    478       AtA_inv[j * kLowPolyNumParams + i] = eqns.x[j];
    479     }
    480   }
    481   equation_system_free(&eqns);
    482   return 1;
    483 }
    484 
    485 void aom_flat_block_finder_free(aom_flat_block_finder_t *block_finder) {
    486   if (!block_finder) return;
    487   aom_free(block_finder->A);
    488   aom_free(block_finder->AtA_inv);
    489   memset(block_finder, 0, sizeof(*block_finder));
    490 }
    491 
    492 void aom_flat_block_finder_extract_block(
    493     const aom_flat_block_finder_t *block_finder, const uint8_t *const data,
    494     int w, int h, int stride, int offsx, int offsy, double *plane,
    495     double *block) {
    496   const int block_size = block_finder->block_size;
    497   const int n = block_size * block_size;
    498   const double *A = block_finder->A;
    499   const double *AtA_inv = block_finder->AtA_inv;
    500   double plane_coords[kLowPolyNumParams];
    501   double AtA_inv_b[kLowPolyNumParams];
    502   int xi, yi, i;
    503 
    504   if (block_finder->use_highbd) {
    505     const uint16_t *const data16 = (const uint16_t *const)data;
    506     for (yi = 0; yi < block_size; ++yi) {
    507       const int y = clamp(offsy + yi, 0, h - 1);
    508       for (xi = 0; xi < block_size; ++xi) {
    509         const int x = clamp(offsx + xi, 0, w - 1);
    510         block[yi * block_size + xi] =
    511             ((double)data16[y * stride + x]) / block_finder->normalization;
    512       }
    513     }
    514   } else {
    515     for (yi = 0; yi < block_size; ++yi) {
    516       const int y = clamp(offsy + yi, 0, h - 1);
    517       for (xi = 0; xi < block_size; ++xi) {
    518         const int x = clamp(offsx + xi, 0, w - 1);
    519         block[yi * block_size + xi] =
    520             ((double)data[y * stride + x]) / block_finder->normalization;
    521       }
    522     }
    523   }
    524   multiply_mat(block, A, AtA_inv_b, 1, n, kLowPolyNumParams);
    525   multiply_mat(AtA_inv, AtA_inv_b, plane_coords, kLowPolyNumParams,
    526                kLowPolyNumParams, 1);
    527   multiply_mat(A, plane_coords, plane, n, kLowPolyNumParams, 1);
    528 
    529   for (i = 0; i < n; ++i) {
    530     block[i] -= plane[i];
    531   }
    532 }
    533 
    534 typedef struct {
    535   int index;
    536   float score;
    537 } index_and_score_t;
    538 
    539 static int compare_scores(const void *a, const void *b) {
    540   const float diff =
    541       ((index_and_score_t *)a)->score - ((index_and_score_t *)b)->score;
    542   if (diff < 0)
    543     return -1;
    544   else if (diff > 0)
    545     return 1;
    546   return 0;
    547 }
    548 
    549 int aom_flat_block_finder_run(const aom_flat_block_finder_t *block_finder,
    550                               const uint8_t *const data, int w, int h,
    551                               int stride, uint8_t *flat_blocks) {
    552   // The gradient-based features used in this code are based on:
    553   //  A. Kokaram, D. Kelly, H. Denman and A. Crawford, "Measuring noise
    554   //  correlation for improved video denoising," 2012 19th, ICIP.
    555   // The thresholds are more lenient to allow for correct grain modeling
    556   // if extreme cases.
    557   const int block_size = block_finder->block_size;
    558   const int n = block_size * block_size;
    559   const double kTraceThreshold = 0.15 / (32 * 32);
    560   const double kRatioThreshold = 1.25;
    561   const double kNormThreshold = 0.08 / (32 * 32);
    562   const double kVarThreshold = 0.005 / (double)n;
    563   const int num_blocks_w = (w + block_size - 1) / block_size;
    564   const int num_blocks_h = (h + block_size - 1) / block_size;
    565   int num_flat = 0;
    566   int bx = 0, by = 0;
    567   double *plane = (double *)aom_malloc(n * sizeof(*plane));
    568   double *block = (double *)aom_malloc(n * sizeof(*block));
    569   index_and_score_t *scores = (index_and_score_t *)aom_malloc(
    570       num_blocks_w * num_blocks_h * sizeof(*scores));
    571   if (plane == NULL || block == NULL || scores == NULL) {
    572     fprintf(stderr, "Failed to allocate memory for block of size %d\n", n);
    573     aom_free(plane);
    574     aom_free(block);
    575     aom_free(scores);
    576     return -1;
    577   }
    578 
    579 #ifdef NOISE_MODEL_LOG_SCORE
    580   fprintf(stderr, "score = [");
    581 #endif
    582   for (by = 0; by < num_blocks_h; ++by) {
    583     for (bx = 0; bx < num_blocks_w; ++bx) {
    584       // Compute gradient covariance matrix.
    585       double Gxx = 0, Gxy = 0, Gyy = 0;
    586       double var = 0;
    587       double mean = 0;
    588       int xi, yi;
    589       aom_flat_block_finder_extract_block(block_finder, data, w, h, stride,
    590                                           bx * block_size, by * block_size,
    591                                           plane, block);
    592 
    593       for (yi = 1; yi < block_size - 1; ++yi) {
    594         for (xi = 1; xi < block_size - 1; ++xi) {
    595           const double gx = (block[yi * block_size + xi + 1] -
    596                              block[yi * block_size + xi - 1]) /
    597                             2;
    598           const double gy = (block[yi * block_size + xi + block_size] -
    599                              block[yi * block_size + xi - block_size]) /
    600                             2;
    601           Gxx += gx * gx;
    602           Gxy += gx * gy;
    603           Gyy += gy * gy;
    604 
    605           mean += block[yi * block_size + xi];
    606           var += block[yi * block_size + xi] * block[yi * block_size + xi];
    607         }
    608       }
    609       mean /= (block_size - 2) * (block_size - 2);
    610 
    611       // Normalize gradients by block_size.
    612       Gxx /= ((block_size - 2) * (block_size - 2));
    613       Gxy /= ((block_size - 2) * (block_size - 2));
    614       Gyy /= ((block_size - 2) * (block_size - 2));
    615       var = var / ((block_size - 2) * (block_size - 2)) - mean * mean;
    616 
    617       {
    618         const double trace = Gxx + Gyy;
    619         const double det = Gxx * Gyy - Gxy * Gxy;
    620         const double e1 = (trace + sqrt(trace * trace - 4 * det)) / 2.;
    621         const double e2 = (trace - sqrt(trace * trace - 4 * det)) / 2.;
    622         const double norm = e1;  // Spectral norm
    623         const double ratio = (e1 / AOMMAX(e2, 1e-6));
    624         const int is_flat = (trace < kTraceThreshold) &&
    625                             (ratio < kRatioThreshold) &&
    626                             (norm < kNormThreshold) && (var > kVarThreshold);
    627         // The following weights are used to combine the above features to give
    628         // a sigmoid score for flatness. If the input was normalized to [0,100]
    629         // the magnitude of these values would be close to 1 (e.g., weights
    630         // corresponding to variance would be a factor of 10000x smaller).
    631         // The weights are given in the following order:
    632         //    [{var}, {ratio}, {trace}, {norm}, offset]
    633         // with one of the most discriminative being simply the variance.
    634         const double weights[5] = { -6682, -0.2056, 13087, -12434, 2.5694 };
    635         const float score =
    636             (float)(1.0 / (1 + exp(-(weights[0] * var + weights[1] * ratio +
    637                                      weights[2] * trace + weights[3] * norm +
    638                                      weights[4]))));
    639         flat_blocks[by * num_blocks_w + bx] = is_flat ? 255 : 0;
    640         scores[by * num_blocks_w + bx].score = var > kVarThreshold ? score : 0;
    641         scores[by * num_blocks_w + bx].index = by * num_blocks_w + bx;
    642 #ifdef NOISE_MODEL_LOG_SCORE
    643         fprintf(stderr, "%g %g %g %g %g %d ", score, var, ratio, trace, norm,
    644                 is_flat);
    645 #endif
    646         num_flat += is_flat;
    647       }
    648     }
    649 #ifdef NOISE_MODEL_LOG_SCORE
    650     fprintf(stderr, "\n");
    651 #endif
    652   }
    653 #ifdef NOISE_MODEL_LOG_SCORE
    654   fprintf(stderr, "];\n");
    655 #endif
    656   // Find the top-scored blocks (most likely to be flat) and set the flat blocks
    657   // be the union of the thresholded results and the top 10th percentile of the
    658   // scored results.
    659   qsort(scores, num_blocks_w * num_blocks_h, sizeof(*scores), &compare_scores);
    660   const int top_nth_percentile = num_blocks_w * num_blocks_h * 90 / 100;
    661   const float score_threshold = scores[top_nth_percentile].score;
    662   for (int i = 0; i < num_blocks_w * num_blocks_h; ++i) {
    663     if (scores[i].score >= score_threshold) {
    664       num_flat += flat_blocks[scores[i].index] == 0;
    665       flat_blocks[scores[i].index] |= 1;
    666     }
    667   }
    668   aom_free(block);
    669   aom_free(plane);
    670   aom_free(scores);
    671   return num_flat;
    672 }
    673 
    674 int aom_noise_model_init(aom_noise_model_t *model,
    675                          const aom_noise_model_params_t params) {
    676   const int n = num_coeffs(params);
    677   const int lag = params.lag;
    678   const int bit_depth = params.bit_depth;
    679   int x = 0, y = 0, i = 0, c = 0;
    680 
    681   memset(model, 0, sizeof(*model));
    682   if (params.lag < 1) {
    683     fprintf(stderr, "Invalid noise param: lag = %d must be >= 1\n", params.lag);
    684     return 0;
    685   }
    686   if (params.lag > kMaxLag) {
    687     fprintf(stderr, "Invalid noise param: lag = %d must be <= %d\n", params.lag,
    688             kMaxLag);
    689     return 0;
    690   }
    691 
    692   memcpy(&model->params, &params, sizeof(params));
    693   for (c = 0; c < 3; ++c) {
    694     if (!noise_state_init(&model->combined_state[c], n + (c > 0), bit_depth)) {
    695       fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
    696       aom_noise_model_free(model);
    697       return 0;
    698     }
    699     if (!noise_state_init(&model->latest_state[c], n + (c > 0), bit_depth)) {
    700       fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
    701       aom_noise_model_free(model);
    702       return 0;
    703     }
    704   }
    705   model->n = n;
    706   model->coords = (int(*)[2])aom_malloc(sizeof(*model->coords) * n);
    707 
    708   for (y = -lag; y <= 0; ++y) {
    709     const int max_x = y == 0 ? -1 : lag;
    710     for (x = -lag; x <= max_x; ++x) {
    711       switch (params.shape) {
    712         case AOM_NOISE_SHAPE_DIAMOND:
    713           if (abs(x) <= y + lag) {
    714             model->coords[i][0] = x;
    715             model->coords[i][1] = y;
    716             ++i;
    717           }
    718           break;
    719         case AOM_NOISE_SHAPE_SQUARE:
    720           model->coords[i][0] = x;
    721           model->coords[i][1] = y;
    722           ++i;
    723           break;
    724         default:
    725           fprintf(stderr, "Invalid shape\n");
    726           aom_noise_model_free(model);
    727           return 0;
    728       }
    729     }
    730   }
    731   assert(i == n);
    732   return 1;
    733 }
    734 
    735 void aom_noise_model_free(aom_noise_model_t *model) {
    736   int c = 0;
    737   if (!model) return;
    738 
    739   aom_free(model->coords);
    740   for (c = 0; c < 3; ++c) {
    741     equation_system_free(&model->latest_state[c].eqns);
    742     equation_system_free(&model->combined_state[c].eqns);
    743 
    744     equation_system_free(&model->latest_state[c].strength_solver.eqns);
    745     equation_system_free(&model->combined_state[c].strength_solver.eqns);
    746   }
    747   memset(model, 0, sizeof(*model));
    748 }
    749 
    750 // Extracts the neighborhood defined by coords around point (x, y) from
    751 // the difference between the data and denoised images. Also extracts the
    752 // entry (possibly downsampled) for (x, y) in the alt_data (e.g., luma).
    753 #define EXTRACT_AR_ROW(INT_TYPE, suffix)                                   \
    754   static double extract_ar_row_##suffix(                                   \
    755       int(*coords)[2], int num_coords, const INT_TYPE *const data,         \
    756       const INT_TYPE *const denoised, int stride, int sub_log2[2],         \
    757       const INT_TYPE *const alt_data, const INT_TYPE *const alt_denoised,  \
    758       int alt_stride, int x, int y, double *buffer) {                      \
    759     for (int i = 0; i < num_coords; ++i) {                                 \
    760       const int x_i = x + coords[i][0], y_i = y + coords[i][1];            \
    761       buffer[i] =                                                          \
    762           (double)data[y_i * stride + x_i] - denoised[y_i * stride + x_i]; \
    763     }                                                                      \
    764     const double val =                                                     \
    765         (double)data[y * stride + x] - denoised[y * stride + x];           \
    766                                                                            \
    767     if (alt_data && alt_denoised) {                                        \
    768       double avg_data = 0, avg_denoised = 0;                               \
    769       int num_samples = 0;                                                 \
    770       for (int dy_i = 0; dy_i < (1 << sub_log2[1]); dy_i++) {              \
    771         const int y_up = (y << sub_log2[1]) + dy_i;                        \
    772         for (int dx_i = 0; dx_i < (1 << sub_log2[0]); dx_i++) {            \
    773           const int x_up = (x << sub_log2[0]) + dx_i;                      \
    774           avg_data += alt_data[y_up * alt_stride + x_up];                  \
    775           avg_denoised += alt_denoised[y_up * alt_stride + x_up];          \
    776           num_samples++;                                                   \
    777         }                                                                  \
    778       }                                                                    \
    779       buffer[num_coords] = (avg_data - avg_denoised) / num_samples;        \
    780     }                                                                      \
    781     return val;                                                            \
    782   }
    783 
    784 EXTRACT_AR_ROW(uint8_t, lowbd);
    785 EXTRACT_AR_ROW(uint16_t, highbd);
    786 
    787 static int add_block_observations(
    788     aom_noise_model_t *noise_model, int c, const uint8_t *const data,
    789     const uint8_t *const denoised, int w, int h, int stride, int sub_log2[2],
    790     const uint8_t *const alt_data, const uint8_t *const alt_denoised,
    791     int alt_stride, const uint8_t *const flat_blocks, int block_size,
    792     int num_blocks_w, int num_blocks_h) {
    793   const int lag = noise_model->params.lag;
    794   const int num_coords = noise_model->n;
    795   const double normalization = (1 << noise_model->params.bit_depth) - 1;
    796   double *A = noise_model->latest_state[c].eqns.A;
    797   double *b = noise_model->latest_state[c].eqns.b;
    798   double *buffer = (double *)aom_malloc(sizeof(*buffer) * (num_coords + 1));
    799   const int n = noise_model->latest_state[c].eqns.n;
    800 
    801   if (!buffer) {
    802     fprintf(stderr, "Unable to allocate buffer of size %d\n", num_coords + 1);
    803     return 0;
    804   }
    805   for (int by = 0; by < num_blocks_h; ++by) {
    806     const int y_o = by * (block_size >> sub_log2[1]);
    807     for (int bx = 0; bx < num_blocks_w; ++bx) {
    808       const int x_o = bx * (block_size >> sub_log2[0]);
    809       if (!flat_blocks[by * num_blocks_w + bx]) {
    810         continue;
    811       }
    812       int y_start =
    813           (by > 0 && flat_blocks[(by - 1) * num_blocks_w + bx]) ? 0 : lag;
    814       int x_start =
    815           (bx > 0 && flat_blocks[by * num_blocks_w + bx - 1]) ? 0 : lag;
    816       int y_end = AOMMIN((h >> sub_log2[1]) - by * (block_size >> sub_log2[1]),
    817                          block_size >> sub_log2[1]);
    818       int x_end = AOMMIN(
    819           (w >> sub_log2[0]) - bx * (block_size >> sub_log2[0]) - lag,
    820           (bx + 1 < num_blocks_w && flat_blocks[by * num_blocks_w + bx + 1])
    821               ? (block_size >> sub_log2[0])
    822               : ((block_size >> sub_log2[0]) - lag));
    823       for (int y = y_start; y < y_end; ++y) {
    824         for (int x = x_start; x < x_end; ++x) {
    825           const double val =
    826               noise_model->params.use_highbd
    827                   ? extract_ar_row_highbd(noise_model->coords, num_coords,
    828                                           (const uint16_t *const)data,
    829                                           (const uint16_t *const)denoised,
    830                                           stride, sub_log2,
    831                                           (const uint16_t *const)alt_data,
    832                                           (const uint16_t *const)alt_denoised,
    833                                           alt_stride, x + x_o, y + y_o, buffer)
    834                   : extract_ar_row_lowbd(noise_model->coords, num_coords, data,
    835                                          denoised, stride, sub_log2, alt_data,
    836                                          alt_denoised, alt_stride, x + x_o,
    837                                          y + y_o, buffer);
    838           for (int i = 0; i < n; ++i) {
    839             for (int j = 0; j < n; ++j) {
    840               A[i * n + j] +=
    841                   (buffer[i] * buffer[j]) / (normalization * normalization);
    842             }
    843             b[i] += (buffer[i] * val) / (normalization * normalization);
    844           }
    845           noise_model->latest_state[c].num_observations++;
    846         }
    847       }
    848     }
    849   }
    850   aom_free(buffer);
    851   return 1;
    852 }
    853 
    854 static void add_noise_std_observations(
    855     aom_noise_model_t *noise_model, int c, const double *coeffs,
    856     const uint8_t *const data, const uint8_t *const denoised, int w, int h,
    857     int stride, int sub_log2[2], const uint8_t *const alt_data, int alt_stride,
    858     const uint8_t *const flat_blocks, int block_size, int num_blocks_w,
    859     int num_blocks_h) {
    860   const int num_coords = noise_model->n;
    861   aom_noise_strength_solver_t *noise_strength_solver =
    862       &noise_model->latest_state[c].strength_solver;
    863 
    864   const aom_noise_strength_solver_t *noise_strength_luma =
    865       &noise_model->latest_state[0].strength_solver;
    866   const double luma_gain = noise_model->latest_state[0].ar_gain;
    867   const double noise_gain = noise_model->latest_state[c].ar_gain;
    868   for (int by = 0; by < num_blocks_h; ++by) {
    869     const int y_o = by * (block_size >> sub_log2[1]);
    870     for (int bx = 0; bx < num_blocks_w; ++bx) {
    871       const int x_o = bx * (block_size >> sub_log2[0]);
    872       if (!flat_blocks[by * num_blocks_w + bx]) {
    873         continue;
    874       }
    875       const int num_samples_h =
    876           AOMMIN((h >> sub_log2[1]) - by * (block_size >> sub_log2[1]),
    877                  block_size >> sub_log2[1]);
    878       const int num_samples_w =
    879           AOMMIN((w >> sub_log2[0]) - bx * (block_size >> sub_log2[0]),
    880                  (block_size >> sub_log2[0]));
    881       // Make sure that we have a reasonable amount of samples to consider the
    882       // block
    883       if (num_samples_w * num_samples_h > block_size) {
    884         const double block_mean = get_block_mean(
    885             alt_data ? alt_data : data, w, h, alt_data ? alt_stride : stride,
    886             x_o << sub_log2[0], y_o << sub_log2[1], block_size,
    887             noise_model->params.use_highbd);
    888         const double noise_var = get_noise_var(
    889             data, denoised, stride, w >> sub_log2[0], h >> sub_log2[1], x_o,
    890             y_o, block_size >> sub_log2[0], block_size >> sub_log2[1],
    891             noise_model->params.use_highbd);
    892         // We want to remove the part of the noise that came from being
    893         // correlated with luma. Note that the noise solver for luma must
    894         // have already been run.
    895         const double luma_strength =
    896             c > 0 ? luma_gain * noise_strength_solver_get_value(
    897                                     noise_strength_luma, block_mean)
    898                   : 0;
    899         const double corr = c > 0 ? coeffs[num_coords] : 0;
    900         // Chroma noise:
    901         //    N(0, noise_var) = N(0, uncorr_var) + corr * N(0, luma_strength^2)
    902         // The uncorrelated component:
    903         //   uncorr_var = noise_var - (corr * luma_strength)^2
    904         // But don't allow fully correlated noise (hence the max), since the
    905         // synthesis cannot model it.
    906         const double uncorr_std = sqrt(
    907             AOMMAX(noise_var / 16, noise_var - pow(corr * luma_strength, 2)));
    908         // After we've removed correlation with luma, undo the gain that will
    909         // come from running the IIR filter.
    910         const double adjusted_strength = uncorr_std / noise_gain;
    911         aom_noise_strength_solver_add_measurement(
    912             noise_strength_solver, block_mean, adjusted_strength);
    913       }
    914     }
    915   }
    916 }
    917 
    918 // Return true if the noise estimate appears to be different from the combined
    919 // (multi-frame) estimate. The difference is measured by checking whether the
    920 // AR coefficients have diverged (using a threshold on normalized cross
    921 // correlation), or whether the noise strength has changed.
    922 static int is_noise_model_different(aom_noise_model_t *const noise_model) {
    923   // These thresholds are kind of arbitrary and will likely need further tuning
    924   // (or exported as parameters). The threshold on noise strength is a weighted
    925   // difference between the noise strength histograms
    926   const double kCoeffThreshold = 0.9;
    927   const double kStrengthThreshold =
    928       0.005 * (1 << (noise_model->params.bit_depth - 8));
    929   for (int c = 0; c < 1; ++c) {
    930     const double corr =
    931         aom_normalized_cross_correlation(noise_model->latest_state[c].eqns.x,
    932                                          noise_model->combined_state[c].eqns.x,
    933                                          noise_model->combined_state[c].eqns.n);
    934     if (corr < kCoeffThreshold) return 1;
    935 
    936     const double dx =
    937         1.0 / noise_model->latest_state[c].strength_solver.num_bins;
    938 
    939     const aom_equation_system_t *latest_eqns =
    940         &noise_model->latest_state[c].strength_solver.eqns;
    941     const aom_equation_system_t *combined_eqns =
    942         &noise_model->combined_state[c].strength_solver.eqns;
    943     double diff = 0;
    944     double total_weight = 0;
    945     for (int j = 0; j < latest_eqns->n; ++j) {
    946       double weight = 0;
    947       for (int i = 0; i < latest_eqns->n; ++i) {
    948         weight += latest_eqns->A[i * latest_eqns->n + j];
    949       }
    950       weight = sqrt(weight);
    951       diff += weight * fabs(latest_eqns->x[j] - combined_eqns->x[j]);
    952       total_weight += weight;
    953     }
    954     if (diff * dx / total_weight > kStrengthThreshold) return 1;
    955   }
    956   return 0;
    957 }
    958 
    959 static int ar_equation_system_solve(aom_noise_state_t *state, int is_chroma) {
    960   const int ret = equation_system_solve(&state->eqns);
    961   state->ar_gain = 1.0;
    962   if (!ret) return ret;
    963 
    964   // Update the AR gain from the equation system as it will be used to fit
    965   // the noise strength as a function of intensity.  In the Yule-Walker
    966   // equations, the diagonal should be the variance of the correlated noise.
    967   // In the case of the least squares estimate, there will be some variability
    968   // in the diagonal. So use the mean of the diagonal as the estimate of
    969   // overall variance (this works for least squares or Yule-Walker formulation).
    970   double var = 0;
    971   const int n = state->eqns.n;
    972   for (int i = 0; i < (state->eqns.n - is_chroma); ++i) {
    973     var += state->eqns.A[i * n + i] / state->num_observations;
    974   }
    975   var /= (n - is_chroma);
    976 
    977   // Keep track of E(Y^2) = <b, x> + E(X^2)
    978   // In the case that we are using chroma and have an estimate of correlation
    979   // with luma we adjust that estimate slightly to remove the correlated bits by
    980   // subtracting out the last column of a scaled by our correlation estimate
    981   // from b. E(y^2) = <b - A(:, end)*x(end), x>
    982   double sum_covar = 0;
    983   for (int i = 0; i < state->eqns.n - is_chroma; ++i) {
    984     double bi = state->eqns.b[i];
    985     if (is_chroma) {
    986       bi -= state->eqns.A[i * n + (n - 1)] * state->eqns.x[n - 1];
    987     }
    988     sum_covar += (bi * state->eqns.x[i]) / state->num_observations;
    989   }
    990   // Now, get an estimate of the variance of uncorrelated noise signal and use
    991   // it to determine the gain of the AR filter.
    992   const double noise_var = AOMMAX(var - sum_covar, 1e-6);
    993   state->ar_gain = AOMMAX(1, sqrt(AOMMAX(var / noise_var, 1e-6)));
    994   return ret;
    995 }
    996 
    997 aom_noise_status_t aom_noise_model_update(
    998     aom_noise_model_t *const noise_model, const uint8_t *const data[3],
    999     const uint8_t *const denoised[3], int w, int h, int stride[3],
   1000     int chroma_sub_log2[2], const uint8_t *const flat_blocks, int block_size) {
   1001   const int num_blocks_w = (w + block_size - 1) / block_size;
   1002   const int num_blocks_h = (h + block_size - 1) / block_size;
   1003   int y_model_different = 0;
   1004   int num_blocks = 0;
   1005   int i = 0, channel = 0;
   1006 
   1007   if (block_size <= 1) {
   1008     fprintf(stderr, "block_size = %d must be > 1\n", block_size);
   1009     return AOM_NOISE_STATUS_INVALID_ARGUMENT;
   1010   }
   1011 
   1012   if (block_size < noise_model->params.lag * 2 + 1) {
   1013     fprintf(stderr, "block_size = %d must be >= %d\n", block_size,
   1014             noise_model->params.lag * 2 + 1);
   1015     return AOM_NOISE_STATUS_INVALID_ARGUMENT;
   1016   }
   1017 
   1018   // Clear the latest equation system
   1019   for (i = 0; i < 3; ++i) {
   1020     equation_system_clear(&noise_model->latest_state[i].eqns);
   1021     noise_model->latest_state[i].num_observations = 0;
   1022     noise_strength_solver_clear(&noise_model->latest_state[i].strength_solver);
   1023   }
   1024 
   1025   // Check that we have enough flat blocks
   1026   for (i = 0; i < num_blocks_h * num_blocks_w; ++i) {
   1027     if (flat_blocks[i]) {
   1028       num_blocks++;
   1029     }
   1030   }
   1031 
   1032   if (num_blocks <= 1) {
   1033     fprintf(stderr, "Not enough flat blocks to update noise estimate\n");
   1034     return AOM_NOISE_STATUS_INSUFFICIENT_FLAT_BLOCKS;
   1035   }
   1036 
   1037   for (channel = 0; channel < 3; ++channel) {
   1038     int no_subsampling[2] = { 0, 0 };
   1039     const uint8_t *alt_data = channel > 0 ? data[0] : 0;
   1040     const uint8_t *alt_denoised = channel > 0 ? denoised[0] : 0;
   1041     int *sub = channel > 0 ? chroma_sub_log2 : no_subsampling;
   1042     const int is_chroma = channel != 0;
   1043     if (!data[channel] || !denoised[channel]) break;
   1044     if (!add_block_observations(noise_model, channel, data[channel],
   1045                                 denoised[channel], w, h, stride[channel], sub,
   1046                                 alt_data, alt_denoised, stride[0], flat_blocks,
   1047                                 block_size, num_blocks_w, num_blocks_h)) {
   1048       fprintf(stderr, "Adding block observation failed\n");
   1049       return AOM_NOISE_STATUS_INTERNAL_ERROR;
   1050     }
   1051 
   1052     if (!ar_equation_system_solve(&noise_model->latest_state[channel],
   1053                                   is_chroma)) {
   1054       if (is_chroma) {
   1055         set_chroma_coefficient_fallback_soln(
   1056             &noise_model->latest_state[channel].eqns);
   1057       } else {
   1058         fprintf(stderr, "Solving latest noise equation system failed %d!\n",
   1059                 channel);
   1060         return AOM_NOISE_STATUS_INTERNAL_ERROR;
   1061       }
   1062     }
   1063 
   1064     add_noise_std_observations(
   1065         noise_model, channel, noise_model->latest_state[channel].eqns.x,
   1066         data[channel], denoised[channel], w, h, stride[channel], sub, alt_data,
   1067         stride[0], flat_blocks, block_size, num_blocks_w, num_blocks_h);
   1068 
   1069     if (!aom_noise_strength_solver_solve(
   1070             &noise_model->latest_state[channel].strength_solver)) {
   1071       fprintf(stderr, "Solving latest noise strength failed!\n");
   1072       return AOM_NOISE_STATUS_INTERNAL_ERROR;
   1073     }
   1074 
   1075     // Check noise characteristics and return if error.
   1076     if (channel == 0 &&
   1077         noise_model->combined_state[channel].strength_solver.num_equations >
   1078             0 &&
   1079         is_noise_model_different(noise_model)) {
   1080       y_model_different = 1;
   1081     }
   1082 
   1083     // Don't update the combined stats if the y model is different.
   1084     if (y_model_different) continue;
   1085 
   1086     noise_model->combined_state[channel].num_observations +=
   1087         noise_model->latest_state[channel].num_observations;
   1088     equation_system_add(&noise_model->combined_state[channel].eqns,
   1089                         &noise_model->latest_state[channel].eqns);
   1090     if (!ar_equation_system_solve(&noise_model->combined_state[channel],
   1091                                   is_chroma)) {
   1092       if (is_chroma) {
   1093         set_chroma_coefficient_fallback_soln(
   1094             &noise_model->combined_state[channel].eqns);
   1095       } else {
   1096         fprintf(stderr, "Solving combined noise equation system failed %d!\n",
   1097                 channel);
   1098         return AOM_NOISE_STATUS_INTERNAL_ERROR;
   1099       }
   1100     }
   1101 
   1102     noise_strength_solver_add(
   1103         &noise_model->combined_state[channel].strength_solver,
   1104         &noise_model->latest_state[channel].strength_solver);
   1105 
   1106     if (!aom_noise_strength_solver_solve(
   1107             &noise_model->combined_state[channel].strength_solver)) {
   1108       fprintf(stderr, "Solving combined noise strength failed!\n");
   1109       return AOM_NOISE_STATUS_INTERNAL_ERROR;
   1110     }
   1111   }
   1112 
   1113   return y_model_different ? AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE
   1114                            : AOM_NOISE_STATUS_OK;
   1115 }
   1116 
   1117 void aom_noise_model_save_latest(aom_noise_model_t *noise_model) {
   1118   for (int c = 0; c < 3; c++) {
   1119     equation_system_copy(&noise_model->combined_state[c].eqns,
   1120                          &noise_model->latest_state[c].eqns);
   1121     equation_system_copy(&noise_model->combined_state[c].strength_solver.eqns,
   1122                          &noise_model->latest_state[c].strength_solver.eqns);
   1123     noise_model->combined_state[c].strength_solver.num_equations =
   1124         noise_model->latest_state[c].strength_solver.num_equations;
   1125     noise_model->combined_state[c].num_observations =
   1126         noise_model->latest_state[c].num_observations;
   1127     noise_model->combined_state[c].ar_gain =
   1128         noise_model->latest_state[c].ar_gain;
   1129   }
   1130 }
   1131 
   1132 int aom_noise_model_get_grain_parameters(aom_noise_model_t *const noise_model,
   1133                                          aom_film_grain_t *film_grain) {
   1134   if (noise_model->params.lag > 3) {
   1135     fprintf(stderr, "params.lag = %d > 3\n", noise_model->params.lag);
   1136     return 0;
   1137   }
   1138   uint16_t random_seed = film_grain->random_seed;
   1139   memset(film_grain, 0, sizeof(*film_grain));
   1140   film_grain->random_seed = random_seed;
   1141 
   1142   film_grain->apply_grain = 1;
   1143   film_grain->update_parameters = 1;
   1144 
   1145   film_grain->ar_coeff_lag = noise_model->params.lag;
   1146 
   1147   // Convert the scaling functions to 8 bit values
   1148   aom_noise_strength_lut_t scaling_points[3];
   1149   aom_noise_strength_solver_fit_piecewise(
   1150       &noise_model->combined_state[0].strength_solver, 14, scaling_points + 0);
   1151   aom_noise_strength_solver_fit_piecewise(
   1152       &noise_model->combined_state[1].strength_solver, 10, scaling_points + 1);
   1153   aom_noise_strength_solver_fit_piecewise(
   1154       &noise_model->combined_state[2].strength_solver, 10, scaling_points + 2);
   1155 
   1156   // Both the domain and the range of the scaling functions in the film_grain
   1157   // are normalized to 8-bit (e.g., they are implicitly scaled during grain
   1158   // synthesis).
   1159   const double strength_divisor = 1 << (noise_model->params.bit_depth - 8);
   1160   double max_scaling_value = 1e-4;
   1161   for (int c = 0; c < 3; ++c) {
   1162     for (int i = 0; i < scaling_points[c].num_points; ++i) {
   1163       scaling_points[c].points[i][0] =
   1164           AOMMIN(255, scaling_points[c].points[i][0] / strength_divisor);
   1165       scaling_points[c].points[i][1] =
   1166           AOMMIN(255, scaling_points[c].points[i][1] / strength_divisor);
   1167       max_scaling_value =
   1168           AOMMAX(scaling_points[c].points[i][1], max_scaling_value);
   1169     }
   1170   }
   1171 
   1172   // Scaling_shift values are in the range [8,11]
   1173   const int max_scaling_value_log2 =
   1174       clamp((int)floor(log2(max_scaling_value) + 1), 2, 5);
   1175   film_grain->scaling_shift = 5 + (8 - max_scaling_value_log2);
   1176 
   1177   const double scale_factor = 1 << (8 - max_scaling_value_log2);
   1178   film_grain->num_y_points = scaling_points[0].num_points;
   1179   film_grain->num_cb_points = scaling_points[1].num_points;
   1180   film_grain->num_cr_points = scaling_points[2].num_points;
   1181 
   1182   int(*film_grain_scaling[3])[2] = {
   1183     film_grain->scaling_points_y,
   1184     film_grain->scaling_points_cb,
   1185     film_grain->scaling_points_cr,
   1186   };
   1187   for (int c = 0; c < 3; c++) {
   1188     for (int i = 0; i < scaling_points[c].num_points; ++i) {
   1189       film_grain_scaling[c][i][0] = (int)(scaling_points[c].points[i][0] + 0.5);
   1190       film_grain_scaling[c][i][1] = clamp(
   1191           (int)(scale_factor * scaling_points[c].points[i][1] + 0.5), 0, 255);
   1192     }
   1193   }
   1194   aom_noise_strength_lut_free(scaling_points + 0);
   1195   aom_noise_strength_lut_free(scaling_points + 1);
   1196   aom_noise_strength_lut_free(scaling_points + 2);
   1197 
   1198   // Convert the ar_coeffs into 8-bit values
   1199   const int n_coeff = noise_model->combined_state[0].eqns.n;
   1200   double max_coeff = 1e-4, min_coeff = -1e-4;
   1201   double y_corr[2] = { 0, 0 };
   1202   double avg_luma_strength = 0;
   1203   for (int c = 0; c < 3; c++) {
   1204     aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
   1205     for (int i = 0; i < n_coeff; ++i) {
   1206       max_coeff = AOMMAX(max_coeff, eqns->x[i]);
   1207       min_coeff = AOMMIN(min_coeff, eqns->x[i]);
   1208     }
   1209     // Since the correlation between luma/chroma was computed in an already
   1210     // scaled space, we adjust it in the un-scaled space.
   1211     aom_noise_strength_solver_t *solver =
   1212         &noise_model->combined_state[c].strength_solver;
   1213     // Compute a weighted average of the strength for the channel.
   1214     double average_strength = 0, total_weight = 0;
   1215     for (int i = 0; i < solver->eqns.n; ++i) {
   1216       double w = 0;
   1217       for (int j = 0; j < solver->eqns.n; ++j) {
   1218         w += solver->eqns.A[i * solver->eqns.n + j];
   1219       }
   1220       w = sqrt(w);
   1221       average_strength += solver->eqns.x[i] * w;
   1222       total_weight += w;
   1223     }
   1224     if (total_weight == 0)
   1225       average_strength = 1;
   1226     else
   1227       average_strength /= total_weight;
   1228     if (c == 0) {
   1229       avg_luma_strength = average_strength;
   1230     } else {
   1231       y_corr[c - 1] = avg_luma_strength * eqns->x[n_coeff] / average_strength;
   1232       max_coeff = AOMMAX(max_coeff, y_corr[c - 1]);
   1233       min_coeff = AOMMIN(min_coeff, y_corr[c - 1]);
   1234     }
   1235   }
   1236   // Shift value: AR coeffs range (values 6-9)
   1237   // 6: [-2, 2),  7: [-1, 1), 8: [-0.5, 0.5), 9: [-0.25, 0.25)
   1238   film_grain->ar_coeff_shift =
   1239       clamp(7 - (int)AOMMAX(1 + floor(log2(max_coeff)), ceil(log2(-min_coeff))),
   1240             6, 9);
   1241   double scale_ar_coeff = 1 << film_grain->ar_coeff_shift;
   1242   int *ar_coeffs[3] = {
   1243     film_grain->ar_coeffs_y,
   1244     film_grain->ar_coeffs_cb,
   1245     film_grain->ar_coeffs_cr,
   1246   };
   1247   for (int c = 0; c < 3; ++c) {
   1248     aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
   1249     for (int i = 0; i < n_coeff; ++i) {
   1250       ar_coeffs[c][i] =
   1251           clamp((int)round(scale_ar_coeff * eqns->x[i]), -128, 127);
   1252     }
   1253     if (c > 0) {
   1254       ar_coeffs[c][n_coeff] =
   1255           clamp((int)round(scale_ar_coeff * y_corr[c - 1]), -128, 127);
   1256     }
   1257   }
   1258 
   1259   // At the moment, the noise modeling code assumes that the chroma scaling
   1260   // functions are a function of luma.
   1261   film_grain->cb_mult = 128;       // 8 bits
   1262   film_grain->cb_luma_mult = 192;  // 8 bits
   1263   film_grain->cb_offset = 256;     // 9 bits
   1264 
   1265   film_grain->cr_mult = 128;       // 8 bits
   1266   film_grain->cr_luma_mult = 192;  // 8 bits
   1267   film_grain->cr_offset = 256;     // 9 bits
   1268 
   1269   film_grain->chroma_scaling_from_luma = 0;
   1270   film_grain->grain_scale_shift = 0;
   1271   film_grain->overlap_flag = 1;
   1272   return 1;
   1273 }
   1274 
   1275 static void pointwise_multiply(const float *a, float *b, int n) {
   1276   for (int i = 0; i < n; ++i) {
   1277     b[i] *= a[i];
   1278   }
   1279 }
   1280 
   1281 static float *get_half_cos_window(int block_size) {
   1282   float *window_function =
   1283       (float *)aom_malloc(block_size * block_size * sizeof(*window_function));
   1284   for (int y = 0; y < block_size; ++y) {
   1285     const double cos_yd = cos((.5 + y) * PI / block_size - PI / 2);
   1286     for (int x = 0; x < block_size; ++x) {
   1287       const double cos_xd = cos((.5 + x) * PI / block_size - PI / 2);
   1288       window_function[y * block_size + x] = (float)(cos_yd * cos_xd);
   1289     }
   1290   }
   1291   return window_function;
   1292 }
   1293 
   1294 #define DITHER_AND_QUANTIZE(INT_TYPE, suffix)                               \
   1295   static void dither_and_quantize_##suffix(                                 \
   1296       float *result, int result_stride, INT_TYPE *denoised, int w, int h,   \
   1297       int stride, int chroma_sub_w, int chroma_sub_h, int block_size,       \
   1298       float block_normalization) {                                          \
   1299     for (int y = 0; y < (h >> chroma_sub_h); ++y) {                         \
   1300       for (int x = 0; x < (w >> chroma_sub_w); ++x) {                       \
   1301         const int result_idx =                                              \
   1302             (y + (block_size >> chroma_sub_h)) * result_stride + x +        \
   1303             (block_size >> chroma_sub_w);                                   \
   1304         INT_TYPE new_val = (INT_TYPE)AOMMIN(                                \
   1305             AOMMAX(result[result_idx] * block_normalization + 0.5f, 0),     \
   1306             block_normalization);                                           \
   1307         const float err =                                                   \
   1308             -(((float)new_val) / block_normalization - result[result_idx]); \
   1309         denoised[y * stride + x] = new_val;                                 \
   1310         if (x + 1 < (w >> chroma_sub_w)) {                                  \
   1311           result[result_idx + 1] += err * 7.0f / 16.0f;                     \
   1312         }                                                                   \
   1313         if (y + 1 < (h >> chroma_sub_h)) {                                  \
   1314           if (x > 0) {                                                      \
   1315             result[result_idx + result_stride - 1] += err * 3.0f / 16.0f;   \
   1316           }                                                                 \
   1317           result[result_idx + result_stride] += err * 5.0f / 16.0f;         \
   1318           if (x + 1 < (w >> chroma_sub_w)) {                                \
   1319             result[result_idx + result_stride + 1] += err * 1.0f / 16.0f;   \
   1320           }                                                                 \
   1321         }                                                                   \
   1322       }                                                                     \
   1323     }                                                                       \
   1324   }
   1325 
   1326 DITHER_AND_QUANTIZE(uint8_t, lowbd);
   1327 DITHER_AND_QUANTIZE(uint16_t, highbd);
   1328 
   1329 int aom_wiener_denoise_2d(const uint8_t *const data[3], uint8_t *denoised[3],
   1330                           int w, int h, int stride[3], int chroma_sub[2],
   1331                           float *noise_psd[3], int block_size, int bit_depth,
   1332                           int use_highbd) {
   1333   float *plane = NULL, *block = NULL, *window_full = NULL,
   1334         *window_chroma = NULL;
   1335   double *block_d = NULL, *plane_d = NULL;
   1336   struct aom_noise_tx_t *tx_full = NULL;
   1337   struct aom_noise_tx_t *tx_chroma = NULL;
   1338   const int num_blocks_w = (w + block_size - 1) / block_size;
   1339   const int num_blocks_h = (h + block_size - 1) / block_size;
   1340   const int result_stride = (num_blocks_w + 2) * block_size;
   1341   const int result_height = (num_blocks_h + 2) * block_size;
   1342   float *result = NULL;
   1343   int init_success = 1;
   1344   aom_flat_block_finder_t block_finder_full;
   1345   aom_flat_block_finder_t block_finder_chroma;
   1346   const float kBlockNormalization = (float)((1 << bit_depth) - 1);
   1347   if (chroma_sub[0] != chroma_sub[1]) {
   1348     fprintf(stderr,
   1349             "aom_wiener_denoise_2d doesn't handle different chroma "
   1350             "subsampling");
   1351     return 0;
   1352   }
   1353   init_success &= aom_flat_block_finder_init(&block_finder_full, block_size,
   1354                                              bit_depth, use_highbd);
   1355   result = (float *)aom_malloc((num_blocks_h + 2) * block_size * result_stride *
   1356                                sizeof(*result));
   1357   plane = (float *)aom_malloc(block_size * block_size * sizeof(*plane));
   1358   block =
   1359       (float *)aom_memalign(32, 2 * block_size * block_size * sizeof(*block));
   1360   block_d = (double *)aom_malloc(block_size * block_size * sizeof(*block_d));
   1361   plane_d = (double *)aom_malloc(block_size * block_size * sizeof(*plane_d));
   1362   window_full = get_half_cos_window(block_size);
   1363   tx_full = aom_noise_tx_malloc(block_size);
   1364 
   1365   if (chroma_sub[0] != 0) {
   1366     init_success &= aom_flat_block_finder_init(&block_finder_chroma,
   1367                                                block_size >> chroma_sub[0],
   1368                                                bit_depth, use_highbd);
   1369     window_chroma = get_half_cos_window(block_size >> chroma_sub[0]);
   1370     tx_chroma = aom_noise_tx_malloc(block_size >> chroma_sub[0]);
   1371   } else {
   1372     window_chroma = window_full;
   1373     tx_chroma = tx_full;
   1374   }
   1375 
   1376   init_success &= (tx_full != NULL) && (tx_chroma != NULL) && (plane != NULL) &&
   1377                   (plane_d != NULL) && (block != NULL) && (block_d != NULL) &&
   1378                   (window_full != NULL) && (window_chroma != NULL) &&
   1379                   (result != NULL);
   1380   for (int c = init_success ? 0 : 3; c < 3; ++c) {
   1381     float *window_function = c == 0 ? window_full : window_chroma;
   1382     aom_flat_block_finder_t *block_finder = &block_finder_full;
   1383     const int chroma_sub_h = c > 0 ? chroma_sub[1] : 0;
   1384     const int chroma_sub_w = c > 0 ? chroma_sub[0] : 0;
   1385     struct aom_noise_tx_t *tx =
   1386         (c > 0 && chroma_sub[0] > 0) ? tx_chroma : tx_full;
   1387     if (!data[c] || !denoised[c]) continue;
   1388     if (c > 0 && chroma_sub[0] != 0) {
   1389       block_finder = &block_finder_chroma;
   1390     }
   1391     memset(result, 0, sizeof(*result) * result_stride * result_height);
   1392     // Do overlapped block processing (half overlapped). The block rows can
   1393     // easily be done in parallel
   1394     for (int offsy = 0; offsy < (block_size >> chroma_sub_h);
   1395          offsy += (block_size >> chroma_sub_h) / 2) {
   1396       for (int offsx = 0; offsx < (block_size >> chroma_sub_w);
   1397            offsx += (block_size >> chroma_sub_w) / 2) {
   1398         // Pad the boundary when processing each block-set.
   1399         for (int by = -1; by < num_blocks_h; ++by) {
   1400           for (int bx = -1; bx < num_blocks_w; ++bx) {
   1401             const int pixels_per_block =
   1402                 (block_size >> chroma_sub_w) * (block_size >> chroma_sub_h);
   1403             aom_flat_block_finder_extract_block(
   1404                 block_finder, data[c], w >> chroma_sub_w, h >> chroma_sub_h,
   1405                 stride[c], bx * (block_size >> chroma_sub_w) + offsx,
   1406                 by * (block_size >> chroma_sub_h) + offsy, plane_d, block_d);
   1407             for (int j = 0; j < pixels_per_block; ++j) {
   1408               block[j] = (float)block_d[j];
   1409               plane[j] = (float)plane_d[j];
   1410             }
   1411             pointwise_multiply(window_function, block, pixels_per_block);
   1412             aom_noise_tx_forward(tx, block);
   1413             aom_noise_tx_filter(tx, noise_psd[c]);
   1414             aom_noise_tx_inverse(tx, block);
   1415 
   1416             // Apply window function to the plane approximation (we will apply
   1417             // it to the sum of plane + block when composing the results).
   1418             pointwise_multiply(window_function, plane, pixels_per_block);
   1419 
   1420             for (int y = 0; y < (block_size >> chroma_sub_h); ++y) {
   1421               const int y_result =
   1422                   y + (by + 1) * (block_size >> chroma_sub_h) + offsy;
   1423               for (int x = 0; x < (block_size >> chroma_sub_w); ++x) {
   1424                 const int x_result =
   1425                     x + (bx + 1) * (block_size >> chroma_sub_w) + offsx;
   1426                 result[y_result * result_stride + x_result] +=
   1427                     (block[y * (block_size >> chroma_sub_w) + x] +
   1428                      plane[y * (block_size >> chroma_sub_w) + x]) *
   1429                     window_function[y * (block_size >> chroma_sub_w) + x];
   1430               }
   1431             }
   1432           }
   1433         }
   1434       }
   1435     }
   1436     if (use_highbd) {
   1437       dither_and_quantize_highbd(result, result_stride, (uint16_t *)denoised[c],
   1438                                  w, h, stride[c], chroma_sub_w, chroma_sub_h,
   1439                                  block_size, kBlockNormalization);
   1440     } else {
   1441       dither_and_quantize_lowbd(result, result_stride, denoised[c], w, h,
   1442                                 stride[c], chroma_sub_w, chroma_sub_h,
   1443                                 block_size, kBlockNormalization);
   1444     }
   1445   }
   1446   aom_free(result);
   1447   aom_free(plane);
   1448   aom_free(block);
   1449   aom_free(plane_d);
   1450   aom_free(block_d);
   1451   aom_free(window_full);
   1452 
   1453   aom_noise_tx_free(tx_full);
   1454 
   1455   aom_flat_block_finder_free(&block_finder_full);
   1456   if (chroma_sub[0] != 0) {
   1457     aom_flat_block_finder_free(&block_finder_chroma);
   1458     aom_free(window_chroma);
   1459     aom_noise_tx_free(tx_chroma);
   1460   }
   1461   return init_success;
   1462 }
   1463 
   1464 struct aom_denoise_and_model_t {
   1465   int block_size;
   1466   int bit_depth;
   1467   float noise_level;
   1468 
   1469   // Size of current denoised buffer and flat_block buffer
   1470   int width;
   1471   int height;
   1472   int y_stride;
   1473   int uv_stride;
   1474   int num_blocks_w;
   1475   int num_blocks_h;
   1476 
   1477   // Buffers for image and noise_psd allocated on the fly
   1478   float *noise_psd[3];
   1479   uint8_t *denoised[3];
   1480   uint8_t *flat_blocks;
   1481 
   1482   aom_flat_block_finder_t flat_block_finder;
   1483   aom_noise_model_t noise_model;
   1484 };
   1485 
   1486 struct aom_denoise_and_model_t *aom_denoise_and_model_alloc(int bit_depth,
   1487                                                             int block_size,
   1488                                                             float noise_level) {
   1489   struct aom_denoise_and_model_t *ctx =
   1490       (struct aom_denoise_and_model_t *)aom_malloc(
   1491           sizeof(struct aom_denoise_and_model_t));
   1492   if (!ctx) {
   1493     fprintf(stderr, "Unable to allocate denoise_and_model struct\n");
   1494     return NULL;
   1495   }
   1496   memset(ctx, 0, sizeof(*ctx));
   1497 
   1498   ctx->block_size = block_size;
   1499   ctx->noise_level = noise_level;
   1500   ctx->bit_depth = bit_depth;
   1501 
   1502   ctx->noise_psd[0] =
   1503       aom_malloc(sizeof(*ctx->noise_psd[0]) * block_size * block_size);
   1504   ctx->noise_psd[1] =
   1505       aom_malloc(sizeof(*ctx->noise_psd[1]) * block_size * block_size);
   1506   ctx->noise_psd[2] =
   1507       aom_malloc(sizeof(*ctx->noise_psd[2]) * block_size * block_size);
   1508   if (!ctx->noise_psd[0] || !ctx->noise_psd[1] || !ctx->noise_psd[2]) {
   1509     fprintf(stderr, "Unable to allocate noise PSD buffers\n");
   1510     aom_denoise_and_model_free(ctx);
   1511     return NULL;
   1512   }
   1513   return ctx;
   1514 }
   1515 
   1516 void aom_denoise_and_model_free(struct aom_denoise_and_model_t *ctx) {
   1517   aom_free(ctx->flat_blocks);
   1518   for (int i = 0; i < 3; ++i) {
   1519     aom_free(ctx->denoised[i]);
   1520     aom_free(ctx->noise_psd[i]);
   1521   }
   1522   aom_noise_model_free(&ctx->noise_model);
   1523   aom_flat_block_finder_free(&ctx->flat_block_finder);
   1524   aom_free(ctx);
   1525 }
   1526 
   1527 static int denoise_and_model_realloc_if_necessary(
   1528     struct aom_denoise_and_model_t *ctx, YV12_BUFFER_CONFIG *sd) {
   1529   if (ctx->width == sd->y_width && ctx->height == sd->y_height &&
   1530       ctx->y_stride == sd->y_stride && ctx->uv_stride == sd->uv_stride)
   1531     return 1;
   1532   const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
   1533   const int block_size = ctx->block_size;
   1534 
   1535   ctx->width = sd->y_width;
   1536   ctx->height = sd->y_height;
   1537   ctx->y_stride = sd->y_stride;
   1538   ctx->uv_stride = sd->uv_stride;
   1539 
   1540   for (int i = 0; i < 3; ++i) {
   1541     aom_free(ctx->denoised[i]);
   1542     ctx->denoised[i] = NULL;
   1543   }
   1544   aom_free(ctx->flat_blocks);
   1545   ctx->flat_blocks = NULL;
   1546 
   1547   ctx->denoised[0] = aom_malloc((sd->y_stride * sd->y_height) << use_highbd);
   1548   ctx->denoised[1] = aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
   1549   ctx->denoised[2] = aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
   1550   if (!ctx->denoised[0] || !ctx->denoised[1] || !ctx->denoised[2]) {
   1551     fprintf(stderr, "Unable to allocate denoise buffers\n");
   1552     return 0;
   1553   }
   1554   ctx->num_blocks_w = (sd->y_width + ctx->block_size - 1) / ctx->block_size;
   1555   ctx->num_blocks_h = (sd->y_height + ctx->block_size - 1) / ctx->block_size;
   1556   ctx->flat_blocks = aom_malloc(ctx->num_blocks_w * ctx->num_blocks_h);
   1557 
   1558   aom_flat_block_finder_free(&ctx->flat_block_finder);
   1559   if (!aom_flat_block_finder_init(&ctx->flat_block_finder, ctx->block_size,
   1560                                   ctx->bit_depth, use_highbd)) {
   1561     fprintf(stderr, "Unable to init flat block finder\n");
   1562     return 0;
   1563   }
   1564 
   1565   const aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3,
   1566                                             ctx->bit_depth, use_highbd };
   1567   aom_noise_model_free(&ctx->noise_model);
   1568   if (!aom_noise_model_init(&ctx->noise_model, params)) {
   1569     fprintf(stderr, "Unable to init noise model\n");
   1570     return 0;
   1571   }
   1572 
   1573   // Simply use a flat PSD (although we could use the flat blocks to estimate
   1574   // PSD) those to estimate an actual noise PSD)
   1575   const float y_noise_level =
   1576       aom_noise_psd_get_default_value(ctx->block_size, ctx->noise_level);
   1577   const float uv_noise_level = aom_noise_psd_get_default_value(
   1578       ctx->block_size >> sd->subsampling_x, ctx->noise_level);
   1579   for (int i = 0; i < block_size * block_size; ++i) {
   1580     ctx->noise_psd[0][i] = y_noise_level;
   1581     ctx->noise_psd[1][i] = ctx->noise_psd[2][i] = uv_noise_level;
   1582   }
   1583   return 1;
   1584 }
   1585 
   1586 int aom_denoise_and_model_run(struct aom_denoise_and_model_t *ctx,
   1587                               YV12_BUFFER_CONFIG *sd,
   1588                               aom_film_grain_t *film_grain) {
   1589   const int block_size = ctx->block_size;
   1590   const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
   1591   uint8_t *raw_data[3] = {
   1592     use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->y_buffer) : sd->y_buffer,
   1593     use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->u_buffer) : sd->u_buffer,
   1594     use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->v_buffer) : sd->v_buffer,
   1595   };
   1596   const uint8_t *const data[3] = { raw_data[0], raw_data[1], raw_data[2] };
   1597   int strides[3] = { sd->y_stride, sd->uv_stride, sd->uv_stride };
   1598   int chroma_sub_log2[2] = { sd->subsampling_x, sd->subsampling_y };
   1599 
   1600   if (!denoise_and_model_realloc_if_necessary(ctx, sd)) {
   1601     fprintf(stderr, "Unable to realloc buffers\n");
   1602     return 0;
   1603   }
   1604 
   1605   aom_flat_block_finder_run(&ctx->flat_block_finder, data[0], sd->y_width,
   1606                             sd->y_height, strides[0], ctx->flat_blocks);
   1607 
   1608   if (!aom_wiener_denoise_2d(data, ctx->denoised, sd->y_width, sd->y_height,
   1609                              strides, chroma_sub_log2, ctx->noise_psd,
   1610                              block_size, ctx->bit_depth, use_highbd)) {
   1611     fprintf(stderr, "Unable to denoise image\n");
   1612     return 0;
   1613   }
   1614 
   1615   const aom_noise_status_t status = aom_noise_model_update(
   1616       &ctx->noise_model, data, (const uint8_t *const *)ctx->denoised,
   1617       sd->y_width, sd->y_height, strides, chroma_sub_log2, ctx->flat_blocks,
   1618       block_size);
   1619   int have_noise_estimate = 0;
   1620   if (status == AOM_NOISE_STATUS_OK) {
   1621     have_noise_estimate = 1;
   1622   } else if (status == AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE) {
   1623     aom_noise_model_save_latest(&ctx->noise_model);
   1624     have_noise_estimate = 1;
   1625   } else {
   1626     // Unable to update noise model; proceed if we have a previous estimate.
   1627     have_noise_estimate =
   1628         (ctx->noise_model.combined_state[0].strength_solver.num_equations > 0);
   1629   }
   1630 
   1631   film_grain->apply_grain = 0;
   1632   if (have_noise_estimate) {
   1633     if (!aom_noise_model_get_grain_parameters(&ctx->noise_model, film_grain)) {
   1634       fprintf(stderr, "Unable to get grain parameters.\n");
   1635       return 0;
   1636     }
   1637     if (!film_grain->random_seed) {
   1638       film_grain->random_seed = 7391;
   1639     }
   1640     memcpy(raw_data[0], ctx->denoised[0],
   1641            (strides[0] * sd->y_height) << use_highbd);
   1642     memcpy(raw_data[1], ctx->denoised[1],
   1643            (strides[1] * sd->uv_height) << use_highbd);
   1644     memcpy(raw_data[2], ctx->denoised[2],
   1645            (strides[2] * sd->uv_height) << use_highbd);
   1646   }
   1647   return 1;
   1648 }
   1649