/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <math.h>
#include <assert.h>

#include "config/av1_rtcd.h"

#include "av1/common/warped_motion.h"
#include "av1/common/scale.h"

#define WARP_ERROR_BLOCK 32

/* clang-format off */
static const int error_measure_lut[512] = {
  // pow 0.7
  16384, 16339, 16294, 16249, 16204, 16158, 16113, 16068,
  16022, 15977, 15932, 15886, 15840, 15795, 15749, 15703,
  15657, 15612, 15566, 15520, 15474, 15427, 15381, 15335,
  15289, 15242, 15196, 15149, 15103, 15056, 15010, 14963,
  14916, 14869, 14822, 14775, 14728, 14681, 14634, 14587,
  14539, 14492, 14445, 14397, 14350, 14302, 14254, 14206,
  14159, 14111, 14063, 14015, 13967, 13918, 13870, 13822,
  13773, 13725, 13676, 13628, 13579, 13530, 13481, 13432,
  13383, 13334, 13285, 13236, 13187, 13137, 13088, 13038,
  12988, 12939, 12889, 12839, 12789, 12739, 12689, 12639,
  12588, 12538, 12487, 12437, 12386, 12335, 12285, 12234,
  12183, 12132, 12080, 12029, 11978, 11926, 11875, 11823,
  11771, 11719, 11667, 11615, 11563, 11511, 11458, 11406,
  11353, 11301, 11248, 11195, 11142, 11089, 11036, 10982,
  10929, 10875, 10822, 10768, 10714, 10660, 10606, 10552,
  10497, 10443, 10388, 10333, 10279, 10224, 10168, 10113,
  10058, 10002, 9947, 9891, 9835, 9779, 9723, 9666,
  9610, 9553, 9497, 9440, 9383, 9326, 9268, 9211,
  9153, 9095, 9037, 8979, 8921, 8862, 8804, 8745,
  8686, 8627, 8568, 8508, 8449, 8389, 8329, 8269,
  8208, 8148, 8087, 8026, 7965, 7903, 7842, 7780,
  7718, 7656, 7593, 7531, 7468, 7405, 7341, 7278,
  7214, 7150, 7086, 7021, 6956, 6891, 6826, 6760,
  6695, 6628, 6562, 6495, 6428, 6361, 6293, 6225,
  6157, 6089, 6020, 5950, 5881, 5811, 5741, 5670,
  5599, 5527, 5456, 5383, 5311, 5237, 5164, 5090,
  5015, 4941, 4865, 4789, 4713, 4636, 4558, 4480,
  4401, 4322, 4242, 4162, 4080, 3998, 3916, 3832,
  3748, 3663, 3577, 3490, 3402, 3314, 3224, 3133,
  3041, 2948, 2854, 2758, 2661, 2562, 2461, 2359,
  2255, 2148, 2040, 1929, 1815, 1698, 1577, 1452,
  1323, 1187, 1045, 894, 731, 550, 339, 0,
  339, 550, 731, 894, 1045, 1187, 1323, 1452,
  1577, 1698, 1815, 1929, 2040, 2148, 2255, 2359,
  2461, 2562, 2661, 2758, 2854, 2948, 3041, 3133,
  3224, 3314, 3402, 3490, 3577, 3663, 3748, 3832,
  3916, 3998, 4080, 4162, 4242, 4322, 4401, 4480,
  4558, 4636, 4713, 4789, 4865, 4941, 5015, 5090,
  5164, 5237, 5311, 5383, 5456, 5527, 5599, 5670,
  5741, 5811, 5881, 5950, 6020, 6089, 6157, 6225,
  6293, 6361, 6428, 6495, 6562, 6628, 6695, 6760,
  6826, 6891, 6956, 7021, 7086, 7150, 7214, 7278,
  7341, 7405, 7468, 7531, 7593, 7656, 7718, 7780,
  7842, 7903, 7965, 8026, 8087, 8148, 8208, 8269,
  8329, 8389, 8449, 8508, 8568, 8627, 8686, 8745,
  8804, 8862, 8921, 8979, 9037, 9095, 9153, 9211,
  9268, 9326, 9383, 9440, 9497, 9553, 9610, 9666,
  9723, 9779, 9835, 9891, 9947, 10002, 10058, 10113,
  10168, 10224, 10279, 10333, 10388, 10443, 10497, 10552,
  10606, 10660, 10714, 10768, 10822, 10875, 10929, 10982,
  11036, 11089, 11142, 11195, 11248, 11301, 11353, 11406,
  11458, 11511, 11563, 11615, 11667, 11719, 11771, 11823,
  11875, 11926, 11978, 12029, 12080, 12132, 12183, 12234,
  12285, 12335, 12386, 12437, 12487, 12538, 12588, 12639,
  12689, 12739, 12789, 12839, 12889, 12939, 12988, 13038,
  13088, 13137, 13187, 13236, 13285, 13334, 13383, 13432,
  13481, 13530, 13579, 13628, 13676, 13725, 13773, 13822,
  13870, 13918, 13967, 14015, 14063, 14111, 14159, 14206,
  14254, 14302, 14350, 14397, 14445, 14492, 14539, 14587,
  14634, 14681, 14728, 14775, 14822, 14869, 14916, 14963,
  15010, 15056, 15103, 15149, 15196, 15242, 15289, 15335,
  15381, 15427, 15474, 15520, 15566, 15612, 15657, 15703,
  15749, 15795, 15840, 15886, 15932, 15977, 16022, 16068,
  16113, 16158, 16204, 16249, 16294, 16339, 16384, 16384,
};
/* clang-format on */
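// Note (illustrative, not from the original source): the table above appears
// to match, for i in [0, 510],
//   error_measure_lut[i] == (int)rint(16384 * pow(fabs(i - 255) / 255.0, 0.7))
// with the final entry duplicating 16384 so that the linear interpolation in
// highbd_error_measure() below can safely read index 256 + e1 at the top of
// the error range.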
// For warping, we really use a 6-tap filter, but we do blocks of 8 pixels
// at a time. The zoom/rotation/shear in the model are applied to the
// "fractional" position of each pixel, which therefore varies within
// [-1, 2) * WARPEDPIXEL_PREC_SHIFTS.
// We need an extra 2 taps to fit this in, for a total of 8 taps.
/* clang-format off */
const int16_t warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8] = {
#if WARPEDPIXEL_PREC_BITS == 6
  // [-1, 0)
  { 0, 0, 127, 1, 0, 0, 0, 0 }, { 0, - 1, 127, 2, 0, 0, 0, 0 },
  { 1, - 3, 127, 4, - 1, 0, 0, 0 }, { 1, - 4, 126, 6, - 2, 1, 0, 0 },
  { 1, - 5, 126, 8, - 3, 1, 0, 0 }, { 1, - 6, 125, 11, - 4, 1, 0, 0 },
  { 1, - 7, 124, 13, - 4, 1, 0, 0 }, { 2, - 8, 123, 15, - 5, 1, 0, 0 },
  { 2, - 9, 122, 18, - 6, 1, 0, 0 }, { 2, -10, 121, 20, - 6, 1, 0, 0 },
  { 2, -11, 120, 22, - 7, 2, 0, 0 }, { 2, -12, 119, 25, - 8, 2, 0, 0 },
  { 3, -13, 117, 27, - 8, 2, 0, 0 }, { 3, -13, 116, 29, - 9, 2, 0, 0 },
  { 3, -14, 114, 32, -10, 3, 0, 0 }, { 3, -15, 113, 35, -10, 2, 0, 0 },
  { 3, -15, 111, 37, -11, 3, 0, 0 }, { 3, -16, 109, 40, -11, 3, 0, 0 },
  { 3, -16, 108, 42, -12, 3, 0, 0 }, { 4, -17, 106, 45, -13, 3, 0, 0 },
  { 4, -17, 104, 47, -13, 3, 0, 0 }, { 4, -17, 102, 50, -14, 3, 0, 0 },
  { 4, -17, 100, 52, -14, 3, 0, 0 }, { 4, -18, 98, 55, -15, 4, 0, 0 },
  { 4, -18, 96, 58, -15, 3, 0, 0 }, { 4, -18, 94, 60, -16, 4, 0, 0 },
  { 4, -18, 91, 63, -16, 4, 0, 0 }, { 4, -18, 89, 65, -16, 4, 0, 0 },
  { 4, -18, 87, 68, -17, 4, 0, 0 }, { 4, -18, 85, 70, -17, 4, 0, 0 },
  { 4, -18, 82, 73, -17, 4, 0, 0 }, { 4, -18, 80, 75, -17, 4, 0, 0 },
  { 4, -18, 78, 78, -18, 4, 0, 0 }, { 4, -17, 75, 80, -18, 4, 0, 0 },
  { 4, -17, 73, 82, -18, 4, 0, 0 }, { 4, -17, 70, 85, -18, 4, 0, 0 },
  { 4, -17, 68, 87, -18, 4, 0, 0 }, { 4, -16, 65, 89, -18, 4, 0, 0 },
  { 4, -16, 63, 91, -18, 4, 0, 0 }, { 4, -16, 60, 94, -18, 4, 0, 0 },
  { 3, -15, 58, 96, -18, 4, 0, 0 }, { 4, -15, 55, 98, -18, 4, 0, 0 },
  { 3, -14, 52, 100, -17, 4, 0, 0 }, { 3, -14, 50, 102, -17, 4, 0, 0 },
  { 3, -13, 47, 104, -17, 4, 0, 0 }, { 3, -13, 45, 106, -17, 4, 0, 0 },
  { 3, -12, 42, 108, -16, 3, 0, 0 }, { 3, -11, 40, 109, -16, 3, 0, 0 },
  { 3, -11, 37, 111, -15, 3, 0, 0 }, { 2, -10, 35, 113, -15, 3, 0, 0 },
  { 3, -10, 32, 114, -14, 3, 0, 0 }, { 2, - 9, 29, 116, -13, 3, 0, 0 },
  { 2, - 8, 27, 117, -13, 3, 0, 0 }, { 2, - 8, 25, 119, -12, 2, 0, 0 },
  { 2, - 7, 22, 120, -11, 2, 0, 0 }, { 1, - 6, 20, 121, -10, 2, 0, 0 },
  { 1, - 6, 18, 122, - 9, 2, 0, 0 }, { 1, - 5, 15, 123, - 8, 2, 0, 0 },
  { 1, - 4, 13, 124, - 7, 1, 0, 0 }, { 1, - 4, 11, 125, - 6, 1, 0, 0 },
  { 1, - 3, 8, 126, - 5, 1, 0, 0 }, { 1, - 2, 6, 126, - 4, 1, 0, 0 },
  { 0, - 1, 4, 127, - 3, 1, 0, 0 }, { 0, 0, 2, 127, - 1, 0, 0, 0 },

  // [0, 1)
  { 0, 0, 0, 127, 1, 0, 0, 0}, { 0, 0, -1, 127, 2, 0, 0, 0},
  { 0, 1, -3, 127, 4, -2, 1, 0}, { 0, 1, -5, 127, 6, -2, 1, 0},
  { 0, 2, -6, 126, 8, -3, 1, 0}, {-1, 2, -7, 126, 11, -4, 2, -1},
  {-1, 3, -8, 125, 13, -5, 2, -1}, {-1, 3, -10, 124, 16, -6, 3, -1},
  {-1, 4, -11, 123, 18, -7, 3, -1}, {-1, 4, -12, 122, 20, -7, 3, -1},
  {-1, 4, -13, 121, 23, -8, 3, -1}, {-2, 5, -14, 120, 25, -9, 4, -1},
  {-1, 5, -15, 119, 27, -10, 4, -1}, {-1, 5, -16, 118, 30, -11, 4, -1},
  {-2, 6, -17, 116, 33, -12, 5, -1}, {-2, 6, -17, 114, 35, -12, 5, -1},
  {-2, 6, -18, 113, 38, -13, 5, -1}, {-2, 7, -19, 111, 41, -14, 6, -2},
  {-2, 7, -19, 110, 43, -15, 6, -2}, {-2, 7, -20, 108, 46, -15, 6, -2},
  {-2, 7, -20, 106, 49, -16, 6, -2}, {-2, 7, -21, 104, 51, -16, 7, -2},
  {-2, 7, -21, 102, 54, -17, 7, -2}, {-2, 8, -21, 100, 56, -18, 7, -2},
  {-2, 8, -22, 98, 59, -18, 7, -2}, {-2, 8, -22, 96, 62, -19, 7, -2},
  {-2, 8, -22, 94, 64, -19, 7, -2}, {-2, 8, -22, 91, 67, -20, 8, -2},
  {-2, 8, -22, 89, 69, -20, 8, -2}, {-2, 8, -22, 87, 72, -21, 8, -2},
  {-2, 8, -21, 84, 74, -21, 8, -2}, {-2, 8, -22, 82, 77, -21, 8, -2},
  {-2, 8, -21, 79, 79, -21, 8, -2}, {-2, 8, -21, 77, 82, -22, 8, -2},
  {-2, 8, -21, 74, 84, -21, 8, -2}, {-2, 8, -21, 72, 87, -22, 8, -2},
  {-2, 8, -20, 69, 89, -22, 8, -2}, {-2, 8, -20, 67, 91, -22, 8, -2},
  {-2, 7, -19, 64, 94, -22, 8, -2}, {-2, 7, -19, 62, 96, -22, 8, -2},
  {-2, 7, -18, 59, 98, -22, 8, -2}, {-2, 7, -18, 56, 100, -21, 8, -2},
  {-2, 7, -17, 54, 102, -21, 7, -2}, {-2, 7, -16, 51, 104, -21, 7, -2},
  {-2, 6, -16, 49, 106, -20, 7, -2}, {-2, 6, -15, 46, 108, -20, 7, -2},
  {-2, 6, -15, 43, 110, -19, 7, -2}, {-2, 6, -14, 41, 111, -19, 7, -2},
  {-1, 5, -13, 38, 113, -18, 6, -2}, {-1, 5, -12, 35, 114, -17, 6, -2},
  {-1, 5, -12, 33, 116, -17, 6, -2}, {-1, 4, -11, 30, 118, -16, 5, -1},
  {-1, 4, -10, 27, 119, -15, 5, -1}, {-1, 4, -9, 25, 120, -14, 5, -2},
  {-1, 3, -8, 23, 121, -13, 4, -1}, {-1, 3, -7, 20, 122, -12, 4, -1},
  {-1, 3, -7, 18, 123, -11, 4, -1}, {-1, 3, -6, 16, 124, -10, 3, -1},
  {-1, 2, -5, 13, 125, -8, 3, -1}, {-1, 2, -4, 11, 126, -7, 2, -1},
  { 0, 1, -3, 8, 126, -6, 2, 0}, { 0, 1, -2, 6, 127, -5, 1, 0},
  { 0, 1, -2, 4, 127, -3, 1, 0}, { 0, 0, 0, 2, 127, -1, 0, 0},

  // [1, 2)
  { 0, 0, 0, 1, 127, 0, 0, 0 }, { 0, 0, 0, - 1, 127, 2, 0, 0 },
  { 0, 0, 1, - 3, 127, 4, - 1, 0 }, { 0, 0, 1, - 4, 126, 6, - 2, 1 },
  { 0, 0, 1, - 5, 126, 8, - 3, 1 }, { 0, 0, 1, - 6, 125, 11, - 4, 1 },
  { 0, 0, 1, - 7, 124, 13, - 4, 1 }, { 0, 0, 2, - 8, 123, 15, - 5, 1 },
  { 0, 0, 2, - 9, 122, 18, - 6, 1 }, { 0, 0, 2, -10, 121, 20, - 6, 1 },
  { 0, 0, 2, -11, 120, 22, - 7, 2 }, { 0, 0, 2, -12, 119, 25, - 8, 2 },
  { 0, 0, 3, -13, 117, 27, - 8, 2 }, { 0, 0, 3, -13, 116, 29, - 9, 2 },
  { 0, 0, 3, -14, 114, 32, -10, 3 }, { 0, 0, 3, -15, 113, 35, -10, 2 },
  { 0, 0, 3, -15, 111, 37, -11, 3 }, { 0, 0, 3, -16, 109, 40, -11, 3 },
  { 0, 0, 3, -16, 108, 42, -12, 3 }, { 0, 0, 4, -17, 106, 45, -13, 3 },
  { 0, 0, 4, -17, 104, 47, -13, 3 }, { 0, 0, 4, -17, 102, 50, -14, 3 },
  { 0, 0, 4, -17, 100, 52, -14, 3 }, { 0, 0, 4, -18, 98, 55, -15, 4 },
  { 0, 0, 4, -18, 96, 58, -15, 3 }, { 0, 0, 4, -18, 94, 60, -16, 4 },
  { 0, 0, 4, -18, 91, 63, -16, 4 }, { 0, 0, 4, -18, 89, 65, -16, 4 },
  { 0, 0, 4, -18, 87, 68, -17, 4 }, { 0, 0, 4, -18, 85, 70, -17, 4 },
  { 0, 0, 4, -18, 82, 73, -17, 4 }, { 0, 0, 4, -18, 80, 75, -17, 4 },
  { 0, 0, 4, -18, 78, 78, -18, 4 }, { 0, 0, 4, -17, 75, 80, -18, 4 },
  { 0, 0, 4, -17, 73, 82, -18, 4 }, { 0, 0, 4, -17, 70, 85, -18, 4 },
  { 0, 0, 4, -17, 68, 87, -18, 4 }, { 0, 0, 4, -16, 65, 89, -18, 4 },
  { 0, 0, 4, -16, 63, 91, -18, 4 }, { 0, 0, 4, -16, 60, 94, -18, 4 },
  { 0, 0, 3, -15, 58, 96, -18, 4 }, { 0, 0, 4, -15, 55, 98, -18, 4 },
  { 0, 0, 3, -14, 52, 100, -17, 4 }, { 0, 0, 3, -14, 50, 102, -17, 4 },
  { 0, 0, 3, -13, 47, 104, -17, 4 }, { 0, 0, 3, -13, 45, 106, -17, 4 },
  { 0, 0, 3, -12, 42, 108, -16, 3 }, { 0, 0, 3, -11, 40, 109, -16, 3 },
  { 0, 0, 3, -11, 37, 111, -15, 3 }, { 0, 0, 2, -10, 35, 113, -15, 3 },
  { 0, 0, 3, -10, 32, 114, -14, 3 }, { 0, 0, 2, - 9, 29, 116, -13, 3 },
  { 0, 0, 2, - 8, 27, 117, -13, 3 }, { 0, 0, 2, - 8, 25, 119, -12, 2 },
  { 0, 0, 2, - 7, 22, 120, -11, 2 }, { 0, 0, 1, - 6, 20, 121, -10, 2 },
  { 0, 0, 1, - 6, 18, 122, - 9, 2 }, { 0, 0, 1, - 5, 15, 123, - 8, 2 },
  { 0, 0, 1, - 4, 13, 124, - 7, 1 }, { 0, 0, 1, - 4, 11, 125, - 6, 1 },
  { 0, 0, 1, - 3, 8, 126, - 5, 1 }, { 0, 0, 1, - 2, 6, 126, - 4, 1 },
  { 0, 0, 0, - 1, 4, 127, - 3, 1 }, { 0, 0, 0, 0, 2, 127, - 1, 0 },
  // dummy (replicate row index 191)
  { 0, 0, 0, 0, 2, 127, - 1, 0 },

#elif WARPEDPIXEL_PREC_BITS == 5
  // [-1, 0)
  {0, 0, 127, 1, 0, 0, 0, 0}, {1, -3, 127, 4, -1, 0, 0, 0},
  {1, -5, 126, 8, -3, 1, 0, 0}, {1, -7, 124, 13, -4, 1, 0, 0},
  {2, -9, 122, 18, -6, 1, 0, 0}, {2, -11, 120, 22, -7, 2, 0, 0},
  {3, -13, 117, 27, -8, 2, 0, 0}, {3, -14, 114, 32, -10, 3, 0, 0},
  {3, -15, 111, 37, -11, 3, 0, 0}, {3, -16, 108, 42, -12, 3, 0, 0},
  {4, -17, 104, 47, -13, 3, 0, 0}, {4, -17, 100, 52, -14, 3, 0, 0},
  {4, -18, 96, 58, -15, 3, 0, 0}, {4, -18, 91, 63, -16, 4, 0, 0},
  {4, -18, 87, 68, -17, 4, 0, 0}, {4, -18, 82, 73, -17, 4, 0, 0},
  {4, -18, 78, 78, -18, 4, 0, 0}, {4, -17, 73, 82, -18, 4, 0, 0},
  {4, -17, 68, 87, -18, 4, 0, 0}, {4, -16, 63, 91, -18, 4, 0, 0},
  {3, -15, 58, 96, -18, 4, 0, 0}, {3, -14, 52, 100, -17, 4, 0, 0},
  {3, -13, 47, 104, -17, 4, 0, 0}, {3, -12, 42, 108, -16, 3, 0, 0},
  {3, -11, 37, 111, -15, 3, 0, 0}, {3, -10, 32, 114, -14, 3, 0, 0},
  {2, -8, 27, 117, -13, 3, 0, 0}, {2, -7, 22, 120, -11, 2, 0, 0},
  {1, -6, 18, 122, -9, 2, 0, 0}, {1, -4, 13, 124, -7, 1, 0, 0},
  {1, -3, 8, 126, -5, 1, 0, 0}, {0, -1, 4, 127, -3, 1, 0, 0},
  // [0, 1)
  { 0, 0, 0, 127, 1, 0, 0, 0}, { 0, 1, -3, 127, 4, -2, 1, 0},
  { 0, 2, -6, 126, 8, -3, 1, 0}, {-1, 3, -8, 125, 13, -5, 2, -1},
  {-1, 4, -11, 123, 18, -7, 3, -1}, {-1, 4, -13, 121, 23, -8, 3, -1},
  {-1, 5, -15, 119, 27, -10, 4, -1}, {-2, 6, -17, 116, 33, -12, 5, -1},
  {-2, 6, -18, 113, 38, -13, 5, -1}, {-2, 7, -19, 110, 43, -15, 6, -2},
  {-2, 7, -20, 106, 49, -16, 6, -2}, {-2, 7, -21, 102, 54, -17, 7, -2},
  {-2, 8, -22, 98, 59, -18, 7, -2}, {-2, 8, -22, 94, 64, -19, 7, -2},
  {-2, 8, -22, 89, 69, -20, 8, -2}, {-2, 8, -21, 84, 74, -21, 8, -2},
  {-2, 8, -21, 79, 79, -21, 8, -2}, {-2, 8, -21, 74, 84, -21, 8, -2},
  {-2, 8, -20, 69, 89, -22, 8, -2}, {-2, 7, -19, 64, 94, -22, 8, -2},
  {-2, 7, -18, 59, 98, -22, 8, -2}, {-2, 7, -17, 54, 102, -21, 7, -2},
  {-2, 6, -16, 49, 106, -20, 7, -2}, {-2, 6, -15, 43, 110, -19, 7, -2},
  {-1, 5, -13, 38, 113, -18, 6, -2}, {-1, 5, -12, 33, 116, -17, 6, -2},
  {-1, 4, -10, 27, 119, -15, 5, -1}, {-1, 3, -8, 23, 121, -13, 4, -1},
  {-1, 3, -7, 18, 123, -11, 4, -1}, {-1, 2, -5, 13, 125, -8, 3, -1},
  { 0, 1, -3, 8, 126, -6, 2, 0}, { 0, 1, -2, 4, 127, -3, 1, 0},
  // [1, 2)
  {0, 0, 0, 1, 127, 0, 0, 0}, {0, 0, 1, -3, 127, 4, -1, 0},
  {0, 0, 1, -5, 126, 8, -3, 1}, {0, 0, 1, -7, 124, 13, -4, 1},
  {0, 0, 2, -9, 122, 18, -6, 1}, {0, 0, 2, -11, 120, 22, -7, 2},
  {0, 0, 3, -13, 117, 27, -8, 2}, {0, 0, 3, -14, 114, 32, -10, 3},
  {0, 0, 3, -15, 111, 37, -11, 3}, {0, 0, 3, -16, 108, 42, -12, 3},
  {0, 0, 4, -17, 104, 47, -13, 3}, {0, 0, 4, -17, 100, 52, -14, 3},
  {0, 0, 4, -18, 96, 58, -15, 3}, {0, 0, 4, -18, 91, 63, -16, 4},
  {0, 0, 4, -18, 87, 68, -17, 4}, {0, 0, 4, -18, 82, 73, -17, 4},
  {0, 0, 4, -18, 78, 78, -18, 4}, {0, 0, 4, -17, 73, 82, -18, 4},
  {0, 0, 4, -17, 68, 87, -18, 4}, {0, 0, 4, -16, 63, 91, -18, 4},
  {0, 0, 3, -15, 58, 96, -18, 4}, {0, 0, 3, -14, 52, 100, -17, 4},
  {0, 0, 3, -13, 47, 104, -17, 4}, {0, 0, 3, -12, 42, 108, -16, 3},
  {0, 0, 3, -11, 37, 111, -15, 3}, {0, 0, 3, -10, 32, 114, -14, 3},
  {0, 0, 2, -8, 27, 117, -13, 3}, {0, 0, 2, -7, 22, 120, -11, 2},
  {0, 0, 1, -6, 18, 122, -9, 2}, {0, 0, 1, -4, 13, 124, -7, 1},
  {0, 0, 1, -3, 8, 126, -5, 1}, {0, 0, 0, -1, 4, 127, -3, 1},
  // dummy (replicate row index 95)
  {0, 0, 0, -1, 4, 127, -3, 1},

#endif  // WARPEDPIXEL_PREC_BITS == 6
};

/* clang-format on */

#define DIV_LUT_PREC_BITS 14
#define DIV_LUT_BITS 8
#define DIV_LUT_NUM (1 << DIV_LUT_BITS)

static const uint16_t div_lut[DIV_LUT_NUM + 1] = {
  16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
  15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
  15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
  14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
  13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
  13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
  13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
  12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
  12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
  11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
  11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
  11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
  10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
  10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
  10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
  9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732,
  9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489,
  9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259,
  9239, 9218, 9198, 9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039,
  9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830,
  8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630,
  8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439,
  8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257,
  8240, 8224, 8208, 8192,
};

// Decomposes a divisor D such that 1/D = y/2^shift, where y is returned
// at precision of DIV_LUT_PREC_BITS along with the shift.
static int16_t resolve_divisor_64(uint64_t D, int16_t *shift) {
  int64_t f;
  *shift = (int16_t)((D >> 32) ? get_msb((unsigned int)(D >> 32)) + 32
                               : get_msb((unsigned int)D));
  // e is obtained from D after resetting the most significant 1 bit.
  const int64_t e = D - ((uint64_t)1 << *shift);
  // Get the most significant DIV_LUT_BITS (8) bits of e into f
  if (*shift > DIV_LUT_BITS)
    f = ROUND_POWER_OF_TWO_64(e, *shift - DIV_LUT_BITS);
  else
    f = e << (DIV_LUT_BITS - *shift);
  assert(f <= DIV_LUT_NUM);
  *shift += DIV_LUT_PREC_BITS;
  // Use f as lookup into the precomputed table of multipliers
  return div_lut[f];
}

static int16_t resolve_divisor_32(uint32_t D, int16_t *shift) {
  int32_t f;
  *shift = get_msb(D);
  // e is obtained from D after resetting the most significant 1 bit.
  const int32_t e = D - ((uint32_t)1 << *shift);
  // Get the most significant DIV_LUT_BITS (8) bits of e into f
  if (*shift > DIV_LUT_BITS)
    f = ROUND_POWER_OF_TWO(e, *shift - DIV_LUT_BITS);
  else
    f = e << (DIV_LUT_BITS - *shift);
  assert(f <= DIV_LUT_NUM);
  *shift += DIV_LUT_PREC_BITS;
  // Use f as lookup into the precomputed table of multipliers
  return div_lut[f];
}
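// Illustrative usage sketch (an assumption for clarity, mirroring how
// get_shear_params() and find_affine_int() below use these helpers): to
// approximate a quotient x / D without a hardware divide,
//   int16_t shift;
//   const int16_t y = resolve_divisor_32(D, &shift);  // 1/D ~= y / 2^shift
//   const int32_t q =
//       (int32_t)ROUND_POWER_OF_TWO_SIGNED_64((int64_t)x * y, shift);
// which is accurate to roughly DIV_LUT_PREC_BITS bits of precision.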
static int is_affine_valid(const WarpedMotionParams *const wm) {
  const int32_t *mat = wm->wmmat;
  return (mat[2] > 0);
}

static int is_affine_shear_allowed(int16_t alpha, int16_t beta, int16_t gamma,
                                   int16_t delta) {
  if ((4 * abs(alpha) + 7 * abs(beta) >= (1 << WARPEDMODEL_PREC_BITS)) ||
      (4 * abs(gamma) + 4 * abs(delta) >= (1 << WARPEDMODEL_PREC_BITS)))
    return 0;
  else
    return 1;
}

// Returns 1 on success or 0 on an invalid affine set
int get_shear_params(WarpedMotionParams *wm) {
  const int32_t *mat = wm->wmmat;
  if (!is_affine_valid(wm)) return 0;
  wm->alpha =
      clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
  wm->beta = clamp(mat[3], INT16_MIN, INT16_MAX);
  int16_t shift;
  int16_t y = resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
  int64_t v = ((int64_t)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
  wm->gamma =
      clamp((int)ROUND_POWER_OF_TWO_SIGNED_64(v, shift), INT16_MIN, INT16_MAX);
  v = ((int64_t)mat[3] * mat[4]) * y;
  wm->delta = clamp(mat[5] - (int)ROUND_POWER_OF_TWO_SIGNED_64(v, shift) -
                        (1 << WARPEDMODEL_PREC_BITS),
                    INT16_MIN, INT16_MAX);

  wm->alpha = ROUND_POWER_OF_TWO_SIGNED(wm->alpha, WARP_PARAM_REDUCE_BITS) *
              (1 << WARP_PARAM_REDUCE_BITS);
  wm->beta = ROUND_POWER_OF_TWO_SIGNED(wm->beta, WARP_PARAM_REDUCE_BITS) *
             (1 << WARP_PARAM_REDUCE_BITS);
  wm->gamma = ROUND_POWER_OF_TWO_SIGNED(wm->gamma, WARP_PARAM_REDUCE_BITS) *
              (1 << WARP_PARAM_REDUCE_BITS);
  wm->delta = ROUND_POWER_OF_TWO_SIGNED(wm->delta, WARP_PARAM_REDUCE_BITS) *
              (1 << WARP_PARAM_REDUCE_BITS);

  if (!is_affine_shear_allowed(wm->alpha, wm->beta, wm->gamma, wm->delta))
    return 0;

  return 1;
}

static INLINE int highbd_error_measure(int err, int bd) {
  const int b = bd - 8;
  const int bmask = (1 << b) - 1;
  const int v = (1 << b);
  err = abs(err);
  const int e1 = err >> b;
  const int e2 = err & bmask;
  return error_measure_lut[255 + e1] * (v - e2) +
         error_measure_lut[256 + e1] * e2;
}
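// Worked example (illustrative): for bd = 10 we have b = 2 and v = 4, so an
// error of err = 9 splits into e1 = 2, e2 = 1, giving
//   error_measure_lut[257] * 3 + error_measure_lut[258] * 1
// i.e. highbd_error_measure() linearly interpolates the 8-bit table between
// successive entries, weighted by the low-order bits of the error.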
/* Note: For an explanation of the warp algorithm, and some notes on bit widths
   for hardware implementations, see the comments above av1_warp_affine_c
*/
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
                              int width, int height, int stride, uint16_t *pred,
                              int p_col, int p_row, int p_width, int p_height,
                              int p_stride, int subsampling_x,
                              int subsampling_y, int bd,
                              ConvolveParams *conv_params, int16_t alpha,
                              int16_t beta, int16_t gamma, int16_t delta) {
  int32_t tmp[15 * 8];
  const int reduce_bits_horiz =
      conv_params->round_0 +
      AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0);
  const int reduce_bits_vert = conv_params->is_compound
                                   ? conv_params->round_1
                                   : 2 * FILTER_BITS - reduce_bits_horiz;
  const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
  const int offset_bits_horiz = bd + FILTER_BITS - 1;
  const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
  const int round_bits =
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
  (void)max_bits_horiz;
  assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));

  for (int i = p_row; i < p_row + p_height; i += 8) {
    for (int j = p_col; j < p_col + p_width; j += 8) {
      // Calculate the center of this 8x8 block,
      // project to luma coordinates (if in a subsampled chroma plane),
      // apply the affine transformation,
      // then convert back to the original coordinates (if necessary)
      const int32_t src_x = (j + 4) << subsampling_x;
      const int32_t src_y = (i + 4) << subsampling_y;
      const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
      const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
      const int32_t x4 = dst_x >> subsampling_x;
      const int32_t y4 = dst_y >> subsampling_y;

      const int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
      int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
      const int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
      int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);

      sx4 += alpha * (-4) + beta * (-4);
      sy4 += gamma * (-4) + delta * (-4);

      sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
      sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);

      // Horizontal filter
      for (int k = -7; k < 8; ++k) {
        const int iy = clamp(iy4 + k, 0, height - 1);

        int sx = sx4 + beta * (k + 4);
        for (int l = -4; l < 4; ++l) {
          int ix = ix4 + l - 3;
          const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
                           WARPEDPIXEL_PREC_SHIFTS;
          assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
          const int16_t *coeffs = warped_filter[offs];

          int32_t sum = 1 << offset_bits_horiz;
          for (int m = 0; m < 8; ++m) {
            const int sample_x = clamp(ix + m, 0, width - 1);
            sum += ref[iy * stride + sample_x] * coeffs[m];
          }
          sum = ROUND_POWER_OF_TWO(sum, reduce_bits_horiz);
          assert(0 <= sum && sum < (1 << max_bits_horiz));
          tmp[(k + 7) * 8 + (l + 4)] = sum;
          sx += alpha;
        }
      }

      // Vertical filter
      for (int k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
        int sy = sy4 + delta * (k + 4);
        for (int l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
          const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
                           WARPEDPIXEL_PREC_SHIFTS;
          assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
          const int16_t *coeffs = warped_filter[offs];

          int32_t sum = 1 << offset_bits_vert;
          for (int m = 0; m < 8; ++m) {
            sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
          }

          if (conv_params->is_compound) {
            CONV_BUF_TYPE *p =
                &conv_params
                     ->dst[(i - p_row + k + 4) * conv_params->dst_stride +
                           (j - p_col + l + 4)];
            sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
            if (conv_params->do_average) {
              uint16_t *dst16 =
                  &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
              int32_t tmp32 = *p;
              if (conv_params->use_dist_wtd_comp_avg) {
                tmp32 = tmp32 * conv_params->fwd_offset +
                        sum * conv_params->bck_offset;
                tmp32 = tmp32 >> DIST_PRECISION_BITS;
              } else {
                tmp32 += sum;
                tmp32 = tmp32 >> 1;
              }
              tmp32 = tmp32 - (1 << (offset_bits - conv_params->round_1)) -
                      (1 << (offset_bits - conv_params->round_1 - 1));
              *dst16 =
                  clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp32, round_bits), bd);
            } else {
              *p = sum;
            }
          } else {
            uint16_t *p =
                &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
            sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
            assert(0 <= sum && sum < (1 << (bd + 2)));
            *p = clip_pixel_highbd(sum - (1 << (bd - 1)) - (1 << bd), bd);
          }
          sy += gamma;
        }
      }
    }
  }
}

static void highbd_warp_plane(WarpedMotionParams *wm,
                              const uint8_t *const ref8, int width, int height,
                              int stride, const uint8_t *const pred8, int p_col,
                              int p_row, int p_width, int p_height,
                              int p_stride, int subsampling_x,
                              int subsampling_y, int bd,
                              ConvolveParams *conv_params) {
  assert(wm->wmtype <= AFFINE);
  if (wm->wmtype == ROTZOOM) {
    wm->wmmat[5] = wm->wmmat[2];
    wm->wmmat[4] = -wm->wmmat[3];
  }
  const int32_t *const mat = wm->wmmat;
  const int16_t alpha = wm->alpha;
  const int16_t beta = wm->beta;
  const int16_t gamma = wm->gamma;
  const int16_t delta = wm->delta;

  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
                         p_width, p_height, p_stride, subsampling_x,
                         subsampling_y, bd, conv_params, alpha, beta, gamma,
                         delta);
}

static int64_t highbd_frame_error(const uint16_t *const ref, int stride,
                                  const uint16_t *const dst, int p_width,
                                  int p_height, int p_stride, int bd) {
  int64_t sum_error = 0;
  for (int i = 0; i < p_height; ++i) {
    for (int j = 0; j < p_width; ++j) {
      sum_error +=
          highbd_error_measure(dst[j + i * p_stride] - ref[j + i * stride], bd);
    }
  }
  return sum_error;
}

static int64_t highbd_warp_error(
    WarpedMotionParams *wm, const uint8_t *const ref8, int width, int height,
    int stride, const uint8_t *const dst8, int p_col, int p_row, int p_width,
    int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd,
    int64_t best_error) {
  int64_t gm_sumerr = 0;
  const int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK);
  const int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK);
  uint16_t tmp[WARP_ERROR_BLOCK * WARP_ERROR_BLOCK];

  ConvolveParams conv_params = get_conv_params(0, 0, bd);
  conv_params.use_dist_wtd_comp_avg = 0;
  for (int i = p_row; i < p_row + p_height; i += WARP_ERROR_BLOCK) {
    for (int j = p_col; j < p_col + p_width; j += WARP_ERROR_BLOCK) {
      // avoid warping extra 8x8 blocks in the padded region of the frame
      // when p_width and p_height are not multiples of WARP_ERROR_BLOCK
      const int warp_w = AOMMIN(error_bsize_w, p_col + p_width - j);
      const int warp_h = AOMMIN(error_bsize_h, p_row + p_height - i);
      highbd_warp_plane(wm, ref8, width, height, stride,
                        CONVERT_TO_BYTEPTR(tmp), j, i, warp_w, warp_h,
                        WARP_ERROR_BLOCK, subsampling_x, subsampling_y, bd,
                        &conv_params);

      gm_sumerr += highbd_frame_error(
          tmp, WARP_ERROR_BLOCK, CONVERT_TO_SHORTPTR(dst8) + j + i * p_stride,
          warp_w, warp_h, p_stride, bd);
      if (gm_sumerr > best_error) return gm_sumerr;
    }
  }
  return gm_sumerr;
}

static INLINE int error_measure(int err) {
  return error_measure_lut[255 + err];
}
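// Note (illustrative): in the 8-bit path, err is the difference of two uint8
// pixels, so it always lies in [-255, 255] and 255 + err stays within the
// 512-entry error_measure_lut without any explicit clamping.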
/* The warp filter for ROTZOOM and AFFINE models works as follows:
   * Split the input into 8x8 blocks
   * For each block, project the point (4, 4) within the block, to get the
     overall block position. Split into integer and fractional coordinates,
     maintaining full WARPEDMODEL precision
   * Filter horizontally: Generate 15 rows of 8 pixels each. Each pixel gets a
     variable horizontal offset. This means that, while the rows of the
     intermediate buffer align with the rows of the *reference* image, the
     columns align with the columns of the *destination* image.
   * Filter vertically: Generate the output block (up to 8x8 pixels, but if the
     destination is too small we crop the output at this stage). Each pixel has
     a variable vertical offset, so that the resulting rows are aligned with
     the rows of the destination image.

   To accomplish these alignments, we factor the warp matrix as a
   product of two shear / asymmetric zoom matrices:
   / a b \  = /   1       0    \ * / 1+alpha  beta \
   \ c d /    \ gamma  1+delta /   \    0      1   /
   where a, b, c, d are wmmat[2], wmmat[3], wmmat[4], wmmat[5] respectively.
   The horizontal shear (with alpha and beta) is applied first,
   then the vertical shear (with gamma and delta) is applied second.

   The only limitation is that, to fit this in a fixed 8-tap filter size,
   the fractional pixel offsets must be at most +-1. Since the horizontal
   filter generates 15 rows of 8 columns, and the initial point we project
   is at (4, 4) within the block, the parameters must satisfy
   4 * |alpha| + 7 * |beta| <= 1   and   4 * |gamma| + 4 * |delta| <= 1
   for this filter to be applicable.

   Note: This function assumes that the caller has done all of the relevant
   checks, i.e. that we have a ROTZOOM or AFFINE model, that wm[4] and wm[5]
   are set appropriately (if using a ROTZOOM model), and that alpha, beta,
   gamma, delta are all in range.

   TODO(david.barker): Maybe support scaled references?
*/
/* A note on hardware implementation:
   The warp filter is intended to be implementable using the same hardware as
   the high-precision convolve filters from the loop-restoration and
   convolve-round experiments.

   For a single filter stage, considering all of the coefficient sets for the
   warp filter and the regular convolution filter, an input in the range
   [0, 2^k - 1] is mapped into the range [-56 * (2^k - 1), 184 * (2^k - 1)]
   before rounding.

   Allowing for some changes to the filter coefficient sets, call the range
   [-64 * 2^k, 192 * 2^k]. Then, if we initialize the accumulator to 64 * 2^k,
   we can replace this by the range [0, 256 * 2^k], which can be stored in an
   unsigned value with 8 + k bits.

   This allows the derivation of the appropriate bit widths and offsets for
   the various intermediate values: If

   F := FILTER_BITS = 7 (or else the above ranges need adjusting)
        So a *single* filter stage maps a k-bit input to a (k + F + 1)-bit
        intermediate value.
   H := ROUND0_BITS
   V := VERSHEAR_REDUCE_PREC_BITS
        (and note that we must have H + V = 2*F for the output to have the
        same scale as the input)

   then we end up with the following offsets and ranges:
   Horizontal filter: Apply an offset of 1 << (bd + F - 1), sum fits into a
                      uint{bd + F + 1}
   After rounding: The values stored in 'tmp' fit into a uint{bd + F + 1 - H}.
   Vertical filter: Apply an offset of 1 << (bd + 2*F - H), sum fits into a
                    uint{bd + 2*F + 2 - H}
   After rounding: The final value, before undoing the offset, fits into a
                   uint{bd + 2}.

   Then we need to undo the offsets before clamping to a pixel. Note that,
   if we do this at the end, the amount to subtract is actually independent
   of H and V:

   offset to subtract = (1 << ((bd + F - 1) - H + F - V)) +
                        (1 << ((bd + 2*F - H) - V))
                     == (1 << (bd - 1)) + (1 << bd)

   This allows us to entirely avoid clamping in both the warp filter and
   the convolve-round experiment. As of the time of writing, the Wiener filter
   from loop-restoration can encode a central coefficient up to 216, which
   leads to a maximum value of about 282 * 2^k after applying the offset.
   So in that case we still need to clamp.
*/
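// Concrete instance of the above (illustrative, following the comment's own
// formulas with bd = 8 and F = 7): the horizontal sums fit in a uint16
// (8 + 7 + 1 bits), 'tmp' holds uint{16 - H} values, the rounded vertical
// output fits in a uint10, and the final offset to subtract is
// (1 << 7) + (1 << 8) = 384, exactly the two terms removed below before
// clip_pixel().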
void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
                       int height, int stride, uint8_t *pred, int p_col,
                       int p_row, int p_width, int p_height, int p_stride,
                       int subsampling_x, int subsampling_y,
                       ConvolveParams *conv_params, int16_t alpha, int16_t beta,
                       int16_t gamma, int16_t delta) {
  int32_t tmp[15 * 8];
  const int bd = 8;
  const int reduce_bits_horiz = conv_params->round_0;
  const int reduce_bits_vert = conv_params->is_compound
                                   ? conv_params->round_1
                                   : 2 * FILTER_BITS - reduce_bits_horiz;
  const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
  const int offset_bits_horiz = bd + FILTER_BITS - 1;
  const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
  const int round_bits =
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
  (void)max_bits_horiz;
  assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
  assert(IMPLIES(conv_params->do_average, conv_params->is_compound));

  for (int i = p_row; i < p_row + p_height; i += 8) {
    for (int j = p_col; j < p_col + p_width; j += 8) {
      // Calculate the center of this 8x8 block,
      // project to luma coordinates (if in a subsampled chroma plane),
      // apply the affine transformation,
      // then convert back to the original coordinates (if necessary)
      const int32_t src_x = (j + 4) << subsampling_x;
      const int32_t src_y = (i + 4) << subsampling_y;
      const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
      const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
      const int32_t x4 = dst_x >> subsampling_x;
      const int32_t y4 = dst_y >> subsampling_y;

      int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
      int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
      int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
      int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);

      sx4 += alpha * (-4) + beta * (-4);
      sy4 += gamma * (-4) + delta * (-4);

      sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
      sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);

      // Horizontal filter
      for (int k = -7; k < 8; ++k) {
        // Clamp to top/bottom edge of the frame
        const int iy = clamp(iy4 + k, 0, height - 1);

        int sx = sx4 + beta * (k + 4);

        for (int l = -4; l < 4; ++l) {
          int ix = ix4 + l - 3;
          // At this point, sx = sx4 + alpha * l + beta * k
          const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
                           WARPEDPIXEL_PREC_SHIFTS;
          assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
          const int16_t *coeffs = warped_filter[offs];

          int32_t sum = 1 << offset_bits_horiz;
          for (int m = 0; m < 8; ++m) {
            // Clamp to left/right edge of the frame
            const int sample_x = clamp(ix + m, 0, width - 1);

            sum += ref[iy * stride + sample_x] * coeffs[m];
          }
          sum = ROUND_POWER_OF_TWO(sum, reduce_bits_horiz);
          assert(0 <= sum && sum < (1 << max_bits_horiz));
          tmp[(k + 7) * 8 + (l + 4)] = sum;
          sx += alpha;
        }
      }

      // Vertical filter
      for (int k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
        int sy = sy4 + delta * (k + 4);
        for (int l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
          // At this point, sy = sy4 + gamma * l + delta * k
          const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
                           WARPEDPIXEL_PREC_SHIFTS;
          assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
          const int16_t *coeffs = warped_filter[offs];

          int32_t sum = 1 << offset_bits_vert;
          for (int m = 0; m < 8; ++m) {
            sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
          }

          if (conv_params->is_compound) {
            CONV_BUF_TYPE *p =
                &conv_params
                     ->dst[(i - p_row + k + 4) * conv_params->dst_stride +
                           (j - p_col + l + 4)];
            sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
            if (conv_params->do_average) {
              uint8_t *dst8 =
                  &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
              int32_t tmp32 = *p;
              if (conv_params->use_dist_wtd_comp_avg) {
                tmp32 = tmp32 * conv_params->fwd_offset +
                        sum * conv_params->bck_offset;
                tmp32 = tmp32 >> DIST_PRECISION_BITS;
              } else {
                tmp32 += sum;
                tmp32 = tmp32 >> 1;
              }
              tmp32 = tmp32 - (1 << (offset_bits - conv_params->round_1)) -
                      (1 << (offset_bits - conv_params->round_1 - 1));
              *dst8 = clip_pixel(ROUND_POWER_OF_TWO(tmp32, round_bits));
            } else {
              *p = sum;
            }
          } else {
            uint8_t *p =
                &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
            sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
            assert(0 <= sum && sum < (1 << (bd + 2)));
            *p = clip_pixel(sum - (1 << (bd - 1)) - (1 << bd));
          }
          sy += gamma;
        }
      }
    }
  }
}

static void warp_plane(WarpedMotionParams *wm, const uint8_t *const ref,
                       int width, int height, int stride, uint8_t *pred,
                       int p_col, int p_row, int p_width, int p_height,
                       int p_stride, int subsampling_x, int subsampling_y,
                       ConvolveParams *conv_params) {
  assert(wm->wmtype <= AFFINE);
  if (wm->wmtype == ROTZOOM) {
    wm->wmmat[5] = wm->wmmat[2];
    wm->wmmat[4] = -wm->wmmat[3];
  }
  const int32_t *const mat = wm->wmmat;
  const int16_t alpha = wm->alpha;
  const int16_t beta = wm->beta;
  const int16_t gamma = wm->gamma;
  const int16_t delta = wm->delta;
  av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row, p_width,
                  p_height, p_stride, subsampling_x, subsampling_y, conv_params,
                  alpha, beta, gamma, delta);
}

static int64_t frame_error(const uint8_t *const ref, int stride,
                           const uint8_t *const dst, int p_width, int p_height,
                           int p_stride) {
  int64_t sum_error = 0;
  for (int i = 0; i < p_height; ++i) {
    for (int j = 0; j < p_width; ++j) {
      sum_error +=
          (int64_t)error_measure(dst[j + i * p_stride] - ref[j + i * stride]);
    }
  }
  return sum_error;
}
static int64_t warp_error(WarpedMotionParams *wm, const uint8_t *const ref,
                          int width, int height, int stride,
                          const uint8_t *const dst, int p_col, int p_row,
                          int p_width, int p_height, int p_stride,
                          int subsampling_x, int subsampling_y,
                          int64_t best_error) {
  int64_t gm_sumerr = 0;
  int warp_w, warp_h;
  int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK);
  int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK);
  uint8_t tmp[WARP_ERROR_BLOCK * WARP_ERROR_BLOCK];
  ConvolveParams conv_params = get_conv_params(0, 0, 8);
  conv_params.use_dist_wtd_comp_avg = 0;

  for (int i = p_row; i < p_row + p_height; i += WARP_ERROR_BLOCK) {
    for (int j = p_col; j < p_col + p_width; j += WARP_ERROR_BLOCK) {
      // avoid warping extra 8x8 blocks in the padded region of the frame
      // when p_width and p_height are not multiples of WARP_ERROR_BLOCK
      warp_w = AOMMIN(error_bsize_w, p_col + p_width - j);
      warp_h = AOMMIN(error_bsize_h, p_row + p_height - i);
      warp_plane(wm, ref, width, height, stride, tmp, j, i, warp_w, warp_h,
                 WARP_ERROR_BLOCK, subsampling_x, subsampling_y, &conv_params);

      gm_sumerr += frame_error(tmp, WARP_ERROR_BLOCK, dst + j + i * p_stride,
                               warp_w, warp_h, p_stride);
      if (gm_sumerr > best_error) return gm_sumerr;
    }
  }
  return gm_sumerr;
}

int64_t av1_frame_error(int use_hbd, int bd, const uint8_t *ref, int stride,
                        uint8_t *dst, int p_width, int p_height, int p_stride) {
  if (use_hbd) {
    return highbd_frame_error(CONVERT_TO_SHORTPTR(ref), stride,
                              CONVERT_TO_SHORTPTR(dst), p_width, p_height,
                              p_stride, bd);
  }
  return frame_error(ref, stride, dst, p_width, p_height, p_stride);
}

int64_t av1_warp_error(WarpedMotionParams *wm, int use_hbd, int bd,
                       const uint8_t *ref, int width, int height, int stride,
                       uint8_t *dst, int p_col, int p_row, int p_width,
                       int p_height, int p_stride, int subsampling_x,
                       int subsampling_y, int64_t best_error) {
  if (wm->wmtype <= AFFINE)
    if (!get_shear_params(wm)) return 1;
  if (use_hbd)
    return highbd_warp_error(wm, ref, width, height, stride, dst, p_col, p_row,
                             p_width, p_height, p_stride, subsampling_x,
                             subsampling_y, bd, best_error);
  return warp_error(wm, ref, width, height, stride, dst, p_col, p_row, p_width,
                    p_height, p_stride, subsampling_x, subsampling_y,
                    best_error);
}

void av1_warp_plane(WarpedMotionParams *wm, int use_hbd, int bd,
                    const uint8_t *ref, int width, int height, int stride,
                    uint8_t *pred, int p_col, int p_row, int p_width,
                    int p_height, int p_stride, int subsampling_x,
                    int subsampling_y, ConvolveParams *conv_params) {
  if (use_hbd)
    highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row,
                      p_width, p_height, p_stride, subsampling_x, subsampling_y,
                      bd, conv_params);
  else
    warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
               p_height, p_stride, subsampling_x, subsampling_y, conv_params);
}
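// Illustrative call sketch (an assumption, not part of the original file):
// given a WarpedMotionParams wm for which get_shear_params(&wm) returned 1,
// an 8-bit luma plane could be warped into a caller-allocated buffer with:
//   ConvolveParams conv_params = get_conv_params(0, 0, 8);
//   av1_warp_plane(&wm, /*use_hbd=*/0, /*bd=*/8, ref, width, height, stride,
//                  pred, /*p_col=*/0, /*p_row=*/0, p_width, p_height,
//                  p_stride, /*subsampling_x=*/0, /*subsampling_y=*/0,
//                  &conv_params);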
#define LS_MV_MAX 256  // max mv in 1/8-pel units
// Use LS_STEP = 8 so that 2 fewer bits are needed for A, Bx, By.
#define LS_STEP 8

// Assuming LS_MV_MAX is < MAX_SB_SIZE * 8,
// the precision needed is:
//   (MAX_SB_SIZE_LOG2 + 3) [for sx * sx magnitude] +
//   (MAX_SB_SIZE_LOG2 + 4) [for sx * dx magnitude] +
//   1 [for sign] +
//   LEAST_SQUARES_SAMPLES_MAX_BITS
//     [for adding up to LEAST_SQUARES_SAMPLES_MAX samples]
// The value is 23
#define LS_MAT_RANGE_BITS \
  ((MAX_SB_SIZE_LOG2 + 4) * 2 + LEAST_SQUARES_SAMPLES_MAX_BITS)

// Bit-depth reduction from the full range
#define LS_MAT_DOWN_BITS 2

// Bit range of A, Bx and By after downshifting
#define LS_MAT_BITS (LS_MAT_RANGE_BITS - LS_MAT_DOWN_BITS)
#define LS_MAT_MIN (-(1 << (LS_MAT_BITS - 1)))
#define LS_MAT_MAX ((1 << (LS_MAT_BITS - 1)) - 1)

// By setting LS_STEP = 8, the lowest 2 bits of every element in A, Bx and By
// are 0, so we can shift those 2 bits away here in addition to
// LS_MAT_DOWN_BITS without losing precision.
#define LS_SQUARE(a)                                          \
  (((a) * (a)*4 + (a)*4 * LS_STEP + LS_STEP * LS_STEP * 2) >> \
   (2 + LS_MAT_DOWN_BITS))
#define LS_PRODUCT1(a, b)                                           \
  (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP) >> \
   (2 + LS_MAT_DOWN_BITS))
#define LS_PRODUCT2(a, b)                                               \
  (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP * 2) >> \
   (2 + LS_MAT_DOWN_BITS))
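// Algebraic note (illustrative): expanding the numerators above gives the
// exact identities
//   LS_SQUARE(a)      == ((2*a + LS_STEP)^2 + LS_STEP^2)
//                            >> (2 + LS_MAT_DOWN_BITS)
//   LS_PRODUCT1(a, b) == ((2*a + LS_STEP) * (2*b + LS_STEP))
//                            >> (2 + LS_MAT_DOWN_BITS)
//   LS_PRODUCT2(a, b) == ((2*a + LS_STEP) * (2*b + LS_STEP) + LS_STEP^2)
//                            >> (2 + LS_MAT_DOWN_BITS)
// i.e. each sample is effectively offset by LS_STEP / 2 before the products
// are formed, with slightly different rounding terms per macro.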
#define USE_LIMITED_PREC_MULT 0

#if USE_LIMITED_PREC_MULT

#define MUL_PREC_BITS 16
static uint16_t resolve_multiplier_64(uint64_t D, int16_t *shift) {
  int msb = 0;
  uint16_t mult = 0;
  *shift = 0;
  if (D != 0) {
    msb = (int16_t)((D >> 32) ? get_msb((unsigned int)(D >> 32)) + 32
                              : get_msb((unsigned int)D));
    if (msb >= MUL_PREC_BITS) {
      mult = (uint16_t)ROUND_POWER_OF_TWO_64(D, msb + 1 - MUL_PREC_BITS);
      *shift = msb + 1 - MUL_PREC_BITS;
    } else {
      mult = (uint16_t)D;
      *shift = 0;
    }
  }
  return mult;
}

static int32_t get_mult_shift_ndiag(int64_t Px, int16_t iDet, int shift) {
  int16_t mshift;
  uint16_t Mul = resolve_multiplier_64(llabs(Px), &mshift);
  int32_t v = (int32_t)Mul * (int32_t)iDet * (Px < 0 ? -1 : 1);
  shift -= mshift;
  if (shift > 0) {
    return (int32_t)clamp(ROUND_POWER_OF_TWO_SIGNED(v, shift),
                          -WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
                          WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
  } else {
    return (int32_t)clamp(v * (1 << (-shift)),
                          -WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
                          WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
  }
}

static int32_t get_mult_shift_diag(int64_t Px, int16_t iDet, int shift) {
  int16_t mshift;
  uint16_t Mul = resolve_multiplier_64(llabs(Px), &mshift);
  int32_t v = (int32_t)Mul * (int32_t)iDet * (Px < 0 ? -1 : 1);
  shift -= mshift;
  if (shift > 0) {
    return (int32_t)clamp(
        ROUND_POWER_OF_TWO_SIGNED(v, shift),
        (1 << WARPEDMODEL_PREC_BITS) - WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
        (1 << WARPEDMODEL_PREC_BITS) + WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
  } else {
    return (int32_t)clamp(
        v * (1 << (-shift)),
        (1 << WARPEDMODEL_PREC_BITS) - WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
        (1 << WARPEDMODEL_PREC_BITS) + WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
  }
}

#else

static int32_t get_mult_shift_ndiag(int64_t Px, int16_t iDet, int shift) {
  int64_t v = Px * (int64_t)iDet;
  return (int32_t)clamp64(ROUND_POWER_OF_TWO_SIGNED_64(v, shift),
                          -WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
                          WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
}

static int32_t get_mult_shift_diag(int64_t Px, int16_t iDet, int shift) {
  int64_t v = Px * (int64_t)iDet;
  return (int32_t)clamp64(
      ROUND_POWER_OF_TWO_SIGNED_64(v, shift),
      (1 << WARPEDMODEL_PREC_BITS) - WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
      (1 << WARPEDMODEL_PREC_BITS) + WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
}
#endif  // USE_LIMITED_PREC_MULT
static int find_affine_int(int np, const int *pts1, const int *pts2,
                           BLOCK_SIZE bsize, int mvy, int mvx,
                           WarpedMotionParams *wm, int mi_row, int mi_col) {
  int32_t A[2][2] = { { 0, 0 }, { 0, 0 } };
  int32_t Bx[2] = { 0, 0 };
  int32_t By[2] = { 0, 0 };
  int i;

  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int rsuy = (AOMMAX(bh, MI_SIZE) / 2 - 1);
  const int rsux = (AOMMAX(bw, MI_SIZE) / 2 - 1);
  const int suy = rsuy * 8;
  const int sux = rsux * 8;
  const int duy = suy + mvy;
  const int dux = sux + mvx;
  const int isuy = (mi_row * MI_SIZE + rsuy);
  const int isux = (mi_col * MI_SIZE + rsux);

  // Assume the center pixel of the block has exactly the same motion vector
  // as transmitted for the block. First shift the origin of the source
  // points to the block center, and the origin of the destination points to
  // the block center added to the motion vector transmitted.
  // Let (xi, yi) denote the source points and (xi', yi') denote destination
  // points after origin shifting, for i = 0, 1, 2, ..., n-1.
  // Then if P = [x0, y0,
  //              x1, y1,
  //              x2, y2,
  //              ....
  //             ]
  //      q = [x0', x1', x2', ... ]'
  //      r = [y0', y1', y2', ... ]'
  // the least squares problems that need to be solved are:
  //      [h1, h2]' = inv(P'P)P'q and
  //      [h3, h4]' = inv(P'P)P'r
  // where the affine transformation is given by:
  //      x' = h1.x + h2.y
  //      y' = h3.x + h4.y
  //
  // The loop below computes: A = P'P, Bx = P'q, By = P'r
  // We then just need to compute inv(A).Bx and inv(A).By for the solutions.
  // Contribution from neighbor block
  for (i = 0; i < np; i++) {
    const int dx = pts2[i * 2] - dux;
    const int dy = pts2[i * 2 + 1] - duy;
    const int sx = pts1[i * 2] - sux;
    const int sy = pts1[i * 2 + 1] - suy;
    // TODO(yunqing): This comparison wouldn't be necessary if the sample
    // selection is done in find_samples(). Also, the global offset can be
    // removed while collecting samples.
    if (abs(sx - dx) < LS_MV_MAX && abs(sy - dy) < LS_MV_MAX) {
      A[0][0] += LS_SQUARE(sx);
      A[0][1] += LS_PRODUCT1(sx, sy);
      A[1][1] += LS_SQUARE(sy);
      Bx[0] += LS_PRODUCT2(sx, dx);
      Bx[1] += LS_PRODUCT1(sy, dx);
      By[0] += LS_PRODUCT1(sx, dy);
      By[1] += LS_PRODUCT2(sy, dy);
    }
  }

  // Just for debugging, and can be removed later.
  assert(A[0][0] >= LS_MAT_MIN && A[0][0] <= LS_MAT_MAX);
  assert(A[0][1] >= LS_MAT_MIN && A[0][1] <= LS_MAT_MAX);
  assert(A[1][1] >= LS_MAT_MIN && A[1][1] <= LS_MAT_MAX);
  assert(Bx[0] >= LS_MAT_MIN && Bx[0] <= LS_MAT_MAX);
  assert(Bx[1] >= LS_MAT_MIN && Bx[1] <= LS_MAT_MAX);
  assert(By[0] >= LS_MAT_MIN && By[0] <= LS_MAT_MAX);
  assert(By[1] >= LS_MAT_MIN && By[1] <= LS_MAT_MAX);

  int64_t Det;
  int16_t iDet, shift;

  // Compute Determinant of A
  Det = (int64_t)A[0][0] * A[1][1] - (int64_t)A[0][1] * A[0][1];
  if (Det == 0) return 1;
  iDet = resolve_divisor_64(llabs(Det), &shift) * (Det < 0 ? -1 : 1);
  shift -= WARPEDMODEL_PREC_BITS;
  if (shift < 0) {
    iDet <<= (-shift);
    shift = 0;
  }

  int64_t Px[2], Py[2];

  // These, divided by the Det, are the least squares solutions
  // (i.e. Px = adj(A).Bx and Py = adj(A).By for the symmetric 2x2 matrix A)
  Px[0] = (int64_t)A[1][1] * Bx[0] - (int64_t)A[0][1] * Bx[1];
  Px[1] = -(int64_t)A[0][1] * Bx[0] + (int64_t)A[0][0] * Bx[1];
  Py[0] = (int64_t)A[1][1] * By[0] - (int64_t)A[0][1] * By[1];
  Py[1] = -(int64_t)A[0][1] * By[0] + (int64_t)A[0][0] * By[1];

  wm->wmmat[2] = get_mult_shift_diag(Px[0], iDet, shift);
  wm->wmmat[3] = get_mult_shift_ndiag(Px[1], iDet, shift);
  wm->wmmat[4] = get_mult_shift_ndiag(Py[0], iDet, shift);
  wm->wmmat[5] = get_mult_shift_diag(Py[1], iDet, shift);

  // Note: In the vx, vy expressions below, the maximum value of each of the
  // 2nd and 3rd terms is (2^16 - 1) * (2^13 - 1). That leaves enough room
  // for the first term so that the overall sum in the worst case fits
  // within 32 bits overall.
  int32_t vx = mvx * (1 << (WARPEDMODEL_PREC_BITS - 3)) -
               (isux * (wm->wmmat[2] - (1 << WARPEDMODEL_PREC_BITS)) +
                isuy * wm->wmmat[3]);
  int32_t vy = mvy * (1 << (WARPEDMODEL_PREC_BITS - 3)) -
               (isux * wm->wmmat[4] +
                isuy * (wm->wmmat[5] - (1 << WARPEDMODEL_PREC_BITS)));
  wm->wmmat[0] =
      clamp(vx, -WARPEDMODEL_TRANS_CLAMP, WARPEDMODEL_TRANS_CLAMP - 1);
  wm->wmmat[1] =
      clamp(vy, -WARPEDMODEL_TRANS_CLAMP, WARPEDMODEL_TRANS_CLAMP - 1);

  wm->wmmat[6] = wm->wmmat[7] = 0;
  return 0;
}

int find_projection(int np, int *pts1, int *pts2, BLOCK_SIZE bsize, int mvy,
                    int mvx, WarpedMotionParams *wm_params, int mi_row,
                    int mi_col) {
  assert(wm_params->wmtype == AFFINE);

  if (find_affine_int(np, pts1, pts2, bsize, mvy, mvx, wm_params, mi_row,
                      mi_col))
    return 1;

  // check compatibility with the fast warp filter
  if (!get_shear_params(wm_params)) return 1;

  return 0;
}