1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "SkConvolver.h" 6 #include "SkOpts.h" 7 #include "SkTArray.h" 8 9 namespace { 10 // Stores a list of rows in a circular buffer. The usage is you write into it 11 // by calling AdvanceRow. It will keep track of which row in the buffer it 12 // should use next, and the total number of rows added. 13 class CircularRowBuffer { 14 public: 15 // The number of pixels in each row is given in |sourceRowPixelWidth|. 16 // The maximum number of rows needed in the buffer is |maxYFilterSize| 17 // (we only need to store enough rows for the biggest filter). 18 // 19 // We use the |firstInputRow| to compute the coordinates of all of the 20 // following rows returned by Advance(). 21 CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize, 22 int firstInputRow) 23 : fRowByteWidth(destRowPixelWidth * 4), 24 fNumRows(maxYFilterSize), 25 fNextRow(0), 26 fNextRowCoordinate(firstInputRow) { 27 fBuffer.reset(fRowByteWidth * maxYFilterSize); 28 fRowAddresses.reset(fNumRows); 29 } 30 31 // Moves to the next row in the buffer, returning a pointer to the beginning 32 // of it. 33 unsigned char* advanceRow() { 34 unsigned char* row = &fBuffer[fNextRow * fRowByteWidth]; 35 fNextRowCoordinate++; 36 37 // Set the pointer to the next row to use, wrapping around if necessary. 38 fNextRow++; 39 if (fNextRow == fNumRows) { 40 fNextRow = 0; 41 } 42 return row; 43 } 44 45 // Returns a pointer to an "unrolled" array of rows. These rows will start 46 // at the y coordinate placed into |*firstRowIndex| and will continue in 47 // order for the maximum number of rows in this circular buffer. 48 // 49 // The |firstRowIndex_| may be negative. This means the circular buffer 50 // starts before the top of the image (it hasn't been filled yet). 51 unsigned char* const* GetRowAddresses(int* firstRowIndex) { 52 // Example for a 4-element circular buffer holding coords 6-9. 53 // Row 0 Coord 8 54 // Row 1 Coord 9 55 // Row 2 Coord 6 <- fNextRow = 2, fNextRowCoordinate = 10. 56 // Row 3 Coord 7 57 // 58 // The "next" row is also the first (lowest) coordinate. This computation 59 // may yield a negative value, but that's OK, the math will work out 60 // since the user of this buffer will compute the offset relative 61 // to the firstRowIndex and the negative rows will never be used. 62 *firstRowIndex = fNextRowCoordinate - fNumRows; 63 64 int curRow = fNextRow; 65 for (int i = 0; i < fNumRows; i++) { 66 fRowAddresses[i] = &fBuffer[curRow * fRowByteWidth]; 67 68 // Advance to the next row, wrapping if necessary. 69 curRow++; 70 if (curRow == fNumRows) { 71 curRow = 0; 72 } 73 } 74 return &fRowAddresses[0]; 75 } 76 77 private: 78 // The buffer storing the rows. They are packed, each one fRowByteWidth. 79 SkTArray<unsigned char> fBuffer; 80 81 // Number of bytes per row in the |buffer|. 82 int fRowByteWidth; 83 84 // The number of rows available in the buffer. 85 int fNumRows; 86 87 // The next row index we should write into. This wraps around as the 88 // circular buffer is used. 89 int fNextRow; 90 91 // The y coordinate of the |fNextRow|. This is incremented each time a 92 // new row is appended and does not wrap. 93 int fNextRowCoordinate; 94 95 // Buffer used by GetRowAddresses(). 96 SkTArray<unsigned char*> fRowAddresses; 97 }; 98 99 } // namespace 100 101 // SkConvolutionFilter1D --------------------------------------------------------- 102 103 SkConvolutionFilter1D::SkConvolutionFilter1D() 104 : fMaxFilter(0) { 105 } 106 107 SkConvolutionFilter1D::~SkConvolutionFilter1D() { 108 } 109 110 void SkConvolutionFilter1D::AddFilter(int filterOffset, 111 const ConvolutionFixed* filterValues, 112 int filterLength) { 113 // It is common for leading/trailing filter values to be zeros. In such 114 // cases it is beneficial to only store the central factors. 115 // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on 116 // a 1080p image this optimization gives a ~10% speed improvement. 117 int filterSize = filterLength; 118 int firstNonZero = 0; 119 while (firstNonZero < filterLength && filterValues[firstNonZero] == 0) { 120 firstNonZero++; 121 } 122 123 if (firstNonZero < filterLength) { 124 // Here we have at least one non-zero factor. 125 int lastNonZero = filterLength - 1; 126 while (lastNonZero >= 0 && filterValues[lastNonZero] == 0) { 127 lastNonZero--; 128 } 129 130 filterOffset += firstNonZero; 131 filterLength = lastNonZero + 1 - firstNonZero; 132 SkASSERT(filterLength > 0); 133 134 fFilterValues.append(filterLength, &filterValues[firstNonZero]); 135 } else { 136 // Here all the factors were zeroes. 137 filterLength = 0; 138 } 139 140 FilterInstance instance; 141 142 // We pushed filterLength elements onto fFilterValues 143 instance.fDataLocation = (static_cast<int>(fFilterValues.count()) - 144 filterLength); 145 instance.fOffset = filterOffset; 146 instance.fTrimmedLength = filterLength; 147 instance.fLength = filterSize; 148 fFilters.push(instance); 149 150 fMaxFilter = SkTMax(fMaxFilter, filterLength); 151 } 152 153 const SkConvolutionFilter1D::ConvolutionFixed* SkConvolutionFilter1D::GetSingleFilter( 154 int* specifiedFilterlength, 155 int* filterOffset, 156 int* filterLength) const { 157 const FilterInstance& filter = fFilters[0]; 158 *filterOffset = filter.fOffset; 159 *filterLength = filter.fTrimmedLength; 160 *specifiedFilterlength = filter.fLength; 161 if (filter.fTrimmedLength == 0) { 162 return nullptr; 163 } 164 165 return &fFilterValues[filter.fDataLocation]; 166 } 167 168 bool BGRAConvolve2D(const unsigned char* sourceData, 169 int sourceByteRowStride, 170 bool sourceHasAlpha, 171 const SkConvolutionFilter1D& filterX, 172 const SkConvolutionFilter1D& filterY, 173 int outputByteRowStride, 174 unsigned char* output) { 175 176 int maxYFilterSize = filterY.maxFilter(); 177 178 // The next row in the input that we will generate a horizontally 179 // convolved row for. If the filter doesn't start at the beginning of the 180 // image (this is the case when we are only resizing a subset), then we 181 // don't want to generate any output rows before that. Compute the starting 182 // row for convolution as the first pixel for the first vertical filter. 183 int filterOffset, filterLength; 184 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = 185 filterY.FilterForValue(0, &filterOffset, &filterLength); 186 int nextXRow = filterOffset; 187 188 // We loop over each row in the input doing a horizontal convolution. This 189 // will result in a horizontally convolved image. We write the results into 190 // a circular buffer of convolved rows and do vertical convolution as rows 191 // are available. This prevents us from having to store the entire 192 // intermediate image and helps cache coherency. 193 // We will need four extra rows to allow horizontal convolution could be done 194 // simultaneously. We also pad each row in row buffer to be aligned-up to 195 // 32 bytes. 196 // TODO(jiesun): We do not use aligned load from row buffer in vertical 197 // convolution pass yet. Somehow Windows does not like it. 198 int rowBufferWidth = (filterX.numValues() + 31) & ~0x1F; 199 int rowBufferHeight = maxYFilterSize + 200 (SkOpts::convolve_4_rows_horizontally != nullptr ? 4 : 0); 201 202 // check for too-big allocation requests : crbug.com/528628 203 { 204 int64_t size = sk_64_mul(rowBufferWidth, rowBufferHeight); 205 // need some limit, to avoid over-committing success from malloc, but then 206 // crashing when we try to actually use the memory. 207 // 100meg seems big enough to allow "normal" zoom factors and image sizes through 208 // while avoiding the crash seen by the bug (crbug.com/528628) 209 if (size > 100 * 1024 * 1024) { 210 // SkDebugf("BGRAConvolve2D: tmp allocation [%lld] too big\n", size); 211 return false; 212 } 213 } 214 215 CircularRowBuffer rowBuffer(rowBufferWidth, 216 rowBufferHeight, 217 filterOffset); 218 219 // Loop over every possible output row, processing just enough horizontal 220 // convolutions to run each subsequent vertical convolution. 221 SkASSERT(outputByteRowStride >= filterX.numValues() * 4); 222 int numOutputRows = filterY.numValues(); 223 224 // We need to check which is the last line to convolve before we advance 4 225 // lines in one iteration. 226 int lastFilterOffset, lastFilterLength; 227 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, 228 &lastFilterLength); 229 230 for (int outY = 0; outY < numOutputRows; outY++) { 231 filterValues = filterY.FilterForValue(outY, 232 &filterOffset, &filterLength); 233 234 // Generate output rows until we have enough to run the current filter. 235 while (nextXRow < filterOffset + filterLength) { 236 if (SkOpts::convolve_4_rows_horizontally != nullptr && 237 nextXRow + 3 < lastFilterOffset + lastFilterLength) { 238 const unsigned char* src[4]; 239 unsigned char* outRow[4]; 240 for (int i = 0; i < 4; ++i) { 241 src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRowStride]; 242 outRow[i] = rowBuffer.advanceRow(); 243 } 244 SkOpts::convolve_4_rows_horizontally(src, filterX, outRow, 4*rowBufferWidth); 245 nextXRow += 4; 246 } else { 247 SkOpts::convolve_horizontally( 248 &sourceData[(uint64_t)nextXRow * sourceByteRowStride], 249 filterX, rowBuffer.advanceRow(), sourceHasAlpha); 250 nextXRow++; 251 } 252 } 253 254 // Compute where in the output image this row of final data will go. 255 unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStride]; 256 257 // Get the list of rows that the circular buffer has, in order. 258 int firstRowInCircularBuffer; 259 unsigned char* const* rowsToConvolve = 260 rowBuffer.GetRowAddresses(&firstRowInCircularBuffer); 261 262 // Now compute the start of the subset of those rows that the filter needs. 263 unsigned char* const* firstRowForFilter = 264 &rowsToConvolve[filterOffset - firstRowInCircularBuffer]; 265 266 SkOpts::convolve_vertically(filterValues, filterLength, 267 firstRowForFilter, 268 filterX.numValues(), curOutputRow, 269 sourceHasAlpha); 270 } 271 return true; 272 } 273