Home | History | Annotate | Download | only in minikin
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "OptimalLineBreaker.h"
     18 
     19 #include <algorithm>
     20 #include <limits>
     21 
     22 #include "minikin/Characters.h"
     23 #include "minikin/Layout.h"
     24 #include "minikin/Range.h"
     25 #include "minikin/U16StringPiece.h"
     26 
     27 #include "HyphenatorMap.h"
     28 #include "LayoutUtils.h"
     29 #include "LineBreakerUtil.h"
     30 #include "Locale.h"
     31 #include "LocaleListCache.h"
     32 #include "MinikinInternal.h"
     33 #include "WordBreaker.h"
     34 
     35 namespace minikin {
     36 
     37 namespace {
     38 
     39 // Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
     40 // constants are larger than any reasonable actual width score.
     41 constexpr float SCORE_INFTY = std::numeric_limits<float>::max();
     42 constexpr float SCORE_OVERFULL = 1e12f;
     43 constexpr float SCORE_DESPERATE = 1e10f;
     44 
     45 // Multiplier for hyphen penalty on last line.
     46 constexpr float LAST_LINE_PENALTY_MULTIPLIER = 4.0f;
     47 // Penalty assigned to each line break (to try to minimize number of lines)
     48 // TODO: when we implement full justification (so spaces can shrink and stretch), this is
     49 // probably not the most appropriate method.
     50 constexpr float LINE_PENALTY_MULTIPLIER = 2.0f;
     51 
     52 // Penalty assigned to shrinking the whitepsace.
     53 constexpr float SHRINK_PENALTY_MULTIPLIER = 4.0f;
     54 
     55 // Maximum amount that spaces can shrink, in justified text.
     56 constexpr float SHRINKABILITY = 1.0 / 3.0;
     57 
     58 // A single candidate break
     59 struct Candidate {
     60     uint32_t offset;  // offset to text buffer, in code units
     61 
     62     ParaWidth preBreak;       // width of text until this point, if we decide to not break here:
     63                               // preBreak is used as an optimized way to calculate the width
     64                               // between two candidates. The line width between two line break
     65                               // candidates i and j is calculated as postBreak(j) - preBreak(i).
     66     ParaWidth postBreak;      // width of text until this point, if we decide to break here
     67     float penalty;            // penalty of this break (for example, hyphen penalty)
     68     uint32_t preSpaceCount;   // preceding space count before breaking
     69     uint32_t postSpaceCount;  // preceding space count after breaking
     70     HyphenationType hyphenType;
     71     bool isRtl;  // The direction of the bidi run containing or ending in this candidate
     72 
     73     Candidate(uint32_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty,
     74               uint32_t preSpaceCount, uint32_t postSpaceCount, HyphenationType hyphenType,
     75               bool isRtl)
     76             : offset(offset),
     77               preBreak(preBreak),
     78               postBreak(postBreak),
     79               penalty(penalty),
     80               preSpaceCount(preSpaceCount),
     81               postSpaceCount(postSpaceCount),
     82               hyphenType(hyphenType),
     83               isRtl(isRtl) {}
     84 };
     85 
     86 // A context of line break optimization.
     87 struct OptimizeContext {
     88     // The break candidates.
     89     std::vector<Candidate> candidates;
     90 
     91     // The penalty for the number of lines.
     92     float linePenalty = 0.0f;
     93 
     94     // The width of a space. May be 0 if there are no spaces.
     95     // Note: if there are multiple different widths for spaces (for example, because of mixing of
     96     // fonts), it's only guaranteed to pick one.
     97     float spaceWidth = 0.0f;
     98 
     99     // Append desperate break point to the candidates.
    100     inline void pushDesperate(uint32_t offset, ParaWidth sumOfCharWidths, uint32_t spaceCount,
    101                               bool isRtl) {
    102         candidates.emplace_back(offset, sumOfCharWidths, sumOfCharWidths, SCORE_DESPERATE,
    103                                 spaceCount, spaceCount,
    104                                 HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, isRtl);
    105     }
    106 
    107     // Append hyphenation break point to the candidates.
    108     inline void pushHyphenation(uint32_t offset, ParaWidth preBreak, ParaWidth postBreak,
    109                                 float penalty, uint32_t spaceCount, HyphenationType type,
    110                                 bool isRtl) {
    111         candidates.emplace_back(offset, preBreak, postBreak, penalty, spaceCount, spaceCount, type,
    112                                 isRtl);
    113     }
    114 
    115     // Append word break point to the candidates.
    116     inline void pushWordBreak(uint32_t offset, ParaWidth preBreak, ParaWidth postBreak,
    117                               float penalty, uint32_t preSpaceCount, uint32_t postSpaceCount,
    118                               bool isRtl) {
    119         candidates.emplace_back(offset, preBreak, postBreak, penalty, preSpaceCount, postSpaceCount,
    120                                 HyphenationType::DONT_BREAK, isRtl);
    121     }
    122 
    123     OptimizeContext() {
    124         candidates.emplace_back(0, 0.0f, 0.0f, 0.0f, 0, 0, HyphenationType::DONT_BREAK, false);
    125     }
    126 };
    127 
    128 // Compute the penalty for the run and returns penalty for hyphenation and number of lines.
    129 std::pair<float, float> computePenalties(const Run& run, const LineWidth& lineWidth,
    130                                          HyphenationFrequency frequency, bool justified) {
    131     float linePenalty = 0.0;
    132     const MinikinPaint* paint = run.getPaint();
    133     // a heuristic that seems to perform well
    134     float hyphenPenalty = 0.5 * paint->size * paint->scaleX * lineWidth.getAt(0);
    135     if (frequency == HyphenationFrequency::Normal) {
    136         hyphenPenalty *= 4.0;  // TODO: Replace with a better value after some testing
    137     }
    138 
    139     if (justified) {
    140         // Make hyphenation more aggressive for fully justified text (so that "normal" in
    141         // justified mode is the same as "full" in ragged-right).
    142         hyphenPenalty *= 0.25;
    143     } else {
    144         // Line penalty is zero for justified text.
    145         linePenalty = hyphenPenalty * LINE_PENALTY_MULTIPLIER;
    146     }
    147 
    148     return std::make_pair(hyphenPenalty, linePenalty);
    149 }
    150 
    151 // Represents a desperate break point.
    152 struct DesperateBreak {
    153     // The break offset.
    154     uint32_t offset;
    155 
    156     // The sum of the character width from the beginning of the word.
    157     ParaWidth sumOfChars;
    158 
    159     DesperateBreak(uint32_t offset, ParaWidth sumOfChars)
    160             : offset(offset), sumOfChars(sumOfChars){};
    161 };
    162 
    163 // Retrieves desperate break points from a word.
    164 std::vector<DesperateBreak> populateDesperatePoints(const MeasuredText& measured,
    165                                                     const Range& range) {
    166     std::vector<DesperateBreak> out;
    167     ParaWidth width = measured.widths[range.getStart()];
    168     for (uint32_t i = range.getStart() + 1; i < range.getEnd(); ++i) {
    169         const float w = measured.widths[i];
    170         if (w == 0) {
    171             continue;  // w == 0 means here is not a grapheme bounds. Don't break here.
    172         }
    173         out.emplace_back(i, width);
    174         width += w;
    175     }
    176     return out;
    177 }
    178 
    179 // Append hyphenation break points and desperate break points.
    180 // If an offset is a both candidate for hyphenation and desperate break points, place desperate
    181 // break candidate first and hyphenation break points second since the result width of the desperate
    182 // break is shorter than hyphenation break.
    183 // This is important since DP in computeBreaksOptimal assumes that the result line width is
    184 // increased by break offset.
    185 void appendWithMerging(std::vector<HyphenBreak>::const_iterator hyIter,
    186                        std::vector<HyphenBreak>::const_iterator endHyIter,
    187                        const std::vector<DesperateBreak>& desperates, const CharProcessor& proc,
    188                        float hyphenPenalty, bool isRtl, OptimizeContext* out) {
    189     auto d = desperates.begin();
    190     while (hyIter != endHyIter || d != desperates.end()) {
    191         // If both hyphen breaks and desperate breaks point to the same offset, push desperate
    192         // breaks first.
    193         if (d != desperates.end() && (hyIter == endHyIter || d->offset <= hyIter->offset)) {
    194             out->pushDesperate(d->offset, proc.sumOfCharWidthsAtPrevWordBreak + d->sumOfChars,
    195                                proc.effectiveSpaceCount, isRtl);
    196             d++;
    197         } else {
    198             out->pushHyphenation(hyIter->offset, proc.sumOfCharWidths - hyIter->second,
    199                                  proc.sumOfCharWidthsAtPrevWordBreak + hyIter->first, hyphenPenalty,
    200                                  proc.effectiveSpaceCount, hyIter->type, isRtl);
    201             hyIter++;
    202         }
    203     }
    204 }
    205 
    206 // Enumerate all line break candidates.
    207 OptimizeContext populateCandidates(const U16StringPiece& textBuf, const MeasuredText& measured,
    208                                    const LineWidth& lineWidth, HyphenationFrequency frequency,
    209                                    bool isJustified) {
    210     const ParaWidth minLineWidth = lineWidth.getMin();
    211     CharProcessor proc(textBuf);
    212 
    213     OptimizeContext result;
    214 
    215     const bool doHyphenation = frequency != HyphenationFrequency::None;
    216     auto hyIter = std::begin(measured.hyphenBreaks);
    217 
    218     for (const auto& run : measured.runs) {
    219         const bool isRtl = run->isRtl();
    220         const Range& range = run->getRange();
    221 
    222         // Compute penalty parameters.
    223         float hyphenPenalty = 0.0f;
    224         if (run->canHyphenate()) {
    225             auto penalties = computePenalties(*run, lineWidth, frequency, isJustified);
    226             hyphenPenalty = penalties.first;
    227             result.linePenalty = std::max(penalties.second, result.linePenalty);
    228         }
    229 
    230         proc.updateLocaleIfNecessary(*run);
    231 
    232         for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) {
    233             MINIKIN_ASSERT(textBuf[i] != CHAR_TAB, "TAB is not supported in optimal line breaker");
    234             proc.feedChar(i, textBuf[i], measured.widths[i]);
    235 
    236             const uint32_t nextCharOffset = i + 1;
    237             if (nextCharOffset != proc.nextWordBreak) {
    238                 continue;  // Wait until word break point.
    239             }
    240 
    241             // Add hyphenation and desperate break points.
    242             std::vector<HyphenBreak> hyphenedBreaks;
    243             std::vector<DesperateBreak> desperateBreaks;
    244             const Range contextRange = proc.contextRange();
    245 
    246             auto beginHyIter = hyIter;
    247             while (hyIter != std::end(measured.hyphenBreaks) &&
    248                    hyIter->offset < contextRange.getEnd()) {
    249                 hyIter++;
    250             }
    251             if (proc.widthFromLastWordBreak() > minLineWidth) {
    252                 desperateBreaks = populateDesperatePoints(measured, contextRange);
    253             }
    254             appendWithMerging(beginHyIter, doHyphenation ? hyIter : beginHyIter, desperateBreaks,
    255                               proc, hyphenPenalty, isRtl, &result);
    256 
    257             // We skip breaks for zero-width characters inside replacement spans.
    258             if (nextCharOffset == range.getEnd() || measured.widths[nextCharOffset] > 0) {
    259                 const float penalty = hyphenPenalty * proc.wordBreakPenalty();
    260                 result.pushWordBreak(nextCharOffset, proc.sumOfCharWidths, proc.effectiveWidth,
    261                                      penalty, proc.rawSpaceCount, proc.effectiveSpaceCount, isRtl);
    262             }
    263         }
    264     }
    265     result.spaceWidth = proc.spaceWidth;
    266     return result;
    267 }
    268 
    269 class LineBreakOptimizer {
    270 public:
    271     LineBreakOptimizer() {}
    272 
    273     LineBreakResult computeBreaks(const OptimizeContext& context, const U16StringPiece& textBuf,
    274                                   const MeasuredText& measuredText, const LineWidth& lineWidth,
    275                                   BreakStrategy strategy, bool justified);
    276 
    277 private:
    278     // Data used to compute optimal line breaks
    279     struct OptimalBreaksData {
    280         float score;          // best score found for this break
    281         uint32_t prev;        // index to previous break
    282         uint32_t lineNumber;  // the computed line number of the candidate
    283     };
    284     LineBreakResult finishBreaksOptimal(const U16StringPiece& textBuf, const MeasuredText& measured,
    285                                         const std::vector<OptimalBreaksData>& breaksData,
    286                                         const std::vector<Candidate>& candidates);
    287 
    288     MinikinExtent computeMaxExtent(const U16StringPiece& textBuf, const MeasuredText& measured,
    289                                    uint32_t start, uint32_t end) const;
    290 };
    291 
    292 // Find the needed extent between the start and end ranges. start is inclusive and end is exclusive.
    293 // Both are indices of the source string.
    294 MinikinExtent LineBreakOptimizer::computeMaxExtent(const U16StringPiece& textBuf,
    295                                                    const MeasuredText& measured, uint32_t start,
    296                                                    uint32_t end) const {
    297     MinikinExtent res = {0.0, 0.0, 0.0};
    298     for (uint32_t j = start; j < end; j++) {
    299         if (!isLineSpaceExcludeChar(textBuf[j])) {
    300             res.extendBy(measured.extents[j]);
    301         }
    302     }
    303     return res;
    304 }
    305 
    306 // Follow "prev" links in candidates array, and copy to result arrays.
    307 LineBreakResult LineBreakOptimizer::finishBreaksOptimal(
    308         const U16StringPiece& textBuf, const MeasuredText& measured,
    309         const std::vector<OptimalBreaksData>& breaksData,
    310         const std::vector<Candidate>& candidates) {
    311     LineBreakResult result;
    312     const uint32_t nCand = candidates.size();
    313     uint32_t prevIndex;
    314     for (uint32_t i = nCand - 1; i > 0; i = prevIndex) {
    315         prevIndex = breaksData[i].prev;
    316         const Candidate& cand = candidates[i];
    317         const Candidate& prev = candidates[prevIndex];
    318 
    319         result.breakPoints.push_back(cand.offset);
    320         result.widths.push_back(cand.postBreak - prev.preBreak);
    321         MinikinExtent extent = computeMaxExtent(textBuf, measured, prev.offset, cand.offset);
    322         result.ascents.push_back(extent.ascent);
    323         result.descents.push_back(extent.descent);
    324 
    325         const HyphenEdit edit =
    326                 packHyphenEdit(editForNextLine(prev.hyphenType), editForThisLine(cand.hyphenType));
    327         result.flags.push_back(static_cast<int>(edit));
    328     }
    329     result.reverse();
    330     return result;
    331 }
    332 
    333 LineBreakResult LineBreakOptimizer::computeBreaks(const OptimizeContext& context,
    334                                                   const U16StringPiece& textBuf,
    335                                                   const MeasuredText& measured,
    336                                                   const LineWidth& lineWidth,
    337                                                   BreakStrategy strategy, bool justified) {
    338     const std::vector<Candidate>& candidates = context.candidates;
    339     uint32_t active = 0;
    340     const uint32_t nCand = candidates.size();
    341     const float maxShrink = justified ? SHRINKABILITY * context.spaceWidth : 0.0f;
    342 
    343     std::vector<OptimalBreaksData> breaksData;
    344     breaksData.reserve(nCand);
    345     breaksData.push_back({0.0, 0, 0});  // The first candidate is always at the first line.
    346 
    347     // "i" iterates through candidates for the end of the line.
    348     for (uint32_t i = 1; i < nCand; i++) {
    349         const bool atEnd = i == nCand - 1;
    350         float best = SCORE_INFTY;
    351         uint32_t bestPrev = 0;
    352 
    353         uint32_t lineNumberLast = breaksData[active].lineNumber;
    354         float width = lineWidth.getAt(lineNumberLast);
    355 
    356         ParaWidth leftEdge = candidates[i].postBreak - width;
    357         float bestHope = 0;
    358 
    359         // "j" iterates through candidates for the beginning of the line.
    360         for (uint32_t j = active; j < i; j++) {
    361             const uint32_t lineNumber = breaksData[j].lineNumber;
    362             if (lineNumber != lineNumberLast) {
    363                 const float widthNew = lineWidth.getAt(lineNumber);
    364                 if (widthNew != width) {
    365                     leftEdge = candidates[i].postBreak - width;
    366                     bestHope = 0;
    367                     width = widthNew;
    368                 }
    369                 lineNumberLast = lineNumber;
    370             }
    371             const float jScore = breaksData[j].score;
    372             if (jScore + bestHope >= best) continue;
    373             const float delta = candidates[j].preBreak - leftEdge;
    374 
    375             // compute width score for line
    376 
    377             // Note: the "bestHope" optimization makes the assumption that, when delta is
    378             // non-negative, widthScore will increase monotonically as successive candidate
    379             // breaks are considered.
    380             float widthScore = 0.0f;
    381             float additionalPenalty = 0.0f;
    382             if ((atEnd || !justified) && delta < 0) {
    383                 widthScore = SCORE_OVERFULL;
    384             } else if (atEnd && strategy != BreakStrategy::Balanced) {
    385                 // increase penalty for hyphen on last line
    386                 additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * candidates[j].penalty;
    387             } else {
    388                 widthScore = delta * delta;
    389                 if (delta < 0) {
    390                     if (-delta <
    391                         maxShrink * (candidates[i].postSpaceCount - candidates[j].preSpaceCount)) {
    392                         widthScore *= SHRINK_PENALTY_MULTIPLIER;
    393                     } else {
    394                         widthScore = SCORE_OVERFULL;
    395                     }
    396                 }
    397             }
    398 
    399             if (delta < 0) {
    400                 active = j + 1;
    401             } else {
    402                 bestHope = widthScore;
    403             }
    404 
    405             const float score = jScore + widthScore + additionalPenalty;
    406             if (score <= best) {
    407                 best = score;
    408                 bestPrev = j;
    409             }
    410         }
    411         breaksData.push_back({best + candidates[i].penalty + context.linePenalty,  // score
    412                               bestPrev,                                            // prev
    413                               breaksData[bestPrev].lineNumber + 1});               // lineNumber
    414     }
    415     return finishBreaksOptimal(textBuf, measured, breaksData, candidates);
    416 }
    417 
    418 }  // namespace
    419 
    420 LineBreakResult breakLineOptimal(const U16StringPiece& textBuf, const MeasuredText& measured,
    421                                  const LineWidth& lineWidth, BreakStrategy strategy,
    422                                  HyphenationFrequency frequency, bool justified) {
    423     if (textBuf.size() == 0) {
    424         return LineBreakResult();
    425     }
    426     const OptimizeContext context =
    427             populateCandidates(textBuf, measured, lineWidth, frequency, justified);
    428     LineBreakOptimizer optimizer;
    429     return optimizer.computeBreaks(context, textBuf, measured, lineWidth, strategy, justified);
    430 }
    431 
    432 }  // namespace minikin
    433