Home | History | Annotate | Download | only in minikin
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /**
     18  * A wrapper around ICU's line break iterator, that gives customized line
     19  * break opportunities, as well as identifying words for the purpose of
     20  * hyphenation.
     21  */
     22 
     23 #ifndef MINIKIN_WORD_BREAKER_H
     24 #define MINIKIN_WORD_BREAKER_H
     25 
     26 #include "unicode/brkiter.h"
     27 #include <memory>
     28 
     29 namespace android {
     30 
     31 class WordBreaker {
     32 public:
     33     ~WordBreaker() {
     34         finish();
     35     }
     36 
     37     void setLocale(const icu::Locale& locale);
     38 
     39     void setText(const uint16_t* data, size_t size);
     40 
     41     // Advance iterator to next word break. Return offset, or -1 if EOT
     42     ssize_t next();
     43 
     44     // Current offset of iterator, equal to 0 at BOT or last return from next()
     45     ssize_t current() const;
     46 
     47     // After calling next(), wordStart() and wordEnd() are offsets defining the previous
     48     // word. If wordEnd <= wordStart, it's not a word for the purpose of hyphenation.
     49     ssize_t wordStart() const;
     50 
     51     ssize_t wordEnd() const;
     52 
     53     int breakBadness() const;
     54 
     55     void finish();
     56 
     57 private:
     58     std::unique_ptr<icu::BreakIterator> mBreakIterator;
     59     UText mUText = UTEXT_INITIALIZER;
     60     const uint16_t* mText = nullptr;
     61     size_t mTextSize;
     62     ssize_t mLast;
     63     ssize_t mCurrent;
     64     bool mIteratorWasReset;
     65 
     66     // state for the email address / url detector
     67     ssize_t mScanOffset;
     68     bool mInEmailOrUrl;
     69 };
     70 
     71 }  // namespace
     72 
     73 #endif  // MINIKIN_WORD_BREAKER_H
     74