Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 2013 Google Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Redistributions in binary form must reproduce the above
     11  * copyright notice, this list of conditions and the following disclaimer
     12  * in the documentation and/or other materials provided with the
     13  * distribution.
     14  *     * Neither the name of Google Inc. nor the names of its
     15  * contributors may be used to endorse or promote products derived from
     16  * this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 #ifndef StringUTF8Adaptor_h
     32 #define StringUTF8Adaptor_h
     33 
     34 #include "wtf/text/CString.h"
     35 #include "wtf/text/TextEncoding.h"
     36 #include "wtf/text/WTFString.h"
     37 
     38 namespace WTF {
     39 
     40 // This class lets you get UTF-8 data out of a String without mallocing a
     41 // separate buffer to hold the data if the String happens to be 8 bit and
     42 // contain only ASCII characters.
     43 class StringUTF8Adaptor {
     44 public:
     45     enum ShouldNormalize {
     46         DoNotNormalize,
     47         Normalize
     48     };
     49 
     50     explicit StringUTF8Adaptor(const String& string, ShouldNormalize normalize = DoNotNormalize, UnencodableHandling handling = EntitiesForUnencodables)
     51         : m_data(0)
     52         , m_length(0)
     53     {
     54         if (string.isEmpty())
     55             return;
     56         // Unfortunately, 8 bit WTFStrings are encoded in Latin-1 and GURL uses UTF-8
     57         // when processing 8 bit strings. If |relative| is entirely ASCII, we luck out
     58         // and can avoid mallocing a new buffer to hold the UTF-8 data because UTF-8
     59         // and Latin-1 use the same code units for ASCII code points.
     60         if (string.is8Bit() && string.containsOnlyASCII()) {
     61             m_data = reinterpret_cast<const char*>(string.characters8());
     62             m_length = string.length();
     63         } else {
     64             if (normalize == Normalize)
     65                 m_utf8Buffer = UTF8Encoding().normalizeAndEncode(string, handling);
     66             else
     67                 m_utf8Buffer = string.utf8();
     68             m_data = m_utf8Buffer.data();
     69             m_length = m_utf8Buffer.length();
     70         }
     71     }
     72 
     73     const char* data() const { return m_data; }
     74     size_t length() const { return m_length; }
     75 
     76 private:
     77     CString m_utf8Buffer;
     78     const char* m_data;
     79     size_t m_length;
     80 };
     81 
     82 } // namespace WTF
     83 
     84 using WTF::StringUTF8Adaptor;
     85 
     86 #endif // StringUTF8Adaptor_h
     87