Home | History | Annotate | Download | only in publicsuffix
      1 /*
      2  * Copyright (C) 2008 The Guava Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.thirdparty.publicsuffix;
     18 
     19 import com.google.common.annotations.GwtCompatible;
     20 import com.google.common.base.Joiner;
     21 import com.google.common.collect.ImmutableMap;
     22 import com.google.common.collect.Lists;
     23 
     24 import java.util.List;
     25 
     26 /**
     27  * Parser for a map of reversed domain names stored as a serialized radix tree.
     28  */
     29 @GwtCompatible
     30 class TrieParser {
     31 
     32   private static final Joiner PREFIX_JOINER = Joiner.on("");
     33 
     34   /**
     35    * Parses a serialized trie representation of a map of reversed public
     36    * suffixes into an immutable map of public suffixes.
     37    */
     38   static ImmutableMap<String, PublicSuffixType> parseTrie(CharSequence encoded) {
     39     ImmutableMap.Builder<String, PublicSuffixType> builder = ImmutableMap.builder();
     40     int encodedLen = encoded.length();
     41     int idx = 0;
     42     while (idx < encodedLen) {
     43       idx += doParseTrieToBuilder(
     44           Lists.<CharSequence>newLinkedList(),
     45           encoded.subSequence(idx, encodedLen),
     46           builder);
     47     }
     48     return builder.build();
     49   }
     50 
     51   /**
     52    * Parses a trie node and returns the number of characters consumed.
     53    *
     54    * @param stack The prefixes that preceed the characters represented by this
     55    *     node. Each entry of the stack is in reverse order.
     56    * @param encoded The serialized trie.
     57    * @param builder A map builder to which all entries will be added.
     58    * @return The number of characters consumed from {@code encoded}.
     59    */
     60   private static int doParseTrieToBuilder(
     61       List<CharSequence> stack,
     62       CharSequence encoded,
     63       ImmutableMap.Builder<String, PublicSuffixType> builder) {
     64 
     65     int encodedLen = encoded.length();
     66     int idx = 0;
     67     char c = '\0';
     68 
     69     // Read all of the characters for this node.
     70     for ( ; idx < encodedLen; idx++) {
     71       c = encoded.charAt(idx);
     72       if (c == '&' || c == '?' || c == '!' || c == ':' || c == ',') {
     73         break;
     74       }
     75     }
     76 
     77     stack.add(0, reverse(encoded.subSequence(0, idx)));
     78 
     79     if (c == '!' || c == '?' || c == ':' || c == ',') {
     80       // '!' represents an interior node that represents an ICANN entry in the map.
     81       // '?' represents a leaf node, which represents an ICANN entry in map.
     82       // ':' represents an interior node that represents a private entry in the map
     83       // ',' represents a leaf node, which represents a private entry in the map.
     84       String domain = PREFIX_JOINER.join(stack);
     85       if (domain.length() > 0) {
     86         builder.put(domain, PublicSuffixType.fromCode(c));
     87       }
     88     }
     89     idx++;
     90 
     91     if (c != '?' && c != ',') {
     92       while (idx < encodedLen) {
     93         // Read all the children
     94         idx += doParseTrieToBuilder(stack, encoded.subSequence(idx, encodedLen), builder);
     95         if (encoded.charAt(idx) == '?' || encoded.charAt(idx) == ',') {
     96           // An extra '?' or ',' after a child node indicates the end of all children of this node.
     97           idx++;
     98           break;
     99         }
    100       }
    101     }
    102     stack.remove(0);
    103     return idx;
    104   }
    105 
    106   /**
    107    * Reverses a character sequence. This is borrowed from
    108    * https://code.google.com/p/google-web-toolkit/source/detail?r=11591#
    109    * and can be replaced with a simple {@code StringBuffer#reverse} once GWT 2.6 is available.
    110    */
    111   private static CharSequence reverse(CharSequence s) {
    112     int length = s.length();
    113     if (length <= 1) {
    114       return s;
    115     }
    116 
    117     char[] buffer = new char[length];
    118     buffer[0] = s.charAt(length - 1);
    119 
    120     for (int i = 1; i < length; i++) {
    121       buffer[i] = s.charAt(length - 1 - i);
    122       if (Character.isSurrogatePair(buffer[i], buffer[i - 1])) {
    123         swap(buffer, i - 1, i);
    124       }
    125     }
    126 
    127     return new String(buffer);
    128   }
    129 
    130   private static void swap(char[] buffer, int f, int s) {
    131     char tmp = buffer[f];
    132     buffer[f] = buffer[s];
    133     buffer[s] = tmp;
    134   }
    135 }
    136