Home | History | Annotate | Download | only in util
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.quicksearchbox.util;
     18 
     19 /**
     20  * This class represents the matrix used in the Levenshtein distance algorithm, together
     21  * with the algorithm itself which operates on the matrix.
     22  *
     23  * We also track of the individual operations applied to transform the source string into the
     24  * target string so we can trace the path taken through the matrix afterwards, in order to
     25  * perform the formatting as required.
     26  */
     27 public class LevenshteinDistance {
     28     public static final int EDIT_DELETE = 0;
     29     public static final int EDIT_INSERT = 1;
     30     public static final int EDIT_REPLACE = 2;
     31     public static final int EDIT_UNCHANGED = 3;
     32 
     33     private final Token[] mSource;
     34     private final Token[] mTarget;
     35     private final int[][] mEditTypeTable;
     36     private final int[][] mDistanceTable;
     37 
     38     public LevenshteinDistance(Token[] source, Token[] target) {
     39         final int sourceSize = source.length;
     40         final int targetSize = target.length;
     41         final int[][] editTab = new int[sourceSize+1][targetSize+1];
     42         final int[][] distTab = new int[sourceSize+1][targetSize+1];
     43         editTab[0][0] = EDIT_UNCHANGED;
     44         distTab[0][0] = 0;
     45         for (int i = 1; i <= sourceSize; ++i) {
     46             editTab[i][0] = EDIT_DELETE;
     47             distTab[i][0] = i;
     48         }
     49         for (int i = 1; i <= targetSize; ++i) {
     50             editTab[0][i] = EDIT_INSERT;
     51             distTab[0][i] = i;
     52         }
     53         mEditTypeTable = editTab;
     54         mDistanceTable = distTab;
     55         mSource = source;
     56         mTarget = target;
     57     }
     58 
     59     /**
     60      * Implementation of Levenshtein distance algorithm.
     61      *
     62      * @return The Levenshtein distance.
     63      */
     64     public int calculate() {
     65         final Token[] src = mSource;
     66         final Token[] trg = mTarget;
     67         final int sourceLen = src.length;
     68         final int targetLen = trg.length;
     69         final int[][] distTab = mDistanceTable;
     70         final int[][] editTab = mEditTypeTable;
     71         for (int s = 1; s <= sourceLen; ++s) {
     72             Token sourceToken = src[s-1];
     73             for (int t = 1; t <= targetLen; ++t) {
     74                 Token targetToken = trg[t-1];
     75                 int cost = sourceToken.prefixOf(targetToken) ? 0 : 1;
     76 
     77                 int distance = distTab[s-1][t] + 1;
     78                 int type = EDIT_DELETE;
     79 
     80                 int d = distTab[s][t - 1];
     81                 if (d + 1 < distance ) {
     82                     distance = d + 1;
     83                     type = EDIT_INSERT;
     84                 }
     85 
     86                 d = distTab[s - 1][t - 1];
     87                 if (d + cost < distance) {
     88                     distance = d + cost;
     89                     type = cost == 0 ? EDIT_UNCHANGED : EDIT_REPLACE;
     90                 }
     91                 distTab[s][t] = distance;
     92                 editTab[s][t] = type;
     93             }
     94         }
     95         return distTab[sourceLen][targetLen];
     96     }
     97 
     98     /**
     99      * Gets the list of operations which were applied to each target token; {@link #calculate} must
    100      * have been called on this object before using this method.
    101      * @return A list of {@link EditOperation}s indicating the origin of each token in the target
    102      *      string. The position of the token indicates the position in the source string of the
    103      *      token that was unchanged/replaced, or the position in the source after which a target
    104      *      token was inserted.
    105      */
    106     public EditOperation[] getTargetOperations() {
    107         final int trgLen = mTarget.length;
    108         final EditOperation[] ops = new EditOperation[trgLen];
    109         int targetPos = trgLen;
    110         int sourcePos = mSource.length;
    111         final int[][] editTab = mEditTypeTable;
    112         while (targetPos > 0) {
    113             int editType = editTab[sourcePos][targetPos];
    114             switch (editType) {
    115                 case LevenshteinDistance.EDIT_DELETE:
    116                     sourcePos--;
    117                     break;
    118                 case LevenshteinDistance.EDIT_INSERT:
    119                     targetPos--;
    120                     ops[targetPos] = new EditOperation(editType, sourcePos);
    121                     break;
    122                 case LevenshteinDistance.EDIT_UNCHANGED:
    123                 case LevenshteinDistance.EDIT_REPLACE:
    124                     targetPos--;
    125                     sourcePos--;
    126                     ops[targetPos] = new EditOperation(editType, sourcePos);
    127                     break;
    128             }
    129         }
    130 
    131         return ops;
    132     }
    133 
    134     public static final class EditOperation {
    135         private final int mType;
    136         private final int mPosition;
    137         public EditOperation(int type, int position) {
    138             mType = type;
    139             mPosition = position;
    140         }
    141         public int getType() {
    142             return mType;
    143         }
    144         public int getPosition() {
    145             return mPosition;
    146         }
    147     }
    148 
    149     public static final class Token implements CharSequence {
    150         private final char[] mContainer;
    151         public final int mStart;
    152         public final int mEnd;
    153 
    154         public Token(char[] container, int start, int end) {
    155             mContainer = container;
    156             mStart = start;
    157             mEnd = end;
    158         }
    159 
    160         public int length() {
    161             return mEnd - mStart;
    162         }
    163 
    164         @Override
    165         public String toString() {
    166             // used in tests only.
    167             return subSequence(0, length());
    168         }
    169 
    170         public boolean prefixOf(final Token that) {
    171             final int len = length();
    172             if (len > that.length()) return false;
    173             final int thisStart = mStart;
    174             final int thatStart = that.mStart;
    175             final char[] thisContainer = mContainer;
    176             final char[] thatContainer = that.mContainer;
    177             for (int i = 0; i < len; ++i) {
    178                 if (thisContainer[thisStart + i] != thatContainer[thatStart + i]) {
    179                     return false;
    180                 }
    181             }
    182             return true;
    183         }
    184 
    185         public char charAt(int index) {
    186             return mContainer[index + mStart];
    187         }
    188 
    189         public String subSequence(int start, int end) {
    190             return new String(mContainer, mStart + start, length());
    191         }
    192 
    193     }
    194 }
    195