Home | History | Annotate | Download | only in compiler
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one
      3  * or more contributor license agreements. See the NOTICE file
      4  * distributed with this work for additional information
      5  * regarding copyright ownership. The ASF licenses this file
      6  * to you under the Apache License, Version 2.0 (the  "License");
      7  * you may not use this file except in compliance with the License.
      8  * You may obtain a copy of the License at
      9  *
     10  *     http://www.apache.org/licenses/LICENSE-2.0
     11  *
     12  * Unless required by applicable law or agreed to in writing, software
     13  * distributed under the License is distributed on an "AS IS" BASIS,
     14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15  * See the License for the specific language governing permissions and
     16  * limitations under the License.
     17  */
     18 /*
     19  * $Id: Lexer.java 524810 2007-04-02 15:51:55Z zongaro $
     20  */
     21 package org.apache.xpath.compiler;
     22 
     23 import java.util.Vector;
     24 
     25 import org.apache.xml.utils.PrefixResolver;
     26 import org.apache.xpath.res.XPATHErrorResources;
     27 
     28 /**
     29  * This class is in charge of lexical processing of the XPath
     30  * expression into tokens.
     31  */
     32 class Lexer
     33 {
     34 
     35   /**
     36    * The target XPath.
     37    */
     38   private Compiler m_compiler;
     39 
     40   /**
     41    * The prefix resolver to map prefixes to namespaces in the XPath.
     42    */
     43   PrefixResolver m_namespaceContext;
     44 
     45   /**
     46    * The XPath processor object.
     47    */
     48   XPathParser m_processor;
     49 
     50   /**
     51    * This value is added to each element name in the TARGETEXTRA
     52    * that is a 'target' (right-most top-level element name).
     53    */
     54   static final int TARGETEXTRA = 10000;
     55 
     56   /**
     57    * Ignore this, it is going away.
     58    * This holds a map to the m_tokenQueue that tells where the top-level elements are.
     59    * It is used for pattern matching so the m_tokenQueue can be walked backwards.
     60    * Each element that is a 'target', (right-most top level element name) has
     61    * TARGETEXTRA added to it.
     62    *
     63    */
     64   private int m_patternMap[] = new int[100];
     65 
     66   /**
     67    * Ignore this, it is going away.
     68    * The number of elements that m_patternMap maps;
     69    */
     70   private int m_patternMapSize;
     71 
     72   /**
     73    * Create a Lexer object.
     74    *
     75    * @param compiler The owning compiler for this lexer.
     76    * @param resolver The prefix resolver for mapping qualified name prefixes
     77    *                 to namespace URIs.
     78    * @param xpathProcessor The parser that is processing strings to opcodes.
     79    */
     80   Lexer(Compiler compiler, PrefixResolver resolver,
     81         XPathParser xpathProcessor)
     82   {
     83 
     84     m_compiler = compiler;
     85     m_namespaceContext = resolver;
     86     m_processor = xpathProcessor;
     87   }
     88 
     89   /**
     90    * Walk through the expression and build a token queue, and a map of the top-level
     91    * elements.
     92    * @param pat XSLT Expression.
     93    *
     94    * @throws javax.xml.transform.TransformerException
     95    */
     96   void tokenize(String pat) throws javax.xml.transform.TransformerException
     97   {
     98     tokenize(pat, null);
     99   }
    100 
    101   /**
    102    * Walk through the expression and build a token queue, and a map of the top-level
    103    * elements.
    104    * @param pat XSLT Expression.
    105    * @param targetStrings Vector to hold Strings, may be null.
    106    *
    107    * @throws javax.xml.transform.TransformerException
    108    */
    109   void tokenize(String pat, Vector targetStrings)
    110           throws javax.xml.transform.TransformerException
    111   {
    112 
    113     m_compiler.m_currentPattern = pat;
    114     m_patternMapSize = 0;
    115 
    116     // This needs to grow too.  Use a conservative estimate that the OpMapVector
    117     // needs about five time the length of the input path expression - to a
    118     // maximum of MAXTOKENQUEUESIZE*5.  If the OpMapVector needs to grow, grow
    119     // it freely (second argument to constructor).
    120     int initTokQueueSize = ((pat.length() < OpMap.MAXTOKENQUEUESIZE)
    121                                  ? pat.length() :  OpMap.MAXTOKENQUEUESIZE) * 5;
    122     m_compiler.m_opMap = new OpMapVector(initTokQueueSize,
    123                                          OpMap.BLOCKTOKENQUEUESIZE * 5,
    124                                          OpMap.MAPINDEX_LENGTH);
    125 
    126     int nChars = pat.length();
    127     int startSubstring = -1;
    128     int posOfNSSep = -1;
    129     boolean isStartOfPat = true;
    130     boolean isAttrName = false;
    131     boolean isNum = false;
    132 
    133     // Nesting of '[' so we can know if the given element should be
    134     // counted inside the m_patternMap.
    135     int nesting = 0;
    136 
    137     // char[] chars = pat.toCharArray();
    138     for (int i = 0; i < nChars; i++)
    139     {
    140       char c = pat.charAt(i);
    141 
    142       switch (c)
    143       {
    144       case '\"' :
    145       {
    146         if (startSubstring != -1)
    147         {
    148           isNum = false;
    149           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
    150           isAttrName = false;
    151 
    152           if (-1 != posOfNSSep)
    153           {
    154             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
    155           }
    156           else
    157           {
    158             addToTokenQueue(pat.substring(startSubstring, i));
    159           }
    160         }
    161 
    162         startSubstring = i;
    163 
    164         for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\"'); i++);
    165 
    166         if (c == '\"' && i < nChars)
    167         {
    168           addToTokenQueue(pat.substring(startSubstring, i + 1));
    169 
    170           startSubstring = -1;
    171         }
    172         else
    173         {
    174           m_processor.error(XPATHErrorResources.ER_EXPECTED_DOUBLE_QUOTE,
    175                             null);  //"misquoted literal... expected double quote!");
    176         }
    177       }
    178       break;
    179       case '\'' :
    180         if (startSubstring != -1)
    181         {
    182           isNum = false;
    183           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
    184           isAttrName = false;
    185 
    186           if (-1 != posOfNSSep)
    187           {
    188             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
    189           }
    190           else
    191           {
    192             addToTokenQueue(pat.substring(startSubstring, i));
    193           }
    194         }
    195 
    196         startSubstring = i;
    197 
    198         for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\''); i++);
    199 
    200         if (c == '\'' && i < nChars)
    201         {
    202           addToTokenQueue(pat.substring(startSubstring, i + 1));
    203 
    204           startSubstring = -1;
    205         }
    206         else
    207         {
    208           m_processor.error(XPATHErrorResources.ER_EXPECTED_SINGLE_QUOTE,
    209                             null);  //"misquoted literal... expected single quote!");
    210         }
    211         break;
    212       case 0x0A :
    213       case 0x0D :
    214       case ' ' :
    215       case '\t' :
    216         if (startSubstring != -1)
    217         {
    218           isNum = false;
    219           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
    220           isAttrName = false;
    221 
    222           if (-1 != posOfNSSep)
    223           {
    224             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
    225           }
    226           else
    227           {
    228             addToTokenQueue(pat.substring(startSubstring, i));
    229           }
    230 
    231           startSubstring = -1;
    232         }
    233         break;
    234       case '@' :
    235         isAttrName = true;
    236 
    237       // fall-through on purpose
    238       case '-' :
    239         if ('-' == c)
    240         {
    241           if (!(isNum || (startSubstring == -1)))
    242           {
    243             break;
    244           }
    245 
    246           isNum = false;
    247         }
    248 
    249       // fall-through on purpose
    250       case '(' :
    251       case '[' :
    252       case ')' :
    253       case ']' :
    254       case '|' :
    255       case '/' :
    256       case '*' :
    257       case '+' :
    258       case '=' :
    259       case ',' :
    260       case '\\' :  // Unused at the moment
    261       case '^' :  // Unused at the moment
    262       case '!' :  // Unused at the moment
    263       case '$' :
    264       case '<' :
    265       case '>' :
    266         if (startSubstring != -1)
    267         {
    268           isNum = false;
    269           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
    270           isAttrName = false;
    271 
    272           if (-1 != posOfNSSep)
    273           {
    274             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
    275           }
    276           else
    277           {
    278             addToTokenQueue(pat.substring(startSubstring, i));
    279           }
    280 
    281           startSubstring = -1;
    282         }
    283         else if (('/' == c) && isStartOfPat)
    284         {
    285           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
    286         }
    287         else if ('*' == c)
    288         {
    289           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
    290           isAttrName = false;
    291         }
    292 
    293         if (0 == nesting)
    294         {
    295           if ('|' == c)
    296           {
    297             if (null != targetStrings)
    298             {
    299               recordTokenString(targetStrings);
    300             }
    301 
    302             isStartOfPat = true;
    303           }
    304         }
    305 
    306         if ((')' == c) || (']' == c))
    307         {
    308           nesting--;
    309         }
    310         else if (('(' == c) || ('[' == c))
    311         {
    312           nesting++;
    313         }
    314 
    315         addToTokenQueue(pat.substring(i, i + 1));
    316         break;
    317       case ':' :
    318         if (i>0)
    319         {
    320           if (posOfNSSep == (i - 1))
    321           {
    322             if (startSubstring != -1)
    323             {
    324               if (startSubstring < (i - 1))
    325                 addToTokenQueue(pat.substring(startSubstring, i - 1));
    326             }
    327 
    328             isNum = false;
    329             isAttrName = false;
    330             startSubstring = -1;
    331             posOfNSSep = -1;
    332 
    333             addToTokenQueue(pat.substring(i - 1, i + 1));
    334 
    335             break;
    336           }
    337           else
    338           {
    339             posOfNSSep = i;
    340           }
    341         }
    342 
    343       // fall through on purpose
    344       default :
    345         if (-1 == startSubstring)
    346         {
    347           startSubstring = i;
    348           isNum = Character.isDigit(c);
    349         }
    350         else if (isNum)
    351         {
    352           isNum = Character.isDigit(c);
    353         }
    354       }
    355     }
    356 
    357     if (startSubstring != -1)
    358     {
    359       isNum = false;
    360       isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
    361 
    362       if ((-1 != posOfNSSep) ||
    363          ((m_namespaceContext != null) && (m_namespaceContext.handlesNullPrefixes())))
    364       {
    365         posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, nChars);
    366       }
    367       else
    368       {
    369         addToTokenQueue(pat.substring(startSubstring, nChars));
    370       }
    371     }
    372 
    373     if (0 == m_compiler.getTokenQueueSize())
    374     {
    375       m_processor.error(XPATHErrorResources.ER_EMPTY_EXPRESSION, null);  //"Empty expression!");
    376     }
    377     else if (null != targetStrings)
    378     {
    379       recordTokenString(targetStrings);
    380     }
    381 
    382     m_processor.m_queueMark = 0;
    383   }
    384 
    385   /**
    386    * Record the current position on the token queue as long as
    387    * this is a top-level element.  Must be called before the
    388    * next token is added to the m_tokenQueue.
    389    *
    390    * @param nesting The nesting count for the pattern element.
    391    * @param isStart true if this is the start of a pattern.
    392    * @param isAttrName true if we have determined that this is an attribute name.
    393    *
    394    * @return true if this is the start of a pattern.
    395    */
    396   private boolean mapPatternElemPos(int nesting, boolean isStart,
    397                                     boolean isAttrName)
    398   {
    399 
    400     if (0 == nesting)
    401     {
    402       if(m_patternMapSize >= m_patternMap.length)
    403       {
    404         int patternMap[] = m_patternMap;
    405         int len = m_patternMap.length;
    406         m_patternMap = new int[m_patternMapSize + 100];
    407         System.arraycopy(patternMap, 0, m_patternMap, 0, len);
    408       }
    409       if (!isStart)
    410       {
    411         m_patternMap[m_patternMapSize - 1] -= TARGETEXTRA;
    412       }
    413       m_patternMap[m_patternMapSize] =
    414         (m_compiler.getTokenQueueSize() - (isAttrName ? 1 : 0)) + TARGETEXTRA;
    415 
    416       m_patternMapSize++;
    417 
    418       isStart = false;
    419     }
    420 
    421     return isStart;
    422   }
    423 
    424   /**
    425    * Given a map pos, return the corresponding token queue pos.
    426    *
    427    * @param i The index in the m_patternMap.
    428    *
    429    * @return the token queue position.
    430    */
    431   private int getTokenQueuePosFromMap(int i)
    432   {
    433 
    434     int pos = m_patternMap[i];
    435 
    436     return (pos >= TARGETEXTRA) ? (pos - TARGETEXTRA) : pos;
    437   }
    438 
    439   /**
    440    * Reset token queue mark and m_token to a
    441    * given position.
    442    * @param mark The new position.
    443    */
    444   private final void resetTokenMark(int mark)
    445   {
    446 
    447     int qsz = m_compiler.getTokenQueueSize();
    448 
    449     m_processor.m_queueMark = (mark > 0)
    450                               ? ((mark <= qsz) ? mark - 1 : mark) : 0;
    451 
    452     if (m_processor.m_queueMark < qsz)
    453     {
    454       m_processor.m_token =
    455         (String) m_compiler.getTokenQueue().elementAt(m_processor.m_queueMark++);
    456       m_processor.m_tokenChar = m_processor.m_token.charAt(0);
    457     }
    458     else
    459     {
    460       m_processor.m_token = null;
    461       m_processor.m_tokenChar = 0;
    462     }
    463   }
    464 
    465   /**
    466    * Given a string, return the corresponding keyword token.
    467    *
    468    * @param key The keyword.
    469    *
    470    * @return An opcode value.
    471    */
    472   final int getKeywordToken(String key)
    473   {
    474 
    475     int tok;
    476 
    477     try
    478     {
    479       Integer itok = (Integer) Keywords.getKeyWord(key);
    480 
    481       tok = (null != itok) ? itok.intValue() : 0;
    482     }
    483     catch (NullPointerException npe)
    484     {
    485       tok = 0;
    486     }
    487     catch (ClassCastException cce)
    488     {
    489       tok = 0;
    490     }
    491 
    492     return tok;
    493   }
    494 
    495   /**
    496    * Record the current token in the passed vector.
    497    *
    498    * @param targetStrings Vector of string.
    499    */
    500   private void recordTokenString(Vector targetStrings)
    501   {
    502 
    503     int tokPos = getTokenQueuePosFromMap(m_patternMapSize - 1);
    504 
    505     resetTokenMark(tokPos + 1);
    506 
    507     if (m_processor.lookahead('(', 1))
    508     {
    509       int tok = getKeywordToken(m_processor.m_token);
    510 
    511       switch (tok)
    512       {
    513       case OpCodes.NODETYPE_COMMENT :
    514         targetStrings.addElement(PsuedoNames.PSEUDONAME_COMMENT);
    515         break;
    516       case OpCodes.NODETYPE_TEXT :
    517         targetStrings.addElement(PsuedoNames.PSEUDONAME_TEXT);
    518         break;
    519       case OpCodes.NODETYPE_NODE :
    520         targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
    521         break;
    522       case OpCodes.NODETYPE_ROOT :
    523         targetStrings.addElement(PsuedoNames.PSEUDONAME_ROOT);
    524         break;
    525       case OpCodes.NODETYPE_ANYELEMENT :
    526         targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
    527         break;
    528       case OpCodes.NODETYPE_PI :
    529         targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
    530         break;
    531       default :
    532         targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
    533       }
    534     }
    535     else
    536     {
    537       if (m_processor.tokenIs('@'))
    538       {
    539         tokPos++;
    540 
    541         resetTokenMark(tokPos + 1);
    542       }
    543 
    544       if (m_processor.lookahead(':', 1))
    545       {
    546         tokPos += 2;
    547       }
    548 
    549       targetStrings.addElement(m_compiler.getTokenQueue().elementAt(tokPos));
    550     }
    551   }
    552 
    553   /**
    554    * Add a token to the token queue.
    555    *
    556    *
    557    * @param s The token.
    558    */
    559   private final void addToTokenQueue(String s)
    560   {
    561     m_compiler.getTokenQueue().addElement(s);
    562   }
    563 
    564   /**
    565    * When a seperator token is found, see if there's a element name or
    566    * the like to map.
    567    *
    568    * @param pat The XPath name string.
    569    * @param startSubstring The start of the name string.
    570    * @param posOfNSSep The position of the namespace seperator (':').
    571    * @param posOfScan The end of the name index.
    572    *
    573    * @throws javax.xml.transform.TransformerException
    574    *
    575    * @return -1 always.
    576    */
    577   private int mapNSTokens(String pat, int startSubstring, int posOfNSSep,
    578                           int posOfScan)
    579            throws javax.xml.transform.TransformerException
    580  {
    581 
    582     String prefix = "";
    583 
    584     if ((startSubstring >= 0) && (posOfNSSep >= 0))
    585     {
    586        prefix = pat.substring(startSubstring, posOfNSSep);
    587     }
    588     String uName;
    589 
    590     if ((null != m_namespaceContext) &&!prefix.equals("*")
    591             &&!prefix.equals("xmlns"))
    592     {
    593       try
    594       {
    595         if (prefix.length() > 0)
    596           uName = ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
    597             prefix);
    598         else
    599         {
    600 
    601           // Assume last was wildcard. This is not legal according
    602           // to the draft. Set the below to true to make namespace
    603           // wildcards work.
    604           if (false)
    605           {
    606             addToTokenQueue(":");
    607 
    608             String s = pat.substring(posOfNSSep + 1, posOfScan);
    609 
    610             if (s.length() > 0)
    611               addToTokenQueue(s);
    612 
    613             return -1;
    614           }
    615           else
    616           {
    617             uName =
    618               ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
    619                 prefix);
    620           }
    621         }
    622       }
    623       catch (ClassCastException cce)
    624       {
    625         uName = m_namespaceContext.getNamespaceForPrefix(prefix);
    626       }
    627     }
    628     else
    629     {
    630       uName = prefix;
    631     }
    632 
    633     if ((null != uName) && (uName.length() > 0))
    634     {
    635       addToTokenQueue(uName);
    636       addToTokenQueue(":");
    637 
    638       String s = pat.substring(posOfNSSep + 1, posOfScan);
    639 
    640       if (s.length() > 0)
    641         addToTokenQueue(s);
    642     }
    643     else
    644     {
    645         // To older XPath code it doesn't matter if
    646         // error() is called or errorForDOM3().
    647 		m_processor.errorForDOM3(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
    648 						 new String[] {prefix});  //"Prefix must resolve to a namespace: {0}";
    649 
    650 /** old code commented out 17-Sep-2004
    651 // error("Could not locate namespace for prefix: "+prefix);
    652 //		  m_processor.error(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
    653 //					 new String[] {prefix});  //"Prefix must resolve to a namespace: {0}";
    654 */
    655 
    656       /***  Old code commented out 10-Jan-2001
    657       addToTokenQueue(prefix);
    658       addToTokenQueue(":");
    659 
    660       String s = pat.substring(posOfNSSep + 1, posOfScan);
    661 
    662       if (s.length() > 0)
    663         addToTokenQueue(s);
    664       ***/
    665     }
    666 
    667     return -1;
    668   }
    669 }
    670