Home | History | Annotate | Download | only in util
      1 //
      2 //  ========================================================================
      3 //  Copyright (c) 1995-2014 Mort Bay Consulting Pty. Ltd.
      4 //  ------------------------------------------------------------------------
      5 //  All rights reserved. This program and the accompanying materials
      6 //  are made available under the terms of the Eclipse Public License v1.0
      7 //  and Apache License v2.0 which accompanies this distribution.
      8 //
      9 //      The Eclipse Public License is available at
     10 //      http://www.eclipse.org/legal/epl-v10.html
     11 //
     12 //      The Apache License v2.0 is available at
     13 //      http://www.opensource.org/licenses/apache2.0.php
     14 //
     15 //  You may elect to redistribute this code under either of these licenses.
     16 //  ========================================================================
     17 //
     18 
     19 package org.eclipse.jetty.util;
     20 
     21 import java.io.UnsupportedEncodingException;
     22 import java.net.URI;
     23 import java.net.URLEncoder;
     24 
     25 import org.eclipse.jetty.util.log.Log;
     26 
     27 
     28 
     29 /* ------------------------------------------------------------ */
     30 /** URI Holder.
 * This class assists with the decoding and encoding of HTTP URIs.
     32  * It differs from the java.net.URL class as it does not provide
     33  * communications ability, but it does assist with query string
     34  * formatting.
     35  * <P>UTF-8 encoding is used by default for % encoded characters. This
     36  * may be overridden with the org.eclipse.jetty.util.URI.charset system property.
     37  * @see UrlEncoded
     38  *
     39  */
     40 public class URIUtil
     41     implements Cloneable
     42 {
     43     public static final String SLASH="/";
     44     public static final String HTTP="http";
     45     public static final String HTTP_COLON="http:";
     46     public static final String HTTPS="https";
     47     public static final String HTTPS_COLON="https:";
     48 
     49     // Use UTF-8 as per http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars
     50     public static final String __CHARSET=System.getProperty("org.eclipse.jetty.util.URI.charset",StringUtil.__UTF8);
     51 
     52     private URIUtil()
     53     {}
     54 
     55     /* ------------------------------------------------------------ */
     56     /** Encode a URI path.
     57      * This is the same encoding offered by URLEncoder, except that
     58      * the '/' character is not encoded.
     59      * @param path The path the encode
     60      * @return The encoded path
     61      */
     62     public static String encodePath(String path)
     63     {
     64         if (path==null || path.length()==0)
     65             return path;
     66 
     67         StringBuilder buf = encodePath(null,path);
     68         return buf==null?path:buf.toString();
     69     }
     70 
     71     /* ------------------------------------------------------------ */
     72     /** Encode a URI path.
     73      * @param path The path the encode
     74      * @param buf StringBuilder to encode path into (or null)
     75      * @return The StringBuilder or null if no substitutions required.
     76      */
     77     public static StringBuilder encodePath(StringBuilder buf, String path)
     78     {
     79         byte[] bytes=null;
     80         if (buf==null)
     81         {
     82         loop:
     83             for (int i=0;i<path.length();i++)
     84             {
     85                 char c=path.charAt(i);
     86                 switch(c)
     87                 {
     88                     case '%':
     89                     case '?':
     90                     case ';':
     91                     case '#':
     92                     case '\'':
     93                     case '"':
     94                     case '<':
     95                     case '>':
     96                     case ' ':
     97                         buf=new StringBuilder(path.length()*2);
     98                         break loop;
     99                     default:
    100                         if (c>127)
    101                         {
    102                             try
    103                             {
    104                                 bytes=path.getBytes(URIUtil.__CHARSET);
    105                             }
    106                             catch (UnsupportedEncodingException e)
    107                             {
    108                                 throw new IllegalStateException(e);
    109                             }
    110                             buf=new StringBuilder(path.length()*2);
    111                             break loop;
    112                         }
    113 
    114                 }
    115             }
    116             if (buf==null)
    117                 return null;
    118         }
    119 
    120         synchronized(buf)
    121         {
    122             if (bytes!=null)
    123             {
    124                 for (int i=0;i<bytes.length;i++)
    125                 {
    126                     byte c=bytes[i];
    127                     switch(c)
    128                     {
    129                       case '%':
    130                           buf.append("%25");
    131                           continue;
    132                       case '?':
    133                           buf.append("%3F");
    134                           continue;
    135                       case ';':
    136                           buf.append("%3B");
    137                           continue;
    138                       case '#':
    139                           buf.append("%23");
    140                           continue;
    141                       case '"':
    142                           buf.append("%22");
    143                           continue;
    144                       case '\'':
    145                           buf.append("%27");
    146                           continue;
    147                       case '<':
    148                           buf.append("%3C");
    149                           continue;
    150                       case '>':
    151                           buf.append("%3E");
    152                           continue;
    153                       case ' ':
    154                           buf.append("%20");
    155                           continue;
    156                       default:
    157                           if (c<0)
    158                           {
    159                               buf.append('%');
    160                               TypeUtil.toHex(c,buf);
    161                           }
    162                           else
    163                               buf.append((char)c);
    164                           continue;
    165                     }
    166                 }
    167 
    168             }
    169             else
    170             {
    171                 for (int i=0;i<path.length();i++)
    172                 {
    173                     char c=path.charAt(i);
    174                     switch(c)
    175                     {
    176                         case '%':
    177                             buf.append("%25");
    178                             continue;
    179                         case '?':
    180                             buf.append("%3F");
    181                             continue;
    182                         case ';':
    183                             buf.append("%3B");
    184                             continue;
    185                         case '#':
    186                             buf.append("%23");
    187                             continue;
    188                         case '"':
    189                             buf.append("%22");
    190                             continue;
    191                         case '\'':
    192                             buf.append("%27");
    193                             continue;
    194                         case '<':
    195                             buf.append("%3C");
    196                             continue;
    197                         case '>':
    198                             buf.append("%3E");
    199                             continue;
    200                         case ' ':
    201                             buf.append("%20");
    202                             continue;
    203                         default:
    204                             buf.append(c);
    205                             continue;
    206                     }
    207                 }
    208             }
    209         }
    210 
    211         return buf;
    212     }
    213 
    214     /* ------------------------------------------------------------ */
    215     /** Encode a URI path.
    216      * @param path The path the encode
    217      * @param buf StringBuilder to encode path into (or null)
    218      * @param encode String of characters to encode. % is always encoded.
    219      * @return The StringBuilder or null if no substitutions required.
    220      */
    221     public static StringBuilder encodeString(StringBuilder buf,
    222                                              String path,
    223                                              String encode)
    224     {
    225         if (buf==null)
    226         {
    227         loop:
    228             for (int i=0;i<path.length();i++)
    229             {
    230                 char c=path.charAt(i);
    231                 if (c=='%' || encode.indexOf(c)>=0)
    232                 {
    233                     buf=new StringBuilder(path.length()<<1);
    234                     break loop;
    235                 }
    236             }
    237             if (buf==null)
    238                 return null;
    239         }
    240 
    241         synchronized(buf)
    242         {
    243             for (int i=0;i<path.length();i++)
    244             {
    245                 char c=path.charAt(i);
    246                 if (c=='%' || encode.indexOf(c)>=0)
    247                 {
    248                     buf.append('%');
    249                     StringUtil.append(buf,(byte)(0xff&c),16);
    250                 }
    251                 else
    252                     buf.append(c);
    253             }
    254         }
    255 
    256         return buf;
    257     }
    258 
    259     /* ------------------------------------------------------------ */
    260     /* Decode a URI path and strip parameters
    261      * @param path The path the encode
    262      * @param buf StringBuilder to encode path into
    263      */
    264     public static String decodePath(String path)
    265     {
    266         if (path==null)
    267             return null;
    268         // Array to hold all converted characters
    269         char[] chars=null;
    270         int n=0;
    271         // Array to hold a sequence of %encodings
    272         byte[] bytes=null;
    273         int b=0;
    274 
    275         int len=path.length();
    276 
    277         for (int i=0;i<len;i++)
    278         {
    279             char c = path.charAt(i);
    280 
    281             if (c=='%' && (i+2)<len)
    282             {
    283                 if (chars==null)
    284                 {
    285                     chars=new char[len];
    286                     bytes=new byte[len];
    287                     path.getChars(0,i,chars,0);
    288                 }
    289                 bytes[b++]=(byte)(0xff&TypeUtil.parseInt(path,i+1,2,16));
    290                 i+=2;
    291                 continue;
    292             }
    293             else if (c==';')
    294             {
    295                 if (chars==null)
    296                 {
    297                     chars=new char[len];
    298                     path.getChars(0,i,chars,0);
    299                     n=i;
    300                 }
    301                 break;
    302             }
    303             else if (bytes==null)
    304             {
    305                 n++;
    306                 continue;
    307             }
    308 
    309             // Do we have some bytes to convert?
    310             if (b>0)
    311             {
    312                 // convert series of bytes and add to chars
    313                 String s;
    314                 try
    315                 {
    316                     s=new String(bytes,0,b,__CHARSET);
    317                 }
    318                 catch (UnsupportedEncodingException e)
    319                 {
    320                     s=new String(bytes,0,b);
    321                 }
    322                 s.getChars(0,s.length(),chars,n);
    323                 n+=s.length();
    324                 b=0;
    325             }
    326 
    327             chars[n++]=c;
    328         }
    329 
    330         if (chars==null)
    331             return path;
    332 
    333         // if we have a remaining sequence of bytes
    334         if (b>0)
    335         {
    336             // convert series of bytes and add to chars
    337             String s;
    338             try
    339             {
    340                 s=new String(bytes,0,b,__CHARSET);
    341             }
    342             catch (UnsupportedEncodingException e)
    343             {
    344                 s=new String(bytes,0,b);
    345             }
    346             s.getChars(0,s.length(),chars,n);
    347             n+=s.length();
    348         }
    349 
    350         return new String(chars,0,n);
    351     }
    352 
    353     /* ------------------------------------------------------------ */
    354     /* Decode a URI path and strip parameters.
    355      * @param path The path the encode
    356      * @param buf StringBuilder to encode path into
    357      */
    358     public static String decodePath(byte[] buf, int offset, int length)
    359     {
    360         byte[] bytes=null;
    361         int n=0;
    362 
    363         for (int i=0;i<length;i++)
    364         {
    365             byte b = buf[i + offset];
    366 
    367             if (b=='%' && (i+2)<length)
    368             {
    369                 b=(byte)(0xff&TypeUtil.parseInt(buf,i+offset+1,2,16));
    370                 i+=2;
    371             }
    372             else if (b==';')
    373             {
    374                 length=i;
    375                 break;
    376             }
    377             else if (bytes==null)
    378             {
    379                 n++;
    380                 continue;
    381             }
    382 
    383             if (bytes==null)
    384             {
    385                 bytes=new byte[length];
    386                 for (int j=0;j<n;j++)
    387                     bytes[j]=buf[j + offset];
    388             }
    389 
    390             bytes[n++]=b;
    391         }
    392 
    393         if (bytes==null)
    394             return StringUtil.toString(buf,offset,length,__CHARSET);
    395         return StringUtil.toString(bytes,0,n,__CHARSET);
    396     }
    397 
    398 
    399     /* ------------------------------------------------------------ */
    400     /** Add two URI path segments.
    401      * Handles null and empty paths, path and query params (eg ?a=b or
    402      * ;JSESSIONID=xxx) and avoids duplicate '/'
    403      * @param p1 URI path segment (should be encoded)
    404      * @param p2 URI path segment (should be encoded)
    405      * @return Legally combined path segments.
    406      */
    407     public static String addPaths(String p1, String p2)
    408     {
    409         if (p1==null || p1.length()==0)
    410         {
    411             if (p1!=null && p2==null)
    412                 return p1;
    413             return p2;
    414         }
    415         if (p2==null || p2.length()==0)
    416             return p1;
    417 
    418         int split=p1.indexOf(';');
    419         if (split<0)
    420             split=p1.indexOf('?');
    421         if (split==0)
    422             return p2+p1;
    423         if (split<0)
    424             split=p1.length();
    425 
    426         StringBuilder buf = new StringBuilder(p1.length()+p2.length()+2);
    427         buf.append(p1);
    428 
    429         if (buf.charAt(split-1)=='/')
    430         {
    431             if (p2.startsWith(URIUtil.SLASH))
    432             {
    433                 buf.deleteCharAt(split-1);
    434                 buf.insert(split-1,p2);
    435             }
    436             else
    437                 buf.insert(split,p2);
    438         }
    439         else
    440         {
    441             if (p2.startsWith(URIUtil.SLASH))
    442                 buf.insert(split,p2);
    443             else
    444             {
    445                 buf.insert(split,'/');
    446                 buf.insert(split+1,p2);
    447             }
    448         }
    449 
    450         return buf.toString();
    451     }
    452 
    453     /* ------------------------------------------------------------ */
    454     /** Return the parent Path.
    455      * Treat a URI like a directory path and return the parent directory.
    456      */
    457     public static String parentPath(String p)
    458     {
    459         if (p==null || URIUtil.SLASH.equals(p))
    460             return null;
    461         int slash=p.lastIndexOf('/',p.length()-2);
    462         if (slash>=0)
    463             return p.substring(0,slash+1);
    464         return null;
    465     }
    466 
    467     /* ------------------------------------------------------------ */
    468     /** Convert a path to a cananonical form.
    469      * All instances of "." and ".." are factored out.  Null is returned
    470      * if the path tries to .. above its root.
    471      * @param path
    472      * @return path or null.
    473      */
    474     public static String canonicalPath(String path)
    475     {
    476         if (path==null || path.length()==0)
    477             return path;
    478 
    479         int end=path.length();
    480         int start = path.lastIndexOf('/', end);
    481 
    482     search:
    483         while (end>0)
    484         {
    485             switch(end-start)
    486             {
    487               case 2: // possible single dot
    488                   if (path.charAt(start+1)!='.')
    489                       break;
    490                   break search;
    491               case 3: // possible double dot
    492                   if (path.charAt(start+1)!='.' || path.charAt(start+2)!='.')
    493                       break;
    494                   break search;
    495             }
    496 
    497             end=start;
    498             start=path.lastIndexOf('/',end-1);
    499         }
    500 
    501         // If we have checked the entire string
    502         if (start>=end)
    503             return path;
    504 
    505         StringBuilder buf = new StringBuilder(path);
    506         int delStart=-1;
    507         int delEnd=-1;
    508         int skip=0;
    509 
    510         while (end>0)
    511         {
    512             switch(end-start)
    513             {
    514               case 2: // possible single dot
    515                   if (buf.charAt(start+1)!='.')
    516                   {
    517                       if (skip>0 && --skip==0)
    518                       {
    519                           delStart=start>=0?start:0;
    520                           if(delStart>0 && delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
    521                               delStart++;
    522                       }
    523                       break;
    524                   }
    525 
    526                   if(start<0 && buf.length()>2 && buf.charAt(1)=='/' && buf.charAt(2)=='/')
    527                       break;
    528 
    529                   if(delEnd<0)
    530                       delEnd=end;
    531                   delStart=start;
    532                   if (delStart<0 || delStart==0&&buf.charAt(delStart)=='/')
    533                   {
    534                       delStart++;
    535                       if (delEnd<buf.length() && buf.charAt(delEnd)=='/')
    536                           delEnd++;
    537                       break;
    538                   }
    539                   if (end==buf.length())
    540                       delStart++;
    541 
    542                   end=start--;
    543                   while (start>=0 && buf.charAt(start)!='/')
    544                       start--;
    545                   continue;
    546 
    547               case 3: // possible double dot
    548                   if (buf.charAt(start+1)!='.' || buf.charAt(start+2)!='.')
    549                   {
    550                       if (skip>0 && --skip==0)
    551                       {   delStart=start>=0?start:0;
    552                           if(delStart>0 && delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
    553                               delStart++;
    554                       }
    555                       break;
    556                   }
    557 
    558                   delStart=start;
    559                   if (delEnd<0)
    560                       delEnd=end;
    561 
    562                   skip++;
    563                   end=start--;
    564                   while (start>=0 && buf.charAt(start)!='/')
    565                       start--;
    566                   continue;
    567 
    568               default:
    569                   if (skip>0 && --skip==0)
    570                   {
    571                       delStart=start>=0?start:0;
    572                       if(delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
    573                           delStart++;
    574                   }
    575             }
    576 
    577             // Do the delete
    578             if (skip<=0 && delStart>=0 && delEnd>=delStart)
    579             {
    580                 buf.delete(delStart,delEnd);
    581                 delStart=delEnd=-1;
    582                 if (skip>0)
    583                     delEnd=end;
    584             }
    585 
    586             end=start--;
    587             while (start>=0 && buf.charAt(start)!='/')
    588                 start--;
    589         }
    590 
    591         // Too many ..
    592         if (skip>0)
    593             return null;
    594 
    595         // Do the delete
    596         if (delEnd>=0)
    597             buf.delete(delStart,delEnd);
    598 
    599         return buf.toString();
    600     }
    601 
    602     /* ------------------------------------------------------------ */
    603     /** Convert a path to a compact form.
    604      * All instances of "//" and "///" etc. are factored out to single "/"
    605      * @param path
    606      * @return path
    607      */
    608     public static String compactPath(String path)
    609     {
    610         if (path==null || path.length()==0)
    611             return path;
    612 
    613         int state=0;
    614         int end=path.length();
    615         int i=0;
    616 
    617         loop:
    618         while (i<end)
    619         {
    620             char c=path.charAt(i);
    621             switch(c)
    622             {
    623                 case '?':
    624                     return path;
    625                 case '/':
    626                     state++;
    627                     if (state==2)
    628                         break loop;
    629                     break;
    630                 default:
    631                     state=0;
    632             }
    633             i++;
    634         }
    635 
    636         if (state<2)
    637             return path;
    638 
    639         StringBuffer buf = new StringBuffer(path.length());
    640         buf.append(path,0,i);
    641 
    642         loop2:
    643         while (i<end)
    644         {
    645             char c=path.charAt(i);
    646             switch(c)
    647             {
    648                 case '?':
    649                     buf.append(path,i,end);
    650                     break loop2;
    651                 case '/':
    652                     if (state++==0)
    653                         buf.append(c);
    654                     break;
    655                 default:
    656                     state=0;
    657                     buf.append(c);
    658             }
    659             i++;
    660         }
    661 
    662         return buf.toString();
    663     }
    664 
    665     /* ------------------------------------------------------------ */
    666     /**
    667      * @param uri URI
    668      * @return True if the uri has a scheme
    669      */
    670     public static boolean hasScheme(String uri)
    671     {
    672         for (int i=0;i<uri.length();i++)
    673         {
    674             char c=uri.charAt(i);
    675             if (c==':')
    676                 return true;
    677             if (!(c>='a'&&c<='z' ||
    678                   c>='A'&&c<='Z' ||
    679                   (i>0 &&(c>='0'&&c<='9' ||
    680                           c=='.' ||
    681                           c=='+' ||
    682                           c=='-'))
    683                   ))
    684                 break;
    685         }
    686         return false;
    687     }
    688 
    689 }
    690 
    691 
    692 
    693