Home | History | Annotate | Download | only in Misc
      1 /*
      2  * Copyright (C) 2005 Apple Computer, Inc.  All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  *
      8  * 1.  Redistributions of source code must retain the above copyright
      9  *     notice, this list of conditions and the following disclaimer.
     10  * 2.  Redistributions in binary form must reproduce the above copyright
     11  *     notice, this list of conditions and the following disclaimer in the
     12  *     documentation and/or other materials provided with the distribution.
     13  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
     14  *     its contributors may be used to endorse or promote products derived
     15  *     from this software without specific prior written permission.
     16  *
     17  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
     18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     20  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
     21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #import <WebKit/WebNSDataExtras.h>
     30 #import <WebKit/WebNSDataExtrasPrivate.h>
     31 
     32 #import <wtf/Assertions.h>
     33 
     34 @interface NSString (WebNSDataExtrasInternal)
     35 - (NSString *)_web_capitalizeRFC822HeaderFieldName;
     36 @end
     37 
     38 @implementation NSString (WebNSDataExtrasInternal)
     39 
     40 -(NSString *)_web_capitalizeRFC822HeaderFieldName
     41 {
     42     CFStringRef name = (CFStringRef)self;
     43     NSString *result = nil;
     44 
     45     CFIndex i;
     46     CFIndex len = CFStringGetLength(name);
     47     char *charPtr = NULL;
     48     UniChar *uniCharPtr = NULL;
     49     Boolean useUniCharPtr = FALSE;
     50     Boolean shouldCapitalize = TRUE;
     51     Boolean somethingChanged = FALSE;
     52 
     53     for (i = 0; i < len; i ++) {
     54         UniChar ch = CFStringGetCharacterAtIndex(name, i);
     55         Boolean replace = FALSE;
     56         if (shouldCapitalize && ch >= 'a' && ch <= 'z') {
     57             ch = ch + 'A' - 'a';
     58             replace = TRUE;
     59         }
     60         else if (!shouldCapitalize && ch >= 'A' && ch <= 'Z') {
     61             ch = ch + 'a' - 'A';
     62             replace = TRUE;
     63         }
     64         if (replace) {
     65             if (!somethingChanged) {
     66                 somethingChanged = TRUE;
     67                 if (CFStringGetBytes(name, CFRangeMake(0, len), kCFStringEncodingISOLatin1, 0, FALSE, NULL, 0, NULL) == len) {
     68                     // Can be encoded in ISOLatin1
     69                     useUniCharPtr = FALSE;
     70                     charPtr = CFAllocatorAllocate(NULL, len + 1, 0);
     71                     CFStringGetCString(name, charPtr, len+1, kCFStringEncodingISOLatin1);
     72                 }
     73                 else {
     74                     useUniCharPtr = TRUE;
     75                     uniCharPtr = CFAllocatorAllocate(NULL, len * sizeof(UniChar), 0);
     76                     CFStringGetCharacters(name, CFRangeMake(0, len), uniCharPtr);
     77                 }
     78             }
     79             if (useUniCharPtr) {
     80                 uniCharPtr[i] = ch;
     81             }
     82             else {
     83                 charPtr[i] = ch;
     84             }
     85         }
     86         if (ch == '-') {
     87             shouldCapitalize = TRUE;
     88         }
     89         else {
     90             shouldCapitalize = FALSE;
     91         }
     92     }
     93     if (somethingChanged) {
     94         if (useUniCharPtr) {
     95             result = (NSString *)CFMakeCollectable(CFStringCreateWithCharactersNoCopy(NULL, uniCharPtr, len, NULL));
     96         }
     97         else {
     98             result = (NSString *)CFMakeCollectable(CFStringCreateWithCStringNoCopy(NULL, charPtr, kCFStringEncodingISOLatin1, NULL));
     99         }
    100     }
    101     else {
    102         result = [self retain];
    103     }
    104 
    105     return [result autorelease];
    106 }
    107 
    108 @end
    109 
    110 @implementation NSData (WebKitExtras)
    111 
    112 -(NSString *)_webkit_guessedMIMETypeForXML
    113 {
    114     int length = [self length];
    115     const UInt8 *bytes = [self bytes];
    116 
    117 #define CHANNEL_TAG_LENGTH 7
    118 
    119     const char *p = (const char *)bytes;
    120     int remaining = MIN(length, WEB_GUESS_MIME_TYPE_PEEK_LENGTH) - (CHANNEL_TAG_LENGTH - 1);
    121 
    122     BOOL foundRDF = false;
    123 
    124     while (remaining > 0) {
    125         // Look for a "<".
    126         const char *hit = memchr(p, '<', remaining);
    127         if (!hit) {
    128             break;
    129         }
    130 
    131         // We are trying to identify RSS or Atom. RSS has a top-level
    132         // element of either <rss> or <rdf>. However, there are
    133         // non-RSS RDF files, so in the case of <rdf> we further look
    134         // for a <channel> element. In the case of an Atom file, a
    135         // top-level <feed> element is all we need to see. Only tags
    136         // starting with <? or <! can precede the root element. We
    137         // bail if we don't find an <rss>, <feed> or <rdf> element
    138         // right after those.
    139 
    140         if (foundRDF) {
    141             if (strncasecmp(hit, "<channel", strlen("<channel")) == 0) {
    142                 return @"application/rss+xml";
    143             }
    144         } else if (strncasecmp(hit, "<rdf", strlen("<rdf")) == 0) {
    145             foundRDF = TRUE;
    146         } else if (strncasecmp(hit, "<rss", strlen("<rss")) == 0) {
    147             return @"application/rss+xml";
    148         } else if (strncasecmp(hit, "<feed", strlen("<feed")) == 0) {
    149             return @"application/atom+xml";
    150         } else if (strncasecmp(hit, "<?", strlen("<?")) != 0 && strncasecmp(hit, "<!", strlen("<!")) != 0) {
    151             return nil;
    152         }
    153 
    154         // Skip the "<" and continue.
    155         remaining -= (hit + 1) - p;
    156         p = hit + 1;
    157     }
    158 
    159     return nil;
    160 }
    161 
    162 -(NSString *)_webkit_guessedMIMEType
    163 {
    164 #define JPEG_MAGIC_NUMBER_LENGTH 4
    165 #define SCRIPT_TAG_LENGTH 7
    166 #define TEXT_HTML_LENGTH 9
    167 #define VCARD_HEADER_LENGTH 11
    168 #define VCAL_HEADER_LENGTH 15
    169 
    170     NSString *MIMEType = [self _webkit_guessedMIMETypeForXML];
    171     if ([MIMEType length])
    172         return MIMEType;
    173 
    174     int length = [self length];
    175     const char *bytes = [self bytes];
    176 
    177     const char *p = bytes;
    178     int remaining = MIN(length, WEB_GUESS_MIME_TYPE_PEEK_LENGTH) - (SCRIPT_TAG_LENGTH - 1);
    179     while (remaining > 0) {
    180         // Look for a "<".
    181         const char *hit = memchr(p, '<', remaining);
    182         if (!hit) {
    183             break;
    184         }
    185 
    186         // If we found a "<", look for "<html>" or "<a " or "<script".
    187         if (strncasecmp(hit, "<html>",  strlen("<html>")) == 0 ||
    188             strncasecmp(hit, "<a ",     strlen("<a ")) == 0 ||
    189             strncasecmp(hit, "<script", strlen("<script")) == 0 ||
    190             strncasecmp(hit, "<title>", strlen("<title>")) == 0) {
    191             return @"text/html";
    192         }
    193 
    194         // Skip the "<" and continue.
    195         remaining -= (hit + 1) - p;
    196         p = hit + 1;
    197     }
    198 
    199     // Test for a broken server which has sent the content type as part of the content.
    200     // This code could be improved to look for other mime types.
    201     p = bytes;
    202     remaining = MIN(length, WEB_GUESS_MIME_TYPE_PEEK_LENGTH) - (TEXT_HTML_LENGTH - 1);
    203     while (remaining > 0) {
    204         // Look for a "t" or "T".
    205         const char *hit = NULL;
    206         const char *lowerhit = memchr(p, 't', remaining);
    207         const char *upperhit = memchr(p, 'T', remaining);
    208         if (!lowerhit && !upperhit) {
    209             break;
    210         }
    211         if (!lowerhit) {
    212             hit = upperhit;
    213         }
    214         else if (!upperhit) {
    215             hit = lowerhit;
    216         }
    217         else {
    218             hit = MIN(lowerhit, upperhit);
    219         }
    220 
    221         // If we found a "t/T", look for "text/html".
    222         if (strncasecmp(hit, "text/html", TEXT_HTML_LENGTH) == 0) {
    223             return @"text/html";
    224         }
    225 
    226         // Skip the "t/T" and continue.
    227         remaining -= (hit + 1) - p;
    228         p = hit + 1;
    229     }
    230 
    231     if ((length >= VCARD_HEADER_LENGTH) && strncmp(bytes, "BEGIN:VCARD", VCARD_HEADER_LENGTH) == 0) {
    232         return @"text/vcard";
    233     }
    234     if ((length >= VCAL_HEADER_LENGTH) && strncmp(bytes, "BEGIN:VCALENDAR", VCAL_HEADER_LENGTH) == 0) {
    235         return @"text/calendar";
    236     }
    237 
    238     // Test for plain text.
    239     int i;
    240     for(i=0; i<length; i++){
    241         char c = bytes[i];
    242         if ((c < 0x20 || c > 0x7E) && (c != '\t' && c != '\r' && c != '\n')) {
    243             break;
    244         }
    245     }
    246     if (i == length) {
    247         // Didn't encounter any bad characters, looks like plain text.
    248         return @"text/plain";
    249     }
    250 
    251     // Looks like this is a binary file.
    252 
    253     // Sniff for the JPEG magic number.
    254     if ((length >= JPEG_MAGIC_NUMBER_LENGTH) && strncmp(bytes, "\xFF\xD8\xFF\xE0", JPEG_MAGIC_NUMBER_LENGTH) == 0) {
    255         return @"image/jpeg";
    256     }
    257 
    258 #undef JPEG_MAGIC_NUMBER_LENGTH
    259 #undef SCRIPT_TAG_LENGTH
    260 #undef TEXT_HTML_LENGTH
    261 #undef VCARD_HEADER_LENGTH
    262 #undef VCAL_HEADER_LENGTH
    263 
    264     return nil;
    265 }
    266 
    267 @end
    268 
    269 @implementation NSData (WebNSDataExtras)
    270 
    271 -(BOOL)_web_isCaseInsensitiveEqualToCString:(const char *)string
    272 {
    273     ASSERT(string);
    274 
    275     const char *bytes = [self bytes];
    276     return strncasecmp(bytes, string, [self length]) == 0;
    277 }
    278 
    279 static const UInt8 *_findEOL(const UInt8 *bytes, CFIndex len) {
    280 
    281     // According to the HTTP specification EOL is defined as
    282     // a CRLF pair.  Unfortunately, some servers will use LF
    283     // instead.  Worse yet, some servers will use a combination
    284     // of both (e.g. <header>CRLFLF<body>), so findEOL needs
    285     // to be more forgiving.  It will now accept CRLF, LF, or
    286     // CR.
    287     //
    288     // It returns NULL if EOL is not found or it will return
    289     // a pointer to the first terminating character.
    290     CFIndex i;
    291     for (i = 0;  i < len; i++)
    292     {
    293         UInt8 c = bytes[i];
    294         if ('\n' == c) return bytes + i;
    295         if ('\r' == c)
    296         {
    297             // Check to see if spanning buffer bounds
    298             // (CRLF is across reads).  If so, wait for
    299             // next read.
    300             if (i + 1 == len) break;
    301 
    302             return bytes + i;
    303         }
    304     }
    305 
    306     return NULL;
    307 }
    308 
    309 -(NSMutableDictionary *)_webkit_parseRFC822HeaderFields
    310 {
    311     NSMutableDictionary *headerFields = [NSMutableDictionary dictionary];
    312 
    313     const UInt8 *bytes = [self bytes];
    314     unsigned length = [self length];
    315     NSString *lastKey = nil;
    316     const UInt8 *eol;
    317 
    318     // Loop over lines until we're past the header, or we can't find any more end-of-lines
    319     while ((eol = _findEOL(bytes, length))) {
    320         const UInt8 *line = bytes;
    321         SInt32 lineLength = eol - bytes;
    322 
    323         // Move bytes to the character after the terminator as returned by _findEOL.
    324         bytes = eol + 1;
    325         if (('\r' == *eol) && ('\n' == *bytes)) {
    326             bytes++; // Safe since _findEOL won't return a spanning CRLF.
    327         }
    328 
    329         length -= (bytes - line);
    330         if (lineLength == 0) {
    331             // Blank line; we're at the end of the header
    332             break;
    333         }
    334         else if (*line == ' ' || *line == '\t') {
    335             // Continuation of the previous header
    336             if (!lastKey) {
    337                 // malformed header; ignore it and continue
    338                 continue;
    339             }
    340             else {
    341                 // Merge the continuation of the previous header
    342                 NSString *currentValue = [headerFields objectForKey:lastKey];
    343                 NSString *newValue = (NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, line, lineLength, kCFStringEncodingISOLatin1, FALSE));
    344                 ASSERT(currentValue);
    345                 ASSERT(newValue);
    346                 NSString *mergedValue = [[NSString alloc] initWithFormat:@"%@%@", currentValue, newValue];
    347                 [headerFields setObject:(NSString *)mergedValue forKey:lastKey];
    348                 [newValue release];
    349                 [mergedValue release];
    350                 // Note: currentValue is autoreleased
    351             }
    352         }
    353         else {
    354             // Brand new header
    355             const UInt8 *colon;
    356             for (colon = line; *colon != ':' && colon != eol; colon ++) {
    357                 // empty loop
    358             }
    359             if (colon == eol) {
    360                 // malformed header; ignore it and continue
    361                 continue;
    362             }
    363             else {
    364                 lastKey = (NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, line, colon - line, kCFStringEncodingISOLatin1, FALSE));
    365                 [lastKey autorelease];
    366                 NSString *value = [lastKey _web_capitalizeRFC822HeaderFieldName];
    367                 lastKey = value;
    368                 for (colon++; colon != eol; colon++) {
    369                     if (*colon != ' ' && *colon != '\t') {
    370                         break;
    371                     }
    372                 }
    373                 if (colon == eol) {
    374                     value = [[NSString alloc] initWithString:@""];
    375                     [value autorelease];
    376                 }
    377                 else {
    378                     value = (NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, colon, eol-colon, kCFStringEncodingISOLatin1, FALSE));
    379                     [value autorelease];
    380                 }
    381                 NSString *oldValue = [headerFields objectForKey:lastKey];
    382                 if (oldValue) {
    383                     NSString *newValue = [[NSString alloc] initWithFormat:@"%@, %@", oldValue, value];
    384                     value = newValue;
    385                     [newValue autorelease];
    386                 }
    387                 [headerFields setObject:(NSString *)value forKey:lastKey];
    388             }
    389         }
    390     }
    391 
    392     return headerFields;
    393 }
    394 
    395 - (BOOL)_web_startsWithBlankLine
    396 {
    397     return [self length] > 0 && ((const char *)[self bytes])[0] == '\n';
    398 }
    399 
    400 - (NSInteger)_web_locationAfterFirstBlankLine
    401 {
    402     const char *bytes = (const char *)[self bytes];
    403     unsigned length = [self length];
    404 
    405     unsigned i;
    406     for (i = 0; i < length - 4; i++) {
    407 
    408         //  Support for Acrobat. It sends "\n\n".
    409         if (bytes[i] == '\n' && bytes[i+1] == '\n') {
    410             return i+2;
    411         }
    412 
    413         // Returns the position after 2 CRLF's or 1 CRLF if it is the first line.
    414         if (bytes[i] == '\r' && bytes[i+1] == '\n') {
    415             i += 2;
    416             if (i == 2) {
    417                 return i;
    418             } else if (bytes[i] == '\n') {
    419                 // Support for Director. It sends "\r\n\n" (3880387).
    420                 return i+1;
    421             } else if (bytes[i] == '\r' && bytes[i+1] == '\n') {
    422                 // Support for Flash. It sends "\r\n\r\n" (3758113).
    423                 return i+2;
    424             }
    425         }
    426     }
    427     return NSNotFound;
    428 }
    429 
    430 @end
    431