Home | History | Annotate | Download | only in xmpp
      1 /*
      2  * libjingle
      3  * Copyright 2004--2005, Google Inc.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions are met:
      7  *
      8  *  1. Redistributions of source code must retain the above copyright notice,
      9  *     this list of conditions and the following disclaimer.
     10  *  2. Redistributions in binary form must reproduce the above copyright notice,
     11  *     this list of conditions and the following disclaimer in the documentation
     12  *     and/or other materials provided with the distribution.
     13  *  3. The name of the author may not be used to endorse or promote products
     14  *     derived from this software without specific prior written permission.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
     17  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
     18  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
     19  * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     20  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
     22  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
     23  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
     24  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
     25  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include "talk/xmpp/jid.h"
     29 
     30 #include <ctype.h>
     31 
     32 #include <algorithm>
     33 #include <string>
     34 
     35 #include "talk/base/common.h"
     36 #include "talk/base/logging.h"
     37 #include "talk/xmpp/constants.h"
     38 
     39 namespace buzz {
     40 
     41 Jid::Jid() : data_(NULL) {
     42 }
     43 
     44 Jid::Jid(bool is_special, const std::string & special) {
     45   data_ = is_special ? new Data(special, STR_EMPTY, STR_EMPTY) : NULL;
     46 }
     47 
     48 Jid::Jid(const std::string & jid_string) {
     49   if (jid_string == STR_EMPTY) {
     50     data_ = NULL;
     51     return;
     52   }
     53 
     54   // First find the slash and slice of that part
     55   size_t slash = jid_string.find('/');
     56   std::string resource_name = (slash == std::string::npos ? STR_EMPTY :
     57                     jid_string.substr(slash + 1));
     58 
     59   // Now look for the node
     60   std::string node_name;
     61   size_t at = jid_string.find('@');
     62   size_t domain_begin;
     63   if (at < slash && at != std::string::npos) {
     64     node_name = jid_string.substr(0, at);
     65     domain_begin = at + 1;
     66   } else {
     67     domain_begin = 0;
     68   }
     69 
     70   // Now take what is left as the domain
     71   size_t domain_length =
     72     (  slash == std::string::npos
     73      ? jid_string.length() - domain_begin
     74      : slash - domain_begin);
     75 
     76   // avoid allocating these constants repeatedly
     77   std::string domain_name;
     78 
     79   if (domain_length == 9  && jid_string.find("gmail.com", domain_begin) == domain_begin) {
     80     domain_name = STR_GMAIL_COM;
     81   }
     82   else if (domain_length == 14 && jid_string.find("googlemail.com", domain_begin) == domain_begin) {
     83     domain_name = STR_GOOGLEMAIL_COM;
     84   }
     85   else if (domain_length == 10 && jid_string.find("google.com", domain_begin) == domain_begin) {
     86     domain_name = STR_GOOGLE_COM;
     87   }
     88   else {
     89     domain_name = jid_string.substr(domain_begin, domain_length);
     90   }
     91 
     92   // If the domain is empty we have a non-valid jid and we should empty
     93   // everything else out
     94   if (domain_name.empty()) {
     95     data_ = NULL;
     96     return;
     97   }
     98 
     99   bool valid_node;
    100   std::string validated_node = prepNode(node_name,
    101       node_name.begin(), node_name.end(), &valid_node);
    102   bool valid_domain;
    103   std::string validated_domain = prepDomain(domain_name,
    104       domain_name.begin(), domain_name.end(), &valid_domain);
    105   bool valid_resource;
    106   std::string validated_resource = prepResource(resource_name,
    107       resource_name.begin(), resource_name.end(), &valid_resource);
    108 
    109   if (!valid_node || !valid_domain || !valid_resource) {
    110     data_ = NULL;
    111     return;
    112   }
    113 
    114   data_ = new Data(validated_node, validated_domain, validated_resource);
    115 }
    116 
    117 Jid::Jid(const std::string & node_name,
    118          const std::string & domain_name,
    119          const std::string & resource_name) {
    120   if (domain_name.empty()) {
    121     data_ = NULL;
    122     return;
    123   }
    124 
    125   bool valid_node;
    126   std::string validated_node = prepNode(node_name,
    127       node_name.begin(), node_name.end(), &valid_node);
    128   bool valid_domain;
    129   std::string validated_domain = prepDomain(domain_name,
    130       domain_name.begin(), domain_name.end(), &valid_domain);
    131   bool valid_resource;
    132   std::string validated_resource = prepResource(resource_name,
    133       resource_name.begin(), resource_name.end(), &valid_resource);
    134 
    135   if (!valid_node || !valid_domain || !valid_resource) {
    136     data_ = NULL;
    137     return;
    138   }
    139 
    140   data_ = new Data(validated_node, validated_domain, validated_resource);
    141 }
    142 
    143 std::string Jid::Str() const {
    144   if (!IsValid())
    145     return STR_EMPTY;
    146 
    147   std::string ret;
    148 
    149   if (!data_->node_name_.empty())
    150     ret = data_->node_name_ + "@";
    151 
    152   ASSERT(data_->domain_name_ != STR_EMPTY);
    153   ret += data_->domain_name_;
    154 
    155   if (!data_->resource_name_.empty())
    156     ret += "/" + data_->resource_name_;
    157 
    158   return ret;
    159 }
    160 
    161 bool
    162 Jid::IsValid() const {
    163   return data_ != NULL && !data_->domain_name_.empty();
    164 }
    165 
    166 bool
    167 Jid::IsBare() const {
    168   if (Compare(JID_EMPTY) == 0) {
    169     LOG(LS_VERBOSE) << "Warning: Calling IsBare() on the empty jid";
    170     return true;
    171   }
    172   return IsValid() &&
    173          data_->resource_name_.empty();
    174 }
    175 
    176 bool
    177 Jid::IsFull() const {
    178   return IsValid() &&
    179          !data_->resource_name_.empty();
    180 }
    181 
    182 Jid
    183 Jid::BareJid() const {
    184   if (!IsValid())
    185     return Jid();
    186   if (!IsFull())
    187     return *this;
    188   return Jid(data_->node_name_, data_->domain_name_, STR_EMPTY);
    189 }
    190 
// Disabled mutators: everything between #if 0 and #endif is excluded from
// compilation.  As written, each setter assigns straight through data_
// without a NULL check, so it would not be safe on a JID that has no Data
// record.
// NOTE(review): presumably disabled because a Data record may be shared
// between Jid copies -- confirm against the header before re-enabling.
#if 0
void
Jid::set_node(const std::string & node_name) {
    data_->node_name_ = node_name;
}
void
Jid::set_domain(const std::string & domain_name) {
    data_->domain_name_ = domain_name;
}
void
Jid::set_resource(const std::string & res_name) {
    data_->resource_name_ = res_name;
}
#endif
    205 
    206 bool
    207 Jid::BareEquals(const Jid & other) const {
    208   return (other.data_ == data_ ||
    209           (data_ != NULL &&
    210           other.data_ != NULL &&
    211           other.data_->node_name_ == data_->node_name_ &&
    212           other.data_->domain_name_ == data_->domain_name_));
    213 }
    214 
    215 bool
    216 Jid::operator==(const Jid & other) const {
    217   return (other.data_ == data_ ||
    218           (data_ != NULL &&
    219           other.data_ != NULL &&
    220           other.data_->node_name_ == data_->node_name_ &&
    221           other.data_->domain_name_ == data_->domain_name_ &&
    222           other.data_->resource_name_ == data_->resource_name_));
    223 }
    224 
    225 int
    226 Jid::Compare(const Jid & other) const {
    227   if (other.data_ == data_)
    228     return 0;
    229   if (data_ == NULL)
    230     return -1;
    231   if (other.data_ == NULL)
    232     return 1;
    233 
    234   int compare_result;
    235   compare_result = data_->node_name_.compare(other.data_->node_name_);
    236   if (0 != compare_result)
    237     return compare_result;
    238   compare_result = data_->domain_name_.compare(other.data_->domain_name_);
    239   if (0 != compare_result)
    240     return compare_result;
    241   compare_result = data_->resource_name_.compare(other.data_->resource_name_);
    242   return compare_result;
    243 }
    244 
    245 uint32 Jid::ComputeLameHash() const {
    246   uint32 hash = 0;
    247   // Hash the node portion
    248   {
    249     const std::string &str = node();
    250     for (int i = 0; i < static_cast<int>(str.size()); ++i) {
    251       hash = ((hash << 2) + hash) + str[i];
    252     }
    253   }
    254 
    255   // Hash the domain portion
    256   {
    257     const std::string &str = domain();
    258     for (int i = 0; i < static_cast<int>(str.size()); ++i)
    259       hash = ((hash << 2) + hash) + str[i];
    260   }
    261 
    262   // Hash the resource portion
    263   {
    264     const std::string &str = resource();
    265     for (int i = 0; i < static_cast<int>(str.size()); ++i)
    266       hash = ((hash << 2) + hash) + str[i];
    267   }
    268 
    269   return hash;
    270 }
    271 
    272 // --- JID parsing code: ---
    273 
    274 // Checks and normalizes the node part of a JID.
    275 std::string
    276 Jid::prepNode(const std::string str, std::string::const_iterator start,
    277     std::string::const_iterator end, bool *valid) {
    278   *valid = false;
    279   std::string result;
    280 
    281   for (std::string::const_iterator i = start; i < end; i++) {
    282     bool char_valid = true;
    283     unsigned char ch = *i;
    284     if (ch <= 0x7F) {
    285       result += prepNodeAscii(ch, &char_valid);
    286     }
    287     else {
    288       // TODO: implement the correct stringprep protocol for these
    289       result += tolower(ch);
    290     }
    291     if (!char_valid) {
    292       return STR_EMPTY;
    293     }
    294   }
    295 
    296   if (result.length() > 1023) {
    297     return STR_EMPTY;
    298   }
    299   *valid = true;
    300   return result;
    301 }
    302 
    303 
    304 // Returns the appropriate mapping for an ASCII character in a node.
    305 char
    306 Jid::prepNodeAscii(char ch, bool *valid) {
    307   *valid = true;
    308   switch (ch) {
    309     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    310     case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    311     case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    312     case 'V': case 'W': case 'X': case 'Y': case 'Z':
    313       return (char)(ch + ('a' - 'A'));
    314 
    315     case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05:
    316     case 0x06: case 0x07: case 0x08: case 0x09: case 0x0A: case 0x0B:
    317     case 0x0C: case 0x0D: case 0x0E: case 0x0F: case 0x10: case 0x11:
    318     case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
    319     case ' ': case '&': case '/': case ':': case '<': case '>': case '@':
    320     case '\"': case '\'':
    321     case 0x7F:
    322       *valid = false;
    323       return 0;
    324 
    325     default:
    326       return ch;
    327   }
    328 }
    329 
    330 
    331 // Checks and normalizes the resource part of a JID.
    332 std::string
    333 Jid::prepResource(const std::string str, std::string::const_iterator start,
    334     std::string::const_iterator end, bool *valid) {
    335   *valid = false;
    336   std::string result;
    337 
    338   for (std::string::const_iterator i = start; i < end; i++) {
    339     bool char_valid = true;
    340     unsigned char ch = *i;
    341     if (ch <= 0x7F) {
    342       result += prepResourceAscii(ch, &char_valid);
    343     }
    344     else {
    345       // TODO: implement the correct stringprep protocol for these
    346       result += ch;
    347     }
    348   }
    349 
    350   if (result.length() > 1023) {
    351     return STR_EMPTY;
    352   }
    353   *valid = true;
    354   return result;
    355 }
    356 
    357 // Returns the appropriate mapping for an ASCII character in a resource.
    358 char
    359 Jid::prepResourceAscii(char ch, bool *valid) {
    360   *valid = true;
    361   switch (ch) {
    362     case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05:
    363     case 0x06: case 0x07: case 0x08: case 0x09: case 0x0A: case 0x0B:
    364     case 0x0C: case 0x0D: case 0x0E: case 0x0F: case 0x10: case 0x11:
    365     case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
    366     case 0x7F:
    367       *valid = false;
    368       return 0;
    369 
    370     default:
    371       return ch;
    372   }
    373 }
    374 
    375 // Checks and normalizes the domain part of a JID.
    376 std::string
    377 Jid::prepDomain(const std::string str, std::string::const_iterator start,
    378     std::string::const_iterator end, bool *valid) {
    379   *valid = false;
    380   std::string result;
    381 
    382   // TODO: if the domain contains a ':', then we should parse it
    383   // as an IPv6 address rather than giving an error about illegal domain.
    384   prepDomain(str, start, end, &result, valid);
    385   if (!*valid) {
    386     return STR_EMPTY;
    387   }
    388 
    389   if (result.length() > 1023) {
    390     return STR_EMPTY;
    391   }
    392   *valid = true;
    393   return result;
    394 }
    395 
    396 
    397 // Checks and normalizes an IDNA domain.
    398 void
    399 Jid::prepDomain(const std::string str, std::string::const_iterator start,
    400     std::string::const_iterator end, std::string *buf, bool *valid) {
    401   *valid = false;
    402   std::string::const_iterator last = start;
    403   for (std::string::const_iterator i = start; i < end; i++) {
    404     bool label_valid = true;
    405     char ch = *i;
    406     switch (ch) {
    407       case 0x002E:
    408 #if 0 // FIX: This isn't UTF-8-aware.
    409       case 0x3002:
    410       case 0xFF0E:
    411       case 0xFF61:
    412 #endif
    413         prepDomainLabel(str, last, i, buf, &label_valid);
    414         *buf += '.';
    415         last = i + 1;
    416         break;
    417     }
    418     if (!label_valid) {
    419       return;
    420     }
    421   }
    422   prepDomainLabel(str, last, end, buf, valid);
    423 }
    424 
    425 // Checks and normalizes a domain label.
    426 void
    427 Jid::prepDomainLabel(const std::string str, std::string::const_iterator start,
    428     std::string::const_iterator end, std::string *buf, bool *valid) {
    429   *valid = false;
    430 
    431   int startLen = buf->length();
    432   for (std::string::const_iterator i = start; i < end; i++) {
    433     bool char_valid = true;
    434     unsigned char ch = *i;
    435     if (ch <= 0x7F) {
    436       *buf += prepDomainLabelAscii(ch, &char_valid);
    437     }
    438     else {
    439       // TODO: implement ToASCII for these
    440       *buf += ch;
    441     }
    442     if (!char_valid) {
    443       return;
    444     }
    445   }
    446 
    447   int count = buf->length() - startLen;
    448   if (count == 0) {
    449     return;
    450   }
    451   else if (count > 63) {
    452     return;
    453   }
    454 
    455   // Is this check needed? See comment in prepDomainLabelAscii.
    456   if ((*buf)[startLen] == '-') {
    457     return;
    458   }
    459   if ((*buf)[buf->length() - 1] == '-') {
    460     return;
    461   }
    462   *valid = true;
    463 }
    464 
    465 
    466 // Returns the appropriate mapping for an ASCII character in a domain label.
    467 char
    468 Jid::prepDomainLabelAscii(char ch, bool *valid) {
    469   *valid = true;
    470   // TODO: A literal reading of the spec seems to say that we do
    471   // not need to check for these illegal characters (an "internationalized
    472   // domain label" runs ToASCII with UseSTD3... set to false).  But that
    473   // can't be right.  We should at least be checking that there are no '/'
    474   // or '@' characters in the domain.  Perhaps we should see what others
    475   // do in this case.
    476 
    477   switch (ch) {
    478     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    479     case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    480     case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    481     case 'V': case 'W': case 'X': case 'Y': case 'Z':
    482       return (char)(ch + ('a' - 'A'));
    483 
    484     case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05:
    485     case 0x06: case 0x07: case 0x08: case 0x09: case 0x0A: case 0x0B:
    486     case 0x0C: case 0x0D: case 0x0E: case 0x0F: case 0x10: case 0x11:
    487     case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
    488     case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D:
    489     case 0x1E: case 0x1F: case 0x20: case 0x21: case 0x22: case 0x23:
    490     case 0x24: case 0x25: case 0x26: case 0x27: case 0x28: case 0x29:
    491     case 0x2A: case 0x2B: case 0x2C: case 0x2E: case 0x2F: case 0x3A:
    492     case 0x3B: case 0x3C: case 0x3D: case 0x3E: case 0x3F: case 0x40:
    493     case 0x5B: case 0x5C: case 0x5D: case 0x5E: case 0x5F: case 0x60:
    494     case 0x7B: case 0x7C: case 0x7D: case 0x7E: case 0x7F:
    495       *valid = false;
    496       return 0;
    497 
    498     default:
    499       return ch;
    500   }
    501 }
    502 
    503 }
    504