1 /* 2 * libjingle 3 * Copyright 2004--2005, Google Inc. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright notice, 11 * this list of conditions and the following disclaimer in the documentation 12 * and/or other materials provided with the distribution. 13 * 3. The name of the author may not be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO 19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "talk/xmpp/jid.h" 29 30 #include <ctype.h> 31 32 #include <algorithm> 33 #include <string> 34 35 #include "talk/base/common.h" 36 #include "talk/base/logging.h" 37 #include "talk/xmpp/constants.h" 38 39 namespace buzz { 40 41 Jid::Jid() : data_(NULL) { 42 } 43 44 Jid::Jid(bool is_special, const std::string & special) { 45 data_ = is_special ? new Data(special, STR_EMPTY, STR_EMPTY) : NULL; 46 } 47 48 Jid::Jid(const std::string & jid_string) { 49 if (jid_string == STR_EMPTY) { 50 data_ = NULL; 51 return; 52 } 53 54 // First find the slash and slice of that part 55 size_t slash = jid_string.find('/'); 56 std::string resource_name = (slash == std::string::npos ? STR_EMPTY : 57 jid_string.substr(slash + 1)); 58 59 // Now look for the node 60 std::string node_name; 61 size_t at = jid_string.find('@'); 62 size_t domain_begin; 63 if (at < slash && at != std::string::npos) { 64 node_name = jid_string.substr(0, at); 65 domain_begin = at + 1; 66 } else { 67 domain_begin = 0; 68 } 69 70 // Now take what is left as the domain 71 size_t domain_length = 72 ( slash == std::string::npos 73 ? jid_string.length() - domain_begin 74 : slash - domain_begin); 75 76 // avoid allocating these constants repeatedly 77 std::string domain_name; 78 79 if (domain_length == 9 && jid_string.find("gmail.com", domain_begin) == domain_begin) { 80 domain_name = STR_GMAIL_COM; 81 } 82 else if (domain_length == 14 && jid_string.find("googlemail.com", domain_begin) == domain_begin) { 83 domain_name = STR_GOOGLEMAIL_COM; 84 } 85 else if (domain_length == 10 && jid_string.find("google.com", domain_begin) == domain_begin) { 86 domain_name = STR_GOOGLE_COM; 87 } 88 else { 89 domain_name = jid_string.substr(domain_begin, domain_length); 90 } 91 92 // If the domain is empty we have a non-valid jid and we should empty 93 // everything else out 94 if (domain_name.empty()) { 95 data_ = NULL; 96 return; 97 } 98 99 bool valid_node; 100 std::string validated_node = prepNode(node_name, 101 node_name.begin(), node_name.end(), &valid_node); 102 bool valid_domain; 103 std::string validated_domain = prepDomain(domain_name, 104 domain_name.begin(), domain_name.end(), &valid_domain); 105 bool valid_resource; 106 std::string validated_resource = prepResource(resource_name, 107 resource_name.begin(), resource_name.end(), &valid_resource); 108 109 if (!valid_node || !valid_domain || !valid_resource) { 110 data_ = NULL; 111 return; 112 } 113 114 data_ = new Data(validated_node, validated_domain, validated_resource); 115 } 116 117 Jid::Jid(const std::string & node_name, 118 const std::string & domain_name, 119 const std::string & resource_name) { 120 if (domain_name.empty()) { 121 data_ = NULL; 122 return; 123 } 124 125 bool valid_node; 126 std::string validated_node = prepNode(node_name, 127 node_name.begin(), node_name.end(), &valid_node); 128 bool valid_domain; 129 std::string validated_domain = prepDomain(domain_name, 130 domain_name.begin(), domain_name.end(), &valid_domain); 131 bool valid_resource; 132 std::string validated_resource = prepResource(resource_name, 133 resource_name.begin(), resource_name.end(), &valid_resource); 134 135 if (!valid_node || !valid_domain || !valid_resource) { 136 data_ = NULL; 137 return; 138 } 139 140 data_ = new Data(validated_node, validated_domain, validated_resource); 141 } 142 143 std::string Jid::Str() const { 144 if (!IsValid()) 145 return STR_EMPTY; 146 147 std::string ret; 148 149 if (!data_->node_name_.empty()) 150 ret = data_->node_name_ + "@"; 151 152 ASSERT(data_->domain_name_ != STR_EMPTY); 153 ret += data_->domain_name_; 154 155 if (!data_->resource_name_.empty()) 156 ret += "/" + data_->resource_name_; 157 158 return ret; 159 } 160 161 bool 162 Jid::IsValid() const { 163 return data_ != NULL && !data_->domain_name_.empty(); 164 } 165 166 bool 167 Jid::IsBare() const { 168 if (Compare(JID_EMPTY) == 0) { 169 LOG(LS_VERBOSE) << "Warning: Calling IsBare() on the empty jid"; 170 return true; 171 } 172 return IsValid() && 173 data_->resource_name_.empty(); 174 } 175 176 bool 177 Jid::IsFull() const { 178 return IsValid() && 179 !data_->resource_name_.empty(); 180 } 181 182 Jid 183 Jid::BareJid() const { 184 if (!IsValid()) 185 return Jid(); 186 if (!IsFull()) 187 return *this; 188 return Jid(data_->node_name_, data_->domain_name_, STR_EMPTY); 189 } 190 191 #if 0 192 void 193 Jid::set_node(const std::string & node_name) { 194 data_->node_name_ = node_name; 195 } 196 void 197 Jid::set_domain(const std::string & domain_name) { 198 data_->domain_name_ = domain_name; 199 } 200 void 201 Jid::set_resource(const std::string & res_name) { 202 data_->resource_name_ = res_name; 203 } 204 #endif 205 206 bool 207 Jid::BareEquals(const Jid & other) const { 208 return (other.data_ == data_ || 209 (data_ != NULL && 210 other.data_ != NULL && 211 other.data_->node_name_ == data_->node_name_ && 212 other.data_->domain_name_ == data_->domain_name_)); 213 } 214 215 bool 216 Jid::operator==(const Jid & other) const { 217 return (other.data_ == data_ || 218 (data_ != NULL && 219 other.data_ != NULL && 220 other.data_->node_name_ == data_->node_name_ && 221 other.data_->domain_name_ == data_->domain_name_ && 222 other.data_->resource_name_ == data_->resource_name_)); 223 } 224 225 int 226 Jid::Compare(const Jid & other) const { 227 if (other.data_ == data_) 228 return 0; 229 if (data_ == NULL) 230 return -1; 231 if (other.data_ == NULL) 232 return 1; 233 234 int compare_result; 235 compare_result = data_->node_name_.compare(other.data_->node_name_); 236 if (0 != compare_result) 237 return compare_result; 238 compare_result = data_->domain_name_.compare(other.data_->domain_name_); 239 if (0 != compare_result) 240 return compare_result; 241 compare_result = data_->resource_name_.compare(other.data_->resource_name_); 242 return compare_result; 243 } 244 245 uint32 Jid::ComputeLameHash() const { 246 uint32 hash = 0; 247 // Hash the node portion 248 { 249 const std::string &str = node(); 250 for (int i = 0; i < static_cast<int>(str.size()); ++i) { 251 hash = ((hash << 2) + hash) + str[i]; 252 } 253 } 254 255 // Hash the domain portion 256 { 257 const std::string &str = domain(); 258 for (int i = 0; i < static_cast<int>(str.size()); ++i) 259 hash = ((hash << 2) + hash) + str[i]; 260 } 261 262 // Hash the resource portion 263 { 264 const std::string &str = resource(); 265 for (int i = 0; i < static_cast<int>(str.size()); ++i) 266 hash = ((hash << 2) + hash) + str[i]; 267 } 268 269 return hash; 270 } 271 272 // --- JID parsing code: --- 273 274 // Checks and normalizes the node part of a JID. 275 std::string 276 Jid::prepNode(const std::string str, std::string::const_iterator start, 277 std::string::const_iterator end, bool *valid) { 278 *valid = false; 279 std::string result; 280 281 for (std::string::const_iterator i = start; i < end; i++) { 282 bool char_valid = true; 283 unsigned char ch = *i; 284 if (ch <= 0x7F) { 285 result += prepNodeAscii(ch, &char_valid); 286 } 287 else { 288 // TODO: implement the correct stringprep protocol for these 289 result += tolower(ch); 290 } 291 if (!char_valid) { 292 return STR_EMPTY; 293 } 294 } 295 296 if (result.length() > 1023) { 297 return STR_EMPTY; 298 } 299 *valid = true; 300 return result; 301 } 302 303 304 // Returns the appropriate mapping for an ASCII character in a node. 305 char 306 Jid::prepNodeAscii(char ch, bool *valid) { 307 *valid = true; 308 switch (ch) { 309 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': 310 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': 311 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': 312 case 'V': case 'W': case 'X': case 'Y': case 'Z': 313 return (char)(ch + ('a' - 'A')); 314 315 case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: 316 case 0x06: case 0x07: case 0x08: case 0x09: case 0x0A: case 0x0B: 317 case 0x0C: case 0x0D: case 0x0E: case 0x0F: case 0x10: case 0x11: 318 case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: 319 case ' ': case '&': case '/': case ':': case '<': case '>': case '@': 320 case '\"': case '\'': 321 case 0x7F: 322 *valid = false; 323 return 0; 324 325 default: 326 return ch; 327 } 328 } 329 330 331 // Checks and normalizes the resource part of a JID. 332 std::string 333 Jid::prepResource(const std::string str, std::string::const_iterator start, 334 std::string::const_iterator end, bool *valid) { 335 *valid = false; 336 std::string result; 337 338 for (std::string::const_iterator i = start; i < end; i++) { 339 bool char_valid = true; 340 unsigned char ch = *i; 341 if (ch <= 0x7F) { 342 result += prepResourceAscii(ch, &char_valid); 343 } 344 else { 345 // TODO: implement the correct stringprep protocol for these 346 result += ch; 347 } 348 } 349 350 if (result.length() > 1023) { 351 return STR_EMPTY; 352 } 353 *valid = true; 354 return result; 355 } 356 357 // Returns the appropriate mapping for an ASCII character in a resource. 358 char 359 Jid::prepResourceAscii(char ch, bool *valid) { 360 *valid = true; 361 switch (ch) { 362 case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: 363 case 0x06: case 0x07: case 0x08: case 0x09: case 0x0A: case 0x0B: 364 case 0x0C: case 0x0D: case 0x0E: case 0x0F: case 0x10: case 0x11: 365 case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: 366 case 0x7F: 367 *valid = false; 368 return 0; 369 370 default: 371 return ch; 372 } 373 } 374 375 // Checks and normalizes the domain part of a JID. 376 std::string 377 Jid::prepDomain(const std::string str, std::string::const_iterator start, 378 std::string::const_iterator end, bool *valid) { 379 *valid = false; 380 std::string result; 381 382 // TODO: if the domain contains a ':', then we should parse it 383 // as an IPv6 address rather than giving an error about illegal domain. 384 prepDomain(str, start, end, &result, valid); 385 if (!*valid) { 386 return STR_EMPTY; 387 } 388 389 if (result.length() > 1023) { 390 return STR_EMPTY; 391 } 392 *valid = true; 393 return result; 394 } 395 396 397 // Checks and normalizes an IDNA domain. 398 void 399 Jid::prepDomain(const std::string str, std::string::const_iterator start, 400 std::string::const_iterator end, std::string *buf, bool *valid) { 401 *valid = false; 402 std::string::const_iterator last = start; 403 for (std::string::const_iterator i = start; i < end; i++) { 404 bool label_valid = true; 405 char ch = *i; 406 switch (ch) { 407 case 0x002E: 408 #if 0 // FIX: This isn't UTF-8-aware. 409 case 0x3002: 410 case 0xFF0E: 411 case 0xFF61: 412 #endif 413 prepDomainLabel(str, last, i, buf, &label_valid); 414 *buf += '.'; 415 last = i + 1; 416 break; 417 } 418 if (!label_valid) { 419 return; 420 } 421 } 422 prepDomainLabel(str, last, end, buf, valid); 423 } 424 425 // Checks and normalizes a domain label. 426 void 427 Jid::prepDomainLabel(const std::string str, std::string::const_iterator start, 428 std::string::const_iterator end, std::string *buf, bool *valid) { 429 *valid = false; 430 431 int startLen = buf->length(); 432 for (std::string::const_iterator i = start; i < end; i++) { 433 bool char_valid = true; 434 unsigned char ch = *i; 435 if (ch <= 0x7F) { 436 *buf += prepDomainLabelAscii(ch, &char_valid); 437 } 438 else { 439 // TODO: implement ToASCII for these 440 *buf += ch; 441 } 442 if (!char_valid) { 443 return; 444 } 445 } 446 447 int count = buf->length() - startLen; 448 if (count == 0) { 449 return; 450 } 451 else if (count > 63) { 452 return; 453 } 454 455 // Is this check needed? See comment in prepDomainLabelAscii. 456 if ((*buf)[startLen] == '-') { 457 return; 458 } 459 if ((*buf)[buf->length() - 1] == '-') { 460 return; 461 } 462 *valid = true; 463 } 464 465 466 // Returns the appropriate mapping for an ASCII character in a domain label. 467 char 468 Jid::prepDomainLabelAscii(char ch, bool *valid) { 469 *valid = true; 470 // TODO: A literal reading of the spec seems to say that we do 471 // not need to check for these illegal characters (an "internationalized 472 // domain label" runs ToASCII with UseSTD3... set to false). But that 473 // can't be right. We should at least be checking that there are no '/' 474 // or '@' characters in the domain. Perhaps we should see what others 475 // do in this case. 476 477 switch (ch) { 478 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': 479 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': 480 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': 481 case 'V': case 'W': case 'X': case 'Y': case 'Z': 482 return (char)(ch + ('a' - 'A')); 483 484 case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: 485 case 0x06: case 0x07: case 0x08: case 0x09: case 0x0A: case 0x0B: 486 case 0x0C: case 0x0D: case 0x0E: case 0x0F: case 0x10: case 0x11: 487 case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: 488 case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D: 489 case 0x1E: case 0x1F: case 0x20: case 0x21: case 0x22: case 0x23: 490 case 0x24: case 0x25: case 0x26: case 0x27: case 0x28: case 0x29: 491 case 0x2A: case 0x2B: case 0x2C: case 0x2E: case 0x2F: case 0x3A: 492 case 0x3B: case 0x3C: case 0x3D: case 0x3E: case 0x3F: case 0x40: 493 case 0x5B: case 0x5C: case 0x5D: case 0x5E: case 0x5F: case 0x60: 494 case 0x7B: case 0x7C: case 0x7D: case 0x7E: case 0x7F: 495 *valid = false; 496 return 0; 497 498 default: 499 return ch; 500 } 501 } 502 503 } 504