/*
 * Copyright (C) 2004-2005 Kay Sievers <kay.sievers (at) vrfy.org>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License as published by the
 *	Free Software Foundation version 2 of the License.
 *
 *	This program is distributed in the hope that it will be useful, but
 *	WITHOUT ANY WARRANTY; without even the implied warranty of
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *	General Public License for more details.
 *
 *	You should have received a copy of the GNU General Public License along
 *	with this program; if not, write to the Free Software Foundation, Inc.,
 *	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 */

#include <string.h>	/* strlen, strchr */
#include <strings.h>	/* strcasecmp */

/*
 * Return 1 if str equals "true", "yes" or "1" (compared case-insensitively),
 * 0 otherwise. str must be a valid NUL-terminated string.
 */
static int string_is_true(const char *str)
{
	if (strcasecmp(str, "true") == 0)
		return 1;
	if (strcasecmp(str, "yes") == 0)
		return 1;
	if (strcasecmp(str, "1") == 0)
		return 1;
	return 0;
}

/*
 * Strip every trailing occurrence of c from path, in place, by overwriting
 * it with a NUL terminator. A string made up only of c becomes empty.
 */
static void remove_trailing_chars(char *path, char c)
{
	size_t len;

	len = strlen(path);
	while (len > 0 && path[len-1] == c)
		path[--len] = '\0';
}

/*
 * Count of bytes used to encode one unicode char, derived from the lead
 * byte str[0] only. Returns 1..6, or 0 if str[0] cannot start a sequence
 * (a continuation byte 10xxxxxx, or 0xfe/0xff).
 */
static int utf8_encoded_expected_len(const char *str)
{
	unsigned char c = (unsigned char)str[0];

	if (c < 0x80)
		return 1;
	if ((c & 0xe0) == 0xc0)
		return 2;
	if ((c & 0xf0) == 0xe0)
		return 3;
	if ((c & 0xf8) == 0xf0)
		return 4;
	if ((c & 0xfc) == 0xf8)
		return 5;
	if ((c & 0xfe) == 0xfc)
		return 6;
	return 0;
}

/*
 * Decode one unicode char starting at str.
 *
 * Returns the decoded value, or -1 if the lead byte is invalid or one of the
 * following bytes is not a continuation byte (10xxxxxx). Decoding stops at
 * the first bad byte, so a NUL terminator inside the sequence is never
 * stepped over. Overlong forms and out-of-range values are NOT rejected here.
 */
static int utf8_encoded_to_unichar(const char *str)
{
	/* read the bytes as unsigned so masking never depends on char signedness */
	const unsigned char *bytes = (const unsigned char *)str;
	int unichar;
	int len;
	int i;

	len = utf8_encoded_expected_len(str);
	switch (len) {
	case 1:
		return (int)bytes[0];
	case 2:
		unichar = bytes[0] & 0x1f;
		break;
	case 3:
		unichar = bytes[0] & 0x0f;
		break;
	case 4:
		unichar = bytes[0] & 0x07;
		break;
	case 5:
		unichar = bytes[0] & 0x03;
		break;
	case 6:
		unichar = bytes[0] & 0x01;
		break;
	default:
		return -1;
	}

	/* every continuation byte contributes its low 6 bits */
	for (i = 1; i < len; i++) {
		if ((bytes[i] & 0xc0) != 0x80)
			return -1;
		unichar <<= 6;
		unichar |= bytes[i] & 0x3f;
	}

	return unichar;
}

/*
 * Expected number of bytes used to encode one unicode char (1..6).
 * Values below 0x80, including negative ones, yield 1.
 */
static int utf8_unichar_to_encoded_len(int unichar)
{
	if (unichar < 0x80)
		return 1;
	if (unichar < 0x800)
		return 2;
	if (unichar < 0x10000)
		return 3;
	if (unichar < 0x200000)
		return 4;
	if (unichar < 0x4000000)
		return 5;
	return 6;
}

/*
 * Check if a unicode char has a valid numeric range.
 * Returns 1 if valid, 0 otherwise.
 */
static int utf8_unichar_valid_range(int unichar)
{
	/* beyond the end of the unicode code space */
	if (unichar > 0x10ffff)
		return 0;
	/* 0xd800..0xdfff: UTF-16 surrogates */
	if ((unichar & 0xfffff800) == 0xd800)
		return 0;
	/* 0xfdd0..0xfdef: noncharacters */
	if ((unichar > 0xfdcf) && (unichar < 0xfdf0))
		return 0;
	/*
	 * The last two code points of every plane (U+nFFFE and U+nFFFF) are
	 * noncharacters; masking with 0xfffe catches both of them.
	 */
	if ((unichar & 0xfffe) == 0xfffe)
		return 0;
	return 1;
}

/*
 * Validate one encoded unicode char and return its length in bytes.
 *
 * Returns 1 for any single byte below 0x80, 2..4 for a valid multi-byte
 * sequence, and -1 for an invalid lead byte, a truncated or malformed
 * sequence, an overlong encoding, or a value outside the valid range.
 */
static int utf8_encoded_valid_unichar(const char *str)
{
	int len;
	int unichar;
	int i;

	len = utf8_encoded_expected_len(str);
	if (len == 0)
		return -1;

	/* ascii is valid */
	if (len == 1)
		return 1;

	/*
	 * check if expected encoded chars are available; a NUL terminator
	 * inside the sequence has the high bit clear and fails here
	 */
	for (i = 0; i < len; i++)
		if ((str[i] & 0x80) != 0x80)
			return -1;

	unichar = utf8_encoded_to_unichar(str);

	/* check if encoded length matches encoded value (rejects overlong forms) */
	if (utf8_unichar_to_encoded_len(unichar) != len)
		return -1;

	/* check if value has valid range */
	if (!utf8_unichar_valid_range(unichar))
		return -1;

	return len;
}

/*
 * Replace everything but whitelisted plain ascii and valid utf8.
 *
 * str is modified in place: each byte that is neither an ascii letter or
 * digit, nor one of " #$%+-./:=?@_,", nor part of a valid multi-byte utf8
 * sequence is overwritten with '_'. The string length never changes.
 * Returns the number of bytes replaced.
 */
static int replace_untrusted_chars(char *str)
{
	size_t i = 0;
	int replaced = 0;

	while (str[i] != '\0') {
		int len;

		/* valid printable ascii char */
		if ((str[i] >= '0' && str[i] <= '9') ||
		    (str[i] >= 'A' && str[i] <= 'Z') ||
		    (str[i] >= 'a' && str[i] <= 'z') ||
		    strchr(" #$%+-./:=?@_,", str[i])) {
			i++;
			continue;
		}

		/*
		 * valid utf8 is accepted; only multi-byte results count,
		 * ascii outside the whitelist falls through to be replaced
		 */
		len = utf8_encoded_valid_unichar(&str[i]);
		if (len > 1) {
			i += len;
			continue;
		}

		/* everything else is garbage */
		str[i] = '_';
		i++;
		replaced++;
	}

	return replaced;
}