Home | History | Annotate | Download | only in server
      1 /* Copyright 2016 The Chromium OS Authors. All rights reserved.
      2  * Use of this source code is governed by a BSD-style license that can be
      3  * found in the LICENSE file.
      4  */
      5 
      6 #include <stdlib.h>
      7 #include <stdint.h>
      8 #include <sys/types.h>
      9 
     10 #ifdef CRAS_DBUS
     11 #include <dbus/dbus.h>
     12 #endif
     13 
     14 #include "cras_utf8.h"
     15 #include "cras_util.h"
     16 
     17 static const uint8_t kUTF8ByteOrderMask[3] = { 0xef, 0xbb, 0xbf };
     18 
     19 typedef struct u8range {
     20 	uint8_t min;
     21 	uint8_t max;
     22 } u8range_t;
     23 
     24 static const u8range_t kUTF8TwoByteSeq[] = {
     25 	{ 0xc2, 0xdf },
     26 	{ 0x80, 0xbf },
     27 	{ 0, 0 }
     28 };
     29 
     30 static const u8range_t kUTF8ByteSeqE0[] = {
     31 	{ 0xe0, 0xe0 },
     32 	{ 0xa0, 0xbf },
     33 	{ 0x80, 0xbf },
     34 	{ 0, 0 }
     35 };
     36 
     37 static const u8range_t kUTF8ByteSeqE1EC[] = {
     38 	{ 0xe1, 0xec },
     39 	{ 0x80, 0xbf },
     40 	{ 0x80, 0xbf },
     41 	{ 0, 0 }
     42 };
     43 
     44 static const u8range_t kUTF8ByteSeqED[] = {
     45 	{ 0xed, 0xed },
     46 	{ 0x80, 0x9f },
     47 	{ 0x80, 0xbf },
     48 	{ 0, 0 }
     49 };
     50 
     51 static const u8range_t kUTF8ByteSeqEEEF[] = {
     52 	{ 0xee, 0xef },
     53 	{ 0x80, 0xbf },
     54 	{ 0x80, 0xbf },
     55 	{ 0, 0 }
     56 };
     57 
     58 static const u8range_t kUTF8ByteSeqF0[] = {
     59 	{ 0xf0, 0xf0 },
     60 	{ 0x90, 0xbf },
     61 	{ 0x80, 0xbf },
     62 	{ 0x80, 0xbf },
     63 	{ 0, 0 }
     64 };
     65 
     66 static const u8range_t kUTF8ByteSeqF1F3[] = {
     67 	{ 0xf1, 0xf3 },
     68 	{ 0x80, 0xbf },
     69 	{ 0x80, 0xbf },
     70 	{ 0x80, 0xbf },
     71 	{ 0, 0 }
     72 };
     73 
     74 static const u8range_t kUTF8ByteSeqF4[] = {
     75 	{ 0xf4, 0xf4 },
     76 	{ 0x80, 0x8f },
     77 	{ 0x80, 0xbf },
     78 	{ 0x80, 0xbf },
     79 	{ 0, 0 }
     80 };
     81 
     82 static const u8range_t kUTF8NullRange[] = {
     83 	{ 0, 0 }
     84 };
     85 
     86 typedef struct utf8seq {
     87 	const u8range_t *ranges;
     88 } utf8seq_t;
     89 
     90 static const utf8seq_t kUTF8Sequences[] = {
     91 	{ kUTF8TwoByteSeq },
     92 	{ kUTF8ByteSeqE0 },
     93 	{ kUTF8ByteSeqE1EC },
     94 	{ kUTF8ByteSeqED },
     95 	{ kUTF8ByteSeqEEEF },
     96 	{ kUTF8ByteSeqF0 },
     97 	{ kUTF8ByteSeqF1F3 },
     98 	{ kUTF8ByteSeqF4 },
     99 	{ kUTF8NullRange }
    100 };
    101 
    102 int valid_utf8_string(const char *string, size_t *bad_pos)
    103 {
    104 	int bom_chars = 0;
    105 	uint8_t byte;
    106 	const char *pos = string;
    107 	int ret = 1;
    108 	const utf8seq_t *seq = NULL;
    109 	const u8range_t *range = NULL;
    110 
    111 	if (!pos) {
    112 		ret = 0;
    113 		goto error;
    114 	}
    115 
    116 	while ((byte = (uint8_t)*(pos++))) {
    117 		if (!range || range->min == 0) {
    118 			if (byte < 128) {
    119 				/* Ascii character. */
    120 				continue;
    121 			}
    122 
    123 			if (bom_chars < ARRAY_SIZE(kUTF8ByteOrderMask)) {
    124 				if (byte == kUTF8ByteOrderMask[bom_chars]) {
    125 					bom_chars++;
    126 					continue;
    127 				} else {
    128 					/* Characters not matching BOM.
    129 					 * Rewind and assume that there is
    130 					 * no BOM. */
    131 					bom_chars =
    132 					        ARRAY_SIZE(kUTF8ByteOrderMask);
    133                                         pos = string;
    134 					continue;
    135 				}
    136 			}
    137 
    138 			/* Find the matching sequence of characters by
    139 			 * matching the first character in the sequence.
    140 			 */
    141 			seq = kUTF8Sequences;
    142 			while (seq->ranges->min != 0) {
    143 				if (byte >= seq->ranges->min &&
    144 				    byte <= seq->ranges->max) {
    145 					/* Matching sequence. */
    146 					break;
    147 				}
    148 				seq++;
    149 			}
    150 
    151 			if (seq->ranges->min == 0) {
    152 				/* Could not find a matching sequence. */
    153 				ret = 0;
    154 				goto error;
    155 			}
    156 
    157 			/* Found the appropriate sequence. */
    158 			range = seq->ranges + 1;
    159 			continue;
    160 		}
    161 
    162 		if (byte >= range->min && byte <= range->max) {
    163 			range++;
    164 			continue;
    165 		}
    166 
    167 		/* This character doesn't belong in UTF8. */
    168 		ret = 0;
    169 		goto error;
    170 	}
    171 
    172 	if (range && range->min != 0) {
    173 	        /* Stopped in the middle of a sequence. */
    174 	        ret = 0;
    175 	}
    176 
    177 error:
    178 	if (bad_pos)
    179 		*bad_pos = pos - string - 1;
    180 	return ret;
    181 }
    182 
    183 #ifdef CRAS_DBUS
    184 /* Use the DBus implementation if available to ensure that the UTF-8
    185  * sequences match those expected by the DBus implementation. */
    186 
    187 int is_utf8_string(const char *string)
    188 {
    189 	return !!dbus_validate_utf8(string, NULL);
    190 }
    191 
    192 #else
    193 
    194 int is_utf8_string (const char *string) {
    195 	return valid_utf8_string(string, NULL);
    196 }
    197 
    198 #endif
    199