Home | History | Annotate | Download | only in lib
      1 // SPDX-License-Identifier: GPL-2.0+
      2 /*
      3  *  charset conversion utils
      4  *
      5  *  Copyright (c) 2017 Rob Clark
      6  */
      7 
      8 #include <charset.h>
      9 #include <malloc.h>
     10 
     11 /*
     12  * utf8/utf16 conversion mostly lifted from grub
     13  */
     14 
     15 size_t utf16_strlen(const uint16_t *in)
     16 {
     17 	size_t i;
     18 	for (i = 0; in[i]; i++);
     19 	return i;
     20 }
     21 
     22 size_t utf16_strnlen(const uint16_t *in, size_t count)
     23 {
     24 	size_t i;
     25 	for (i = 0; count-- && in[i]; i++);
     26 	return i;
     27 }
     28 
     29 uint16_t *utf16_strcpy(uint16_t *dest, const uint16_t *src)
     30 {
     31 	uint16_t *tmp = dest;
     32 
     33 	while ((*dest++ = *src++) != '\0')
     34 		/* nothing */;
     35 	return tmp;
     36 
     37 }
     38 
     39 uint16_t *utf16_strdup(const uint16_t *s)
     40 {
     41 	uint16_t *new;
     42 	if (!s || !(new = malloc((utf16_strlen(s) + 1) * 2)))
     43 		return NULL;
     44 	utf16_strcpy(new, s);
     45 	return new;
     46 }
     47 
     48 /* Convert UTF-16 to UTF-8.  */
     49 uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
     50 {
     51 	uint32_t code_high = 0;
     52 
     53 	while (size--) {
     54 		uint32_t code = *src++;
     55 
     56 		if (code_high) {
     57 			if (code >= 0xDC00 && code <= 0xDFFF) {
     58 				/* Surrogate pair.  */
     59 				code = ((code_high - 0xD800) << 10) + (code - 0xDC00) + 0x10000;
     60 
     61 				*dest++ = (code >> 18) | 0xF0;
     62 				*dest++ = ((code >> 12) & 0x3F) | 0x80;
     63 				*dest++ = ((code >> 6) & 0x3F) | 0x80;
     64 				*dest++ = (code & 0x3F) | 0x80;
     65 			} else {
     66 				/* Error...  */
     67 				*dest++ = '?';
     68 				/* *src may be valid. Don't eat it.  */
     69 				src--;
     70 			}
     71 
     72 			code_high = 0;
     73 		} else {
     74 			if (code <= 0x007F) {
     75 				*dest++ = code;
     76 			} else if (code <= 0x07FF) {
     77 				*dest++ = (code >> 6) | 0xC0;
     78 				*dest++ = (code & 0x3F) | 0x80;
     79 			} else if (code >= 0xD800 && code <= 0xDBFF) {
     80 				code_high = code;
     81 				continue;
     82 			} else if (code >= 0xDC00 && code <= 0xDFFF) {
     83 				/* Error... */
     84 				*dest++ = '?';
     85 			} else if (code < 0x10000) {
     86 				*dest++ = (code >> 12) | 0xE0;
     87 				*dest++ = ((code >> 6) & 0x3F) | 0x80;
     88 				*dest++ = (code & 0x3F) | 0x80;
     89 			} else {
     90 				*dest++ = (code >> 18) | 0xF0;
     91 				*dest++ = ((code >> 12) & 0x3F) | 0x80;
     92 				*dest++ = ((code >> 6) & 0x3F) | 0x80;
     93 				*dest++ = (code & 0x3F) | 0x80;
     94 			}
     95 		}
     96 	}
     97 
     98 	return dest;
     99 }
    100 
    101 uint16_t *utf8_to_utf16(uint16_t *dest, const uint8_t *src, size_t size)
    102 {
    103 	while (size--) {
    104 		int extension_bytes;
    105 		uint32_t code;
    106 
    107 		extension_bytes = 0;
    108 		if (*src <= 0x7f) {
    109 			code = *src++;
    110 			/* Exit on zero byte */
    111 			if (!code)
    112 				size = 0;
    113 		} else if (*src <= 0xbf) {
    114 			/* Illegal code */
    115 			code = '?';
    116 		} else if (*src <= 0xdf) {
    117 			code = *src++ & 0x1f;
    118 			extension_bytes = 1;
    119 		} else if (*src <= 0xef) {
    120 			code = *src++ & 0x0f;
    121 			extension_bytes = 2;
    122 		} else if (*src <= 0xf7) {
    123 			code = *src++ & 0x07;
    124 			extension_bytes = 3;
    125 		} else {
    126 			/* Illegal code */
    127 			code = '?';
    128 		}
    129 
    130 		for (; extension_bytes && size; --size, --extension_bytes) {
    131 			if ((*src & 0xc0) == 0x80) {
    132 				code <<= 6;
    133 				code |= *src++ & 0x3f;
    134 			} else {
    135 				/* Illegal code */
    136 				code = '?';
    137 				++src;
    138 				--size;
    139 				break;
    140 			}
    141 		}
    142 
    143 		if (code < 0x10000) {
    144 			*dest++ = code;
    145 		} else {
    146 			/*
    147 			 * Simplified expression for
    148 			 * (((code - 0x10000) >> 10) & 0x3ff) | 0xd800
    149 			 */
    150 			*dest++ = (code >> 10) + 0xd7c0;
    151 			*dest++ = (code & 0x3ff) | 0xdc00;
    152 		}
    153 	}
    154 	return dest;
    155 }
    156