Home | History | Annotate | Download | only in libutf
      1 /*
      2  * The authors of this software are Rob Pike and Ken Thompson.
      3  *              Copyright (c) 2002 by Lucent Technologies.
      4  * Permission to use, copy, modify, and distribute this software for any
      5  * purpose without fee is hereby granted, provided that this entire notice
      6  * is included in all copies of any software which is or includes a copy
      7  * or modification of this software and in all copies of the supporting
      8  * documentation for such software.
      9  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
     10  * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
     11  * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
     12  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
     13  */
     14 #include "utf.h"
     15 #include "utfdef.h"
     16 
     17 static
     18 Rune*
     19 rbsearch(Rune c, Rune *t, int n, int ne)
     20 {
     21 	Rune *p;
     22 	int m;
     23 
     24 	while(n > 1) {
     25 		m = n >> 1;
     26 		p = t + m*ne;
     27 		if(c >= p[0]) {
     28 			t = p;
     29 			n = n-m;
     30 		} else
     31 			n = m;
     32 	}
     33 	if(n && c >= t[0])
     34 		return t;
     35 	return 0;
     36 }
     37 
     38 /*
     39  * The "ideographic" property is hard to extract from UnicodeData.txt,
     40  * so it is hard coded here.
     41  *
     42  * It is defined in the Unicode PropList.txt file, for example
     43  * PropList-3.0.0.txt.  Unlike the UnicodeData.txt file, the format of
     44  * PropList changes between versions.  This property appears relatively static;
     45  * it is the same in version 4.0.1, except that version defines some >16 bit
     46  * chars as ideographic as well: 20000..2a6d6, and 2f800..2Fa1d.
     47  */
     48 static Rune __isideographicr[] = {
     49 	0x3006, 0x3007,			/* 3006 not in Unicode 2, in 2.1 */
     50 	0x3021, 0x3029,
     51 	0x3038, 0x303a,			/* not in Unicode 2 or 2.1 */
     52 	0x3400, 0x4db5,			/* not in Unicode 2 or 2.1 */
     53 	0x4e00, 0x9fbb,			/* 0x9FA6..0x9FBB added for 4.1.0? */
     54 	0xf900, 0xfa2d,
     55         0x20000, 0x2A6D6,
     56         0x2F800, 0x2FA1D,
     57 };
     58 
     59 int
     60 isideographicrune(Rune c)
     61 {
     62 	Rune *p;
     63 
     64 	p = rbsearch(c, __isideographicr, nelem(__isideographicr)/2, 2);
     65 	if(p && c >= p[0] && c <= p[1])
     66 		return 1;
     67 	return 0;
     68 }
     69 
     70 #include "runetypebody.h"
     71