/*
*******************************************************************************
*
*   Copyright (C) 2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucdstrip.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2003feb20
*   created by: Markus W. Scherer
*
*   Simple tool for Unicode Character Database files with semicolon-delimited fields.
*   Removes comments behind data lines but not in others.
*   Reads lines from standard input and writes the result to standard output.
*
*   To compile, just call a C compiler/linker with this source file.
*   On Windows: cl ucdstrip.c
*/

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Return the first character position after the end of the data in l.
 *
 * If l contains a '#', the result points at the first space or tab of the
 * whitespace run that immediately precedes the '#' (or at the '#' itself if
 * there is no such whitespace). Otherwise the result points at the
 * terminating NUL of l.
 *
 * The returned pointer is not const-qualified although l is; the caller may
 * write through it only if the underlying buffer is actually modifiable.
 */
static char *
endOfData(const char *l) {
    char *end;
    char c;

    end=strchr(l, '#');
    if(end!=NULL) {
        /* ignore whitespace before the comment */
        while(l!=end && ((c=*(end-1))==' ' || c=='\t')) {
            --end;
        }
    } else {
        /* no comment: the data ends at the string terminator */
        end=strchr(l, 0);
    }
    return end;
}

/*
 * Copy standard input to standard output line by line, cutting off the
 * trailing '#' comment (and the whitespace in front of it) on data lines.
 *
 * A line counts as a data line when strtol() can parse a non-negative
 * hexadecimal number at its start; the presence of the following semicolon
 * is not verified. All other lines are copied unchanged.
 *
 * Returns 0 on success, or EXIT_FAILURE if an input line does not fit into
 * the line buffer or if reading or writing fails.
 */
extern int
main(int argc, const char *argv[]) {
    static char line[2000];
    char *end;
    size_t length;
    int c;

    (void)argc;
    (void)argv;

    /* fgets() bounds the read to the buffer size, unlike gets() */
    while(fgets(line, (int)sizeof(line), stdin)!=NULL) {
        length=strlen(line);
        if(length>0 && line[length-1]=='\n') {
            /* fgets() keeps the newline; drop it because puts() adds one */
            line[length-1]=0;
        } else if(length==sizeof(line)-1) {
            /*
             * The buffer is full and holds no newline. This is acceptable
             * only if the line ends right here, that is, if the next
             * character is the newline or the end of the input.
             */
            c=getc(stdin);
            if(c!='\n' && c!=EOF) {
                fprintf(stderr,
                        "ucdstrip: input line is longer than %lu characters\n",
                        (unsigned long)(sizeof(line)-1));
                return EXIT_FAILURE;
            }
        }

        if(strtol(line, &end, 16)>=0 && end!=line) {
            /* code point or range followed by semicolon and data, remove comment */
            *endOfData(line)=0;
        }
        puts(line);
    }

    /* report read and write failures through the exit status */
    if(ferror(stdin) || fflush(stdout)!=0 || ferror(stdout)) {
        return EXIT_FAILURE;
    }
    return 0;
}