/* Hyphen - hyphenation library using converted TeX hyphenation patterns
 *
 * (C) 1998 Raph Levien
 * (C) 2001 ALTLinux, Moscow
 * (C) 2006, 2007, 2008 László Németh
 *
 * This was part of libHnj library by Raph Levien.
 *
 * Peter Novodvorsky from ALTLinux cut hyphenation part from libHnj
 * to use it in OpenOffice.org.
 *
 * Non-standard and compound word hyphenation support by László Németh.
 *
 * License is the original LibHnj license:
 *
 * LibHnj is dual licensed under LGPL and MPL. Boilerplate for both
 * licenses follows.
 */

/* LibHnj - a library for high quality hyphenation and justification
 * Copyright (C) 1998 Raph Levien
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA  02111-1307  USA.
 */

/*
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.0 (the "MPL"); you may not use this file except in
 * compliance with the MPL.  You may obtain a copy of the MPL at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the MPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL
 * for the specific language governing rights and limitations under the
 * MPL.
 *
 */
#ifndef __HYPHEN_H__
#define __HYPHEN_H__

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

typedef struct _HyphenDict HyphenDict;
typedef struct _HyphenState HyphenState;
typedef struct _HyphenTrans HyphenTrans;

/* NOTE(review): MAX_CHARS is not used anywhere in this header; its meaning
 * has to be confirmed against the implementation. */
#define MAX_CHARS 100
/* Size, in bytes, of the HyphenDict.cset array. */
#define MAX_NAME 20

/*
 * A hyphenation dictionary: the user-tunable hyphenmin options plus the
 * table of HyphenState entries.
 */
struct _HyphenDict {
  /* user options */
  char lhmin;    /* lefthyphenmin: min. hyph. distance from the left side */
  char rhmin;    /* righthyphenmin: min. hyph. distance from the right side */
  char clhmin;   /* min. hyph. distance from the left compound boundary */
  char crhmin;   /* min. hyph. distance from the right compound boundary */
  /* system variables */
  int num_states;        /* presumably the number of entries in `states`
                            -- confirm against the loader */
  char cset[MAX_NAME];   /* presumably the character set name of the
                            dictionary -- confirm against the loader */
  int utf8;              /* presumably non-zero when the dictionary is UTF-8
                            encoded -- confirm against the loader */
  HyphenState *states;   /* state table (see struct _HyphenState) */
  HyphenDict *nextlevel; /* another dictionary; NOTE(review): looks like a
                            further hyphenation level for compound words
                            -- confirm against the implementation */
};

/*
 * One entry of the dictionary's state table.
 *
 * NOTE(review): repl, replindex and replcut look like the per-pattern
 * counterparts of the rep, pos and cut outputs described at
 * hnj_hyphen_hyphenate2() -- confirm against the implementation.
 */
struct _HyphenState {
  char *match;            /* presumably the hyphenation values of the pattern
                             matched in this state, if any -- confirm */
  char *repl;             /* presumably the replacement text of a
                             non-standard hyphenation -- confirm */
  signed char replindex;  /* presumably the position belonging to `repl`
                             -- confirm */
  signed char replcut;    /* presumably the cut length belonging to `repl`
                             -- confirm */
  int fallback_state;     /* index of another state; presumably taken when no
                             transition matches -- confirm */
  int num_trans;          /* presumably the number of entries in `trans`
                             -- confirm */
  HyphenTrans *trans;     /* outgoing transitions (see struct _HyphenTrans) */
};

/* A single transition of a state: a character and a target state. */
struct _HyphenTrans {
  char ch;        /* character labelling the transition */
  int new_state;  /* presumably an index into HyphenDict.states -- confirm */
};

/*
 * Dictionary loading and disposal.
 *
 * fn:            presumably the path of the dictionary file -- confirm
 *                against the implementation.
 * dict_contents: dictionary data held in memory.
 * dict_length:   presumably the byte length of dict_contents -- confirm.
 *
 * Failure behaviour (for instance whether NULL is returned) is not visible
 * in this header.  hnj_hyphen_free() presumably releases a dictionary
 * returned by one of the two loaders.
 */
HyphenDict *hnj_hyphen_load (const char *fn);
HyphenDict *hnj_hyphen_load_from_buffer (const char *dict_contents,
                                         int dict_length);
void hnj_hyphen_free (HyphenDict *dict);

/* obsolete, use the hnj_hyphen_hyphenate2() or *hyphenate3() functions */
int hnj_hyphen_hyphenate (HyphenDict *dict,
                          const char *word, int word_size,
                          char *hyphens);

/*

 int hnj_hyphen_hyphenate2(): non-standard hyphenation.

 (It supports Catalan, Dutch, German, Hungarian, Norwegian, Swedish
  etc. orthography, see documentation.)

 input data:
 word:      input word
 word_size: byte length of the input word

 hyphens:   allocated character buffer (size = word_size + 5)
 hyphenated_word: allocated character buffer (size ~ word_size * 2) or NULL
 rep, pos, cut: pointers (point to the allocated and _zeroed_ buffers
                (size=word_size) or with NULL value) or NULL

 output data:
 hyphens:   hyphenation vector (hyphenation points signed with odd numbers)
 hyphenated_word: hyphenated input word (hyphens signed with `='),
                  optional (NULL input)
 rep:       NULL (only standard hyph.), or replacements (hyphenation points
            signed with `=' in replacements);
 pos:       NULL, or difference of the actual position and the beginning
            positions of the change in input words;
 cut:       NULL, or counts of the removed characters of the original words
            at hyphenation.

 Note: rep, pos, cut are complementary arrays to the hyphens, indexed with the
       character positions of the input word.

 For example:
 Schiffahrt -> Schiff=fahrt,
 pattern: f1f/ff=f,1,2
 output: rep[5]="ff=f", pos[5] = 1, cut[5] = 2

 Note: hnj_hyphen_hyphenate2() can allocate rep, pos, cut (word_size
       length arrays):

 char ** rep = NULL;
 int * pos = NULL;
 int * cut = NULL;
 char hyphens[MAXWORDLEN];
 hnj_hyphen_hyphenate2(dict, "example", 7, hyphens, NULL, &rep, &pos, &cut);

 See example in the source distribution.

 The meaning of the int return value is not specified in this header.

*/

int hnj_hyphen_hyphenate2 (HyphenDict *dict,
        const char *word, int word_size, char * hyphens,
        char *hyphenated_word, char *** rep, int ** pos, int ** cut);

/* like hnj_hyphen_hyphenate2, but with hyphenmin parameters */
/* lhmin: lefthyphenmin
 * rhmin: righthyphenmin
 * clhmin: compoundlefthyphenmin
 * crhmin: compoundrighthyphenmin
 * (see documentation) */

int hnj_hyphen_hyphenate3 (HyphenDict *dict,
        const char *word, int word_size, char * hyphens,
        char *hyphword, char *** rep, int ** pos, int ** cut,
        int lhmin, int rhmin, int clhmin, int crhmin);

#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif /* __HYPHEN_H__ */