Home | History | Annotate | Download | only in hyphenation
      1 /* Hyphen - hyphenation library using converted TeX hyphenation patterns
      2  *
      3  * (C) 1998 Raph Levien
      4  * (C) 2001 ALTLinux, Moscow
      5  * (C) 2006, 2007, 2008 László Németh
      6  *
      7  * This was part of libHnj library by Raph Levien.
      8  *
      9  * Peter Novodvorsky from ALTLinux cut hyphenation part from libHnj
     10  * to use it in OpenOffice.org.
     11  *
     12  * Non-standard and compound word hyphenation support by László Németh.
     13  *
     14  * License is the original LibHnj license:
     15  *
     16  * LibHnj is dual licensed under LGPL and MPL. Boilerplate for both
     17  * licenses follows.
     18  */
     19 
     20 /* LibHnj - a library for high quality hyphenation and justification
     21  * Copyright (C) 1998 Raph Levien
     22  *
     23  * This library is free software; you can redistribute it and/or
     24  * modify it under the terms of the GNU Library General Public
     25  * License as published by the Free Software Foundation; either
     26  * version 2 of the License, or (at your option) any later version.
     27  *
     28  * This library is distributed in the hope that it will be useful,
     29  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     30  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     31  * Library General Public License for more details.
     32  *
     33  * You should have received a copy of the GNU Library General Public
     34  * License along with this library; if not, write to the
     35  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
     36  * Boston, MA  02111-1307  USA.
     37 */
     38 
     39 /*
     40  * The contents of this file are subject to the Mozilla Public License
     41  * Version 1.0 (the "MPL"); you may not use this file except in
     42  * compliance with the MPL.  You may obtain a copy of the MPL at
     43  * http://www.mozilla.org/MPL/
     44  *
     45  * Software distributed under the MPL is distributed on an "AS IS" basis,
     46  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL
     47  * for the specific language governing rights and limitations under the
     48  * MPL.
     49  *
     50  */
     51 #ifndef __HYPHEN_H__
     52 #define __HYPHEN_H__
     53 
     54 #ifdef __cplusplus
     55 extern "C" {
     56 #endif /* __cplusplus */
     57 
     58 typedef struct _HyphenDict HyphenDict;
     59 typedef struct _HyphenState HyphenState;
     60 typedef struct _HyphenTrans HyphenTrans;
     61 #define MAX_CHARS 100
     62 #define MAX_NAME 20
     63 
     64 struct _HyphenDict {
     65   /* user options */
     66   char lhmin;    /* lefthyphenmin: min. hyph. distance from the left side */
     67   char rhmin;    /* righthyphenmin: min. hyph. distance from the right side */
     68   char clhmin;   /* min. hyph. distance from the left compound boundary */
     69   char crhmin;   /* min. hyph. distance from the right compound boundary */
     70   /* system variables */
     71   int num_states;
     72   char cset[MAX_NAME];
     73   int utf8;
     74   HyphenState *states;
     75   HyphenDict *nextlevel;
     76 };
     77 
     78 struct _HyphenState {
     79   char *match;
     80   char *repl;
     81   signed char replindex;
     82   signed char replcut;
     83   int fallback_state;
     84   int num_trans;
     85   HyphenTrans *trans;
     86 };
     87 
     88 struct _HyphenTrans {
     89   char ch;
     90   int new_state;
     91 };
     92 
     93 HyphenDict *hnj_hyphen_load (const char *fn);
     94 HyphenDict *hnj_hyphen_load_from_buffer (const char *dict_contents,
     95     int dict_length);
     96 void hnj_hyphen_free (HyphenDict *dict);
     97 
     98 /* obsolete, use hnj_hyphen_hyphenate2() or *hyphenate3() functions) */
     99 int hnj_hyphen_hyphenate (HyphenDict *dict,
    100 			   const char *word, int word_size,
    101 			   char *hyphens);
    102 
    103 /*
    104 
    105  int hnj_hyphen_hyphenate2(): non-standard hyphenation.
    106 
    107  (It supports Catalan, Dutch, German, Hungarian, Norwegian, Swedish
    108   etc. orthography, see documentation.)
    109 
    110  input data:
    111  word:      input word
    112  word_size: byte length of the input word
    113 
    114  hyphens:   allocated character buffer (size = word_size + 5)
    115  hyphenated_word: allocated character buffer (size ~ word_size * 2) or NULL
    116  rep, pos, cut: pointers (point to the allocated and _zeroed_ buffers
    117                 (size=word_size) or with NULL value) or NULL
    118 
    119  output data:
    120  hyphens:   hyphenation vector (hyphenation points signed with odd numbers)
    121  hyphenated_word: hyphenated input word (hyphens signed with `='),
    122                   optional (NULL input)
    123  rep:       NULL (only standard hyph.), or replacements (hyphenation points
    124             signed with `=' in replacements);
    125  pos:       NULL, or difference of the actual position and the beginning
    126             positions of the change in input words;
    127  cut:       NULL, or counts of the removed characters of the original words
    128             at hyphenation,
    129 
    130  Note: rep, pos, cut are complementary arrays to the hyphens, indexed with the
    131        character positions of the input word.
    132 
    133  For example:
    134  Schiffahrt -> Schiff=fahrt,
    135  pattern: f1f/ff=f,1,2
    136  output: rep[5]="ff=f", pos[5] = 1, cut[5] = 2
    137 
    138  Note: hnj_hyphen_hyphenate2() can allocate rep, pos, cut (word_size
    139        length arrays):
    140 
    141  char ** rep = NULL;
    142  int * pos = NULL;
    143  int * cut = NULL;
    144  char hyphens[MAXWORDLEN];
    145  hnj_hyphen_hyphenate2(dict, "example", 7, hyphens, NULL, &rep, &pos, &cut);
    146 
    147  See example in the source distribution.
    148 
    149 */
    150 
    151 int hnj_hyphen_hyphenate2 (HyphenDict *dict,
    152         const char *word, int word_size, char * hyphens,
    153         char *hyphenated_word, char *** rep, int ** pos, int ** cut);
    154 
    155 /* like hnj_hyphen_hyphenate2, but with hyphenmin parameters */
    156 /* lhmin: lefthyphenmin
    157  * rhmin: righthyphenmin
    158  * clhmin: compoundlefthyphemin
    159  * crhmin: compoundrighthyphenmin
    160  * (see documentation) */
    161 
    162 int hnj_hyphen_hyphenate3 (HyphenDict *dict,
    163 	const char *word, int word_size, char * hyphens,
    164 	char *hyphword, char *** rep, int ** pos, int ** cut,
    165 	int lhmin, int rhmin, int clhmin, int crhmin);
    166 
    167 #ifdef __cplusplus
    168 }
    169 #endif /* __cplusplus */
    170 
    171 #endif /* __HYPHEN_H__ */
    172