/*
** 2006 July 10
**
** The author disclaims copyright to this source code.
**
*************************************************************************
** Defines the interface to tokenizers used by fulltext-search.  There
** are three basic components:
**
** sqlite3_tokenizer_module is a singleton defining the tokenizer
** interface functions.  This is essentially the class structure for
** tokenizers.
**
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
** including customization information defined at creation time.
**
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
** tokens from a particular input.
*/
     20 #ifndef _TOKENIZER_H_
     21 #define _TOKENIZER_H_
     22 
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
** If tokenizers are to be allowed to call sqlite3_*() functions, then
** we will need a way to register the API consistently.
*/
     27 #include "sqlite3.h"
     28 
     29 /*
     30 ** Structures used by the tokenizer interface.
     31 */
     32 typedef struct sqlite3_tokenizer sqlite3_tokenizer;
     33 typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
     34 typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
     35 
     36 struct sqlite3_tokenizer_module {
     37   int iVersion;                  /* currently 0 */
     38 
     39   /*
     40   ** Create and destroy a tokenizer.  argc/argv are passed down from
     41   ** the fulltext virtual table creation to allow customization.
     42   */
     43   int (*xCreate)(int argc, const char **argv,
     44                  sqlite3_tokenizer **ppTokenizer);
     45   int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
     46 
     47   /*
     48   ** Tokenize a particular input.  Call xOpen() to prepare to
     49   ** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then
     50   ** xClose() to free any internal state.  The pInput passed to
     51   ** xOpen() must exist until the cursor is closed.  The ppToken
     52   ** result from xNext() is only valid until the next call to xNext()
     53   ** or until xClose() is called.
     54   */
     55   /* TODO(shess) current implementation requires pInput to be
     56   ** nul-terminated.  This should either be fixed, or pInput/nBytes
     57   ** should be converted to zInput.
     58   */
     59   int (*xOpen)(sqlite3_tokenizer *pTokenizer,
     60                const char *pInput, int nBytes,
     61                sqlite3_tokenizer_cursor **ppCursor);
     62   int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
     63   int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
     64                const char **ppToken, int *pnBytes,
     65                int *piStartOffset, int *piEndOffset, int *piPosition);
     66 };
     67 
     68 struct sqlite3_tokenizer {
     69   sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer */
     70   /* Tokenizer implementations will typically add additional fields */
     71 };
     72 
     73 struct sqlite3_tokenizer_cursor {
     74   sqlite3_tokenizer *pTokenizer;       /* Tokenizer for this cursor. */
     75   /* Tokenizer implementations will typically add additional fields */
     76 };
     77 
     78 /*
     79 ** Get the module for a tokenizer which generates tokens based on a
     80 ** set of non-token characters.  The default is to break tokens at any
     81 ** non-alnum character, though the set of delimiters can also be
     82 ** specified by the first argv argument to xCreate().
     83 */
     84 /* TODO(shess) This doesn't belong here.  Need some sort of
     85 ** registration process.
     86 */
     87 void get_simple_tokenizer_module(sqlite3_tokenizer_module **ppModule);
     88 
     89 #endif /* _TOKENIZER_H_ */
     90