Viewing file: spell.h (5.42 KB) -rw-r--r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
/*------------------------------------------------------------------------- * * spell.h * * Declarations for ISpell dictionary * * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * * src/include/tsearch/dicts/spell.h * *------------------------------------------------------------------------- */
/* Include guard; the matching #endif is the last directive in this file. */
#ifndef __SPELL_H__
#define __SPELL_H__

#include "regex/regex.h"
#include "tsearch/dicts/regis.h"
#include "tsearch/ts_public.h"
/*
 * The word list is kept in a prefix tree (trie) built from SPNode and
 * SPNodeData.  SPNodeData is one entry of a node; SPNode itself is only
 * forward-declared here because each entry points to another SPNode.
 */
struct SPNode;

typedef struct
{
	/* presumably the byte of the word this entry stands for -- confirm */
	uint32		val:8;
	/* presumably set when a word ends at this entry -- confirm */
	uint32		isword:1;
	/* Stores compound flags listed below (FF_COMPOUND*) */
	uint32		compoundflag:4;
	/* Reference to an entry of the AffixData field */
	uint32		affix:19;
	/* Link to another trie node */
	struct SPNode *node;
} SPNodeData;
/*
 * Names of FF_ are correlated with Hunspell options in affix file
 * http://hunspell.sourceforge.net/
 *
 * Each flag occupies one bit.  FF_COMPOUNDFLAG is the union of the
 * begin/middle/last bits; FF_COMPOUNDFLAGMASK covers all four bits.
 */
#define FF_COMPOUNDONLY		0x01
#define FF_COMPOUNDBEGIN	0x02
#define FF_COMPOUNDMIDDLE	0x04
#define FF_COMPOUNDLAST		0x08
#define FF_COMPOUNDFLAG		( FF_COMPOUNDBEGIN | FF_COMPOUNDMIDDLE | \
							FF_COMPOUNDLAST )
#define FF_COMPOUNDFLAGMASK		0x0f
/*
 * One node of the word trie: a length field followed by a variable-length
 * array of SPNodeData entries.
 */
typedef struct SPNode
{
	/* presumably the number of entries in data[] -- confirm against spell.c */
	uint32		length;
	SPNodeData	data[FLEXIBLE_ARRAY_MEMBER];
} SPNode;

/* Size of the fixed part of an SPNode, i.e. the offset of data[] */
#define SPNHDRSZ	(offsetof(SPNode,data))
/*
 * Represents an entry in a words list.
 *
 * The union p is interpreted differently depending on the build phase, as
 * described on its members.  The word text is stored in the trailing
 * variable-length array.
 */
typedef struct spell_struct
{
	union
	{
		/*
		 * flag is filled in by NIImportDictionary(). After
		 * NISortDictionary(), d is used instead of flag.
		 */
		char	   *flag;
		/* d is used in mkSPNode() */
		struct
		{
			/* Reference to an entry of the AffixData field */
			int			affix;
			/* Length of the word */
			int			len;
		}			d;
	}			p;
	char		word[FLEXIBLE_ARRAY_MEMBER];
} SPELL;

/* Size of the fixed part of a SPELL entry, i.e. the offset of word[] */
#define SPELLHDRSZ	(offsetof(SPELL, word))
/*
 * If an affix uses a regex, we have to store that separately in a struct
 * that won't move around when arrays of affixes are enlarged or sorted.
 * This is so that it can be found to be cleaned up at context destruction.
 */
typedef struct aff_regex_struct
{
	/* The regex itself (regex_t comes from regex/regex.h) */
	regex_t		regex;
	/* Callback record used for the cleanup at context destruction */
	MemoryContextCallback mcallback;
} aff_regex_struct;
/*
 * One entry of the affix list.
 */
typedef struct aff_struct
{
	char	   *flag;
	/* FF_SUFFIX or FF_PREFIX */
	uint32		type:1;
	/* presumably a combination of FF_* flag bits -- confirm */
	uint32		flagflags:7;
	uint32		issimple:1;
	uint32		isregis:1;
	/* presumably the length of repl -- confirm */
	uint32		replen:14;
	char	   *find;
	char	   *repl;
	/*
	 * Matcher for the affix: either a pointer to a separately stored regex
	 * or an in-line Regis.  NOTE(review): which member is valid looks like
	 * it is governed by issimple/isregis -- confirm against spell.c.
	 */
	union
	{
		aff_regex_struct *pregex;
		Regis		regis;
	}			reg;
} AFFIX;
/*
 * affixes use dictionary flags too
 *
 * These bits lie above FF_COMPOUNDFLAGMASK (0x0f), so they do not collide
 * with the compound flags.
 */
#define FF_COMPOUNDPERMITFLAG	0x10
#define FF_COMPOUNDFORBIDFLAG	0x20
#define FF_CROSSPRODUCT			0x40
/*
 * Don't change the order of these. Initialization sorts by these,
 * and expects prefixes to come first after sorting.
 */
#define FF_SUFFIX				1
#define FF_PREFIX				0
/*
 * The affix list is kept in a prefix tree (trie) built from AffixNode and
 * AffixNodeData.  AffixNodeData is one entry of a node; AffixNode itself is
 * only forward-declared here because each entry points to another AffixNode.
 */
struct AffixNode;

typedef struct
{
	/* presumably the byte of the affix this entry stands for -- confirm */
	uint32		val:8;
	/* presumably the number of pointers in aff[] -- confirm */
	uint32		naff:24;
	AFFIX	  **aff;
	/* Link to another trie node */
	struct AffixNode *node;
} AffixNodeData;
/*
 * One node of the affix trie: a one-bit isvoid flag and a 31-bit length
 * packed into one uint32, followed by a variable-length array of
 * AffixNodeData entries.
 */
typedef struct AffixNode
{
	uint32		isvoid:1;
	/* presumably the number of entries in data[] -- confirm */
	uint32		length:31;
	AffixNodeData data[FLEXIBLE_ARRAY_MEMBER];
} AffixNode;

/* Size of the fixed part of an AffixNode, i.e. the offset of data[] */
#define ANHRDSZ		   (offsetof(AffixNode, data))
/*
 * An affix string with its length and a suffix/prefix marker; IspellDict
 * keeps these in its CompoundAffix field.
 */
typedef struct
{
	char	   *affix;
	/* presumably the length of affix -- confirm */
	int			len;
	/* true for a suffix; presumably false means prefix -- confirm */
	bool		issuffix;
} CMPDAffix;
/*
 * Type of encoding affix flags in Hunspell dictionaries, i.e. how a single
 * flag is written in the dictionary and affix files.
 */
typedef enum
{
	FM_CHAR,					/* one character (like ispell) */
	FM_LONG,					/* two characters */
	FM_NUM						/* number, >= 0 and < 65536 */
} FlagMode;
/*
 * Structure to store Hunspell options. Flag representation depends on flag
 * type. These flags are about support of compound words.
 */
typedef struct CompoundAffixFlag
{
	union
	{
		/* Flag name if flagMode is FM_CHAR or FM_LONG */
		char	   *s;
		/* Flag name if flagMode is FM_NUM */
		uint32		i;
	}			flag;
	/* we don't have a bsearch_arg version, so, copy FlagMode */
	FlagMode	flagMode;
	/* presumably a combination of FF_* bits for this flag -- confirm */
	uint32		value;
} CompoundAffixFlag;
/*
 * 65536.  NOTE(review): this matches the exclusive upper bound documented
 * for FM_NUM flags ("number, >= 0 and < 65536"); confirm that is its use.
 */
#define FLAGNUM_MAXSIZE		(1 << 16)
/*
 * State of one ISpell dictionary: the affix list and its tries, the word
 * trie, compound-word data, and the scratch fields used while building.
 */
typedef struct
{
	/*
	 * presumably allocated length (maffixes) and number of valid entries
	 * (naffixes) of the Affix array, by analogy with mspell/nspell -- confirm
	 */
	int			maffixes;
	int			naffixes;
	AFFIX	   *Affix;

	/* Affix tries, one for suffixes and one for prefixes */
	AffixNode  *Suffix;
	AffixNode  *Prefix;

	/* Word trie */
	SPNode	   *Dictionary;
	/* Array of sets of affixes */
	char	  **AffixData;
	/* presumably allocated length and used count of AffixData -- confirm */
	int			lenAffixData;
	int			nAffixData;
	bool		useFlagAliases;

	CMPDAffix  *CompoundAffix;

	bool		usecompound;
	FlagMode	flagMode;

	/*
	 * All following fields are actually needed only for initialization
	 */

	/* Array of Hunspell options in affix file */
	CompoundAffixFlag *CompoundAffixFlags;
	/* number of entries in CompoundAffixFlags array */
	int			nCompoundAffixFlag;
	/* allocated length of CompoundAffixFlags array */
	int			mCompoundAffixFlag;

	/*
	 * Remaining fields are only used during dictionary construction; they are
	 * set up by NIStartBuild and cleared by NIFinishBuild.
	 */
	MemoryContext buildCxt;		/* temp context for construction */

	/* Temporary array of all words in the dict file */
	SPELL	  **Spell;
	int			nspell;			/* number of valid entries in Spell array */
	int			mspell;			/* allocated length of Spell array */

	/* These are used to allocate "compact" data without palloc overhead */
	char	   *firstfree;		/* first free address (always maxaligned) */
	size_t		avail;			/* free space remaining at firstfree */
} IspellDict;
/*
 * Look up a word in a built dictionary.  Returns a TSLexeme pointer;
 * NOTE(review): the exact result convention (e.g. NULL for an unknown word)
 * is not visible from this header -- see spell.c.
 */
extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);

/*
 * Dictionary construction API.
 *
 * NIStartBuild sets up the construction-only fields of IspellDict and
 * NIFinishBuild clears them.  NIImportDictionary fills in the flag member of
 * the SPELL entries; after NISortDictionary the d member is used instead.
 * NOTE(review): the required call order of the import/sort functions is not
 * stated in this header -- confirm against the callers.
 */
extern void NIStartBuild(IspellDict *Conf);
extern void NIImportAffixes(IspellDict *Conf, const char *filename);
extern void NIImportDictionary(IspellDict *Conf, const char *filename);
extern void NISortDictionary(IspellDict *Conf);
extern void NISortAffixes(IspellDict *Conf);
extern void NIFinishBuild(IspellDict *Conf);
#endif
|