144 #include <hash_table.h>
/*
 * General language-model constants.
 * NOTE(review): meanings that are not visible in the expansions below are
 * inferred from the macro names only -- confirm against the code that uses them.
 */
/* Sentinel value -16000; presumably marks a dictionary word id with no valid LM mapping -- confirm. */
154 #define LM_DICTWID_BADMAP -16000
/* Offset that LM_CLASSID_TO_CLASS subtracts from an id to index the lmclass array. */
155 #define LM_CLASSID_BASE 0x01000000
/* Alias for BAD_S3LMWID (defined outside this listing). */
158 #define LM_LEGACY_CONSTANT BAD_S3LMWID
/* Alias for BAD_S3LMWID32 (defined outside this listing). */
163 #define LM_SPHINX_CONSTANT BAD_S3LMWID32
/* Entry (i - LM_CLASSID_BASE) of m->lmclass; the macro does no range check on i. */
172 #define LM_CLASSID_TO_CLASS(m,i) ((m)->lmclass[(i)-LM_CLASSID_BASE])
/* Floating-point constant -99.0; presumably a floor for log probabilities -- confirm. */
174 #define MIN_PROB_F -99.0
/* Presumably the growth increment used when allocating LM arrays -- confirm. */
182 #define LM_ALLOC_BLOCK 16
/*
 * Negative status codes, one distinct value each from -1 down to -15.
 * NOTE(review): the condition each code reports is suggested by its name only;
 * confirm against the functions that return them.
 */
191 #define LM_NOT_FOUND -1
193 #define LM_OFFSET_TOO_LARGE -2
199 #define LM_NO_DATA_MARK -3
202 #define LM_UNKNOWN_NG -4
204 #define LM_BAD_LM_COUNT -5
206 #define LM_UNKNOWN_WORDS -6
209 #define LM_BAD_BIGRAM -7
215 #define LM_BAD_TRIGRAM -8
221 #define LM_BAD_QUADGRAM -9
228 #define LM_BAD_QUINGRAM -10
239 #define LM_BAD_NGRAM -11
245 #define LM_TOO_MANY_NGRAM -12
249 #define LM_NO_MINUS_1GRAM -13
252 #define LM_FILE_NOT_FOUND -14
254 #define LM_CANNOT_ALLOCATE -15
/*
 * LM format/version identifiers.
 * The LMDMP_* values are 0 and small negative numbers; the LMTXT/LMFST/
 * LMFORCED values are 1000 and above, so the two groups cannot collide.
 * NOTE(review): presumably LMDMP_* tag binary dump layouts (16- vs 32-bit
 * word ids) and the others tag text/FST formats -- confirm against the
 * reader/writer code.
 */
258 #define LMDMP_VERSIONNULL 0
264 #define LMDMP_VERSION_TG_16BIT -1
268 #define LMDMP_VERSION_TG_16BIT_V2 -2
271 #define LMDMP_VERSION_TG_32BIT -3
277 #define LMTXT_VERSION 1000
278 #define LMFST_VERSION 1001
279 #define LMFORCED_TXT32VERSION 1002
/* Base-2 logarithm of the bigram segment size. */
548 #define LOG2_BG_SEG_SZ 9
/* Bigram segment size, derived from the log above: 1 << 9 = 512. */
549 #define BG_SEG_SZ (1 << (LOG2_BG_SEG_SZ))
/* Presumably the number of slots in the trigram cache -- confirm against the cache allocation. */
550 #define LM_TGCACHE_SIZE 100003
559 typedef struct lm_s {
/*
 * Field accessors for an LM object. Each macro expands to a direct member
 * access through its `lm` pointer argument; none of them checks for NULL or
 * validates an index.
 */
/* dictwid member of unigram entry u, i.e. lm->ug[u].dictwid. */
688 #define lm_lmwid2dictwid(lm,u) ((lm)->ug[u].dictwid)
/* Values of the n_ug, n_bg and n_tg members. */
689 #define lm_n_ug(lm) ((lm)->n_ug)
690 #define lm_n_bg(lm) ((lm)->n_bg)
691 #define lm_n_tg(lm) ((lm)->n_tg)
/* Entry u of the wordstr array. */
692 #define lm_wordstr(lm,u) ((lm)->wordstr[u])
/* Values of the startlwid and finishlwid members. */
693 #define lm_startwid(lm) ((lm)->startlwid)
694 #define lm_finishwid(lm) ((lm)->finishlwid)
/* Value of the access_type member. */
695 #define lm_access_type(lm) ((lm)->access_type)
763 const char* lmctlfile,
766 const char* lmdumpdir,
789 const char *lmdumpdir,
802 const char* lmdumpdir,
920 int32 lm_bg_wordprob(
lm_t *lm,
1052 logmath_t *logmath);
1136 const char *outputfile,
1137 const char *filename,
1173 const char *outputfile,
1174 const char *filename,
1176 const char* inputenc,
1206 const char* filename
/*
 * Probability and score helper macros.
 *
 * All of them expand to plain member accesses through pointer arguments;
 * no NULL or bounds checking is performed.
 */

/* Integer (.l) view of the trigram probability selected by tgptr->probid. */
#define LM_TGPROB(lm,tgptr) ((lm)->tgprob[(tgptr)->probid].l)

/* Integer (.l) view of the bigram probability selected by bgptr->probid. */
#define LM_BGPROB(lm,bgptr) ((lm)->bgprob[(bgptr)->probid].l)

/* Integer (.l) view of the probability stored in the unigram itself; `lm` is unused. */
#define LM_UGPROB(lm,ugptr) ((ugptr)->prob.l)

/*
 * Subtract lm->wip from `score`, then divide by lm->lw.
 * `score` is parenthesized so that an argument containing an operator of
 * lower precedence than binary '-' (e.g. `a << b`, `a | b`, `c ? x : y`)
 * is evaluated as a whole before the subtraction.
 */
#define LM_RAWSCORE(lm,score) (((score) - (lm)->wip) / ((lm)->lw))

/* dictwid member of unigram entry lmwid, i.e. lm->ug[lmwid].dictwid. */
#define LM_DICTWID(lm,lmwid) ((lm)->ug[(lmwid)].dictwid)
lm_t * lmset_get_lm_wname(lmset_t *lms, const char *lmname)
lmclass_t ** lmclass
Definition: lm.h:733
int32 lm_bglist(lm_t *lmp, s3lmwid32_t w, bg_t **bg, int32 *bowt)
char ** wordstr
Definition: lm.h:650
lm_t * lmset_get_lm_widx(lmset_t *lms, int32 lmidx)
S3DECODER_EXPORT lmset_t * lmset_init(const char *lmfile, const char *lmctlfile, const char *ctl_lm, const char *lmname, const char *lmdumpdir, float32 lw, float32 wip, float32 uw, dict_t *dict, logmath_t *logmath)
int32 version
Definition: lm.h:740
A unigram structure.
Definition: lm.h:446
struct sorted_entry_s sorted_entry_t
S3DECODER_EXPORT void lm_cache_reset(lm_t *lmp)
char * name
Definition: lm.h:642
s3lmwid_t w1
Definition: lm.h:531
A bigram structure.
Definition: lm.h:460
tginfo32_t ** tginfo32
Definition: lm.h:685
int32 lm_tg_exists(lm_t *lm, s3lmwid32_t lw1, s3lmwid32_t lw2, s3lmwid32_t lw3)
uint16 s3lmwid_t
Definition: s3types.h:142
int32 n_lmclass
Definition: lm.h:734
Generic structure that could be used at any n-gram level.
Definition: lm.h:783
int32 s3wid_t
Definition: s3types.h:136
int32 lm_is32bits(lm_t *model)
lmset_t * lmset_read_ctl(const char *ctlfile, dict_t *dict, float64 lw, float64 wip, float64 uw, const char *lmdumpdir, logmath_t *logmath)
Structure for multiple LMs; provides operations for addition, deletion and reading.
void copy_tg32_to_tg(lm_t *lm)
void ug_write(FILE *fp, ug_t *ug)
int32 find_tg(tg_t *tg, int32 n, s3lmwid32_t w)
void lm_null_struct(lm_t *lm)
void tg32_write(FILE *fp, tg32_t *tg)
int32 max_ug
Definition: lm.h:646
lm_tgcache_entry_t * tgcache
Definition: lm.h:671
S3DECODER_EXPORT int32 lm_write(lm_t *model, const char *outputfile, const char *filename, const char *fmt)
int32 n_tg_inmem
Definition: lm.h:715
S3DECODER_EXPORT void lmset_free(lmset_t *lms)
S3DECODER_EXPORT void lm_cache_stats_dump(lm_t *lmp)
lmlog_t * tgbowt
Definition: lm.h:693
s3lmwid32_t finishlwid
Definition: lm.h:663
int32 lmset_name_to_idx(lmset_t *lms, const char *lmname)
S3DECODER_EXPORT void lm_free(lm_t *lm)
void bg_write(FILE *fp, bg_t *bg)
Operations on dictionary.
int32 * inclass_ugscore
Definition: lm.h:735
void lmset_set_curlm_widx(lmset_t *lms, int32 lmidx)
int32 n_tg
Definition: lm.h:548
S3DECODER_EXPORT void lmset_set_curlm_wname(lmset_t *lms, const char *lmname)
int32 n_bg_inmem
Definition: lm.h:711
int32 n_tg_fill
Definition: lm.h:714
int32 byteswap
Definition: lm.h:701
int32 access_type
Definition: lm.h:720
ug_t * ug
Definition: lm.h:656
lm_t ** lmarray
Definition: lm.h:761
int32 bgoff
Definition: lm.h:702
void lmset_add_lm(lmset_t *lms, lm_t *lm, const char *lmname)
int32 used
Definition: lm.h:536
int32 n_tgcache_hit
Definition: lm.h:718
void swap_tg32(tg32_t *tg)
int32 wip
Definition: lm.h:706
int32 cur_lm_idx
Definition: lm.h:764
tg32_t * tg32
Definition: lm.h:549
s3lmwid32_t startlwid
Definition: lm.h:662
uint32 bg_seg_sz
Definition: lm.h:654
membg_t * membg
Definition: lm.h:667
int32 lm_ug_score(lm_t *lmp, s3lmwid32_t lwid, s3wid_t wid)
ug_t * NewUnigramTable(int32 n_ug)
int32 n_ug
Definition: lm.h:643
int32 tgoff
Definition: lm.h:703
char * lmset_idx_to_name(lmset_t *lms, int32 lmidx)
lmlog_t * bgprob
Definition: lm.h:691
tg32_t * tg32
Definition: lm.h:683
tg_t * tg
Definition: lm.h:666
membg32_t * membg32
Definition: lm.h:684
sorted_list_t sorted_prob2
Definition: lm.h:746
int32 lm_ug_exists(lm_t *lm, s3lmwid32_t lwid)
int32 dict_size
Definition: lm.h:727
A trigram structure.
Definition: lm.h:483
FILE * fp
Definition: lm.h:700
void swap_bg32(bg32_t *bg)
void bg32_write(FILE *fp, bg32_t *bg)
int32 * tg_segbase
Definition: lm.h:694
int32 bowt
Definition: lm.h:550
int32 n_alloc_lm
Definition: lm.h:766
lm_t * cur_lm
Definition: lm.h:762
int32 n_ng
Definition: lm.h:648
int32 n_tgbowt
Definition: lm.h:698
Size definitions of semantic units. Common to both the s3 and s3.X decoders.
lmlog_t * tgprob
Definition: lm.h:692
int32 n_tg
Definition: lm.h:533
int32 outputenc
Definition: lm.h:739
int32 find_bg32(bg32_t *bg, int32 n, s3lmwid32_t w)
int32 max_sorted_entries
Definition: lm.h:749
The sorted list used in LM reading. The list is a (64K long) array. The first entry is the root of the tree ...
Definition: lm.h:437
int32 isLM_IN_MEMORY
Definition: lm.h:724
#define S3DECODER_EXPORT
Definition: sphinx3_export.h:15
bg_t * bg
Definition: lm.h:665
void copy_bg32_to_bg(lm_t *lm)
int32 n_bg_score
Definition: lm.h:712
int32 find_bg(bg_t *bg, int32 n, s3lmwid32_t w)
struct tginfo_s * next
Definition: lm.h:537
Single entry used in the linked-list structure for LM reading.
uint32 lower
Definition: lm.h:425
int32 lm_bg_score(lm_t *lmp, s3lmwid32_t lw1, s3lmwid32_t lw2, s3wid_t w2)
void lmset_delete_lm(lmset_t *lms, const char *lmname)
a structure for a dictionary.
Definition: dict.h:146
int32 lm_tg_score(lm_t *lmp, s3lmwid32_t lw1, s3lmwid32_t lw2, s3lmwid32_t lw3, s3wid_t w3)
hash_table_t * HT
Definition: lm.h:729
int32 n_tg
Definition: lm.h:645
Trigram cache that speeds up locating trigrams for a given bigram (w_1, w_2).
sorted_list_t sorted_prob3
Definition: lm.h:748
int32 is32bits
Definition: lm.h:743
int32 lm_get_classid(lm_t *model, const char *name)
int32 lm_bg32list(lm_t *lmp, s3lmwid32_t w, bg32_t **bg, int32 *bowt)
struct tginfo32_s tginfo32_t
s3lmwid32_t * dict2lmwid
Definition: lm.h:661
Log quantities represented in either floating or integer format.
Definition: lm.h:412
uint32 log_bg_seg_sz
Definition: lm.h:653
int32 n_bgprob
Definition: lm.h:696
void copy_bg_to_bg32(lm_t *lm)
S3DECODER_EXPORT lm_t * lm_read_advance2(const char *file, const char *lmname, float64 lw, float64 wip, float64 uw, int32 ndict, const char *fmt, int32 applyweight, int lminmemory, logmath_t *logmath)
int32 lm_write_advance(lm_t *model, const char *outputfile, const char *filename, const char *fmt, const char *inputenc, char *outputenc)
sorted_list_t sorted_bowt2
Definition: lm.h:747
int32 n_bg_fill
Definition: lm.h:710
void copy_tg_to_tg32(lm_t *lm)
s3lmwid32_t w1
Definition: lm.h:546
int32 lm_tglist(lm_t *lmp, s3lmwid32_t w1, s3lmwid32_t w2, tg_t **tg, int32 *bowt)
logmath_t * logmath
Definition: lm.h:751
s3lmwid32_t lm_wid(lm_t *lm, const char *wd)
A 32 bits version of tg_t.
Definition: lm.h:493
int32 lm_bg_exists(lm_t *lm, s3lmwid32_t lw1, s3lmwid32_t lw2)
int32 used
Definition: lm.h:551
int32 n_tgprob
Definition: lm.h:697
int32 lm_ug_wordprob(lm_t *lm, dict_t *dict, int32 th, wordprob_t *wp)
int32 lm_tg32list(lm_t *lmp, s3lmwid32_t w1, s3lmwid32_t w2, tg32_t **tg, int32 *bowt)
float32 lw
Definition: lm.h:705
Management of in-memory bigrams. Not used if all bigrams in memory.
Definition: lm.h:502
lm_tgcache_entry32_t * tgcache32
Definition: lm.h:687
The language model. All unigrams are read into memory on initialization. Bigrams and trigrams read in...
void lm_convert_structure(lm_t *model, int32 is32bits)
int32 n_bg_bo
Definition: lm.h:713
struct tginfo32_s * next
Definition: lm.h:552
uint32 s3lmwid32_t
Definition: s3types.h:149
tg_t * tg
Definition: lm.h:534
int32 lm_add_word_to_ug(lm_t *lm, dict_t *dict, const char *newword)
tginfo_t ** tginfo
Definition: lm.h:668
int32 lm_uglist(lm_t *lmp, ug_t **ug)
lm_t * lm_read(const char *file, const char *lmname, cmd_ln_t *config, logmath_t *logmath)
A bigram structure which has 32 bits.
Definition: lm.h:471
int32 lm_add_wordlist(lm_t *lm, dict_t *dict, const char *filename)
int32 bowt
Definition: lm.h:535
int32 n_tg_bo
Definition: lm.h:717
Language model class modules. This module maintains classes of words and associated probabilities (P(...
void tg_write(FILE *fp, tg_t *tg)
int32 n_lm
Definition: lm.h:765
bg32_t * bg32
Definition: lm.h:682
void lm_set_param(lm_t *lm, float64 lw, float64 wip)
S3DECODER_EXPORT int32 lm_rawscore(lm_t *lm, int32 score)
int32 inputenc
Definition: lm.h:738
int32 n_bg
Definition: lm.h:644
lm_t * lm_read_advance(const char *file, const char *lmname, float64 lw, float64 wip, float64 uw, int32 ndict, const char *fmt, int32 applyweight, logmath_t *logmath)
int32 n_tg_score
Definition: lm.h:716
lmset_t * lmset_read_lm(const char *lmfile, dict_t *dict, const char *lmname, float64 lw, float64 wip, float64 uw, const char *lmdumpdir, logmath_t *logmath)
int32 find_tg32(tg32_t *tg, int32 n, s3lmwid32_t w)
A 32 bits version of membg_t.
Definition: lm.h:512
lmlog_t val
Definition: lm.h:424
uint32 higher
Definition: lm.h:428