SphinxBase 0.6
src/libsphinxbase/lm/fsg_model.c
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00020  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00021  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00022  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00023  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00024  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00025  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00026  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00027  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00028  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00029  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030  *
00031  * ====================================================================
00032  *
00033  */
00034 
00035 /* System headers. */
00036 #ifdef _WIN32_WCE
00037 /*MC in a debug build it's implicitly included by assert.h
00038      but you need this in a release build */
00039 #include <windows.h>
00040 #else
00041 #include <time.h>
00042 #endif /* _WIN32_WCE */
00043 #include <stdio.h>
00044 #include <string.h>
00045 #include <assert.h>
00046 
00047 /* SphinxBase headers. */
00048 #include "sphinxbase/err.h"
00049 #include "sphinxbase/pio.h"
00050 #include "sphinxbase/ckd_alloc.h"
00051 #include "sphinxbase/prim_type.h"
00052 #include "sphinxbase/strfuncs.h"
00053 #include "sphinxbase/hash_table.h"
00054 #include "sphinxbase/fsg_model.h"
00055 
00063 struct trans_list_s {
00064     hash_table_t *null_trans; /* Null transitions keyed by state. */
00065     hash_table_t *trans;      /* Lists of non-null transitions keyed by state. */
00066 };
00067 
00071 struct fsg_arciter_s {
00072     hash_iter_t *itor, *null_itor;
00073     gnode_t *gn;
00074 };
00075 
00076 #define FSG_MODEL_BEGIN_DECL            "FSG_BEGIN"
00077 #define FSG_MODEL_END_DECL              "FSG_END"
00078 #define FSG_MODEL_N_DECL                        "N"
00079 #define FSG_MODEL_NUM_STATES_DECL       "NUM_STATES"
00080 #define FSG_MODEL_S_DECL                        "S"
00081 #define FSG_MODEL_START_STATE_DECL      "START_STATE"
00082 #define FSG_MODEL_F_DECL                        "F"
00083 #define FSG_MODEL_FINAL_STATE_DECL      "FINAL_STATE"
00084 #define FSG_MODEL_T_DECL                        "T"
00085 #define FSG_MODEL_TRANSITION_DECL       "TRANSITION"
00086 #define FSG_MODEL_COMMENT_CHAR          '#'
00087 
00088 
00089 static int32
00090 nextline_str2words(FILE * fp, int32 * lineno,
00091                    char **lineptr, char ***wordptr)
00092 {
00093     for (;;) {
00094         size_t len;
00095         int32 n;
00096 
00097         ckd_free(*lineptr);
00098         if ((*lineptr = fread_line(fp, &len)) == NULL)
00099             return -1;
00100 
00101         (*lineno)++;
00102 
00103         if ((*lineptr)[0] == FSG_MODEL_COMMENT_CHAR)
00104             continue; /* Skip comment lines */
00105         
00106         n = str2words(*lineptr, NULL, 0);
00107         if (n == 0)
00108             continue; /* Skip blank lines */
00109 
00110         /* Abuse of realloc(), but this doesn't have to be fast. */
00111         if (*wordptr == NULL)
00112             *wordptr = ckd_calloc(n, sizeof(**wordptr));
00113         else
00114             *wordptr = ckd_realloc(*wordptr, n * sizeof(**wordptr));
00115         return str2words(*lineptr, *wordptr, n);
00116     }
00117 }
00118 
00119 void
00120 fsg_model_trans_add(fsg_model_t * fsg,
00121                     int32 from, int32 to, int32 logp, int32 wid)
00122 {
00123     fsg_link_t *link;
00124     glist_t gl;
00125     gnode_t *gn;
00126 
00127     if (fsg->trans[from].trans == NULL)
00128         fsg->trans[from].trans = hash_table_new(5, HASH_CASE_YES);
00129 
00130     /* Check for duplicate link (i.e., link already exists with label=wid) */
00131     for (gn = gl = fsg_model_trans(fsg, from, to); gn; gn = gnode_next(gn)) {
00132         link = (fsg_link_t *) gnode_ptr(gn);
00133         if (link->wid == wid) {
00134             if (link->logs2prob < logp)
00135                 link->logs2prob = logp;
00136             return;
00137         }
00138     }
00139 
00140     /* Create transition object */
00141     link = listelem_malloc(fsg->link_alloc);
00142     link->from_state = from;
00143     link->to_state = to;
00144     link->logs2prob = logp;
00145     link->wid = wid;
00146 
00147     /* Add it to the list of transitions and update the hash table */
00148     gl = glist_add_ptr(gl, (void *)link);
00149     hash_table_replace_bkey(fsg->trans[from].trans,
00150                             (char const *)&link->to_state,
00151                             sizeof(link->to_state), gl);
00152 }
00153 
00154 int32
00155 fsg_model_tag_trans_add(fsg_model_t * fsg, int32 from, int32 to, int32 logp, int32 wid)
00156 {
00157     fsg_link_t *link, *link2;
00158 
00159     /* Check for transition probability */
00160     if (logp > 0) {
00161         E_FATAL("Null transition prob must be <= 1.0 (state %d -> %d)\n",
00162                 from, to);
00163     }
00164 
00165     /* Self-loop null transitions (with prob <= 1.0) are redundant */
00166     if (from == to)
00167         return -1;
00168 
00169     if (fsg->trans[from].null_trans == NULL)
00170         fsg->trans[from].null_trans = hash_table_new(5, HASH_CASE_YES);
00171 
00172     /* Check for a duplicate link; if found, keep the higher prob */
00173     link = fsg_model_null_trans(fsg, from, to);
00174     if (link) {
00175         if (link->logs2prob < logp) {
00176             link->logs2prob = logp;
00177             return 0;
00178         }
00179         else
00180             return -1;
00181     }
00182 
00183     /* Create null transition object */
00184     link = listelem_malloc(fsg->link_alloc);
00185     link->from_state = from;
00186     link->to_state = to;
00187     link->logs2prob = logp;
00188     link->wid = -1;
00189 
00190     link2 = (fsg_link_t *)
00191         hash_table_enter_bkey(fsg->trans[from].null_trans,
00192                               (char const *)&link->to_state,
00193                               sizeof(link->to_state), link);
00194     assert(link == link2);
00195 
00196     return 1;
00197 }
00198 
00199 int32
00200 fsg_model_null_trans_add(fsg_model_t * fsg, int32 from, int32 to, int32 logp)
00201 {
00202     return fsg_model_tag_trans_add(fsg, from, to, logp, -1);
00203 }
00204 
00205 glist_t
00206 fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls)
00207 {
00208     gnode_t *gn1, *gn2;
00209     int updated;
00210     fsg_link_t *tl1, *tl2;
00211     int32 k, n;
00212 
00213     E_INFO("Computing transitive closure for null transitions\n");
00214 
00215     if (nulls == NULL) {
00216         fsg_link_t *null;
00217         int i, j;
00218         
00219         for (i = 0; i < fsg->n_state; ++i) {
00220             for (j = 0; j < fsg->n_state; ++j) {
00221                 if ((null = fsg_model_null_trans(fsg, i, j)))
00222                     nulls = glist_add_ptr(nulls, null);
00223             }
00224         }
00225     }
00226 
00227     /*
00228      * Probably not the most efficient closure implementation, in general, but
00229      * probably reasonably efficient for a sparse null transition matrix.
00230      */
00231     n = 0;
00232     do {
00233         updated = FALSE;
00234 
00235         for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) {
00236             tl1 = (fsg_link_t *) gnode_ptr(gn1);
00237             assert(tl1->wid < 0);
00238 
00239             for (gn2 = nulls; gn2; gn2 = gnode_next(gn2)) {
00240                 tl2 = (fsg_link_t *) gnode_ptr(gn2);
00241 
00242                 if (tl1->to_state == tl2->from_state) {
00243                     k = fsg_model_null_trans_add(fsg,
00244                                                 tl1->from_state,
00245                                                 tl2->to_state,
00246                                                 tl1->logs2prob +
00247                                                 tl2->logs2prob);
00248                     if (k >= 0) {
00249                         updated = TRUE;
00250                         if (k > 0) {
00251                             nulls =
00252                                 glist_add_ptr(nulls, (void *)
00253                                               fsg_model_null_trans
00254                                               (fsg, tl1->from_state,
00255                                                tl2->to_state));
00256                             n++;
00257                         }
00258                     }
00259                 }
00260             }
00261         }
00262     } while (updated);
00263 
00264     E_INFO("%d null transitions added\n", n);
00265 
00266     return nulls;
00267 }
00268 
00269 glist_t
00270 fsg_model_trans(fsg_model_t *fsg, int32 i, int32 j)
00271 {
00272     void *val;
00273 
00274     if (fsg->trans[i].trans == NULL)
00275         return NULL;
00276     if (hash_table_lookup_bkey(fsg->trans[i].trans, (char const *)&j,
00277                                sizeof(j), &val) < 0)
00278         return NULL;
00279     return (glist_t)val;
00280 }
00281 
00282 fsg_link_t *
00283 fsg_model_null_trans(fsg_model_t *fsg, int32 i, int32 j)
00284 {
00285     void *val;
00286 
00287     if (fsg->trans[i].null_trans == NULL)
00288         return NULL;
00289     if (hash_table_lookup_bkey(fsg->trans[i].null_trans, (char const *)&j,
00290                                sizeof(j), &val) < 0)
00291         return NULL;
00292     return (fsg_link_t *)val;
00293 }
00294 
00295 fsg_arciter_t *
00296 fsg_model_arcs(fsg_model_t *fsg, int32 i)
00297 {
00298     fsg_arciter_t *itor;
00299 
00300     if (fsg->trans[i].trans == NULL && fsg->trans[i].null_trans == NULL)
00301         return NULL;
00302     itor = ckd_calloc(1, sizeof(*itor));
00303     if (fsg->trans[i].null_trans)
00304         itor->null_itor = hash_table_iter(fsg->trans[i].null_trans);
00305     if (fsg->trans[i].trans)
00306         itor->itor = hash_table_iter(fsg->trans[i].trans);
00307     if (itor->itor != NULL)
00308         itor->gn = hash_entry_val(itor->itor->ent);
00309     return itor;
00310 }
00311 
00312 fsg_link_t *
00313 fsg_arciter_get(fsg_arciter_t *itor)
00314 {
00315     /* Iterate over non-null arcs first. */
00316     if (itor->gn)
00317         return (fsg_link_t *)gnode_ptr(itor->gn);
00318     else if (itor->null_itor)
00319         return (fsg_link_t *)hash_entry_val(itor->null_itor->ent);
00320     else
00321         return NULL;
00322 }
00323 
00324 fsg_arciter_t *
00325 fsg_arciter_next(fsg_arciter_t *itor)
00326 {
00327     /* Iterate over non-null arcs first. */
00328     if (itor->gn) {
00329         itor->gn = gnode_next(itor->gn);
00330         /* Move to the next destination arc. */
00331         if (itor->gn == NULL) {
00332             itor->itor = hash_table_iter_next(itor->itor);
00333             if (itor->itor != NULL)
00334                 itor->gn = hash_entry_val(itor->itor->ent);
00335             else if (itor->null_itor == NULL)
00336                 goto stop_iteration;
00337         }
00338     }
00339     else {
00340         if (itor->null_itor == NULL)
00341             goto stop_iteration;
00342         itor->null_itor = hash_table_iter_next(itor->null_itor);
00343         if (itor->null_itor == NULL)
00344             goto stop_iteration;
00345     }
00346     return itor;
00347 stop_iteration:
00348     fsg_arciter_free(itor);
00349     return NULL;
00350 
00351 }
00352 
00353 void
00354 fsg_arciter_free(fsg_arciter_t *itor)
00355 {
00356     if (itor == NULL)
00357         return;
00358     hash_table_iter_free(itor->null_itor);
00359     hash_table_iter_free(itor->itor);
00360     ckd_free(itor);
00361 }
00362 
00363 int
00364 fsg_model_word_id(fsg_model_t *fsg, char const *word)
00365 {
00366     int wid;
00367 
00368     /* Search for an existing word matching this. */
00369     for (wid = 0; wid < fsg->n_word; ++wid) {
00370         if (0 == strcmp(fsg->vocab[wid], word))
00371             break;
00372     }
00373     /* If not found, add this to the vocab. */
00374     if (wid == fsg->n_word)
00375         return -1;
00376     return wid;
00377 }
00378 
00379 int
00380 fsg_model_word_add(fsg_model_t *fsg, char const *word)
00381 {
00382     int wid;
00383 
00384     /* Search for an existing word matching this. */
00385     wid = fsg_model_word_id(fsg, word);
00386     /* If not found, add this to the vocab. */
00387     if (wid == -1) {
00388         wid = fsg->n_word;
00389         if (fsg->n_word == fsg->n_word_alloc) {
00390             fsg->n_word_alloc += 10;
00391             fsg->vocab = ckd_realloc(fsg->vocab,
00392                                      fsg->n_word_alloc * sizeof(*fsg->vocab));
00393             if (fsg->silwords)
00394                 fsg->silwords = bitvec_realloc(fsg->silwords, fsg->n_word_alloc);
00395             if (fsg->altwords)
00396                 fsg->altwords = bitvec_realloc(fsg->altwords, fsg->n_word_alloc);
00397         }
00398         ++fsg->n_word;
00399         fsg->vocab[wid] = ckd_salloc(word);
00400     }
00401     return wid;
00402 }
00403 
00404 int
00405 fsg_model_add_silence(fsg_model_t * fsg, char const *silword,
00406                       int state, float32 silprob)
00407 {
00408     int32 logsilp;
00409     int n_trans, silwid, src;
00410 
00411     E_INFO("Adding silence transitions for %s to FSG\n", silword);
00412 
00413     silwid = fsg_model_word_add(fsg, silword);
00414     logsilp = (int32) (logmath_log(fsg->lmath, silprob) * fsg->lw);
00415     if (fsg->silwords == NULL)
00416         fsg->silwords = bitvec_alloc(fsg->n_word_alloc);
00417     bitvec_set(fsg->silwords, silwid);
00418 
00419     n_trans = 0;
00420     if (state == -1) {
00421         for (src = 0; src < fsg->n_state; src++) {
00422             fsg_model_trans_add(fsg, src, src, logsilp, silwid);
00423             ++n_trans;
00424         }
00425     }
00426     else {
00427         fsg_model_trans_add(fsg, state, state, logsilp, silwid);
00428         ++n_trans;
00429     }
00430 
00431     E_INFO("Added %d silence word transitions\n", n_trans);
00432     return n_trans;
00433 }
00434 
00435 int
00436 fsg_model_add_alt(fsg_model_t * fsg, char const *baseword,
00437                   char const *altword)
00438 {
00439     int i, basewid, altwid;
00440     int ntrans;
00441 
00442     /* FIXME: This will get slow, eventually... */
00443     for (basewid = 0; basewid < fsg->n_word; ++basewid)
00444         if (0 == strcmp(fsg->vocab[basewid], baseword))
00445             break;
00446     if (basewid == fsg->n_word) {
00447         E_ERROR("Base word %s not present in FSG vocabulary!\n", baseword);
00448         return -1;
00449     }
00450     altwid = fsg_model_word_add(fsg, altword);
00451     if (fsg->altwords == NULL)
00452         fsg->altwords = bitvec_alloc(fsg->n_word_alloc);
00453     bitvec_set(fsg->altwords, altwid);
00454 
00455     E_DEBUG(2,("Adding alternate word transitions (%s,%s) to FSG\n",
00456                baseword, altword));
00457 
00458     /* Look for all transitions involving baseword and duplicate them. */
00459     /* FIXME: This will also get slow, eventually... */
00460     ntrans = 0;
00461     for (i = 0; i < fsg->n_state; ++i) {
00462         hash_iter_t *itor;
00463         if (fsg->trans[i].trans == NULL)
00464             continue;
00465         for (itor = hash_table_iter(fsg->trans[i].trans); itor;
00466              itor = hash_table_iter_next(itor)) {
00467             glist_t trans;
00468             gnode_t *gn;
00469 
00470             trans = hash_entry_val(itor->ent);
00471             for (gn = trans; gn; gn = gnode_next(gn)) {
00472                 fsg_link_t *fl = gnode_ptr(gn);
00473                 if (fl->wid == basewid) {
00474                     fsg_link_t *link;
00475 
00476                     /* Create transition object */
00477                     link = listelem_malloc(fsg->link_alloc);
00478                     link->from_state = fl->from_state;
00479                     link->to_state = fl->to_state;
00480                     link->logs2prob = fl->logs2prob; /* FIXME!!!??? */
00481                     link->wid = altwid;
00482 
00483                     trans =
00484                         glist_add_ptr(trans, (void *) link);
00485                     ++ntrans;
00486                 }
00487             }
00488             hash_entry_val(itor->ent) = trans;
00489         }
00490     }
00491 
00492     E_DEBUG(2,("Added %d alternate word transitions\n", ntrans));
00493     return ntrans;
00494 }
00495 
00496 
00497 fsg_model_t *
00498 fsg_model_init(char const *name, logmath_t *lmath, float32 lw, int32 n_state)
00499 {
00500     fsg_model_t *fsg;
00501 
00502     /* Allocate basic stuff. */
00503     fsg = ckd_calloc(1, sizeof(*fsg));
00504     fsg->refcount = 1;
00505     fsg->link_alloc = listelem_alloc_init(sizeof(fsg_link_t));
00506     fsg->lmath = lmath;
00507     fsg->name = name ? ckd_salloc(name) : NULL;
00508     fsg->n_state = n_state;
00509     fsg->lw = lw;
00510 
00511     fsg->trans = ckd_calloc(fsg->n_state, sizeof(*fsg->trans));
00512 
00513     return fsg;
00514 }
00515 
00516 fsg_model_t *
00517 fsg_model_read(FILE * fp, logmath_t *lmath, float32 lw)
00518 {
00519     fsg_model_t *fsg;
00520     hash_table_t *vocab;
00521     hash_iter_t *itor;
00522     int32 lastwid;
00523     char **wordptr;
00524     char *lineptr;
00525     char *fsgname;
00526     int32 lineno;
00527     int32 n, i, j;
00528     int n_state, n_trans, n_null_trans;
00529     glist_t nulls;
00530     float32 p;
00531 
00532     lineno = 0;
00533     vocab = hash_table_new(32, FALSE);
00534     wordptr = NULL;
00535     lineptr = NULL;
00536     nulls = NULL;
00537     fsgname = NULL;
00538     fsg = NULL;
00539 
00540     /* Scan upto FSG_BEGIN header */
00541     for (;;) {
00542         n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
00543         if (n < 0) {
00544             E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
00545             goto parse_error;
00546         }
00547 
00548         if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) {
00549             if (n > 2) {
00550                 E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n",
00551                         lineno);
00552                 goto parse_error;
00553             }
00554             break;
00555         }
00556     }
00557     /* Save FSG name, or it will get clobbered below :(.
00558      * If name is missing, try the default.
00559      */
00560     if (n == 2) {
00561         fsgname = ckd_salloc(wordptr[1]);
00562     } else {
00563         E_WARN ("FSG name is missing\n");
00564         fsgname = ckd_salloc("unknown");
00565     }
00566 
00567     /* Read #states */
00568     n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
00569     if ((n != 2)
00570         || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0)
00571             && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0))
00572         || (sscanf(wordptr[1], "%d", &n_state) != 1)
00573         || (n_state <= 0)) {
00574         E_ERROR
00575             ("Line[%d]: #states declaration line missing or malformed\n",
00576              lineno);
00577         goto parse_error;
00578     }
00579 
00580     /* Now create the FSG. */
00581     fsg = fsg_model_init(fsgname, lmath, lw, n_state);
00582     ckd_free(fsgname);
00583     fsgname = NULL;
00584 
00585     /* Read start state */
00586     n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
00587     if ((n != 2)
00588         || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0)
00589             && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0))
00590         || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1)
00591         || (fsg->start_state < 0)
00592         || (fsg->start_state >= fsg->n_state)) {
00593         E_ERROR
00594             ("Line[%d]: start state declaration line missing or malformed\n",
00595              lineno);
00596         goto parse_error;
00597     }
00598 
00599     /* Read final state */
00600     n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
00601     if ((n != 2)
00602         || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0)
00603             && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0))
00604         || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1)
00605         || (fsg->final_state < 0)
00606         || (fsg->final_state >= fsg->n_state)) {
00607         E_ERROR
00608             ("Line[%d]: final state declaration line missing or malformed\n",
00609              lineno);
00610         goto parse_error;
00611     }
00612 
00613     /* Read transitions */
00614     lastwid = 0;
00615     n_trans = n_null_trans = 0;
00616     for (;;) {
00617         int32 wid, tprob;
00618 
00619         n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
00620         if (n <= 0) {
00621             E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
00622                     lineno);
00623             goto parse_error;
00624         }
00625 
00626         if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) {
00627             break;
00628         }
00629 
00630         if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0)
00631             || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) {
00632 
00633 
00634             if (((n != 4) && (n != 5))
00635                 || (sscanf(wordptr[1], "%d", &i) != 1)
00636                 || (sscanf(wordptr[2], "%d", &j) != 1)
00637                 || (i < 0) || (i >= fsg->n_state)
00638                 || (j < 0) || (j >= fsg->n_state)) {
00639                 E_ERROR
00640                     ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
00641                      lineno);
00642                 goto parse_error;
00643             }
00644 
00645             p = atof_c(wordptr[3]);
00646             if ((p <= 0.0) || (p > 1.0)) {
00647                 E_ERROR
00648                     ("Line[%d]: transition spec malformed; Expecting float as transition probability\n",
00649                      lineno);
00650                 goto parse_error;
00651             }
00652         }
00653         else {
00654             E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
00655                     lineno);
00656             goto parse_error;
00657         }
00658 
00659         tprob = (int32)(logmath_log(lmath, p) * fsg->lw);
00660         /* Add word to "dictionary". */
00661         if (n > 4) {
00662             if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) {
00663                 (void)hash_table_enter_int32(vocab, ckd_salloc(wordptr[4]), lastwid);
00664                 wid = lastwid;
00665                 ++lastwid;
00666             }
00667             fsg_model_trans_add(fsg, i, j, tprob, wid);
00668             ++n_trans;
00669         }
00670         else {
00671             if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) {
00672                 ++n_null_trans;
00673                 nulls = glist_add_ptr(nulls, fsg_model_null_trans(fsg, i, j));
00674             }
00675         }
00676     }
00677 
00678     E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n",
00679            fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans);
00680 
00681     /* Do transitive closure on null transitions */
00682     nulls = fsg_model_null_trans_closure(fsg, nulls);
00683     glist_free(nulls);
00684 
00685     /* Now create a string table from the "dictionary" */
00686     fsg->n_word = hash_table_inuse(vocab);
00687     fsg->n_word_alloc = fsg->n_word + 10; /* Pad it a bit. */
00688     fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab));
00689     for (itor = hash_table_iter(vocab); itor; itor = hash_table_iter_next(itor)) {
00690         char const *word = hash_entry_key(itor->ent);
00691         int32 wid = (int32)(long)hash_entry_val(itor->ent);
00692         fsg->vocab[wid] = (char *)word;
00693     }
00694     hash_table_free(vocab);
00695     ckd_free(lineptr);
00696     ckd_free(wordptr);
00697 
00698     return fsg;
00699 
00700   parse_error:
00701     for (itor = hash_table_iter(vocab); itor; itor = hash_table_iter_next(itor))
00702         ckd_free((char *)hash_entry_key(itor->ent));
00703     glist_free(nulls);
00704     hash_table_free(vocab);
00705     ckd_free(fsgname);
00706     ckd_free(lineptr);
00707     ckd_free(wordptr);
00708     fsg_model_free(fsg);
00709     return NULL;
00710 }
00711 
00712 
00713 fsg_model_t *
00714 fsg_model_readfile(const char *file, logmath_t *lmath, float32 lw)
00715 {
00716     FILE *fp;
00717     fsg_model_t *fsg;
00718 
00719     if ((fp = fopen(file, "r")) == NULL) {
00720         E_ERROR("Failed to open FSG file '%s' for reading: %s\n", file, strerror(errno));
00721         return NULL;
00722     }
00723     fsg = fsg_model_read(fp, lmath, lw);
00724     fclose(fp);
00725     return fsg;
00726 }
00727 
00728 fsg_model_t *
00729 fsg_model_retain(fsg_model_t *fsg)
00730 {
00731     ++fsg->refcount;
00732     return fsg;
00733 }
00734 
00735 static void
00736 trans_list_free(fsg_model_t *fsg, int32 i)
00737 {
00738     hash_iter_t *itor;
00739 
00740     /* FIXME (maybe): FSG links will all get freed when we call
00741      * listelem_alloc_free() so don't bother freeing them explicitly
00742      * here. */
00743     if (fsg->trans[i].trans) {
00744         for (itor = hash_table_iter(fsg->trans[i].trans);
00745              itor; itor = hash_table_iter_next(itor)) {
00746             glist_t gl = (glist_t)hash_entry_val(itor->ent);
00747             glist_free(gl);
00748         }
00749     }
00750     hash_table_free(fsg->trans[i].trans);
00751     hash_table_free(fsg->trans[i].null_trans);
00752 }
00753 
00754 int
00755 fsg_model_free(fsg_model_t * fsg)
00756 {
00757     int i;
00758 
00759     if (fsg == NULL)
00760         return 0;
00761 
00762     if (--fsg->refcount > 0)
00763         return fsg->refcount;
00764 
00765     for (i = 0; i < fsg->n_word; ++i)
00766         ckd_free(fsg->vocab[i]);
00767     for (i = 0; i < fsg->n_state; ++i)
00768         trans_list_free(fsg, i);
00769     ckd_free(fsg->trans);
00770     ckd_free(fsg->vocab);
00771     listelem_alloc_free(fsg->link_alloc);
00772     bitvec_free(fsg->silwords);
00773     bitvec_free(fsg->altwords);
00774     ckd_free(fsg->name);
00775     ckd_free(fsg);
00776     return 0;
00777 }
00778 
00779 
00780 void
00781 fsg_model_write(fsg_model_t * fsg, FILE * fp)
00782 {
00783     int32 i;
00784     
00785     fprintf(fp, "%s %s\n", FSG_MODEL_BEGIN_DECL, fsg->name ? fsg->name : "");
00786     fprintf(fp, "%s %d\n", FSG_MODEL_NUM_STATES_DECL, fsg->n_state);
00787     fprintf(fp, "%s %d\n", FSG_MODEL_START_STATE_DECL, fsg->start_state);
00788     fprintf(fp, "%s %d\n", FSG_MODEL_FINAL_STATE_DECL, fsg->final_state);
00789 
00790     for (i = 0; i < fsg->n_state; i++) {
00791         fsg_arciter_t *itor;
00792 
00793         for (itor = fsg_model_arcs(fsg, i); itor; itor = fsg_arciter_next(itor)) {
00794             fsg_link_t *tl = fsg_arciter_get(itor);
00795 
00796             fprintf(fp, "%s %d %d %f %s\n", FSG_MODEL_TRANSITION_DECL,
00797                     tl->from_state, tl->to_state,
00798                     logmath_exp(fsg->lmath, (int32)(tl->logs2prob / fsg->lw)),
00799                     (tl->wid < 0) ? "" : fsg_model_word_str(fsg, tl->wid));
00800         }
00801     }
00802 
00803     fprintf(fp, "%s\n", FSG_MODEL_END_DECL);
00804 
00805     fflush(fp);
00806 }
00807 
00808 void
00809 fsg_model_writefile(fsg_model_t *fsg, char const *file)
00810 {
00811     FILE *fp;
00812 
00813     assert(fsg);
00814 
00815     E_INFO("Writing FSG file '%s'\n", file);
00816 
00817     if ((fp = fopen(file, "w")) == NULL) {
00818         E_ERROR("Failed to open FSG file '%s' for reading: %s\n", file, strerror(errno));
00819         return;
00820     }
00821 
00822     fsg_model_write(fsg, fp);
00823 
00824     fclose(fp);
00825 }
00826 
00827 static void
00828 fsg_model_write_fsm_trans(fsg_model_t *fsg, int i, FILE *fp)
00829 {
00830     fsg_arciter_t *itor;
00831 
00832     for (itor = fsg_model_arcs(fsg, i); itor;
00833          itor = fsg_arciter_next(itor)) {
00834         fsg_link_t *tl = fsg_arciter_get(itor);
00835         fprintf(fp, "%d %d %s %f\n",
00836                 tl->from_state, tl->to_state,
00837                 (tl->wid < 0) ? "<eps>" : fsg_model_word_str(fsg, tl->wid),
00838                 -logmath_log_to_ln(fsg->lmath, tl->logs2prob / fsg->lw));
00839     }
00840 }
00841 
00842 void
00843 fsg_model_write_fsm(fsg_model_t * fsg, FILE * fp)
00844 {
00845     int i;
00846 
00847     /* Write transitions from initial state first. */
00848     fsg_model_write_fsm_trans(fsg, fsg_model_start_state(fsg), fp);
00849 
00850     /* Other states. */
00851     for (i = 0; i < fsg->n_state; i++) {
00852         if (i == fsg_model_start_state(fsg))
00853             continue;
00854         fsg_model_write_fsm_trans(fsg, i, fp);
00855     }
00856 
00857     /* Final state. */
00858     fprintf(fp, "%d 0\n", fsg_model_final_state(fsg));
00859 
00860     fflush(fp);
00861 }
00862 
00863 void
00864 fsg_model_writefile_fsm(fsg_model_t *fsg, char const *file)
00865 {
00866     FILE *fp;
00867 
00868     assert(fsg);
00869 
00870     E_INFO("Writing FSM file '%s'\n", file);
00871 
00872     if ((fp = fopen(file, "w")) == NULL) {
00873         E_ERROR("Failed to open fsm file '%s' for writing: %s\n", file, strerror(errno));
00874         return;
00875     }
00876 
00877     fsg_model_write_fsm(fsg, fp);
00878 
00879     fclose(fp);
00880 }
00881 
00882 void
00883 fsg_model_write_symtab(fsg_model_t *fsg, FILE *file)
00884 {
00885     int i;
00886 
00887     fprintf(file, "<eps> 0\n");
00888     for (i = 0; i < fsg_model_n_word(fsg); ++i) {
00889         fprintf(file, "%s %d\n", fsg_model_word_str(fsg, i), i + 1);
00890     }
00891     fflush(file);
00892 }
00893 
00894 void
00895 fsg_model_writefile_symtab(fsg_model_t *fsg, char const *file)
00896 {
00897     FILE *fp;
00898 
00899     assert(fsg);
00900 
00901     E_INFO("Writing FSM symbol table '%s'\n", file);
00902 
00903     if ((fp = fopen(file, "w")) == NULL) {
00904         E_ERROR("Failed to open symbol table '%s' for writing: %s\n", file, strerror(errno));
00905         return;
00906     }
00907 
00908     fsg_model_write_symtab(fsg, fp);
00909 
00910     fclose(fp);
00911 }