PocketSphinx 5prealpha
fsg_lextree.h
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2013 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 *
19 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 * ====================================================================
32 *
33 */
34/*
35 * fsg_lextree.h -- The collection of all the lextrees for the entire FSM.
36 *
37 */
38
39#ifndef __S2_FSG_LEXTREE_H__
40#define __S2_FSG_LEXTREE_H__
41
42/* SphinxBase headers. */
43#include <sphinxbase/cmd_ln.h>
44#include <sphinxbase/fsg_model.h>
45
46/* Local headers. */
47#include "hmm.h"
48#include "dict.h"
49#include "dict2pid.h"
50
51/*
52 * Compile-time constant determining the size of the
53 * bitvector fsg_pnode_t.fsg_pnode_ctxt_t.bv. (See below.)
54 * But it makes memory allocation simpler and more efficient.
55 * Make it smaller (2) to save memory if your phoneset has less than
56 * 64 phones.
57 */
58#define FSG_PNODE_CTXT_BVSZ 4
59
60typedef struct {
61 uint32 bv[FSG_PNODE_CTXT_BVSZ];
63
64
65/*
66 * All transitions (words) out of any given FSG state represented are by a
67 * phonetic prefix lextree (except for epsilon or null transitions; they
68 * are not part of the lextree). Lextree leaf nodes represent individual
69 * FSG transitions, so no sharing is allowed at the leaf nodes. The FSG
70 * transition probs are distributed along the lextree: the prob at a node
71 * is the max of the probs of all leaf nodes (and, hence, FSG transitions)
72 * reachable from that node.
73 *
74 * To conserve memory, the underlying HMMs with state-level information are
75 * allocated only as needed. Root and leaf nodes must also account for all
76 * the possible phonetic contexts, with an independent HMM for each distinct
77 * context.
78 */
79typedef struct fsg_pnode_s {
80 /*
81 * If this is not a leaf node, the first successor (child) node. Otherwise
82 * the parent FSG transition for which this is the leaf node (for figuring
83 * the FSG destination state, and word emitted by the transition). A node
84 * may have several children. The succ ptr gives just the first; the rest
85 * are linked via the sibling ptr below.
86 */
87 union {
88 struct fsg_pnode_s *succ;
89 fsg_link_t *fsglink;
90 } next;
91
92 /*
93 * For simplicity of memory management (i.e., freeing the pnodes), all
94 * pnodes allocated for all transitions out of a state are maintained in a
95 * linear linked list through the alloc_next pointer.
96 */
97 struct fsg_pnode_s *alloc_next;
98
99 /*
100 * The next node that is also a child of the parent of this node; NULL if
101 * none.
102 */
103 struct fsg_pnode_s *sibling;
104
105 /*
106 * The transition (log) probability to be incurred upon transitioning to
107 * this node. (Transition probabilities are really associated with the
108 * transitions. But a lextree node has exactly one incoming transition.
109 * Hence, the prob can be associated with the node.)
110 * This is a logs2(prob) value, and includes the language weight.
111 */
112 int32 logs2prob;
113
114 /*
115 * The root and leaf positions associated with any transition have to deal
116 * with multiple phonetic contexts. However, different contexts may result
117 * in the same SSID (senone-seq ID), and can share a single pnode with that
118 * SSID. But the pnode should track the set of context CI phones that share
119 * it. Hence the fsg_pnode_ctxt_t bit-vector set-representation. (For
120 * simplicity of implementation, its size is a compile-time constant for
121 * now.) Single phone words would need a 2-D array of context, but that's
122 * too expensive. For now, they simply use SIL as right context, so only
123 * the left context is properly modelled.
124 * (For word-internal phones, this field is unused, of course.)
125 */
126 fsg_pnode_ctxt_t ctxt;
127
128 uint16 ci_ext; /* This node's CIphone as viewed externally (context) */
129 uint8 ppos; /* Phoneme position in pronunciation */
130 uint8 leaf; /* Whether this is a leaf node */
131
132 /* HMM-state-level stuff here */
133 hmm_context_t *ctx;
134 hmm_t hmm;
136
137/* Access macros */
138#define fsg_pnode_leaf(p) ((p)->leaf)
139#define fsg_pnode_logs2prob(p) ((p)->logs2prob)
140#define fsg_pnode_succ(p) ((p)->next.succ)
141#define fsg_pnode_fsglink(p) ((p)->next.fsglink)
142#define fsg_pnode_sibling(p) ((p)->sibling)
143#define fsg_pnode_hmmptr(p) (&((p)->hmm))
144#define fsg_pnode_ci_ext(p) ((p)->ci_ext)
145#define fsg_pnode_ppos(p) ((p)->ppos)
146#define fsg_pnode_leaf(p) ((p)->leaf)
147#define fsg_pnode_ctxt(p) ((p)->ctxt)
148
149#define fsg_pnode_add_ctxt(p,c) ((p)->ctxt.bv[(c)>>5] |= (1 << ((c)&0x001f)))
150
151/*
152 * The following is macroized because its called very frequently
153 * ::: uint32 fsg_pnode_ctxt_sub (fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub);
154 */
155/*
156 * Subtract bitvector sub from bitvector src (src updated with the result).
157 * Return 0 if result is all 0, non-zero otherwise.
158 */
159
160#if (FSG_PNODE_CTXT_BVSZ == 1)
161 #define FSG_PNODE_CTXT_SUB(src,sub) \
162 ((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0]))
163#elif (FSG_PNODE_CTXT_BVSZ == 2)
164 #define FSG_PNODE_CTXT_SUB(src,sub) \
165 (((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0])) | \
166 ((src)->bv[1] = (~((sub)->bv[1]) & (src)->bv[1])))
167#elif (FSG_PNODE_CTXT_BVSZ == 4)
168 #define FSG_PNODE_CTXT_SUB(src,sub) \
169 (((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0])) | \
170 ((src)->bv[1] = (~((sub)->bv[1]) & (src)->bv[1])) | \
171 ((src)->bv[2] = (~((sub)->bv[2]) & (src)->bv[2])) | \
172 ((src)->bv[3] = (~((sub)->bv[3]) & (src)->bv[3])))
173#else
174 #define FSG_PNODE_CTXT_SUB(src,sub) fsg_pnode_ctxt_sub_generic((src),(sub))
175#endif
176
180typedef struct fsg_lextree_s {
181 fsg_model_t *fsg;
187 /*
188 * Left and right CIphone sets for each state.
189 * Left context CIphones for a state S: If word W transitions into S, W's
190 * final CIphone is in S's {lc}. Words transitioning out of S must consider
191 * these left context CIphones.
192 * Similarly, right contexts for state S: If word W transitions out of S,
193 * W's first CIphone is in S's {rc}. Words transitioning into S must consider
194 * these right contexts.
195 *
196 * NOTE: Words may transition into and out of S INDIRECTLY, with intermediate
197 * null transitions.
198 * NOTE: Single-phone words are difficult; only SILENCE right context is
199 * modelled for them.
200 * NOTE: Non-silence filler phones aren't included in these sets. Filler
201 * words don't use context, and present the SILENCE phone as context to
202 * adjacent words.
203 */
204 int16 **lc;
205 int16 **rc;
207 fsg_pnode_t **root; /* root[s] = lextree representing all transitions
208 out of state s. Note that the "tree" for each
209 state is actually a collection of trees, linked
210 via fsg_pnode_t.sibling (root[s]->sibling) */
211 fsg_pnode_t **alloc_head; /* alloc_head[s] = head of linear list of all
212 pnodes allocated for state s */
213 int32 n_pnode; /* #HMM nodes in search structure */
214 int32 wip;
215 int32 pip;
217
218/* Access macros */
219#define fsg_lextree_root(lt,s) ((lt)->root[s])
220#define fsg_lextree_n_pnode(lt) ((lt)->n_pnode)
221
225fsg_lextree_t *fsg_lextree_init(fsg_model_t *fsg, dict_t *dict,
226 dict2pid_t *d2p,
227 bin_mdef_t *mdef, hmm_context_t *ctx,
228 int32 wip, int32 pip);
229
234
238void fsg_lextree_dump(fsg_lextree_t *fsg, FILE *fh);
239
244
249
254
255#endif
Building triphones for a dictionary.
Operations on dictionary.
void fsg_lextree_free(fsg_lextree_t *lextree)
Free lextrees for an FSG.
Definition: fsg_lextree.c:286
void fsg_lextree_dump(fsg_lextree_t *lextree, FILE *fp)
Print an FSG lextree to a file for debugging.
Definition: fsg_lextree.c:273
void fsg_psubtree_pnode_deactivate(fsg_pnode_t *pnode)
Mark the given pnode as inactive (for search).
Definition: fsg_lextree.c:832
void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt)
Set all flags on in the given context bitvector.
Definition: fsg_lextree.c:328
fsg_lextree_t * fsg_lextree_init(fsg_model_t *fsg, dict_t *dict, dict2pid_t *d2p, bin_mdef_t *mdef, hmm_context_t *ctx, int32 wip, int32 pip)
Create, initialize, and return a new phonetic lextree for the given FSG.
Definition: fsg_lextree.c:215
uint32 fsg_pnode_ctxt_sub_generic(fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub)
Generic variant for arbitrary size.
Definition: fsg_lextree.c:336
Implementation of HMM base structure.
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
a structure for a dictionary.
Definition: dict.h:76
Collection of lextrees for an FSG.
Definition: fsg_lextree.h:180
int16 ** lc
Left context triphone mappings for FSG.
Definition: fsg_lextree.h:204
fsg_model_t * fsg
The fsg for which this lextree is built.
Definition: fsg_lextree.h:181
int16 ** rc
Right context triphone mappings for FSG.
Definition: fsg_lextree.h:205
dict_t * dict
Pronunciation dictionary for this FSG.
Definition: fsg_lextree.h:183
dict2pid_t * d2p
Context-dependent phone mappings for this FSG.
Definition: fsg_lextree.h:184
bin_mdef_t * mdef
Model definition (triphone mappings).
Definition: fsg_lextree.h:185
hmm_context_t * ctx
HMM context structure.
Definition: fsg_lextree.h:182
Shared information between a set of HMMs.
An individual HMM among the HMM search space.