- The added line is THIS COLOR.
- The deleted line is THIS COLOR.
[[Reference of hts_engine API]]
//[[HTS_engine_API_english]]
#contents
&br;
* Preprocessor symbols for conditional compilation [#b930707d]
- For embedded device
HTS_EMBEDDED (slightly faster setting is used)
- Audio device setting
AUDIO_PLAY_WIN32 (for Windows 2000/XP/Vista C++ compiler)
AUDIO_PLAY_WINCE (for Windows Mobile C++ compiler)
AUDIO_PLAY_NONE (default)
- For [[Festival speech synthesis system:http://festvox.org/]]
FESTIVAL
- Endian definition
WORDS_BIGENDIAN (e.g. PowerPC, Cell BE, SPARC, 680x0)
WORDS_LITTLEENDIAN (e.g. x86, Alpha AXP)
* Structures [#y072254d]
** Models [#e904ac7d]
*** HTS_ModelSet [#s84d95d0]
> Set of HMMs and duration models.
int nstate - # of HMM states
int lf0stream - # of stream for F0
int mcpvsize - vector size for spectrum
int *nlf0pdf - # of PDFs at each state position (F0)
int *nmcppdf - # of PDFs at each state position (spectrum)
int ndurpdf - # of PDFs (duration)
double **durpdf - array of PDFs (duration)
double ***mcppdf - array of PDFs (spectrum)
double ****lf0pdf - array of PDFs (F0)
double weight_interp - weight for model interpolation
- Boolean definition (HTS_Boolean)
TRUE
FALSE
*** HTS_Model [#hca72a1d]
> A subword HMM in an utterance HMM.
char *name - name of this HMM (name only)
char *lab - label of this HMM (includes other information)
int durpdf - duration PDF index
int *lf0pdf - F0 PDF indexes
int *mcppdf - spectrum PDF indexes
int *dur - state durations (frame)
int totaldur - total duration in this HMM (frame)
double **lf0mean - mean vectors of F0 PDFs
double **lf0variance - diag variances of F0 PDFs
double **mcpmean - mean vectors of spectrum PDFs
double **mcpvariance - diag variances of spectrum PDFs
HTS_Boolean *voiced - voiced/unvoiced flags in this HMM
struct _HTS_Model *next - pointer to the next subword HMM
HTS_Boolean bool_rate - flag for speaking rate modification
HTS_Boolean bool_dur - flag for duration modification
HTS_Boolean bool_f0_level - flag for f0 level modification
HTS_Boolean bool_f0_range - flag for f0 range modification
HTS_Boolean bool_volume - flag for volume modification
HTS_Boolean bool_alpha - flag for frequency warping modification
double lab_rate - speaking rate specified in the given label
int lab_dur - # of frames specified in the given label
double lab_f0_level - f0 level specified in the given label
double lab_f0_range - f0 range specified in the given label
double lab_volume - volume specified in the given label
double lab_alpha - frequency warping specified in the given label
* Engine structures [#z44fc354]
*** HTS_UttModel [#ke553faa]
> An utterance HMM.
HTS_Model *mhead - list of subword HMMs (head)
HTS_Model *mtail - list of subword HMMs (tail)
int nModel - # of subword HMMs in this utterance HMM
int nState - total # of HMM states in this utterance HMM
int totalframe - total # of frames in this utterance
** Model [#le7d813d]
** Trees [#n794348b]
*** HTS_Pattern [#xdfc3831]
> List of patterns in a question.
char *pat - pattern string
struct _HTS_Pattern *next - pointer to the next pattern
*** HTS_Window [#zb4a3381]
- Window coefficients to calculate dynamic features.
int size - # of windows (static + deltas)
int *l_width - left width of windows
int *r_width - right width of windows
double **coefficient - window coefficients
int max_width - maximum width of windows
*** HTS_Question [#b0a9afa1]
> List of questions in HTS_TreeSet.
char *qName - name of this question
HTS_Pattern *phead - list of patterns (head)
HTS_Pattern *ptail - list of patterns (tail)
struct _HTS_Question *next - pointer to the next question
*** HTS_Pattern [#k48e4d9f]
- List of patterns in a question and a tree.
char *string - pattern string
HTS_Pattern *next - pointer to the next pattern
*** HTS_Node [#v097f6cf]
> List of tree nodes in a decision tree.
int idx - index of this node
int pdf - index of PDF for this node (leaf node only)
struct _HTS_Node *yes - pointer to its child node (yes)
struct _HTS_Node *no - pointer to its child node (no)
struct _HTS_Node *next - pointer to the next node
HTS_Question *quest - question applied at this node
*** HTS_Question [#z77715ac]
- List of questions in a tree.
char *name - name of this question
HTS_Pattern *head - pointer to the head of pattern list
HTS_Question *next - pointer to the next question
*** HTS_Tree [#jb5f426a]
> List of decision trees in HTS_TreeSet.
int state - state position of this tree
HTS_Pattern *phead - list of patterns used in this tree (head)
HTS_Pattern *ptail - list of patterns used in this tree (tail)
struct _HTS_Tree *next - pointer to the next tree
HTS_Node *root - root node of this tree
HTS_Node *leaf - list of leaf nodes in this tree
*** HTS_Node [#c82cbc05]
- List of tree nodes in a tree.
int index - index of this node
int pdf - index of PDF for this node (leaf node only)
HTS_Node *yes - pointer to its child node (yes)
HTS_Node *no - pointer to its child node (no)
HTS_Node *next - pointer to the next node
HTS_Question *quest - question applied at this node
*** HTS_TreeSet [#o01982e3]
> Set of decision trees.
HTS_Question *qhead[HTS_NUMMTYPE] - lists of questions for spectrum, F0 & duration (head)
HTS_Question *qtail[HTS_NUMMTYPE] - lists of questions for spectrum, F0 & duration (tail)
HTS_Tree *thead[HTS_NUMMTYPE] - lists of trees for spectrum, F0 & duration (head)
HTS_Tree *ttail[HTS_NUMMTYPE] - lists of trees for spectrum, F0 & duration (tail)
int nTrees[HTS_NUMMTYPE] - # of trees for spectrum, F0 & duration
*** HTS_Tree [#tccac793]
- List of decision trees in a model.
HTS_Pattern *head - pointer to the head of pattern list for this tree
HTS_Tree *next - pointer to the next tree
HTS_Node *root - root node of this tree
int state - state index of this tree
** Parameter generation [#v6419ed7]
*** HTS_DWin [#w20384ff]
- Window coefficients to calculate dynamic features.
int num - # of windows (static, delta, delta-delta -> 3)
int **width - width of windows [0..num-1][0(left) 1(right)]
double **coef - window coefficients [0..num-1][width[0]..width[1]]
int maxw[2] - maximum width [0(left) 1(right)]
int max_L - maximum width {maxw[0], maxw[1]}
*** HTS_Model [#lb0795d7]
- Set of PDFs, decision trees and questions.
int vector_length - vector length (include static and dynamic features)
int *npdf - # of PDFs at each tree
double ***pdf - PDFs
HTS_Tree *tree - pointer to the list of trees
HTS_Question *question - pointer to the list of questions
*** HTS_SMatrices [#yd54811c]
*** HTS_Stream [#e8ebe6f1]
- Set of models and a window.
int vector_length - vector length (includes static and dynamic features)
HTS_Model *model - models
HTS_DynamicWindow window - window coefficients
HTS_Boolean msd_flag - flag for MSD
int interpolation_size - # of models for interpolation
*** HTS_ModelSet [#n0021e72]
- Set of duration models, HMMs and GV models.
HTS_Stream duration - duration PDFs and trees
HTS_Stream *stream - parameter PDFs, trees and windows
HTS_Stream *gv - GV PDFs
int nstate - # of HMM states
int nstream - # of stream
** Label [#zdbf97c6]
*** HTS_LabelString [#k2bc37e0]
- Individual label string with time information.
HTS_LabelString *next - pointer to the next label string
char *name - label string
HTS_Boolean frame_flag - flag for frame length modification
int frame - frame length specified in the given label
*** HTS_Label [#cf3de27c]
- List of label strings.
HTS_LabelString *head - pointer to the head of label string
int size - # of label strings
double speech_speed - speech speed rate
** State stream [#p6180dbc]
*** HTS_SStream [#g80be9f5]
- Individual state stream.
int vector_length - vector length (include static and dynamic features)
double **mean - mean vector sequence
double **vari - variance vector sequence
double *msd - MSD parameter sequence
int win_size - # of windows (static + deltas)
int *win_l_width - left width of windows
int *win_r_width - right width of windows
double **win_coefficient - window coefficients
int win_max_width - maximum width of windows
double *gv_mean - mean vector of GV
double *gv_vari - variance vector of GV
*** HTS_SStreamSet [#c7e181ed]
- Set of state streams.
HTS_SStream *sstream - state streams
int nstream - # of streams
int nstate - # of states
int *duration - duration sequence
int total_state - total state
int total_frame - total frame
** PDF stream [#fbcf0b4b]
*** HTS_SMatrices [#n16e4df2]
- Matrices/Vectors used in the speech parameter generation algorithm.
double **mseq - mean vector sequence
double **ivseq - inverse diag variance sequence
double *g - vector used in the forward substitution
double **WUW - W' U^-1 W
double *WUM - W' U^-1 mu
double **mean - mean vector sequence
double **ivar - inverse diag variance sequence
double *g - vector used in the forward substitution
double **wuw - W' U^-1 W
double *wum - W' U^-1 mu
*** HTS_PStream [#bb9e7952]
- PDF stream used in the speech parameter generation algorithm.
int vSize - vector size of an observation vector (includes static & dynamic features)
int order - vector size of static features
int T - vector length (# of frames)
int width - maximum width of dynamic feature windows
HTS_DWin dw - dynamic feature windows
double **par - output parameter vector
HTS_SMatrices sm - matrices/vectors for parameter generation
HTS_Boolean *voiced - voiced/unvoiced decision
*** HTS_PStream [#v2504efe]
- Individual PDF stream.
int vector_length - vector length (include static and dynamic features)
int static_length - static features length
int length - stream length
int width - width of dynamic window
double **par - output parameter vector
HTS_SMatrices sm - matrices for parameter generation
int win_size - # of windows (static + deltas)
int *win_l_width - left width of windows
int *win_r_width - right width of windows
double **win_coefficient - window coefficients
HTS_Boolean *msd_flag - Boolean sequence for MSD
double *gv_buff - buffer for GV calculation
double *gv_mean - mean vector of GV
double *gv_vari - variance vector of GV
double gv_weight - GV weight
** Global settings [#t58554e0]
*** HTS_globalP [#gd4808ee]
*** HTS_PStreamSet [#fc667ac9]
- Set of PDF streams.
HTS_PStream *pstream - PDF streams
int nstream - # of PDF streams
int total_frame - total frame
** Generated parameter stream [#cb40623b]
*** HTS_GStream [#r1f0e0dc]
- Generated parameter stream.
int static_length - static features length
double **par - generated parameter
*** HTS_GStreamSet [#qab80b0e]
- Set of generated parameter streams.
int total_nsample - total sample
int total_frame - total frame
int nstream - # of streams
HTS_GStream *gstream - generated parameter streams
short *gspeech - generated speech
** Engine [#j629fb64]
*** HTS_Global [#x0a5815e]
- Global settings.
int rate - sampling rate (Hz)
int fperiod - frame shift (points)
double rho - speaking rate
double alpha - frequency warping
double beta - postfiltering coefficient
double f0_std - F0 multiply
double f0_mean - F0 bias
double uv - voiced/unvoiced threshold
double length - total number of frames
HTS_Boolean algnst - use state-level alignments from labels
HTS_Boolean algnph - use phone-level alignments from labels
int totaldur - total frame
int totalframe - total frame
int nsample - # of samples in a synthesized waveform
int buff_size - buffer size of audio output device
short *raw_data - synthesized waveform
HTS_Boolean stored_raw_data - flag to store a synthesized waveform in raw_data
int stage - Gamma=-1/stage : if stage=0 then Gamma=0
HTS_Boolean use_log_gain - log gain flag (for LSP)
int sampling_rate - sampling rate
int fperiod - frame period
double alpha - all-pass constant
double beta - postfiltering coefficient
int audio_buff_size - audio buffer size (for audio device)
double *msd_threshold - MSD thresholds
double *duration_iw - weights for duration interpolation
double **parameter_iw - weights for parameter interpolation
double **gv_iw - weights for GV interpolation
double *gv_weight - GV weights
*** HTS_AudioSet [#i320d4cd]
*** HTS_Engine [#wa56bd53]
- Engine itself.
HTS_Global global - global settings
HTS_ModelSet ms - set of duration models, HMMs and GV models
HTS_Label label - label
HTS_SStreamSet sss - set of state streams
HTS_PStreamSet pss - set of PDF streams
HTS_GStreamSet gss - set of generated parameter streams
* Vocoder structures [#gc6a2eb8]
** Audio [#i33a0fbe]
*** HTS_Audio [#se4ce25f]
- For MS Windows (Windows Mobile) audio output device.
HWAVEOUT hwaveout - audio device handle
WAVEFORMATEX waveformatex - wave formatex
short *buff - current buffer
int buff_size - current buffer size
int which_buff - double buffering flag
HTS_Boolean now_buff_1 - double buffering flag
HTS_Boolean now_buff_2 - double buffering flag
WAVEHDR buff_1 - buffer
WAVEHDR buff_2 - buffer
int max_buff_size - buffer size of audio output device
- For Linux, etc.
int i - make compiler happy
*** HTS_VocoderSet [#aa21d7f5]
- MLSA filter settings. Usually you don't need to set this structure manually.
** Vocoder [#lbf2a363]
*** HTS_Vocoder [#p4d69001]
- MLSA/MGLSA filter settings.
int stage - Gamma=-1/stage : if stage=0 then Gamma=0
double gamma - Gamma
HTS_Boolean use_log_gain - log gain flag (for LSP)
int fprd - frame shift
int iprd - interpolation period
int seed - seed of random generator
int pd - Pade approximation order (4 or 5)
unsigned long next - temporary variable for random generator
HTS_Boolean gauss - flag to use Gaussian noise
double rate - sampling rate
double p1 - used in excitation generation
double pc - used in excitation generation
double pade[21] - Pade coefficients
double *ppade - Pade array
double *c, *cc, *cinc, *d1 - used in the MLSA filter
double rate - sampling rate
double p - used in excitation generation
double inc - used in excitation generation
int sw - switch used in random generator
int x - excitation signal
HTS_Audio *audio - pointer for audio device
double *freqt_buff - used in freqt
int freqt_size - buffer size for freqt
double *spectrum2en_buff - used in spectrum2en
int spectrum2en_size - buffer size for spectrum2en
double r1, r2, s - used in random generator
int x - excitation signal
HTS_AudioSet *as - pointer for audio device
int size - buffer size for postfiltering
double *d - used in postfiltering
double *g - used in postfiltering
double *mc - mel-cepstral coefficients
double *cep - cepstral coefficients
double *ir - impulse response
int o - used in postfiltering
int irleng - length of impulse response
double *postfilter_buff - used in postfiltering
int postfilter_size - buffer size for postfiltering
double *c, *cc, *cinc, *d1 - used in the MLSA/MGLSA filter
double *pade - used in mlsadf
double *lsp2lpc_buff - used in lsp2lpc
int lsp2lpc_size - buffer size of lsp2lpc
double *gc2gc_buff - used in gc2gc
int gc2gc_size - buffer size for gc2gc
*** HTS_Engine [#udd70536]
- HTS_Engine itself.
HTS_ModelSet *ms - sets of HMMs and duration models
HTS_TreeSet *ts - sets of decision trees
HTS_PStream lf0pst - PDF stream for F0
HTS_PStream mcppst - PDF stream for spectrum
HTS_globalP gp - global settings
int num_interp - # of models for interpolation
* Engine functions [#l74e4110]
* Functions [#o5c11fa0]
** HTS_Engine functions [#i1438ea1]
*** HTS_Engine_initialize [#bc3db532]
void HTS_Engine_initialize(HTS_Engine *engine);
- Use: Initialize HTS_Engine structure.
** Initialize engine [#t310270e]
*** HTS_Engine_initialize [#ec43b3cc]
void HTS_Engine_initialize(HTS_Engine *engine, int nstream)
- Use: Initialize engine.
- Arguments:
HTS_Engine *engine - Pointer for HTS_Engine structure
- ''Attention!!:'' To start HTS_Engine module, first you should call this function.
HTS_Engine *engine - pointer to HTS_Engine structure
- ''Attention!!:'' To start engine, first you must call this function.
*** HTS_Engine_load_fn [#m45f8973]
void HTS_Engine_load_fn(HTS_Engine *engine,
char **fn_ms_lf0,char **fn_ms_mcp,char **fn_ms_dur,
char **fn_ts_lf0,char **fn_ts_mcp,char **fn_ts_dur,
int num_ws_lf0,char **fn_ws_lf0,
int num_ws_mcp,char **fn_ws_mcp,
double *rate_interp,int num_interp);
- Use: Load models/trees/windows from files using given filenames.
** Load models [#z7b8be60]
*** HTS_Engine_load_duration_from_fn [#va73cd1a]
void HTS_Engine_load_duration_from_fn(HTS_Engine *engine, char **pdf_fn, char **tree_fn, int interpolation_size)
- Use: Load duration PDFs and trees from files using given file names.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
char **fn_ms_lf0 - F0 PDF file names
char **fn_ms_mcp - spectrum PDF file names
char **fn_ms_dur - duration PDF file names
char **fn_ts_lf0 - F0 tree file names
char **fn_ts_mcp - spectrum tree file names
char **fn_ts_dur - duration tree file names
int num_ws_lf0 - # of dynamic feature windows for F0
char **fn_ws_lf0 - dynamic feature window file names for F0
int num_ws_mcp - # of dynamic feature windows for spectrum
char **fn_ws_mcp - dynamic feature window file names for spectrum
double *rate_interp - model interpolation rates
int num_interp - # of models to be interpolated
- ''Attention!!'': You should initialize variable '''engine''' using HTS_Engine_initialize before calling this function. If rate_interp==NULL, interpolation rates of all models are set to the same value.
HTS_Engine *engine - pointer to HTS_Engine structure
char **pdf_fn - duration PDF file names
char **tree_fn - duration tree file names
int interpolation_size - # of duration models to be interpolated
- ''Attention!!'': You must initialize variable '''engine''' using HTS_Engine_initialize before calling this function.
*** HTS_Engine_load_fp [#w7185327]
void HTS_Engine_load_fp(HTS_Engine *engine,
FILE **fp_ms_lf0,FILE **fp_ms_mcp,FILE **fp_ms_dur,
FILE **fp_ts_lf0,FILE **fp_ts_mcp,FILE **fp_ts_dur,
int num_ws_lf0,FILE **fp_ws_lf0,
int num_ws_mcp,FILE **fp_ws_mcp,
double *rate_interp,int num_interp);
- Use: Load models, trees & windows from files using given file pointers.
*** HTS_Engine_load_duration_from_fp [#wc2481e3]
void HTS_Engine_load_duration_from_fp(HTS_Engine *engine, FILE **pdf_fp, FILE **tree_fp, int interpolation_size)
- Use: load duration PDFs and trees from files using given file pointers.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
FILE **fp_ms_lf0 - F0 PDF file pointers
FILE **fp_ms_mcp - spectrum PDF file pointers
FILE **fp_ms_dur - duration PDF file pointers
FILE **fp_ts_lf0 - F0 tree file pointers
FILE **fp_ts_mcp - spectrum tree file pointers
FILE **fp_ts_dur - duration tree file pointers
int num_ws_lf0 - # of dynamic feature windows for F0
FILE **fp_ws_lf0 - dynamic feature window file pointers for F0
int num_ws_mcp - # of dynamic feature windows for spectrum
FILE **fp_ws_mcp - dynamic feature window file pointers for spectrum
FILE *fp_gv_lf0 - file pointers of F0 GV
FILE *fp_gv_mcp - file pointers of spectrum GV
double *rate_interp - model interpolation rates
int num_interp - # of models to be interpolated
- ''Attention!!'': You should initialize variable '''engine''' using HTS_Engine_initialize before calling this function. If rate_interp==NULL, interpolation rates of all models are set to the same value.
HTS_Engine *engine - pointer to HTS_Engine structure
FILE **pdf_fp - duration PDF file pointers
FILE **tree_fp - duration tree file pointers
int interpolation_size - # of duration models to be interpolated
- ''Attention!!'': You must initialize variable '''engine''' using HTS_Engine_initialize before calling this function.
*** HTS_Engine_load_parameter_from_fn [#kd48cfd2]
void HTS_Engine_load_parameter_from_fn(HTS_Engine *engine, char **pdf_fn, char **tree_fn, char **win_fn,
int stream_index, HTS_Boolean msd_flag, int window_size, int interpolation_size)
- Use: load parameter PDFs, trees and windows from files using given file names.
- Arguments:
HTS_Engine *engine - pointer to HTS_Engine structure
char **pdf_fn - parameter PDF file names
char **tree_fn - parameter tree file names
char **win_fn - parameter window file names
int stream_index - index of stream
HTS_Boolean msd_flag - flag for MSD
int window_size - # of windows
int interpolation_size - # of parameter models to be interpolated
- ''Attention!!'': You must load duration models using HTS_Engine_load_duration_from_fn/fp before calling this function.
*** HTS_Engine_process [#i593ddb8]
void HTS_Engine_process(HTS_Engine *engine,HTS_Model *model,
FILE *wavfp, FILE *rawfp, FILE *lf0fp,
FILE *mcpfp, FILE *durfp, FILE *tracefp,
double *fr_f0, double *fr_power);
- Use: run HMM-based speech synthesis.
*** HTS_Engine_load_parameter_from_fp [#j2b19c02]
void HTS_Engine_load_parameter_from_fp(HTS_Engine *engine, FILE **pdf_fp, FILE **tree_fp, FILE **win_fp,
int stream_index, HTS_Boolean msd_flag, int window_size, int interpolation_size)
- Use: load parameter PDFs, trees and windows from files using given file pointers.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
HTS_Model *model - HTS_Model structure
FILE *wavfp - file pointer for RIFF waveform output
FILE *rawfp - file pointer for raw audio output
FILE *lf0fp - file pointer for generated (log) F0 sequence
FILE *mcpfp - file pointer for generated spectrum (mel-cepstrum)
FILE *durfp - file pointer for predicted durations
FILE *tracefp - file pointer for trace information
double *fr_f0 - Given F0 values
double *fr_power - Given powers
- ''Attention!!'': You can give F0 values or powers predicted by other modules via fr_f0 and fr_power.
HTS_Engine *engine - pointer to HTS_Engine structure
FILE **pdf_fp - parameter PDF file pointers
FILE **tree_fp - parameter tree file pointers
FILE **win_fp - parameter window file pointers
int stream_index - index of stream
HTS_Boolean msd_flag - MSD flag
int window_size - # of windows
int interpolation_size - # of parameter models to be interpolated
- ''Attention!!'': You must load duration models using HTS_Engine_load_duration_from_fn/fp before calling this function.
*** HTS_Engine_refresh [#k57f1d0c]
void HTS_Engine_refresh(HTS_Engine *engine, Model *mhead);
- Use: free model list.
*** HTS_Engine_load_gv_from_fn [#id008a1a]
void HTS_Engine_load_gv_from_fn(HTS_Engine *engine, char **pdf_fn, int stream_index, int interpolation_size)
- Arguments:
HTS_Engine *engine - HTS_Engine structure
Model *mhead - HTS_Model structure
- ''Attention!!'': You should call this function if you want to run HTS_Engine_process multiple times.
HTS_Engine *engine - pointer to HTS_Engine structure
char **pdf_fn - GV PDF file names
int stream_index - index of streams
int interpolation_size - # of GV models to be interpolated
- ''Attention!!'': You must load parameter models using HTS_Engine_load_parameter_from_fn/fp before calling this function.
*** HTS_Engine_clear [#c3442e8d]
void HTS_Engine_clear(HTS_Engine *engine);
- Use: free memory.
*** HTS_Engine_load_gv_from_fp [#g6d9d662]
void HTS_Engine_load_gv_from_fp(HTS_Engine *engine, FILE **pdf_fp, int stream_index, int interpolation_size)
- Arguments:
HTS_Engine *engine - HTS_Engine structure
HTS_Engine *engine - pointer to HTS_Engine structure
FILE **pdf_fp - GV PDF file pointers
int stream_index - index of streams
int interpolation_size - # of GV models to be interpolated
- ''Attention!!'': You must load parameter models using HTS_Engine_load_parameter_from_fn/fp before calling this function.
** HTS_Engine setting function [#xb2cbf1a]
** Set parameter [#b8dd5d39]
*** HTS_Engine_set_sampling_rate [#q41e65a1]
void HTS_Engine_set_sampling_rate(HTS_Engine *engine,int i);
void HTS_Engine_set_sampling_rate(HTS_Engine *engine,int i)
- Use: set sampling frequency.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
int i - sampling frequency (Hz), 0 < i <= 48000
HTS_Engine *engine - pointer to HTS_Engine structure
int i - sampling frequency (Hz), 1 <= i <= 48000
- ''Attention!!:'' Default value is 16000.
*** HTS_Engine_set_fperiod [#c5c89881]
void HTS_Engine_set_fperiod(HTS_Engine *engine,int i);
void HTS_Engine_set_fperiod(HTS_Engine *engine,int i)
- Use: set frame shift.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
int i - frame shift (point), 0 < i <= 2000
HTS_Engine *engine - pointer to HTS_Engine structure
int i - frame shift (point), 1 <= i <= 2000
- ''Attention!!:'' Default value is 80.
*** HTS_Engine_set_alpha [#tbe7cee5]
void HTS_Engine_set_alpha(HTS_Engine *engine,double f);
void HTS_Engine_set_alpha(HTS_Engine *engine,double f)
- Use: set frequency warping parameter alpha.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
double f - alpha, 0.0 <= f <= 1.0
HTS_Engine *engine - pointer to HTS_Engine structure
double f - alpha, 0.0 <= f <= 1.0
- ''Attention!!:'' Default value is 0.42.
*** HTS_Engine_set_gamma [#wdc30080]
void HTS_Engine_set_gamma(HTS_Engine *engine,int i)
- Use: set Gamma.
- Arguments:
HTS_Engine *engine - pointer to HTS_Engine structure
int i - Gamma=-1/i : if i=0 then Gamma=0, 0 <= i
- ''Attention!!:'' Default value is 0.
*** HTS_Engine_set_log_gain [#lfbc8c63]
void HTS_Engine_set_log_gain(HTS_Engine *engine,HTS_Boolean i)
- Use: set log gain flag.
- Arguments:
HTS_Engine *engine - pointer to HTS_Engine structure
HTS_Boolean i - log gain flag.
- ''Attention!!:'' Default value is FALSE.
*** HTS_Engine_set_beta [#m44c517c]
void HTS_Engine_set_beta(HTS_Engine *engine,double f);
void HTS_Engine_set_beta(HTS_Engine *engine,double f)
- Use: set postfiltering coefficient parameter beta.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
double f - beta, -0.8 <= f <= 0.8
HTS_Engine *engine - pointer to HTS_Engine structure
double f - beta, -0.8 <= f <= 0.8
- ''Attention!!:'' Default value is 0.0. If you set beta to a large value, the formant structure will be strongly emphasized.
*** HTS_Engine_set_rho [#ha467b8e]
void HTS_Engine_set_rho(HTS_Engine *engine,double f);
- Use: set speaking rate control parameter rho.
*** HTS_Engine_set_audio_buff_size [#ua56725a]
void HTS_Engine_set_audio_buff_size(HTS_Engine *engine, int i)
- Use: set buffer size for direct audio output.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
double f - rho, -1.0 <= f <= 1.0
- ''Attention!!:'' Default value is 0.0. If you set rho to a negative value, the speaking rate of synthesized speech becomes faster.
HTS_Engine *engine - pointer to HTS_Engine structure
int i - buffer size (sample), 0 <= i <= 48000
- ''Attention!!:'' Default value is 0. If i=0, direct audio play is turned off.
*** HTS_Engine_set_f0_std [#v7dd1f01]
void HTS_Engine_set_f0_std(HTS_Engine *engine,double f);
- Use: set a parameter to be multiplied to generated F0 values
*** HTS_Engine_set_msd_threshold [#qd019ba4]
void HTS_Engine_set_msd_threshold(HTS_Engine *engine, int stream_index, double f)
- Use: set MSD threshold.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
double f - F0 multiply value, 0.0 <= f <= 5.0
- ''Attention!!:'' Default value is 1.0.
HTS_Engine *engine - pointer to HTS_Engine structure
int stream_index - index of streams
double f - threshold
*** HTS_Engine_set_f0_mean [#eb468ba6]
void HTS_Engine_set_f0_mean(HTS_Engine *engine,double f);
- Use: set a parameter to be added to generated F0 values
*** HTS_Engine_set_duration_interpolation_weight [#rdd5ff69]
void HTS_Engine_set_duration_interpolation_weight(HTS_Engine *engine, int interpolation_index, double f)
- Use: set weight for duration interpolation.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
double f - F0 bias value, 0.0 <= f <= 100.0
- ''Attention!!:'' Default value is 0.0.
HTS_Engine *engine - pointer to HTS_Engine structure
int interpolation_index - index of duration models
double f - interpolation weight
*** HTS_Engine_set_uv [#c0f3dac6]
void HTS_Engine_set_uv(HTS_Engine *engine,double f);
- Use: set voiced/unvoiced threshold.
*** HTS_Engine_set_parameter_interpolation_weight [#k8bc0ee3]
void HTS_Engine_set_parameter_interpolation_weight(HTS_Engine *engine, int stream_index, int interpolation_index, double f)
- Use: set weight for parameter interpolation.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
double f - voiced/unvoiced threshold, 0.0 <= f <= 1.0
- ''Attention!!:'' Default value is 0.5.
HTS_Engine *engine - pointer to HTS_Engine structure
int stream_index - index of streams
int interpolation_index - index of parameter models
double f - interpolation weight
*** HTS_Engine_set_length [#b06b19c7]
void HTS_Engine_set_length(HTS_Engine *engine,double f);
- Use: set total length of utterance in seconds.
*** HTS_Engine_set_gv_interpolation_weight [#lc3e00bc]
void HTS_Engine_set_gv_interpolation_weight(HTS_Engine *engine, int stream_index, int interpolation_index, double f)
- Use: set weight for GV interpolation.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
double f - total length of utterance (second), 0.0 <= f <= 30.0
- ''Attention!!:'' Default value is 0.0 (using predicted durations by state duration models).
HTS_Engine *engine - pointer to HTS_Engine structure
int stream_index - index of streams
int interpolation_index - index of GV models
double f - interpolation weight
*** HTS_Engine_set_algnst [#w1aa23b5]
void HTS_Engine_set_algnst(HTS_Engine *engine,HTS_Boolean i);
- Use: set flag whether state-level alignments from given labels are used or not.
*** HTS_Engine_set_gv_weight [#eaa46840]
void HTS_Engine_set_gv_weight(HTS_Engine *engine, int stream_index, double f)
- Use: set GV weight.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
HTS_Boolean i - flag whether state-level alignments from given labels are used or not
- ''Attention!!:'' Default value is FALSE.
HTS_Engine *engine - pointer to HTS_Engine structure
int stream_index - index of streams
double f - GV weight
*** HTS_Engine_set_algnph [#b326f87f]
void HTS_Engine_set_algnph(HTS_Engine *engine,HTS_Boolean i);
- Use: set flag whether phone-level alignments from given labels are used or not.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
HTS_Boolean i - flag whether phone-level alignments from given labels are used or not
- ''Attention!!:'' Default value is FALSE.
** Synthesize speech [#sf7321ed]
*** HTS_Engine_set_buff_size [#ud5632df]
void HTS_Engine_set_buff_size(HTS_Engine *engine, int i);
- Use: set buffer size for audio device.
*** HTS_Engine_load_label_from_fn [#j87fa620]
void HTS_Engine_load_label_from_fn(HTS_Engine *engine, char *fn)
- Use: load label from file using given file name.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
int i - buffer size (pt), 0 < i <= 48000
- ''Attention!!:'' Default value is 0. If i==0, direct audio play is turned off.
HTS_Engine *engine - pointer to HTS_Engine structure
char *fn - label file name
*** HTS_Engine_set_stored_raw_data [#k5fc1be0]
void HTS_Engine_set_stored_raw_data(HTS_Engine *engine,HTS_Boolean i);
- Use: set flag whether a synthesized waveform is stored in an array.
*** HTS_Engine_load_label_from_fp [#i892f523]
void HTS_Engine_load_label_from_fp(HTS_Engine *engine, FILE *fp)
- Use: load label from file using given file pointer.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
HTS_Boolean i - if TRUE, synthesized waveform is stored
- ''Attention!!:'' Default value is FALSE.
HTS_Engine *engine - pointer to HTS_Engine structure
FILE *fp - label file pointer
*** HTS_Engine_get_sampling_rate [#j4e9c970]
int HTS_Engine_get_sampling_rate(HTS_Engine *engine);
- Use: get sampling frequency.
- Return value: sampling frequency (Hz)
*** HTS_Engine_load_label_from_string [#q36e8c4c]
void HTS_Engine_load_label_from_string(HTS_Engine *engine, char *data)
- Use: load label from string.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
HTS_Engine *engine - pointer to HTS_Engine structure
char *data - label string
*** HTS_Engine_get_total_dur [#l57d5bb9]
int HTS_Engine_get_total_dur(HTS_Engine *engine);
- Use: get total durations
- Return value: total durations
*** HTS_Engine_load_label_from_string_list [#i9e769ac]
void HTS_Engine_load_label_from_string_list(HTS_Engine *engine, char **data, int size)
- Use: load label from string list.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
HTS_Engine *engine - pointer to HTS_Engine structure
char **data - label string list
int size - size of label string list
*** HTS_Engine_get_total_frame [#fb2b6082]
int HTS_Engine_get_total_frame(HTS_Engine *engine);
- Use: get total # of frames.
- Return value: total # of frames.
*** HTS_Engine_create_sstream [#c2e0aaed]
void HTS_Engine_create_sstream(HTS_Engine *engine)
- Use: parse label, determine state duration and create state stream.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
HTS_Engine *engine - pointer to HTS_Engine structure
*** HTS_Engine_get_nsample [#oa0b9847]
int HTS_Engine_get_nsample(HTS_Engine *engine);
- Use: get # of samples in a synthesized waveform.
- Return value: # of samples.
*** HTS_Engine_create_pstream [#b1fda60c]
void HTS_Engine_create_pstream(HTS_Engine *engine)
- Use: create PDF stream using state stream.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
HTS_Engine *engine - pointer to HTS_Engine structure
*** HTS_Engine_get_pros_len [#p93969ff]
int HTS_Engine_get_pros_len(HTS_Engine *engine);
- Use: get spectrum, F0 data length.
- Return value: data length.
*** HTS_Engine_create_gstream [#fab90e37]
void HTS_Engine_create_gstream(HTS_Engine *engine)
- Use: synthesize speech and store generated parameters using PDF stream.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
HTS_Engine *engine - pointer to HTS_Engine structure
- ''Attention!!:'' To synthesize speech, you must set stream[0]=spectrum models and stream[1]=lf0 models.
*** HTS_Engine_get_pros [#e30e0ece]
HTS_Boolean HTS_Engine_get_pros(HTS_Engine *engine,int len,
double *f0_data,double *power_data);
- Use: get mcp, lf0 data array.
- Return value: if len==data length, return TRUE.
*** HTS_Engine_save_infomation [#e7c9463d]
void HTS_Engine_save_infomation(HTS_Engine *engine, FILE *fp)
- Use: output trace information.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
int len - data length
double *f0_data - generated F0 sequence
double *power_data - generated spectrum (mel-cepstrum) sequence
HTS_Engine *engine - pointer to HTS_Engine structure
FILE *fp - output file pointer
*** HTS_Engine_get_stored_raw_data [#r88fe434]
short *HTS_Engine_get_stored_raw_data(HTS_Engine *engine);
- Use: get stored raw data.
- Return value: short array which contains a synthesized waveform.
*** HTS_Engine_save_label [#sb2d1f59]
void HTS_Engine_save_label(HTS_Engine *engine, FILE *fp)
- Use: output label with time.
- Arguments:
HTS_Engine *engine - HTS_Engine structure
HTS_Engine *engine - pointer to HTS_Engine structure
FILE *fp - output file pointer
** HTS_Model function [#h27b18c2]
*** HTS_Model_load_from_labfp [#qdd1e7a5]
void HTS_Model_load_from_labfp(HTS_Model *mhead,FILE *labfp);
- Use: load model list from label file pointer.
*** HTS_Engine_save_generated_parameter [#ya641edd]
void HTS_Engine_save_generated_parameter(HTS_Engine *engine, FILE *fp)
- Use: output generated parameter.
- Arguments:
HTS_Model *mhead - HTS_Model structure pointer
FILE *labfp - label file pointer
HTS_Engine *engine - pointer to HTS_Engine structure
FILE *fp - output file pointer
*** HTS_Model_load_from_labfn [#k847cea5]
void HTS_Model_load_from_labfn (HTS_Model *mhead,char *fn);
- Use: load model list from label file name.
*** HTS_Engine_save_generated_speech [#k8327b8d]
void HTS_Engine_save_generated_speech(HTS_Engine *engine, FILE *fp)
- Use: output generated speech.
- Arguments:
HTS_Model *mhead - HTS_Model structure pointer
char *fn - label file name
HTS_Engine *engine - pointer to HTS_Engine structure
FILE *fp - output file pointer
*** HTS_Model_load_from_string [#nb56c314]
void HTS_Model_load_from_string (HTS_Model *mhead,char *labdata);
- Use: load model list from string.
*** HTS_Engine_save_riff [#wb9640f7]
void HTS_Engine_save_riff(HTS_Engine *engine, FILE *fp)
- Use: output RIFF format file.
- Arguments:
HTS_Model *mhead - HTS_Model structure pointer
char *labdata - label string
HTS_Engine *engine - pointer to HTS_Engine structure
FILE *fp - output file pointer
*** HTS_Model_load_from_string_list [#sc61a8ee]
void HTS_Model_load_from_string_list (HTS_Model *mhead,char **labdata,int size);
- Use: load model list from string list.
*** HTS_Engine_refresh [#a8a1a225]
void HTS_Engine_refresh(HTS_Engine *engine)
- Use: free label, state streams, PDF streams and generated parameter streams.
- Arguments:
HTS_Model *mhead - HTS_Model structure pointer
char **labdata - label string list
int size - label string list size
HTS_Engine *engine - pointer to HTS_Engine structure
** HTS_Model setting function [#t0d05533]
*** HTS_Model_set_rate [#v6e9650b]
void HTS_Model_set_rate(HTS_Model *m, int i, double f);
- Use: set speaking rate for the i-th subword HMM.
- Arguments:
HTS_Model *m - HTS_Model list
int i - index of subword HMM to be used in HTS_Model list m
double f - speaking rate
- ''Attention!!:'' 0.2 <= f.
** Free engine [#mef9b660]
*** HTS_Model_set_dur [#jdfe0f95]
void HTS_Model_set_dur(HTS_Model *m, int i, int d);
- Use: set phone-level duration for the i-th subword HMM.
*** HTS_Engine_clear [#gd323729]
void HTS_Engine_clear(HTS_Engine *engine)
- Use: free engine.
- Arguments:
HTS_Model *m - HTS_Model list
int i - index of subword HMM to be used in HTS_Model list m
int d - # of frames (duration)
- ''Attention!!:'' 0 < d.
HTS_Engine *engine - pointer to HTS_Engine structure
*** HTS_Model_set_f0_level [#s5c8f765]
void HTS_Model_set_f0_level (HTS_Model *m, int i, double f);
- Use: set F0 level for the i-th subword HMM.
* Vocoder functions [#tffbab44]
** Initialize vocoder [#v5bc81e4]
*** HTS_Vocoder_initialize [#ff9170fc]
void HTS_Vocoder_initialize(HTS_Vocoder *v, const int m, const int stage, HTS_Boolean use_log_gain, const int rate, const int fperiod, int buff_size)
- Use: initialize the vocoder.
- Arguments:
HTS_Model *m - HTS_Model list
int i - index of subword HMM to be used in HTS_Model list m
double f - F0 bias
- ''Attention!!:'' 0.1 <= f.
HTS_Vocoder *v - pointer to HTS_Vocoder structure
int m - order of mel-cepstral coefficients
int stage - Gamma=-1/stage : if stage=0 then Gamma=0
HTS_Boolean use_log_gain - log gain flag
int rate - sampling frequency
int fperiod - frame shift
int buff_size - buffer size for direct audio output
*** HTS_Model_set_f0_range [#k1cb0d03]
void HTS_Model_set_f0_range(HTS_Model *m, int i, double f);
- Use: set F0 range for the i-th subword HMM.
** Synthesize speech [#d6645d7f]
*** HTS_Vocoder_synthesize [#h737ba80]
void HTS_Vocoder_synthesize(HTS_Vocoder *v, const int m, double lf0, double *spectrum, double alpha, double beta, short *rawdata)
- Use: run the vocoder and synthesize waveform.
- Arguments:
HTS_Model *m - HTS_Model list
int i - index of subword HMM to be used in HTS_Model list m
double f - F0 range
- ''Attention!!:'' 0.0 <= f. Default value is 0.0.
HTS_Vocoder *v - HTS_Vocoder structure pointer
int m - order of spectrum coefficients
double lf0 - log F0 value
double *spectrum - spectrum coefficients
double alpha - frequency warping parameter alpha
double beta - postfiltering parameter beta
short *rawdata - short pointer to store synthesized waveform
*** HTS_Model_set_volume [#b3110391]
void HTS_Model_set_volume(HTS_Model *m, int i, double f);
- Use: set volume for the i-th subword HMM.
*** HTS_Vocoder_postfilter_mcp [#r92930b7]
void HTS_Vocoder_postfilter_mcp(HTS_Vocoder *v, double *mcp, const int m, double alpha, double beta)
- Use: postfilter for mel-cepstrum.
- Arguments:
HTS_Model *m - HTS_Model list
int i - index of subword HMM to be used in HTS_Model list m
double f - volume
- ''Attention!!:'' 0.01 <= f.
HTS_Vocoder *v - HTS_Vocoder structure pointer
double *mcp - spectrum coefficients
int m - order of mel-cepstral coefficients
double alpha - frequency warping parameter alpha
double beta - postfiltering parameter beta
*** HTS_Model_set_alpha [#idef33c4]
void HTS_Model_set_alpha(HTS_Model *m, int i, double f);
- Use: set frequency warping parameter alpha for the i-th subword HMM.
** Free vocoder [#rb2ef784]
*** HTS_Vocoder_clear [#wb420be0]
void HTS_Vocoder_clear(HTS_Vocoder *v)
- Use: free the vocoder.
- Arguments:
HTS_Model *m - HTS_Model list
int i - index of subword HMM to be used in HTS_Model list m
double f - frequency warping parameter alpha
- ''Attention!!:'' 0.0 <= f.
HTS_Vocoder *v - pointer to HTS_Vocoder structure
** HTS_VocoderSet function [#i85cc130]
* Other functions [#q32f7c6d]
*** HTS_VocoderSet_initialize [#g7972a7d]
void HTS_VocoderSet_initialize (HTS_VocoderSet *vs, const int m,
const int rate, const int fperiod, int buff_size);
- Use: initialize the MLSA filter.
** For copyright [#e99f219b]
*** void HTS_show_copyright(FILE *fp) [#y30d6252]
- Use: show hts_engine_API copyright.
- Arguments:
HTS_VocoderSet *vs - HTS_VocoderSet structure
const int m - order of mel-cepstral coefficients
const int rate - sampling frequency (Hz)
const int fperiod - frame shift (point)
int buff_size - buffer size for direct audio output
FILE *fp - output file pointer
*** HTS_VocoderSet_synthesize [#ded581f3]
void HTS_VocoderSet_synthesize (HTS_VocoderSet *vs, const int m,
double p, double *mc, double alpha, double beta,
FILE *wavfp, FILE *rawfp, short *rawdata);
- Use: run the MLSA filter and synthesize waveform.
*** void HTS_get_copyright(char *str) [#d24078ea]
- Use: copy hts_engine_API copyright to string.
- Arguments:
HTS_VocoderSet *vs - HTS_VocoderSet structure
const int m - order of mel-cepstral coefficients
double p - F0 value
double *mc - mel-cepstral coefficients
double alpha - frequency warping parameter alpha
double beta - postfiltering parameter beta
FILE *wavfp - file pointer to store synthesized waveform in RIFF format
FILE *rawfp - file pointer to store synthesized waveform in raw audio
char *str - output string