You must set values for these variables:
DATASET=???
SPEAKER=???
For example:
./configure --with-fest-search-path=/home/quang/HTS/festival/examples \
  --with-sptk-search-path=/home/quang/HTS/Tools/bin \
  --with-hts-search-path=/home/quang/HTS/Tools/bin \
  --with-hts-engine-search-path=/home/quang/HTS/Tools/bin \
  --with-matlab-search-path=/home/quang/HTS/Tools/Matlab/bin \
  --with-straight-path=/home/quang/HTS/Tools/STRAIGHTV40pcode \
  LOWERF0=160 UPPERF0=360 DATASET=QBT SPEAKER=ChungHC QNAME=ChungHC

2017-11-12 20:45 GMT+07:00 Nora Qm <NoraQm@xxxxxxxxxx>:
Hello,
I’m a beginner in the speech synthesis field. I trained the HTS demo with English, and it ran and worked very well. Now I need to train HTS for the Arabic language. I already have my own data, which are: questions, labels (mono, full), text, and raw audio. When I tried to train, this error occurred:
# Extracting features from raw audio
mkdir -p mgc lf0 bap
SAMPKHZ=`echo 48000 | /usr/local/bin/x2x +af | /usr/local/bin/sopr -m 0.001 | /usr/local/bin/x2x +fa`; \
for raw in raw/cmu_us_arctic_slt_*.raw; do \
base=`basename ${raw} .raw`; \
min=`/usr/local/bin/x2x +sf ${raw} | /usr/local/bin/minmax | /usr/local/bin/x2x +fa | head -n 1`; \
max=`/usr/local/bin/x2x +sf ${raw} | /usr/local/bin/minmax | /usr/local/bin/x2x +fa | tail -n 1`; \
if [ -s ${raw} -a ${min} -gt -32768 -a ${max} -lt 32767 ]; then \
echo "Extracting features from ${raw}"; \
if [ 0 -eq 0 ]; then \
/usr/local/bin/x2x +sf ${raw} | /usr/local/bin/pitch -H 280 -L 110 -p 240 -s ${SAMPKHZ} -o 2 > lf0/${base}.lf0; \
if [ 0 -eq 0 ]; then \
/usr/local/bin/x2x +sf ${raw} | \
/usr/local/bin/frame -l 1200 -p 240 | \
/usr/local/bin/window -l 1200 -L 2048 -w 1 -n 1 | \
/usr/local/bin/mcep -a 0.55 -m 34 -l 2048 -e 1.0E-08 > mgc/${base}.mgc; \
else \
if [ 1 -eq 1 ]; then \
GAINOPT="-L"; \
fi; \
/usr/local/bin/x2x +sf ${raw} | \
/usr/local/bin/frame -l 1200 -p 240 | \
/usr/local/bin/window -l 1200 -L 2048 -w 1 -n 1 | \
/usr/local/bin/mcep -a 0.55 -c 0 -m 34 -l 2048 -e 1.0E-08 -o 4 | \
/usr/local/bin/lpc2lsp -m 34 -s ${SAMPKHZ} ${GAINOPT} -n 2048 -p 8 -d 1.0E-08 > mgc/${base}.mgc; \
fi; \
if [ -n "`/usr/local/bin/nan lf0/${base}.lf0`" ]; then \
echo " Failed to extract features from ${raw}"; \
rm -f lf0/${base}.lf0; \
fi; \
if [ -n "`/usr/local/bin/nan mgc/${base}.mgc`" ]; then \
echo " Failed to extract features from ${raw}"; \
rm -f mgc/${base}.mgc; \
fi; \
else \
FRAMESHIFTMS=`echo 240 | /usr/local/bin/x2x +af | /usr/local/bin/sopr -m 1000 -d 48000 | /usr/local/bin/x2x +fa`; \
/usr/local/bin/raw2wav -s ${SAMPKHZ} -d . ${raw}; \
echo "path(path,'');" > ${base}.m; \
echo "prm.F0frameUpdateInterval=${FRAMESHIFTMS};" >> ${base}.m; \
echo "prm.F0searchUpperBound=280 ;" >> ${base}.m; \
echo "prm.F0searchLowerBound=110 ;" >> ${base}.m; \
echo "prm.spectralUpdateInterval=${FRAMESHIFTMS};" >> ${base}.m; \
echo "[x,fs]=wavread('${base}.wav');" >> ${base}.m; \
echo "[f0,ap] = exstraightsource(x,fs,prm);" >> ${base}.m; \
echo "[sp] = exstraightspec(x,f0,fs,prm);" >> ${base}.m; \
echo "ap = ap';" >> ${base}.m; \
echo "sp = sp';" >> ${base}.m; \
echo "sp = sp*32768.0;" >> ${base}.m; \
echo "save '${base}.f0' f0 -ascii;" >> ${base}.m; \
echo "save '${base}.ap' ap -ascii;" >> ${base}.m; \
echo "save '${base}.sp' sp -ascii;" >> ${base}.m; \
echo "quit;" >> ${base}.m; \
: -nodisplay -nosplash -nojvm < ${base}.m; \
if [ -s ${base}.f0 ]; then \
/usr/local/bin/x2x +af ${base}.f0 | /usr/local/bin/sopr -magic 0.0 -LN -MAGIC -1.0E+10 > lf0/${base}.lf0; \
if [ -n "`/usr/local/bin/nan lf0/${base}.lf0`" ]; then \
echo " Failed to extract features from ${raw}"; \
rm -f lf0/${base}.lf0; \
fi; \
fi; \
if [ -s ${base}.sp ]; then \
if [ 0 -eq 0 ]; then \
/usr/local/bin/x2x +af ${base}.sp | \
/usr/local/bin/mcep -a 0.55 -m 34 -l 2048 -e 1.0E-08 -j 0 -f 0.0 -q 3 > mgc/${base}.mgc; \
else \
if [ 1 -eq 1 ]; then \
GAINOPT="-L"; \
fi; \
/usr/local/bin/x2x +af ${base}.sp | \
/usr/local/bin/mcep -a 0.55 -c 0 -m 34 -l 2048 -e 1.0E-08 -j 0 -f 0.0 -q 3 -o 4 | \
/usr/local/bin/lpc2lsp -m 34 -s ${SAMPKHZ} ${GAINOPT} -n 2048 -p 8 -d 1.0E-08 > mgc/${base}.mgc; \
fi; \
if [ -n "`/usr/local/bin/nan mgc/${base}.mgc`" ]; then \
echo " Failed to extract features from ${raw}"; \
rm -f mgc/${base}.mgc; \
fi; \
fi; \
if [ -s ${base}.ap ]; then \
/usr/local/bin/x2x +af ${base}.ap | \
/usr/local/bin/mcep -a 0.55 -m 24 -l 2048 -e 1.0E-08 -j 0 -f 0.0 -q 1 > bap/${base}.bap; \
if [ -n "`/usr/local/bin/nan bap/${base}.bap`" ]; then \
echo " Failed to extract features from ${raw}"; \
rm -f bap/${base}.bap; \
fi; \
fi; \
rm -f ${base}.m ${base}.wav ${base}.f0 ${base}.ap ${base}.sp; \
fi; \
fi; \
done
Cannot open file raw/cmu_us_arctic_slt_*.raw!
Cannot open file raw/cmu_us_arctic_slt_*.raw!
# Composing training data files from extracted features
mkdir -p cmp
for raw in raw/cmu_us_arctic_slt_*.raw; do \
base=`basename ${raw} .raw`; \
echo "Composing training data for ${base}"; \
if [ 0 -eq 0 ]; then \
MGCDIM=`expr 34 + 1`; \
LF0DIM=1; \
MGCWINDIM=`expr 3 \* ${MGCDIM}`; \
LF0WINDIM=`expr 3 \* ${LF0DIM}`; \
BYTEPERFRAME=`expr 4 \* \( ${MGCWINDIM} + ${LF0WINDIM} \)`; \
if [ -s mgc/${base}.mgc -a -s lf0/${base}.lf0 ]; then \
MGCWINS=""; \
i=1; \
while [ ${i} -le 3 ]; do \
eval MGCWINS=\"${MGCWINS} win/mgc.win${i}\"; \
i=`expr ${i} + 1`; \
done; \
/usr/bin/perl scripts/window.pl ${MGCDIM} mgc/${base}.mgc ${MGCWINS} > tmp.mgc; \
LF0WINS=""; \
i=1; \
while [ ${i} -le 3 ]; do \
eval LF0WINS=\"${LF0WINS} win/lf0.win${i}\"; \
i=`expr ${i} + 1`; \
done; \
/usr/bin/perl scripts/window.pl ${LF0DIM} lf0/${base}.lf0 ${LF0WINS} > tmp.lf0; \
/usr/local/bin/merge +f -s 0 -l ${LF0WINDIM} -L ${MGCWINDIM} tmp.mgc < tmp.lf0 > tmp.cmp; \
/usr/bin/perl scripts/addhtkheader.pl 48000 240 ${BYTEPERFRAME} 9 tmp.cmp > cmp/${base}.cmp; \
rm -f tmp.mgc tmp.lf0 tmp.cmp; \
fi; \
else \
MGCDIM=`expr 34 + 1`; \
LF0DIM=1; \
BAPDIM=`expr 24 + 1`; \
MGCWINDIM=`expr 3 \* ${MGCDIM}`; \
LF0WINDIM=`expr 3 \* ${LF0DIM}`; \
BAPWINDIM=`expr 3 \* ${BAPDIM}`; \
MGCLF0WINDIM=`expr ${MGCWINDIM} + ${LF0WINDIM}`; \
BYTEPERFRAME=`expr 4 \* \( ${MGCWINDIM} + ${LF0WINDIM} + ${BAPWINDIM} \)`; \
if [ -s mgc/${base}.mgc -a -s lf0/${base}.lf0 -a -s bap/${base}.bap ]; then \
MGCWINS=""; \
i=1; \
while [ ${i} -le 3 ]; do \
eval MGCWINS=\"${MGCWINS} win/mgc.win${i}\"; \
i=`expr ${i} + 1`; \
done; \
/usr/bin/perl scripts/window.pl ${MGCDIM} mgc/${base}.mgc ${MGCWINS} > tmp.mgc; \
LF0WINS=""; \
i=1; \
while [ ${i} -le 3 ]; do \
eval LF0WINS=\"${LF0WINS} win/lf0.win${i}\"; \
i=`expr ${i} + 1`; \
done; \
/usr/bin/perl scripts/window.pl ${LF0DIM} lf0/${base}.lf0 ${LF0WINS} > tmp.lf0; \
BAPWINS=""; \
i=1; \
while [ ${i} -le 3 ]; do \
eval BAPWINS=\"${BAPWINS} win/bap.win${i}\"; \
i=`expr ${i} + 1`; \
done; \
/usr/bin/perl scripts/window.pl ${BAPDIM} bap/${base}.bap ${BAPWINS} > tmp.bap; \
/usr/local/bin/merge +f -s 0 -l ${LF0WINDIM} -L ${MGCWINDIM} tmp.mgc < tmp.lf0 > tmp.mgc+lf0; \
/usr/local/bin/merge +f -s 0 -l ${BAPWINDIM} -L ${MGCLF0WINDIM} tmp.mgc+lf0 < tmp.bap > tmp.cmp; \
/usr/bin/perl scripts/addhtkheader.pl 48000 240 ${BYTEPERFRAME} 9 tmp.cmp > cmp/${base}.cmp; \
rm -f tmp.mgc tmp.lf0 tmp.bap tmp.mgc+lf0 tmp.cmp; \
fi; \
fi; \
done
Composing training data for cmu_us_arctic_slt_*
# Extracting monophone and fullcontext labels
mkdir -p labels/mono
mkdir -p labels/full
if [ 1 -eq 1 ]; then \
for utt in utts/cmu_us_arctic_slt_*.utt; do \
base=`basename ${utt} .utt`; \
if [ -s ${utt} ]; then \
echo "Extracting labels from ${utt}"; \
/usr/local/TTS_System/FestivalTTS_2/festival/examples/ dumpfeats -eval scripts/extra_feats.scm -relation Segment -feats scripts/label.feats -output tmp.feats ${utt}; \
fi; \
if [ -s tmp.feats ]; then \
awk -f scripts/label-full.awk tmp.feats > labels/full/${base}.lab; \
awk -f scripts/label-mono.awk tmp.feats > labels/mono/${base}.lab; \
rm -f tmp.feats; \
fi; \
done; \
else \
for txt in txt/cmu_us_arctic_slt_*.txt; do \
base=`basename ${txt} .txt`; \
if [ -s ${txt} ]; then \
echo "Extracting labels from ${txt}"; \
/usr/bin/perl scripts/normtext.pl ${txt} > tmp.txt; \
/usr/local/TTS_System/FestivalTTS_2/festival/examples/ text2utt tmp.txt > tmp.utt; \
/usr/local/TTS_System/FestivalTTS_2/festival/examples/ dumpfeats -eval scripts/extra_feats.scm -relation Segment -feats scripts/label.feats -output tmp.feats tmp.utt; \
rm -f tmp.txt tmp.utt; \
fi; \
if [ -s tmp.feats ]; then \
awk -f scripts/label-full.awk tmp.feats > labels/full/${base}.lab; \
awk -f scripts/label-mono.awk tmp.feats > labels/mono/${base}.lab; \
rm -f tmp.feats; \
fi; \
done; \
fi
# Generating monophone and fullcontext Master Label Files (MLF)
echo "#!MLF!#" > labels/mono.mlf
echo "\"*/cmu_us_arctic_slt_*.lab\" -> \"/usr/local/TTS_System/HTS-demo_ARABIC/data/labels/mono\"" >> labels/mono.mlf
echo "#!MLF!#" > labels/full.mlf
echo "\"*/cmu_us_arctic_slt_*.lab\" -> \"/usr/local/TTS_System/HTS-demo_ARABIC/data/labels/full\"" >> labels/full.mlf
# Generating a fullcontext model list file
mkdir -p lists
rm -f tmp
for lab in labels/full/cmu_us_arctic_slt_*.lab; do \
if [ -s ${lab} -a -s labels/mono/`basename ${lab}` -a -s cmp/`basename ${lab} .lab`.cmp ]; then \
sed -e "s/.* //g" ${lab} >> tmp; \
fi \
done
sort -u tmp > lists/full.list
sort: No such file or directory
make[1]: *** [list] Error 2
make: *** [data] Error 2
I don’t know why this happened. Is it because of the utterance files and the gen labels, which I did not use in the training?
I read the Festival manual section on creating utterance files, but the steps were not clear to me. What are the steps, and which files are required to create an utterance file?