How To: Use HTK Hidden Markov modelling toolkit with SFS

$ tar xvfz HTK-3.3.tar.gz

export HTKCF='-O2 -DCYGWIN'
export HTKLF='-o a.out'
export HTKCC='gcc'
export HBIN='..'
export Arch=ASCII
export CPU=cygwin
export PATH=~/htk-3.3/bin.i686:$PATH

$ source ~/htk.env

$ source ~/htk.env
$ cd ~/htk-3.3
$ ./configure --prefix=`echo ~/htk-3.3`

CFLAGS = -Wall -Wno-switch -g -O2 -I. -DCYGWIN -DARCH=ASCII 

# Build HSLab from HSLab.c and the $(HTKLIB) prerequisites: create $(bindir)
# if it does not exist yet, compile and link, then install the result into
# $(bindir) with mode 755.
HSLab: HSLab.c $(HTKLIB)
	if [ ! -d $(bindir) ] ; then mkdir $(bindir) ; fi
	$(CC) -o $@ $(CFLAGS) $^ $(LDFLAGS)
	$(INSTALL) -m 755 $@ $(bindir)

$ cd ~/htk-3.3
$ mkdir bin.i686
$ cd HTKLib
$ cp HGraf.null.c HGraf.c
$ cd ..
$ make

# doloadsfs.sh - load scribe data into new SFS files
#
# For every speaker letter and passage number, make a new SFS file
# ma<speaker>.<passage>.sfs, link in the .pes file with slink and load the
# matching .pea file with anload.
for spk in c e f h m; do
  for passage in 0001 0002 0003 0004; do
    src="c:/data/scribe/scribe/dr1/mt/ma$spk/a${spk}pa$passage"
    dst="ma$spk.$passage.sfs"
    hed -n "$dst"
    slink -isp -f 20000 "$src.pes" "$dst"
    anload -S "$src.pea" "$dst"
  done
done

$ mkdir tutorial1
$ cd tutorial1
$ sh doloadsfs.sh

$ apply voc19 ma*.sfs

/* ancollect.sml -- collect inventory of labels used */

/* table to hold annotation labels */
string    table[1:1000];
var    tcount;

/* checklabel - add a label to the global table unless already present */
/*   str   label to check */
/* Returns 0 when entry(str,table) is non-zero (presumably meaning the */
/* label is already in the table); otherwise stores str in the next free */
/* slot, increments tcount and returns 1. */
/* NOTE(review): table is declared [1:1000] and tcount is not checked */
/* against that limit here. */
function var checklabel(str)
{
    string str;

    if (entry(str,table)) return(0);
    tcount=tcount+1;
    table[tcount]=str;
    return(1);
}

/* for each input file: pass every annotation label matching the */
/* pattern "." to checklabel() so that new labels join the table */
main {
    var    i,num;

    /* number of annotations matching "." (presumably any non-empty label) */
    num=numberof(".");
    for (i=1;i<=num;i=i+1) checklabel(matchn(".",i));
}

/* after all files: sort the collected labels and print one per line */
summary {
    var    i,j;
    string    t;

    /* insertion sort of table[1..tcount] into ascending compare() order */
    for (i=2;i<=tcount;i=i+1) {
        j=i;
        t=table[j];
        /* shift larger entries up one place until the slot for t is found */
        while (compare(t,table[j-1])<0) {
            table[j] = table[j-1];
            j=j-1;
            /* stop at the first slot so that table[0] is never examined */
            if (j==1) break;
        }
        table[j]=t;
    }

    /* output list */
    for (i=1;i<=tcount;i=i+1) print table[i],"\n";
}

$ sml ancollect.sml ma*.sfs >svumap.txt

# SIL
## SIL
%tc SIL
+ SIL
/ SIL
3: VOI
3:? UNV
3:a UNV
3:af UNV
3:f UNV
3:~ VOI
=l VOI
=lx VOI
=lx? VOI
=lxf UNV
=lxf0 UNV
=m VOI
=mf UNV
=n VOI 
...

$ apply "anmap -m svumap.txt" ma*.sfs

$ apply "colist -H" ma*.sfs

$ apply "anlist -h -O" ma*.sfs

# config.txt - HTK basic parameters
SOURCEFORMAT = HTK
TARGETKIND = FBANK
NATURALREADORDER = T

<BeginHMM>
<NumStates> 3 <VecSize> 19 <FBANK>
 <State> 2
  <Mean> 19
   0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
  <Variance> 19
   1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
 <TransP> 3
  0.0 1.0 0.0
  0.0 0.9 0.1
  0.0 0.0 1.0
<EndHMM>

mac.0002.dat
mac.0003.dat
mac.0004.dat
mae.0001.dat
mae.0003.dat
mae.0004.dat
maf.0001.dat
maf.0002.dat
maf.0004.dat
mah.0001.dat
mah.0002.dat
mah.0003.dat

# dotrain.sh - for each phone class, copy the prototype HMM to <class>.hmm
# and run HRest on it with the class name as the -l label
for class in SIL VOI UNV; do
  model="$class.hmm"
  cp proto-1-19.hmm "$model"
  HRest -T 1 -C config.txt -S train.lst -l "$class" "$model"
done

SIL
VOI
UNV

SIL    SIL
VOI    VOI
UNV    UNV

$ HBuild phone.lst phone.net

VERSION=1.0
N=7    L=9    
I=0    W=!NULL               
I=1    W=!NULL               
I=2    W=SIL                 
I=3    W=VOI                 
I=4    W=UNV                 
I=5    W=!NULL               
I=6    W=!NULL               
J=0     S=0    E=1    l=0.00     
J=1     S=5    E=1    l=0.00     
J=2     S=1    E=2    l=-1.10    
J=3     S=1    E=3    l=-1.10    
J=4     S=1    E=4    l=-1.10    
J=5     S=2    E=5    l=0.00     
J=6     S=3    E=5    l=0.00     
J=7     S=4    E=5    l=0.00     
J=8     S=5    E=6    l=0.00     

mac.0001.dat
mae.0002.dat
maf.0003.dat
mah.0004.dat
mam.0001.dat
mam.0002.dat
mam.0003.dat
mam.0004.dat

# dotest.sh - run HVite over the files in test.lst, then load each
# resulting <name>.rec label file into the matching <name>.sfs file
HVite -T 1 -C config.txt -w phone.net -o S -S test.lst phone.dic phone.lst
for f in $(cat test.lst); do
  # Remove only a literal trailing ".dat".  The previous "sed s/.dat//"
  # treated "." as a wildcard and was not anchored, so it could delete
  # characters from the middle of a name (e.g. "update.dat" -> "ue.dat").
  g=${f%.dat}
  anload -h "$g.rec" "$g.sfs"
done

$ ancomp -r an.02 -t an.03 -f mac.0001.sfs
      SIL  UNV  VOI
SIL: 1114   31    6
UNV:   57  639  100
VOI:  140  138 1514

# doperf.sh - run ancomp (items an.02 vs an.03) on the SFS file of every
# entry in test.lst and pipe the combined output into conmat
(for f in $(cat test.lst); do
  # Remove only a literal trailing ".dat".  The previous "sed s/.dat//"
  # treated "." as a wildcard and was not anchored, so it could delete
  # characters from the middle of a name.
  g=${f%.dat}
  ancomp -r an.02 -t an.03 -f -m - "$g.sfs"
done) | conmat -esl

$ sh doperf.sh
Processing date     : Mon Jun 28 12:44:05 2004
Confusion data from : stdin

    Confusion Matrix

     |  SIL   UNV   VOI
-----+-----------------
  SIL| 8130   855   151   9136 total  88%
  UNV|  888  5010  1631   7529 total  66%
  VOI|  736   863 12298  13897 total  88%

Number of matches = 30562
Recognition rate  =  83.2%

$ ancomp -r an.01 -t an.03 -f mac.0001.sfs
       SIL UNV VOI
    #:  12   1   0
   ##: 903   2   0
    +:  10   7   0
    /:   0   0   0
   3::   0   0  28
  3:?:   0   0   4
   =n:  15   6  25
  =nf:   0   0   1
    @:   1   6 144
   @?:   2   7  15
  @U@:   0   1  28
  @UU:   0   0  18
   @f:   0   8  11
   @~:   0   1  65
   A::   0   1  54
  A:f:   0   0   1
  A:~:   0   0   3
    D:  11   5  14
...

# dogetnames.sh - get basenames of files for training and testing
#
# Writes basetrain.lst (from the C0* directories) and basetest.lst (from the
# C1[0-9] directories).  Each line is the path of a ???C*.PHN file relative
# to the si_tr directory with the .PHN extension removed; a file is listed
# only when a .WV1 file with the same base name exists.
root=c:/data/wsjcam0/si_tr

# listbases OUTFILE DIR...
#   Recreate OUTFILE and append the base name of every ???C*.PHN file found
#   in the given directories that has a matching .WV1 file.
listbases() {
  out=$1
  shift
  rm -f "$out"
  for d in "$@"; do
    echo "processing $d"
    for f in "$d"/???C*.PHN; do
      # Strip the literal suffix only.  The previous "sed s/.PHN//" treated
      # "." as a wildcard and was not anchored to the end of the name, so a
      # name containing "PHN" earlier on was cut in the wrong place.
      g=${f%.PHN}
      if test -e "$g.WV1"; then
        h=${g#"$root"/}
        echo "$h" >>"$out"
      fi
    done
  done
}

listbases basetrain.lst "$root"/C0*
listbases basetest.lst "$root"/C1[0-9]

# domakesfs.sh - build train/ and test/ directory trees of SFS files
#
# 1. make training and testing directories
#
mkdir train test
for d in 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z; do
  mkdir "train/C0$d"
done
for d in 0 1 2 3 4 5 6 7 8 9; do
  mkdir "test/C1$d"
done
#
# 2. convert audio and labels to SFS
#
# tosfs SET LIST
#   For every base name in LIST, make a new SFS file SET/<name>.sfs, then
#   run cnv2sfs on the .wv1 file and anload on the .phn file.
tosfs() {
  for f in $(cat "$2"); do
    sfsfile="$1/$f.sfs"
    hed -n "$sfsfile"
    cnv2sfs "c:/data/wsjcam0/si_tr/$f.wv1" "$sfsfile"
    anload -f 16000 -s "c:/data/wsjcam0/si_tr/$f.phn" "$sfsfile"
  done
}

tosfs train basetrain.lst
tosfs test basetest.lst

$ mkdir tutorial2
$ cd tutorial2
$ sh dogetnames.sh
$ sh domakesfs.sh

# domakedat.sh - run mfcc and colist -H on every SFS file and build the
# train.lst / test.lst lists of the corresponding .dat file names
#
# makedat SET
#   Recreate SET.lst; for every base name in baseSET.lst process
#   SET/<name>.sfs and append SET/<name>.dat to SET.lst.
makedat() {
  rm -f "$1.lst"
  for f in $(cat "base$1.lst"); do
    sfsfile="$1/$f.sfs"
    mfcc -n12 -e -l100 -h6000 "$sfsfile"
    colist -H "$sfsfile"
    echo "$1/$f.dat" >>"$1.lst"
  done
}

makedat train
makedat test

/* makemlf.sml - make HTK MLF file from files */

/* table to hold annotation labels */
string    table[1:1000];
var    tcount;

/* MLF file */
file    op;

/* checklabel - add a label to the global table unless already present */
/*   str   label to check */
/* Returns 0 when entry(str,table) is non-zero (presumably meaning the */
/* label is already in the table); otherwise stores str in the next free */
/* slot, increments tcount and returns 1. */
/* NOTE(review): table is declared [1:1000] and tcount is not checked */
/* against that limit here. */
function var checklabel(str)
{
    string str;

    if (entry(str,table)) return(0);
    tcount=tcount+1;
    table[tcount]=str;
    return(1);
}

/* initialise: open phone.mlf for output and write the MLF header line */
init {
    openout(op,"phone.mlf");
    print#op "#!MLF!#\n";
}

/* for each input file: write one MLF entry (quoted .lab name, one label */
/* per line, closing "." line) and add each label to the table */
main {
    var        i,num;
    string     basename;
    string    label;

    /* echo the file name on standard output */
    print $filename,"\n"
    /* basename = file name up to, but excluding, the first "." (or the */
    /* whole name when index() finds no ".") */
    i=index("\.",$filename);
    if (i) basename=$filename:1:i-1 else basename=$filename;
    print#op "\"",basename,".lab\"\n";

    /* write every annotation label matching "." and remember it */
    num=numberof(".");
    for (i=1;i<=num;i=i+1) {
        label = matchn(".",i);
        print#op label,"\n";
        checklabel(label);
    }
    /* a line holding a single "." ends this file's entry */
    print#op ".\n"
}

/* after all files: sort the label table, close the MLF file and write */
/* phone.lst, phone+.lst and phone.dic from the sorted table */
summary {
    var    i,j;
    string    t;

    /* insertion sort of table[1..tcount] into ascending compare() order */
    for (i=2;i<=tcount;i=i+1) {
        j=i;
        t=table[j];
        /* shift larger entries up one place until the slot for t is found */
        while (compare(t,table[j-1])<0) {
            table[j] = table[j-1];
            j=j-1;
            /* stop at the first slot so that table[0] is never examined */
            if (j==1) break;
        }
        table[j]=t;
    }

    /* close MLF file */
    close(op);

    /* write phone list: one label per line */
    openout(op,"phone.lst");
    for (i=1;i<=tcount;i=i+1) print#op table[i],"\n";
    close(op);

    /* write phone+ list: !ENTER and !EXIT followed by the labels */
    openout(op,"phone+.lst");
    print#op "!ENTER\n";
    print#op "!EXIT\n";
    for (i=1;i<=tcount;i=i+1) print#op table[i],"\n";
    close(op);

    /* write phone dictionary: each label listed as a word whose */
    /* tab-separated pronunciation is the label itself */
    openout(op,"phone.dic");
    print#op "!ENTER []\n";
    print#op "!EXIT []\n";
    for (i=1;i<=tcount;i=i+1) print#op table[i],"\t",table[i],"\n";
    close(op);    
}

$ sml -f makemlf.sml train test

# config.txt - HTK basic parameters
SOURCEFORMAT = HTK
TARGETKIND = MFCC_E
NATURALREADORDER = T

<BeginHMM>
<NumStates> 5 <VecSize> 13 <MFCC_E>
 <State> 2
  <Mean> 13
   0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
  <Variance> 13
   1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
 <State> 3
  <Mean> 13
   0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
  <Variance> 13
   1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
 <State> 4
  <Mean> 13
   0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
  <Variance> 13
   1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
 <TransP> 5
  0.0 1.0 0.0 0.0 0.0
  0.0 0.6 0.4 0.0 0.0
  0.0 0.0 0.6 0.4 0.0
  0.0 0.0 0.0 0.7 0.3
  0.0 0.0 0.0 0.0 1.0
<EndHMM>

$ HCompV -T 1 -C config.txt -m -S train.lst -o proto.hmm proto-3-13.hmm

# domakehmm.sh - build hmmdefs: the first three lines of proto.hmm followed,
# for every name in phone.lst, by a ~h "<name>" line and the BEGINHMM..ENDHMM
# section of proto.hmm
HCompV -T 1 -C config.txt -m -S train.lst -o proto.hmm proto-3-13.hmm
head -n 3 proto.hmm > hmmdefs
for phone in $(cat phone.lst); do
  {
    echo "~h \"$phone\""
    gawk '/BEGINHMM/,/ENDHMM/' proto.hmm
  } >>hmmdefs
done

$ HERest -C config.txt -I phone.mlf -S train.lst -H hmmdefs phone.lst

$ HLStats -T 1 -C config.txt -b phone.big -o phone.lst phone.mlf
$ HBuild -T 1 -C config.txt -n phone.big phone+.lst phone.net

$ HVite -T 1 -C config.txt -H hmmdefs -S test.lst -i recout.mlf \
-w phone.net phone.dic phone.lst

$ HResults -I phone.mlf phone.lst recout.mlf
====================== HTK Results Analysis =======================
  Date: Thu Jul  1 09:10:58 2004
  Ref : phone.mlf
  Rec : recout.mlf
------------------------ Overall Results --------------------------
SENT: %Correct=0.00 [H=0, S=903, N=903]
WORD: %Corr=48.03, Acc=41.52 [H=31884, D=10898, S=23605, I=4319, N=66387]
===================================================================

# dotrainrec.sh - ten cycles of HERest re-estimation, each followed by
# HVite recognition of the test set; the HResults report of every cycle
# is appended to the file "log" under a "Cycle N:" heading
rm -f log
n=1
while [ "$n" -le 10 ]; do
  HERest -T 1 -C config.txt -I phone.mlf -S train.lst -H hmmdefs phone.lst
  HVite -T 1 -C config.txt -H hmmdefs -S test.lst -i recout.mlf \
    -w phone.net phone.dic phone.lst
  {
    echo "Cycle $n:"
    HResults -I phone.mlf phone.lst recout.mlf
  } >>log
  n=$((n + 1))
done

/* codist.sml - plot distributions of MFCC data values */

/* raw data */
var    rdata[12,100000];
var    rcount;

/* distributions */
stat    rst[12];

/* segment label to analyse */
string    label;

/* graphics output */
file gop;

/* normal - value at x of the normal density whose mean and variance */
/* are taken from st.mean and st.variance */
/*   st   statistics variable supplying mean and variance */
/*   x    point at which to evaluate the density */
function var normal(st,x)
stat st;
{
    var x;
    /* work with the deviation from the mean */
    x = x - st.mean;
    /* exp(-d^2/(2 var)) / sqrt(2 pi var), with pi written as 3.14159 */
    return(exp(-0.5*x*x/st.variance)/sqrt(2*3.14159*st.variance));
}

/* plotdist - plot a histogram of one row of tab overlaid with the */
/* normal curve described by st */
/*   gno    row of tab to plot; also passed to plot() and used in the title */
/*   st     statistics for that row (mean/variance used by normal()) */
/*   tab    two-dimensional data table, indexed [gno,sample] */
/*   tcnt   number of samples held in each row of tab */
function var plotdist(gno,st,tab,tcnt)
stat st;
var tab[];
{
    var gno;
    var tcnt;
    var i,j,nbins,bsize;
    var hist[0:100];
    var xdata[1:2];
    var ydata[0:10000];

    /* find minimum (xdata[1]) and maximum (xdata[2]) in table row */
    xdata[1]=tab[gno,1];
    xdata[2]=tab[gno,1];
    for (i=2;i<=tcnt;i=i+1) {
        if (tab[gno,i] < xdata[1]) xdata[1]=tab[gno,i];
        if (tab[gno,i] > xdata[2]) xdata[2]=tab[gno,i];
    }

    /* set up x-axes from the min/max pair */
    plotxdata(xdata,1)

    /* estimate bin size: sqrt(tcnt) bins, but no more than 100 */
    nbins = sqrt(tcnt);
    if (nbins > 100) nbins=100;
    bsize = (xdata[2]-xdata[1])/nbins;

    /* calculate histogram: each sample adds 1/tcnt to its bin */
    for (i=1;i<=tcnt;i=i+1) {
        j=trunc((tab[gno,i]-xdata[1])/bsize);
        hist[j]=hist[j]+1/tcnt;
    }

    /* plot histogram */
    plotparam("title=C"++istr(gno));
    plotparam("type=hist");
    plot(gop,gno,hist,nbins);

    /* plot normal distribution, sampled at ten points per bin and */
    /* multiplied by the bin width */
    plotparam("type=line");
    for (i=0;i<=10*nbins;i=i+1) ydata[i]=bsize*normal(st,(xdata[1]+i*bsize/10));
    plot(gop,gno,ydata,10*nbins);
}

/* get segment name: prompt on stderr and read the label to analyse */
init {
    print#stderr "For segment : ";
    input label;
}

/* for each input file: collect values for every annotation matching */
/* label into rdata and the rst statistics, up to 100000 frames in all */
main {
    var i,j,num
    var    t,et;

    /* table already full: nothing more to collect from this file */
    if (rcount >= 100000) break;

    num=numberof(label);
    for (i=1;i<=num;i=i+1) {
        /* start from next(CO,...) of the annotation time (presumably the */
        /* first CO frame after the annotation start) and stop at its end */
        t = next(CO,timen(label,i));
        et = t + lengthn(label,i);
        while (t < et) {
            if (rcount >= 100000) break;
            rcount=rcount+1;
            /* store co(4+j,t) for j=1..12 and add each to its statistic */
            /* NOTE(review): the offset 4 presumably skips leading */
            /* non-cepstral fields of the CO frame - confirm */
            for (j=1;j<=12;j=j+1) {
                rdata[j,rcount] = co(4+j,t);
                rst[j] += co(4+j,t);
            }
            t = next(CO,t);
        }
    }
}

/* plot: pipe graphics to "dig" writing dig.gif and draw the twelve */
/* distributions on a 4 x 3 grid, one plotdist() call per coefficient */
summary {
    var j;

    openout(gop,"|dig -g -s 500x375 -o dig.gif");
    plottitle(gop,"MFCC Distributions for /"++label++"/");
    plotparam("horizontal=4");
    plotparam("vertical=3");

    for (j=1;j<=12;j=j+1) plotdist(j,rst[j],rdata,rcount);
}

$ sml -f codist.sml train
For segment : r

MU 2 {*.state[2-4].mix}

$ HHed -H hmmdefs mix2.hed phone.lst

$ cp test/c10/c10c020v.sfs .
$ anload -H recout.mlf test/c10/c10c020v.rec c10c020v.sfs
$ eswin -isp -aan c10c020v.sfs

$ cp test/c10/c10c020v.sfs .
$ anload -H recout.mlf test/c10/c10c020v.rec c10c020v.sfs
$ ancomp -l c10c020v.sfs
Subst=21 Delete=6 Insert=5 Total=64 Accuracy=59.4%

# doancomp.sh - compare recognised and reference labels over the test set
#
# collect mappings: for each test file, work on a scratch copy, load its
# .rec labels from recout.mlf and append the ancomp output to ancomp.lst
for f in $(cat basetest.lst); do
  cp "test/$f.sfs" temp.sfs
  anload -H recout.mlf "test/$f.rec" temp.sfs
  ancomp -l -m - temp.sfs
done >ancomp.lst
rm temp.sfs
#
# build confusion matrix
conmat -esl ancomp.lst >conmat.lst

$ gawk '{ if ( $1 != $2 ) print $0 }' ancomp.lst | sort | uniq -c | \
  sort -rn | head -20
    882 [] sil
    820 ax []
    724 t []
    616 ih []
    468 ih ax
    415 n m
    409 ih iy
    373 ih uw
    373 d []
    367 n []
    325 s z
    313 l []
    289 r []
    283 t s
    278 n ng
    268 dh []
    266 [] d
    264 l ao
    262 ax ih
    262 ax ah

ZERO           z ia r ow
ZERO           ow
ONE            w ah n
TWO            t uw
THREE          th r iy
FOUR           f ao
FOUR           f ao r
FIVE           f ay v
SIX            s ih k s
SEVEN          s eh v n
EIGHT          ey t
NINE           n ay n
WHAT-IS        w oh t ih z
PLUS           p l ah s
MINUS          m ay n ax s
TIMES          t ay m z
DIVIDED-BY     d ih v ay d ih d b ay
SIL    []      sil

$digit = ONE | TWO | THREE | FOUR | FIVE | SIX | SEVEN | EIGHT | NINE | ZERO;
$operation = PLUS | MINUS | TIMES | DIVIDED-BY;
( SIL WHAT-IS <$digit> $operation <$digit> SIL )

$ HParse digits.grm digits.net 

$ HVite -T 1 -C config.txt -H hmmdefs -w digits.net digits.dic \
  phone.lst inp.dat

# doreclive.sh - record, replay and recognise utterances in a loop
#
# Each pass records into inp.sfs, replays it, runs mfcc and colist -H on it,
# runs HVite on inp.dat with the digits network and then empties inp.sfs
# again.  The loop ends when "record" returns a non-zero status.
rm -f inp.sfs
hed -n inp.sfs >/dev/null
echo "To STOP this script, type CTRL/C"
#
# Discard output via /dev/null, as the hed line above already does; the DOS
# device name NUL used previously would be created as an ordinary file
# called "NUL" by shells that do not map DOS device names.
remove -e inp.sfs >/dev/null
echo "***** Say Word *****"
while record -q -e -f 16000 inp.sfs; do
  replay inp.sfs
  mfcc -n12 -e -l100 -h6000 inp.sfs
  colist -H inp.sfs
  HVite -T 1 -C config.txt -H hmmdefs -w digits.net digits.dic \
    phone.lst inp.dat
  remove -e inp.sfs >/dev/null
  echo "***** Say Word *****"
done

$ mfcc -n12 -e -l100 -h6000 six.sfs

$ anload -t phone -T "sil s ih k s p l ah s th r iy iy k w ax l z n ay n sil" six.sfs

$ colist -H six.sfs
$ anlist -h -O six.sfs

$ HVite -C config.txt -a -o SM -H hmmdefs phone.dic phone.lst six.dat

$ anload -h six.rec six.sfs

AFTER       aa f t ax
AFTER       ae f t ax
TEA         t iy
FATHER      f aa dh ax
FATHER      f ae dh ax
FED         f eh d
THE         dh ax
CAT         k aa t
CAT         k ae t
SIL    []   sil

( SIL AFTER TEA FATHER FED THE CAT SIL )

$ HParse accents.grm accents.net

$ mfcc -n12 -e -l100 -h6000 brm.sfs
$ mfcc -n12 -e -l100 -h6000 sse.sfs
$ colist -H brm.sfs
$ colist -H sse.sfs

$ HVite -C config.txt -H hmmdefs -w accents.net -m -o ST accents.dic \
  phone.lst brm.dat sse.dat

sil SIL
ae AFTER
f
t
ax
t TEA
iy
f FATHER
aa
dh
ax
f FED
eh
d
dh THE
ax
k CAT
ae
t
sil SIL

sil SIL
aa AFTER
f
t
ax
t TEA
iy
f FATHER
aa
dh
ax
f FED
eh
d
dh THE
ax
k CAT
ae
t
sil SIL

$ mfcc -n12 -e -l 100 -h 6000 dysfluent.sfs
$ colist -H dysfluent.sfs
$ HBuild phone.lst phone.net
$ HVite -C config.txt -H hmmdefs -w phone.net -o S phone.dic phone.lst dysfluent.dat
$ anload -h dysfluent.rec dysfluent.sfs

/* dysfind.sml - find dysfluencies from phone recogniser output */

/* input and output annotation sets */
item    ian;
item    oan;

/* table to hold dysfluent events */
var    times[1000,2];
var    tcount;

/* addtime - insert a dysfluency event into the global times table, */
/* keeping the table ordered by ascending start position */
/*   posn   start position of the event */
/*   size   length of the event */
/* NOTE(review): times is declared [1000,2] and tcount is not checked */
/* against that limit; no value is explicitly returned. */
function var addtime(posn,size)
{
    var posn,size;
    var i;

    /* put event in sorted position: start at the first free slot and */
    /* move later-starting events up one place */
    i=tcount+1;
    if (i > 1) {
        while (posn < times[i-1,1]) {
            times[i,1] = times[i-1,1];
            times[i,2] = times[i-1,2];
            i = i - 1;
            /* reached the first slot: nothing earlier to compare with */
            if (i==1) break;
        }
    }
    times[i,1]=posn;
    times[i,2]=size;
    tcount=tcount+1;
}

/* iscontinuant - decide whether a phone label names a continuant sound */
/*   label   phone label to test */
/* Returns 0 for "sil", for the single-letter stops p t k b d g and for */
/* "j" and "ch"; returns 1 for every other label. */
function var iscontinuant(label)
{
    string label;

    if (compare(label,"sil")==0) return(0);
    /* the class previously read [ptkpbg]: "p" was listed twice and "d" */
    /* was missing, so "d" was wrongly treated as a continuant */
    if (index("^[ptkbdg]$",label)) return(0);
    if (compare(label,"j")==0) return(0);
    if (compare(label,"ch")==0) return(0);
    return(1);
}

/* process each input file: scan the selected annotation item for long or */
/* repeated phone labels, collect them as dysfluency events with addtime() */
/* and save them as a new annotation item made of "(D)" and "/" labels */
main {
    var    i,numf,fdur,dmin;
    var ocnt,size;
    string lab1,lab2,lab3;

    /* get input & output: the output item is created with the frame */
    /* duration and offset of the input item */
    sfsgetitem(ian,$filename,str(selectitem(AN),4,2));
    numf=sfsgetparam(ian,"numframes");
    fdur=sfsgetparam(ian,"frameduration");
    sfsnewitem(oan,AN,fdur,sfsgetparam(ian,"offset"),1,numf);

    /* put minimum dysfluency length = 0.33s, expressed in frame units */
    dmin = 0.33/fdur;

    /* look for long continuant annotations */
    tcount=0;
    for (i=1;i<=numf;i=i+1) {
        size = sfsgetfield(ian,i,1);
        if (size > dmin) {
            lab1 = sfsgetstring(ian,i);
            if (iscontinuant(lab1)==1) {
                addtime(sfsgetfield(ian,i,0),size);
            }
        }
    }

    /* look for patterns like AA: two equal continuant labels in a row */
    /* whose combined length exceeds the minimum */
    for (i=2;i<=numf;i=i+1) {
        lab1 = sfsgetstring(ian,i-1);
        lab2 = sfsgetstring(ian,i);
        if (compare(lab1,lab2)==0) {
            size = sfsgetfield(ian,i-1,1)+sfsgetfield(ian,i,1);
            if (size > dmin) {
                if (iscontinuant(lab1)==1) {
                    addtime(sfsgetfield(ian,i-1,0),size);
                }
            }
        }
    }

    /* look for patterns like AAA: three equal continuant labels in a row */
    for (i=3;i<=numf;i=i+1) {
        lab1 = sfsgetstring(ian,i-2);
        lab2 = sfsgetstring(ian,i-1);
        if (compare(lab1,lab2)==0) {
            lab3 = sfsgetstring(ian,i);
            if (compare(lab1,lab3)==0) {
                size = sfsgetfield(ian,i-2,1)+sfsgetfield(ian,i-1,1)+sfsgetfield(ian,i,1);
                if (size > dmin) {
                    if (iscontinuant(lab1)==1) {
                        /* the event starts at the FIRST of the three */
                        /* annotations, as in the AA and ABABA cases; it */
                        /* previously used the start of the third (i), */
                        /* which placed the event too late */
                        addtime(sfsgetfield(ian,i-2,0),size);
                    }
                }
            }
        }
    }

    /* look for patterns like ABABA, where A is not silence; the event */
    /* runs from the start of the first A to the end of the last A */
    for (i=5;i<=numf;i=i+1) {
        lab1 = sfsgetstring(ian,i-4);
        lab2 = sfsgetstring(ian,i-2);
        if ((compare(lab1,lab2)==0)&&(compare(lab1,"sil")!=0)) {
            lab3 = sfsgetstring(ian,i);
            if (compare(lab1,lab3)==0) {
                lab1 = sfsgetstring(ian,i-3);
                lab2 = sfsgetstring(ian,i-1);
                if (compare(lab1,lab2)==0) {
                    size = sfsgetfield(ian,i,0)+sfsgetfield(ian,i,1)-sfsgetfield(ian,i-4,0);
                    addtime(sfsgetfield(ian,i-4,0),size);
                }
            }
        }
    }

    /* convert times to annotations (ignoring overlaps): every event gives */
    /* a "(D)" annotation; a "/" annotation fills the gap to the next */
    /* event when that event starts after this one ends, and also follows */
    /* the last event */
    /* NOTE(review): for the last event times[i+1,1] has not been set by */
    /* addtime() during this pass, so the length of the final "/" */
    /* annotation depends on whatever that slot holds - confirm intended */
    ocnt=0;
    for (i=1;i<=tcount;i=i+1) {
        sfssetfield(oan,ocnt,0,times[i,1]);
        sfssetfield(oan,ocnt,1,times[i,2]);
        sfssetstring(oan,ocnt,"(D)");
        ocnt=ocnt+1;
        if ((i==tcount)||(times[i+1,1]>times[i,1]+times[i,2])) {
            sfssetfield(oan,ocnt,0,times[i,1]+times[i,2]);
            sfssetfield(oan,ocnt,1,times[i+1,1]-times[i,1]-times[i,2]);
            sfssetstring(oan,ocnt,"/");
            ocnt=ocnt+1;
        }
    }

    /* save output back to file */
    sfsputitem(oan,$filename,ocnt);
}

$ sml -ian^anload dysfind.sml dysfluent.sfs

/* dysmark.sml - mark performance of dysfluency recogniser */

var mtab[1000,3];        /* time of each reference event */
var mcnt                /* number of reference events */
var nhit,nmiss,nfa        /* # hits, misses, false alarms */
var rtotal;                /* total reference events */
var ttotal;                /* total recognised events */

/* markevent - compare a recognised event to the reference table */
/*   t1,t2   start and end time of the recognised event */
/* Returns 1 after counting a hit and flagging the matching reference */
/* event in mtab[i,3]; returns 0 after counting a false alarm when no */
/* reference event matches. */
function var markevent(t1,t2)
{
    var    t1,t2;
    var    i;

    for (i=1;i<=mcnt;i=i+1) {
        /* check for overlap in time: hit when the start or the end of */
        /* the recognised event lies inside reference event i */
        /* NOTE(review): a recognised event that completely spans a */
        /* reference event passes neither test - confirm this is intended */
        /* (this comparison previously read "t1*lt;=", a garbled "<=") */
        if ((mtab[i,1]<=t1)&&(t1<=mtab[i,2])) {
            nhit=nhit+1;
            mtab[i,3]=1;
            return(1);
        }
        if ((mtab[i,1]<=t2)&&(t2<=mtab[i,2])) {
            nhit=nhit+1;
            mtab[i,3]=1;
            return(1);
        }
    }
    /* must be a false alarm */
    nfa=nfa+1;
    return(0);
}

/* for each input file: load the reference events into mtab, score every */
/* recognised "(D)" event against them and update the running totals */
/* (nhit, nmiss, nfa, rtotal and ttotal are not reset here, so they */
/* accumulate over all input files) */
main {
    string    match
    var i,num
    var    t1,t2;

    /* load reference annotations into table: start time, end time and a */
    /* "found" flag cleared to 0 */
    /* NOTE(review): the pattern presumably selects labels containing a */
    /* parenthesised part - confirm against the SML regular expression rules */
    /* NOTE(review): mtab is declared [1000,3]; num is not checked against it */
    selectmatch("an^*fluency*word");
    match="^.*\(.*\).*$";
    num=numberof(match);
    for (i=1;i<=num;i=i+1) {
        mtab[i,1] = timen(match,i);
        mtab[i,2] = mtab[i,1] + lengthn(match,i);
        mtab[i,3] = 0;
    }
    mcnt=num;

    /* compare to recognised annotations */
    selectmatch("an^*dysfind");
    num=numberof("\(D\)");
    for (i=1;i<=num;i=i+1) {
        t1=timen("\(D\)",i);
        t2=t1+lengthn("\(D\)",i);
        markevent(t1,t2);
    }

    /* count misses: reference events never flagged by markevent() */
    for (i=1;i<=mcnt;i=i+1) if (mtab[i,3]==0) nmiss=nmiss+1;

    /* update totals */
    rtotal = rtotal + mcnt;
    ttotal = ttotal + num;
}

/* after all files: print the accumulated totals, one per line */
summary {
    print "Total reference    = ",rtotal:1,"\n";
    print "Total recognised   = ",ttotal:1,"\n";
    print "Total hits         = ",nhit:1,"\n";
    print "Total misses       = ",nmiss:1,"\n";
    print "Total false alarms = ",nfa:1,"\n";
}

$ sml dysmark.sml dysfluent.sfs
Total reference    = 36
Total recognised   = 12
Total hits         = 2
Total misses       = 34
Total false alarms = 10

Speaker	Training		Testing
Speaker	Signal	Label	Signal	Label
mac	acpa0002.pes acpa0003.pes acpa0004.pes	acpa0002.pea acpa0003.pea acpa0004.pea	acpa0001.pes	acpa0001.pea
mae	aepa0001.pes aepa0003.pes aepa0004.pes	aepa0001.pea aepa0003.pea aepa0004.pea	aepa0002.pes	aepa0002.pea
maf	afpa0001.pes afpa0002.pes afpa0004.pes	afpa0001.pea afpa0002.pea afpa0004.pea	afpa0003.pes	afpa0003.pea
mah	ahpa0001.pes ahpa0002.pes ahpa0003.pes	ahpa0001.pea ahpa0002.pea ahpa0003.pea	ahpa0004.pes	ahpa0004.pea
mam			ampa0001.pes ampa0002.pes ampa0003.pes ampa0004.pes	ampa0001.pea ampa0002.pea ampa0003.pea ampa0004.pea

Speech Filing System

How To: Use HTK Hidden Markov modelling toolkit with SFS

1. Installation

Installation of CYGWIN

Installation of a Text Editor

Installation of HTK

Installation of SFS

2. Phone-class recognition

Source data

Loading source data into SFS

Data Preparation

Export of data to HTK format

HTK configuration

HTK training

HTK testing

Performance evaluation

3. Phone recognition

Source Data

Making HTK data files

Training HMMs

Parameter Analysis

Viewing recognition results in SFS

Enhancements

4. Word recognition

Dictionary

Grammar

Word recogniser

Enhancement

5. Phone alignment

6. Pronunciation variation analysis

7. Dysfluency recognition

Bibliography

Feedback

Some other pages on our site you may enjoy:

PROREC - Speech Prompt & Record System

WASP - Record & display speech signals

`brm.rec`		`sse.rec`
sil SIL ae AFTER f t ax t TEA iy f FATHER aa dh ax f FED eh d dh THE ax k CAT ae t sil SIL		sil SIL aa AFTER f t ax t TEA iy f FATHER aa dh ax f FED eh d dh THE ax k CAT ae t sil SIL