#LyX 2.1 created this file. For more info see http://www.lyx.org/
\lyxformat 474
\begin_document
\begin_header
\textclass extbook
\begin_preamble
\usepackage{algorithm}
\usepackage{algpseudocode}  
\end_preamble
\use_default_options false
\master CNTKBook-master.lyx
\maintain_unincluded_children false
\language english
\language_package default
\inputencoding auto
\fontencoding global
\font_roman default
\font_sans default
\font_typewriter default
\font_math auto
\font_default_family default
\use_non_tex_fonts false
\font_sc false
\font_osf false
\font_sf_scale 100
\font_tt_scale 100
\graphics default
\default_output_format default
\output_sync 0
\bibtex_command default
\index_command default
\paperfontsize 11
\spacing single
\use_hyperref false
\papersize default
\use_geometry false
\use_package amsmath 1
\use_package amssymb 2
\use_package cancel 0
\use_package esint 1
\use_package mathdots 1
\use_package mathtools 0
\use_package mhchem 1
\use_package stackrel 0
\use_package stmaryrd 0
\use_package undertilde 0
\cite_engine basic
\cite_engine_type default
\biblio_style plain
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date false
\justification true
\use_refstyle 0
\index Index
\shortcut idx
\color #008000
\end_index
\secnumdepth 3
\tocdepth 3
\paragraph_separation indent
\paragraph_indentation default
\quotes_language english
\papercolumns 1
\papersides 1
\paperpagestyle default
\listings_params "basicstyle={\small},breaklines=true,frame=tb"
\tracking_changes false
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\end_header

\begin_body

\begin_layout Chapter
Example Setups
\begin_inset CommandInset label
LatexCommand label
name "chap:ExampleSetup"

\end_inset


\end_layout

\begin_layout Section
Acoustic Model
\end_layout

\begin_layout Standard
In this section we describe how CNTK can be used to build several standard
 models that can be used for acoustic modeling for speech recognition applicatio
ns.
 All examples are based on the TIMIT corpus for phonetic recognition but
 can easily be modified for use in large vocabulary continuous speech recognition.
 The only significant change is that context-independent phonetic states
 used in the TIMIT example would be replaced by context-dependent senone
 targets for large vocabulary tasks.
 We note that these examples are not meant to be representative of state-of-the-art
 performance, but rather to demonstrate how CNTK can be used
 in a variety of speech recognition applications.
 All examples are located in the ExampleSetups folder (ExampleSetups
\backslash
ASR
\backslash
TIMIT).
\end_layout

\begin_layout Subsection
Training a DNN with SimpleNetworkBuilder
\begin_inset Index idx
status open

\begin_layout Plain Layout
SimpleNetworkBuilder
\end_layout

\end_inset


\end_layout

\begin_layout Standard
The simplest way to create an acoustic model with CNTK is to use the SimpleNetwo
rkBuilder.
 This network builder constructs a fully-connected feed-forward network
 of user-specified depth and size.
 The configuration file is shown below.
 As you can see, there are several key blocks of data specified:
\end_layout

\begin_layout Itemize

\emph on
SimpleNetworkBuilder
\emph default
: the overall network topology is specified here using the layerSizes parameter.
 In this example, the network has 792 inputs (an 11-frame context window
 of a 72-dimensional feature vector), 3 hidden layers of 512 sigmoidal neurons
 and 183 outputs, corresponding to the TIMIT phoneme states (3 x 61 phonemes).
 Cross entropy is used as the training criterion, and the frame error rate
 will also be monitored during training, as specified by the evalCriterion
 parameter.
 The input data will be mean and variance normalized since applyMeanVarNorm
 has been set to true.
 In addition, if needPrior is set to true, the prior probabilities of the
 labels will be computed and a ScaledLogLikelihood node in the network will
 be automatically created.
 This is important if this network will be used to generate acoustic scores
 in a speech recognition decoder.
 
\end_layout

\begin_layout Itemize

\emph on
SGD
\emph default

\begin_inset Index idx
status open

\begin_layout Plain Layout
SGD
\end_layout

\end_inset

: this block specifies the parameters for stochastic gradient descent optimizati
on.
 In this example, a total of 25 epochs will be run using a fixed learning
 schedule, with a learning rate of 0.8 for the first epoch, 3.2 for the next
 14 epochs, and then 0.08 for all remaining epochs.
 A minibatch size of 256 will be used for the first epoch, and all remaining
 epochs will use a minibatch size of 1024.
 A momentum value of 0.9 will also be used.
\end_layout

\begin_layout Itemize

\emph on
reader
\begin_inset Index idx
status open

\begin_layout Plain Layout
reader
\end_layout

\end_inset

:
\emph default
 this example uses the HTKMLFReader
\begin_inset Index idx
status open

\begin_layout Plain Layout
HTKMLFReader
\end_layout

\end_inset

.
 For SimpleNetworkBuilder, the inputs must be called 
\begin_inset Quotes eld
\end_inset

features
\begin_inset Quotes erd
\end_inset

 and the output labels must be called 
\begin_inset Quotes eld
\end_inset

labels
\begin_inset Quotes erd
\end_inset

.
 The scpFile
\begin_inset Index idx
status open

\begin_layout Plain Layout
scpFile
\end_layout

\end_inset

 contains the list of files to be processed and the mlfFile contains the
 labels for these files.
 More details about the HTKMLFReader can be found in Section 1.4.2.
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

TIMIT_TrainSimple=[     
\end_layout

\begin_layout Plain Layout

  action="train"
\end_layout

\begin_layout Plain Layout

  modelPath="$ExpDir$
\backslash
TrainSimpleNetwork
\backslash
model
\backslash
cntkSpeech.dnn"
\end_layout

\begin_layout Plain Layout

  deviceId=$DeviceNumber$
\end_layout

\begin_layout Plain Layout

  traceLevel=1
\end_layout

\begin_layout Plain Layout

  SimpleNetworkBuilder=[
\end_layout

\begin_layout Plain Layout

    layerSizes=792:512*3:183
\end_layout

\begin_layout Plain Layout

    trainingCriterion="CrossEntropyWithSoftmax"
\end_layout

\begin_layout Plain Layout

    evalCriterion="ClassificationError"
\end_layout

\begin_layout Plain Layout

    layerTypes="Sigmoid"
\end_layout

\begin_layout Plain Layout

    initValueScale=1.0
\end_layout

\begin_layout Plain Layout

    applyMeanVarNorm=true
\end_layout

\begin_layout Plain Layout

    uniformInit=true
\end_layout

\begin_layout Plain Layout

    needPrior=true
\end_layout

\begin_layout Plain Layout

  ]
\end_layout

\begin_layout Plain Layout

  SGD=[
\end_layout

\begin_layout Plain Layout

    epochSize=0
\end_layout

\begin_layout Plain Layout

    minibatchSize=256:1024
\end_layout

\begin_layout Plain Layout

    learningRatesPerMB=0.8:3.2*14:0.08
\end_layout

\begin_layout Plain Layout

    momentumPerMB=0.9
\end_layout

\begin_layout Plain Layout

    maxEpochs=25
\end_layout

\begin_layout Plain Layout

  ]
\end_layout

\begin_layout Plain Layout

  reader=[
\end_layout

\begin_layout Plain Layout

    readerType="HTKMLFReader"
\end_layout

\begin_layout Plain Layout

    readMethod="rollingWindow"
\end_layout

\begin_layout Plain Layout

    miniBatchMode="partial"
\end_layout

\begin_layout Plain Layout

    randomize="auto"
\end_layout

\begin_layout Plain Layout

    verbosity=1
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

    features=[
\end_layout

\begin_layout Plain Layout

      dim=792
\end_layout

\begin_layout Plain Layout

      scpFile="$ScpDir$
\backslash
TIMIT.train.scp.fbank.fullpath"
\end_layout

\begin_layout Plain Layout

    ]
\end_layout

\begin_layout Plain Layout

  
\end_layout

\begin_layout Plain Layout

    labels=[
\end_layout

\begin_layout Plain Layout

      mlfFile="$MlfDir$
\backslash
TIMIT.train.align_cistate.mlf.cntk"
\end_layout

\begin_layout Plain Layout

      labelDim=183
\end_layout

\begin_layout Plain Layout

      labelMappingFile="$MlfDir$
\backslash
TIMIT.statelist"
\end_layout

\begin_layout Plain Layout

    ]
\end_layout

\begin_layout Plain Layout

  ]
\end_layout

\begin_layout Plain Layout

]  
\end_layout

\end_inset


\end_layout

\begin_layout Subsection
Adapting the learning rate based on development data
\end_layout

\begin_layout Standard
In addition to using a fixed learning schedule, CNTK can adjust the learning
 rate based on the performance on a held out development set.
 To do this, the development set needs to be specified using a second data
 reader known as a 
\begin_inset Quotes eld
\end_inset

cvReader
\begin_inset Quotes erd
\end_inset

 and the appropriate learning rate adjustment parameters need to be specified
 using an 
\begin_inset Quotes eld
\end_inset

AutoAdjust
\begin_inset Index idx
status open

\begin_layout Plain Layout
autoAdjust
\end_layout

\end_inset


\begin_inset Quotes erd
\end_inset

 configuration block within the SGD configuration block.
 For example, the following cvReader block could be added to the configuration
 file from the previous example:
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

  cvReader=[
\end_layout

\begin_layout Plain Layout

    readerType="HTKMLFReader"
\end_layout

\begin_layout Plain Layout

    readMethod="rollingWindow"
\end_layout

\begin_layout Plain Layout

    miniBatchMode="partial"
\end_layout

\begin_layout Plain Layout

    randomize="auto"
\end_layout

\begin_layout Plain Layout

    verbosity=1
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

    features=[
\end_layout

\begin_layout Plain Layout

      dim=792
\end_layout

\begin_layout Plain Layout

      scpFile="$ScpDir$
\backslash
TIMIT.dev.scp.fbank.fullpath"
\end_layout

\begin_layout Plain Layout

    ]
\end_layout

\begin_layout Plain Layout

  
\end_layout

\begin_layout Plain Layout

    labels=[
\end_layout

\begin_layout Plain Layout

      mlfFile="$MlfDir$
\backslash
TIMIT.dev.align_cistate.mlf.cntk"
\end_layout

\begin_layout Plain Layout

      labelDim=183
\end_layout

\begin_layout Plain Layout

      labelMappingFile="$MlfDir$
\backslash
TIMIT.statelist"
\end_layout

\begin_layout Plain Layout

    ]
\end_layout

\begin_layout Plain Layout

  ]
\end_layout

\end_inset


\end_layout

\begin_layout Standard
The learning rate adjustment is specified within the SGD block as follows.
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

  SGD=[
\end_layout

\begin_layout Plain Layout

    epochSize=0
\end_layout

\begin_layout Plain Layout

    minibatchSize=256:1024
\end_layout

\begin_layout Plain Layout

    learningRatesPerMB=0.8:3.2*14:0.08
\end_layout

\begin_layout Plain Layout

    momentumPerMB=0.9
\end_layout

\begin_layout Plain Layout

    maxEpochs=25
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

    AutoAdjust=[
\end_layout

\begin_layout Plain Layout

      autoAdjustLR="adjustAfterEpoch"
\end_layout

\begin_layout Plain Layout

      reduceLearnRateIfImproveLessThan=0
\end_layout

\begin_layout Plain Layout

      loadBestModel=true
\end_layout

\begin_layout Plain Layout

      increaseLearnRateIfImproveMoreThan=1000000000
\end_layout

\begin_layout Plain Layout

      learnRateDecreaseFactor=0.5
\end_layout

\begin_layout Plain Layout

      learnRateIncreaseFactor=1.382
\end_layout

\begin_layout Plain Layout

    ]
\end_layout

\begin_layout Plain Layout

  ]
\end_layout

\end_inset


\end_layout

\begin_layout Standard
In this example, the learning rate will be reduced by a factor of 0.5 if
 the error on the held out data gets worse.
 While the learning rate can also be increased based on the performance
 on a held-out set, this is effectively turned off by setting the criterion
 for increasing the learning rate to a high value.
 
\end_layout

\begin_layout Subsection
Training a DNN with NDLNetworkBuilder
\begin_inset Index idx
status open

\begin_layout Plain Layout
NDLNetworkBuilder
\end_layout

\end_inset


\end_layout

\begin_layout Standard
While standard feedforward architectures are simple to specify with SimpleNetwor
kBuilder, NDLNetworkBuilder can be used when alternative structures are
 desired.
 In this case, the SimpleNetworkBuilder configuration block in the previous
 example is replaced with an NDLNetworkBuilder block.
 
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

  NDLNetworkBuilder=[
\end_layout

\begin_layout Plain Layout

    ndlMacros="$NdlDir$
\backslash
default_macros.ndl"
\end_layout

\begin_layout Plain Layout

    networkDescription="$NdlDir$
\backslash
classify.ndl"
\end_layout

\begin_layout Plain Layout

  ]
\end_layout

\end_inset


\end_layout

\begin_layout Standard
In this example, a file containing several NDL macros is loaded first, and
 then the NDL file containing the actual network description is loaded.
 Note that macros can be defined in both files.
 For example, commonly used macros shared across a variety of experiments
 can be specified via ndlMacros, while macros specific to a particular network
 description can be specified within the networkDescription file.
 This example creates the exact same network as the first case, but demonstrates
 how it can be done using NDL.
 
\end_layout

\begin_layout Standard
One key thing to note is that when NDL is used, there are no longer restrictions
 on the names for the network's inputs and outputs.
 In this case, the inputs are associated with a node called 
\begin_inset Quotes eld
\end_inset

myFeatures
\begin_inset Quotes erd
\end_inset

 and the labels are associated with a node called 
\begin_inset Quotes eld
\end_inset

myLabels
\begin_inset Quotes erd
\end_inset

.
 Note that these new node names need to be used in the 
\emph on
reader
\emph default
 block in the main configuration file.
 
\end_layout
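
\begin_layout Standard
For reference, below is a minimal sketch of what a network description such
 as classify.ndl might contain, using the myFeatures and myLabels node names
 mentioned above and assuming the SBFF, BFF, and MeanVarNorm macros from
 default_macros.ndl; the actual file in the example setup may differ in its
 details.
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

# illustrative sketch of classify.ndl, not the verbatim file
\end_layout

\begin_layout Plain Layout

ndlCreateNetwork=[
\end_layout

\begin_layout Plain Layout

  featDim=792
\end_layout

\begin_layout Plain Layout

  labelDim=183
\end_layout

\begin_layout Plain Layout

  hiddenDim=512
\end_layout

\begin_layout Plain Layout

  myFeatures=Input(featDim,tag="feature")
\end_layout

\begin_layout Plain Layout

  myLabels=Input(labelDim,tag="label")
\end_layout

\begin_layout Plain Layout

  featNorm=MeanVarNorm(myFeatures)
\end_layout

\begin_layout Plain Layout

  L1=SBFF(featNorm,hiddenDim,featDim)
\end_layout

\begin_layout Plain Layout

  L2=SBFF(L1,hiddenDim,hiddenDim)
\end_layout

\begin_layout Plain Layout

  L3=SBFF(L2,hiddenDim,hiddenDim)
\end_layout

\begin_layout Plain Layout

  BFF1=BFF(L3,labelDim,hiddenDim)
\end_layout

\begin_layout Plain Layout

  CE=CrossEntropyWithSoftmax(myLabels,BFF1.FF.P,tag="criterion")
\end_layout

\begin_layout Plain Layout

  FER=ClassificationError(myLabels,BFF1.FF.P,tag="evaluation")
\end_layout

\begin_layout Plain Layout

  # prior over states, used to convert posteriors to scaled likelihoods
\end_layout

\begin_layout Plain Layout

  Prior=Mean(myLabels)
\end_layout

\begin_layout Plain Layout

  LogPrior=Log(Prior)
\end_layout

\begin_layout Plain Layout

  ScaledLogLikelihood=Minus(BFF1.FF.P,LogPrior,tag="output")
\end_layout

\begin_layout Plain Layout

]
\end_layout

\end_inset


\end_layout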

\begin_layout Subsection
Training an autoencoder
\begin_inset Index idx
status open

\begin_layout Plain Layout
autoencoder
\end_layout

\end_inset


\end_layout

\begin_layout Standard
Training an autoencoder is straightforward using either SimpleNetworkBuilder
 or NDLNetworkBuilder by using the SquareError
\begin_inset Index idx
status open

\begin_layout Plain Layout
SquareError
\end_layout

\end_inset

 criterion node rather than the CrossEntropyWithSoftmax criterion node.
 In this example, the network is constructed with NDL.
 Below is a snippet from the NDL file for this example.
 This autoencoder has three hidden layers including a middle bottleneck
 layer of 64 neurons.
 A macro is defined to perform mean and variance normalization, and it is
 applied to both the input and target features.
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

load=ndlMacroDefine 
\end_layout

\begin_layout Plain Layout

run=ndlCreateNetwork
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

ndlMacroDefine=[
\end_layout

\begin_layout Plain Layout

  MeanVarNorm(x)
\end_layout

\begin_layout Plain Layout

  {
\end_layout

\begin_layout Plain Layout

    xMean = Mean(x)
\end_layout

\begin_layout Plain Layout

    xStdDev = InvStdDev(x)
\end_layout

\begin_layout Plain Layout

    xNorm=PerDimMeanVarNormalization(x,xMean,xStdDev)
\end_layout

\begin_layout Plain Layout

  }
\end_layout

\begin_layout Plain Layout

  
\end_layout

\begin_layout Plain Layout

  MSEBFF(x,r,c,labels)     
\end_layout

\begin_layout Plain Layout

  {         
\end_layout

\begin_layout Plain Layout

    BFF=BFF(x,r,c)
\end_layout

\begin_layout Plain Layout

    MSE=SquareError(labels,BFF)
\end_layout

\begin_layout Plain Layout

  } 
\end_layout

\begin_layout Plain Layout

]
\end_layout

\begin_layout Plain Layout

ndlCreateNetwork=[
\end_layout

\begin_layout Plain Layout

  featInDim=792
\end_layout

\begin_layout Plain Layout

  featOutDim=792
\end_layout

\begin_layout Plain Layout

  hiddenDim=512
\end_layout

\begin_layout Plain Layout

  bottleneckDim=64
\end_layout

\begin_layout Plain Layout

  featIn=Input(featInDim,tag="feature")
\end_layout

\begin_layout Plain Layout

  featOut=Input(featOutDim,tag="feature")
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

  featNormIn = MeanVarNorm(featIn)
\end_layout

\begin_layout Plain Layout

  featNormOut = MeanVarNorm(featOut)
\end_layout

\begin_layout Plain Layout

  L1 = SBFF(featNormIn,hiddenDim,featInDim)
\end_layout

\begin_layout Plain Layout

  L2 = SBFF(L1,bottleneckDim,hiddenDim)
\end_layout

\begin_layout Plain Layout

  L3 = SBFF(L2,hiddenDim,bottleneckDim)
\end_layout

\begin_layout Plain Layout

  MeanSqErr = MSEBFF(L3, featOutDim, hiddenDim, featNormOut,tag="criterion")
\end_layout

\begin_layout Plain Layout

  OutputNodes=(MeanSqErr.BFF.FF.P)
\end_layout

\begin_layout Plain Layout

  EvalNodes=(MeanSqErr)
\end_layout

\begin_layout Plain Layout

]
\end_layout

\end_inset


\end_layout

\begin_layout Standard
For an autoencoder the reader will process features for both the input and
 desired output (targets) for the network.
 Thus, the reader configuration block in the main configuration file has
 two entries 
\begin_inset Quotes eld
\end_inset

featIn
\begin_inset Quotes erd
\end_inset

 and 
\begin_inset Quotes eld
\end_inset

featOut
\begin_inset Quotes erd
\end_inset

 that both have scpFiles specified.
 In this case, they point to the same file, but this is not required.
 For example, if a network was trained for feature enhancement, then featIn
 could be reading the noisy features while featOut would be reading the
 desired clean feature targets.
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status collapsed

\begin_layout Plain Layout

featIn=[
\end_layout

\begin_layout Plain Layout

  scpFile=$ScpDir$
\backslash
TIMIT.train.scp.fbank.fullpath
\end_layout

\begin_layout Plain Layout

  dim=792
\end_layout

\begin_layout Plain Layout

]
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

featOut=[
\end_layout

\begin_layout Plain Layout

  scpFile=$ScpDir$
\backslash
TIMIT.train.scp.fbank.fullpath
\end_layout

\begin_layout Plain Layout

  dim=792
\end_layout

\begin_layout Plain Layout

]
\end_layout

\end_inset


\end_layout

\begin_layout Subsection
Using layer-by-layer 
\begin_inset Index idx
status open

\begin_layout Plain Layout
discriminative pre-training
\end_layout

\end_inset

discriminative pre-training
\end_layout

\begin_layout Standard
It is well known that deep networks can be difficult to optimize, especially
 when a limited amount of training data is available.
 As a result, a number of approaches to initializing the parameters of these
 networks have been proposed.
 One of these methods is known as discriminative pre-training.
 In this approach, a network with a single hidden layer is trained starting
 from random initialization.
 Then the network is grown one layer at a time, from one hidden layer to
 two hidden layers and so on.
 Each model is initialized using the parameters learned from the previous
 model and random initialization of the topmost output layer.
\end_layout

\begin_layout Standard
This process can be performed in CNTK using alternating steps of model training
 and model editing using the Model Editing Language
\begin_inset Index idx
status open

\begin_layout Plain Layout
Model Editing Language
\end_layout

\end_inset

 (MEL
\begin_inset Index idx
status open

\begin_layout Plain Layout
MEL
\end_layout

\end_inset

).
 The config file example shows how to do this using a series of execution
 commands.
 The top-level command contains five configuration blocks to execute in
 sequence:
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

command="TIMIT_DiscrimPreTrain1:TIMIT_AddLayer2:TIMIT_DiscrimPreTrain2:TIMIT_Add
Layer3:TIMIT_Train3"
\end_layout

\end_inset


\end_layout

\begin_layout Standard
This main configuration file is a good example of how certain config blocks
 can be placed at the top level of the config file.
 In this example, the 
\emph on
SGD
\begin_inset Index idx
status open

\begin_layout Plain Layout
SGD
\end_layout

\end_inset


\emph default
 and 
\emph on
reader
\emph default
 blocks are shared across all stages of processing.
 The first three configuration blocks to be processed are shown below:
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

TIMIT_DiscrimPreTrain1=[
\end_layout

\begin_layout Plain Layout

  action="train"
\end_layout

\begin_layout Plain Layout

  modelPath="$ExpDir$
\backslash
TrainWithPreTrain
\backslash
dptmodel1
\backslash
cntkSpeech.dnn"
\end_layout

\begin_layout Plain Layout

  NDLNetworkBuilder=[
\end_layout

\begin_layout Plain Layout

    NetworkDescription="$NdlDir$
\backslash
create_1layer.ndl"
\end_layout

\begin_layout Plain Layout

  ]
\end_layout

\begin_layout Plain Layout

]
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

TIMIT_AddLayer2=[
\end_layout

\begin_layout Plain Layout

  action="edit"
\end_layout

\begin_layout Plain Layout

  CurrLayer=1
\end_layout

\begin_layout Plain Layout

  NewLayer=2
\end_layout

\begin_layout Plain Layout

  CurrModel="$ExpDir$
\backslash
TrainWithPreTrain
\backslash
dptmodel1
\backslash
cntkSpeech.dnn"
\end_layout

\begin_layout Plain Layout

  NewModel="$ExpDir$
\backslash
TrainWithPreTrain
\backslash
dptmodel2
\backslash
cntkSpeech.dnn.0"
\end_layout

\begin_layout Plain Layout

  editPath="$MelDir$
\backslash
add_layer.mel"
\end_layout

\begin_layout Plain Layout

]
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

TIMIT_DiscrimPreTrain2=[
\end_layout

\begin_layout Plain Layout

  action="train"
\end_layout

\begin_layout Plain Layout

  modelPath="$ExpDir$
\backslash
TrainWithPreTrain
\backslash
dptmodel2
\backslash
cntkSpeech.dnn"
\end_layout

\begin_layout Plain Layout

  NDLNetworkBuilder=[
\end_layout

\begin_layout Plain Layout

    NetworkDescription="$NdlDir$
\backslash
create_1layer.ndl"
\end_layout

\begin_layout Plain Layout

  ]
\end_layout

\begin_layout Plain Layout

]
\end_layout

\end_inset


\end_layout

\begin_layout Standard
In the first block, TIMIT_DiscrimPreTrain1, the initial model is trained
 according to the network description language file 
\begin_inset Quotes eld
\end_inset

create_1layer.ndl
\begin_inset Quotes erd
\end_inset

 and stored in a folder called dptmodel1.
 Next, in TIMIT_AddLayer2, the previous model is edited according to the
 MEL script 
\begin_inset Quotes eld
\end_inset

add_layer.mel
\begin_inset Quotes erd
\end_inset

 which will add a layer to the existing model and write the new model out
 to a new model file.
 This script will process the variables set in this configuration block
 that refer to the current layer, new layer, current model, and new model.
 Note that the new model has an extension 
\begin_inset Quotes eld
\end_inset

0
\begin_inset Quotes erd
\end_inset

 and has been placed in a folder called 
\begin_inset Quotes eld
\end_inset

dptmodel2
\begin_inset Quotes erd
\end_inset

.
 Now, the third configuration block 
\begin_inset Quotes eld
\end_inset

TIMIT_DiscrimPreTrain2
\begin_inset Quotes erd
\end_inset

 will train a model to be located in the 
\begin_inset Quotes eld
\end_inset

dptmodel2
\begin_inset Quotes erd
\end_inset

 folder.
 Because the previous step created a model in that location with the extension
 
\begin_inset Quotes eld
\end_inset

0
\begin_inset Quotes erd
\end_inset

, the training tool will use that model as the initial model rather than
 creating a model from scratch.
 This process repeats itself until the network contains the total number
 of layers desired.
 
\end_layout

\begin_layout Standard
To see how a new layer is added, we can look at the MEL script 
\begin_inset Quotes eld
\end_inset

add_layer.mel
\begin_inset Quotes erd
\end_inset


\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

m1=LoadModel("$currModel$", format="cntk")
\end_layout

\begin_layout Plain Layout

SetDefaultModel(m1)
\end_layout

\begin_layout Plain Layout

HDim=512
\end_layout

\begin_layout Plain Layout

L$newLayer$=SBFF(L$currLayer$.S, HDim, HDim)
\end_layout

\begin_layout Plain Layout

SetInput(CE.*.T, 1, L$newLayer$.S)
\end_layout

\begin_layout Plain Layout

SetInput(L$newLayer$.*.T, 1, L$currLayer$.S)
\end_layout

\begin_layout Plain Layout

SaveModel(m1, "$newModel$", format="cntk")
\end_layout

\end_inset


\end_layout

\begin_layout Standard
In this script, the initial model is loaded and set as the default model.
 A new layer is created using the macro SBFF (the default macros have been
 loaded in the top-level configuration file).
 Then the new connections are made, with the input to the cross entropy
 layer of the existing model connected to the output of the new layer, and
 the input of the new layer connected to the output of the previous model's
 top layer.
 The new model is then saved to the specified file.
 Note that through the use of configuration variables, this same script
 can be reused any time a new layer needs to be added.
 If different layer sizes were desired, the HDim variable could be set by
 the higher-level configuration file in the appropriate edit block, rather
 than from within the MEL script, as sketched below.
 
\end_layout
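
\begin_layout Standard
As a hedged sketch, assuming add_layer.mel were changed to reference $HDim$
 instead of defining HDim locally, the corresponding edit block might look
 like this:
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

TIMIT_AddLayer2=[
\end_layout

\begin_layout Plain Layout

  action="edit"
\end_layout

\begin_layout Plain Layout

  CurrLayer=1
\end_layout

\begin_layout Plain Layout

  NewLayer=2
\end_layout

\begin_layout Plain Layout

  # layer size picked up by the MEL script as $HDim$
\end_layout

\begin_layout Plain Layout

  HDim=1024
\end_layout

\begin_layout Plain Layout

  CurrModel="$ExpDir$
\backslash
TrainWithPreTrain
\backslash
dptmodel1
\backslash
cntkSpeech.dnn"
\end_layout

\begin_layout Plain Layout

  NewModel="$ExpDir$
\backslash
TrainWithPreTrain
\backslash
dptmodel2
\backslash
cntkSpeech.dnn.0"
\end_layout

\begin_layout Plain Layout

  editPath="$MelDir$
\backslash
add_layer.mel"
\end_layout

\begin_layout Plain Layout

]
\end_layout

\end_inset


\end_layout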

\begin_layout Subsection
Training a network with multi-task learning
\begin_inset Index idx
status open

\begin_layout Plain Layout
multi-task learning
\end_layout

\end_inset


\end_layout

\begin_layout Standard
One interesting approach to network training is multi-task learning, where
 the network is trained to optimize two objective functions simultaneously.
 This can be done in CNTK through the appropriate use of NDL.
 Let's assume that we have a network specified in NDL that has three hidden
 layers and output of the third hidden layer is defined as L3.
 Furthermore, let's assume we want to create a network that optimizes a
 weighted combination of phoneme classification and the dialect region of
 the speaker (dr1, dr2, etc in the TIMIT corpus).
 To do this, we first define a macro that can compute the weighted sum of
 two nodes as follows:
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

WtObjFcn(o1,w1,o2,w2)
\end_layout

\begin_layout Plain Layout

{
\end_layout

\begin_layout Plain Layout

  A1=Constant(w1)
\end_layout

\begin_layout Plain Layout

  A2=Constant(w2)
\end_layout

\begin_layout Plain Layout

  T1=Times(A1,o1)
\end_layout

\begin_layout Plain Layout

  T2=Times(A2,o2)
\end_layout

\begin_layout Plain Layout

  O=Plus(T1,T2)
\end_layout

\begin_layout Plain Layout

}
\end_layout

\end_inset

This macro can then be used to create a network for multi-task learning.
 
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

#objective function 1 
\end_layout

\begin_layout Plain Layout

BFF1=BFF(L3,LabelDim1,HiddenDim) 
\end_layout

\begin_layout Plain Layout

CE1=CrossEntropyWithSoftmax(labels,BFF1.FF.P,tag="evaluation")
\end_layout

\begin_layout Plain Layout

FER1 = ClassificationError(labels,BFF1.FF.P,tag="evaluation")
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

# objective function 2
\end_layout

\begin_layout Plain Layout

BFF2=BFF(L3,LabelDim2,HiddenDim)
\end_layout

\begin_layout Plain Layout

CE2=CrossEntropyWithSoftmax(regions,BFF2.FF.P,tag="evaluation")
\end_layout

\begin_layout Plain Layout

FER2 = ClassificationError(regions,BFF2.FF.P,tag="evaluation")
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

# weighted final objective function
\end_layout

\begin_layout Plain Layout

Alpha1=0.8
\end_layout

\begin_layout Plain Layout

Alpha2=0.2
\end_layout

\begin_layout Plain Layout

ObjFcn = WtObjFcn(CE1,Alpha1,CE2,Alpha2,tag="criterion")
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

# for decoding
\end_layout

\begin_layout Plain Layout

ScaledLogLikelihood=Minus(BFF1.FF.P, LogPrior)
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

# root Nodes
\end_layout

\begin_layout Plain Layout

OutputNodes=(ScaledLogLikelihood) 
\end_layout

\end_inset


\end_layout

\begin_layout Standard
The output of the hidden layer L3 is connected to two cross entropy nodes,
 one that predicts 
\begin_inset Quotes eld
\end_inset

labels
\begin_inset Quotes erd
\end_inset

 which correspond to phonetic labels in this example, and one that predicts
 
\begin_inset Quotes eld
\end_inset

regions
\begin_inset Quotes erd
\end_inset

, the dialect region of the speaker.
 The final criterion used for training is the weighted combination of these
 two criteria.
 By tagging the individual CrossEntropyWithSoftmax and ClassificationError
 nodes with the 
\begin_inset Quotes eld
\end_inset

evaluation
\begin_inset Quotes erd
\end_inset

 tag, the values of these nodes can also be monitored and logged during
 training.
 As before, the ScaledLogLikelihood is also computed for use in a decoder.
 
\end_layout
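
\begin_layout Standard
Since the network now consumes two sets of targets, the reader block needs
 a second label stream whose name matches the regions input node.
 A minimal sketch follows; the dialect-region MLF and mapping file names
 are hypothetical placeholders, and TIMIT has eight dialect regions.
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

labels=[
\end_layout

\begin_layout Plain Layout

  mlfFile="$MlfDir$
\backslash
TIMIT.train.align_cistate.mlf.cntk"
\end_layout

\begin_layout Plain Layout

  labelDim=183
\end_layout

\begin_layout Plain Layout

  labelMappingFile="$MlfDir$
\backslash
TIMIT.statelist"
\end_layout

\begin_layout Plain Layout

]
\end_layout

\begin_layout Plain Layout

# hypothetical files holding the dialect-region targets
\end_layout

\begin_layout Plain Layout

regions=[
\end_layout

\begin_layout Plain Layout

  mlfFile="$MlfDir$
\backslash
TIMIT.train.align_dr.mlf.cntk"
\end_layout

\begin_layout Plain Layout

  labelDim=8
\end_layout

\begin_layout Plain Layout

  labelMappingFile="$MlfDir$
\backslash
TIMIT.drlist"
\end_layout

\begin_layout Plain Layout

]
\end_layout

\end_inset


\end_layout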

\begin_layout Subsection
Training a network with multiple inputs
\begin_inset Index idx
status open

\begin_layout Plain Layout
multiple inputs
\end_layout

\end_inset


\end_layout

\begin_layout Standard
There are instances where it's desirable to input multiple features into
 a network, such as MFCC and FBANK coefficients.
 In this case, multiple feature inputs (two, in this example) need to be
 specified in the 
\emph on
reader
\emph default
, and the network needs to be constructed appropriately using NDL.
 The following is a snippet from reader configuration block showing how
 two different feature types, defined as 
\emph on
features1
\emph default
 and 
\emph on
features2
\emph default
 can be read, along with the phonetic labels.
 In this case, the log mel filterbank features which are 72-dimensional
 will use an 11-frame context window, while the MFCC features which are
 39-dimensional will use a single frame of input.
 
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

features1=[
\end_layout

\begin_layout Plain Layout

  dim=792
\end_layout

\begin_layout Plain Layout

  scpFile="$ScpDir$
\backslash
TIMIT.train.scp.fbank.fullpath"
\end_layout

\begin_layout Plain Layout

]
\end_layout

\begin_layout Plain Layout

features2=[
\end_layout

\begin_layout Plain Layout

  dim=39
\end_layout

\begin_layout Plain Layout

  scpFile="$ScpDir$
\backslash
TIMIT.train.scp.mfcc.fullpath"
\end_layout

\begin_layout Plain Layout

] 
\end_layout

\begin_layout Plain Layout

labels=[
\end_layout

\begin_layout Plain Layout

  mlfFile="$MlfDir$
\backslash
TIMIT.train.align_cistate.mlf.cntk"
\end_layout

\begin_layout Plain Layout

  labelMappingFile="$MlfDir$
\backslash
TIMIT.statelist"
\end_layout

\begin_layout Plain Layout

  labelDim=183
\end_layout

\begin_layout Plain Layout

]
\end_layout

\end_inset


\end_layout

\begin_layout Standard
The NDL for a network with these inputs and outputs can be written in a
 number of ways.
 One way is to define a macro that builds a layer taking two inputs, as
 follows:
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

SBFF2(input1,rowCount,colCount1,input2,colCount2)
\end_layout

\begin_layout Plain Layout

{
\end_layout

\begin_layout Plain Layout

  B=Parameter(rowCount,init="fixedValue",value=0)
\end_layout

\begin_layout Plain Layout

  W1=Parameter(rowCount, colCount1)
\end_layout

\begin_layout Plain Layout

  W2=Parameter(rowCount, colCount2)
\end_layout

\begin_layout Plain Layout

  T1=Times(W1,input1)
\end_layout

\begin_layout Plain Layout

  T2=Times(W2,input2)
\end_layout

\begin_layout Plain Layout

  P1=Plus(T1,T2)
\end_layout

\begin_layout Plain Layout

  P2=Plus(P1,B)
\end_layout

\begin_layout Plain Layout

  S=Sigmoid(P2) 
\end_layout

\begin_layout Plain Layout

}
\end_layout

\end_inset

This macro can then be used to create the network.
 For example, inputs and the first layer would be declared using the NDL
 code below.
 
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

FeatDim1 = 792
\end_layout

\begin_layout Plain Layout

FeatDim2 = 39
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

features1=Input(FeatDim1,tag="feature")
\end_layout

\begin_layout Plain Layout

features2=Input(FeatDim2,tag="feature") 
\end_layout

\begin_layout Plain Layout

labels=Input(LabelDim1,tag="label")
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

featInput1=MeanVarNorm(features1) 
\end_layout

\begin_layout Plain Layout

featInput2=MeanVarNorm(features2)
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

L1 = SBFF2(featInput1, HiddenDim, FeatDim1, featInput2, FeatDim2)
\end_layout

\end_inset

The rest of the hidden layers and the output layer with a cross entropy
 objective function would be the same as previous examples.
 Notice that the names and dimensionality of the input and output data have
 to be the same in both the NDL model description and the reader configuration.
 
\end_layout

\begin_layout Subsection
Evaluating networks
\begin_inset Index idx
status open

\begin_layout Plain Layout
Evaluating networks
\end_layout

\end_inset

 and cross validation
\begin_inset Index idx
status open

\begin_layout Plain Layout
cross validation
\end_layout

\end_inset


\end_layout

\begin_layout Standard
Once a network has been trained, we would like to test its performance.
 To do so, we use a configuration very similar to training, except the 
\begin_inset Quotes eld
\end_inset

train
\begin_inset Quotes erd
\end_inset

 action is replaced with the action 
\begin_inset Quotes eld
\end_inset

eval
\begin_inset Quotes erd
\end_inset

, and the data reader needs to be updated to process the development or
 evaluation data rather than the training data.
 Of course, the SGD optimization block is not necessary and can be omitted.
 
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

action="eval"
\end_layout

\end_inset


\end_layout

\begin_layout Standard
One version of cross validation is early stopping, where a fixed learning
 schedule is used during training, but all intermediate models are evaluated
 and the one with the best performance on a development set is selected
 for final evaluation.
 If you perform network training for a large number of epochs, you can efficient
ly evaluate the performance of a series of models using the 
\begin_inset Quotes eld
\end_inset

cv
\begin_inset Quotes erd
\end_inset

 action.
 
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

action="cv"
\end_layout

\end_inset

In addition, you can choose the models you wish to evaluate if you do not
 want the output of every single epoch.
 This is done using the 
\begin_inset Quotes eld
\end_inset

crossValidationInterval
\begin_inset Quotes erd
\end_inset

 configuration parameter, which takes three colon-separated values that
 are interpreted as a MATLAB-style specification of an array.
 In the following example, the models from epochs 0, 2, 4, etc.
 up to the final epoch will be evaluated.
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

crossValidationInterval=0:2:25
\end_layout

\end_inset


\end_layout
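
\begin_layout Standard
Putting these pieces together, a sketch of a complete cv block is shown
 below; the block name and model path are illustrative, and the development-set
 reader is the same as in the cvReader example earlier in this section.
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

TIMIT_CrossValidate=[
\end_layout

\begin_layout Plain Layout

  action="cv"
\end_layout

\begin_layout Plain Layout

  modelPath="$ExpDir$
\backslash
TrainSimpleNetwork
\backslash
model
\backslash
cntkSpeech.dnn"
\end_layout

\begin_layout Plain Layout

  crossValidationInterval=0:2:25
\end_layout

\begin_layout Plain Layout

  # reader block: development-set reader, as in the cvReader example
\end_layout

\begin_layout Plain Layout

]
\end_layout

\end_inset


\end_layout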

\begin_layout Subsection
Writing network outputs
\begin_inset Index idx
status open

\begin_layout Plain Layout
Writing network outputs
\end_layout

\end_inset

 to files
\end_layout

\begin_layout Standard
There are many examples where it is necessary to write either the output
 or an internal representation of the network to a file.
 In speech recognition, some common examples of this are writing the output
 of the network to a file for decoding or construction of a tandem style
 acoustic model, or writing the internal representation to create bottleneck
 features.
 CNTK supports this through the use of the 
\begin_inset Quotes eld
\end_inset

write
\begin_inset Quotes erd
\end_inset

 action, the specification of a desired output node, and the use of a data
 
\emph on
writer
\emph default
.
 To write data from a particular node, the node is specified using the configura
tion parameter 
\begin_inset Quotes eld
\end_inset

outputNodeNames
\begin_inset Quotes erd
\end_inset

.
 For example, for decoding, this would typically be the ScaledLogLikelihood
 node.
 A data reader is used to specify the names of the files that will be input
 to the network and a data writer is used to specify the names of the files
 to store the output.
 All nodes specified in the reader and writer must have SCP files that have
 a line-by-line correspondence to each other.
 Data is read from the first feature file listed in the SCP file specified
 in the reader and processed by the network.
 The values at the desired output node are then passed to the writer and
 written to the first file in the SCP file that corresponds to that node
 in the writer configuration.
 The rest of the files are processed in a similar manner, going line-by-line
 down the SCP files in the reader and writer.
 The data is written as an HTK format feature file with the 
\begin_inset Quotes eld
\end_inset

USER
\begin_inset Quotes erd
\end_inset

 parameter kind.
 
\end_layout

\begin_layout Standard
In the following example, data from the autoencoder described previously
 will be written to files.
 In particular, the activations in the second hidden layer prior to the
 sigmoid non-linearity will be captured.
 This node is specified as 
\begin_inset Quotes eld
\end_inset

L2.BFF.FF.P
\begin_inset Quotes erd
\end_inset

.
 The output file names are specified using the SCP file in the writer, which
 is line-by-line parallel with the SCP file used by the reader.
 
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

TIMIT_WriteBottleneck=[
\end_layout

\begin_layout Plain Layout

  action="write"
\end_layout

\begin_layout Plain Layout

  modelPath="$ExpDir$
\backslash
TrainAutoEncoder
\backslash
model
\backslash
cntkSpeech.dnn" 
\end_layout

\begin_layout Plain Layout

  outputNodeNames="L2.BFF.FF.P" 
\end_layout

\begin_layout Plain Layout

  reader=[
\end_layout

\begin_layout Plain Layout

    readerType="HTKMLFReader"
\end_layout

\begin_layout Plain Layout

    features=[
\end_layout

\begin_layout Plain Layout

    dim=792
\end_layout

\begin_layout Plain Layout

    scpFile="$ScpDir$
\backslash
TIMIT.core.scp.fbank.fullpath"
\end_layout

\begin_layout Plain Layout

  ]
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

  writer=[
\end_layout

\begin_layout Plain Layout

    writerType="HTKMLFWriter"
\end_layout

\begin_layout Plain Layout

    L2.BFF.FF.P = [
\end_layout

\begin_layout Plain Layout

      dim=64
\end_layout

\begin_layout Plain Layout

      scpFile="$ScpDir$
\backslash
TIMIT.core.scp.bottleneck.fullpath"
\end_layout

\begin_layout Plain Layout

    ]
\end_layout

\begin_layout Plain Layout

  ]
\end_layout

\begin_layout Plain Layout

]
\end_layout

\end_inset


\end_layout

\begin_layout Standard
For writing scaled log likelihoods, the configuration file would look very
 similar, except a different node would be specified, e.g.
 ScaledLogLikelihood, and the writer should be changed appropriately.
 
\end_layout

\begin_layout Standard
If you are unsure of the exact node name you need to specify, you can use
 the 
\begin_inset Quotes eld
\end_inset

dumpnode
\begin_inset Quotes erd
\end_inset

 action with printValues set to false.
 This will generate a text file listing of all valid nodes in the network.
 
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

TIMIT_DumpNodes=[
\end_layout

\begin_layout Plain Layout

  action="dumpnode"
\end_layout

\begin_layout Plain Layout

  modelPath="$ExpDir$
\backslash
TrainAutoEncoder
\backslash
model
\backslash
cntkSpeech.dnn" 
\end_layout

\begin_layout Plain Layout

  printValues=false
\end_layout

\begin_layout Plain Layout

]
\end_layout

\end_inset


\end_layout

\begin_layout Section
RNN Language Model
\begin_inset Index idx
status open

\begin_layout Plain Layout
RNN ! Language Model
\end_layout

\end_inset


\end_layout

\begin_layout Standard
Recurrent neural network
\begin_inset Index idx
status open

\begin_layout Plain Layout
Recurrent ! neural network (RNN)
\end_layout

\end_inset

 (RNN
\begin_inset Index idx
status open

\begin_layout Plain Layout
RNN
\end_layout

\end_inset

) language models have been proven to obtain state-of-the-art performance
 in language modeling.
 We use the Penn Treebank data set to demonstrate how to build an RNN language
 model with CNTK.
 In particular, we will build a class-based RNN language model.
 The setup file is CNTK
\backslash
MachineLearning
\backslash
cn
\backslash
rnnlmConfig.txt, which consists of two components: train and test.
 
\end_layout

\begin_layout Subsection
Train
\begin_inset Index idx
status open

\begin_layout Plain Layout
Train
\end_layout

\end_inset


\end_layout

\begin_layout Standard
Training parameters are specified in this section, with the most important
 ones listed as follows; a condensed configuration sketch follows the list.
\end_layout

\begin_layout Itemize
action
\begin_inset Index idx
status open

\begin_layout Plain Layout
action
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

train
\begin_inset Quotes erd
\end_inset

.
 It indicates that this section is for training.
\end_layout

\begin_layout Itemize
minibatchSize
\begin_inset Index idx
status open

\begin_layout Plain Layout
minibatchSize
\end_layout

\end_inset

=10.
 In RNN training, the minibatch size is the truncation size of the truncated
 BPTT.
 That is, model parameters are updated after every 10 words.
\end_layout

\begin_layout Itemize
deviceId
\begin_inset Index idx
status open

\begin_layout Plain Layout
deviceId
\end_layout

\end_inset

=-1.
 A value of -1 means CPU; one can switch to a GPU by specifying a non-negative
 GPU id.
\end_layout

\begin_layout Itemize
epochSize
\begin_inset Index idx
status open

\begin_layout Plain Layout
epochSize
\end_layout

\end_inset

=4430000.
 The maximum number of words used to train the RNN model in each epoch.
 This provides a way to use only a portion of the entire training data for
 model training.
 
\end_layout

\begin_layout Itemize
SimpleNetworkBuilder
\begin_inset Index idx
status open

\begin_layout Plain Layout
SimpleNetworkBuilder
\end_layout

\end_inset

 section
\end_layout

\begin_deeper
\begin_layout Itemize
rnnType
\begin_inset Index idx
status open

\begin_layout Plain Layout
rnnType
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

CLASSLM
\begin_inset Quotes erd
\end_inset

.
 The RNN network structure.
 It consists of an input layer, a recurrent hidden layer, and an output
 layer (covering both word classes and words).
 
\end_layout

\begin_layout Itemize
trainingCriterion
\begin_inset Index idx
status open

\begin_layout Plain Layout
trainingCriterion
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

ClassCrossEntropyWithSoftmax
\begin_inset Quotes erd
\end_inset

.
 Training criterion used in model training.
\end_layout

\begin_layout Itemize
nodeType
\begin_inset Index idx
status open

\begin_layout Plain Layout
nodeType
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

Sigmoid
\begin_inset Quotes erd
\end_inset

.
 Non-linearity function used in hidden layer.
\end_layout

\begin_layout Itemize
layerSizes
\begin_inset Index idx
status open

\begin_layout Plain Layout
layerSizes
\end_layout

\end_inset

=10000:200:10000.
 Sizes of the input, hidden, and output layers.
 The input and output layer sizes are equal to the vocabulary size, while
 the hidden layer size is normally in the range of 50 to 500.
\end_layout

\begin_layout Itemize
uniformInit
\begin_inset Index idx
status open

\begin_layout Plain Layout
uniformInit
\end_layout

\end_inset

=true.
 Whether to use uniformly randomized values for initial parameter weights.
\end_layout

\begin_layout Itemize
vocabSize=10000.
 The vocabulary size.
\end_layout

\begin_layout Itemize
nbrClass=50.
 The number of word classes.
\end_layout

\end_deeper
\begin_layout Itemize
SGD
\begin_inset Index idx
status open

\begin_layout Plain Layout
SGD
\end_layout

\end_inset

 section
\end_layout

\begin_deeper
\begin_layout Itemize
learningRatesPerSample
\begin_inset Index idx
status open

\begin_layout Plain Layout
learningRatesPerSample
\end_layout

\end_inset

=0.1.
 Learning rate in stochastic gradient descent.
\end_layout

\begin_layout Itemize
momentumPerMB
\begin_inset Index idx
status open

\begin_layout Plain Layout
momentumPerMB
\end_layout

\end_inset

=0.
 Momentum used in updating parameter weights.
 The updating equation is 
\begin_inset Formula $g_{new}=(1-m)*g+m*g_{old}$
\end_inset

, where 
\begin_inset Formula $m$
\end_inset

 is momentum, 
\begin_inset Formula $g$
\end_inset

 is the gradient computed in backward pass, 
\begin_inset Formula $g_{old}$
\end_inset

 is the gradient value in previous mini-batch, and 
\begin_inset Formula $g_{new}$
\end_inset

 is the new gradient for the current mini-batch.
\end_layout

\begin_layout Itemize
gradientClippingWithTruncation
\begin_inset Index idx
status open

\begin_layout Plain Layout
gradientClippingWithTruncation
\end_layout

\end_inset

=true.
 Whether to bound the gradient values using simple truncation (true) or
 a norm-based approach (false), which has a higher cost.
\end_layout

\begin_layout Itemize
clippingThresholdPerSample
\begin_inset Index idx
status open

\begin_layout Plain Layout
clippingThresholdPerSample
\end_layout

\end_inset

=15.0.
 Indicates that the absolute value of the gradient should never be larger
 than 15.
\end_layout

\begin_layout Itemize
maxEpochs
\begin_inset Index idx
status open

\begin_layout Plain Layout
maxEpochs
\end_layout

\end_inset

=40.
 Maximum number of training epochs.
\end_layout

\begin_layout Itemize
numMBsToShowResult
\begin_inset Index idx
status open

\begin_layout Plain Layout
numMBsToShowResult
\end_layout

\end_inset

=2000.
 The frequency of showing training/validation loss results (in terms of
 how many mini-batches are processed).
\end_layout

\begin_layout Itemize
gradUpdateType
\begin_inset Index idx
status open

\begin_layout Plain Layout
gradUpdateType
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

none
\begin_inset Quotes erd
\end_inset

.
 How the gradients are computed.
 The value none stands for the standard gradient update (no special treatment).
 One can also choose adagrad or rmsprop.
\end_layout

\begin_layout Itemize
modelPath
\begin_inset Index idx
status open

\begin_layout Plain Layout
modelPath
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

C:
\backslash
CNTKExp
\backslash
RNN
\backslash
log
\backslash
modelRnnCNTK
\begin_inset Quotes erd
\end_inset

.
 The resulting RNN model file location.
\end_layout

\begin_layout Itemize
loadBestModel
\begin_inset Index idx
status open

\begin_layout Plain Layout
loadBestModel
\end_layout

\end_inset

=true.
 If true, each epoch starts training from the best of the previous models.
\end_layout

\begin_layout Itemize
reduceLearnRateIfImproveLessThan
\begin_inset Index idx
status open

\begin_layout Plain Layout
reduceLearnRateIfImproveLessThan
\end_layout

\end_inset

=0.001.
 The learning rate is reduced if the difference between the previous and
 current criterion values is smaller than the previous criterion multiplied
 by reduceLearnRateIfImproveLessThan.
\end_layout

\begin_layout Itemize
continueReduce
\begin_inset Index idx
status open

\begin_layout Plain Layout
continueReduce
\end_layout

\end_inset

=true.
 If true, once the learning rate has been reduced, it continues to be reduced
 in every subsequent epoch.
\end_layout

\begin_layout Itemize
learnRateDecreaseFactor
\begin_inset Index idx
status open

\begin_layout Plain Layout
learnRateDecreaseFactor
\end_layout

\end_inset

=0.5.
 Learning rate decrease factor.
\end_layout

\end_deeper
\begin_layout Itemize
reader
\begin_inset Index idx
status open

\begin_layout Plain Layout
reader
\end_layout

\end_inset

 section
\end_layout

\begin_deeper
\begin_layout Itemize
wordclass
\begin_inset Index idx
status open

\begin_layout Plain Layout
wordclass
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

C:
\backslash
CNTKExp
\backslash
RNN
\backslash
data
\backslash
PennTreeBank
\backslash
vocab.txt
\begin_inset Quotes erd
\end_inset

.
 The word class file, which contains words, their ids, and their class
 ids, in the following format:
\end_layout

\begin_deeper
\begin_layout Standard
word_id 
\backslash
t frequency 
\backslash
t word_string 
\backslash
t word_class
\end_layout

\begin_layout Standard
word_id is a unique non-negative integer, frequency is the frequency of
 the word (optional), word_string is the word string (low-frequency words
 may be mapped to <unk>), and word_class is the class id of the word.
 Word classes can be derived using frequency-based heuristics 
\begin_inset CommandInset citation
LatexCommand cite
key "Extensions-RNN-LM-Mikolov:2011"

\end_inset

or in more sophisticated ways
\begin_inset CommandInset citation
LatexCommand cite
key "SpeedRegularization-Zweig:2013"

\end_inset

.
 
\end_layout
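
\begin_layout Standard
For concreteness, a few illustrative lines of such a file are shown below;
 the ids, counts, and class assignments are made up, and the columns are
 tab-separated in the actual file.
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

0 5000 </s> 0
\end_layout

\begin_layout Plain Layout

1 4500 the 0
\end_layout

\begin_layout Plain Layout

2 4000 <unk> 1
\end_layout

\begin_layout Plain Layout

3 3200 of 1
\end_layout

\end_inset


\end_layout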

\end_deeper
\begin_layout Itemize
file
\begin_inset Index idx
status open

\begin_layout Plain Layout
file
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

C:
\backslash
CNTKExp
\backslash
RNN
\backslash
data
\backslash
PennTreeBank
\backslash
ptb.train.cntk.txt
\begin_inset Quotes erd
\end_inset

.
 The location of the training data file, which has the following format:
\end_layout

\begin_deeper
\begin_layout Standard
</s> word1 word2 ...
 </s>
\end_layout
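
\begin_layout Standard
For example, a single line of the training file might look like the following;
 the sentence itself is only illustrative.
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

</s> the cat sat on the mat </s>
\end_layout

\end_inset


\end_layout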

\end_deeper
\end_deeper
\begin_layout Itemize
cvReader
\begin_inset Index idx
status open

\begin_layout Plain Layout
cvReader
\end_layout

\end_inset

 section
\end_layout

\begin_deeper
\begin_layout Itemize
wordclass
\begin_inset Index idx
status open

\begin_layout Plain Layout
wordclass
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

C:
\backslash
CNTKExp
\backslash
RNN
\backslash
data
\backslash
PennTreeBank
\backslash
vocab.txt
\begin_inset Quotes erd
\end_inset

.
 The word class file, which contains words, their ids, and their class ids.
\end_layout

\begin_layout Itemize
file
\begin_inset Index idx
status open

\begin_layout Plain Layout
file
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

C:
\backslash
CNTKExp
\backslash
RNN
\backslash
data
\backslash
PennTreeBank
\backslash
ptb.valid.cntk.txt
\begin_inset Quotes erd
\end_inset

.
 Validation data file location.
 It has the same format as training data.
\end_layout

\end_deeper
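
\begin_layout Standard
Putting the pieces together, a condensed sketch of the train section is
 shown below; paths are shortened to file names, and this is illustrative
 rather than the verbatim contents of rnnlmConfig.txt.
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

# condensed sketch; full Windows paths from the list above are omitted
\end_layout

\begin_layout Plain Layout

train=[
\end_layout

\begin_layout Plain Layout

  action="train"
\end_layout

\begin_layout Plain Layout

  minibatchSize=10
\end_layout

\begin_layout Plain Layout

  deviceId=-1
\end_layout

\begin_layout Plain Layout

  epochSize=4430000
\end_layout

\begin_layout Plain Layout

  SimpleNetworkBuilder=[
\end_layout

\begin_layout Plain Layout

    rnnType="CLASSLM"
\end_layout

\begin_layout Plain Layout

    trainingCriterion="ClassCrossEntropyWithSoftmax"
\end_layout

\begin_layout Plain Layout

    nodeType="Sigmoid"
\end_layout

\begin_layout Plain Layout

    layerSizes=10000:200:10000
\end_layout

\begin_layout Plain Layout

    uniformInit=true
\end_layout

\begin_layout Plain Layout

    vocabSize=10000
\end_layout

\begin_layout Plain Layout

    nbrClass=50
\end_layout

\begin_layout Plain Layout

  ]
\end_layout

\begin_layout Plain Layout

  SGD=[
\end_layout

\begin_layout Plain Layout

    learningRatesPerSample=0.1
\end_layout

\begin_layout Plain Layout

    momentumPerMB=0
\end_layout

\begin_layout Plain Layout

    maxEpochs=40
\end_layout

\begin_layout Plain Layout

    learnRateDecreaseFactor=0.5
\end_layout

\begin_layout Plain Layout

    modelPath="modelRnnCNTK"
\end_layout

\begin_layout Plain Layout

  ]
\end_layout

\begin_layout Plain Layout

  reader=[
\end_layout

\begin_layout Plain Layout

    wordclass="vocab.txt"
\end_layout

\begin_layout Plain Layout

    file="ptb.train.cntk.txt"
\end_layout

\begin_layout Plain Layout

  ]
\end_layout

\begin_layout Plain Layout

  cvReader=[
\end_layout

\begin_layout Plain Layout

    wordclass="vocab.txt"
\end_layout

\begin_layout Plain Layout

    file="ptb.valid.cntk.txt"
\end_layout

\begin_layout Plain Layout

  ]
\end_layout

\begin_layout Plain Layout

]
\end_layout

\end_inset


\end_layout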
\begin_layout Subsection
Test
\begin_inset Index idx
status open

\begin_layout Plain Layout
Test
\end_layout

\end_inset


\end_layout

\begin_layout Standard
Test parameters are specified in this section, with the most important ones
 listed as follows; a condensed configuration sketch follows the list.
\end_layout

\begin_layout Itemize
action
\begin_inset Index idx
status open

\begin_layout Plain Layout
action
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

eval
\begin_inset Quotes erd
\end_inset

.
 It indicates that this section is for testing.
\end_layout

\begin_layout Itemize
minibatchSize
\begin_inset Index idx
status open

\begin_layout Plain Layout
minibatchSize
\end_layout

\end_inset

=100.
 Evaluation truncation size is 100.
\end_layout

\begin_layout Itemize
deviceId
\begin_inset Index idx
status open

\begin_layout Plain Layout
deviceId
\end_layout

\end_inset

=-1.
 Use CPU device.
 
\end_layout

\begin_layout Itemize
reader
\begin_inset Index idx
status open

\begin_layout Plain Layout
reader
\end_layout

\end_inset

 section
\end_layout

\begin_deeper
\begin_layout Itemize
wordclass
\begin_inset Index idx
status open

\begin_layout Plain Layout
wordclass
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

C:
\backslash
CNTKExp
\backslash
RNN
\backslash
data
\backslash
PennTreeBank
\backslash
vocab.txt
\begin_inset Quotes erd
\end_inset

.
 The word class file, which contains words, their ids, and their class ids.
\end_layout

\begin_layout Itemize
file
\begin_inset Index idx
status open

\begin_layout Plain Layout
file
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

C:
\backslash
CNTKExp
\backslash
RNN
\backslash
data
\backslash
PennTreeBank
\backslash
ptb.test.cntk.txt
\begin_inset Quotes erd
\end_inset

.
 Test data file location.
 It has the same format as training data.
\end_layout

\end_deeper
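
\begin_layout Standard
A condensed sketch of the test section is shown below; as in the training
 sketch, paths are shortened to file names, and the modelPath entry is assumed
 to point at the model produced by the train section.
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

# condensed sketch; full paths omitted
\end_layout

\begin_layout Plain Layout

test=[
\end_layout

\begin_layout Plain Layout

  action="eval"
\end_layout

\begin_layout Plain Layout

  minibatchSize=100
\end_layout

\begin_layout Plain Layout

  deviceId=-1
\end_layout

\begin_layout Plain Layout

  modelPath="modelRnnCNTK"
\end_layout

\begin_layout Plain Layout

  reader=[
\end_layout

\begin_layout Plain Layout

    wordclass="vocab.txt"
\end_layout

\begin_layout Plain Layout

    file="ptb.test.cntk.txt"
\end_layout

\begin_layout Plain Layout

  ]
\end_layout

\begin_layout Plain Layout

]
\end_layout

\end_inset


\end_layout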
\begin_layout Section
LSTM
\begin_inset Index idx
status open

\begin_layout Plain Layout
LSTM
\end_layout

\end_inset

 Language Model
\end_layout

\begin_layout Standard
We apply a long short-term memory
\begin_inset Index idx
status open

\begin_layout Plain Layout
long short-term memory
\end_layout

\end_inset

 (LSTM) recurrent neural network to the language modeling task.
 The example setup is at ExampleSetups
\backslash
LM
\backslash
LSTMLM
\backslash
lstmlmconfig.txt.
 In this setup, the following need to be specified for training:
\end_layout

\begin_layout Subsection
Training
\end_layout

\begin_layout Itemize
action
\begin_inset Index idx
status open

\begin_layout Plain Layout
action
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

train
\begin_inset Quotes erd
\end_inset

 : this informs CNTK to train a model.
 
\end_layout

\begin_layout Itemize
deviceId
\begin_inset Index idx
status open

\begin_layout Plain Layout
deviceId
\end_layout

\end_inset

=-1 : this specifies using CPU.
\end_layout

\begin_layout Itemize
SimpleNetworkBuilder
\begin_inset Index idx
status open

\begin_layout Plain Layout
SimpleNetworkBuilder
\end_layout

\end_inset

: 
\end_layout

\begin_deeper
\begin_layout Itemize
recurrentLayer
\begin_inset Index idx
status open

\begin_layout Plain Layout
recurrentLayer
\end_layout

\end_inset

=1 : this specifies that layer 1 is a recurrent layer.
\end_layout

\begin_layout Itemize
rnnType
\begin_inset Index idx
status open

\begin_layout Plain Layout
rnnType
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

CLASSLM
\begin_inset Quotes erd
\end_inset

 : this informs CNTK to use the class-based LSTM network in SimpleNetworkBuilder.
\end_layout

\begin_layout Itemize
trainingCriterion
\begin_inset Index idx
status open

\begin_layout Plain Layout
trainingCriterion
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

ClassCrossEntropyWithSoftmax
\begin_inset Quotes erd
\end_inset

 specifies that training uses the class-based cross-entropy criterion.
\end_layout

\begin_layout Itemize
evalCriterion
\begin_inset Index idx
status open

\begin_layout Plain Layout
evalCriterion
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

ClassCrossEntropyWithSoftmax
\begin_inset Quotes erd
\end_inset

 specifies that the validation set also uses class-based cross entropy for
 evaluation.
\end_layout

\begin_layout Itemize
layerSizes
\begin_inset Index idx
status open

\begin_layout Plain Layout
layerSizes
\end_layout

\end_inset

=10000:200:10000 : this specifies input, hidden and output layer sizes.
\end_layout

\begin_layout Itemize
vocabSize=10000.
 The vocabulary size.
\end_layout

\begin_layout Itemize
nbrClass=50.
 The number of word classes.
\end_layout

\end_deeper
\begin_layout Itemize
SGD
\begin_inset Index idx
status open

\begin_layout Plain Layout
SGD
\end_layout

\end_inset


\end_layout

\begin_deeper
\begin_layout Itemize
useAdagrad
\begin_inset Index idx
status open

\begin_layout Plain Layout
useAdagrad
\end_layout

\end_inset

=true : this specifies using AdaGrad for weight update.
\end_layout

\begin_layout Itemize
modelPath
\begin_inset Index idx
status open

\begin_layout Plain Layout
modelPath
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

c:
\backslash
temp
\backslash
penntreebank
\backslash
cntkdebug.dnn
\begin_inset Quotes erd
\end_inset

 : this is the trained model file name.
\end_layout

\end_deeper
\begin_layout Itemize
Reader
\begin_inset Index idx
status open

\begin_layout Plain Layout
reader
\end_layout

\end_inset

: specifies the training reader.
\end_layout

\begin_deeper
\begin_layout Itemize
readerType
\begin_inset Index idx
status open

\begin_layout Plain Layout
readerType
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

LMSequenceReader
\begin_inset Quotes erd
\end_inset

 : specifies using the language model sequence reader.
\end_layout

\begin_layout Itemize
wordclass
\begin_inset Index idx
status open

\begin_layout Plain Layout
wordclass
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

c:
\backslash
exp
\backslash
penntreebank
\backslash
data
\backslash
wordclass.txt
\begin_inset Quotes erd
\end_inset

 : specifies the word class information file.
\end_layout

\begin_layout Itemize
file
\begin_inset Index idx
status open

\begin_layout Plain Layout
file
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

c:
\backslash
exp
\backslash
penntreebank
\backslash
data
\backslash
ptb.train.cntk.txt
\begin_inset Quotes erd
\end_inset

 : specifies the training file.
\end_layout

\end_deeper
\begin_layout Itemize
cvReader
\begin_inset Index idx
status open

\begin_layout Plain Layout
cvReader
\end_layout

\end_inset

: specifies the cross-validation reader.
\end_layout

\begin_deeper
\begin_layout Itemize
readerType
\begin_inset Index idx
status open

\begin_layout Plain Layout
readerType
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

LMSequenceReader
\begin_inset Quotes erd
\end_inset

 : specifies using the language model sequence reader.
\end_layout

\begin_layout Itemize
wordclass
\begin_inset Index idx
status open

\begin_layout Plain Layout
wordclass
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

c:
\backslash
exp
\backslash
penntreebank
\backslash
data
\backslash
wordclass.txt
\begin_inset Quotes erd
\end_inset

 : specifies the word class information file.
\end_layout

\begin_layout Itemize
file
\begin_inset Index idx
status open

\begin_layout Plain Layout
file
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

c:
\backslash
exp
\backslash
penntreebank
\backslash
data
\backslash
ptb.valid.cntk.txt
\begin_inset Quotes erd
\end_inset

 : specifies the validation file.
\end_layout

\end_deeper
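
\begin_layout Standard
Putting these pieces together, the following is a minimal sketch of the
 training block of lstmlmconfig.txt, assembled from the parameters above.
 The block name train and the exact nesting are illustrative rather than
 copied from the setup file, and paths are abbreviated with ellipses; replace
 them with the full locations listed earlier.
 As background, the class-based criterion factorizes the word probability
 through the word's class, 
\begin_inset Formula $P(w_{t}\mid h_{t})=P(c(w_{t})\mid h_{t})\,P(w_{t}\mid c(w_{t}),h_{t})$
\end_inset

, which replaces one softmax over the full vocabulary by two much smaller
 ones.
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

# sketch of the training command block (abridged, not the verbatim file)
\end_layout

\begin_layout Plain Layout

train = [
\end_layout

\begin_layout Plain Layout

    action = "train"
\end_layout

\begin_layout Plain Layout

    deviceId = -1                      # CPU
\end_layout

\begin_layout Plain Layout

    SimpleNetworkBuilder = [
\end_layout

\begin_layout Plain Layout

        rnnType = "CLASSLM"            # class-based LSTM
\end_layout

\begin_layout Plain Layout

        recurrentLayer = 1             # layer 1 is recurrent
\end_layout

\begin_layout Plain Layout

        layerSizes = 10000:200:10000   # input:hidden:output
\end_layout

\begin_layout Plain Layout

        vocabSize = 10000
\end_layout

\begin_layout Plain Layout

        nbrClass = 50
\end_layout

\begin_layout Plain Layout

        trainingCriterion = "ClassCrossEntropyWithSoftmax"
\end_layout

\begin_layout Plain Layout

        evalCriterion = "ClassCrossEntropyWithSoftmax"
\end_layout

\begin_layout Plain Layout

    ]
\end_layout

\begin_layout Plain Layout

    SGD = [
\end_layout

\begin_layout Plain Layout

        useAdagrad = true
\end_layout

\begin_layout Plain Layout

        modelPath = "..."              # cntkdebug.dnn, full path as above
\end_layout

\begin_layout Plain Layout

    ]
\end_layout

\begin_layout Plain Layout

    reader = [
\end_layout

\begin_layout Plain Layout

        readerType = "LMSequenceReader"
\end_layout

\begin_layout Plain Layout

        wordclass = "..."              # wordclass.txt
\end_layout

\begin_layout Plain Layout

        file = "..."                   # ptb.train.cntk.txt
\end_layout

\begin_layout Plain Layout

    ]
\end_layout

\begin_layout Plain Layout

    cvReader = [
\end_layout

\begin_layout Plain Layout

        readerType = "LMSequenceReader"
\end_layout

\begin_layout Plain Layout

        wordclass = "..."              # wordclass.txt
\end_layout

\begin_layout Plain Layout

        file = "..."                   # ptb.valid.cntk.txt
\end_layout

\begin_layout Plain Layout

    ]
\end_layout

\begin_layout Plain Layout

]
\end_layout

\end_inset


\end_layout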
\begin_layout Subsection
Test
\begin_inset Index idx
status open

\begin_layout Plain Layout
Test
\end_layout

\end_inset


\end_layout

\begin_layout Standard
In this setup, the following need to be specified for testing; a consolidated
 sketch follows the list.
\end_layout

\begin_layout Itemize
action
\begin_inset Index idx
status open

\begin_layout Plain Layout
action
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

eval
\begin_inset Quotes erd
\end_inset

 : this informs CNTK to run network evaluation.
 
\end_layout

\begin_layout Itemize
deviceId
\begin_inset Index idx
status open

\begin_layout Plain Layout
deviceId
\end_layout

\end_inset

=-1 : this specifies using the CPU.
\end_layout

\begin_layout Itemize
modelPath
\begin_inset Index idx
status open

\begin_layout Plain Layout
modelPath
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

c:
\backslash
temp
\backslash
penntreebank
\backslash
cntkdebug.dnn
\begin_inset Quotes erd
\end_inset

 : this is the trained model file name.
\end_layout

\begin_layout Itemize
Reader
\begin_inset Index idx
status open

\begin_layout Plain Layout
reader
\end_layout

\end_inset

: specifies test set reader
\end_layout

\begin_deeper
\begin_layout Itemize
readerType
\begin_inset Index idx
status open

\begin_layout Plain Layout
readerType
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

LMSequenceReader
\begin_inset Quotes erd
\end_inset

 : specifies using the language model sequence reader.
\end_layout

\begin_layout Itemize
wordclass
\begin_inset Index idx
status open

\begin_layout Plain Layout
wordclass
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

c:
\backslash
exp
\backslash
penntreebank
\backslash
data
\backslash
wordclass.txt
\begin_inset Quotes erd
\end_inset

 : specifies the word class information file.
\end_layout

\begin_layout Itemize
file
\begin_inset Index idx
status open

\begin_layout Plain Layout
file
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

c:
\backslash
exp
\backslash
penntreebank
\backslash
data
\backslash
ptb.test.cntk.txt
\begin_inset Quotes erd
\end_inset

 : specifies the test file.
\end_layout

\end_deeper
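
\begin_layout Standard
A corresponding minimal sketch of the evaluation block, under the same
 assumptions and abbreviations as the training sketch above:
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

test = [
\end_layout

\begin_layout Plain Layout

    action = "eval"
\end_layout

\begin_layout Plain Layout

    deviceId = -1
\end_layout

\begin_layout Plain Layout

    modelPath = "..."      # the model written during training
\end_layout

\begin_layout Plain Layout

    reader = [
\end_layout

\begin_layout Plain Layout

        readerType = "LMSequenceReader"
\end_layout

\begin_layout Plain Layout

        wordclass = "..."  # same wordclass.txt as in training
\end_layout

\begin_layout Plain Layout

        file = "..."       # ptb.test.cntk.txt
\end_layout

\begin_layout Plain Layout

    ]
\end_layout

\begin_layout Plain Layout

]
\end_layout

\end_inset


\end_layout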
\begin_layout Section
Spoken Language Understanding
\begin_inset Index idx
status open

\begin_layout Plain Layout
Spoken Language Understanding
\end_layout

\end_inset


\end_layout

\begin_layout Standard
One of the important tasks in spoken language understanding is labeling
 an input sequence with semantic tags.
 In this example, we show how CNTK can be used to train an LSTM recurrent
 network for this labeling task.
 The setup file is under ExampleSetups
 The setup file is under ExampleSetups
\backslash
SLU
\backslash
rnnlu.cntk.
 The data set is ATIS, whose training/dev set contains 944 unique words,
 including <unk>.
 The output has 127 dimensions, each corresponding to a semantic tag in
 ATIS.
 Words unseen in training are mapped to <unk> at test time.
 A mapping file maps one word to another, which is useful for mapping
 low-frequency or unseen inputs to a common token; in this case, the common
 token is <unk>.
 An excerpt of such a mapping file is sketched below.
\end_layout
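
\begin_layout Standard
A hypothetical excerpt of such a mapping file follows.
 The format is assumed here to be one whitespace-separated pair per line,
 mapping an input token to the token the reader should actually use; the
 words themselves are made up for illustration.
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

flights       flights
\end_layout

\begin_layout Plain Layout

boston        boston
\end_layout

\begin_layout Plain Layout

infrequentwd  <unk>
\end_layout

\end_inset


\end_layout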

\begin_layout Subsection
Training
\begin_inset Index idx
status open

\begin_layout Plain Layout
Training
\end_layout

\end_inset


\end_layout

\begin_layout Standard
In this setup, the following need to be specified for training; a consolidated
 configuration sketch follows the parameter list.
\end_layout

\begin_layout Itemize
action
\begin_inset Index idx
status open

\begin_layout Plain Layout
action
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

train
\begin_inset Quotes erd
\end_inset

 : this informs CNTK to train a model.
 
\end_layout

\begin_layout Itemize
deviceId
\begin_inset Index idx
status open

\begin_layout Plain Layout
deviceId
\end_layout

\end_inset

=-1 : this specifies using the CPU.
\end_layout

\begin_layout Itemize
minibatchSize
\begin_inset Index idx
status open

\begin_layout Plain Layout
minibatchSize
\end_layout

\end_inset

 = 10 : this specifies the maximum number of words per minibatch.
\end_layout

\begin_layout Itemize
SimpleNetworkBuilder
\begin_inset Index idx
status open

\begin_layout Plain Layout
SimpleNetworkBuilder
\end_layout

\end_inset

: 
\end_layout

\begin_deeper
\begin_layout Itemize
recurrentLayer
\begin_inset Index idx
status open

\begin_layout Plain Layout
recurrentLayer
\end_layout

\end_inset

=2 : this specifies that layer 2 is a recurrent layer.
\end_layout

\begin_layout Itemize
rnnType
\begin_inset Index idx
status open

\begin_layout Plain Layout
rnnType
\end_layout

\end_inset

=LSTM : this informs CNTK to call the LSTM function in SimpleNetworkBuilder.
\end_layout

\begin_layout Itemize
lookupTableOrder
\begin_inset Index idx
status open

\begin_layout Plain Layout
lookupTableOrder
\end_layout

\end_inset

=3: this specifies forming a context-dependent input with a context window
 size of 3.
\end_layout

\begin_layout Itemize
trainingCriterion
\begin_inset Index idx
status open

\begin_layout Plain Layout
trainingCriterion
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

ClassCrossEntropyWithSoftmax
\begin_inset Quotes erd
\end_inset

 : specifies that training uses the cross-entropy criterion.
\end_layout

\begin_layout Itemize
evalCriterion
\begin_inset Index idx
status open

\begin_layout Plain Layout
evalCriterion
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

ClassCrossEntropyWithSoftmax
\begin_inset Quotes erd
\end_inset

 : specifies that the validation set also uses cross entropy for evaluation.
\end_layout

\begin_layout Itemize
layerSizes
\begin_inset Index idx
status open

\begin_layout Plain Layout
layerSizes
\end_layout

\end_inset

=2832:50:300:127 : this specifies the input, hidden, and output layer sizes.
 Notice that the input layer has a dimension of 2832, which is 3 times
 944.
 This number is the product of the context window size (3) and the number
 of unique words (944).
 If lookupTableOrder is set to 1, the input layer size should be set to
 944.
 
\end_layout

\end_deeper
\begin_layout Itemize
SGD
\begin_inset Index idx
status open

\begin_layout Plain Layout
SGD
\end_layout

\end_inset


\end_layout

\begin_deeper
\begin_layout Itemize
learningRatePerSample
\begin_inset Index idx
status open

\begin_layout Plain Layout
learningRatePerSample
\end_layout

\end_inset

=0.1 : this specifies the learning rate per sample.
\end_layout

\begin_layout Itemize
modelPath
\begin_inset Index idx
status open

\begin_layout Plain Layout
modelPath
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

c:
\backslash
temp
\backslash
exp
\backslash
ATIS
\backslash
temp
\backslash
cntkdebug.dnn
\begin_inset Quotes erd
\end_inset

 : this is the trained model file name.
\end_layout

\begin_layout Itemize
gradUpdateType
\begin_inset Index idx
status open

\begin_layout Plain Layout
gradUpdateType
\end_layout

\end_inset

=AdaGrad : this specifies using AdaGrad for updating weights.
\end_layout

\end_deeper
\begin_layout Itemize
Reader
\begin_inset Index idx
status open

\begin_layout Plain Layout
Reader
\end_layout

\end_inset

: specifies the training reader.
\end_layout

\begin_deeper
\begin_layout Itemize
readerType
\begin_inset Index idx
status open

\begin_layout Plain Layout
readerType
\end_layout

\end_inset

=LUSequenceReader : specifies using the LUSequenceReader.
\end_layout

\begin_layout Itemize
nbruttsineachrecurrentiter
\begin_inset Index idx
status open

\begin_layout Plain Layout
nbruttsineachrecurrentiter
\end_layout

\end_inset

=10 : this specifies using a maximum of 10 sentences in each minibatch.
\end_layout

\begin_layout Itemize
wordcontext
\begin_inset Index idx
status open

\begin_layout Plain Layout
wordContext
\end_layout

\end_inset

=0:1:2 : this specifies the time indices for forming a context window.
 In this example, the setup corresponds to using the current input, the
 next input, and the input after that, for a context window of size 3.
 Users can also choose other settings, such as wordcontext=0:-1:1, which
 forms a context window of size 3 from the current input, the previous
 input, and the next input.
 
\end_layout

\begin_layout Itemize
wordmap
\begin_inset Index idx
status open

\begin_layout Plain Layout
wordmap
\end_layout

\end_inset

=c:
\backslash
exp
\backslash
atis
\backslash
data
\backslash
inputmap.txt : specifies a file that maps one word to another.
 This mapping file should be constructed only from the training/dev sets.
 
\end_layout

\begin_layout Itemize
file
\begin_inset Index idx
status open

\begin_layout Plain Layout
file
\end_layout

\end_inset

=c:
\backslash
exp
\backslash
ATIS
\backslash
data
\backslash
atis.train.apos.pred.pos.head.IOB.simple : specifies the training file.
\end_layout

\begin_layout Itemize
labelIn
\begin_inset Index idx
status open

\begin_layout Plain Layout
labelIn
\end_layout

\end_inset

 : this specifies information about the inputs.
\end_layout

\begin_deeper
\begin_layout Itemize
beginingSequence
\begin_inset Index idx
status open

\begin_layout Plain Layout
beginingSequence
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

BOS
\begin_inset Quotes erd
\end_inset

 : this specifies the sequence-beginning symbol.
\end_layout

\begin_layout Itemize
endSequence
\begin_inset Index idx
status open

\begin_layout Plain Layout
endSequence
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

EOS
\begin_inset Quotes erd
\end_inset

 : this specifies the sequence-ending symbol.
\end_layout

\begin_layout Itemize
token
\begin_inset Index idx
status open

\begin_layout Plain Layout
token
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

c:
\backslash
exp
\backslash
atis
\backslash
data
\backslash
input.txt
\begin_inset Quotes erd
\end_inset

 : this specifies the list of input words.
 
\end_layout

\end_deeper
\begin_layout Itemize
labels
\begin_inset Index idx
status open

\begin_layout Plain Layout
labels
\end_layout

\end_inset

 : this specifies information about the labels.
\end_layout

\begin_deeper
\begin_layout Itemize
token
\begin_inset Index idx
status open

\begin_layout Plain Layout
token
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

c:
\backslash
exp
\backslash
atis
\backslash
data
\backslash
output.txt
\begin_inset Quotes erd
\end_inset

 : this specifies the output semantic labels.
\end_layout

\end_deeper
\end_deeper
\begin_layout Itemize
cvReader
\begin_inset Index idx
status open

\begin_layout Plain Layout
cvReader
\end_layout

\end_inset

: specifies the cross-validation reader.
 Most of the settings should be the same as those in the Reader section.
\end_layout

\begin_deeper
\begin_layout Itemize
readerType
\begin_inset Index idx
status open

\begin_layout Plain Layout
readerType
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

LUSequenceReader
\begin_inset Quotes erd
\end_inset

 : specifies using the LUSequenceReader.
\end_layout

\begin_layout Itemize
wordcontext
\begin_inset Index idx
status open

\begin_layout Plain Layout
wordContext
\end_layout

\end_inset

=0:1:2 : this specifies the time indices for forming a context window.
 This should be the same as specified in the Reader section.
\end_layout

\begin_layout Itemize
file
\begin_inset Index idx
status open

\begin_layout Plain Layout
file
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

c:
\backslash
exp
\backslash
ATIS
\backslash
data
\backslash
atis.dev.apos.pred.pos.head.IOB.simple
\begin_inset Quotes erd
\end_inset

: specifies the validation file.
\end_layout

\end_deeper
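
\begin_layout Standard
Putting the pieces together, the following is a minimal sketch of the training
 block of rnnlu.cntk, assembled from the parameters above.
 As before, the block name train and the exact nesting are illustrative,
 and paths are abbreviated with ellipses; replace them with the full locations
 listed earlier.
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

train = [
\end_layout

\begin_layout Plain Layout

    action = "train"
\end_layout

\begin_layout Plain Layout

    deviceId = -1
\end_layout

\begin_layout Plain Layout

    minibatchSize = 10
\end_layout

\begin_layout Plain Layout

    SimpleNetworkBuilder = [
\end_layout

\begin_layout Plain Layout

        rnnType = "LSTM"
\end_layout

\begin_layout Plain Layout

        recurrentLayer = 2             # layer 2 is recurrent
\end_layout

\begin_layout Plain Layout

        lookupTableOrder = 3           # context window of size 3
\end_layout

\begin_layout Plain Layout

        layerSizes = 2832:50:300:127   # 2832 = 3 x 944
\end_layout

\begin_layout Plain Layout

        trainingCriterion = "ClassCrossEntropyWithSoftmax"
\end_layout

\begin_layout Plain Layout

        evalCriterion = "ClassCrossEntropyWithSoftmax"
\end_layout

\begin_layout Plain Layout

    ]
\end_layout

\begin_layout Plain Layout

    SGD = [
\end_layout

\begin_layout Plain Layout

        learningRatePerSample = 0.1
\end_layout

\begin_layout Plain Layout

        gradUpdateType = "AdaGrad"
\end_layout

\begin_layout Plain Layout

        modelPath = "..."              # cntkdebug.dnn, full path as above
\end_layout

\begin_layout Plain Layout

    ]
\end_layout

\begin_layout Plain Layout

    reader = [
\end_layout

\begin_layout Plain Layout

        readerType = "LUSequenceReader"
\end_layout

\begin_layout Plain Layout

        nbruttsineachrecurrentiter = 10
\end_layout

\begin_layout Plain Layout

        wordcontext = 0:1:2
\end_layout

\begin_layout Plain Layout

        wordmap = "..."                # inputmap.txt
\end_layout

\begin_layout Plain Layout

        file = "..."                   # atis.train...IOB.simple
\end_layout

\begin_layout Plain Layout

        labelIn = [
\end_layout

\begin_layout Plain Layout

            beginingSequence = "BOS"
\end_layout

\begin_layout Plain Layout

            endSequence = "EOS"
\end_layout

\begin_layout Plain Layout

            token = "..."              # input.txt (input word list)
\end_layout

\begin_layout Plain Layout

        ]
\end_layout

\begin_layout Plain Layout

        labels = [
\end_layout

\begin_layout Plain Layout

            token = "..."              # output.txt (semantic tags)
\end_layout

\begin_layout Plain Layout

        ]
\end_layout

\begin_layout Plain Layout

    ]
\end_layout

\begin_layout Plain Layout

    cvReader = [
\end_layout

\begin_layout Plain Layout

        readerType = "LUSequenceReader"
\end_layout

\begin_layout Plain Layout

        wordcontext = 0:1:2
\end_layout

\begin_layout Plain Layout

        file = "..."                   # atis.dev...IOB.simple
\end_layout

\begin_layout Plain Layout

    ]
\end_layout

\begin_layout Plain Layout

]
\end_layout

\end_inset


\end_layout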
\begin_layout Subsection
Test
\end_layout

\begin_layout Standard
In this setup, the following need to be specified for writing/decoding
 the test set; a consolidated sketch follows the list.
 It uses LUSequenceWriter to decode the test-set word sequences into their
 semantic tags.
 
\end_layout

\begin_layout Itemize
action
\begin_inset Index idx
status open

\begin_layout Plain Layout
action
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

write
\begin_inset Quotes erd
\end_inset

: this informs CNTK to evaluate the network and write the outputs of the
 nodes specified below.
 
\end_layout

\begin_layout Itemize
deviceId
\begin_inset Index idx
status open

\begin_layout Plain Layout
deviceId
\end_layout

\end_inset

=-1 : this specifies using the CPU.
\end_layout

\begin_layout Itemize
modelPath
\begin_inset Index idx
status open

\begin_layout Plain Layout
modelPath
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

c:
\backslash
temp
\backslash
exp
\backslash
ATIS
\backslash
temp
\backslash
cntkdebug.dnn
\begin_inset Quotes erd
\end_inset

: this is the trained model file name.
\end_layout

\begin_layout Itemize
outputNodeNames
\begin_inset Index idx
status open

\begin_layout Plain Layout
outputNodeNames
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

outputs:labels
\begin_inset Quotes erd
\end_inset

 : this specifies which nodes output results.
 These node names are pre-specified in CNTK's SimpleNetworkBuilder.
 The node 
\begin_inset Quotes eld
\end_inset

outputs
\begin_inset Quotes erd
\end_inset

 is the node that outputs the activations before the softmax.
 The node 
\begin_inset Quotes eld
\end_inset

labels
\begin_inset Quotes erd
\end_inset

 is the node that holds the reference labels for comparison.
\end_layout

\begin_layout Itemize
reader
\begin_inset Index idx
status open

\begin_layout Plain Layout
reader
\end_layout

\end_inset

: specifies the test-set reader.
\end_layout

\begin_deeper
\begin_layout Itemize
readerType
\begin_inset Index idx
status open

\begin_layout Plain Layout
readerType
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

LUSequenceReader
\begin_inset Quotes erd
\end_inset

 : specifies using the LUSequenceReader.
\end_layout

\begin_layout Itemize
wordmap
\begin_inset Index idx
status open

\begin_layout Plain Layout
wordmap
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

c:
\backslash
exp
\backslash
atis
\backslash
data
\backslash
inputmap.txt
\begin_inset Quotes erd
\end_inset

: specifies the word map file, which should be the same as the one used
 by the training and validation readers.
 
\end_layout

\begin_layout Itemize
file
\begin_inset Index idx
status open

\begin_layout Plain Layout
file
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

c:
\backslash
exp
\backslash
atis
\backslash
data
\backslash
atis.test.apos.pred.pos.head.IOB.simple
\begin_inset Quotes erd
\end_inset

: specifies the test file.
\end_layout

\end_deeper
\begin_layout Itemize
writer
\begin_inset Index idx
status open

\begin_layout Plain Layout
writer
\end_layout

\end_inset

 : this specifies where to write the outputs.
\end_layout

\begin_deeper
\begin_layout Itemize
writerType
\begin_inset Index idx
status open

\begin_layout Plain Layout
writerType
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

LUSequenceWriter
\begin_inset Quotes erd
\end_inset

 : this specifies using LUSequenceWriter.
\end_layout

\begin_layout Itemize
outputs
\begin_inset Index idx
status open

\begin_layout Plain Layout
outputs
\end_layout

\end_inset

 : specifies the output location for the outputs node.
\end_layout

\begin_deeper
\begin_layout Itemize
file
\begin_inset Index idx
status open

\begin_layout Plain Layout
file
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

c:
\backslash
temp
\backslash
exp
\backslash
atis
\backslash
output
\backslash
output.rec.txt
\begin_inset Quotes erd
\end_inset

 : the file name for the decoding results written by LUSequenceWriter.
 
\end_layout

\begin_layout Itemize
token
\begin_inset Index idx
status open

\begin_layout Plain Layout
token
\end_layout

\end_inset

=
\begin_inset Quotes erd
\end_inset

c:
\backslash
exp
\backslash
atis
\backslash
data
\backslash
output.txt
\begin_inset Quotes erd
\end_inset

 : this specifies the semantic labels.
 
\end_layout

\end_deeper
\end_deeper
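
\begin_layout Standard
Finally, a minimal sketch of the write block, under the same assumptions
 and abbreviations as the sketches above:
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

write = [
\end_layout

\begin_layout Plain Layout

    action = "write"
\end_layout

\begin_layout Plain Layout

    deviceId = -1
\end_layout

\begin_layout Plain Layout

    modelPath = "..."            # the model trained above
\end_layout

\begin_layout Plain Layout

    outputNodeNames = "outputs:labels"
\end_layout

\begin_layout Plain Layout

    reader = [
\end_layout

\begin_layout Plain Layout

        readerType = "LUSequenceReader"
\end_layout

\begin_layout Plain Layout

        wordmap = "..."          # same inputmap.txt as in training
\end_layout

\begin_layout Plain Layout

        file = "..."             # atis.test...IOB.simple
\end_layout

\begin_layout Plain Layout

    ]
\end_layout

\begin_layout Plain Layout

    writer = [
\end_layout

\begin_layout Plain Layout

        writerType = "LUSequenceWriter"
\end_layout

\begin_layout Plain Layout

        outputs = [
\end_layout

\begin_layout Plain Layout

            file = "..."         # output.rec.txt (decoded tags)
\end_layout

\begin_layout Plain Layout

            token = "..."        # output.txt (semantic label list)
\end_layout

\begin_layout Plain Layout

        ]
\end_layout

\begin_layout Plain Layout

    ]
\end_layout

\begin_layout Plain Layout

]
\end_layout

\end_inset


\end_layout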
\end_body
\end_document