https://github.com/Microsoft/CNTK
Tip revision: c12bfdf740da3c6950126d2dadbef8fcdcea528d authored by Mark Hillebrand on 18 January 2016, 08:34:24 UTC
License change
License change
Tip revision: c12bfdf
NDLExamples.ndl
# Section selection for this file.
# load= names ndlMacroDefine, the section further down that holds the macro definitions.
load=ndlMacroDefine
# Only one run= entry is active at a time. The commented-out entries name
# other network sections defined in this file.
#run=ndlFull
#run=ndlTestMDN
#run=ndlTestGMM
#run=ndlTestCosDist
#run=ndlMacroUseCNNSubSample2
run=ndlMacroUseCNN
ndlTestCosDist=[
    # Example network whose training criterion is the negated sum of the
    # cosine distance between the softmax output and the labels.

    # Sample, hidden, and label dimensions
    SDim=784
    HDim=256
    LDim=10

    features=Input(SDim, tag=feature)
    labels=Input(LDim, tag=label)

    # Mean and inverse standard deviation of the features, fed to the
    # per-dimension mean/variance normalization of the input
    fmean=Mean(features)
    finvstd=InvStdDev(features)
    finput=PerDimMeanVarNormalization(features, fmean, finvstd)

    # Three RBFF layers; the last one produces LDim rows, then softmax
    L1=RBFF(finput, HDim, SDim)
    L2=RBFF(L1, HDim, HDim)
    L3=RBFF(L2, LDim, HDim)
    L4=SoftMax(L3)

    # Criterion: Negate of SumElements of CosDistance between L4 and labels.
    # The node is referenced with the same spelling it is defined with.
    CD=CosDistance(L4, labels)
    CDAll=SumElements(CD)
    NCD=Negate(CDAll, tag=Criteria)

    # Evaluation node
    Err=ErrorPrediction(labels, L4, tag=Eval)

    # Root nodes
    OutputNodes=(L4)
]
ndlTestGMM=[
    # Example network that uses GMMLL with free Parameter nodes for the
    # mixture prior, means and log standard deviation.

    # Sample, hidden, and label dimensions
    SDim=784
    HDim=10
    LDim=10

    features=Input(SDim, tag=feature)
    # labels is declared but no other node in this section consumes it
    labels=Input(LDim, tag=label)

    # Two RBFF layers followed by a BFF layer, all with HDim rows
    L1=RBFF(features, HDim, SDim)
    L2=RBFF(L1, HDim, HDim)
    L3=BFF(L2, HDim, HDim)

    # Mixture parameters.
    # NOTE(review): SizeOfAllMeans looks like GMMComponent times HDim (4 x 10) - confirm
    GMMComponent=4
    SizeOfAllMeans=40
    UnnormedPrior=Parameter(GMMComponent, 1)
    Means=Parameter(SizeOfAllMeans, 1)
    LogStddev=Parameter(GMMComponent, 1)

    # Criterion: Negate of SumElements of GMMLL evaluated on L3.
    # The node is referenced with the same spelling it is defined with.
    LL=GMMLL(UnnormedPrior, Means, LogStddev, L3)
    LLAll=SumElements(LL)
    NLL=Negate(LLAll, tag=Criteria)

    # Root nodes
    OutputNodes=(NLL)
    EvalNodes=(NLL)
]
ndlTestMDN=[
    # Example network in which the mixture prior, means and log standard
    # deviation passed to GMMLL are themselves computed from the last hidden layer.

    # Sample, hidden, and label dimensions
    SDim=784
    HDim=256
    LDim=10

    features=Input(SDim, tag=feature)
    labels=Input(LDim, tag=label)

    # Three RBFF layers, each with HDim rows
    L1=RBFF(features, HDim, SDim)
    L2=RBFF(L1, HDim, HDim)
    L3=RBFF(L2, HDim, HDim)

    # NOTE(review): SizeOfAllMeans looks like GMMComponent times LDim (4 x 10) - confirm
    GMMComponent=4
    SizeOfAllMeans=40

    # Prior: BFF on L3, squashed by Tanh and scaled by the constant 2
    UnnormedPrior0=BFF(L3, GMMComponent, HDim)
    UnnormedPrior1=Tanh(UnnormedPrior0)
    scaleFPrior=Const(2)
    UnnormedPrior=Scale(scaleFPrior, UnnormedPrior1)

    # Means: BFF on L3
    Means=BFF(L3, SizeOfAllMeans, HDim)

    # It is important to control the range of logstddev, hence Tanh and a fixed scale.
    # L3 has HDim rows, so the column count here is HDim, as in the two BFF calls above.
    LogStddev0=BFF(L3, GMMComponent, HDim)
    LogStddev1=Tanh(LogStddev0)
    scaleFStddev=Const(2)
    LogStddev=Scale(scaleFStddev, LogStddev1)

    # Criterion: Negate of SumElements of GMMLL evaluated on the labels.
    # The node is referenced with the same spelling it is defined with.
    LL=GMMLL(UnnormedPrior, Means, LogStddev, labels)
    LLAll=SumElements(LL)
    NLL=Negate(LLAll, tag=Criteria)

    # Root nodes
    OutputNodes=(NLL)
    EvalNodes=(NLL)
]
ndlFull=[
    # Network written out node by node, without using any macro.
    # Root nodes are assigned through the named node lists at the end
    # instead of through tag= options.

    # Sample, hidden, and label dimensions
    SDim=784
    HDim=256
    LDim=10

    # First layer parameters
    B0=Parameter(HDim)
    W0=Parameter(HDim, SDim)

    features=Input(SDim)
    labels=Input(LDim)

    # Normalize the features with their mean and inverse standard deviation
    mean0=Mean(features)
    invstd0=InvStdDev(features)
    normedFeat=PerDimMeanVarNormalization(features, mean0, invstd0)

    # First layer: W0 times the normalized features, plus B0, then RectifiedLinear
    Times1=Times(W0, normedFeat)
    Plus1=Plus(Times1, B0)
    RL1=RectifiedLinear(Plus1)

    # Second layer: W1 times RL1, plus B1
    B1=Parameter(LDim, 1)
    W1=Parameter(LDim, HDim)
    Times2=Times(W1, RL1)
    Plus2=Plus(Times2, B1)

    # Alternative formulations, left disabled:
    #LogSM=LogSoftmax(Plus2)
    #SM=Exp(LogSM)
    #SM=Softmax(Plus2)
    #CE=CrossEntropy(labels, SM)

    # Criterion and evaluation both read Plus2 directly
    CE=CrossEntropyWithSoftmax(labels, Plus2)
    ErrPredict=ErrorPrediction(labels, Plus2)

    # Root nodes
    FeatureNodes=(features)
    LabelNodes=(labels)
    CriteriaNodes=(CE)
    EvalNodes=(ErrPredict)
    OutputNodes=(Plus2)
]
ndlMacroDefine=[
    # Macro definitions used by the network sections in this file.
    # Several definition forms appear below and are kept as written:
    # an inline expression, a square-bracket body, a curly-brace body,
    # and an assignment to a square-bracket body.

    # Inline rectified linear feed forward
    RFF_R(x1, w1, b1)=RectifiedLinear(Plus(Times(w1,x1),b1))

    # Feed forward: Times of W1 and X1, then Plus B1
    FF(X1, W1, B1)
    [
        T=Times(W1,X1)
        P=Plus(T, B1)
    ]

    # Base feed forward network: defines its own bias and weight parameters
    # from the requested row and column counts, then applies FF.
    # B and W are passed to FF with the same spelling they are defined with.
    BFF(in, rows, cols)
    {
        B=Parameter(rows, init=fixedvalue, value=0)
        W=Parameter(rows, cols)
        FF = FF(in, W, B)
    }

    # Rectified linear base feed forward: BFF followed by RectifiedLinear
    RBFF(in,rowCount,colCount)
    {
        BFF = BFF(in, rowCount, colCount)
        RL = RectifiedLinear(BFF)
    }

    # Rectified linear feed forward: FF followed by RectifiedLinear
    RFF(X2,W2,B2)=[
        FF = FF(X2, W2, B2)
        RL = RectifiedLinear(FF)
    ]

    # Rectified linear feed forward with dropout: RFF followed by Dropout
    RFFD(X3,W3,B3)
    {
        RFF=RFF(X3, W3, B3)
        DO=Dropout(RFF)
    }

    # Sigmoid base feed forward: BFF followed by Sigmoid
    SBFF(in,rowCount,colCount)
    {
        BFF = BFF(in, rowCount, colCount)
        S = Sigmoid(BFF)
    }

    # Sigmoid feed forward: FF followed by Sigmoid
    SFF(X2,W2,B2)=[
        FF = FF(X2, W2, B2)
        S = Sigmoid(FF)
    ]

    # Sigmoid feed forward with dropout: SFF followed by Dropout
    SFFD(X3,W3,B3)
    {
        SFF=SFF(X3, W3, B3)
        DO=Dropout(SFF)
    }

    # Softmax feed forward: FF on caller-supplied weight and bias, then
    # CrossEntropyWithSoftmax against the labels. Inner nodes are FF and SM.
    SMFF(x,y,z, labels)
    {
        FF = FF(x,y,z)
        SM = CrossEntropyWithSoftmax(labels, FF)
    }

    # Softmax base feed forward: BFF with its own parameters, then
    # CrossEntropyWithSoftmax against the labels. Inner nodes are BFF and SM.
    SMBFF(x,r,c, labels)
    {
        BFF = BFF(x,r,c)
        SM = CrossEntropyWithSoftmax(labels, BFF)
    }

    # Inline rectified linear feed forward with dropout
    RFFD_R(x1, w1, b1)={Dropout(RectifiedLinear(Plus(Times(w1,x1),b1)))}

    # Weighted objective: each of o1 and o2 is multiplied by a Constant
    # built from w1 and w2 respectively, and the two products are added.
    WtObjFcn(o1,w1,o2,w2)
    {
        A1=Constant(w1)
        A2=Constant(w2)
        T1=Times(A1,o1)
        T2=Times(A2,o2)
        O=Plus(T1,T2)
    }
]
ndlMacroUse2=[
    # Network built from the base macros RBFF and SMBFF, which create
    # their own weight and bias parameters.

    # Sample, hidden, and label dimensions
    SDim=784
    HDim=256
    LDim=10

    features=Input(SDim, tag=feature)
    labels=Input(LDim, tag=label)

    # Normalize the features with their mean and inverse standard deviation
    fmean=Mean(features)
    finvstd=InvStdDev(features)
    finput=PerDimMeanVarNormalization(features, fmean, finvstd)

    # Three hidden RBFF layers
    L1=RBFF(finput, HDim, SDim)
    L2=RBFF(L1, HDim, HDim)
    L3=RBFF(L2, HDim, HDim)

    # Criterion from SMBFF; CE.BFF is the BFF node inside that macro
    CE=SMBFF(L3, LDim, HDim, labels, tag=Criteria)
    Err=ErrorPrediction(labels, CE.BFF, tag=Eval)

    # Root nodes
    OutputNodes=(CE.BFF)
]
ndlMacroUseCNNSubSample2ZeroPadding=[
    # Convolutional example with a subsampling step of 2 and zeroPadding=true.
    # Convolution and MaxPooling are called in the same form as in
    # ndlMacroUseCNNSubSample2 and ndlMacroUseCNN: the features come from an
    # ImageInput, so the image geometry is not repeated as positional
    # arguments of Convolution and MaxPooling.

    # Sample and label dimensions
    SDim=784
    LDim=10

    # Image geometry of the input
    inputWidth=28
    inputHeight=28
    inputChannels=1

    features=ImageInput(inputWidth, inputHeight, inputChannels, tag=feature)
    labels=Input(LDim, tag=label)

    # Convolution
    kernelWidth=5
    kernelHeight=5
    outputChannels=24
    horizontalSubsample=2
    verticalSubsample=2
    # weight[outputChannels, kernelWidth * kernelHeight * inputChannels]
    cvweight=Parameter(outputChannels, 25)
    cv=Convolution(cvweight, features, kernelWidth, kernelHeight, outputChannels, horizontalSubsample, verticalSubsample, zeroPadding=true)

    # One bias per channel, then Sigmoid
    cvbias=Parameter(outputChannels, 1)
    cvplusbias=Plus(cv, cvbias)
    nlcv=Sigmoid(cvplusbias)

    # With zero padding:
    # outputWidth = (m_inputWidth-1)/m_horizontalSubsample + 1
    outputWidth=14
    # outputHeight = (m_inputHeight-1)/m_verticalSubsample + 1
    outputHeight=14

    # Max pooling
    windowWidth=2
    windowHeight=2
    stepW=2
    stepH=2
    mp=MaxPooling(nlcv, windowWidth, windowHeight, stepW, stepH)

    # m_outputWidth = (m_inputWidth-m_windowWidth)/m_horizontalSubsample + 1
    mpoutputWidth=7
    # m_outputHeight = (m_inputHeight-m_windowHeight)/m_verticalSubsample + 1
    mpoutputHeight=7
    # m_outputSizePerSample = m_outputWidth * m_outputHeight * m_channels
    mpoutputSizePerSample=1176

    # Fully connected layers
    HDim=128
    L1=SBFF(mp, HDim, mpoutputSizePerSample)
    CE=SMBFF(L1, LDim, HDim, labels, tag=Criteria)
    Err=ErrorPrediction(labels, CE.BFF, tag=Eval)

    # Root nodes
    OutputNodes=(CE.BFF)
]
ndlMacroUseCNNSubSample2=[
    # Convolutional example with a subsampling step of 2 and zeroPadding=false.
    # All layer sizes are written out explicitly.

    # Sample and label dimensions
    SDim=784
    LDim=10

    features=ImageInput(28,28, 1, tag=feature)
    labels=Input(LDim, tag=label)

    # Convolution
    inputWidth=28
    inputHeight=28
    inputChannels=1
    kernelWidth=5
    kernelHeight=5
    outputChannels=24
    horizontalSubsample=2
    verticalSubsample=2
    # weight[outputChannels, kernelWidth * kernelHeight * inputChannels]
    cvweight=Parameter(outputChannels, 25)
    cv=Convolution(cvweight, features, kernelWidth, kernelHeight, outputChannels,horizontalSubsample, verticalSubsample, zeroPadding=false)

    # One bias per channel, then Sigmoid
    cvbias=Parameter(outputChannels, 1)
    cvplusbias=Plus(cv, cvbias)
    nlcv=Sigmoid(cvplusbias)
    #nlcv=Sigmoid(cv);

    # Without zero padding:
    # outputWidth = (m_inputWidth-m_kernelWidth)/m_horizontalSubsample + 1
    outputWidth=12
    # outputHeight = (m_inputHeight-m_kernelHeight)/m_verticalSubsample + 1
    outputHeight=12

    # Max pooling
    windowWidth=2
    windowHeight=2
    stepW=2
    stepH=2
    mp=MaxPooling(nlcv, windowWidth, windowHeight, stepW, stepH)

    # m_outputWidth = (m_inputWidth-m_windowWidth)/m_horizontalSubsample + 1
    mpoutputWidth=6
    # m_outputHeight = (m_inputHeight-m_windowHeight)/m_verticalSubsample + 1
    mpoutputHeight=6
    # m_outputSizePerSample = m_outputWidth * m_outputHeight * m_channels
    mpoutputSizePerSample=864

    # Fully connected layers
    HDim=128
    L1=SBFF(mp, HDim, mpoutputSizePerSample)
    CE=SMBFF(L1, LDim, HDim, labels, tag=Criteria)
    Err=ErrorPrediction(labels, CE.BFF, tag=Eval)

    # Root nodes
    OutputNodes=(CE.BFF)
]
ndlMacroUseCNN=[
    # Convolutional example with a subsampling step of 1. The convolution
    # weight and the column count of the first fully connected layer are
    # given as 0 so that their dimensions are derived rather than written out.

    # Image geometry, sample and label dimensions
    inputWidth=28
    inputHeight=28
    inputChannels=1
    SDim=784
    LDim=10

    features=ImageInput(inputWidth, inputHeight, inputChannels, 1, tag=feature)
    labels=Input(LDim, tag=label)

    # Convolution
    kernelWidth=5
    kernelHeight=5
    outputChannels=24
    horizontalSubsample=1
    verticalSubsample=1
    # weight[outputChannels, kernelWidth * kernelHeight * inputChannels]
    # cvweight=Parameter(outputChannels, 25)
    cvweight=Parameter(0, 0) #CNTK will derive the dimension automatically
    cv=Convolution(cvweight, features, kernelWidth, kernelHeight, outputChannels,horizontalSubsample, verticalSubsample, zeroPadding=false)

    # One bias per channel, then Sigmoid
    cvbias=Parameter(outputChannels, 1)
    cvplusbias=Plus(cv, cvbias)
    nlcv=Sigmoid(cvplusbias)

    # Max pooling
    windowWidth=2
    windowHeight=2
    stepW=2
    stepH=2
    mp=MaxPooling(nlcv, windowWidth, windowHeight, stepW, stepH)

    # Defined as 0; the SBFF call below passes the literal 0 for its column count
    mpoutputSizePerSample=0

    # Fully connected layers
    HDim=128
    L1=SBFF(mp, HDim, 0)
    CE=SMBFF(L1, LDim, HDim, labels, tag=Criteria)
    Err=ErrorPrediction(labels, CE.BFF, tag=Eval)

    # Root nodes
    OutputNodes=(CE.BFF)
]
ndlMacroUseNoBase=[
    # Network built from the non-base macros RFFD and SMFF. These macros do
    # not create parameters themselves: each takes an input, a weight node and
    # a bias node, so the weights and biases are declared here and passed in.

    # Sample, hidden, and label dimensions
    SDim=784
    HDim=256
    LDim=10

    # Weight and bias parameters, one pair per layer
    B0=Parameter(HDim, init=fixedvalue, value=0)
    W0=Parameter(HDim, SDim, init=uniform)
    B1=Parameter(HDim, init=fixedvalue, value=0)
    W1=Parameter(HDim, HDim, init=uniform)
    B2=Parameter(HDim, init=fixedvalue, value=0)
    W2=Parameter(HDim, HDim, init=uniform)
    BTop=Parameter(LDim, init=fixedvalue, value=0)
    WTop=Parameter(LDim, HDim, init=uniform)

    features=Input(SDim, tag=feature)
    labels=Input(LDim, tag=label)

    # Three hidden layers, arguments in the order input, weight, bias
    L1=RFFD(features, W0, B0)
    L2=RFFD(L1, W1, B1)
    L3=RFFD(L2, W2, B2)

    # Criterion from SMFF. The inner nodes of SMFF are FF and SM, so the
    # pre-softmax output is CE.FF.
    CE=SMFF(L3, WTop, BTop, labels, tag=Criteria)
    Err=ErrorPrediction(labels, CE.FF, tag=Eval)

    # Root nodes
    OutputNodes=(CE.FF)
]
ndlMacroUseMask=[
    # Network with a second, two-layer branch whose output is multiplied
    # element-wise into the output of the main three-layer branch.

    # Sample, hidden, and label dimensions
    SDim=784
    HDim=256
    LDim=10

    features=Input(SDim, tag=feature)
    labels=Input(LDim, tag=label)

    # Normalize the features with their mean and inverse standard deviation
    fmean=Mean(features)
    finvstd=InvStdDev(features)
    finput=PerDimMeanVarNormalization(features, fmean, finvstd)

    # Main branch
    L1=RBFF(finput, HDim, SDim)
    L2=RBFF(L1, HDim, HDim)
    L3=RBFF(L2, HDim, HDim)

    # Mask branch, fed from the same normalized input
    ML1=RBFF(finput, HDim, SDim)
    ML2=RBFF(ML1, HDim, HDim)

    # Apply the mask
    L4=ElementTimes(L3, ML2)

    # Criterion and evaluation
    CE=SMBFF(L4, LDim, HDim, labels, tag=Criteria)
    Err=ErrorPrediction(labels, CE.BFF, tag=Eval)

    # Output node: CE.BFF minus the log of the label mean
    Prior=Mean(labels)
    LP=Log(Prior)
    O=Minus(CE.BFF, LP)

    # Root nodes, using the same list names as the other sections in this file
    OutputNodes=(O)
    CriteriaNodes=(CE)
    EvalNodes=(Err)
]