https://github.com/sannetenoever/STiMCON
Raw File
Tip revision: 873a2bf5c79fe2f828e72e14ef74db409d387854 authored by Sanne ten Oever on 23 July 2021, 08:31:05 UTC
Update README.md
Tip revision: 873a2bf
RNN_subFun.cpython-37.pyc
B

bÒÕ`&:ã@sÞdZddlZddlZddlZddlmZddlmZm	Z	m
Z
mZmZm
Z
ddlZddlmZddlmZddlmZdd	d
„Zdd„Zd
d„Zddd„Zgdgfdd„Zddd„ZGdd„dejjƒZGdd„dejjƒZdS)z6
Created on Mon Mar 16 10:41:49 2020

@author: sanoev
éN)Ú
Sequential)ÚDenseÚDropoutÚ
ActivationÚLSTMÚ
BidirectionalÚ	Embedding)Úpadded_everygram_pipeline)ÚMLE)Ú
VocabularyécCsÖtdƒg}g}g}x>tj t|ƒ¡D]*}| |¡| ||¡| ||¡q&Wtt|ƒd|dƒ}|d|…||d…}}	|d|…||d…}
}tdt|ƒƒtdt|ƒƒ||
f|	|f||ffS)NzShuffling sentencesgð?gY@zSize of training set = %dzSize of test set = %d)ÚprintÚnpÚrandomÚpermutationÚlenÚappendÚint)Zsentences_originalZ
next_originalZpercentage_testZ
tmp_sentencesZtmp_sentences_inxZ
tmp_next_wordÚiZ	cut_indexÚx_trainÚx_testÚy_trainÚy_test©rú/data/workspaces/lacns/workspaces/lacns-sanoev/working_data/Experiments/E002_DNN_N400/01_CompModel/01_Scripts/Git/RNN_subFun.pyÚshuffle_and_split_training_sets
rccs d}x–tj||ftjd}tj|tjd}xbt|ƒD]V}x0t||t|ƒƒD]\}	}
||
|||	f<qNW|||t|ƒ||<|d}q4W||fVqWdS)Nr)Údtypeé)rÚzerosÚint32ÚrangeÚ	enumerater)Ú
sentence_listÚnext_word_listÚ
batch_sizeÚSEQUENCE_LENÚword_indicesÚindexÚxÚyrÚtÚwrrrÚ	generator)sr,cCs”d}tj||ftjd}tj|tjd}xbt|ƒD]V}x0t||t|ƒƒD]\}	}
||
|||	f<qLW|||t|ƒ||<|d}q2W||fS)Nr)rr)rrrr r!r)r"r#r$r%r&r'r(r)rr*r+rrrÚgenerator_return6sr-c	sVd}‡fdd„t|ƒDƒ}dd„|Dƒ}dd„|Dƒ}t|ƒ}t|ƒ}| ||¡|S)Nrcsg|]\}}|ˆ|g‘qSrr)Ú.0ÚitÚsen)Ú
next_wordsrrú
<listcomp>Dsz(cal_perplexity_model.<locals>.<listcomp>cSsg|]}tj|ddd‘qS)F)Ú	pad_rightÚpad_left)ÚnltkÚbigrams)r.r*rrrr2EscSsg|]}|D]}|‘qqSrr)r.ÚsentÚwordrrrr2Fs)r!rr
Úfit)	Ú	sentencesr1ZNgramÚnÚtokenized_textZ
train_dataÚwordsZpadded_vocabZ	NLTKmodelr)r1rÚcal_perplexity_modelAsr>gš™™™™™É?cCsºtdƒtƒ}t|ƒdkr2| tt|ƒdd¡n<| tt|ƒt|dƒd¡|jd |g¡d|jd_| tt	dƒƒ¡|dkr–| t
|ƒ¡| tt|ƒƒ¡| tdƒ¡|S)NzBuild model...ri,)Ú	input_dimÚ
output_dimFé€Úsoftmax)
r
rrÚaddrÚlayersÚset_weightsÚ	trainablerrrrr)r=r%ÚdropoutZEmbLayÚmodelrrrÚ	get_modelLsrIçð?cCsPt |¡ d¡}t |¡|}t |¡}|t |¡}tj d|d¡}t |¡S)NÚfloat64r)	rÚasarrayÚastypeÚlogÚexpÚsumrÚmultinomialÚargmax)ÚpredsÚtemperatureZ	exp_predsZprobasrrrÚsample^s
rUc@seZdZdd„Zdd„ZdS)ÚCustomModelEvalcCs@||_||_||_|	|_||_||_||_||_||_|
|_	dS)N)
r:r1Úsentences_testÚnext_words_testÚ
examples_fileÚ
meanProb_filer%r&Úindices_wordÚbigram)ÚselfrYrZr%r&r[r:r1rWrXÚbigramModelrrrÚ__init__jszCustomModelEval.__init__c*
sâˆj d|¡tj tˆjˆjƒ¡}ˆjˆj|}xødD]ð}|}ˆj dt|ƒd¡ˆj dd 	|¡d¡ˆj d 	|¡¡x–t
dƒD]Š}t d	ˆjf¡}x&t
|ƒD]\}	}
ˆj|
|d
|	f<q²Wˆjj|d
dd
}t||ƒ}ˆj|}
|d	d…}| |
¡ˆj d|
¡q”Wˆj d¡q>Wˆj d¡ˆj ¡d
}ttˆjƒ|ƒ}t g¡}t g¡‰xôt
dƒD]è}||||d	g}||}|dkr¸tˆjƒd	|d	<tˆj|d
|d	…ˆj|d
|d	…tˆj|d
|d	…ƒd
ˆjƒ\}}ˆj |¡}t ˆt |d	¡¡‰x8t
|jd
ƒD]&}t ||dd…|||f¡}q8Wq|W‡‡fdd„t
ˆjƒDƒ}dd„|Dƒ}‡‡fdd„t
ˆjƒDƒ}dd„|Dƒ}d
}d
}d
}d
}d
}x„t
tˆjƒƒD]r}|||}ˆj ||¡} ˆj ||¡}!t | ¡dkr,|| }|d	}t |!¡dkrÜ||!}|d	}qÜW|tˆjƒ}"||}#||}$tˆjˆjtˆjƒd
ˆjƒ\}}ˆj |¡}t |d	¡‰‡‡fdd„t
ˆjƒDƒ}dd„|Dƒ}‡‡fdd„t
ˆjƒDƒ}dd„|Dƒ}d
}d
}d
}d
}%d
}&xŒt
tˆjƒƒD]z}|||||f}ˆj ||¡} ˆj ||¡}!t | ¡dkrt|| }|%d	}%t |!¡dkr||!}|&d	}&qW|tˆjƒ}'||%}(||&})ˆj  d|¡ˆj  dt|"ƒdttˆjƒƒd¡ˆj  dt|'ƒdttˆjƒƒd¡ˆj  dt|#ƒdt|tˆjƒƒd¡ˆj  dt|(ƒdt|%tˆjƒƒd¡ˆj  d t|$ƒdt|tˆjƒƒd¡ˆj  d!t|)ƒdt|&tˆjƒƒd¡ˆj  ¡dS)"Nz'
----- Generating text after Epoch: %d
)g333333Ó?gš™™™™™Ù?gà?g333333ã?gffffffæ?z----- Diversity:Ú
z----- Generating with seed:
"ú z"
é2rr)ÚverbosezQ================================================================================
é
éécs$g|]\}}|ˆjˆ|g‘qSr)r[)r.r/r0)Ú
indMaxWordr]rrr2©sz0CustomModelEval.on_epoch_end.<locals>.<listcomp>cSsg|]}tj|ddd‘qS)F)r3r4)r5r6)r.r*rrrr2ªscs*g|]"\}}|dgˆjˆ|g‘qS)éÿÿÿÿ)r[)r.r/r0)rgr]rrr2«scSsg|]}tj|ddd‘qS)F)r3r4)r5r6)r.r*rrrr2¬sFcs$g|]\}}|ˆjˆ|g‘qSr)r[)r.r/r0)rgr]rrr2ÄscSsg|]}tj|ddd‘qS)F)r3r4)r5r6)r.r*rrrr2Åscs*g|]"\}}|dgˆjˆ|g‘qS)rh)r[)r.r/r0)rgr]rrr2ÆscSsg|]}tj|ddd‘qS)F)r3r4)r5r6)r.r*rrrr2Çsz'
----- Prop/Per/Bigram after Epoch: %d
z	PropTest	ú	zPropVal	zPerplexicityTest	zPerplexityVal	zBigramTest	z
BigramVal	)!rYÚwriterrÚrandintrr:rWÚstrÚjoinr rr%r!r&rHÚpredictrUr[rÚflushrÚarrayr-r1rRÚshaper\Ú
perplexityÚmathÚisinfrXrZ)*r]ÚepochÚlogsÚ
seed_indexÚseedÚ	diversityÚsentencerÚx_predr*r8rSÚ
next_indexÚ	next_wordÚbsZstepTZAPZbvZrangeVrCÚ
modelInputÚlabelsr/r<Ú	test_dataÚtokenized_text_biÚtest_data_bigramÚadprÚadperÚadbiÚcntperÚcntbiÚperÚbiÚmeanPÚmeanPerÚmeanBiÚ	cntperValÚcntbiValÚmeanPValÚ
meanPerValÚ	meanBiValr)rgr]rÚon_epoch_endvs´







P, **....zCustomModelEval.on_epoch_endN)Ú__name__Ú
__module__Ú__qualname__r_r“rrrrrVisrVc@seZdZdd„Zdd„ZdS)ÚCustomModelEval_v2cCs@||_||_||_|	|_||_||_||_||_||_|
|_	dS)N)
r:r1rWrXrYrZr%r&r[r\)r]rYrZr%r&r[r:r1rWrXr^rrrr_äszCustomModelEval_v2.__init__c$sΈj d|¡tj tˆjˆjƒ¡}ˆjˆj|}xødD]ð}|}ˆj dt|ƒd¡ˆj dd 	|¡d¡ˆj d 	|¡¡x–t
dƒD]Š}t d	ˆjf¡}x&t
|ƒD]\}	}
ˆj|
|d
|	f<q²Wˆjj|d
dd
}t||ƒ}ˆj|}
|d	d…}| |
¡ˆj d|
¡q”Wˆj d¡q>Wˆj d¡ˆj ¡tˆjˆjtˆjƒd
ˆjƒ\}}ˆj |¡}t |d	¡‰‡‡fdd„t
ˆjƒDƒ}dd„|Dƒ}‡‡fdd„t
ˆjƒDƒ}dd„|Dƒ}d
}d
}d
}d
}d
}xŒt
tˆjƒƒD]z}|||||f}ˆj ||¡}ˆj ||¡}t |¡dkrL||}|d	}t |¡dkrô||}|d	}qôW|tˆjƒ}||}||}d
}d
} d
}!d
}"d
}#ˆj d|¡ˆj dt| ƒdttˆjƒƒd¡ˆj dt|ƒdttˆjƒƒd¡ˆj dt|!ƒdt|tˆjƒƒd¡ˆj dt|ƒdt|tˆjƒƒd¡ˆj dt|"ƒdt|#tˆjƒƒd¡ˆj dt|ƒdt|tˆjƒƒd¡ˆj ¡dS)Nz'
----- Generating text after Epoch: %d
)g333333Ó?gš™™™™™Ù?gà?g333333ã?gffffffæ?z----- Diversity:r`z----- Generating with seed:
"raz"
rbrr)rczQ================================================================================
rdcs$g|]\}}|ˆjˆ|g‘qSr)r[)r.r/r0)rgr]rrr2sz3CustomModelEval_v2.on_epoch_end.<locals>.<listcomp>cSsg|]}tj|ddd‘qS)F)r3r4)r5r6)r.r*rrrr2scs*g|]"\}}|dgˆjˆ|g‘qS)rh)r[)r.r/r0)rgr]rrr2scSsg|]}tj|ddd‘qS)F)r3r4)r5r6)r.r*rrrr2sFz'
----- Prop/Per/Bigram after Epoch: %d
z	PropTest	rizPropVal	zPerplexicityTest	zPerplexityVal	zBigramTest	z
BigramVal	)rYrjrrrkrr:rWrlrmr rr%r!r&rHrnrUr[rror-rXrRr\rrrsrtrZr1)$r]rurvrwrxryrzrr{r*r8rSr|r}rr€r<rr‚rƒr„r…r†rŽrr/r‰rŠrr‘r’r‡r‹rŒrrˆr)rgr]rr“ðsv




 **....zCustomModelEval_v2.on_epoch_endN)r”r•r–r_r“rrrrr—ãsr—)r)r)rJ)Ú__doc__rsÚnumpyrÚkerasZkeras.modelsrZkeras.layersrrrrrrr5Znltk.lm.preprocessingr	Znltk.lmr
rrr,r-r>rIrUÚ	callbacksÚCallbackrVr—rrrrÚ<module>s" 



z
back to top