https://github.com/wasiur/dynamic_survival_analysis
Tip revision: 2a9c54080481714d1ac48a92e04bf496981969a4 authored by Henry.L on 17 August 2020, 13:19:28 UTC
Update README2.md
Update README2.md
Tip revision: 2a9c540
dsacore.py
## Import packages
import os
import sys
import time
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import seaborn as sns
import scipy as sc
from scipy.integrate import solve_ivp
from scipy.interpolate import interp1d
from scipy.optimize import minimize
from scipy.stats import stats
from scipy.stats import gamma, beta, norm, uniform, expon, gaussian_kde
from scipy.integrate import cumtrapz
from scipy.optimize import basinhopping, dual_annealing, shgo, differential_evolution
from tabulate import tabulate
from scipy.special import lambertw
from numpy.random import RandomState
rand = RandomState()
try:
import cPickle as pickle
except ModuleNotFoundError:
import pickle
import pystan
from mycolours import *
def my_plot_configs():
plt.style.use('seaborn-bright')
plt.rcParams["figure.frameon"] = False
plt.rcParams['ps.useafm'] = True
plt.rcParams['pdf.use14corefonts'] = True
plt.rcParams['text.usetex'] = True
plt.rcParams['axes.linewidth'] = 2
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = 'Helvetica'
plt.rcParams['axes.labelweight'] = 'bold'
def fig_save(fig, Plot_Folder, fname):
# fig.savefig(os.path.join (Plot_Folder, fname),dpi=300)
fig.savefig(os.path.join (Plot_Folder, fname + "." + 'pdf'),
format='pdf', transparent=True)
def euler1d(odefun, endpoints, ic=1.0):
timepoints = np.linspace(endpoints[0], endpoints[1], 1000)
stepsize = timepoints[1] - timepoints[0]
sol = np.zeros(len(timepoints), dtype=np.float64)
sol[0] = ic
for i in range(1,len(timepoints)):
t = timepoints[i-1]
y = sol[i-1]
sol[i] = sol[i-1] + np.float64(odefun(t,y)) * stepsize
return timepoints, sol
class DSA():
def __init__(self, df=None, a=0.4, b=0.6, rho=1e-6, parent=None, **kwargs):
self.df = df
self.a = a
self.b = b
self.rho = rho
self.parent = parent
self.T = np.ceil(self.df['infection'].max())
if kwargs.get('bounds') is None:
self.bounda = (0.1, 1.0)
self.boundb = (0.1, 1.0)
self.boundrho = (1e-9, 1e-3)
else:
self.bounda, self.boundb, self.boundrho = kwargs.get('bounds')
self.bounds = [self.bounda, self.boundb, self.boundrho]
if kwargs.get('priordist') is None:
a_prior = uniform(loc=self.bounda[0],
scale=self.bounda[1] - self.bounda[0])
b_prior = uniform(loc=self.boundb[0],
scale=self.boundb[1] - self.boundb[0])
r_prior = uniform(loc=self.boundrho[0],
scale=self.boundrho[1] - self.boundrho[0])
self.priordist = [a_prior, b_prior, r_prior]
else:
self.priordist = kwargs.get('priordist')
# if kwargs.get('proposaldist') is None:
# self.proposaldist = lambda theta: self.priordist
# else:
# self.proposaldist = kwargs.get('proposaldist')
if kwargs.get('timepoints') is None:
self.timepoints = np.linspace(0.0, self.T, 1000)
else:
self.timepoints = kwargs.get('timepoints')
self.fits = None
self.g = None
self.offset = None
self.data = None
self.bounds_gamma = None
if kwargs.get('mh_chains') is None:
self.mh_chains = None
else:
self.mh_chains = kwargs.get('mh_chains')
# if kwargs.get('stan_model') is None:
# self.stan_model = None
# else:
# self.stan_model = kwargs.get('stan_model')
# if kwargs.get('stan_fit') is None:
# self.stan_fit = None
# else:
# self.stan_fit = kwargs.get('stan_fit')
def proposaldist(self, theta):
return self.priordist
@classmethod
def poisson_ode_fun(cls,t, S, a, b, rho):
dsdt = [np.float64(- a * S * np.log(S, dtype=np.float64) - b * (S - S * S) - b * rho * S)]
return dsdt
@classmethod
def data_likelihood(cls, df, theta):
a, b, rho = theta
S0 = [1.0]
T = np.max(df.values)
odefun = lambda t, S: DSA.poisson_ode_fun(t, S, a=a, b=b, rho=rho)
timepoints, sol = euler1d(odefun=odefun, endpoints=[0.0, T])
S = interp1d(timepoints, sol)
smax = S(T)
factor = 1 - smax
Z = 1
j = 0
for x in df.values:
s = S(x)
if s > 0:
z = (a * s * np.log(s, dtype=np.float64) + b * (s - s ** 2) + b * rho * s) / factor
Z *= z
return Z
@classmethod
def data_loglikelihood(cls, df, theta):
a, b, rho = theta
S0 = [1.0]
T = np.max(df.values)
odefun = lambda t, S: DSA.poisson_ode_fun(t, S, a=a, b=b, rho=rho)
timepoints, sol = euler1d(odefun=odefun, endpoints=[0.0, T])
S = interp1d(timepoints, sol)
smax = S(T)
factor = 1 - smax
Z = 0
j = 0
for x in df.values:
s = S(x)
if s > 0:
z = (a * s * np.log(s, dtype=np.float64) + b * (s - s ** 2) + b * rho * s) / factor
if z > 0:
Z += np.log(z, dtype=np.float64)
j += 1
return Z
@classmethod
def prior_density(cls, p, theta):
a, b, rho = theta
res = p[0].pdf(a) * p[1].pdf(b) * p[2].pdf(rho)
return res
@classmethod
def log_prior(cls, p, theta):
a, b, rho = theta
res = p[0].logpdf(a) + p[1].logpdf(b) + p[2].logpdf(rho)
# res = p[0].pdf(a) * p[1].pdf(b) * p[2].pdf(rho)
if res > 0:
return res
else:
return -np.inf
@classmethod
def draw_from_prior(cls, p):
return [p[0].rvs(), p[1].rvs(), p[2].rvs()]
@classmethod
def posterior_likelihood(cls, df, priordist, theta):
dl = DSA.data_likelihood(df, theta=theta)
pl = DSA.prior_density(p=priordist, theta=theta)
return dl * pl
@classmethod
def neg_log_posterior(cls, df, priordist, theta):
dl = DSA.data_loglikelihood(df, theta=theta)
lp = DSA.log_prior(p=priordist, theta=theta)
res = - dl - lp
return res
@classmethod
def log_posterior(cls, df, priordist, theta):
dl = DSA.data_loglikelihood(df, theta=theta)
lp = DSA.log_prior(p=priordist, theta=theta)
res = dl + lp
return res
@classmethod
def draw_proposal(cls, proposal):
ss = [proposal[0].rvs(), proposal[1].rvs(), proposal[2].rvs()]
return ss
@classmethod
def proposal_loglikelihood(cls, proposal, theta):
ss = [proposal[0].logpdf(theta[0]),
proposal[1].logpdf(theta[1]),
proposal[2].logpdf(theta[2])]
return np.sum(ss)
@classmethod
def proposal_likelihood(cls, proposal, theta):
ss = [proposal[0].pdf(theta[0]),
proposal[1].pdf(theta[1]),
proposal[2].pdf(theta[2])]
return ss[0]*ss[1]*ss[2]
@classmethod
def accept_prob(cls, df, priordist, proposal, theta1, theta2):
l1 = DSA.posterior_likelihood(df, priordist, theta1)
l2 = DSA.posterior_likelihood(df, priordist, theta2)
g21 = proposal(theta = theta2)
p21 = DSA.proposal_likelihood(proposal=g21, theta=theta1)
g12 = proposal(theta = theta1)
p12 = DSA.proposal_likelihood(proposal=g12, theta=theta2)
if l1*p12 > 0:
a = min(1.0, (l2*p21)/(l1*p12))
else:
a = 1.0
return a
@classmethod
def mh(cls, df, p, proposal, burnin=10 ** 3, chain_length=10 ** 4, thinning=10):
chain = np.zeros((chain_length, 3), dtype=np.float)
chain[0] = DSA.draw_from_prior(p)
for i in np.arange(1, chain_length):
current_state = chain[i-1]
prop = proposal(theta=current_state)
new = DSA.draw_proposal(prop)
a = DSA.accept_prob(df=df, priordist=p, proposal=proposal, theta1=current_state, theta2=new)
if uniform.rvs() < a:
chain[i] = new
else:
chain[i] = chain[i - 1]
after_burnin = chain[burnin:chain_length - 1]
idx = np.arange(0, chain_length - burnin, thinning)
return after_burnin[idx]
@property
def c(self):
return self.b * self.rho
@property
def R0(self):
return self.b / self.a
@property
def invrho(self):
return 1.0/self.rho
@property
def delta(self):
if self.g is not None:
return self.a - self.g
else:
return 0
@property
def tau(self):
return ((self.R0 + lambertw(-self.R0 * np.exp(-self.R0 * (1 + self.rho)))) / self.R0).real
@property
def kT(self):
if self.parent is None:
return self.df['infection'].shape[0]
else:
return self.parent.kT
@property
def rescale(self):
return 1 - self.S(self.T)
@property
def n(self):
return self.kT / self.rescale
@property
def sT(self):
return self.n - self.kT
@property
def kinfty(self):
return self.tau * self.n
@property
def sinfty(self):
return (1 - self.tau) * self.n
@property
def theta(self):
return [self.a, self.b, self.rho]
@property
def S(self):
a, b, rho = self.theta
odefun = lambda t, S: DSA.poisson_ode_fun(t, S, a=a, b=b, rho=rho)
timepoints, sol = euler1d(odefun=odefun, endpoints=[0.0, self.T])
S = interp1d(timepoints, sol)
return S
def var_T(self):
if self.fits is not None:
return np.var(list(f.T for f in self.fits))
else:
return 0.0
def var_a(self):
if self.fits is not None:
return np.var(list(f.a for f in self.fits))
else:
return 0.0
def var_b(self):
if self.fits is not None:
return np.var(list(f.b for f in self.fits))
else:
return 0.0
def var_c(self):
if self.fits is not None:
return np.var(list(f.c for f in self.fits))
else:
return 0.0
def cov_abc(self):
if self.fits is not None:
fitted_parms = np.zeros((len(self.theta), len(self.fits)), dtype=np.float64)
fitted_parms[0] = list(f.a for f in self.fits)
fitted_parms[1] = list(f.b for f in self.fits)
fitted_parms[2] = list(f.c for f in self.fits)
return np.cov(fitted_parms)
else:
return np.zeros((len(self.theta), len(self.fits)), dtype=np.float64)
def cov_abr(self):
if self.fits is not None:
fitted_parms = np.zeros((len(self.theta), len(self.fits)), dtype=np.float64)
fitted_parms[0] = list(f.a for f in self.fits)
fitted_parms[1] = list(f.b for f in self.fits)
fitted_parms[2] = list(f.rho for f in self.fits)
return np.cov(fitted_parms)
else:
return np.zeros((len(self.theta), len(self.fits)), dtype=np.float64)
def var_R0(self):
if self.fits is not None:
return np.var(list(f.R0 for f in self.fits))
else:
return 0.0
def var_rho(self):
if self.fits is not None:
return np.var(list(f.rho for f in self.fits))
else:
return 0.0
def var_invrho(self):
if self.fits is not None:
return np.var(list(f.invrho for f in self.fits))
else:
return 0.0
def var_tau(self):
if self.fits is not None:
return np.var(list(f.tau for f in self.fits))
else:
return 0.0
def var_n(self):
if self.fits is not None:
return np.var(list(f.n for f in self.fits))
else:
return 0.0
def var_kT(self):
if self.fits is not None:
return np.var(list(f.kT for f in self.fits))
else:
return 0.0
def var_sT(self):
if self.fits is not None:
return np.var(list(f.sT for f in self.fits))
else:
return 0.0
def var_kinfty(self):
if self.fits is not None:
return np.var(list(f.kinfty for f in self.fits))
else:
return 0.0
def var_sinfty(self):
if self.fits is not None:
return np.var(list(f.sinfty for f in self.fits))
else:
return 0.0
def mean_T(self):
if self.fits is not None:
return np.mean(list(f.T for f in self.fits))
else:
return 0.0
def mean_a(self):
if self.fits is not None:
return np.mean(list(f.a for f in self.fits))
else:
return 0.0
def mean_b(self):
if self.fits is not None:
return np.mean(list(f.b for f in self.fits))
else:
return 0.0
def mean_c(self):
if self.fits is not None:
return np.mean(list(f.c for f in self.fits))
else:
return 0.0
def mean_R0(self):
if self.fits is not None:
return np.mean(list(f.R0 for f in self.fits))
else:
return self.R0
def mean_rho(self):
if self.fits is not None:
return np.mean(list(f.rho for f in self.fits))
else:
return self.rho
def mean_invrho(self):
if self.fits is not None:
return np.mean(list(f.invrho for f in self.fits))
else:
return self.invrho
def mean_tau(self):
if self.fits is not None:
return np.mean(list(f.tau for f in self.fits))
else:
return self.tau
def mean_n(self):
if self.fits is not None:
return np.mean(list(f.n for f in self.fits))
else:
return self.n
def mean_kT(self):
if self.fits is not None:
return np.mean(list(f.kT for f in self.fits))
else:
return self.kT
def mean_sT(self):
if self.fits is not None:
return np.mean(list(f.sT for f in self.fits))
else:
return self.sT
def mean_kinfty(self):
if self.fits is not None:
return np.mean(list(f.kinfty for f in self.fits))
else:
return self.kinfty
def mean_sinfty(self):
if self.fits is not None:
return np.mean(list(f.sinfty for f in self.fits))
else:
return self.sinfty
def fit_till_success(self, objfun, x0, bounds,
ifglobal=True):
if ifglobal:
flag = True
while flag:
minobj = differential_evolution(objfun,
bounds=bounds)
if minobj.success:
flag = False
else:
x0 = DSA.draw_from_prior(p=self.priordist)
else:
flag = True
while flag:
minobj = minimize(objfun, x0=tuple(x0),
bounds=bounds,
options={'disp': False})
if minobj.success:
flag = False
else:
x0 = DSA.draw_from_prior(p=self.priordist)
return minobj
def fit(self, N=None, laplace=True, summary=False,
ifSave=False, fname='summary.tex'):
if N is None:
self.data = self.df['infection']
elif N > self.df.size:
self.data = self.df['infection']
N = self.df.size
else:
self.data = self.df['infection'].sample(N, replace=True)
x0 = self.theta
bounds = [self.bounda, self.boundb, self.boundrho]
if laplace:
objfun = lambda theta: DSA.neg_log_posterior(df=self.data, priordist=self.priordist, theta=theta)
minobj = self.fit_till_success(objfun, x0=x0, bounds=bounds, ifglobal=True)
else:
objfun = lambda theta: -DSA.data_loglikelihood(self.data, theta=theta)
minobj = self.fit_till_success(objfun, x0=x0, bounds=bounds, ifglobal=True)
self.a, self.b, self.rho = minobj.x
# print summary
if summary:
print(minobj)
self.summary()
# self.summary()
if ifSave:
self.summary(ifSave=True, fname=fname)
return self
def simulate_from_model(self, N):
'''
simulate N infections from the survival curve of the fitted model using inverse sampling
'''
unis = rand.uniform(low=self.S(self.T), high=1, size=N)
vals = np.linspace(0, self.T, 10000)
Ss = np.asarray(list(self.S(x) for x in vals))
values = []
for u in unis:
i = np.argmax(Ss - u < 0)
if i > 0 and i < vals.size - 1:
'''
point slope form to find zero
'''
y0 = Ss[i - 1] - u
y1 = Ss[i] - u
x0 = vals[i - 1]
x1 = vals[i]
m = (y1 - y0) / (x1 - x0)
v = (m * x1 - y1) / m
else:
v = vals[i]
values.append(v)
df = pd.DataFrame(values, index=range(N), columns=['infection'])
return df
def simulate_and_fit(self, N, n, laplace=True):
'''
simulate N data from the model n times and refit
'''
fits = []
for i in range(1, n + 1):
df = self.simulate_from_model(N)
bounds = [self.bounda, self.boundb, self.boundrho]
a, b, rho = DSA.draw_from_prior(self.priordist)
fit = DSA(df=df, a=a, b=b, rho=rho,
bounds=bounds, priordist=self.priordist,
parent=self).fit(laplace=laplace)
fits.append(fit)
self.fits = fits
return self
def simulate_and_fit_parallel(self, N, n, laplace=True, threads=40):
epidemic_obj_arr = []
total_time = 0
for i in range(1, n + 1):
# print("sample", i // 100)
df = self.simulate_from_model(N)
bounds = [self.bounda, self.boundb, self.boundrho]
a, b, rho = DSA.draw_from_prior(self.priordist)
st = time.time()
epidemic_obj = DSA(df=df, a=a, b=b, rho=rho,
bounds=bounds, priordist=self.priordist,
parent=self)
total_time += time.time() - st
epidemic_obj_arr.append(epidemic_obj)
import pickle
st = time.time()
if os.path.exists("epidemic_objects_array_fitted"):
os.remove("epidemic_objects_array_fitted")
pickle.dump(epidemic_obj_arr, open("epidemic_objects_array", "wb"), protocol=3)
commandstr = "mpiexec -n " + str(threads) + " python dsa_parallel_fitting.py " + str(laplace)
os.system(commandstr)
# os.system("mpiexec -n %s python dsa_parallel_fitting.py %s" % (threads, laplace))
epidemic_objects_array_fitted = pickle.load(open("epidemic_objects_array_fitted", "rb"))
print("Total fit time %f" % ((time.time() - st) / 60.0))
self.fits = epidemic_objects_array_fitted
return self
def bayesian_fit(self, N=None, niter=5000, nchains=4):
if N is None:
self.data = self.df['infection']
N = self.df.size
elif N > self.df.size:
self.data = self.df['infection']
N = self.df.size
else:
self.data = self.df['infection'].sample(N, replace=False)
Tmax = self.data.max()
ordered_infection_data = self.data.sort_values().tolist()
pystan_data = dict(N=N,
Tmax=Tmax,
infectiontimes=ordered_infection_data,
t0=0.0)
sm = pystan.StanModel(file='DSA.stan')
fit = sm.sampling(data=pystan_data, iter=niter, chains=nchains)
print(fit)
res_a = fit.extract()['a']
res_b = fit.extract()['b']
res_rho = fit.extract()['rho']
mh_chains = pd.DataFrame({'a': res_a, 'b': res_b, 'rho': res_rho})
self.mh_chains = mh_chains
self.a = np.mean(res_a)
self.b = np.mean(res_b)
self.rho = np.mean(res_rho)
# self.stan_model = sm
# self.stan_fit = fit
# temp = DSA.mh(df=self.data,
# p=self.priordist,
# proposal=self.proposaldist,
# burnin=burnin,
# chain_length=chain_length,
# thinning=thinning)
# res_a = temp[:, 0]
# res_b = temp[:, 1]
# res_rho = temp[:, 2]
# mh_chains = pd.DataFrame({'a': res_a, 'b': res_b, 'rho': res_rho})
# self.mh_chains = mh_chains
fits = []
l = len(res_a)
for i in range(l):
a = res_a[i]
b = res_b[i]
rho = res_rho[i]
fit = DSA(df=self.df, a=a, b=b, rho=rho,
bounds=self.bounds, priordist=self.priordist,
parent=self)
fits.append(fit)
self.fits = fits
return sm, fit
def trace_plot(self):
if self.mh_chains is None:
self.bayesian_fit()
a_chain = self.mh_chains["a"]
b_chain = self.mh_chains["b"]
rho_chain = self.mh_chains["rho"]
my_plot_configs()
fig_a = plt.figure(frameon=False)
plt.plot(a_chain, color=cyans['cyan3'].get_rgb())
plt.ylabel('$a$')
sns.despine()
# ax = plt.gca()
# ax.spines['right'].set_visible(False)
# ax.spines['top'].set_visible(False)
fig_b = plt.figure(frameon=False)
plt.plot(b_chain, color=cyans['cyan3'].get_rgb())
plt.ylabel('$b$')
sns.despine()
# ax = plt.gca()
# ax.spines['right'].set_visible(False)
# ax.spines['top'].set_visible(False)
fig_c = plt.figure(frameon=False)
plt.plot(rho_chain, color=cyans['cyan3'].get_rgb())
plt.ylabel('$\\rho$')
sns.despine()
# ax = plt.gca()
# ax.spines['right'].set_visible(False)
# ax.spines['top'].set_visible(False)
return fig_a, fig_b, fig_c
def add_fits(self, samples):
fits = []
l = np.size(samples, axis=0)
for i in range(l):
a, b, rho = samples[i]
fit = DSA(df=self.df, a=a, b=b, rho=rho,
bounds=self.bounds, priordist=self.priordist,
parent=self)
fits.append(fit)
self.fits = fits
return self
def get_histograms(self):
if self.fits is None:
raise ValueError('\n Please run the inference model first.\n')
my_plot_configs()
figa = plt.figure()
plt.hist(list(f.a for f in self.fits),
bins=50, density=True,
color=cyans['cyan3'].get_rgb()
)
plt.xlabel('$a$')
plt.ylabel('Density')
sns.despine()
# plt.title('$a$')
figb = plt.figure()
plt.hist(list(f.b for f in self.fits),
bins=50, density=True,
color=cyans['cyan3'].get_rgb()
)
plt.xlabel('$b$')
plt.ylabel('Density')
sns.despine()
# plt.title('$b$')
figc = plt.figure()
plt.hist(list(f.c for f in self.fits),
bins=50, density=True,
color=cyans['cyan3'].get_rgb()
)
plt.xlabel('$c$')
plt.ylabel('Density')
sns.despine()
# plt.title('$c$')
figR0 = plt.figure()
plt.hist(list(f.R0 for f in self.fits),
bins=50, density=True,
color=cyans['cyan3'].get_rgb()
)
plt.xlabel('$R_0$')
plt.ylabel('Density')
sns.despine()
# plt.title('$R_0$')
figrho = plt.figure()
plt.hist(list(f.rho for f in self.fits),
bins=50, density=True,
color=cyans['cyan3'].get_rgb()
)
plt.xlabel('$\\rho$')
plt.ylabel('Density')
sns.despine()
# plt.title('rho')
fign, ax = plt.subplots()
plt.hist(list(f.n for f in self.fits),
bins=50, density=True,
color=cyans['cyan3'].get_rgb()
)
ax.get_xaxis().set_major_formatter(matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
plt.xlabel('$n$')
plt.ylabel('Density')
sns.despine()
figsT, ax = plt.subplots()
plt.hist(list(f.sT for f in self.fits),
bins=50, density=True,
color=cyans['cyan3'].get_rgb()
)
# plt.title('$s_T$')
ax.get_xaxis().set_major_formatter(matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
plt.xlabel('$s_T$')
plt.ylabel('Density')
sns.despine()
figkinfty, ax = plt.subplots()
plt.hist(list(f.kinfty for f in self.fits),
bins=50, density=True,
color=cyans['cyan3'].get_rgb()
)
# plt.title('$k_\infty$')
ax.get_xaxis().set_major_formatter(matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
plt.xlabel('$k_\infty$')
plt.ylabel('Density')
sns.despine()
figsinfty, ax = plt.subplots()
plt.hist(list(f.sinfty for f in self.fits),
bins=50, density=True,
color=cyans['cyan3'].get_rgb()
)
plt.title('$s_\infty$')
ax.get_xaxis().set_major_formatter(matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
plt.xlabel('$s_\infty$')
plt.ylabel('Density')
sns.despine()
figsinvrho, ax = plt.subplots()
plt.hist(list(f.invrho for f in self.fits),
bins=50, density=True,
color=cyans['cyan3'].get_rgb()
)
# plt.title('1/rho')
ax.get_xaxis().set_major_formatter(matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
plt.xlabel('$1/\\rho$')
plt.ylabel('Density')
sns.despine()
return (figa, figb, figc, figR0, figrho, fign, figsT, figkinfty, figsinfty, figsinvrho)
def posterior_histograms(self, samples):
a_samples = samples[:,0]
b_samples = samples[:,1]
rho_samples = samples[:,2]
my_plot_configs()
fig_a = plt.figure()
plt.hist(a_samples, bins=50, density=True,
color=cyans['cyan3'].get_rgb())
plt.xlabel('$a$')
plt.ylabel('Density')
sns.despine()
# ax = plt.gca()
# ax.spines['right'].set_visible(False)
# ax.spines['top'].set_visible(False)
fig_b = plt.figure()
plt.hist(b_samples, bins=50, density=True,
color=cyans['cyan3'].get_rgb())
plt.xlabel('$b$')
plt.ylabel('Density')
sns.despine()
# ax = plt.gca()
# ax.spines['right'].set_visible(False)
# ax.spines['top'].set_visible(False)
fig_c = plt.figure()
plt.hist(rho_samples, bins=50, density=True,
color=cyans['cyan3'].get_rgb())
plt.xlabel('$\\rho$')
plt.ylabel('Density')
sns.despine()
# ax = plt.gca()
# ax.spines['right'].set_visible(False)
# ax.spines['top'].set_visible(False)
return fig_a, fig_b, fig_c
def summary(self, ifSave=False, fname=None):
headers = ['Parameter', 'Name', 'MLE', 'Mean', 'StdErr']
table = [['T', 'final time', self.T, None if self.fits is None else self.mean_T(),
None if self.fits is None else np.sqrt(self.var_T())],
["a", 'beta+gamma+delta', self.a, None if self.fits is None else self.mean_a(),
None if self.fits is None else np.sqrt(self.var_a())],
["b", "beta*mu", self.b, None if self.fits is None else self.mean_b(),
None if self.fits is None else np.sqrt(self.var_b())],
["c", "beta*mu*rho", self.c, None if self.fits is None else self.mean_c(),
None if self.fits is None else np.sqrt(self.var_c())],
['R0', "R-naught", self.R0, None if self.fits is None else self.mean_R0(),
None if self.fits is None else np.sqrt(self.var_R0())],
['rho', "initial fraction I", self.rho, None if self.fits is None else self.mean_rho(),
None if self.fits is None else np.sqrt(self.var_rho())],
['tau', "epidemic size", self.tau, None if self.fits is None else self.mean_tau(),
None if self.fits is None else np.sqrt(self.var_tau())],
['1-S(T)', "rescaling", self.rescale, None, None],
['n', "#S+#I", self.n, None if self.fits is None else self.mean_n(),
None if self.fits is None else np.sqrt(self.var_n())],
['kT', "#I(T)", self.kT, None if self.fits is None else self.mean_kT(),
None if self.fits is None else np.sqrt(self.var_kT())],
['sT', "#S(T)", self.sT, None if self.fits is None else self.mean_sT(),
None if self.fits is None else np.sqrt(self.var_sT())],
['kinfty', "#I(infty)", self.kinfty, None if self.fits is None else self.mean_kinfty(),
None if self.fits is None else np.sqrt(self.var_kinfty())],
['sinfty', '#S(infty)', self.sinfty, None if self.fits is None else self.mean_sinfty(),
None if self.fits is None else np.sqrt(self.var_sinfty())],
['1/rho', 'initial total population', self.invrho, None if self.fits is None else self.mean_invrho(),
None if self.fits is None else np.sqrt(self.var_invrho())],
['gamma', 'recovery rate', None if self.g is None else self.g, None, None],
['offset', 'shift parameter', None if self.offset is None else self.offset, None, None]]
print(tabulate(table, headers=headers))
# print(tabulate(table, headers=headers, tablefmt="html"))
# print(tabulate(table,headers=headers,tablefmt="latex_booktabs"))
if ifSave:
str1 = '\\documentclass{article}\n \\usepackage{booktabs} \n \\begin{document}'
str2 = '\\end{document}'
if fname == None:
fname = 'summary.tex'
with open(fname, 'w') as outputfile:
outputfile.write(str1 + tabulate(table, headers=headers, tablefmt="latex_booktabs") + str2)
return self
def predict(self, samples, df, dates, n0=1, d0=0, theta=None):
nSamples = np.size(samples, axis=0)
nDays = len(dates)
time_points = np.arange(nDays)
mean = np.zeros((nSamples, nDays), dtype=np.float)
mean_daily = np.zeros((nSamples, nDays), dtype=np.float)
if theta is not None:
theta = np.mean(samples, axis=0)
# theta = np.mean(samples, axis=0)
# n = self.n
my_plot_configs()
fig_a = plt.figure()
for i in range(nSamples):
a, b, rho = samples[i]
epi = DSA(df=self.df, a=a, b=b, rho=rho)
n = epi.n
odefun = lambda t, S: DSA.poisson_ode_fun(t, S, a=a, b=b, rho=rho)
t, sol = euler1d(odefun=odefun, endpoints=[0.0, nDays + 1])
S = interp1d(t, sol)
mean[i] = np.asarray(list(n * (1 - S(x)) + n0 for x in time_points))
# mean[i][0] = 1
mean_daily[i] = np.append(d0, np.diff(mean[i]))
l1, = plt.plot(dates['d'].dt.date, mean[i], '-', color=cyans['cyan1'].get_rgb(), lw=1, alpha=0.05)
m_ = np.int64(np.ceil(np.mean(mean, axis=0)))
l = np.int64(np.ceil(np.quantile(mean, q=0.025, axis=0)))
h = np.int64(np.ceil(np.quantile(mean, q=0.975, axis=0)))
n = self.n
a, b, rho = theta
odefun = lambda t, S: DSA.poisson_ode_fun(t, S, a=self.a, b=self.b, rho=self.rho)
t, sol = euler1d(odefun=odefun, endpoints=[0.0, nDays + 1])
S = interp1d(t, sol)
m = np.asarray(list(n * (1 - S(x)) + n0 for x in time_points))
l2 = plt.plot(dates['d'].dt.date, m, '-', color=cyans['cyan5'].get_rgb(), lw=3)
# l2_, = plt.plot(dates['d'].dt.date, m_, '-', color=myColours['tud1d'].get_rgb(), lw=3, label='With mitigation')
l3 = plt.plot(dates['d'].dt.date, l, '--', color=cyans['cyan3'].get_rgb(), lw=1.5)
l4 = plt.plot(dates['d'].dt.date, h, '--', color=cyans['cyan3'].get_rgb(), lw=1.5)
l5 = plt.fill_between(dates['d'].dt.date, l, h, alpha=.1, color=cyans['cyan1'].get_rgb())
# l6 = plt.axvline(x=df['time'].max(), color=blues['blue1'].get_rgb(), linestyle='--')
l6 = plt.axvline(x=df['time'][self.T - 1], color=blues['blue1'].get_rgb(), linestyle='--')
l7 = plt.plot(df['time'].values, df['cum_confirm'].values, '-', color=maroons['maroon3'].get_rgb(),
lw=2)
plt.xlabel('Dates')
plt.ylabel('Cumulative infections')
ax = plt.gca()
date_form = DateFormatter("%m-%d")
ax.xaxis.set_major_formatter(date_form)
sns.despine()
fig_b = plt.figure()
for i in range(nSamples):
# l1nd = plt.plot(dates['d'].dt.date, mean_nd_daily[i], '-', color=myColours['tud7b'].get_rgb(), lw=1, alpha=0.05)
l1 = plt.plot(dates['d'].dt.date, mean_daily[i], '-', color=cyans['cyan3'].get_rgb(), lw=1, alpha=0.05)
m_daily = np.append(m[0], np.diff(m))
# m_nd_daily = np.append(m_nd[0], np.diff(m_nd))
# l2nd, = plt.plot(dates['d'].dt.date, m_nd_daily, '-', color=myColours['tud7d'].get_rgb(), lw=3, label='Without mitigation')
l2, = plt.plot(dates['d'].dt.date, m_daily, '-', color=cyans['cyan5'].get_rgb(), lw=3,
label='With mitigation')
# l6 = plt.axvline(x=df['time'].max(), color=myColours['tud7d'].get_rgb(), linestyle='--')
l6 = plt.axvline(x=df['time'][self.T - 1], color=maroons['maroon3'].get_rgb(), linestyle='-')
# l7 = plt.plot(df_ohio['time'].values, df_ohio['daily_confirm'].values, color=myColours['tud11d'].get_rgb(), lw=3)
plt.ylabel('Daily new infections')
plt.xlabel('Dates')
ax = plt.gca()
date_form = DateFormatter("%m-%d")
ax.xaxis.set_major_formatter(date_form)
sns.despine()
# plt.legend(handles=[l2nd, l2])
# fig_save(fig, Plot_Folder, fname_)
# m[0] = 1
# m_nd[0] = 1
my_dict = {}
my_dict['Dates'] = dates['d']
my_dict['Mean'] = m
my_dict['High'] = h
my_dict['Low'] = l
my_dict = pd.DataFrame(my_dict)
# my_dict.to_csv(os.path.join(Plot_Folder, fname + '.csv'), index=False)
return fig_a, fig_b, my_dict
def compute_density(self, theta):
a, b, rho = theta
odefun = lambda t, S: DSA.poisson_ode_fun(t, S, a=a, b=b, rho=rho)
t, sol = euler1d(odefun=odefun, endpoints=[0.0, self.T + 1])
S = interp1d(t, sol)
out = []
ST = S(self.T)
for x in self.timepoints:
Sx = S(x)
out.append((a * Sx * np.log(Sx) + b * (Sx - Sx ** 2) + b * rho * Sx) / (1 - ST))
return out
def plot_density_fit_posterior(self, samples):
nSamples = np.size(samples, axis=0)
Ds = np.zeros((nSamples, len(self.timepoints)), dtype=np.float)
for idx in range(nSamples):
Ds[idx] = self.compute_density(samples[idx])
Dslow = np.quantile(Ds, q=0.025, axis=0)
Dshigh = np.quantile(Ds, q=0.975, axis=0)
Dmean = np.mean(Ds, axis=0)
fig = plt.figure()
plt.plot(self.timepoints, Dmean, '-', color=cyans['cyan3'].get_rgb(), lw=3)
plt.plot(self.timepoints, Dslow, '--', color=cyans['cyan3'].get_rgb(), lw=1)
plt.plot(self.timepoints, Dshigh, '--', color=cyans['cyan3'].get_rgb(), lw=1)
# plt.axvline(x=self.T, color=junglegreen['green3'].get_rgb(), linestyle='-')
mirrored_data = (2 * self.T - self.df['infection'].values).tolist()
combined_data = self.df['infection'].values.tolist() + mirrored_data
dense = gaussian_kde(combined_data)
denseval = list(dense(x) * 2 for x in self.timepoints)
plt.plot(self.timepoints, denseval, '-', color=maroons['maroon3'].get_rgb(), lw=3)
plt.fill_between(self.timepoints, Dslow, Dshigh, alpha=.3, color=maroons['maroon3'].get_rgb())
# plt.legend()
plt.ylabel('$-S_t/(1-S_T)$')
plt.xlabel('t')
c = cumtrapz(Dmean, self.timepoints)
ind = np.argmax(c >= 0.001)
plt.xlim((self.timepoints[ind], self.timepoints[-1] + 1))
sns.despine()
return fig
# density
def density_nuA(self, u):
S = self.Scure
s = S(u)
ST = S(self.Tcure)
return (self.a * s * np.log(s) + self.b * (s - s ** 2) + self.b * self.rho * s) / (1 - ST)
# density
def density_gamma(self, t, gamma):
return expon.pdf(t, scale=1 / gamma)
# integral nu_A Q
def int_nuAQ(self, t, gamma):
l = int(max(np.ceil(100.0 / (self.T / t)), 2))
ttt = np.linspace(0, t, l)
integrand = []
for u in ttt:
result = self.density_nuA(u) * self.density_gamma(t - u, gamma=gamma) # expon.pdf(t-u,scale=1/gamma)
integrand.append(result)
integrand = np.asarray(integrand)
return np.trapz(integrand, ttt)
# improper mixture recovery density
def density_recovery(self, t, gamma):
t = max(t, self.Tcure / 100)
return (1.0 / (1 + self.rho) * self.int_nuAQ(t, gamma) + self.rho / (1 + self.rho) * self.density_gamma(t,
gamma))
# normalization of the improper mixture density
def proper(self, gamma, offset):
s = np.linspace(self.Tcure / 100, self.Tcure, 100).tolist()
vals = [0] + list(self.density_recovery(u + offset, gamma) for u in s)
s = [0] + s
A = np.trapz(vals, s)
return A
def proper_density_recovery(self, t, gamma, prop=None, offset=0):
if prop is None:
return self.density_recovery(t + offset, gamma) / self.proper(gamma, offset=offset)
else:
return self.density_recovery(t + offset, gamma) / prop
def negloglikelihood_gammaoffset(self, theta):
gamma, offset = theta
bound_g, bound_o = self.bounds_gamma
if np.isclose(gamma, bound_g[0]) or np.isclose(gamma, bound_g[1]) or np.isclose(offset,
bound_o[0]) or np.isclose(
offset, bound_o[1]):
lk = 1E6
# print("gamma=", gamma, "offset=", offset, "-LOGLIKE=", lk)
return lk
result = []
prop = self.proper(gamma, offset=offset)
for u in self.datacure:
r = -np.log(self.proper_density_recovery(u, gamma, prop=prop, offset=offset))
result.append(r)
lk = np.sum(result)
# print("gamma=", gamma, "offset=", offset, "-LOGLIKE=", lk)
return lk
def estimate_gamma(self, df_recovery, N, x0, bounds, approach='gamma'):
gamma, offset = self.estimate_gamma_sample(self.theta, df_recovery, N, x0, bounds, approach=approach)
self.g = gamma
self.offset = offset
return self
def estimate_gamma_sample(self, sample, df_recovery, N, x0, bounds, approach='gamma'):
S0 = [1.0]
t = self.T
if approach == 'offset':
t += bounds[1][1]
a, b, rho = sample
odefun = lambda t, S: DSA.poisson_ode_fun(t, S, a=a, b=b, rho=rho)
tt, sol = euler1d(odefun=odefun, endpoints=[0.0, t])
S = interp1d(tt, sol)
self.Scure = S
self.Tcure = np.ceil(df_recovery['recovery'].max())
self.datacure = df_recovery['recovery'].sample(N, replace=True).values
if approach == 'offset':
self.bounds_gamma = bounds
gamma, offset = minimize(
self.negloglikelihood_gammaoffset,
x0=x0,
bounds=self.bounds_gamma,
options={'disp': False, 'maxiter': 3}
).x
return gamma, offset
elif approach == 'prior':
self.bounds_gamma = bounds
offset = 0
alpha, beta = minimize(
self.negloglikelihood_alphabeta,
x0=x0,
bounds=self.bounds_gamma,
options={'disp': False, 'maxiter': 3}
).x
gamma = alpha * beta
elif approach == 'gamma':
self.offset = 0
self.bounds_gamma = bounds
gamma, = minimize(
self.negloglikelihood_gamma,
x0=x0,
bounds=self.bounds_gamma,
options={'disp': False, 'maxiter': 3}
).x
return gamma, offset