https://hal.archives-ouvertes.fr/hal-03945531
presolve.jl
function detect_sep_fe!(esample::BitVector, y::Vector{<: Real}, Xexo::Matrix{<: Real}, fes::AbstractVector{<: FixedEffect}; sep_at::Real=0)
# update esample, y, Xexo and fes
before_n = sum(esample)
sub_esample = trues(before_n)
@assert before_n == length(y) "esample and y have different length."
for fe in fes
cache = trues(fe.n)
level_visited = falses(fe.n)
@assert before_n == length(fe.refs) "esample and fe have different length."
for i in 1:before_n
# for cache = true finally, all y[i] must equal to sep_at
if y[i] == sep_at
cache[fe.refs[i]] *= true
else
cache[fe.refs[i]] *= false
end
level_visited[fe.refs[i]] = true
end
for i in 1:before_n
# if all y in this ref are equal to sep_at, exclude these ys
if cache[fe.refs[i]] & level_visited[fe.refs[i]]
sub_esample[i] = false
end
end
end
after_n = sum(sub_esample)
dropped_n = before_n - after_n
if dropped_n > 0
@info "$(dropped_n) observations detected as separated using the FE method. Dropping them ..."
end
# drop them in esample
remaining = findall(esample)
esample[remaining[.~sub_esample]] .= false
# drop them in fe
fes = FixedEffect[_subset(fe, sub_esample) for fe in fes]
return esample, y[sub_esample], Xexo[sub_esample, :], fes
end
function detect_sep_simplex(y::Vector{<: Real}, Xexo::Matrix{<: Real}, fes::AbstractVector{<: FixedEffect})
end
function detect_sep_relu!(esample::BitVector, y::Vector{<: Real}, Xexo::Matrix{<: Real}, fes::AbstractVector{<: FixedEffect};
double_precision::Bool = true,
dtol::Real = sqrt(eps(double_precision ? Float64 : Float32)), # tol for solve_coefficients and solve_residuals
dmaxiter::Integer = 100_000, # maxiter for solve_coefficients and solve_residuals
rtol::Real = 1e-3, # tol for ReLU
rmaxiter::Integer = 1000, # maxiter for ReLU
method::Symbol = :cpu,
verbose::Bool = false
)
verbose && @info "identifying separations using ReLU."
@assert all(GLFixedEffectModels.basecol(Xexo)) "There are Multicollinearity in the data, this should be done with before running ReLU."
before_n = sum(esample)
@assert before_n == length(y)
Xexo_copy = deepcopy(Float64.(Xexo))
u = (y.==0)
K = sum(u) / rtol^2
w = (y.>0) .* K + .~(y.>0)
weights = Weights(w)
feM = AbstractFixedEffectSolver{double_precision ? Float64 : Float32}(fes, weights, Val{method})
outer_converged = false
for iter in 1:rmaxiter
iterations = Int[]
convergeds = Bool[]
verbose && println("* iter $(iter)")
Xexo = deepcopy(Xexo_copy)
Xexo, b, c = solve_residuals!(Xexo, feM; tol = dtol, maxiter = dmaxiter)
Xexo = sqrt.(w) .* Xexo
u = sqrt.(w) .* u
append!(iterations, b)
append!(convergeds, c)
crossx = cholesky!(Symmetric(Xexo' * Xexo))
beta = crossx \ (Xexo' * u)
xb = Xexo_copy * beta
# something is wrong
newfes, b, c = solve_coefficients!(u - xb, feM; tol = dtol, maxiter = dmaxiter)
append!(iterations, b)
append!(convergeds, c)
iterations = maximum(iterations)
converged = all(convergeds)
if converged == false
@warn "Convergence of annihilation procedure not achieved in $(iterations) iterations; try increasing dmaxiter or decreasing dtol."
@warn "Cannot identify separated obs because lsmr wasn't solved properly. Skipping ..."
return esample, y, Xexo_copy, fes
end
xbd = sum(newfes) + xb
xbd[abs.(xbd) .< rtol] .= 0
if all(xbd.>=0)
outer_converged = true
is_sep = xbd .> 0
@info "$(sum(is_sep)) observations detected as separated using the ReLU method. Dropping them ..."
sub_esample = .~is_sep
# drop them in esample
remaining = findall(esample)
esample[remaining[is_sep]] .= false
# drop them in fe
fes = FixedEffect[GLFixedEffectModels._subset(fe, sub_esample) for fe in fes]
# drop them in y
y = y[sub_esample]
# drop them in Xexo_copy
Xexo_copy = Xexo_copy[sub_esample,:]
return esample, y, Xexo_copy, fes
else
verbose && println("negative xbd: $(sum(xbd.<0))")
end
u = xbd
u[u .< 0] .= 0
end
if ~outer_converged
@warn "cannot identify separated obs. Maximal iteration reached. Skipping ..."
return esample, y, Xexo_copy, fes
end
end
function detect_linear_dependency_among_X!(X::Matrix{<: Real}, basecoef::BitVector; coefnames::Vector)
# assert this is a model without IV or weights, this code is written at GLFixedEffectModel@0.3.1
before_n = sum(basecoef)
@assert before_n == size(X,2) "Dimension of basecoef and X doesn't match"
base = GLFixedEffectModels.basecol(X)
if all(base)
return X, basecoef
else
X = GLFixedEffectModels.getcols(X, base)
remaining_cols = findall(basecoef)
regressor_ind_to_be_dropped = remaining_cols[.~base]
basecoef[regressor_ind_to_be_dropped] .= 0 # update basecoef
@info "Multicollinearity detected among columns of X. Dropping regressors: $(join(coefnames[regressor_ind_to_be_dropped]," "))"
return X, basecoef
end
end