# serialize.jl
# This file is a part of Julia. License is MIT: https://julialang.org/license
module Serializer
import Base: GMP, Bottom, unsafe_convert, uncompressed_ast
import Core: svec
using Base: ViewIndex, Slice, index_lengths, unwrap_unionall
export serialize, deserialize, SerializationState
# Per-stream (de)serialization context.
#   io                - the wrapped stream
#   counter           - next slot number handed out for back-references
#   table             - object <-> slot table used for back-references
#   pending_refs      - stack of slots waiting for their object during deserialization
#   known_object_data - objects remembered by their 64-bit object number
mutable struct SerializationState{I<:IO} <: AbstractSerializer
    io::I
    counter::Int
    table::ObjectIdDict
    pending_refs::Vector{Int}
    known_object_data::Dict{UInt64,Any}
    function SerializationState{I}(io::I) where I<:IO
        return new(io, 0, ObjectIdDict(), Int[], Dict{UInt64,Any}())
    end
end

# Convenience constructor: infer the type parameter from the stream itself.
function SerializationState(io::IO)
    return SerializationState{typeof(io)}(io)
end
## serializing values ##
# types AbstractSerializer and Serializer # defined in dict.jl
# Number of small non-negative integer literals (0 through n_int_literals-1) that each
# get their own entry at the end of TAGS, once as Int32 and once as Int64.
const n_int_literals = 33
# Number of `:_reserved_` placeholder entries in TAGS.
const n_reserved_slots = 25
# Number of placeholder `Symbol` entries that follow the named dummy tags in TAGS.
const n_reserved_tags = 12
# Tag table. `sertag` returns the 1-based position of an entry and `writetag` writes that
# position as a single byte, so the order of the entries defines the wire format.
const TAGS = Any[
Symbol, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128,
Float16, Float32, Float64, Char, DataType, Union, UnionAll, TypeName, Tuple,
Array, Expr, LineNumberNode, LabelNode, GotoNode, QuoteNode, CodeInfo, TypeVar,
Core.Box, Core.MethodInstance, Module, Task, String, SimpleVector, Method,
GlobalRef, SlotNumber, TypedSlot, NewvarNode, SSAValue,
# dummy entries for tags that don't correspond directly to types
Symbol, # UNDEFREF_TAG
Symbol, # BACKREF_TAG
Symbol, # LONGBACKREF_TAG
Symbol, # SHORTBACKREF_TAG
Symbol, # LONGTUPLE_TAG
Symbol, # LONGSYMBOL_TAG
Symbol, # LONGEXPR_TAG
Symbol, # LONGSTRING_TAG
Symbol, # SHORTINT64_TAG
Symbol, # FULL_DATATYPE_TAG
Symbol, # WRAPPER_DATATYPE_TAG
Symbol, # OBJECT_TAG
Symbol, # REF_OBJECT_TAG
Symbol, # FULL_GLOBALREF_TAG
Symbol, # HEADER_TAG
fill(Symbol, n_reserved_tags)...,
(), Bool, Any, Bottom, Core.TypeofBottom, Type, svec(), Tuple{}, false, true, nothing,
:Any, :Array, :TypeVar, :Box, :Tuple, :Ptr, :return, :call, Symbol("::"), :Function,
:(=), :(==), :(===), :gotoifnot, :A, :B, :C, :M, :N, :T, :S, :X, :Y, :a, :b, :c, :d, :e, :f,
:g, :h, :i, :j, :k, :l, :m, :n, :o, :p, :q, :r, :s, :t, :u, :v, :w, :x, :y, :z, :add_int,
:sub_int, :mul_int, :add_float, :sub_float, :new, :mul_float, :bitcast, :start, :done, :next,
:indexed_next, :getfield, :meta, :eq_int, :slt_int, :sle_int, :ne_int, :push_loc, :pop_loc,
:pop, :arrayset, :arrayref, :apply_type, :inbounds, :getindex, :setindex!, :Core, :!, :+,
:Base, :static_parameter, :convert, :colon, Symbol("#self#"), Symbol("#temp#"), :tuple,
fill(:_reserved_, n_reserved_slots)...,
(Int32(0):Int32(n_int_literals-1))...,
(Int64(0):Int64(n_int_literals-1))...
]
# every tag is written as a UInt8 and tag 0 is used as an escape marker (see write_as_tag),
# so the table must hold exactly 255 entries
@assert length(TAGS) == 255
const ser_version = 7 # do not make changes without bumping the version #!
const NTAGS = length(TAGS)
# Find the tag of `v` by object identity (pointer comparison against the entries of
# `TAGS`). Returns the 1-based table index as an `Int32`, or `Int32(-1)` if `v` has no tag.
function sertag(@nospecialize(v))
    target = pointer_from_objref(v)
    slots = convert(Ptr{Ptr{Void}}, pointer(TAGS))
    # note: constant ints & reserved slots never returned here
    searchable = NTAGS - (n_reserved_slots + 2*n_int_literals)
    @inbounds for idx in 1:searchable
        if target == unsafe_load(slots, idx)
            return idx % Int32
        end
    end
    return Int32(-1)
end
# Inverse of `sertag`: return the `TAGS` entry stored at tag index `i`.
function desertag(i::Int32)
    return TAGS[i]
end
# tags >= this just represent themselves, their whole representation is 1 byte
const VALUE_TAGS = sertag(())
# Tags of the Int32 / Int64 literal 0; the literal n is written as tag ZERO32_TAG+n
# (resp. ZERO64_TAG+n) by the Int32 / Int64 `serialize` methods.
const ZERO32_TAG = Int32(NTAGS-(2*n_int_literals-1))
const ZERO64_TAG = Int64(NTAGS-(n_int_literals-1))
# Tags looked up directly from the table for values and types handled specially.
const TRUE_TAG = sertag(true)
const FALSE_TAG = sertag(false)
const EMPTYTUPLE_TAG = sertag(())
const TUPLE_TAG = sertag(Tuple)
const SIMPLEVECTOR_TAG = sertag(SimpleVector)
const SYMBOL_TAG = sertag(Symbol)
const ARRAY_TAG = sertag(Array)
const EXPR_TAG = sertag(Expr)
const MODULE_TAG = sertag(Module)
const METHODINSTANCE_TAG = sertag(Core.MethodInstance)
const METHOD_TAG = sertag(Method)
const TASK_TAG = sertag(Task)
const DATATYPE_TAG = sertag(DataType)
const TYPENAME_TAG = sertag(TypeName)
const INT32_TAG = sertag(Int32)
const INT64_TAG = sertag(Int64)
const GLOBALREF_TAG = sertag(GlobalRef)
const BOTTOM_TAG = sertag(Bottom)
const UNIONALL_TAG = sertag(UnionAll)
const STRING_TAG = sertag(String)
# SSAValue is the last real type in TAGS; the dummy entries follow it in the same order
# as the constants below, so each of these tags is an offset from the SSAValue tag.
const o0 = sertag(SSAValue)
const UNDEFREF_TAG = Int32(o0+1)
const BACKREF_TAG = Int32(o0+2)
const LONGBACKREF_TAG = Int32(o0+3)
const SHORTBACKREF_TAG = Int32(o0+4)
const LONGTUPLE_TAG = Int32(o0+5)
const LONGSYMBOL_TAG = Int32(o0+6)
const LONGEXPR_TAG = Int32(o0+7)
const LONGSTRING_TAG = Int32(o0+8)
const SHORTINT64_TAG = Int32(o0+9)
const FULL_DATATYPE_TAG = Int32(o0+10)
const WRAPPER_DATATYPE_TAG = Int32(o0+11)
const OBJECT_TAG = Int32(o0+12)
const REF_OBJECT_TAG = Int32(o0+13)
const FULL_GLOBALREF_TAG = Int32(o0+14)
const HEADER_TAG = Int32(o0+15)
# Write `tag` to `s` as a single byte and return what `write` returns.
function writetag(s::IO, tag)
    return write(s, UInt8(tag))
end
# Write a tag that stands for a value. Tags below `VALUE_TAGS` are preceded by a zero
# byte; the tag byte itself is always written.
function write_as_tag(s::IO, tag)
    if tag < VALUE_TAGS
        write(s, UInt8(0))
    end
    return write(s, UInt8(tag))
end
# cycle handling
# Back-reference bookkeeping for `x`.
# If `x` was already recorded in `s.table`, emit a back-reference to its slot (using the
# narrowest of the three encodings that fits) and return `true`. Otherwise record `x`
# under the next free slot and return `false`, so that the caller serializes it in full.
function serialize_cycle(s::AbstractSerializer, x)
    slot = get(s.table, x, -1)::Int
    if slot == -1
        s.table[x] = s.counter
        s.counter += 1
        return false
    end
    if slot <= typemax(UInt16)
        writetag(s.io, SHORTBACKREF_TAG)
        write(s.io, UInt16(slot))
    elseif slot <= typemax(Int32)
        writetag(s.io, BACKREF_TAG)
        write(s.io, Int32(slot))
    else
        writetag(s.io, LONGBACKREF_TAG)
        write(s.io, Int64(slot))
    end
    return true
end
# Like `serialize_cycle`, but for a first-time object also writes its type header in
# "reference object" form. Returns `true` when a back-reference was emitted instead.
function serialize_cycle_header(s::AbstractSerializer, @nospecialize(x))
    if serialize_cycle(s, x)
        return true
    end
    serialize_type(s, typeof(x), true)
    return false
end
# Forget all back-reference state (counter, table and pending slots) and return `s`.
function reset_state(s::AbstractSerializer)
    empty!(s.pending_refs)
    empty!(s.table)
    s.counter = 0
    return s
end
# Booleans are written as a single tag byte.
function serialize(s::AbstractSerializer, x::Bool)
    return writetag(s.io, x ? TRUE_TAG : FALSE_TAG)
end

# Pointers are never sent as-is: a null pointer of the same type is written instead.
function serialize(s::AbstractSerializer, p::Ptr)
    return serialize_any(s, oftype(p, C_NULL))
end

# The empty tuple has a dedicated tag.
function serialize(s::AbstractSerializer, ::Tuple{})
    return writetag(s.io, EMPTYTUPLE_TAG)
end
# Tuples: tag, element count (one byte if it fits, otherwise an Int32 under the long
# tag), then each element in order.
function serialize(s::AbstractSerializer, t::Tuple)
    n = length(t)
    if n > 255
        writetag(s.io, LONGTUPLE_TAG)
        write(s.io, Int32(n))
    else
        writetag(s.io, TUPLE_TAG)
        write(s.io, UInt8(n))
    end
    for elem in t
        serialize(s, elem)
    end
end
# SimpleVectors: tag, Int32 length, then each element in order.
function serialize(s::AbstractSerializer, v::SimpleVector)
    writetag(s.io, SIMPLEVECTOR_TAG)
    n = Int32(length(v))
    write(s.io, n)
    for elem in v
        serialize(s, elem)
    end
end
# Symbols: a symbol present in the tag table is written as its tag. Any other symbol is
# written as a length-prefixed run of its name bytes; symbols longer than 7 bytes also
# take part in back-referencing.
function serialize(s::AbstractSerializer, x::Symbol)
    tag = sertag(x)
    tag > 0 && return write_as_tag(s.io, tag)
    namep = unsafe_convert(Ptr{UInt8}, x)
    nbytes = Int(ccall(:strlen, Csize_t, (Cstring,), namep))
    if nbytes > 7 && serialize_cycle(s, x)
        return
    end
    if nbytes > 255
        writetag(s.io, LONGSYMBOL_TAG)
        write(s.io, Int32(nbytes))
    else
        writetag(s.io, SYMBOL_TAG)
        write(s.io, UInt8(nbytes))
    end
    return unsafe_write(s.io, namep, nbytes)
end
# Write the raw element data of `a` to `s`.
# Empty arrays write nothing and return 0. `Bool` arrays are run-length encoded, one byte
# per run: the high bit holds the value and the low 7 bits the run length (1..127).
# Every other element type is handed to `write` unchanged.
function serialize_array_data(s::IO, a)
    if isempty(a)
        return 0
    end
    if eltype(a) !== Bool
        return write(s, a)
    end
    current = a[1]
    runlen = 1
    for i = 2:length(a)
        if a[i] == current && runlen != 127
            runlen += 1
        else
            # value changed or the run is full: flush it and start a new one
            write(s, UInt8((UInt8(current) << 7) | runlen))
            current = a[i]
            runlen = 1
        end
    end
    return write(s, UInt8((UInt8(current) << 7) | runlen))
end
# Arrays: back-reference check, tag, element type (omitted for UInt8), then the length
# (vectors) or the size tuple (other ranks), then the data. Bits-type elements go through
# `serialize_array_data`; otherwise each element is serialized individually and an
# unassigned element is marked with UNDEFREF_TAG.
function serialize(s::AbstractSerializer, a::Array)
    if serialize_cycle(s, a)
        return
    end
    T = eltype(a)
    writetag(s.io, ARRAY_TAG)
    T === UInt8 || serialize(s, T)
    if ndims(a) == 1
        serialize(s, length(a))
    else
        serialize(s, size(a))
    end
    if isbits(T)
        return serialize_array_data(s.io, a)
    end
    sizehint!(s.table, div(length(a),4))  # prepare for lots of pointers
    @inbounds for i in eachindex(a)
        if isassigned(a, i)
            serialize(s, a[i])
        else
            writetag(s.io, UNDEFREF_TAG)
        end
    end
end
# Views of an `Array` are first trimmed to the viewed data, then serialized field by field.
function serialize(s::AbstractSerializer, a::SubArray{T,N,A}) where {T,N,A<:Array}
    return serialize_any(s, trimmedsubarray(a))
end
# Copy the elements of `V` into a fresh array of the trimmed size and build an
# equivalent SubArray over that copy.
function trimmedsubarray(V::SubArray{T,N,A}) where {T,N,A<:Array}
    compact = Array{eltype(V)}(trimmedsize(V))
    copy!(compact, V)
    return _trimmedsubarray(compact, V, (), V.indexes...)
end
# Size of the array needed to hold just the data selected by `V`'s indexes.
function trimmedsize(V)
    return index_lengths(V.indexes...)
end
# Base case: every index of `V` has been translated into `newindexes`, so construct a
# SubArray with the same type parameters as `V` over the new parent `A`. When the `LD`
# parameter is true the last two constructor arguments are the offset computed by
# `Base.compute_offset1` and 1; otherwise both are 0.
function _trimmedsubarray(A, V::SubArray{T,N,P,I,LD}, newindexes) where {T,N,P,I,LD}
LD && return SubArray{T,N,P,I,LD}(A, newindexes, Base.compute_offset1(A, 1, newindexes), 1)
SubArray{T,N,P,I,LD}(A, newindexes, 0, 0)
end
# Recursive step: translate the next index with `trimmedindex` (passing the 1-based
# position it will occupy), append it to `newindexes` and continue with the remaining ones.
_trimmedsubarray(A, V, newindexes, index::ViewIndex, indexes...) = _trimmedsubarray(A, V, (newindexes..., trimmedindex(V.parent, length(newindexes)+1, index)), indexes...)
# Translate one index of a view into the matching index over the trimmed copy.
# A scalar index becomes 1 (of the same type).
function trimmedindex(P, d, i::Real)
    return oftype(i, 1)
end

# Colons are kept unchanged.
function trimmedindex(P, d, i::Colon)
    return i
end

# Slices are kept unchanged.
function trimmedindex(P, d, i::Slice)
    return i
end

# Array indexes are replaced by their own linear indices, reshaped to the same indices
# and converted back to the original index type.
function trimmedindex(P, d, i::AbstractArray)
    return oftype(i, reshape(linearindices(i), indices(i)))
end
# Strings: tag, byte length (one byte if it fits, otherwise an Int64 under the long
# tag), then the string data.
function serialize(s::AbstractSerializer, ss::String)
    nbytes = sizeof(ss)
    if nbytes > 255
        writetag(s.io, LONGSTRING_TAG)
        write(s.io, Int64(nbytes))
    else
        writetag(s.io, STRING_TAG)
        write(s.io, UInt8(nbytes))
    end
    return write(s.io, ss)
end
function serialize(s::AbstractSerializer, ss::SubString{T}) where T<:AbstractString
    # avoid saving a copy of the parent string, keeping the type of ss
    standalone = convert(T, ss)
    return serialize_any(s, convert(SubString{T}, standalone))
end
# Don't serialize the pointers: only the pattern and the two option fields are written,
# in that order, after the type header.
function serialize(s::AbstractSerializer, r::Regex)
    serialize_type(s, typeof(r))
    serialize(s, r.pattern)
    serialize(s, r.compile_options)
    return serialize(s, r.match_options)
end
# BigInt: type header followed by the base-62 text form of the number.
function serialize(s::AbstractSerializer, n::BigInt)
    serialize_type(s, BigInt)
    digits62 = base(62,n)
    return serialize(s, digits62)
end

# BigFloat: type header followed by the `string` form of the number.
function serialize(s::AbstractSerializer, n::BigFloat)
    serialize_type(s, BigFloat)
    text = string(n)
    return serialize(s, text)
end
# Expressions: back-reference check, tag, argument count (one byte if it fits, otherwise
# an Int32 under the long tag), then head, type and each argument.
function serialize(s::AbstractSerializer, ex::Expr)
    if serialize_cycle(s, ex)
        return
    end
    nargs = length(ex.args)
    if nargs > 255
        writetag(s.io, LONGEXPR_TAG)
        write(s.io, Int32(nargs))
    else
        writetag(s.io, EXPR_TAG)
        write(s.io, UInt8(nargs))
    end
    serialize(s, ex.head)
    serialize(s, ex.typ)
    for arg in ex.args
        serialize(s, arg)
    end
end
# Dicts: back-reference check plus type header, Int32 entry count, then key/value pairs.
function serialize(s::AbstractSerializer, d::Dict)
    if serialize_cycle_header(s, d)
        return
    end
    write(s.io, Int32(length(d)))
    for (key, val) in d
        serialize(s, key)
        serialize(s, val)
    end
end
# Write the path of module `m`, outermost first: the key of its root module followed by
# the name of each nested module down to `m`.
function serialize_mod_names(s::AbstractSerializer, m::Module)
    if !Base.is_root_module(m)
        serialize_mod_names(s, module_parent(m))
        return serialize(s, module_name(m))
    end
    return serialize(s, Base.root_module_key(m))
end
# Modules: tag, the module path, then an empty-tuple tag that terminates the path.
function serialize(s::AbstractSerializer, m::Module)
    writetag(s.io, MODULE_TAG)
    serialize_mod_names(s, m)
    return writetag(s.io, EMPTYTUPLE_TAG)
end
# TODO: make this bidirectional, so objects can be sent back via the same key
const object_numbers = WeakKeyDict()
const obj_number_salt = Ref(0)

# Return the 64-bit number identifying `l`, assigning a fresh one on first use.
function object_number(@nospecialize(l))
    global obj_number_salt, object_numbers
    if !haskey(object_numbers, l)
        # a hash function that always gives the same number to the same
        # object on the same machine, and is unique over all machines.
        ln = obj_number_salt[]+(UInt64(myid())<<44)
        obj_number_salt[] += 1
        object_numbers[l] = ln
        return ln::UInt64
    end
    return object_numbers[l]
end
# Generic serializers keep no object-number cache: lookups find nothing and
# remembering is a no-op.
function lookup_object_number(s::AbstractSerializer, n::UInt64)
    return nothing
end

function remember_object(s::AbstractSerializer, @nospecialize(o), n::UInt64)
    return nothing
end

# `SerializationState` caches objects by number in `known_object_data`.
function lookup_object_number(s::SerializationState, n::UInt64)
    cache = s.known_object_data
    return get(cache, n, nothing)
end

function remember_object(s::SerializationState, @nospecialize(o), n::UInt64)
    cache = s.known_object_data
    cache[n] = o
    return nothing
end
# Methods: back-reference check, tag, object number, the descriptive fields in a fixed
# order, then the uncompressed source and generator code (`nothing` where undefined).
function serialize(s::AbstractSerializer, meth::Method)
    if serialize_cycle(s, meth)
        return
    end
    writetag(s.io, METHOD_TAG)
    write(s.io, object_number(meth))
    serialize(s, meth.module)
    serialize(s, meth.name)
    serialize(s, meth.file)
    serialize(s, meth.line)
    serialize(s, meth.sig)
    serialize(s, meth.sparam_syms)
    serialize(s, meth.ambig)
    serialize(s, meth.nargs)
    serialize(s, meth.isva)
    source = isdefined(meth, :source) ? uncompressed_ast(meth, meth.source) : nothing
    serialize(s, source)
    generator = isdefined(meth, :generator) ?
        uncompressed_ast(meth, meth.generator.inferred) : nothing
    serialize(s, generator)
    return nothing
end
# MethodInstances: only those whose `def` is a Module can be serialized. The
# `inferred_const` field is replaced by UNDEFREF_TAG when it is not defined.
function serialize(s::AbstractSerializer, linfo::Core.MethodInstance)
    if serialize_cycle(s, linfo)
        return
    end
    if !isa(linfo.def, Module)
        error("can only serialize toplevel MethodInstance objects")
    end
    writetag(s.io, METHODINSTANCE_TAG)
    serialize(s, linfo.inferred)
    if !isdefined(linfo, :inferred_const)
        writetag(s.io, UNDEFREF_TAG)
    else
        serialize(s, linfo.inferred_const)
    end
    serialize(s, linfo.sparam_vals)
    serialize(s, linfo.rettype)
    serialize(s, linfo.specTypes)
    serialize(s, linfo.def)
    return nothing
end
# Tasks: a task that has started but not finished cannot be serialized. Otherwise the
# code, storage, state, result and exception are written in that order; a `:queued`
# state is recorded as `:runnable`.
function serialize(s::AbstractSerializer, t::Task)
    if serialize_cycle(s, t)
        return
    end
    if istaskstarted(t) && !istaskdone(t)
        error("cannot serialize a running Task")
    end
    sent_state = (t.state == :queued || t.state == :runnable) ? (:runnable) : t.state
    fields = [t.code, t.storage, sent_state, t.result, t.exception]
    writetag(s.io, TASK_TAG)
    for fld in fields
        serialize(s, fld)
    end
end
# GlobalRefs: normally the tag, the module and the name. A reference into
# `__deserialized_types__`, or to a defined constant in `Main`, whose value is a type
# wrapper that `should_send_whole_type` accepts is sent as the whole type instead.
function serialize(s::AbstractSerializer, g::GlobalRef)
    candidate = (g.mod === __deserialized_types__ ) ||
        (g.mod === Main && isdefined(g.mod, g.name) && isconst(g.mod, g.name))
    if candidate
        v = getfield(g.mod, g.name)
        unw = unwrap_unionall(v)
        if isa(unw,DataType) && v === unw.name.wrapper && should_send_whole_type(s, unw)
            # handle references to types in Main by sending the whole type.
            # needed to be able to send nested functions (#15451).
            writetag(s.io, FULL_GLOBALREF_TAG)
            serialize(s, v)
            return
        end
    end
    writetag(s.io, GLOBALREF_TAG)
    serialize(s, g.mod)
    return serialize(s, g.name)
end
# TypeNames: back-reference check, tag, object number, then the full description.
function serialize(s::AbstractSerializer, t::TypeName)
    if serialize_cycle(s, t)
        return
    end
    writetag(s.io, TYPENAME_TAG)
    write(s.io, object_number(t))
    return serialize_typename(s, t)
end
# Write the full description of the type named by `t`. The fields are written in a fixed
# order that `deserialize_typename` reads back: name, field names, then the properties of
# the primary (unwrapped) type, then the method table (or UNDEFREF_TAG if there is none).
function serialize_typename(s::AbstractSerializer, t::TypeName)
serialize(s, t.name)
serialize(s, t.names)
# the primary type is the body of the wrapper with all UnionAll layers removed
primary = unwrap_unionall(t.wrapper)
serialize(s, primary.super)
serialize(s, primary.parameters)
serialize(s, primary.types)
# only whether an instance exists is sent, not the instance itself
serialize(s, isdefined(primary, :instance))
serialize(s, primary.abstract)
serialize(s, primary.mutable)
serialize(s, primary.ninitialized)
if isdefined(t, :mt)
# method table: its name, all of its methods, max_args and the optional kwsorter
serialize(s, t.mt.name)
serialize(s, collect(Base.MethodList(t.mt)))
serialize(s, t.mt.max_args)
if isdefined(t.mt, :kwsorter)
serialize(s, t.mt.kwsorter)
else
writetag(s.io, UNDEFREF_TAG)
end
else
# no method table at all
writetag(s.io, UNDEFREF_TAG)
end
nothing
end
# decide whether to send all data for a type (instead of just its name)
# Returns `true` for types living in `__deserialized_types__` and for types that pass the
# anonymous-function heuristic below; returns `false` for any type whose TypeName has no
# method table.
function should_send_whole_type(s, t::DataType)
tn = t.name
if isdefined(tn, :mt)
# TODO improve somehow
# send whole type for anonymous functions in Main
name = tn.mt.name
mod = tn.module
isanonfunction = mod === Main && # only Main
t.super === Function && # only Functions
unsafe_load(unsafe_convert(Ptr{UInt8}, tn.name)) == UInt8('#') && # hidden type
(!isdefined(mod, name) || t != typeof(getfield(mod, name))) # XXX: 95% accurate test for this being an inner function
# TODO: more accurate test? (tn.name !== "#" name)
#TODO: iskw = startswith(tn.name, "#kw#") && ???
#TODO: iskw && return send-as-kwftype
return mod === __deserialized_types__ || isanonfunction
end
return false
end
# Write the description of DataType `t`.
# - whole type and `t` is the primary type of its name: wrapper tag plus the TypeName
# - otherwise, after the back-reference check: either the full-datatype tag plus the
#   TypeName, or the plain datatype tag plus name and module
# - if `t` has parameters: a count of 0 for the primary type, else the count followed
#   by each parameter
function serialize_type_data(s, t::DataType)
    sendwhole = should_send_whole_type(s, t)
    isprimary = (t === unwrap_unionall(t.name.wrapper))
    if sendwhole && isprimary
        writetag(s.io, WRAPPER_DATATYPE_TAG)
        serialize(s, t.name)
        return
    end
    if serialize_cycle(s, t)
        return
    end
    if !sendwhole
        writetag(s.io, DATATYPE_TAG)
        serialize(s, t.name.name)
        serialize(s, t.name.module)
    else
        writetag(s.io, FULL_DATATYPE_TAG)
        serialize(s, t.name)
    end
    if !isempty(t.parameters)
        if isprimary
            write(s.io, Int32(0))
        else
            write(s.io, Int32(length(t.parameters)))
            for param in t.parameters
                serialize(s, param)
            end
        end
    end
end
# DataTypes as values: types in the tag table are written as their tag, `Tuple` gets
# special treatment, everything else goes through `serialize_type_data`.
function serialize(s::AbstractSerializer, t::DataType)
    tag = sertag(t)
    if tag > 0
        return write_as_tag(s.io, tag)
    elseif t === Tuple
        # `sertag` is not able to find types === to `Tuple` because they
        # will not have been hash-consed. Plus `serialize_type_data` does not
        # handle this case correctly, since Tuple{} != Tuple. `Tuple` is the
        # only type with this property. issue #15849
        return write_as_tag(s.io, TUPLE_TAG)
    end
    return serialize_type_data(s, t)
end
# Write the type header that precedes an object's data: the bare tag when `t` is in the
# tag table, otherwise OBJECT_TAG (or REF_OBJECT_TAG when `ref` is true) followed by the
# type description.
function serialize_type(s::AbstractSerializer, t::DataType, ref::Bool = false)
    tag = sertag(t)
    if tag > 0
        return writetag(s.io, tag)
    end
    header = ref ? REF_OBJECT_TAG : OBJECT_TAG
    writetag(s.io, header)
    return serialize_type_data(s, t)
end
# Int32: values with a literal tag are written as that single byte, all others as
# INT32_TAG followed by the 4-byte value.
function serialize(s::AbstractSerializer, n::Int32)
    if n < 0 || n > (n_int_literals-1)
        writetag(s.io, INT32_TAG)
        return write(s.io, n)
    end
    return write(s.io, UInt8(ZERO32_TAG+n))
end
# Int64: values with a literal tag are written as that single byte; values that fit in
# an Int32 are written in 4 bytes under SHORTINT64_TAG; the rest as INT64_TAG plus
# the 8-byte value.
function serialize(s::AbstractSerializer, n::Int64)
    if 0 <= n <= (n_int_literals-1)
        return write(s.io, UInt8(ZERO64_TAG+n))
    end
    if typemin(Int32) <= n <= typemax(Int32)
        writetag(s.io, SHORTINT64_TAG)
        return write(s.io, Int32(n))
    end
    writetag(s.io, INT64_TAG)
    return write(s.io, n)
end
# The bottom type is written as its tag.
function serialize(s::AbstractSerializer, ::Type{Bottom})
    return write_as_tag(s.io, BOTTOM_TAG)
end
# UnionAlls are written in one of two forms after the tag:
#   form 1 - the innermost body is the primary type of its name: the number of UnionAll
#            layers (Int16) followed by that type
#   form 0 - anything else: the variable and the body of the outermost layer
function serialize(s::AbstractSerializer, u::UnionAll)
    writetag(s.io, UNIONALL_TAG)
    depth = 0
    inner = u
    while isa(inner, UnionAll)
        inner = inner.body
        depth += 1
    end
    if isa(inner, DataType) && inner === unwrap_unionall(inner.name.wrapper)
        write(s.io, UInt8(1))
        write(s.io, Int16(depth))
        serialize(s, inner)
    else
        write(s.io, UInt8(0))
        serialize(s, u.var)
        serialize(s, u.body)
    end
end
# Fallback for any value without a more specific method.
function serialize(s::AbstractSerializer, @nospecialize(x))
    return serialize_any(s, x)
end
# Generic serialization of `x`.
# - a value in the tag table is written as its tag
# - a field-less value with nonzero size is written as its type header plus raw bytes
# - anything else is written as its type header plus each field in order, with
#   UNDEFREF_TAG for undefined fields; mutable values with fields take part in
#   back-referencing
function serialize_any(s::AbstractSerializer, @nospecialize(x))
    tag = sertag(x)
    tag > 0 && return write_as_tag(s.io, tag)
    T = typeof(x)::DataType
    nflds = nfields(x)
    if nflds == 0 && T.size > 0
        serialize_type(s, T)
        return write(s.io, x)
    end
    if T.mutable && nflds > 0
        serialize_cycle(s, x) && return
        serialize_type(s, T, true)
    else
        serialize_type(s, T, false)
    end
    for idx in 1:nflds
        if isdefined(x, idx)
            serialize(s, getfield(x, idx))
        else
            writetag(s.io, UNDEFREF_TAG)
        end
    end
end
"""
    Serializer.writeheader(s::AbstractSerializer)

Write an identifying header to the specified serializer. The header consists of
8 bytes as follows:

| Offset | Description                                     |
|:-------|:------------------------------------------------|
|   0    | tag byte (0x37)                                 |
|   1-2  | signature bytes "JL"                            |
|   3    | protocol version                                |
|   4    | bits 0-1: endianness: 0 = little, 1 = big       |
|   4    | bits 2-3: platform: 0 = 32-bit, 1 = 64-bit      |
|   5-7  | reserved                                        |
"""
function writeheader(s::AbstractSerializer)
    io = s.io
    writetag(io, HEADER_TAG)
    write(io, "JL") # magic bytes
    write(io, UInt8(ser_version))
    if ENDIAN_BOM == 0x04030201
        endianness = 0
    elseif ENDIAN_BOM == 0x01020304
        endianness = 1
    else
        error("unsupported endianness in serializer")
    end
    if sizeof(Int) == 4
        machine = 0
    elseif sizeof(Int) == 8
        machine = 1
    else
        error("unsupported word size in serializer")
    end
    flags = UInt8(endianness) | (UInt8(machine) << 2)
    write(io, flags)
    write(io, b"\x00\x00\x00") # 3 reserved bytes
    return nothing
end
"""
    serialize(stream::IO, value)

Write an arbitrary value to a stream in an opaque format, such that it can be read back by
[`deserialize`](@ref). The read-back value will be as identical as possible to the original.
In general, this process will not work if the reading and writing are done by different
versions of Julia, or an instance of Julia with a different system image. `Ptr` values are
serialized as all-zero bit patterns (`NULL`).

An 8-byte identifying header is written to the stream first. To avoid writing the header,
construct a `SerializationState` and use it as the first argument to `serialize` instead.
See also [`Serializer.writeheader`](@ref).
"""
function serialize(s::IO, x)
    state = SerializationState(s)
    writeheader(state)
    return serialize(state, x)
end
## deserializing values ##
"""
    deserialize(stream)

Read a value written by [`serialize`](@ref). `deserialize` assumes the binary data read from
`stream` is correct and has been serialized by a compatible implementation of [`serialize`](@ref).
It has been designed with simplicity and performance as a goal and does not validate
the data read. Malformed data can result in process termination. The caller has to ensure
the integrity and correctness of data read from `stream`.
"""
function deserialize(s::IO)
    return deserialize(SerializationState(s))
end
# Read one tag byte and dispatch on it.
function deserialize(s::AbstractSerializer)
    tagbyte = read(s.io, UInt8)::UInt8
    return handle_deserialize(s, Int32(tagbyte))
end
# Store `x` in the most recently reserved pending slot, so that back-references
# read later can resolve to it.
function deserialize_cycle(s::AbstractSerializer, @nospecialize(x))
    s.table[pop!(s.pending_refs)] = x
    return nothing
end
# optimized version of:
#     slot = s.counter; s.counter += 1
#     push!(s.pending_refs, slot)
#     slot = pop!(s.pending_refs)
#     s.table[slot] = x
function resolve_ref_immediately(s::AbstractSerializer, @nospecialize(x))
    slot = s.counter
    s.table[slot] = x
    s.counter = slot + 1
    return nothing
end
# handle_deserialize is an internal function that dispatches on the tag byte `b`
# describing the serialized representation. the number of
# representations is fixed, so it does not get extended.
# Returns the deserialized value.
function handle_deserialize(s::AbstractSerializer, b::Int32)
# a zero byte is an escape: the next byte is a tag that stands for its own table entry
# (the counterpart of `write_as_tag`)
if b == 0
return desertag(Int32(read(s.io, UInt8)::UInt8))
end
# value tags are complete in one byte
if b >= VALUE_TAGS
return desertag(b)
elseif b == TUPLE_TAG
return deserialize_tuple(s, Int(read(s.io, UInt8)::UInt8))
# the three back-reference encodings differ only in the width of the slot number
elseif b == SHORTBACKREF_TAG
id = read(s.io, UInt16)::UInt16
return s.table[Int(id)]
elseif b == BACKREF_TAG
id = read(s.io, Int32)::Int32
return s.table[Int(id)]
elseif b == ARRAY_TAG
return deserialize_array(s)
elseif b == DATATYPE_TAG
return deserialize_datatype(s, false)
elseif b == FULL_DATATYPE_TAG
return deserialize_datatype(s, true)
elseif b == WRAPPER_DATATYPE_TAG
tname = deserialize(s)::TypeName
return unwrap_unionall(tname.wrapper)
# generic object: the type comes first, then its type-specific payload
elseif b == OBJECT_TAG
t = deserialize(s)
return deserialize(s, t)
elseif b == REF_OBJECT_TAG
# reserve a slot before reading the type; the type-specific deserializer fills it
# through `deserialize_cycle`
slot = s.counter; s.counter += 1
push!(s.pending_refs, slot)
t = deserialize(s)
return deserialize(s, t)
elseif b == SYMBOL_TAG
return deserialize_symbol(s, Int(read(s.io, UInt8)::UInt8))
elseif b == SHORTINT64_TAG
return Int64(read(s.io, Int32)::Int32)
elseif b == EXPR_TAG
return deserialize_expr(s, Int(read(s.io, UInt8)::UInt8))
elseif b == MODULE_TAG
return deserialize_module(s)
elseif b == STRING_TAG
return deserialize_string(s, Int(read(s.io, UInt8)::UInt8))
elseif b == LONGSTRING_TAG
return deserialize_string(s, Int(read(s.io, Int64)::Int64))
elseif b == SIMPLEVECTOR_TAG
return deserialize_svec(s)
elseif b == GLOBALREF_TAG
return GlobalRef(deserialize(s)::Module, deserialize(s)::Symbol)
elseif b == FULL_GLOBALREF_TAG
# the whole type was sent; rebuild the reference from its TypeName
ty = deserialize(s)
tn = unwrap_unionall(ty).name
return GlobalRef(tn.module, tn.name)
elseif b == LONGTUPLE_TAG
return deserialize_tuple(s, Int(read(s.io, Int32)::Int32))
elseif b == LONGEXPR_TAG
return deserialize_expr(s, Int(read(s.io, Int32)::Int32))
elseif b == LONGBACKREF_TAG
id = read(s.io, Int64)::Int64
return s.table[Int(id)]
elseif b == LONGSYMBOL_TAG
return deserialize_symbol(s, Int(read(s.io, Int32)::Int32))
elseif b == HEADER_TAG
# skip the 7 header bytes that follow the tag, then read the actual value
for _ = 1:7
read(s.io, UInt8)
end
return deserialize(s)
end
# any remaining tag names a type from the tag table whose payload follows directly
t = desertag(b)
if t.mutable && length(t.types) > 0 # manual specialization of fieldcount
slot = s.counter; s.counter += 1
push!(s.pending_refs, slot)
end
return deserialize(s, t)
end
# Read `len` bytes as a symbol name. Symbols longer than 7 bytes are entered into the
# back-reference table, mirroring the serializing side.
function deserialize_symbol(s::AbstractSerializer, len::Int)
    buf = Base._string_n(len)
    unsafe_read(s.io, pointer(buf), len)
    sym = Symbol(buf)
    len > 7 && resolve_ref_immediately(s, sym)
    return sym
end
# Read `len` consecutive values into a tuple.
function deserialize_tuple(s::AbstractSerializer, len)
    return ntuple(i -> deserialize(s), len)
end
# Read an Int32 count followed by that many values, returned as a SimpleVector.
function deserialize_svec(s::AbstractSerializer)
    n = read(s.io, Int32)
    elems = Any[ deserialize(s) for i=1:n ]
    return svec(elems...)
end
# Read a module path and resolve it to the module.
# Current format: root module key, then nested module names terminated by `()`.
# Old format: one tuple holding the whole path (the empty tuple meaning `Main`).
function deserialize_module(s::AbstractSerializer)
    mkey = deserialize(s)
    if !isa(mkey, Tuple)
        m = Base.root_module(mkey)
        mname = deserialize(s)
        while mname !== ()
            m = getfield(m, mname)::Module
            mname = deserialize(s)
        end
        return m
    end
    # old version, TODO: remove
    if mkey === ()
        return Main
    end
    m = Base.root_module(mkey[1])
    for i = 2:length(mkey)
        m = getfield(m, mkey[i])::Module
    end
    return m
end
# Deserialize a `Method`.
# The stream holds the method's object number followed by the fields written by
# `serialize(s, ::Method)`, in the same order. If a method with that object number is
# already known to `s`, it is returned unchanged (the stream is still consumed in full).
# Otherwise a new method is created, filled in, inserted into the method table of its
# first argument's type (unless a method with that signature is already there) and
# remembered under its object number.
function deserialize(s::AbstractSerializer, ::Type{Method})
    lnumber = read(s.io, UInt64)
    meth = lookup_object_number(s, lnumber)
    if meth !== nothing
        meth = meth::Method
        makenew = false
    else
        meth = ccall(:jl_new_method_uninit, Ref{Method}, (Any,), Main)
        makenew = true
    end
    # register the method before reading its fields so back-references to it resolve
    deserialize_cycle(s, meth)
    mod = deserialize(s)::Module
    name = deserialize(s)::Symbol
    file = deserialize(s)::Symbol
    line = deserialize(s)::Int32
    # A signature is a `UnionAll` for methods declared with `where` clauses, so it
    # must be asserted as `Type` rather than `DataType`.
    sig = deserialize(s)::Type
    sparam_syms = deserialize(s)::SimpleVector
    ambig = deserialize(s)::Union{Array{Any,1}, Void}
    nargs = deserialize(s)::Int32
    isva = deserialize(s)::Bool
    template = deserialize(s)
    generator = deserialize(s)
    if makenew
        meth.module = mod
        meth.name = name
        meth.file = file
        meth.line = line
        meth.sig = sig
        meth.sparam_syms = sparam_syms
        meth.ambig = ambig
        meth.nargs = nargs
        meth.isva = isva
        # TODO: compress template
        if template !== nothing
            meth.source = template
            meth.pure = template.pure
        end
        if generator !== nothing
            linfo = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ())
            linfo.specTypes = Tuple
            linfo.inferred = generator
            linfo.def = meth
            meth.generator = linfo
        end
        # only insert the method if its signature is not already in the table
        ftype = ccall(:jl_first_argument_datatype, Any, (Any,), sig)::DataType
        if isdefined(ftype.name, :mt) && nothing === ccall(:jl_methtable_lookup, Any, (Any, Any, UInt), ftype.name.mt, sig, typemax(UInt))
            ccall(:jl_method_table_insert, Void, (Any, Any, Ptr{Void}), ftype.name.mt, meth, C_NULL)
        end
        remember_object(s, meth, lnumber)
    end
    return meth
end
# Deserialize a toplevel `Core.MethodInstance`: allocate an uninitialized instance,
# register it for back-references, then fill its fields in the order in which
# `serialize(s, ::Core.MethodInstance)` wrote them.
function deserialize(s::AbstractSerializer, ::Type{Core.MethodInstance})
linfo = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, (Ptr{Void},), C_NULL)
deserialize_cycle(s, linfo)
linfo.inferred = deserialize(s)::CodeInfo
# `inferred_const` is optional: UNDEFREF_TAG means it was not defined on the sending side
tag = Int32(read(s.io, UInt8)::UInt8)
if tag != UNDEFREF_TAG
linfo.inferred_const = handle_deserialize(s, tag)
end
linfo.sparam_vals = deserialize(s)::SimpleVector
linfo.rettype = deserialize(s)
linfo.specTypes = deserialize(s)
linfo.def = deserialize(s)::Module
return linfo
end
# Deserialize an `Array` (the ARRAY_TAG byte has already been consumed).
# Layout: optional element type (absent means UInt8), then a length (vectors) or a
# dimension tuple, then the element data. The array is stored in its back-reference
# slot as soon as it has been allocated or filled.
function deserialize_array(s::AbstractSerializer)
# reserve the back-reference slot before anything else is read
slot = s.counter; s.counter += 1
d1 = deserialize(s)
if isa(d1, Type)
elty = d1
d1 = deserialize(s)
else
# no element type was written: the array holds UInt8
elty = UInt8
end
if isa(d1, Integer)
# fast path: a vector of bits elements (other than Bool) is read in one go
if elty !== Bool && isbits(elty)
a = Array{elty, 1}(d1)
s.table[slot] = a
return read!(s.io, a)
end
dims = (Int(d1),)
else
dims = convert(Dims, d1)::Dims
end
if isbits(elty)
n = prod(dims)::Int
if elty === Bool && n > 0
# Bool data is run-length encoded: high bit = value, low 7 bits = run length
A = Array{Bool, length(dims)}(dims)
i = 1
while i <= n
b = read(s.io, UInt8)::UInt8
v = (b >> 7) != 0
count = b & 0x7f
nxt = i + count
while i < nxt
A[i] = v
i += 1
end
end
else
A = read!(s.io, Array{elty}(dims))
end
s.table[slot] = A
return A
end
# non-bits elements: register the array first so that its elements can refer back to
# it, then read the elements one by one
A = Array{elty, length(dims)}(dims)
s.table[slot] = A
sizehint!(s.table, s.counter + div(length(A),4))
for i = eachindex(A)
tag = Int32(read(s.io, UInt8)::UInt8)
# UNDEFREF_TAG marks an unassigned element, which is left unassigned
if tag != UNDEFREF_TAG
@inbounds A[i] = handle_deserialize(s, tag)
end
end
return A
end
# Read an expression with `len` arguments. The expression is entered into the
# back-reference table before its contents are read.
function deserialize_expr(s::AbstractSerializer, len)
    ex = Expr(:temp)
    resolve_ref_immediately(s, ex)
    ex.head = deserialize(s)::Symbol
    typ = deserialize(s)
    ex.args = Any[ deserialize(s) for i = 1:len ]
    ex.typ = typ
    return ex
end
# Empty module in which `deserialize_typename` creates the types it reconstructs.
module __deserialized_types__ end
# TypeNames: read the 64-bit object number, then the type description.
function deserialize(s::AbstractSerializer, ::Type{TypeName})
    return deserialize_typename(s, read(s.io, UInt64))
end
# Read the type description written by `serialize_typename` and return the TypeName.
# If the object number is already known to `s`, the existing TypeName is reused and the
# data is only consumed; otherwise a new type is created inside `__deserialized_types__`.
function deserialize_typename(s::AbstractSerializer, number)
name = deserialize(s)::Symbol
tn = lookup_object_number(s, number)
if tn !== nothing
makenew = false
else
# reuse the same name for the type, if possible, for nicer debugging
tn_name = isdefined(__deserialized_types__, name) ? gensym() : name
tn = ccall(:jl_new_typename_in, Ref{TypeName}, (Any, Any),
tn_name, __deserialized_types__)
makenew = true
end
# remember and register the TypeName before reading the rest of the description
remember_object(s, tn, number)
deserialize_cycle(s, tn)
names = deserialize(s)::SimpleVector
super = deserialize(s)::Type
parameters = deserialize(s)::SimpleVector
types = deserialize(s)::SimpleVector
has_instance = deserialize(s)::Bool
abstr = deserialize(s)::Bool
mutabl = deserialize(s)::Bool
ninitialized = deserialize(s)::Int32
if makenew
tn.names = names
# TODO: there's an unhandled cycle in the dependency graph at this point:
# while deserializing super and/or types, we may have encountered
# tn.wrapper and throw UndefRefException before we get to this point
ndt = ccall(:jl_new_datatype, Any, (Any, Any, Any, Any, Any, Any, Cint, Cint, Cint),
tn, tn.module, super, parameters, names, types,
abstr, mutabl, ninitialized)
tn.wrapper = ndt.name.wrapper
ccall(:jl_set_const, Void, (Any, Any, Any), tn.module, tn.name, tn.wrapper)
ty = tn.wrapper
if has_instance && !isdefined(ty, :instance)
# use setfield! directly to avoid `fieldtype` lowering expecting to see a Singleton object already on ty
Core.setfield!(ty, :instance, ccall(:jl_new_struct, Any, (Any, Any...), ty))
end
end
# method table section: UNDEFREF_TAG means the type had no method table
tag = Int32(read(s.io, UInt8)::UInt8)
if tag != UNDEFREF_TAG
mtname = handle_deserialize(s, tag)
defs = deserialize(s)
maxa = deserialize(s)::Int
if makenew
tn.mt = ccall(:jl_new_method_table, Any, (Any, Any), name, tn.module)
tn.mt.name = mtname
tn.mt.max_args = maxa
for def in defs
# methods whose `sig` field is not defined are skipped
if isdefined(def, :sig)
ccall(:jl_method_table_insert, Void, (Any, Any, Ptr{Void}), tn.mt, def, C_NULL)
end
end
end
# optional kwsorter: read in any case, installed only on a new type
tag = Int32(read(s.io, UInt8)::UInt8)
if tag != UNDEFREF_TAG
kws = handle_deserialize(s, tag)
if makenew
tn.mt.kwsorter = kws
end
end
end
return tn::TypeName
end
# Deserialize a DataType written by `serialize_type_data`.
# `full == true` means the whole TypeName was sent; otherwise only a name and a module
# were sent and the type is looked up with `getfield`. The result is stored in its
# back-reference slot once all parameters have been applied.
function deserialize_datatype(s::AbstractSerializer, full::Bool)
slot = s.counter; s.counter += 1
if full
tname = deserialize(s)::TypeName
ty = tname.wrapper
else
name = deserialize(s)::Symbol
mod = deserialize(s)::Module
ty = getfield(mod,name)
end
if isa(ty,DataType) && isempty(ty.parameters)
# non-parametric type: no parameter count was written
t = ty
else
np = Int(read(s.io, Int32)::Int32)
if np == 0
# a count of zero stands for the primary (unparameterized) type
t = unwrap_unionall(ty)
elseif ty === Tuple
# note np==0 has its own tag
if np == 1
t = Tuple{deserialize(s)}
elseif np == 2
t = Tuple{deserialize(s), deserialize(s)}
elseif np == 3
t = Tuple{deserialize(s), deserialize(s), deserialize(s)}
elseif np == 4
t = Tuple{deserialize(s), deserialize(s), deserialize(s), deserialize(s)}
else
t = Tuple{Any[ deserialize(s) for i=1:np ]...}
end
else
# apply the parameters one at a time, in the order they were written
t = ty
for i = 1:np
t = t{deserialize(s)}
end
end
end
s.table[slot] = t
return t
end
# UnionAlls come in the two forms written by `serialize(s, ::UnionAll)`:
#   form 0 - a variable and a body, recombined directly
#   form 1 - a layer count `n` and a primary type: layers are peeled off the type's
#            wrapper until exactly `n` UnionAll layers remain
function deserialize(s::AbstractSerializer, ::Type{UnionAll})
    form = read(s.io, UInt8)
    if form == 0
        var = deserialize(s)
        body = deserialize(s)
        return UnionAll(var, body)
    end
    n = read(s.io, Int16)
    t = deserialize(s)::DataType
    # count how many UnionAll layers the full wrapper has
    depth = 0
    probe = t.name.wrapper
    while isa(probe, UnionAll)
        probe = probe.body
        depth += 1
    end
    # strip the outer layers that were not part of the serialized value
    w = t.name.wrapper
    excess = depth - n
    while excess > 0
        w = w.body
        excess -= 1
    end
    return w
end
# Tasks: create a placeholder task, register it for back-references, then overwrite
# its fields in the order they were written.
function deserialize(s::AbstractSerializer, ::Type{Task})
    task = Task(()->nothing)
    deserialize_cycle(s, task)
    task.code = deserialize(s)
    task.storage = deserialize(s)
    task.state = deserialize(s)
    task.result = deserialize(s)
    task.exception = deserialize(s)
    return task
end
# Allocate a `String` of `len` bytes and fill it directly from the stream.
function deserialize_string(s::AbstractSerializer, len::Int)
    str = ccall(:jl_alloc_string, Ref{String}, (Csize_t,), len)
    unsafe_read(s.io, pointer(str), len)
    return str
end
# default DataType deserializer
# Reads the payload of a value of type `t` (the type itself has already been read):
# - no fields and nonzero size: the raw bytes are read with `read`
# - no fields and zero size: a new instance is created without reading anything
# - bits type with fields: all fields are read first, then the value is constructed
# - anything else: an uninitialized instance is allocated (and registered for
#   back-references if mutable), then fields are set one by one
function deserialize(s::AbstractSerializer, t::DataType)
nf = length(t.types)
if nf == 0 && t.size > 0
# bits type
return read(s.io, t)
end
if nf == 0
return ccall(:jl_new_struct, Any, (Any,Any...), t)
elseif isbits(t)
# small field counts are passed as varargs; larger ones go through an array
if nf == 1
f1 = deserialize(s)
return ccall(:jl_new_struct, Any, (Any,Any...), t, f1)
elseif nf == 2
f1 = deserialize(s)
f2 = deserialize(s)
return ccall(:jl_new_struct, Any, (Any,Any...), t, f1, f2)
elseif nf == 3
f1 = deserialize(s)
f2 = deserialize(s)
f3 = deserialize(s)
return ccall(:jl_new_struct, Any, (Any,Any...), t, f1, f2, f3)
else
flds = Any[ deserialize(s) for i = 1:nf ]
return ccall(:jl_new_structv, Any, (Any,Ptr{Void},UInt32), t, flds, nf)
end
else
x = ccall(:jl_new_struct_uninit, Any, (Any,), t)
t.mutable && deserialize_cycle(s, x)
for i in 1:nf
tag = Int32(read(s.io, UInt8)::UInt8)
# UNDEFREF_TAG marks a field that was undefined; it is left unset
if tag != UNDEFREF_TAG
# the field index passed to the runtime is 0-based
ccall(:jl_set_nth_field, Void, (Any, Csize_t, Any), x, i-1, handle_deserialize(s, tag))
end
end
return x
end
end
# Dicts: read the Int32 entry count, create and register the empty dict, then read
# that many key/value pairs into it.
function deserialize(s::AbstractSerializer, T::Type{Dict{K,V}}) where {K,V}
    n = read(s.io, Int32)
    dict = T()
    sizehint!(dict, n)
    deserialize_cycle(s, dict)
    for i = 1:n
        key = deserialize(s)
        val = deserialize(s)
        dict[key] = val
    end
    return dict
end
# BigFloat: parse the text form that was written.
function deserialize(s::AbstractSerializer, ::Type{BigFloat})
    return parse(BigFloat, deserialize(s))
end

# BigInt: parse the base-62 text form that was written.
function deserialize(s::AbstractSerializer, ::Type{BigInt})
    return parse(BigInt, deserialize(s), 62)
end
# Regex: read the pattern and both option values, then build a fresh Regex from them.
function deserialize(s::AbstractSerializer, t::Type{Regex})
    pat = deserialize(s)
    copts = deserialize(s)
    mopts = deserialize(s)
    return Regex(pat, copts, mopts)
end
if !Sys.iswindows()
    # RandomDevice (non-Windows only): just the `unlimited` flag is sent, and a new
    # device is constructed from it on the receiving side.
    function serialize(s::AbstractSerializer, rd::RandomDevice)
        serialize_type(s, typeof(rd))
        return serialize(s, rd.unlimited)
    end

    function deserialize(s::AbstractSerializer, t::Type{RandomDevice})
        flag = deserialize(s)
        return RandomDevice(flag)
    end
end
end