// Custom Forward Derivative Function reference __attributeTarget(FunctionDeclBase) attribute_syntax [ForwardDerivative(function)] : ForwardDerivativeAttribute; __attributeTarget(FunctionDeclBase) attribute_syntax [BackwardDerivative(function)] : BackwardDerivativeAttribute; __attributeTarget(FunctionDeclBase) attribute_syntax [PrimalSubstitute(function)] : PrimalSubstituteAttribute; __attributeTarget(FunctionDeclBase) attribute_syntax [ForwardDerivativeOf(function)] : ForwardDerivativeOfAttribute; __attributeTarget(FunctionDeclBase) attribute_syntax [BackwardDerivativeOf(function)] : BackwardDerivativeOfAttribute; __attributeTarget(FunctionDeclBase) attribute_syntax [PrimalSubstituteOf(function)] : PrimalSubstituteOfAttribute; __attributeTarget(DeclBase) attribute_syntax [DerivativeMember(memberName)] : DerivativeMemberAttribute; // Exclude "this" parameter from differentiation. __attributeTarget(FunctionDeclBase) attribute_syntax [NoDiffThis] : NoDiffThisAttribute; // A 'none-type' that acts as a run-time sentinel for zero differentials. public struct NullDifferential : IDifferentiable { // for now, we'll use at least one field to make sure the type is non-empty uint dummy; typedef NullDifferential Differential; [Differentiable] [ForceInline] static Differential dzero() { return { 0 }; } [Differentiable] [ForceInline] static Differential dadd(Differential, Differential) { return { 0 }; } [Differentiable] [ForceInline] static Differential dmul(T, Differential) { return { 0 }; } }; // Existential check for null differential type __intrinsic_op($(kIROp_IsDifferentialNull)) bool isDifferentialNull(IDifferentiable obj); /// Represents a GPU view of a tensor. __generic __magic_type(TensorViewType) __intrinsic_type($(kIROp_TensorViewType)) struct TensorView { __target_intrinsic(cuda, "$0.data_ptr<$G0>()") [__NoSideEffect] Ptr data_ptr(); __target_intrinsic(cuda, "$0.data_ptr_at<$G0>($1)") [__NoSideEffect] Ptr data_ptr_at(uint index); __generic __target_intrinsic(cuda, "$0.data_ptr_at<$G0>($1)") [__NoSideEffect] Ptr data_ptr_at(vector index); __implicit_conversion($(kConversionCost_ImplicitDereference)) __intrinsic_op($(kIROp_TorchTensorGetView)) __init(TorchTensor t); __target_intrinsic(cuda, "$0.load<$G0>($1)") [__NoSideEffect] T load(uint x); __target_intrinsic(cuda, "$0.load<$G0>($1, $2)") [__NoSideEffect] T load(uint x, uint y); __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3)") [__NoSideEffect] T load(uint x, uint y, uint z); __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4)") [__NoSideEffect] T load(uint x, uint y, uint z, uint w); __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4, $5)") [__NoSideEffect] T load(uint i0, uint i1, uint i2, uint i3, uint i4); [__NoSideEffect] __generic __target_intrinsic(cuda, "$0.load<$TR>($1)") T load(vector index); __target_intrinsic(cuda, "$0.store<$G0>($1, $2)") void store(uint x, T val); __target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3)") void store(uint x, uint y, T val); __target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3, $4)") void store(uint x, uint y, uint z, T val); __target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3, $4, $5)") void store(uint x, uint y, uint z, uint w, T val); __target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3, $4, $5, $6)") void store(uint i0, uint i1, uint i2, uint i3, uint i4, T val); __generic __target_intrinsic(cuda, "$0.store<$T2>($1, $2)") void store(vector index, T val); __target_intrinsic(cuda, "*($3) = atomicAdd($0.data_ptr_at<$T2>($1), $2)") void InterlockedAdd(uint index, T val, out T oldVal); __generic __target_intrinsic(cuda, "*($3) = atomicAdd($0.data_ptr_at<$T2>($1), $2)") void InterlockedAdd(vector index, T val, out T oldVal); __target_intrinsic(cuda, "$0.dimensionCount") [__readNone] uint dims(); __target_intrinsic(cuda, "$0.sizes[$1]") [__readNone] uint size(uint i); __target_intrinsic(cuda, "$0.strides[$1]") [__readNone] uint stride(uint i); __subscript(uint index) -> T { [ForceInline] [__NoSideEffect] get { return load(index); } [ForceInline] set { store(index, newValue); } __target_intrinsic(cuda, "$0.load<$G0>($1)") [__NoSideEffect] ref; } __subscript(uint i1, uint i2) -> T { [ForceInline] [__NoSideEffect] get { return load(i1, i2); } [ForceInline] set { store(i1, i2, newValue); } __target_intrinsic(cuda, "$0.load<$G0>($1, $2)") [__NoSideEffect] ref; } __subscript(uint2 i) -> T { [ForceInline] [__NoSideEffect] get { return load(i.x, i.y); } [ForceInline] set { store(i.x, i.y, newValue); } __target_intrinsic(cuda, "$0.load<$G0>($1.x, $1.y)") [__NoSideEffect] ref; } __subscript(uint i1, uint i2, uint i3) -> T { [ForceInline] [__NoSideEffect] get { return load(i1, i2, i3); } [ForceInline] set { store(i1, i2, i3, newValue); } __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3)") [__NoSideEffect] ref; } __subscript(uint3 i) -> T { [ForceInline] [__NoSideEffect] get { return load(i.x, i.y, i.z); } [ForceInline] set { store(i.x, i.y, i.z, newValue); } __target_intrinsic(cuda, "$0.load<$G0>($1.x, $1.y, $1.z)") [__NoSideEffect] ref; } __subscript(uint i1, uint i2, uint i3, uint i4) -> T { [ForceInline] [__NoSideEffect] get { return load(i1, i2, i3, i4); } [ForceInline] set { store(i1, i2, i3, i4, newValue); } __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4)") [__NoSideEffect] ref; } __subscript(uint4 i) -> T { [__NoSideEffect][ForceInline] get { return load(i.x, i.y, i.z, i.w); } [ForceInline] set { store(i.x, i.y, i.z, i.w, newValue); } __target_intrinsic(cuda, "$0.load<$G0>($1.x, $1.y, $1.z, $1.w)") [__NoSideEffect] ref; } __subscript(uint i1, uint i2, uint i3, uint i4, uint i5) -> T { [ForceInline] [__NoSideEffect] get { return load(i1, i2, i3, i4, i5); } [ForceInline] set { store(i1, i2, i3, i4, i5, newValue); } __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4, $5)") [__NoSideEffect] ref; } } ${{{{ for (auto atomicIntegerTypeName : kCudaAtomicIntegerTypes) { }}}} extension TensorView<$(atomicIntegerTypeName)> { typealias __Element = $(atomicIntegerTypeName); __target_intrinsic(cuda, "*($3) = atomicMin($0.data_ptr_at<$T2>($1), $2)") void InterlockedMin(uint index, __Element val, out __Element oldVal); __generic __target_intrinsic(cuda, "*($3) = atomicMin($0.data_ptr_at<$T2>($1), $2)") void InterlockedMin(vector index, __Element val, out __Element oldVal); __target_intrinsic(cuda, "*($3) = atomicMax($0.data_ptr_at<$T2>($1), $2)") void InterlockedMax(uint index, __Element val, out __Element oldVal); __generic __target_intrinsic(cuda, "*($3) = atomicMax($0.data_ptr_at<$T2>($1), $2)") void InterlockedMax(vector index, __Element val, out __Element oldVal); __target_intrinsic(cuda, "*($3) = atomicAnd($0.data_ptr_at<$T2>($1), $2)") void InterlockedAnd(uint index, __Element val, out __Element oldVal); __generic __target_intrinsic(cuda, "*($3) = atomicAnd($0.data_ptr_at<$T2>($1), $2)") void InterlockedAnd(vector index, __Element val, out __Element oldVal); __target_intrinsic(cuda, "*($3) = atomicOr($0.data_ptr_at<$T2>($1), $2)") void InterlockedOr(uint index, __Element val, out __Element oldVal); __generic __target_intrinsic(cuda, "*($3) = atomicOr($0.data_ptr_at<$T2>($1), $2)") void InterlockedOr(vector index, __Element val, out __Element oldVal); __target_intrinsic(cuda, "*($3) = atomicXor($0.data_ptr_at<$T2>($1), $2)") void InterlockedXor(uint index, __Element val, out __Element oldVal); __generic __target_intrinsic(cuda, "*($3) = atomicXor($0.data_ptr_at<$T2>($1), $2)") void InterlockedXor(vector index, __Element val, out __Element oldVal); __target_intrinsic(cuda, "*($3) = atomicExch($0.data_ptr_at<$T2>($1), $2)") void InterlockedExchange(uint index, __Element va, out __Element oldVall); __generic __target_intrinsic(cuda, "*($3) = atomicExch($0.data_ptr_at<$T2>($1), $2)") void InterlockedExchange(vector index, __Element val, out __Element oldVal); __target_intrinsic(cuda, "atomicCAS($0.data_ptr_at<$T2>($1), $2, $3)") void InterlockedCompareExchange(uint index, __Element compare, __Element val); __generic __target_intrinsic(cuda, "atomicCAS($0.data_ptr_at<$T2>($1), $2, $3)") void InterlockedCompareExchange(vector index, __Element compare, __Element val); } ${{{{ } // end for atomicIntegerTypeName }}}} extension TensorView { __target_intrinsic(cuda, "*($3) = atomicExch($0.data_ptr_at($1), $2)") float InterlockedExchange(uint index, float val, out float oldVal); __generic __target_intrinsic(cuda, "*($3) = atomicExch($0.data_ptr_at($1), $2)") float InterlockedExchange(vector index, float val, out float oldVal); __target_intrinsic(cuda, "atomicCAS($0.data_ptr_at($1), slang_bit_cast($2), slang_bit_cast($3))") void InterlockedCompareExchange(uint index, float compare, float val); __generic __target_intrinsic(cuda, "atomicCAS($0.data_ptr_at($1), slang_bit_cast($2), slang_bit_cast($3))") void InterlockedCompareExchange(vector index, float compare, float val); } interface IDiffTensorWrapper { // Derivatives for universal load/store operations. __generic T load_forward(uint i); __generic T load_forward(vector i); __generic void load_backward(uint i, T dOut); __generic void load_backward(vector i, T dOut); __generic void store_forward(uint i, T dx); __generic void store_forward(vector i, T dx); __generic T store_backward(uint i); __generic T store_backward(vector i); // Derivatives for loadOnce/storeOnce operations. These operations // are designed to only run once per-address and don't need atomic // gradient handling. // __generic T loadOnce_forward(uint i); __generic T loadOnce_forward(vector i); __generic void loadOnce_backward(uint i, T dOut); __generic void loadOnce_backward(vector i, T dOut); __generic void storeOnce_forward(uint i, T dx); __generic void storeOnce_forward(vector i, T dx); __generic T storeOnce_backward(uint i); __generic T storeOnce_backward(vector i); }; struct AtomicAdd : IDiffTensorWrapper { TensorView diff; // Derivatives for universal load/store operations. __generic T load_forward(uint i) { return __realCast(diff.load(i)); } __generic T load_forward(vector i) { return __realCast(diff.load(i)); } __generic void load_backward(uint i, T dOut) { float oldVal; diff.InterlockedAdd(i, __realCast(dOut), oldVal); } __generic void load_backward(vector i, T dOut) { float oldVal; diff.InterlockedAdd(i, __realCast(dOut), oldVal); } __generic void store_forward(uint i, T dx) { diff.store(i, __realCast(dx)); } __generic void store_forward(vector i, T dx) { diff.store(i, __realCast(dx)); } __generic T store_backward(uint i) { float oldVal; diff.InterlockedExchange(i, (float)0, oldVal); return __realCast(oldVal); } __generic T store_backward(vector i) { float oldVal; diff.InterlockedExchange(i, (float)0, oldVal); return __realCast(oldVal); } // Derivatives for loadOnce/storeOnce operations. These operations // are designed to only run once per-address and don't need atomic // gradient handling. // __generic T loadOnce_forward(uint i) { return __realCast(diff.load(i)); } __generic T loadOnce_forward(vector i) { return __realCast(diff.load(i)); } __generic void loadOnce_backward(uint i, T dOut) { diff.store(i, __realCast(dOut)); } __generic void loadOnce_backward(vector i, T dOut) { diff.store(i, __realCast(dOut)); } __generic void storeOnce_forward(uint i, T dx) { diff.store(i, __realCast(dx)); } __generic void storeOnce_forward(vector i, T dx) { diff.store(i, __realCast(dx)); } __generic T storeOnce_backward(uint i) { return __realCast(diff.load(i)); } __generic T storeOnce_backward(vector i) { return __realCast(diff.load(i)); } }; __generic struct DiffTensorView { TensorView primal; A diff; uint size(uint i) { return primal.size(i); } uint dims() { return primal.dims(); } uint stride(uint i) { return primal.stride(i); } // Constructors __init(TensorView primal, A diff) { this.primal = primal; this.diff = diff; } __init(TensorView primal) { this.primal = primal; } // Universal load/store operations. [BackwardDerivative(__load_backward)] [ForwardDerivative(__load_forward)] T load(uint i) { return primal.load(i); } [BackwardDerivative(__load_backward)] [ForwardDerivative(__load_forward)] __generic T load(vector i) { return primal.load(i); } DifferentialPair __load_forward(uint x) { return diffPair(primal.load(x), reinterpret(diff.load_forward(x))); } __generic DifferentialPair __load_forward(vector x) { return diffPair(primal.load(x), reinterpret(diff.load_forward(x))); } void __load_backward(uint x, T.Differential dOut) { diff.load_backward(x, reinterpret(dOut)); } __generic void __load_backward(vector x, T.Differential dOut) { diff.load_backward(x, reinterpret(dOut)); } [BackwardDerivative(__store_backward)] [ForwardDerivative(__store_forward)] void store(uint x, T val) { primal.store(x, val); } [BackwardDerivative(__store_backward)] [ForwardDerivative(__store_forward)] __generic void store(vector x, T val) { primal.store(x, val); } void __store_forward(uint x, DifferentialPair dpval) { primal.store(x, dpval.p); diff.store_forward(x, reinterpret(dpval.d)); } __generic void __store_forward(vector x, DifferentialPair dpval) { primal.store(x, dpval.p); diff.store_forward(x, reinterpret(dpval.d)); } void __store_backward(uint x, inout DifferentialPair dpval) { dpval = diffPair(dpval.p, reinterpret(diff.store_backward(x))); } __generic void __store_backward(vector x, inout DifferentialPair dpval) { dpval = diffPair(dpval.p, reinterpret(diff.store_backward(x))); } __subscript(uint index)->T { [__unsafeForceInlineEarly] [Differentiable] [__NoSideEffect] get { return load(index); } [__unsafeForceInlineEarly] [Differentiable] set { store(index, newValue); } [__NoSideEffect] ref; } __subscript(uint2 index)->T { [__unsafeForceInlineEarly] [Differentiable] [__NoSideEffect] get { return load(index); } [__unsafeForceInlineEarly] [Differentiable] set { store(index, newValue); } [__NoSideEffect] ref; } __subscript(uint x, uint y)->T { [__unsafeForceInlineEarly] [Differentiable] [__NoSideEffect] get { return load(uint2(x, y)); } [__unsafeForceInlineEarly] [Differentiable] set { store(uint2(x, y), newValue); } [__NoSideEffect] ref; } __subscript(uint3 index)->T { [__unsafeForceInlineEarly] [Differentiable] [__NoSideEffect] get { return load(index); } [__unsafeForceInlineEarly] [Differentiable] set { store(index, newValue); } [__NoSideEffect] ref; } __subscript(uint x, uint y, uint z)->T { [__unsafeForceInlineEarly] [Differentiable] [__NoSideEffect] get { return load(uint3(x, y, z)); } [__unsafeForceInlineEarly] [Differentiable] set { store(uint3(x, y, z), newValue); } [__NoSideEffect] ref; } __subscript(uint4 index)->T { [__unsafeForceInlineEarly] [Differentiable] [__NoSideEffect] get { return load(index); } [__unsafeForceInlineEarly] [Differentiable] set { store(index, newValue); } [__NoSideEffect] ref; } __subscript(uint x, uint y, uint z, uint w)->T { [__unsafeForceInlineEarly] [Differentiable] [__NoSideEffect] get { return load(uint4(x, y, z, w)); } [__unsafeForceInlineEarly] [Differentiable] set { store(uint4(x, y, z, w), newValue); } [__NoSideEffect] ref; } // loadOnce/storeOnce operations. These operations are designed to only run once per-address and // don't need atomic gradient handling. // [BackwardDerivative(__loadOnce_backward)] [ForwardDerivative(__loadOnce_forward)] T loadOnce(uint i) { return primal.load(i); } [BackwardDerivative(__loadOnce_backward)] [ForwardDerivative(__loadOnce_forward)] __generic T loadOnce(vector i) { return primal.load(i); } DifferentialPair __loadOnce_forward(uint x) { return diffPair(primal.load(x), reinterpret(diff.loadOnce_forward(x))); } __generic DifferentialPair __loadOnce_forward(vector x) { return diffPair(primal.load(x), reinterpret(diff.loadOnce_forward(x))); } void __loadOnce_backward(uint x, T.Differential dOut) { diff.loadOnce_backward(x, reinterpret(dOut)); } __generic void __loadOnce_backward(vector x, T.Differential dOut) { diff.loadOnce_backward(x, reinterpret(dOut)); } [BackwardDerivative(__storeOnce_backward)] [ForwardDerivative(__storeOnce_forward)] void storeOnce(uint x, T val) { primal.store(x, val); } [BackwardDerivative(__storeOnce_backward)] [ForwardDerivative(__storeOnce_forward)] __generic void storeOnce(vector x, T val) { primal.store(x, val); } void __storeOnce_forward(uint x, DifferentialPair dpval) { primal.store(x, dpval.p); diff.storeOnce_forward(x, reinterpret(dpval.d)); } __generic void __storeOnce_forward(vector x, DifferentialPair dpval) { primal.store(x, dpval.p); diff.storeOnce_forward(x, reinterpret(dpval.d)); } void __storeOnce_backward(uint x, inout DifferentialPair dpval) { dpval = diffPair(dpval.p, reinterpret(diff.storeOnce_backward(x))); } __generic void __storeOnce_backward(vector x, inout DifferentialPair dpval) { dpval = diffPair(dpval.p, reinterpret(diff.storeOnce_backward(x))); } }; /// Represents the handle of a Torch tensor object. __generic __intrinsic_type($(kIROp_TorchTensorType)) struct TorchTensor { __intrinsic_op($(kIROp_TorchTensorGetView)) [CudaHost] TensorView getView(); __target_intrinsic(cuda, "$0.dims()") __target_intrinsic(cpp, "$0.dims()") [__readNone] [CudaHost] uint dims(); __target_intrinsic(cuda, "$0.size($1)") __target_intrinsic(cpp, "$0.size($1)") [__readNone] [CudaHost] uint size(uint i); __target_intrinsic(cuda, "$0.stride($1)") __target_intrinsic(cpp, "$0.stride($1)") [__readNone] [CudaHost] uint stride(uint i); __target_intrinsic(cuda, "$0.data_ptr<$G0>()") __target_intrinsic(cpp, "$0.data_ptr<$G0>()") [__readNone] [CudaHost] Ptr data_ptr(); __intrinsic_op($(kIROp_AllocateTorchTensor)) [CudaHost] static TorchTensor alloc(uint x); __intrinsic_op($(kIROp_AllocateTorchTensor)) [CudaHost] static TorchTensor alloc(uint x, uint y); __intrinsic_op($(kIROp_AllocateTorchTensor)) [CudaHost] static TorchTensor alloc(uint x, uint y, uint z); __intrinsic_op($(kIROp_AllocateTorchTensor)) [CudaHost] static TorchTensor alloc(uint x, uint y, uint z, uint w); __intrinsic_op($(kIROp_AllocateTorchTensor)) [CudaHost] static TorchTensor alloc(uint i0, uint i1, uint i2, uint i3, uint i4); __intrinsic_op($(kIROp_AllocateTorchTensor)) [CudaHost] static TorchTensor emptyLike(TorchTensor other); __target_intrinsic(cpp, "$0.zero_()") [CudaHost] void fillZero(); __target_intrinsic(cpp, "$0.fill_($1)") [CudaHost] void fillValue(T val); [CudaHost] static TorchTensor zerosLike(TorchTensor other) { var result = emptyLike(other); result.fillZero(); return result; } } __target_intrinsic(cpp, "AT_CUDA_CHECK(cudaStreamSynchronize(at::cuda::getCurrentCUDAStream()))") void syncTorchCudaStream(); /// Constructs a `DifferentialPair` value from a primal value and a differential value. __generic __intrinsic_op($(kIROp_MakeDifferentialPairUserCode)) DifferentialPair diffPair(T primal, T.Differential diff); /// Constructs a `DifferentialPair` value from a primal value and a zero differential value. __generic [__unsafeForceInlineEarly] DifferentialPair diffPair(T primal) { return diffPair(primal, T.dzero()); } [__unsafeForceInlineEarly] void updatePrimal(inout DifferentialPair p, T newPrimal) { p = DifferentialPair(newPrimal, p.d); } [__unsafeForceInlineEarly] void updateDiff(inout DifferentialPair p, T.Differential newDiff) { p = DifferentialPair(p.p, newDiff); } [__unsafeForceInlineEarly] void updatePair(inout DifferentialPair p, T newPrimal, T.Differential newDiff) { p = DifferentialPair(newPrimal, newDiff); } __generic __intrinsic_op($(kIROp_MakeArrayFromElement)) Array makeArrayFromElement(T element); __generic extension Array : IDifferentiable { typedef Array Differential; [__unsafeForceInlineEarly] static Differential dzero() { return makeArrayFromElement(T.dzero()); } [__unsafeForceInlineEarly] static Differential dadd(Differential a, Differential b) { Array result; for (int i = 0; i < N; i++) result[i] = T.dadd(a[i], b[i]); return result; } __generic [__unsafeForceInlineEarly] static Differential dmul(U a, Differential b) { Array result; for (int i = 0; i < N; i++) result[i] = T.dmul(a, b[i]); return result; } } // Matrix transpose __generic [ForceInline] [ForwardDerivativeOf(transpose)] [PreferRecompute] [BackwardDifferentiable] DifferentialPair> __d_transpose(DifferentialPair> m) { return DifferentialPair>(transpose(m.p), transpose(m.d)); } __generic [ForceInline] [BackwardDerivativeOf(transpose)] [PreferRecompute] [BackwardDifferentiable] void __d_transpose(inout DifferentialPair> m, matrix.Differential dOut) { m = diffPair(m.p, transpose(dOut)); } // vector-matrix __generic [ForceInline] [ForwardDerivativeOf(mul)] [PreferRecompute] [BackwardDifferentiable] DifferentialPair> mul(DifferentialPair> left, DifferentialPair> right) { let primal = mul(left.p, right.p); let diff = mul(left.d, right.p) + mul(left.p, right.d); return DifferentialPair>(primal, diff); } __generic [BackwardDerivativeOf(mul)] [PreferRecompute] [BackwardDifferentiable] void __d_mul(inout DifferentialPair> left, inout DifferentialPair> right, vector.Differential dOut) { vector.Differential left_d_result; matrix.Differential right_d_result; [ForceUnroll] for (int i = 0; i < N; ++i) { T sum = T(0); [ForceUnroll] for (int j = 0; j < M; ++j) { sum += right.p[i][j] * dOut[j]; right_d_result[i][j] = left.p[i] * dOut[j]; } left_d_result[i] = sum; } left = diffPair(left.p, left_d_result); right = diffPair(right.p, right_d_result); } // matrix-vector __generic [ForceInline] [ForwardDerivativeOf(mul)] [PreferRecompute] [BackwardDifferentiable] DifferentialPair> mul(DifferentialPair> left, DifferentialPair> right) { let primal = mul(left.p, right.p); let diff = mul(left.d, right.p) + mul(left.p, right.d); return DifferentialPair>(primal, diff); } __generic [BackwardDerivativeOf(mul)] [PreferRecompute] [BackwardDifferentiable] void __d_mul(inout DifferentialPair> left, inout DifferentialPair> right, vector.Differential dOut) { matrix.Differential left_d_result; vector.Differential right_d_result; [ForceUnroll] for (int j = 0; j < M; ++j) { T sum = T(0); [ForceUnroll] for (int i = 0; i < N; ++i) { sum += left.p[i][j] * dOut[i]; left_d_result[i][j] = right.p[j] * dOut[i]; } right_d_result[j] = sum; } left = diffPair(left.p, left_d_result); right = diffPair(right.p, right_d_result); } // matrix-matrix __generic [ForceInline] [ForwardDerivativeOf(mul)] [PreferRecompute] [BackwardDifferentiable] DifferentialPair> mul(DifferentialPair> left, DifferentialPair> right) { let primal = mul(left.p, right.p); let diff = mul(left.d, right.p) + mul(left.p, right.d); return DifferentialPair>(primal, diff); } __generic [BackwardDerivativeOf(mul)] [PreferRecompute] [BackwardDifferentiable] void mul(inout DifferentialPair> left, inout DifferentialPair> right, matrix.Differential dOut) { matrix.Differential left_d_result; [ForceUnroll] for (int r = 0; r < R; ++r) [ForceUnroll] for (int n = 0; n < N; ++n) left_d_result[r][n] = T(0.0); matrix.Differential right_d_result; [ForceUnroll] for (int n = 0; n < N; ++n) [ForceUnroll] for (int c = 0; c < C; ++c) right_d_result[n][c] = T(0.0); [ForceUnroll] for (int r = 0; r < R; ++r) { [ForceUnroll] for (int c = 0; c < C; ++c) { [ForceUnroll] for (int n = 0; n < N; ++n) { left_d_result[r][n] += right.p[n][c] * dOut[r][c]; right_d_result[n][c] += left.p[r][n] * dOut[r][c]; } } } left = diffPair(left.p, left_d_result); right = diffPair(right.p, right_d_result); } // Vector dot product __generic [ForwardDerivativeOf(dot)] [PreferRecompute] [BackwardDifferentiable] DifferentialPair __d_dot(DifferentialPair> dpx, DifferentialPair> dpy) { T result = T(0); T.Differential d_result = T.dzero(); [ForceUnroll] for (int i = 0; i < N; ++i) { result = result + dpx.p[i] * dpy.p[i]; d_result = T.dadd(d_result, __slang_noop_cast(dpx.p[i] * dpy.d[i])); d_result = T.dadd(d_result, __slang_noop_cast(dpy.p[i] * dpx.d[i])); } return DifferentialPair(result, d_result); } __generic [BackwardDerivativeOf(dot)] [PreferRecompute] [BackwardDifferentiable] void __d_dot(inout DifferentialPair> dpx, inout DifferentialPair> dpy, T.Differential dOut) { vector.Differential x_d_result, y_d_result; [ForceUnroll] for (int i = 0; i < N; ++i) { x_d_result[i] = dpy.p[i] * __slang_noop_cast(dOut); y_d_result[i] = dpx.p[i] * __slang_noop_cast(dOut); } dpx = diffPair(dpx.p, x_d_result); dpy = diffPair(dpy.p, y_d_result); } // Cross product __generic [ForwardDerivativeOf(cross)] [PreferRecompute] [BackwardDifferentiable] DifferentialPair> __d_cross(DifferentialPair> a, DifferentialPair> b) { /* cx = ay * bz − az * by cy = az * bx − ax * bz cz = ax * by − ay * bx */ T aybz = a.p.y * b.p.z; T azby = a.p.z * b.p.y; T px = aybz - azby; T dx = (b.p.z - azby) * a.d.y + (a.p.y - azby) * b.d.z + (aybz - b.p.y) * a.d.z + (aybz - a.p.z) * b.d.y; T azbx = a.p.z * b.p.x; T axbz = a.p.x * b.p.z; T py = azbx - axbz; T dy = (b.p.x - axbz) * a.d.z + (a.p.z - axbz) * b.d.x + (azbx - b.p.z) * a.d.x + (azbx - a.p.x) * b.d.z; T axby = a.p.x * b.p.y; T aybx = a.p.y * b.p.x; T pz = axby - aybx; T dz = (b.p.y - aybx) * a.d.x + (a.p.x - aybx) * b.d.y + (axby - b.p.x) * a.d.y + (axby - a.p.y) * b.d.x; return DifferentialPair>(vector(px, py, pz), vector.Differential(dx, dy, dz)); } __generic [BackwardDerivativeOf(cross)] [PreferRecompute] [BackwardDifferentiable] void __d_cross(inout DifferentialPair> a, inout DifferentialPair> b, vector.Differential dOut) { /* cx = ay * bz − az * by cy = az * bx − ax * bz cz = ax * by − ay * bx */ T dax = (-b.p.z * dOut.y) + (b.p.y * dOut.z); T day = (b.p.z * dOut.x) + (-b.p.x * dOut.z); T daz = (-b.p.y * dOut.x) + (b.p.x * dOut.y); T dbx = (a.p.z * dOut.y) + (-a.p.y * dOut.z); T dby = (-a.p.z * dOut.x) + (a.p.x * dOut.z); T dbz = (a.p.y * dOut.x) + (-a.p.x * dOut.y); a = diffPair(a.p, vector.Differential(dax, day, daz)); b = diffPair(b.p, vector.Differential(dbx, dby, dbz)); } #define VECTOR_MATRIX_BINARY_DIFF_IMPL(NAME) \ __generic \ [BackwardDifferentiable][PreferRecompute] \ [ForwardDerivativeOf(NAME)] \ DifferentialPair> __d_##NAME##_vector( \ DifferentialPair> dpx, DifferentialPair> dpy) \ { \ vector result; \ vector.Differential d_result; \ [ForceUnroll] for (int i = 0; i < N; ++i) \ { \ DifferentialPair dp_elem = __d_##NAME( \ DifferentialPair(dpx.p[i], __slang_noop_cast(dpx.d[i])), \ DifferentialPair(dpy.p[i], __slang_noop_cast(dpy.d[i]))); \ result[i] = dp_elem.p; \ d_result[i] = __slang_noop_cast(dp_elem.d); \ } \ return DifferentialPair>(result, d_result); \ } \ __generic \ [BackwardDifferentiable][PreferRecompute] \ [ForwardDerivativeOf(NAME)] \ DifferentialPair> __d_##NAME##_matrix( \ DifferentialPair> dpx, DifferentialPair> dpy) \ { \ matrix result; \ matrix.Differential d_result; \ [ForceUnroll] for (int i = 0; i < M; ++i) \ [ForceUnroll] for (int j = 0; j < N; ++j) \ { \ DifferentialPair dp_elem = __d_##NAME( \ DifferentialPair(dpx.p[i][j], __slang_noop_cast(dpx.d[i][j])), \ DifferentialPair(dpy.p[i][j], __slang_noop_cast(dpy.d[i][j]))); \ result[i][j] = dp_elem.p; \ d_result[i][j] = __slang_noop_cast(dp_elem.d); \ } \ return DifferentialPair>(result, d_result); \ } \ __generic \ [BackwardDifferentiable][PreferRecompute] \ [BackwardDerivativeOf(NAME)] \ void __d_##NAME##_vector( \ inout DifferentialPair> dpx, \ inout DifferentialPair> dpy, \ vector.Differential dOut) \ { \ vector.Differential left_d_result, right_d_result; \ [ForceUnroll] for (int i = 0; i < N; ++i) \ { \ DifferentialPair left_dp = diffPair(dpx.p[i], T.dzero()); \ DifferentialPair right_dp = diffPair(dpy.p[i], T.dzero()); \ __d_##NAME(left_dp, right_dp, __slang_noop_cast(dOut[i])); \ left_d_result[i] = __slang_noop_cast(left_dp.d); \ right_d_result[i] = __slang_noop_cast(right_dp.d); \ } \ dpx = diffPair(dpx.p, left_d_result); \ dpy = diffPair(dpy.p, right_d_result); \ } \ __generic \ [BackwardDifferentiable][PreferRecompute] \ [BackwardDerivativeOf(NAME)] \ void __d_##NAME##_matrix( \ inout DifferentialPair> dpx, \ inout DifferentialPair> dpy, \ matrix.Differential dOut) \ { \ matrix.Differential left_d_result, right_d_result; \ [ForceUnroll] for (int i = 0; i < M; ++i) \ [ForceUnroll] for (int j = 0; j < N; ++j) \ { \ DifferentialPair left_dp = diffPair(dpx.p[i][j], T.dzero()); \ DifferentialPair right_dp = diffPair(dpy.p[i][j], T.dzero()); \ __d_##NAME(left_dp, right_dp, __slang_noop_cast(dOut[i][j])); \ left_d_result[i][j] = __slang_noop_cast(left_dp.d); \ right_d_result[i][j] = __slang_noop_cast(right_dp.d); \ } \ dpx = diffPair(dpx.p, left_d_result); \ dpy = diffPair(dpy.p, right_d_result); \ } #define VECTOR_MATRIX_TERNARY_DIFF_IMPL(NAME) \ __generic \ [BackwardDifferentiable][PreferRecompute] \ [ForwardDerivativeOf(NAME)] \ DifferentialPair> __d_##NAME##_vector( \ DifferentialPair> dpx, \ DifferentialPair> dpy, \ DifferentialPair> dpz) \ { \ vector result; \ vector.Differential d_result; \ [ForceUnroll] for (int i = 0; i < N; ++i) \ { \ DifferentialPair dp_elem = __d_##NAME( \ DifferentialPair(dpx.p[i], __slang_noop_cast(dpx.d[i])), \ DifferentialPair(dpy.p[i], __slang_noop_cast(dpy.d[i])), \ DifferentialPair(dpz.p[i], __slang_noop_cast(dpz.d[i]))); \ result[i] = dp_elem.p; \ d_result[i] = __slang_noop_cast(dp_elem.d); \ } \ return DifferentialPair>(result, d_result); \ } \ __generic \ [BackwardDifferentiable][PreferRecompute] \ [ForwardDerivativeOf(NAME)] \ DifferentialPair> __d_##NAME##_matrix( \ DifferentialPair> dpx, \ DifferentialPair> dpy, \ DifferentialPair> dpz) \ { \ matrix result; \ matrix.Differential d_result; \ [ForceUnroll] for (int i = 0; i < M; ++i) \ [ForceUnroll] for (int j = 0; j < N; ++j) \ { \ DifferentialPair dp_elem = __d_##NAME( \ DifferentialPair(dpx.p[i][j], __slang_noop_cast(dpx.d[i][j])), \ DifferentialPair(dpy.p[i][j], __slang_noop_cast(dpy.d[i][j])), \ DifferentialPair(dpz.p[i][j], __slang_noop_cast(dpz.d[i][j]))); \ result[i][j] = dp_elem.p; \ d_result[i][j] = __slang_noop_cast(dp_elem.d); \ } \ return DifferentialPair>(result, d_result); \ } \ __generic \ [BackwardDifferentiable][PreferRecompute] \ [BackwardDerivativeOf(NAME)] \ void __d_##NAME##_vector( \ inout DifferentialPair> dpx, \ inout DifferentialPair> dpy, \ inout DifferentialPair> dpz, \ vector.Differential dOut) \ { \ vector.Differential left_d_result, middle_d_result, right_d_result; \ [ForceUnroll] for (int i = 0; i < N; ++i) \ { \ DifferentialPair left_dp = diffPair(dpx.p[i], T.dzero()); \ DifferentialPair middle_dp = diffPair(dpy.p[i], T.dzero()); \ DifferentialPair right_dp = diffPair(dpz.p[i], T.dzero()); \ __d_##NAME(left_dp, middle_dp, right_dp, \ __slang_noop_cast(dOut[i])); \ left_d_result[i] = __slang_noop_cast(left_dp.d); \ middle_d_result[i] = __slang_noop_cast(middle_dp.d); \ right_d_result[i] = __slang_noop_cast(right_dp.d); \ } \ dpx = diffPair(dpx.p, left_d_result); \ dpy = diffPair(dpy.p, middle_d_result); \ dpz = diffPair(dpz.p, right_d_result); \ } \ __generic \ [BackwardDifferentiable][PreferRecompute] \ [BackwardDerivativeOf(NAME)] \ void __d_##NAME##_matrix( \ inout DifferentialPair> dpx, \ inout DifferentialPair> dpy, \ inout DifferentialPair> dpz, \ matrix.Differential dOut) \ { \ matrix.Differential left_d_result, middle_d_result, right_d_result; \ [ForceUnroll] for (int i = 0; i < M; ++i) \ [ForceUnroll] for (int j = 0; j < N; ++j) \ { \ DifferentialPair left_dp = diffPair(dpx.p[i][j], T.dzero()); \ DifferentialPair middle_dp = diffPair(dpy.p[i][j], T.dzero()); \ DifferentialPair right_dp = diffPair(dpz.p[i][j], T.dzero()); \ __d_##NAME(left_dp, middle_dp, right_dp, \ __slang_noop_cast(dOut[i][j])); \ left_d_result[i][j] = __slang_noop_cast(left_dp.d); \ middle_d_result[i][j] = __slang_noop_cast(middle_dp.d); \ right_d_result[i][j] = __slang_noop_cast(right_dp.d); \ } \ dpx = diffPair(dpx.p, left_d_result); \ dpy = diffPair(dpy.p, middle_d_result); \ dpz = diffPair(dpz.p, right_d_result); \ } #define UNARY_DERIVATIVE_IMPL(NAME, FWD_DIFF_FUNC, BWD_DIFF_FUNC) \ __generic \ [BackwardDifferentiable] [PreferRecompute] \ [ForwardDerivativeOf(NAME)] \ DifferentialPair __d_##NAME(DifferentialPair dpx) \ { \ typealias ReturnType = T; \ return DifferentialPair(NAME(dpx.p), FWD_DIFF_FUNC); \ } \ __generic \ [BackwardDifferentiable] [PreferRecompute] \ [ForwardDerivativeOf(NAME)] \ DifferentialPair> __d_##NAME##_vector(DifferentialPair> dpx) \ { \ typealias ReturnType = vector; \ return DifferentialPair(NAME(dpx.p), FWD_DIFF_FUNC); \ } \ __generic \ [BackwardDifferentiable] [PreferRecompute] \ [ForwardDerivativeOf(NAME)] \ DifferentialPair> __d_##NAME##_m(DifferentialPair> dpm) \ { \ typealias ReturnType = vector; \ matrix.Differential diff; \ [ForceUnroll] for (int i = 0; i < M; i++) \ { \ var dpx = diffPair(dpm.p[i], dpm.d[i]); \ diff[i] = __slang_noop_cast>(FWD_DIFF_FUNC); \ } \ return diffPair(NAME(dpm.p), diff); \ } \ __generic \ [BackwardDifferentiable] [PreferRecompute] \ [BackwardDerivativeOf(NAME)] \ void __d_##NAME(inout DifferentialPair dpx, T.Differential dOut) \ { \ typealias ReturnType = T; \ dpx = diffPair(dpx.p, BWD_DIFF_FUNC); \ } \ __generic \ [BackwardDifferentiable] [PreferRecompute] \ [BackwardDerivativeOf(NAME)] \ void __d_##NAME##_vector( \ inout DifferentialPair> dpx, vector.Differential dOut) \ { \ typealias ReturnType = vector; \ dpx = diffPair(dpx.p, BWD_DIFF_FUNC); \ } \ __generic \ [BackwardDifferentiable] [PreferRecompute] \ [BackwardDerivativeOf(NAME)] \ void __d_##NAME##_matrix( \ inout DifferentialPair> m, matrix.Differential mdOut) \ { \ typealias ReturnType = vector; \ matrix.Differential diff; \ [ForceUnroll] for (int i = 0; i < M; i++) \ { \ var dpx = diffPair(m.p[i], m.d[i]); \ var dOut = __slang_noop_cast>(mdOut[i]); \ diff[i] = BWD_DIFF_FUNC; \ } \ m = diffPair(m.p, diff); \ } #define SIMPLE_UNARY_DERIVATIVE_IMPL(NAME, DIFF_FUNC) UNARY_DERIVATIVE_IMPL(NAME, __mul_p_d(DIFF_FUNC, dpx.d), __mul_p_d(DIFF_FUNC, dOut)) /// Element-wise multiply for scalars and vectors for (T, T.Differential) __generic [__unsafeForceInlineEarly] [Differentiable] T.Differential __mul_p_d(T a, T.Differential b) { return __slang_noop_cast(a * __slang_noop_cast(b)); } __generic [__unsafeForceInlineEarly] [Differentiable] T __mul_p_d(T a, T b) { return (a * b); } __generic [__unsafeForceInlineEarly] [Differentiable] vector __mul_p_d(vector a, vector b) { return a * b; } /// Detach and set derivatives to zero. __generic __intrinsic_op($(kIROp_DetachDerivative)) T detach(T x); #define SLANG_SQR(x) ((x)*(x)) #define SLANG_SIGN(x) select(((x)>T(0.0)), ReturnType(T(1.0)), select(((x)==T(0.0)), ReturnType(T(0.0)), ReturnType(T(-1.0)))) // Absolute value UNARY_DERIVATIVE_IMPL(abs, (__mul_p_d(SLANG_SIGN(dpx.p), (dpx.d))), (__mul_p_d(SLANG_SIGN(dpx.p), (dOut)))) // Saturate UNARY_DERIVATIVE_IMPL(saturate, select(dpx.p < T(0.0) || dpx.p > T(1.0), ReturnType.dzero(), dpx.d), select(dpx.p < T(0.0) || dpx.p > T(1.0), ReturnType.dzero(), dOut)) // frac UNARY_DERIVATIVE_IMPL(frac, dpx.d, dOut) // raidans, degrees SIMPLE_UNARY_DERIVATIVE_IMPL(radians, ReturnType(T(0.01745329251994329576923690768489))) SIMPLE_UNARY_DERIVATIVE_IMPL(degrees, ReturnType(T(57.295779513082320876798154814105))) // Exponent SIMPLE_UNARY_DERIVATIVE_IMPL(exp, exp(dpx.p)) SIMPLE_UNARY_DERIVATIVE_IMPL(exp2, exp2(dpx.p)* T(50.69314718055994530941723212145818)) // sin, sinh SIMPLE_UNARY_DERIVATIVE_IMPL(sin, cos(dpx.p)) SIMPLE_UNARY_DERIVATIVE_IMPL(sinh, cosh(dpx.p)) // cos, cosh SIMPLE_UNARY_DERIVATIVE_IMPL(cos, -sin(dpx.p)) SIMPLE_UNARY_DERIVATIVE_IMPL(cosh, sinh(dpx.p)) // tan, tanh SIMPLE_UNARY_DERIVATIVE_IMPL(tan, T(1.0) / (cos(dpx.p) * cos(dpx.p))) SIMPLE_UNARY_DERIVATIVE_IMPL(tanh, T(1.0) / (cosh(dpx.p) * cosh(dpx.p))) // Logarithm SIMPLE_UNARY_DERIVATIVE_IMPL(log, T(1.0) / dpx.p) SIMPLE_UNARY_DERIVATIVE_IMPL(log10, T(1.0) / (dpx.p * T(52.3025850929940456840179914546844))) SIMPLE_UNARY_DERIVATIVE_IMPL(log2, T(1.0) / (dpx.p * T(50.69314718055994530941723212145818))) // Square root SIMPLE_UNARY_DERIVATIVE_IMPL(sqrt, T(0.5) / sqrt(max(ReturnType(T(1e-7)), dpx.p))) // Reciprocal SIMPLE_UNARY_DERIVATIVE_IMPL(rcp, T(-1.0) / max(ReturnType(T(1e-7)), dpx.p * dpx.p)) // rsqrt SIMPLE_UNARY_DERIVATIVE_IMPL(rsqrt, T(-0.5) / (dpx.p * sqrt(dpx.p))) // Arc-sin SIMPLE_UNARY_DERIVATIVE_IMPL(asin, T(1.0) / sqrt(T(1.0) - dpx.p * dpx.p)) // Arc-cos SIMPLE_UNARY_DERIVATIVE_IMPL(acos, T(-1.0) / sqrt(T(1.0) - dpx.p * dpx.p)) // Arc-tan SIMPLE_UNARY_DERIVATIVE_IMPL(atan, T(1.0) / (T(1.0) + dpx.p * dpx.p)) // Atan2 __generic [BackwardDifferentiable] [PreferRecompute] [ForwardDerivativeOf(atan2)] DifferentialPair __d_atan2(DifferentialPair dpy, DifferentialPair dpx) { T.Differential dx = __mul_p_d(-dpy.p / (dpx.p * dpx.p + dpy.p * dpy.p), dpx.d); T.Differential dy = __mul_p_d(dpx.p / (dpx.p * dpx.p + dpy.p * dpy.p), dpy.d); return DifferentialPair( atan2(dpy.p, dpx.p), T.dadd(dx, dy)); } __generic [BackwardDifferentiable] [PreferRecompute] [BackwardDerivativeOf(atan2)] void __d_atan2(inout DifferentialPair dpy, inout DifferentialPair dpx, T.Differential dOut) { dpx = diffPair(dpx.p, __mul_p_d(-dpy.p / (dpx.p * dpx.p + dpy.p * dpy.p), dOut)); dpy = diffPair(dpy.p, __mul_p_d(dpx.p / (dpx.p * dpx.p + dpy.p * dpy.p), dOut)); } VECTOR_MATRIX_BINARY_DIFF_IMPL(atan2) // fmod __generic [BackwardDifferentiable] [PreferRecompute] [ForwardDerivativeOf(fmod)] DifferentialPair __d_fmod(DifferentialPair x, DifferentialPair y) { return DifferentialPair(fmod(x.p, y.p), x.d); } __generic [BackwardDifferentiable] [PreferRecompute] [BackwardDerivativeOf(fmod)] void __d_fmod(inout DifferentialPair x, inout DifferentialPair y, T.Differential dOut) { x = diffPair(x.p, dOut); y = diffPair(y.p); } VECTOR_MATRIX_BINARY_DIFF_IMPL(fmod) // Raise to a power __generic [BackwardDifferentiable] [PreferRecompute] [ForwardDerivativeOf(pow)] DifferentialPair __d_pow(DifferentialPair dpx, DifferentialPair dpy) { // Special case if (dpx.p < T(1e-6)) { return DifferentialPair(T(0.0), T.dzero()); } T val = pow(dpx.p, dpy.p); T.Differential d1 = __mul_p_d((val * log(dpx.p)), dpy.d); T.Differential d2 = __mul_p_d((val * dpy.p / dpx.p), dpx.d); return DifferentialPair( val, T.dadd(d1, d2) ); } __generic [BackwardDifferentiable] [PreferRecompute] [BackwardDerivativeOf(pow)] void __d_pow(inout DifferentialPair dpx, inout DifferentialPair dpy, T.Differential dOut) { // Special case if (dpx.p < T(1e-6)) { dpx = diffPair(dpx.p, T.dzero()); dpy = diffPair(dpy.p, T.dzero()); } else { T val = pow(dpx.p, dpy.p); dpx = diffPair( dpx.p, (__mul_p_d((val * dpy.p / dpx.p), dOut))); dpy = diffPair( dpy.p, (__mul_p_d((val * log(dpx.p)), dOut))); } } VECTOR_MATRIX_BINARY_DIFF_IMPL(pow) // Maximum __generic [BackwardDifferentiable] [PreferRecompute] [ForwardDerivativeOf(max)] DifferentialPair __d_max(DifferentialPair dpx, DifferentialPair dpy) { return DifferentialPair( max(dpx.p, dpy.p), dpx.p > dpy.p ? dpx.d : dpy.d ); } __generic [BackwardDifferentiable] [PreferRecompute] [BackwardDerivativeOf(max)] void __d_max(inout DifferentialPair dpx, inout DifferentialPair dpy, T.Differential dOut) { dpx = diffPair(dpx.p, dpx.p > dpy.p ? dOut : T.dzero()); dpy = diffPair(dpy.p, dpy.p > dpx.p ? dOut : T.dzero()); } VECTOR_MATRIX_BINARY_DIFF_IMPL(max) // Minimum __generic [BackwardDifferentiable] [PreferRecompute] [ForwardDerivativeOf(min)] DifferentialPair __d_min(DifferentialPair dpx, DifferentialPair dpy) { return DifferentialPair( min(dpx.p, dpy.p), dpx.p < dpy.p ? dpx.d : dpy.d ); } __generic [BackwardDifferentiable] [PreferRecompute] [BackwardDerivativeOf(min)] void __d_min(inout DifferentialPair dpx, inout DifferentialPair dpy, T.Differential dOut) { dpx = diffPair(dpx.p, dpx.p < dpy.p ? dOut : T.dzero()); dpy = diffPair(dpy.p, dpy.p < dpx.p ? dOut : T.dzero()); } VECTOR_MATRIX_BINARY_DIFF_IMPL(min) // Lerp __generic [BackwardDifferentiable] [PreferRecompute] [ForwardDerivativeOf(lerp)] DifferentialPair __d_lerp(DifferentialPair dpx, DifferentialPair dpy, DifferentialPair dps) { return DifferentialPair( lerp(dpx.p, dpy.p, dps.p), T.dadd(T.dadd(__mul_p_d((T(1.0) - dps.p), dpx.d), __mul_p_d(dps.p, dpy.d)), __mul_p_d((dpy.p - dpx.p), dps.d)) ); } __generic [BackwardDifferentiable] [PreferRecompute] [BackwardDerivativeOf(lerp)] void __d_lerp(inout DifferentialPair dpx, inout DifferentialPair dpy, inout DifferentialPair dps, T.Differential dOut) { dpx = diffPair(dpx.p, __mul_p_d((T(1.0) - dps.p), dOut)); dpy = diffPair(dpy.p, __mul_p_d(dps.p, dOut)); dps = diffPair(dpy.p, __mul_p_d((dpy.p - dpx.p), dOut)); } VECTOR_MATRIX_TERNARY_DIFF_IMPL(lerp) // Clamp __generic [BackwardDifferentiable] [PreferRecompute] [ForwardDerivativeOf(clamp)] DifferentialPair __d_clamp(DifferentialPair dpx, DifferentialPair dpMin, DifferentialPair dpMax) { return DifferentialPair( clamp(dpx.p, dpMin.p, dpMax.p), dpx.p < dpMin.p ? (dpx.p > dpMax.p ? dpMax.d : dpx.d) : dpMin.d); } __generic [BackwardDifferentiable] [PreferRecompute] [BackwardDerivativeOf(clamp)] void __d_clamp(inout DifferentialPair dpx, inout DifferentialPair dpMin, inout DifferentialPair dpMax, T.Differential dOut) { dpx = diffPair(dpx.p, dpx.p > dpMin.p && dpx.p < dpMax.p ? dOut : T.dzero()); dpMin = diffPair(dpMin.p, dpx.p <= dpMin.p ? dOut : T.dzero()); dpMax = diffPair(dpMin.p, dpx.p >= dpMax.p ? dOut : T.dzero()); } VECTOR_MATRIX_TERNARY_DIFF_IMPL(clamp) // fma [BackwardDifferentiable] [ForwardDerivativeOf(fma)] [PreferRecompute] DifferentialPair __d_fma(DifferentialPair dpx, DifferentialPair dpy, DifferentialPair dpz) { return DifferentialPair( fma(dpx.p, dpy.p, dpz.p), dpy.p * dpx.d + dpx.p * dpy.d + dpz.d); } [BackwardDifferentiable] [BackwardDerivativeOf(fma)] [PreferRecompute] void __d_fma(inout DifferentialPair dpx, inout DifferentialPair dpy, inout DifferentialPair dpz, double dOut) { dpx = diffPair(dpx.p, dpy.p * dOut); dpy = diffPair(dpy.p, dpx.p * dOut); dpz = diffPair(dpz.p, dOut); } __generic [BackwardDifferentiable] [ForwardDerivativeOf(fma)] [PreferRecompute] DifferentialPair> __d_fma_vector( DifferentialPair> dpx, DifferentialPair> dpy, DifferentialPair> dpz) { vector result; vector.Differential d_result; [ForceUnroll] for (int i = 0; i < N; ++i) { DifferentialPair dp_elem = __d_fma( DifferentialPair(dpx.p[i], dpx.d[i]), DifferentialPair(dpy.p[i], dpy.d[i]), DifferentialPair(dpz.p[i], dpz.d[i])); result[i] = dp_elem.p; d_result[i] = dp_elem.d; } return DifferentialPair>(result, d_result); } __generic [BackwardDifferentiable] [BackwardDerivativeOf(fma)] [PreferRecompute] void __d_fma_vector( inout DifferentialPair> dpx, inout DifferentialPair> dpy, inout DifferentialPair> dpz, vector dOut) { vector.Differential x_d_result, y_d_result, z_d_result; [ForceUnroll] for (int i = 0; i < N; ++i) { DifferentialPair x_dp = diffPair(dpx.p[i], 0.0); DifferentialPair y_dp = diffPair(dpy.p[i], 0.0); DifferentialPair z_dp = diffPair(dpz.p[i], 0.0); __d_fma(x_dp, y_dp, z_dp, dOut[i]); x_d_result[i] = x_dp.d; y_d_result[i] = y_dp.d; z_d_result[i] = z_dp.d; } dpx = diffPair(dpx.p, x_d_result); dpy = diffPair(dpy.p, y_d_result); dpz = diffPair(dpz.p, z_d_result); } // mad __generic [BackwardDifferentiable] [ForwardDerivativeOf(mad)] [PreferRecompute] DifferentialPair __d_mad(DifferentialPair dpx, DifferentialPair dpy, DifferentialPair dpz) { return DifferentialPair( mad(dpx.p, dpy.p, dpz.p), T.dadd(T.dadd(__mul_p_d(dpy.p, dpx.d), __mul_p_d(dpx.p, dpy.d)), dpz.d)); } __generic [BackwardDifferentiable] [BackwardDerivativeOf(mad)] [PreferRecompute] void __d_mad(inout DifferentialPair dpx, inout DifferentialPair dpy, inout DifferentialPair dpz, T.Differential dOut) { dpx = diffPair(dpx.p, __mul_p_d(dpy.p, dOut)); dpy = diffPair(dpy.p, __mul_p_d(dpx.p, dOut)); dpz = diffPair(dpz.p, dOut); } VECTOR_MATRIX_TERNARY_DIFF_IMPL(mad) // Smoothstep __generic [BackwardDifferentiable] [PreferRecompute] T __smoothstep_impl(T minVal, T maxVal, T x) { let t = saturate((x - minVal) / (maxVal - minVal)); return t * t * (T(3.0) - T(2.0) * t); } __generic [BackwardDifferentiable] [ForwardDerivativeOf(smoothstep)] [PreferRecompute] DifferentialPair __d_smoothstep(DifferentialPair minVal, DifferentialPair maxVal, DifferentialPair x) { return __fwd_diff(__smoothstep_impl)(minVal, maxVal, x); } __generic [BackwardDifferentiable] [BackwardDerivativeOf(smoothstep)] [PreferRecompute] void __d_smoothstep(inout DifferentialPair minVal, inout DifferentialPair maxVal, inout DifferentialPair x, T.Differential dOut) { __bwd_diff(__smoothstep_impl)(minVal, maxVal, x, dOut); } VECTOR_MATRIX_TERNARY_DIFF_IMPL(smoothstep) // Vector length __generic [BackwardDifferentiable] [PreferRecompute] T __length_impl(vector x) { T len = T(0.0); [ForceUnroll] for (int i = 0; i < N; i++) { len += x[i] * x[i]; } return sqrt(len); } __generic [BackwardDifferentiable] [ForwardDerivativeOf(length)] [ForceInline] [PreferRecompute] DifferentialPair __d_length(DifferentialPair> x) { return __fwd_diff(__length_impl)(x); } __generic [BackwardDifferentiable] [BackwardDerivativeOf(length)] [ForceInline] [PreferRecompute] void __d_length(inout DifferentialPair> x, T.Differential dOut) { return __bwd_diff(__length_impl)(x, dOut); } // Vector distance __generic [BackwardDifferentiable] [PreferRecompute] T __distance_impl(vector x, vector y) { return length(y - x); } __generic [BackwardDifferentiable] [ForwardDerivativeOf(distance)] [ForceInline] [PreferRecompute] DifferentialPair __d_distance(DifferentialPair> x, DifferentialPair> y) { return __fwd_diff(__distance_impl)(x, y); } __generic [BackwardDifferentiable] [BackwardDerivativeOf(distance)] [ForceInline] [PreferRecompute] void __d_distance(inout DifferentialPair> x, inout DifferentialPair> y, T.Differential dOut) { return __bwd_diff(__distance_impl)(x, y, dOut); } // Vector normalize __generic [BackwardDifferentiable] [PreferRecompute] vector __normalize_impl(vector x) { let r = T(1.0) / length(x); return x * r; } __generic [BackwardDifferentiable] [ForwardDerivativeOf(normalize)] [ForceInline] [PreferRecompute] DifferentialPair> __d_normalize(DifferentialPair> x) { return __fwd_diff(__normalize_impl)(x); } __generic [BackwardDifferentiable] [BackwardDerivativeOf(normalize)] [ForceInline] [PreferRecompute] void __d_distance(inout DifferentialPair> x, vector.Differential dOut) { return __bwd_diff(__normalize_impl)(x, dOut); } // Vector reflect __generic [BackwardDifferentiable] vector __reflect_impl(vector i, vector n) { return i - n * (T(2.0) * dot(i, n)); } __generic [BackwardDifferentiable] [ForwardDerivativeOf(reflect)] [ForceInline] DifferentialPair> __d_reflect(DifferentialPair> i, DifferentialPair> n) { return __fwd_diff(__reflect_impl)(i, n); } __generic [BackwardDifferentiable] [BackwardDerivativeOf(reflect)] [ForceInline] void __d_reflect(inout DifferentialPair> i, inout DifferentialPair> n, vector.Differential dOut) { return __bwd_diff(__reflect_impl)(i, n, dOut); } // Vector refract __generic [BackwardDifferentiable] vector __refract_impl(vector i, vector n, T eta) { let k = T(1.0) - eta * eta * (T(1.0) - dot(n, i) * dot(n, i)); return (k < T(0.0)) ? vector(T(0.0)) : eta * i - (eta * dot(n, i) + sqrt(max(T(0.0),k))) * n; } __generic [BackwardDifferentiable] [ForwardDerivativeOf(refract)] [ForceInline] DifferentialPair> __d_refract(DifferentialPair> i, DifferentialPair> n, DifferentialPair eta) { return __fwd_diff(__refract_impl)(i, n, eta); } __generic [BackwardDifferentiable] [BackwardDerivativeOf(refract)] [ForceInline] void __d_refract(inout DifferentialPair> i, inout DifferentialPair> n, inout DifferentialPair eta, vector.Differential dOut) { return __bwd_diff(__refract_impl)(i, n, eta, dOut); } // Sine and cosine __generic [BackwardDifferentiable] [PrimalSubstituteOf(sincos)] [PreferRecompute] void __sincos_impl(T x, out T s, out T c) { s = sin(x); c = cos(x); } __generic [BackwardDifferentiable] [PreferRecompute] [PrimalSubstituteOf(sincos)] void __sincos_impl(vector x, out vector s, out vector c) { s = sin(x); c = cos(x); } __generic [BackwardDifferentiable] [PrimalSubstituteOf(sincos)] [PreferRecompute] void __sincos_impl(matrix x, out matrix s, out matrix c) { s = sin(x); c = cos(x); } // dst (obsolete) __generic [BackwardDifferentiable] [PrimalSubstituteOf(dst)] vector __dst_impl(vector src0, vector src1) { vector dest; dest.x = T(1.0); dest.y = src0.y * src1.y; dest.z = src0.z; dest.w = src1.w; ; return dest; } // Legacy lighting function (obsolete) [__readNone] [BackwardDifferentiable] [PrimalSubstituteOf(lit)] float4 __lit_impl(float n_dot_l, float n_dot_h, float m) { let ambient = 1.0f; let diffuse = max(n_dot_l, 0.0f); let specular = ((n_dot_l < 0.0f || n_dot_h < 0.0) ? 0.0 : pow(n_dot_h, m)); return float4(ambient, diffuse, specular, 1.0f); } // Matrix determinant __generic [BackwardDifferentiable] [__readNone] T __determinant_impl(matrix m) { T result = T(0); switch (N) { case 1: result = m[0][0]; break; case 2: result = m[0][0] * m[1][1] - m[0][1] * m[1][0]; break; case 3: result = m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2]) - m[1][0] * (m[0][1] * m[2][2] - m[2][1] * m[0][2]) + m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2]); break; case 4: T s00 = m[2][2] * m[3][3] - m[3][2] * m[2][3]; T s01 = m[2][1] * m[3][3] - m[3][1] * m[2][3]; T s02 = m[2][1] * m[3][2] - m[3][1] * m[2][2]; T s03 = m[2][0] * m[3][3] - m[3][0] * m[2][3]; T s04 = m[2][0] * m[3][2] - m[3][0] * m[2][2]; T s05 = m[2][0] * m[3][1] - m[3][0] * m[2][1]; result = m[0][0] * (m[1][1] * s00 - m[1][2] * s01 + m[1][3] * s02) - m[0][1] * (m[1][0] * s00 - m[1][2] * s03 + m[1][3] * s04) + m[0][2] * (m[1][0] * s01 - m[1][1] * s03 + m[1][3] * s05) - m[0][3] * (m[1][0] * s02 - m[1][1] * s04 + m[1][2] * s05); break; } return result; } __generic [BackwardDifferentiable] [ForwardDerivativeOf(determinant)] [ForceInline] DifferentialPair __determinant_impl(DifferentialPair> m) { return __fwd_diff(__determinant_impl)(m); } __generic [BackwardDifferentiable] [BackwardDerivativeOf(determinant)] [ForceInline] void __d_determinant(inout DifferentialPair> m, T.Differential dOut) { __bwd_diff(__determinant_impl)(m, dOut); }