/* * Copyright (c) 2018 Side Effects Software Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * COMMENTS: * Functions and structures for computing solid angles. */ #include "UT_SolidAngle.h" #include "UT_BVHImpl.h" #include "UT_SmallArray.h" #include "UT_FixedVector.h" #include "VM/VM_SIMD.h" #include "SYS/SYS_Types.h" #include #include #define SOLID_ANGLE_TIME_PRECOMPUTE 0 #if SOLID_ANGLE_TIME_PRECOMPUTE #include #endif #define SOLID_ANGLE_DEBUG 0 #if SOLID_ANGLE_DEBUG #include #endif #define TAYLOR_SERIES_ORDER 2 using namespace HDK_Sample; template struct UT_SolidAngle::BoxData { void clear() { // Set everything to zero memset(this,0,sizeof(*this)); } using Type = typename std::conditional::value, v4uf, UT_FixedVector>::type; using SType = typename std::conditional::value, v4uf, UT_FixedVector>::type; /// An upper bound on the squared distance from myAverageP to the farthest point in the box. SType myMaxPDist2; /// Centre of mass of the mesh surface in this box UT_FixedVector myAverageP; /// Unnormalized, area-weighted normal of the mesh in this box UT_FixedVector myN; #if TAYLOR_SERIES_ORDER >= 1 /// Values for Omega_1 /// @{ UT_FixedVector myNijDiag; // Nxx, Nyy, Nzz Type myNxy_Nyx; // Nxy+Nyx Type myNyz_Nzy; // Nyz+Nzy Type myNzx_Nxz; // Nzx+Nxz /// @} #endif #if TAYLOR_SERIES_ORDER >= 2 /// Values for Omega_2 /// @{ UT_FixedVector myNijkDiag; // Nxxx, Nyyy, Nzzz Type mySumPermuteNxyz; // (Nxyz+Nxzy+Nyzx+Nyxz+Nzxy+Nzyx) = 2*(Nxyz+Nyzx+Nzxy) Type my2Nxxy_Nyxx; // Nxxy+Nxyx+Nyxx = 2Nxxy+Nyxx Type my2Nxxz_Nzxx; // Nxxz+Nxzx+Nzxx = 2Nxxz+Nzxx Type my2Nyyz_Nzyy; // Nyyz+Nyzy+Nzyy = 2Nyyz+Nzyy Type my2Nyyx_Nxyy; // Nyyx+Nyxy+Nxyy = 2Nyyx+Nxyy Type my2Nzzx_Nxzz; // Nzzx+Nzxz+Nxzz = 2Nzzx+Nxzz Type my2Nzzy_Nyzz; // Nzzy+Nzyz+Nyzz = 2Nzzy+Nyzz /// @} #endif }; template UT_SolidAngle::UT_SolidAngle() : myTree() , myNBoxes(0) , myOrder(2) , myData(nullptr) , myNTriangles(0) , myTrianglePoints(nullptr) , myNPoints(0) , myPositions(nullptr) {} template UT_SolidAngle::~UT_SolidAngle() { // Default destruction works, but this needs to be outlined // to avoid having to include UT_BVHImpl.h in the header, // (for the UT_UniquePtr destructor.) } template static SYS_FORCE_INLINE T dot( const UT_FixedVector &a, const UT_FixedVector &b) { return a[0]*b[0] + a[1]*b[1] + a[2]*b[2]; } template void UT_SolidAngle::init( const int ntriangles, const int *const triangle_points, const int npoints, const UT_Vector3T *const positions, const int order) { #if SOLID_ANGLE_DEBUG UTdebugFormat(""); UTdebugFormat(""); UTdebugFormat("Building BVH for {} ntriangles on {} points:", ntriangles, npoints); #endif myOrder = order; myNTriangles = ntriangles; myTrianglePoints = triangle_points; myNPoints = npoints; myPositions = positions; #if SOLID_ANGLE_TIME_PRECOMPUTE UT_StopWatch timer; timer.start(); #endif UT_SmallArray> triangle_boxes; triangle_boxes.setSizeNoInit(ntriangles); if (ntriangles < 16*1024) { const int *cur_triangle_points = triangle_points; for (int i = 0; i < ntriangles; ++i, cur_triangle_points += 3) { UT::Box &box = triangle_boxes[i]; box.initBounds(positions[cur_triangle_points[0]]); box.enlargeBounds(positions[cur_triangle_points[1]]); box.enlargeBounds(positions[cur_triangle_points[2]]); } } else { UTparallelFor(UT_BlockedRange(0,ntriangles), [triangle_points,&triangle_boxes,positions](const UT_BlockedRange &r) { const int *cur_triangle_points = triangle_points + exint(r.begin())*3; for (int i = r.begin(), end = r.end(); i < end; ++i, cur_triangle_points += 3) { UT::Box &box = triangle_boxes[i]; box.initBounds(positions[cur_triangle_points[0]]); box.enlargeBounds(positions[cur_triangle_points[1]]); box.enlargeBounds(positions[cur_triangle_points[2]]); } }); } #if SOLID_ANGLE_TIME_PRECOMPUTE double time = timer.stop(); UTdebugFormat("{} s to create bounding boxes.", time); timer.start(); #endif myTree.template init(triangle_boxes.array(), ntriangles); #if SOLID_ANGLE_TIME_PRECOMPUTE time = timer.stop(); UTdebugFormat("{} s to initialize UT_BVH structure. {} nodes", time, myTree.getNumNodes()); #endif //myTree.debugDump(); const int nnodes = myTree.getNumNodes(); myNBoxes = nnodes; BoxData *box_data = new BoxData[nnodes]; myData.reset(box_data); // Some data are only needed during initialization. struct LocalData { // Bounding box UT::Box myBox; // P and N are needed from each child for computing Nij. UT_Vector3T myAverageP; UT_Vector3T myAreaP; UT_Vector3T myN; // Unsigned area is needed for computing the average position. T myArea; #if TAYLOR_SERIES_ORDER >= 1 // These are needed for computing Nijk. UT_Vector3T myNijDiag; T myNxy; T myNyx; T myNyz; T myNzy; T myNzx; T myNxz; #endif #if TAYLOR_SERIES_ORDER >= 2 UT_Vector3T myNijkDiag; // Nxxx, Nyyy, Nzzz T mySumPermuteNxyz; // (Nxyz+Nxzy+Nyzx+Nyxz+Nzxy+Nzyx) = 2*(Nxyz+Nyzx+Nzxy) T my2Nxxy_Nyxx; // Nxxy+Nxyx+Nyxx = 2Nxxy+Nyxx T my2Nxxz_Nzxx; // Nxxz+Nxzx+Nzxx = 2Nxxz+Nzxx T my2Nyyz_Nzyy; // Nyyz+Nyzy+Nzyy = 2Nyyz+Nzyy T my2Nyyx_Nxyy; // Nyyx+Nyxy+Nxyy = 2Nyyx+Nxyy T my2Nzzx_Nxzz; // Nzzx+Nzxz+Nxzz = 2Nzzx+Nxzz T my2Nzzy_Nyzz; // Nzzy+Nzyz+Nyzz = 2Nzzy+Nyzz #endif }; struct PrecomputeFunctors { BoxData *const myBoxData; const UT::Box *const myTriangleBoxes; const int *const myTrianglePoints; const UT_Vector3T *const myPositions; const int myOrder; PrecomputeFunctors( BoxData *box_data, const UT::Box *triangle_boxes, const int *triangle_points, const UT_Vector3T *positions, const int order) : myBoxData(box_data) , myTriangleBoxes(triangle_boxes) , myTrianglePoints(triangle_points) , myPositions(positions) , myOrder(order) {} constexpr SYS_FORCE_INLINE bool pre(const int nodei, LocalData *data_for_parent) const { return true; } void item(const int itemi, const int parent_nodei, LocalData &data_for_parent) const { const UT_Vector3T *const positions = myPositions; const int *const cur_triangle_points = myTrianglePoints + 3*itemi; const UT_Vector3T a = positions[cur_triangle_points[0]]; const UT_Vector3T b = positions[cur_triangle_points[1]]; const UT_Vector3T c = positions[cur_triangle_points[2]]; const UT_Vector3T ab = b-a; const UT_Vector3T ac = c-a; const UT::Box &triangle_box = myTriangleBoxes[itemi]; data_for_parent.myBox.initBounds(triangle_box.getMin(), triangle_box.getMax()); // Area-weighted normal (unnormalized) const UT_Vector3T N = T(0.5)*cross(ab,ac); const T area2 = N.length2(); const T area = SYSsqrt(area2); const UT_Vector3T P = (a+b+c)/3; data_for_parent.myAverageP = P; data_for_parent.myAreaP = P*area; data_for_parent.myN = N; #if SOLID_ANGLE_DEBUG UTdebugFormat(""); UTdebugFormat("Triangle {}: P = {}; N = {}; area = {}", itemi, P, N, area); UTdebugFormat(" box = {}", data_for_parent.myBox); #endif data_for_parent.myArea = area; #if TAYLOR_SERIES_ORDER >= 1 const int order = myOrder; if (order < 1) return; // NOTE: Due to P being at the centroid, triangles have Nij = 0 // contributions to Nij. data_for_parent.myNijDiag = T(0); data_for_parent.myNxy = 0; data_for_parent.myNyx = 0; data_for_parent.myNyz = 0; data_for_parent.myNzy = 0; data_for_parent.myNzx = 0; data_for_parent.myNxz = 0; #endif #if TAYLOR_SERIES_ORDER >= 2 if (order < 2) return; // If it's zero-length, the results are zero, so we can skip. if (area == 0) { data_for_parent.myNijkDiag = T(0); data_for_parent.mySumPermuteNxyz = 0; data_for_parent.my2Nxxy_Nyxx = 0; data_for_parent.my2Nxxz_Nzxx = 0; data_for_parent.my2Nyyz_Nzyy = 0; data_for_parent.my2Nyyx_Nxyy = 0; data_for_parent.my2Nzzx_Nxzz = 0; data_for_parent.my2Nzzy_Nyzz = 0; return; } // We need to use the NORMALIZED normal to multiply the integrals by. UT_Vector3T n = N/area; // Figure out the order of a, b, and c in x, y, and z // for use in computing the integrals for Nijk. UT_Vector3T values[3] = {a, b, c}; int order_x[3] = {0,1,2}; if (a[0] > b[0]) std::swap(order_x[0],order_x[1]); if (values[order_x[0]][0] > c[0]) std::swap(order_x[0],order_x[2]); if (values[order_x[1]][0] > values[order_x[2]][0]) std::swap(order_x[1],order_x[2]); T dx = values[order_x[2]][0] - values[order_x[0]][0]; int order_y[3] = {0,1,2}; if (a[1] > b[1]) std::swap(order_y[0],order_y[1]); if (values[order_y[0]][1] > c[1]) std::swap(order_y[0],order_y[2]); if (values[order_y[1]][1] > values[order_y[2]][1]) std::swap(order_y[1],order_y[2]); T dy = values[order_y[2]][1] - values[order_y[0]][1]; int order_z[3] = {0,1,2}; if (a[2] > b[2]) std::swap(order_z[0],order_z[1]); if (values[order_z[0]][2] > c[2]) std::swap(order_z[0],order_z[2]); if (values[order_z[1]][2] > values[order_z[2]][2]) std::swap(order_z[1],order_z[2]); T dz = values[order_z[2]][2] - values[order_z[0]][2]; auto &&compute_integrals = []( const UT_Vector3T &a, const UT_Vector3T &b, const UT_Vector3T &c, const UT_Vector3T &P, T *integral_ii, T *integral_ij, T *integral_ik, const int i) { #if SOLID_ANGLE_DEBUG UTdebugFormat(" Splitting on {}; a = {}; b = {}; c = {}", char('x'+i), a, b, c); #endif // NOTE: a, b, and c must be in order of the i axis. // We're splitting the triangle at the middle i coordinate. const UT_Vector3T oab = b - a; const UT_Vector3T oac = c - a; const UT_Vector3T ocb = b - c; UT_ASSERT_MSG_P(oac[i] > 0, "This should have been checked by the caller."); const T t = oab[i]/oac[i]; UT_ASSERT_MSG_P(t >= 0 && t <= 1, "Either sorting must have gone wrong, or there are input NaNs."); const int j = (i==2) ? 0 : (i+1); const int k = (j==2) ? 0 : (j+1); const T jdiff = t*oac[j] - oab[j]; const T kdiff = t*oac[k] - oab[k]; UT_Vector3T cross_a; cross_a[0] = (jdiff*oab[k] - kdiff*oab[j]); cross_a[1] = kdiff*oab[i]; cross_a[2] = jdiff*oab[i]; UT_Vector3T cross_c; cross_c[0] = (jdiff*ocb[k] - kdiff*ocb[j]); cross_c[1] = kdiff*ocb[i]; cross_c[2] = jdiff*ocb[i]; const T area_scale_a = cross_a.length(); const T area_scale_c = cross_c.length(); const T Pai = a[i] - P[i]; const T Pci = c[i] - P[i]; // Integral over the area of the triangle of (pi^2)dA, // by splitting the triangle into two at b, the a side // and the c side. const T int_ii_a = area_scale_a*(T(0.5)*Pai*Pai + T(2.0/3.0)*Pai*oab[i] + T(0.25)*oab[i]*oab[i]); const T int_ii_c = area_scale_c*(T(0.5)*Pci*Pci + T(2.0/3.0)*Pci*ocb[i] + T(0.25)*ocb[i]*ocb[i]); *integral_ii = int_ii_a + int_ii_c; #if SOLID_ANGLE_DEBUG UTdebugFormat(" integral_{}{}_a = {}; integral_{}{}_c = {}", char('x'+i), char('x'+i), int_ii_a, char('x'+i), char('x'+i), int_ii_c); #endif int jk = j; T *integral = integral_ij; T diff = jdiff; while (true) // This only does 2 iterations, one for j and one for k { if (integral) { T obmidj = b[jk] + T(0.5)*diff; T oabmidj = obmidj - a[jk]; T ocbmidj = obmidj - c[jk]; T Paj = a[jk] - P[jk]; T Pcj = c[jk] - P[jk]; // Integral over the area of the triangle of (pi*pj)dA const T int_ij_a = area_scale_a*(T(0.5)*Pai*Paj + T(1.0/3.0)*Pai*oabmidj + T(1.0/3.0)*Paj*oab[i] + T(0.25)*oab[i]*oabmidj); const T int_ij_c = area_scale_c*(T(0.5)*Pci*Pcj + T(1.0/3.0)*Pci*ocbmidj + T(1.0/3.0)*Pcj*ocb[i] + T(0.25)*ocb[i]*ocbmidj); *integral = int_ij_a + int_ij_c; #if SOLID_ANGLE_DEBUG UTdebugFormat(" integral_{}{}_a = {}; integral_{}{}_c = {}", char('x'+i), char('x'+jk), int_ij_a, char('x'+i), char('x'+jk), int_ij_c); #endif } if (jk == k) break; jk = k; integral = integral_ik; diff = kdiff; } }; T integral_xx = 0; T integral_xy = 0; T integral_yy = 0; T integral_yz = 0; T integral_zz = 0; T integral_zx = 0; // Note that if the span of any axis is zero, the integral must be zero, // since there's a factor of (p_i-P_i), i.e. value minus average, // and every value must be equal to the average, giving zero. if (dx > 0) { compute_integrals( values[order_x[0]], values[order_x[1]], values[order_x[2]], P, &integral_xx, ((dx >= dy && dy > 0) ? &integral_xy : nullptr), ((dx >= dz && dz > 0) ? &integral_zx : nullptr), 0); } if (dy > 0) { compute_integrals( values[order_y[0]], values[order_y[1]], values[order_y[2]], P, &integral_yy, ((dy >= dz && dz > 0) ? &integral_yz : nullptr), ((dx < dy && dx > 0) ? &integral_xy : nullptr), 1); } if (dz > 0) { compute_integrals( values[order_z[0]], values[order_z[1]], values[order_z[2]], P, &integral_zz, ((dx < dz && dx > 0) ? &integral_zx : nullptr), ((dy < dz && dy > 0) ? &integral_yz : nullptr), 2); } UT_Vector3T Niii; Niii[0] = integral_xx; Niii[1] = integral_yy; Niii[2] = integral_zz; Niii *= n; data_for_parent.myNijkDiag = Niii; data_for_parent.mySumPermuteNxyz = 2*(n[0]*integral_yz + n[1]*integral_zx + n[2]*integral_xy); T Nxxy = n[0]*integral_xy; T Nxxz = n[0]*integral_zx; T Nyyz = n[1]*integral_yz; T Nyyx = n[1]*integral_xy; T Nzzx = n[2]*integral_zx; T Nzzy = n[2]*integral_yz; data_for_parent.my2Nxxy_Nyxx = 2*Nxxy + n[1]*integral_xx; data_for_parent.my2Nxxz_Nzxx = 2*Nxxz + n[2]*integral_xx; data_for_parent.my2Nyyz_Nzyy = 2*Nyyz + n[2]*integral_yy; data_for_parent.my2Nyyx_Nxyy = 2*Nyyx + n[0]*integral_yy; data_for_parent.my2Nzzx_Nxzz = 2*Nzzx + n[0]*integral_zz; data_for_parent.my2Nzzy_Nyzz = 2*Nzzy + n[1]*integral_zz; #if SOLID_ANGLE_DEBUG UTdebugFormat(" integral_xx = {}; yy = {}; zz = {}", integral_xx, integral_yy, integral_zz); UTdebugFormat(" integral_xy = {}; yz = {}; zx = {}", integral_xy, integral_yz, integral_zx); #endif #endif } void post(const int nodei, const int parent_nodei, LocalData *data_for_parent, const int nchildren, const LocalData *child_data_array) const { // NOTE: Although in the general case, data_for_parent may be null for the root call, // this functor assumes that it's non-null, so the call below must pass a non-null pointer. BoxData ¤t_box_data = myBoxData[nodei]; UT_Vector3T N = child_data_array[0].myN; ((T*)¤t_box_data.myN[0])[0] = N[0]; ((T*)¤t_box_data.myN[1])[0] = N[1]; ((T*)¤t_box_data.myN[2])[0] = N[2]; UT_Vector3T areaP = child_data_array[0].myAreaP; T area = child_data_array[0].myArea; UT_Vector3T local_P = child_data_array[0].myAverageP; ((T*)¤t_box_data.myAverageP[0])[0] = local_P[0]; ((T*)¤t_box_data.myAverageP[1])[0] = local_P[1]; ((T*)¤t_box_data.myAverageP[2])[0] = local_P[2]; for (int i = 1; i < nchildren; ++i) { const UT_Vector3T local_N = child_data_array[i].myN; N += local_N; ((T*)¤t_box_data.myN[0])[i] = local_N[0]; ((T*)¤t_box_data.myN[1])[i] = local_N[1]; ((T*)¤t_box_data.myN[2])[i] = local_N[2]; areaP += child_data_array[i].myAreaP; area += child_data_array[i].myArea; const UT_Vector3T local_P = child_data_array[i].myAverageP; ((T*)¤t_box_data.myAverageP[0])[i] = local_P[0]; ((T*)¤t_box_data.myAverageP[1])[i] = local_P[1]; ((T*)¤t_box_data.myAverageP[2])[i] = local_P[2]; } for (int i = nchildren; i < BVH_N; ++i) { // Set to zero, just to avoid false positives for uses of uninitialized memory. ((T*)¤t_box_data.myN[0])[i] = 0; ((T*)¤t_box_data.myN[1])[i] = 0; ((T*)¤t_box_data.myN[2])[i] = 0; ((T*)¤t_box_data.myAverageP[0])[i] = 0; ((T*)¤t_box_data.myAverageP[1])[i] = 0; ((T*)¤t_box_data.myAverageP[2])[i] = 0; } data_for_parent->myN = N; data_for_parent->myAreaP = areaP; data_for_parent->myArea = area; UT::Box box(child_data_array[0].myBox); for (int i = 1; i < nchildren; ++i) box.enlargeBounds(child_data_array[i].myBox); // Normalize P UT_Vector3T averageP; if (area > 0) averageP = areaP/area; else averageP = T(0.5)*(box.getMin() + box.getMax()); data_for_parent->myAverageP = averageP; data_for_parent->myBox = box; for (int i = 0; i < nchildren; ++i) { const UT::Box &local_box(child_data_array[i].myBox); const UT_Vector3T &local_P = child_data_array[i].myAverageP; const UT_Vector3T maxPDiff = SYSmax(local_P-UT_Vector3T(local_box.getMin()), UT_Vector3T(local_box.getMax())-local_P); ((T*)¤t_box_data.myMaxPDist2)[i] = maxPDiff.length2(); } for (int i = nchildren; i < BVH_N; ++i) { // This child is non-existent. If we set myMaxPDist2 to infinity, it will never // use the approximation, and the traverseVector function can check for EMPTY. ((T*)¤t_box_data.myMaxPDist2)[i] = std::numeric_limits::infinity(); } #if TAYLOR_SERIES_ORDER >= 1 const int order = myOrder; if (order >= 1) { // We now have the current box's P, so we can adjust Nij and Nijk data_for_parent->myNijDiag = child_data_array[0].myNijDiag; data_for_parent->myNxy = 0; data_for_parent->myNyx = 0; data_for_parent->myNyz = 0; data_for_parent->myNzy = 0; data_for_parent->myNzx = 0; data_for_parent->myNxz = 0; #if TAYLOR_SERIES_ORDER >= 2 data_for_parent->myNijkDiag = child_data_array[0].myNijkDiag; data_for_parent->mySumPermuteNxyz = child_data_array[0].mySumPermuteNxyz; data_for_parent->my2Nxxy_Nyxx = child_data_array[0].my2Nxxy_Nyxx; data_for_parent->my2Nxxz_Nzxx = child_data_array[0].my2Nxxz_Nzxx; data_for_parent->my2Nyyz_Nzyy = child_data_array[0].my2Nyyz_Nzyy; data_for_parent->my2Nyyx_Nxyy = child_data_array[0].my2Nyyx_Nxyy; data_for_parent->my2Nzzx_Nxzz = child_data_array[0].my2Nzzx_Nxzz; data_for_parent->my2Nzzy_Nyzz = child_data_array[0].my2Nzzy_Nyzz; #endif for (int i = 1; i < nchildren; ++i) { data_for_parent->myNijDiag += child_data_array[i].myNijDiag; #if TAYLOR_SERIES_ORDER >= 2 data_for_parent->myNijkDiag += child_data_array[i].myNijkDiag; data_for_parent->mySumPermuteNxyz += child_data_array[i].mySumPermuteNxyz; data_for_parent->my2Nxxy_Nyxx += child_data_array[i].my2Nxxy_Nyxx; data_for_parent->my2Nxxz_Nzxx += child_data_array[i].my2Nxxz_Nzxx; data_for_parent->my2Nyyz_Nzyy += child_data_array[i].my2Nyyz_Nzyy; data_for_parent->my2Nyyx_Nxyy += child_data_array[i].my2Nyyx_Nxyy; data_for_parent->my2Nzzx_Nxzz += child_data_array[i].my2Nzzx_Nxzz; data_for_parent->my2Nzzy_Nyzz += child_data_array[i].my2Nzzy_Nyzz; #endif } for (int j = 0; j < 3; ++j) ((T*)¤t_box_data.myNijDiag[j])[0] = child_data_array[0].myNijDiag[j]; ((T*)¤t_box_data.myNxy_Nyx)[0] = child_data_array[0].myNxy + child_data_array[0].myNyx; ((T*)¤t_box_data.myNyz_Nzy)[0] = child_data_array[0].myNyz + child_data_array[0].myNzy; ((T*)¤t_box_data.myNzx_Nxz)[0] = child_data_array[0].myNzx + child_data_array[0].myNxz; for (int j = 0; j < 3; ++j) ((T*)¤t_box_data.myNijkDiag[j])[0] = child_data_array[0].myNijkDiag[j]; ((T*)¤t_box_data.mySumPermuteNxyz)[0] = child_data_array[0].mySumPermuteNxyz; ((T*)¤t_box_data.my2Nxxy_Nyxx)[0] = child_data_array[0].my2Nxxy_Nyxx; ((T*)¤t_box_data.my2Nxxz_Nzxx)[0] = child_data_array[0].my2Nxxz_Nzxx; ((T*)¤t_box_data.my2Nyyz_Nzyy)[0] = child_data_array[0].my2Nyyz_Nzyy; ((T*)¤t_box_data.my2Nyyx_Nxyy)[0] = child_data_array[0].my2Nyyx_Nxyy; ((T*)¤t_box_data.my2Nzzx_Nxzz)[0] = child_data_array[0].my2Nzzx_Nxzz; ((T*)¤t_box_data.my2Nzzy_Nyzz)[0] = child_data_array[0].my2Nzzy_Nyzz; for (int i = 1; i < nchildren; ++i) { for (int j = 0; j < 3; ++j) ((T*)¤t_box_data.myNijDiag[j])[i] = child_data_array[i].myNijDiag[j]; ((T*)¤t_box_data.myNxy_Nyx)[i] = child_data_array[i].myNxy + child_data_array[i].myNyx; ((T*)¤t_box_data.myNyz_Nzy)[i] = child_data_array[i].myNyz + child_data_array[i].myNzy; ((T*)¤t_box_data.myNzx_Nxz)[i] = child_data_array[i].myNzx + child_data_array[i].myNxz; for (int j = 0; j < 3; ++j) ((T*)¤t_box_data.myNijkDiag[j])[i] = child_data_array[i].myNijkDiag[j]; ((T*)¤t_box_data.mySumPermuteNxyz)[i] = child_data_array[i].mySumPermuteNxyz; ((T*)¤t_box_data.my2Nxxy_Nyxx)[i] = child_data_array[i].my2Nxxy_Nyxx; ((T*)¤t_box_data.my2Nxxz_Nzxx)[i] = child_data_array[i].my2Nxxz_Nzxx; ((T*)¤t_box_data.my2Nyyz_Nzyy)[i] = child_data_array[i].my2Nyyz_Nzyy; ((T*)¤t_box_data.my2Nyyx_Nxyy)[i] = child_data_array[i].my2Nyyx_Nxyy; ((T*)¤t_box_data.my2Nzzx_Nxzz)[i] = child_data_array[i].my2Nzzx_Nxzz; ((T*)¤t_box_data.my2Nzzy_Nyzz)[i] = child_data_array[i].my2Nzzy_Nyzz; } for (int i = nchildren; i < BVH_N; ++i) { // Set to zero, just to avoid false positives for uses of uninitialized memory. for (int j = 0; j < 3; ++j) ((T*)¤t_box_data.myNijDiag[j])[i] = 0; ((T*)¤t_box_data.myNxy_Nyx)[i] = 0; ((T*)¤t_box_data.myNyz_Nzy)[i] = 0; ((T*)¤t_box_data.myNzx_Nxz)[i] = 0; for (int j = 0; j < 3; ++j) ((T*)¤t_box_data.myNijkDiag[j])[i] = 0; ((T*)¤t_box_data.mySumPermuteNxyz)[i] = 0; ((T*)¤t_box_data.my2Nxxy_Nyxx)[i] = 0; ((T*)¤t_box_data.my2Nxxz_Nzxx)[i] = 0; ((T*)¤t_box_data.my2Nyyz_Nzyy)[i] = 0; ((T*)¤t_box_data.my2Nyyx_Nxyy)[i] = 0; ((T*)¤t_box_data.my2Nzzx_Nxzz)[i] = 0; ((T*)¤t_box_data.my2Nzzy_Nyzz)[i] = 0; } for (int i = 0; i < nchildren; ++i) { const LocalData &child_data = child_data_array[i]; UT_Vector3T displacement = child_data.myAverageP - UT_Vector3T(data_for_parent->myAverageP); UT_Vector3T N = child_data.myN; // Adjust Nij for the change in centre P data_for_parent->myNijDiag += N*displacement; T Nxy = child_data.myNxy + N[0]*displacement[1]; T Nyx = child_data.myNyx + N[1]*displacement[0]; T Nyz = child_data.myNyz + N[1]*displacement[2]; T Nzy = child_data.myNzy + N[2]*displacement[1]; T Nzx = child_data.myNzx + N[2]*displacement[0]; T Nxz = child_data.myNxz + N[0]*displacement[2]; data_for_parent->myNxy += Nxy; data_for_parent->myNyx += Nyx; data_for_parent->myNyz += Nyz; data_for_parent->myNzy += Nzy; data_for_parent->myNzx += Nzx; data_for_parent->myNxz += Nxz; #if TAYLOR_SERIES_ORDER >= 2 if (order >= 2) { // Adjust Nijk for the change in centre P data_for_parent->myNijkDiag += T(2)*displacement*child_data.myNijDiag + displacement*displacement*child_data.myN; data_for_parent->mySumPermuteNxyz += (displacement[0]*(Nyz+Nzy) + displacement[1]*(Nzx+Nxz) + displacement[2]*(Nxy+Nyx)); data_for_parent->my2Nxxy_Nyxx += 2*(displacement[1]*child_data.myNijDiag[0] + displacement[0]*child_data.myNxy + N[0]*displacement[0]*displacement[1]) + 2*child_data.myNyx*displacement[0] + N[1]*displacement[0]*displacement[0]; data_for_parent->my2Nxxz_Nzxx += 2*(displacement[2]*child_data.myNijDiag[0] + displacement[0]*child_data.myNxz + N[0]*displacement[0]*displacement[2]) + 2*child_data.myNzx*displacement[0] + N[2]*displacement[0]*displacement[0]; data_for_parent->my2Nyyz_Nzyy += 2*(displacement[2]*child_data.myNijDiag[1] + displacement[1]*child_data.myNyz + N[1]*displacement[1]*displacement[2]) + 2*child_data.myNzy*displacement[1] + N[2]*displacement[1]*displacement[1]; data_for_parent->my2Nyyx_Nxyy += 2*(displacement[0]*child_data.myNijDiag[1] + displacement[1]*child_data.myNyx + N[1]*displacement[1]*displacement[0]) + 2*child_data.myNxy*displacement[1] + N[0]*displacement[1]*displacement[1]; data_for_parent->my2Nzzx_Nxzz += 2*(displacement[0]*child_data.myNijDiag[2] + displacement[2]*child_data.myNzx + N[2]*displacement[2]*displacement[0]) + 2*child_data.myNxz*displacement[2] + N[0]*displacement[2]*displacement[2]; data_for_parent->my2Nzzy_Nyzz += 2*(displacement[1]*child_data.myNijDiag[2] + displacement[2]*child_data.myNzy + N[2]*displacement[2]*displacement[1]) + 2*child_data.myNyz*displacement[2] + N[1]*displacement[2]*displacement[2]; } #endif } } #endif #if SOLID_ANGLE_DEBUG UTdebugFormat(""); UTdebugFormat("Node {}: nchildren = {}; maxP = {}", nodei, nchildren, SYSsqrt(current_box_data.myMaxPDist2)); UTdebugFormat(" P = {}; N = {}", current_box_data.myAverageP, current_box_data.myN); #if TAYLOR_SERIES_ORDER >= 1 UTdebugFormat(" Nii = {}", current_box_data.myNijDiag); UTdebugFormat(" Nxy+Nyx = {}; Nyz+Nzy = {}; Nyz+Nzy = {}", current_box_data.myNxy_Nyx, current_box_data.myNyz_Nzy, current_box_data.myNzx_Nxz); #if TAYLOR_SERIES_ORDER >= 2 UTdebugFormat(" Niii = {}; 2(Nxyz+Nyzx+Nzxy) = {}", current_box_data.myNijkDiag, current_box_data.mySumPermuteNxyz); UTdebugFormat(" 2Nxxy+Nyxx = {}; 2Nxxz+Nzxx = {}", current_box_data.my2Nxxy_Nyxx, current_box_data.my2Nxxz_Nzxx); UTdebugFormat(" 2Nyyz+Nzyy = {}; 2Nyyx+Nxyy = {}", current_box_data.my2Nyyz_Nzyy, current_box_data.my2Nyyx_Nxyy); UTdebugFormat(" 2Nzzx+Nxzz = {}; 2Nzzy+Nyzz = {}", current_box_data.my2Nzzx_Nxzz, current_box_data.my2Nzzy_Nyzz); #endif #endif #endif } }; #if SOLID_ANGLE_TIME_PRECOMPUTE timer.start(); #endif const PrecomputeFunctors functors(box_data, triangle_boxes.array(), triangle_points, positions, order); // NOTE: post-functor relies on non-null data_for_parent, so we have to pass one. LocalData local_data; myTree.template traverseParallel(4096, functors, &local_data); //myTree.template traverse(functors); #if SOLID_ANGLE_TIME_PRECOMPUTE time = timer.stop(); UTdebugFormat("{} s to precompute coefficients.", time); #endif } template void UT_SolidAngle::clear() { myTree.clear(); myNBoxes = 0; myOrder = 2; myData.reset(); myNTriangles = 0; myTrianglePoints = nullptr; myNPoints = 0; myPositions = nullptr; } template T UT_SolidAngle::computeSolidAngle(const UT_Vector3T &query_point, const T accuracy_scale) const { const T accuracy_scale2 = accuracy_scale*accuracy_scale; struct SolidAngleFunctors { const BoxData *const myBoxData; const UT_Vector3T myQueryPoint; const T myAccuracyScale2; const UT_Vector3T *const myPositions; const int *const myTrianglePoints; const int myOrder; SolidAngleFunctors( const BoxData *const box_data, const UT_Vector3T &query_point, const T accuracy_scale2, const int order, const UT_Vector3T *const positions, const int *const triangle_points) : myBoxData(box_data) , myQueryPoint(query_point) , myAccuracyScale2(accuracy_scale2) , myOrder(order) , myPositions(positions) , myTrianglePoints(triangle_points) {} uint pre(const int nodei, T *data_for_parent) const { const BoxData &data = myBoxData[nodei]; const typename BoxData::Type maxP2 = data.myMaxPDist2; UT_FixedVector q; q[0] = typename BoxData::Type(myQueryPoint[0]); q[1] = typename BoxData::Type(myQueryPoint[1]); q[2] = typename BoxData::Type(myQueryPoint[2]); q -= data.myAverageP; const typename BoxData::Type qlength2 = q[0]*q[0] + q[1]*q[1] + q[2]*q[2]; // If the query point is within a factor of accuracy_scale of the box radius, // it's assumed to be not a good enough approximation, so it needs to descend. // TODO: Is there a way to estimate the error? static_assert((std::is_same::value), "FIXME: Implement support for other tuple types!"); v4uu descend_mask = (qlength2 <= maxP2*myAccuracyScale2); uint descend_bitmask = _mm_movemask_ps(V4SF(descend_mask.vector)); constexpr uint allchildbits = ((uint(1)<= 1 const int order = myOrder; if (order >= 1) { const UT_FixedVector q2 = q*q; const typename BoxData::Type qlength_m3 = qlength_m2*qlength_m1; const typename BoxData::Type Omega_1 = qlength_m3*(data.myNijDiag[0] + data.myNijDiag[1] + data.myNijDiag[2] -typename BoxData::Type(3.0)*(dot(q2,data.myNijDiag) + q[0]*q[1]*data.myNxy_Nyx + q[0]*q[2]*data.myNzx_Nxz + q[1]*q[2]*data.myNyz_Nzy)); Omega_approx += Omega_1; #if TAYLOR_SERIES_ORDER >= 2 if (order >= 2) { const UT_FixedVector q3 = q2*q; const typename BoxData::Type qlength_m4 = qlength_m2*qlength_m2; typename BoxData::Type temp0[3] = { data.my2Nyyx_Nxyy+data.my2Nzzx_Nxzz, data.my2Nzzy_Nyzz+data.my2Nxxy_Nyxx, data.my2Nxxz_Nzxx+data.my2Nyyz_Nzyy }; typename BoxData::Type temp1[3] = { q[1]*data.my2Nxxy_Nyxx + q[2]*data.my2Nxxz_Nzxx, q[2]*data.my2Nyyz_Nzyy + q[0]*data.my2Nyyx_Nxyy, q[0]*data.my2Nzzx_Nxzz + q[1]*data.my2Nzzy_Nyzz }; const typename BoxData::Type Omega_2 = qlength_m4*(typename BoxData::Type(1.5)*dot(q, typename BoxData::Type(3)*data.myNijkDiag + UT_FixedVector(temp0)) -typename BoxData::Type(7.5)*(dot(q3,data.myNijkDiag) + q[0]*q[1]*q[2]*data.mySumPermuteNxyz + dot(q2, UT_FixedVector(temp1)))); Omega_approx += Omega_2; } #endif } #endif // If q is so small that we got NaNs and we just have a // small bounding box, it needs to descend. const v4uu mask = Omega_approx.isFinite() & ~descend_mask; Omega_approx = Omega_approx & mask; descend_bitmask = (~_mm_movemask_ps(V4SF(mask.vector))) & allchildbits; T sum = Omega_approx[0]; for (int i = 1; i < BVH_N; ++i) sum += Omega_approx[i]; *data_for_parent = sum; return descend_bitmask; } void item(const int itemi, const int parent_nodei, T &data_for_parent) const { const UT_Vector3T *const positions = myPositions; const int *const cur_triangle_points = myTrianglePoints + 3*itemi; const UT_Vector3T a = positions[cur_triangle_points[0]]; const UT_Vector3T b = positions[cur_triangle_points[1]]; const UT_Vector3T c = positions[cur_triangle_points[2]]; data_for_parent = UTsignedSolidAngleTri(a, b, c, myQueryPoint); } SYS_FORCE_INLINE void post(const int nodei, const int parent_nodei, T *data_for_parent, const int nchildren, const T *child_data_array, const uint descend_bits) const { T sum = (descend_bits&1) ? child_data_array[0] : 0; for (int i = 1; i < nchildren; ++i) sum += ((descend_bits>>i)&1) ? child_data_array[i] : 0; *data_for_parent += sum; } }; const SolidAngleFunctors functors(myData.get(), query_point, accuracy_scale2, myOrder, myPositions, myTrianglePoints); T sum; myTree.traverseVector(functors, &sum); return sum; } template struct UT_SubtendedAngle::BoxData { void clear() { // Set everything to zero memset(this,0,sizeof(*this)); } using Type = typename std::conditional::value, v4uf, UT_FixedVector>::type; using SType = typename std::conditional::value, v4uf, UT_FixedVector>::type; /// An upper bound on the squared distance from myAverageP to the farthest point in the box. SType myMaxPDist2; /// Centre of mass of the mesh surface in this box UT_FixedVector myAverageP; /// Unnormalized, area-weighted normal of the mesh in this box UT_FixedVector myN; /// Values for Omega_1 /// @{ UT_FixedVector myNijDiag; // Nxx, Nyy Type myNxy_Nyx; // Nxy+Nyx /// @} /// Values for Omega_2 /// @{ UT_FixedVector myNijkDiag; // Nxxx, Nyyy Type my2Nxxy_Nyxx; // Nxxy+Nxyx+Nyxx = 2Nxxy+Nyxx Type my2Nyyx_Nxyy; // Nyyx+Nyxy+Nxyy = 2Nyyx+Nxyy /// @} }; template UT_SubtendedAngle::UT_SubtendedAngle() : myTree() , myNBoxes(0) , myOrder(2) , myData(nullptr) , myNSegments(0) , mySegmentPoints(nullptr) , myNPoints(0) , myPositions(nullptr) {} template UT_SubtendedAngle::~UT_SubtendedAngle() { // Default destruction works, but this needs to be outlined // to avoid having to include UT_BVHImpl.h in the header, // (for the UT_UniquePtr destructor.) } template static SYS_FORCE_INLINE T dot( const UT_FixedVector &a, const UT_FixedVector &b) { return a[0]*b[0] + a[1]*b[1]; } template void UT_SubtendedAngle::init( const int nsegments, const int *const segment_points, const int npoints, const UT_Vector2T *const positions, const int order) { #if SOLID_ANGLE_DEBUG UTdebugFormat(""); UTdebugFormat(""); UTdebugFormat("Building BVH for {} segments on {} points:", nsegments, npoints); #endif myOrder = order; myNSegments = nsegments; mySegmentPoints = segment_points; myNPoints = npoints; myPositions = positions; #if SOLID_ANGLE_TIME_PRECOMPUTE UT_StopWatch timer; timer.start(); #endif UT_SmallArray> segment_boxes; segment_boxes.setSizeNoInit(nsegments); if (nsegments < 16*1024) { const int *cur_segment_points = segment_points; for (int i = 0; i < nsegments; ++i, cur_segment_points += 2) { UT::Box &box = segment_boxes[i]; box.initBounds(positions[cur_segment_points[0]]); box.enlargeBounds(positions[cur_segment_points[1]]); } } else { UTparallelFor(UT_BlockedRange(0,nsegments), [segment_points,&segment_boxes,positions](const UT_BlockedRange &r) { const int *cur_segment_points = segment_points + exint(r.begin())*2; for (int i = r.begin(), end = r.end(); i < end; ++i, cur_segment_points += 2) { UT::Box &box = segment_boxes[i]; box.initBounds(positions[cur_segment_points[0]]); box.enlargeBounds(positions[cur_segment_points[1]]); } }); } #if SOLID_ANGLE_TIME_PRECOMPUTE double time = timer.stop(); UTdebugFormat("{} s to create bounding boxes.", time); timer.start(); #endif myTree.template init(segment_boxes.array(), nsegments); #if SOLID_ANGLE_TIME_PRECOMPUTE time = timer.stop(); UTdebugFormat("{} s to initialize UT_BVH structure. {} nodes", time, myTree.getNumNodes()); #endif //myTree.debugDump(); const int nnodes = myTree.getNumNodes(); myNBoxes = nnodes; BoxData *box_data = new BoxData[nnodes]; myData.reset(box_data); // Some data are only needed during initialization. struct LocalData { // Bounding box UT::Box myBox; // P and N are needed from each child for computing Nij. UT_Vector2T myAverageP; UT_Vector2T myLengthP; UT_Vector2T myN; // Unsigned length is needed for computing the average position. T myLength; // These are needed for computing Nijk. UT_Vector2T myNijDiag; T myNxy; T myNyx; UT_Vector2T myNijkDiag; // Nxxx, Nyyy T my2Nxxy_Nyxx; // Nxxy+Nxyx+Nyxx = 2Nxxy+Nyxx T my2Nyyx_Nxyy; // Nyyx+Nyxy+Nxyy = 2Nyyx+Nxyy }; struct PrecomputeFunctors { BoxData *const myBoxData; const UT::Box *const mySegmentBoxes; const int *const mySegmentPoints; const UT_Vector2T *const myPositions; const int myOrder; PrecomputeFunctors( BoxData *box_data, const UT::Box *segment_boxes, const int *segment_points, const UT_Vector2T *positions, const int order) : myBoxData(box_data) , mySegmentBoxes(segment_boxes) , mySegmentPoints(segment_points) , myPositions(positions) , myOrder(order) {} constexpr SYS_FORCE_INLINE bool pre(const int nodei, LocalData *data_for_parent) const { return true; } void item(const int itemi, const int parent_nodei, LocalData &data_for_parent) const { const UT_Vector2T *const positions = myPositions; const int *const cur_segment_points = mySegmentPoints + 2*itemi; const UT_Vector2T a = positions[cur_segment_points[0]]; const UT_Vector2T b = positions[cur_segment_points[1]]; const UT_Vector2T ab = b-a; const UT::Box &segment_box = mySegmentBoxes[itemi]; data_for_parent.myBox = segment_box; // Length-weighted normal (unnormalized) UT_Vector2T N; N[0] = ab[1]; N[1] = -ab[0]; const T length2 = ab.length2(); const T length = SYSsqrt(length2); const UT_Vector2T P = T(0.5)*(a+b); data_for_parent.myAverageP = P; data_for_parent.myLengthP = P*length; data_for_parent.myN = N; #if SOLID_ANGLE_DEBUG UTdebugFormat(""); UTdebugFormat("Triangle {}: P = {}; N = {}; length = {}", itemi, P, N, length); UTdebugFormat(" box = {}", data_for_parent.myBox); #endif data_for_parent.myLength = length; const int order = myOrder; if (order < 1) return; // NOTE: Due to P being at the centroid, segments have Nij = 0 // contributions to Nij. data_for_parent.myNijDiag = T(0); data_for_parent.myNxy = 0; data_for_parent.myNyx = 0; if (order < 2) return; // If it's zero-length, the results are zero, so we can skip. if (length == 0) { data_for_parent.myNijkDiag = T(0); data_for_parent.my2Nxxy_Nyxx = 0; data_for_parent.my2Nyyx_Nxyy = 0; return; } T integral_xx = ab[0]*ab[0]/T(12); T integral_xy = ab[0]*ab[1]/T(12); T integral_yy = ab[1]*ab[1]/T(12); data_for_parent.myNijkDiag[0] = integral_xx*N[0]; data_for_parent.myNijkDiag[1] = integral_yy*N[1]; T Nxxy = N[0]*integral_xy; T Nyxx = N[1]*integral_xx; T Nyyx = N[1]*integral_xy; T Nxyy = N[0]*integral_yy; data_for_parent.my2Nxxy_Nyxx = 2*Nxxy + Nyxx; data_for_parent.my2Nyyx_Nxyy = 2*Nyyx + Nxyy; #if SOLID_ANGLE_DEBUG UTdebugFormat(" integral_xx = {}; yy = {}", integral_xx, integral_yy); UTdebugFormat(" integral_xy = {}", integral_xy); #endif } void post(const int nodei, const int parent_nodei, LocalData *data_for_parent, const int nchildren, const LocalData *child_data_array) const { // NOTE: Although in the general case, data_for_parent may be null for the root call, // this functor assumes that it's non-null, so the call below must pass a non-null pointer. BoxData ¤t_box_data = myBoxData[nodei]; UT_Vector2T N = child_data_array[0].myN; ((T*)¤t_box_data.myN[0])[0] = N[0]; ((T*)¤t_box_data.myN[1])[0] = N[1]; UT_Vector2T lengthP = child_data_array[0].myLengthP; T length = child_data_array[0].myLength; const UT_Vector2T local_P = child_data_array[0].myAverageP; ((T*)¤t_box_data.myAverageP[0])[0] = local_P[0]; ((T*)¤t_box_data.myAverageP[1])[0] = local_P[1]; for (int i = 1; i < nchildren; ++i) { const UT_Vector2T local_N = child_data_array[i].myN; N += local_N; ((T*)¤t_box_data.myN[0])[i] = local_N[0]; ((T*)¤t_box_data.myN[1])[i] = local_N[1]; lengthP += child_data_array[i].myLengthP; length += child_data_array[i].myLength; const UT_Vector2T local_P = child_data_array[i].myAverageP; ((T*)¤t_box_data.myAverageP[0])[i] = local_P[0]; ((T*)¤t_box_data.myAverageP[1])[i] = local_P[1]; } for (int i = nchildren; i < BVH_N; ++i) { // Set to zero, just to avoid false positives for uses of uninitialized memory. ((T*)¤t_box_data.myN[0])[i] = 0; ((T*)¤t_box_data.myN[1])[i] = 0; ((T*)¤t_box_data.myAverageP[0])[i] = 0; ((T*)¤t_box_data.myAverageP[1])[i] = 0; } data_for_parent->myN = N; data_for_parent->myLengthP = lengthP; data_for_parent->myLength = length; UT::Box box(child_data_array[0].myBox); for (int i = 1; i < nchildren; ++i) box.combine(child_data_array[i].myBox); // Normalize P UT_Vector2T averageP; if (length > 0) averageP = lengthP/length; else averageP = T(0.5)*(box.getMin() + box.getMax()); data_for_parent->myAverageP = averageP; data_for_parent->myBox = box; for (int i = 0; i < nchildren; ++i) { const UT::Box &local_box(child_data_array[i].myBox); const UT_Vector2T &local_P = child_data_array[i].myAverageP; const UT_Vector2T maxPDiff = SYSmax(local_P-UT_Vector2T(local_box.getMin()), UT_Vector2T(local_box.getMax())-local_P); ((T*)¤t_box_data.myMaxPDist2)[i] = maxPDiff.length2(); } for (int i = nchildren; i < BVH_N; ++i) { // This child is non-existent. If we set myMaxPDist2 to infinity, it will never // use the approximation, and the traverseVector function can check for EMPTY. ((T*)¤t_box_data.myMaxPDist2)[i] = std::numeric_limits::infinity(); } const int order = myOrder; if (order >= 1) { // We now have the current box's P, so we can adjust Nij and Nijk data_for_parent->myNijDiag = child_data_array[0].myNijDiag; data_for_parent->myNxy = 0; data_for_parent->myNyx = 0; data_for_parent->myNijkDiag = child_data_array[0].myNijkDiag; data_for_parent->my2Nxxy_Nyxx = child_data_array[0].my2Nxxy_Nyxx; data_for_parent->my2Nyyx_Nxyy = child_data_array[0].my2Nyyx_Nxyy; for (int i = 1; i < nchildren; ++i) { data_for_parent->myNijDiag += child_data_array[i].myNijDiag; data_for_parent->myNijkDiag += child_data_array[i].myNijkDiag; data_for_parent->my2Nxxy_Nyxx += child_data_array[i].my2Nxxy_Nyxx; data_for_parent->my2Nyyx_Nxyy += child_data_array[i].my2Nyyx_Nxyy; } for (int j = 0; j < 2; ++j) ((T*)¤t_box_data.myNijDiag[j])[0] = child_data_array[0].myNijDiag[j]; ((T*)¤t_box_data.myNxy_Nyx)[0] = child_data_array[0].myNxy + child_data_array[0].myNyx; for (int j = 0; j < 2; ++j) ((T*)¤t_box_data.myNijkDiag[j])[0] = child_data_array[0].myNijkDiag[j]; ((T*)¤t_box_data.my2Nxxy_Nyxx)[0] = child_data_array[0].my2Nxxy_Nyxx; ((T*)¤t_box_data.my2Nyyx_Nxyy)[0] = child_data_array[0].my2Nyyx_Nxyy; for (int i = 1; i < nchildren; ++i) { for (int j = 0; j < 2; ++j) ((T*)¤t_box_data.myNijDiag[j])[i] = child_data_array[i].myNijDiag[j]; ((T*)¤t_box_data.myNxy_Nyx)[i] = child_data_array[i].myNxy + child_data_array[i].myNyx; for (int j = 0; j < 2; ++j) ((T*)¤t_box_data.myNijkDiag[j])[i] = child_data_array[i].myNijkDiag[j]; ((T*)¤t_box_data.my2Nxxy_Nyxx)[i] = child_data_array[i].my2Nxxy_Nyxx; ((T*)¤t_box_data.my2Nyyx_Nxyy)[i] = child_data_array[i].my2Nyyx_Nxyy; } for (int i = nchildren; i < BVH_N; ++i) { // Set to zero, just to avoid false positives for uses of uninitialized memory. for (int j = 0; j < 2; ++j) ((T*)¤t_box_data.myNijDiag[j])[i] = 0; ((T*)¤t_box_data.myNxy_Nyx)[i] = 0; for (int j = 0; j < 2; ++j) ((T*)¤t_box_data.myNijkDiag[j])[i] = 0; ((T*)¤t_box_data.my2Nxxy_Nyxx)[i] = 0; ((T*)¤t_box_data.my2Nyyx_Nxyy)[i] = 0; } for (int i = 0; i < nchildren; ++i) { const LocalData &child_data = child_data_array[i]; UT_Vector2T displacement = child_data.myAverageP - UT_Vector2T(data_for_parent->myAverageP); UT_Vector2T N = child_data.myN; // Adjust Nij for the change in centre P data_for_parent->myNijDiag += N*displacement; T Nxy = child_data.myNxy + N[0]*displacement[1]; T Nyx = child_data.myNyx + N[1]*displacement[0]; data_for_parent->myNxy += Nxy; data_for_parent->myNyx += Nyx; if (order >= 2) { // Adjust Nijk for the change in centre P data_for_parent->myNijkDiag += T(2)*displacement*child_data.myNijDiag + displacement*displacement*child_data.myN; data_for_parent->my2Nxxy_Nyxx += 2*(displacement[1]*child_data.myNijDiag[0] + displacement[0]*child_data.myNxy + N[0]*displacement[0]*displacement[1]) + 2*child_data.myNyx*displacement[0] + N[1]*displacement[0]*displacement[0]; data_for_parent->my2Nyyx_Nxyy += 2*(displacement[0]*child_data.myNijDiag[1] + displacement[1]*child_data.myNyx + N[1]*displacement[1]*displacement[0]) + 2*child_data.myNxy*displacement[1] + N[0]*displacement[1]*displacement[1]; } } } #if SOLID_ANGLE_DEBUG UTdebugFormat(""); UTdebugFormat("Node {}: nchildren = {}; maxP = {}", nodei, nchildren, SYSsqrt(current_box_data.myMaxPDist2)); UTdebugFormat(" P = {}; N = {}", current_box_data.myAverageP, current_box_data.myN); UTdebugFormat(" Nii = {}", current_box_data.myNijDiag); UTdebugFormat(" Nxy+Nyx = {}", current_box_data.myNxy_Nyx); UTdebugFormat(" Niii = {}", current_box_data.myNijkDiag); UTdebugFormat(" 2Nxxy+Nyxx = {}; 2Nyyx+Nxyy = {}", current_box_data.my2Nxxy_Nyxx, current_box_data.my2Nyyx_Nxyy); #endif } }; #if SOLID_ANGLE_TIME_PRECOMPUTE timer.start(); #endif const PrecomputeFunctors functors(box_data, segment_boxes.array(), segment_points, positions, order); // NOTE: post-functor relies on non-null data_for_parent, so we have to pass one. LocalData local_data; myTree.template traverseParallel(4096, functors, &local_data); //myTree.template traverse(functors); #if SOLID_ANGLE_TIME_PRECOMPUTE time = timer.stop(); UTdebugFormat("{} s to precompute coefficients.", time); #endif } template void UT_SubtendedAngle::clear() { myTree.clear(); myNBoxes = 0; myOrder = 2; myData.reset(); myNSegments = 0; mySegmentPoints = nullptr; myNPoints = 0; myPositions = nullptr; } template T UT_SubtendedAngle::computeAngle(const UT_Vector2T &query_point, const T accuracy_scale) const { const T accuracy_scale2 = accuracy_scale*accuracy_scale; struct AngleFunctors { const BoxData *const myBoxData; const UT_Vector2T myQueryPoint; const T myAccuracyScale2; const UT_Vector2T *const myPositions; const int *const mySegmentPoints; const int myOrder; AngleFunctors( const BoxData *const box_data, const UT_Vector2T &query_point, const T accuracy_scale2, const int order, const UT_Vector2T *const positions, const int *const segment_points) : myBoxData(box_data) , myQueryPoint(query_point) , myAccuracyScale2(accuracy_scale2) , myOrder(order) , myPositions(positions) , mySegmentPoints(segment_points) {} uint pre(const int nodei, T *data_for_parent) const { const BoxData &data = myBoxData[nodei]; const typename BoxData::Type maxP2 = data.myMaxPDist2; UT_FixedVector q; q[0] = typename BoxData::Type(myQueryPoint[0]); q[1] = typename BoxData::Type(myQueryPoint[1]); q -= data.myAverageP; const typename BoxData::Type qlength2 = q[0]*q[0] + q[1]*q[1]; // If the query point is within a factor of accuracy_scale of the box radius, // it's assumed to be not a good enough approximation, so it needs to descend. // TODO: Is there a way to estimate the error? static_assert((std::is_same::value), "FIXME: Implement support for other tuple types!"); v4uu descend_mask = (qlength2 <= maxP2*myAccuracyScale2); uint descend_bitmask = _mm_movemask_ps(V4SF(descend_mask.vector)); constexpr uint allchildbits = ((uint(1)<= 1) { const UT_FixedVector q2 = q*q; const typename BoxData::Type Omega_1 = qlength_m2*(data.myNijDiag[0] + data.myNijDiag[1] -typename BoxData::Type(2.0)*(dot(q2,data.myNijDiag) + q[0]*q[1]*data.myNxy_Nyx)); Omega_approx += Omega_1; if (order >= 2) { const UT_FixedVector q3 = q2*q; const typename BoxData::Type qlength_m3 = qlength_m2*qlength_m1; typename BoxData::Type temp0[2] = { data.my2Nyyx_Nxyy, data.my2Nxxy_Nyxx }; typename BoxData::Type temp1[2] = { q[1]*data.my2Nxxy_Nyxx, q[0]*data.my2Nyyx_Nxyy }; const typename BoxData::Type Omega_2 = qlength_m3*(dot(q,typename BoxData::Type(3)*data.myNijkDiag + UT_FixedVector(temp0)) -typename BoxData::Type(4.0)*(dot(q3,data.myNijkDiag) + dot(q2, UT_FixedVector(temp1)))); Omega_approx += Omega_2; } } // If q is so small that we got NaNs and we just have a // small bounding box, it needs to descend. const v4uu mask = Omega_approx.isFinite() & ~descend_mask; Omega_approx = Omega_approx & mask; descend_bitmask = (~_mm_movemask_ps(V4SF(mask.vector))) & allchildbits; T sum = Omega_approx[0]; for (int i = 1; i < BVH_N; ++i) sum += Omega_approx[i]; *data_for_parent = sum; return descend_bitmask; } void item(const int itemi, const int parent_nodei, T &data_for_parent) const { const UT_Vector2T *const positions = myPositions; const int *const cur_segment_points = mySegmentPoints + 2*itemi; const UT_Vector2T a = positions[cur_segment_points[0]]; const UT_Vector2T b = positions[cur_segment_points[1]]; data_for_parent = UTsignedAngleSegment(a, b, myQueryPoint); } SYS_FORCE_INLINE void post(const int nodei, const int parent_nodei, T *data_for_parent, const int nchildren, const T *child_data_array, const uint descend_bits) const { T sum = (descend_bits&1) ? child_data_array[0] : 0; for (int i = 1; i < nchildren; ++i) sum += ((descend_bits>>i)&1) ? child_data_array[i] : 0; *data_for_parent += sum; } }; const AngleFunctors functors(myData.get(), query_point, accuracy_scale2, myOrder, myPositions, mySegmentPoints); T sum; myTree.traverseVector(functors, &sum); return sum; } // Instantiate our templates. namespace HDK_Sample { template class UT_SolidAngle; // FIXME: The SIMD parts will need to be handled differently in order to support fpreal64. //template class UT_SolidAngle; //template class UT_SolidAngle; template class UT_SubtendedAngle; //template class UT_SubtendedAngle; //template class UT_SubtendedAngle; }