Revision f819d7309b317e883bd85dd3369fd46f6709c29f authored by Andrew Adams on 14 February 2021, 17:37:18 UTC, committed by GitHub on 14 February 2021, 17:37:18 UTC
Tweak vector deinterleaving to recognize a new pattern that comes up when you want to use udot/sdot in sparse convolution
1 parent dfbe346
File | Mode | Size |
---|---|---|
CMakeLists.txt | -rw-r--r-- | 1.0 KB |
async_gpu.cpp | -rw-r--r-- | 2.6 KB |
block_transpose.cpp | -rw-r--r-- | 4.5 KB |
boundary_conditions.cpp | -rw-r--r-- | 4.0 KB |
clamped_vector_load.cpp | -rw-r--r-- | 3.9 KB |
const_division.cpp | -rw-r--r-- | 4.4 KB |
fan_in.cpp | -rw-r--r-- | 1.8 KB |
fast_inverse.cpp | -rw-r--r-- | 1.5 KB |
fast_pow.cpp | -rw-r--r-- | 3.1 KB |
fast_sine_cosine.cpp | -rw-r--r-- | 1.6 KB |
gpu_half_throughput.cpp | -rw-r--r-- | 2.4 KB |
inner_loop_parallel.cpp | -rw-r--r-- | 1.5 KB |
jit_stress.cpp | -rw-r--r-- | 794 bytes |
lots_of_inputs.cpp | -rw-r--r-- | 1.5 KB |
lots_of_small_allocations.cpp | -rw-r--r-- | 2.4 KB |
matrix_multiplication.cpp | -rw-r--r-- | 3.2 KB |
memcpy.cpp | -rw-r--r-- | 1.4 KB |
memory_profiler.cpp | -rw-r--r-- | 9.8 KB |
nested_vectorization_gemm.cpp | -rw-r--r-- | 13.1 KB |
packed_planar_fusion.cpp | -rw-r--r-- | 3.0 KB |
parallel_performance.cpp | -rw-r--r-- | 1.5 KB |
profiler.cpp | -rw-r--r-- | 1.9 KB |
realize_overhead.cpp | -rw-r--r-- | 3.9 KB |
rfactor.cpp | -rw-r--r-- | 10.9 KB |
rgb_interleaved.cpp | -rw-r--r-- | 4.2 KB |
sort.cpp | -rw-r--r-- | 6.6 KB |
thread_safe_jit.cpp | -rw-r--r-- | 4.1 KB |
vectorize.cpp | -rw-r--r-- | 2.9 KB |
wrap.cpp | -rw-r--r-- | 5.7 KB |
Computing file changes ...