/*
 * tinymembench -- x86-sse2.S
 * Source: https://github.com/ssvb/tinymembench
 * Tip revision: a2cf6d7e382e3aea1eb39173174d9fa28cad15f3
 * authored by Siarhei Siamashka on 14 February 2017, 21:58:50 UTC
 * "Fix use of uninitialized variable 's' (reported by Coverity)"
 */
/*
* Copyright © 2011 Siarhei Siamashka <siarhei.siamashka@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#if defined(__i386__) || defined(__amd64__)
.intel_syntax noprefix
.text
#define PREFETCH_DISTANCE 256
/*
 * asm_function_helper: emit the global label for an exported function and
 * bind the symbolic names DST, SRC and SIZE to the registers holding the
 * first three integer arguments under the current ABI:
 *   - Win64 (__amd64__ && _WIN64):  rcx, rdx, r8
 *   - SysV amd64 (__amd64__):       rdi, rsi, rdx
 *   - 32-bit cdecl:                 loaded from [esp+4/8/12] into eax, ecx, edx
 * On i386 this clobbers eax, ecx, edx (all caller-saved under cdecl).
 */
.macro asm_function_helper function_name
.global \function_name
.func \function_name
\function_name:
#ifdef __amd64__
#ifdef _WIN64
/* Microsoft x64: integer args arrive in rcx, rdx, r8 */
.set DST, rcx
.set SRC, rdx
.set SIZE, r8
#else
/* System V AMD64: integer args arrive in rdi, rsi, rdx */
.set DST, rdi
.set SRC, rsi
.set SIZE, rdx
#endif
#else
/* i386 cdecl: args are on the stack above the return address */
mov eax, [esp + 4]
mov ecx, [esp + 8]
mov edx, [esp + 12]
.set DST, eax
.set SRC, ecx
.set SIZE, edx
#endif
.endm
/*
 * asm_function: public entry-point wrapper around asm_function_helper.
 * 32-bit Windows decorates C symbols with a leading underscore, so the
 * label gets an extra "_" there; all other targets use the plain name.
 */
.macro asm_function function_name
#if defined(_WIN32) && !defined(_WIN64)
asm_function_helper _\function_name
#else
asm_function_helper \function_name
#endif
.endm
/*
 * push3: push three registers onto the stack in operand order.
 * Pair with pop3 (same operand order) to restore them.
 */
.macro push3 first, second, third
push \first
push \second
push \third
.endm
/*
 * pop3: pop three registers pushed by push3.  Operands are listed in the
 * same order as the matching push3; pops happen in reverse (LIFO) order.
 */
.macro pop3 first, second, third
pop \third
pop \second
pop \first
.endm
/*****************************************************************************/
/*
 * aligned_block_copy_movsb(dst, src, size)
 *
 * Copy 'size' bytes with a single "rep movsb".  The string instruction
 * hard-wires its operands: rdi/edi = destination, rsi/esi = source,
 * rcx/ecx = byte count.  The caller's rdi/rsi/rcx (edi/esi/ecx) are
 * saved first, then DST/SRC/SIZE are moved into them via push3/pop3,
 * and the originals are restored after the copy.
 */
asm_function aligned_block_copy_movsb
0:
#ifdef __amd64__
push3 rdi rsi rcx
/* load rdi=DST, rsi=SRC, rcx=SIZE through the stack */
push3 DST SRC SIZE
pop3 rdi rsi rcx
rep movsb
/* restore the caller's rdi/rsi/rcx saved above */
pop3 rdi rsi rcx
#else
push3 edi esi ecx
push3 DST SRC SIZE
pop3 edi esi ecx
rep movsb
pop3 edi esi ecx
#endif
ret
.endfunc
/*
 * aligned_block_copy_movsd(dst, src, size)
 *
 * Copy 'size' bytes with "rep movsd" (4 bytes per iteration).  Same
 * register shuffle as aligned_block_copy_movsb; additionally the byte
 * count is converted to a dword count with "sar rcx, 2", so 'size' is
 * assumed to be a non-negative multiple of 4.
 */
asm_function aligned_block_copy_movsd
0:
#ifdef __amd64__
push3 rdi rsi rcx
push3 DST SRC SIZE
pop3 rdi rsi rcx
/* bytes -> dwords (arithmetic shift; count is expected non-negative) */
sar rcx, 2
rep movsd
pop3 rdi rsi rcx
#else
push3 edi esi ecx
push3 DST SRC SIZE
pop3 edi esi ecx
sar ecx, 2
rep movsd
pop3 edi esi ecx
#endif
ret
.endfunc
/*
 * aligned_block_copy_sse2(dst, src, size)
 *
 * Copy 'size' bytes, 64 per iteration, using aligned 16-byte SSE2
 * loads/stores (movdqa).  Both pointers must be 16-byte aligned
 * (movdqa faults otherwise) and 'size' should be a positive multiple
 * of 64; the loop repeats while the signed remaining count is > 0.
 */
asm_function aligned_block_copy_sse2
0:
movdqa xmm0, [SRC + 0]
movdqa xmm1, [SRC + 16]
movdqa xmm2, [SRC + 32]
movdqa xmm3, [SRC + 48]
movdqa [DST + 0], xmm0
movdqa [DST + 16], xmm1
movdqa [DST + 32], xmm2
movdqa [DST + 48], xmm3
add SRC, 64
add DST, 64
sub SIZE, 64
jg 0b
ret
.endfunc
/*
 * aligned_block_copy_nt_sse2(dst, src, size)
 *
 * Same 64-bytes-per-iteration copy as aligned_block_copy_sse2, but the
 * stores use movntdq (non-temporal), bypassing the cache on the write
 * side.  Alignment/size requirements are identical (16-byte aligned
 * pointers, 'size' a positive multiple of 64).
 */
asm_function aligned_block_copy_nt_sse2
0:
movdqa xmm0, [SRC + 0]
movdqa xmm1, [SRC + 16]
movdqa xmm2, [SRC + 32]
movdqa xmm3, [SRC + 48]
movntdq [DST + 0], xmm0
movntdq [DST + 16], xmm1
movntdq [DST + 32], xmm2
movntdq [DST + 48], xmm3
add SRC, 64
add DST, 64
sub SIZE, 64
jg 0b
ret
.endfunc
/*
 * aligned_block_copy_pf32_sse2(dst, src, size)
 *
 * SSE2 copy with software prefetch at 32-byte granularity: two
 * prefetchnta hints per 64-byte iteration (offsets +0 and +32 from
 * SRC + PREFETCH_DISTANCE).  Otherwise identical to
 * aligned_block_copy_sse2.
 */
asm_function aligned_block_copy_pf32_sse2
0:
prefetchnta [SRC + PREFETCH_DISTANCE]
prefetchnta [SRC + PREFETCH_DISTANCE + 32]
movdqa xmm0, [SRC + 0]
movdqa xmm1, [SRC + 16]
movdqa xmm2, [SRC + 32]
movdqa xmm3, [SRC + 48]
movdqa [DST + 0], xmm0
movdqa [DST + 16], xmm1
movdqa [DST + 32], xmm2
movdqa [DST + 48], xmm3
add SRC, 64
add DST, 64
sub SIZE, 64
jg 0b
ret
.endfunc
/*
 * aligned_block_copy_nt_pf32_sse2(dst, src, size)
 *
 * Combination kernel: 32-byte-granularity prefetchnta on the read side
 * (as in aligned_block_copy_pf32_sse2) plus non-temporal movntdq
 * stores on the write side (as in aligned_block_copy_nt_sse2).
 */
asm_function aligned_block_copy_nt_pf32_sse2
0:
prefetchnta [SRC + PREFETCH_DISTANCE]
prefetchnta [SRC + PREFETCH_DISTANCE + 32]
movdqa xmm0, [SRC + 0]
movdqa xmm1, [SRC + 16]
movdqa xmm2, [SRC + 32]
movdqa xmm3, [SRC + 48]
movntdq [DST + 0], xmm0
movntdq [DST + 16], xmm1
movntdq [DST + 32], xmm2
movntdq [DST + 48], xmm3
add SRC, 64
add DST, 64
sub SIZE, 64
jg 0b
ret
.endfunc
/*
 * aligned_block_copy_pf64_sse2(dst, src, size)
 *
 * SSE2 copy with software prefetch at 64-byte granularity: a single
 * prefetchnta per 64-byte iteration, PREFETCH_DISTANCE bytes ahead of
 * SRC.  Otherwise identical to aligned_block_copy_sse2.
 */
asm_function aligned_block_copy_pf64_sse2
0:
prefetchnta [SRC + PREFETCH_DISTANCE]
movdqa xmm0, [SRC + 0]
movdqa xmm1, [SRC + 16]
movdqa xmm2, [SRC + 32]
movdqa xmm3, [SRC + 48]
movdqa [DST + 0], xmm0
movdqa [DST + 16], xmm1
movdqa [DST + 32], xmm2
movdqa [DST + 48], xmm3
add SRC, 64
add DST, 64
sub SIZE, 64
jg 0b
ret
.endfunc
/*
 * aligned_block_copy_nt_pf64_sse2(dst, src, size)
 *
 * Combination kernel: one prefetchnta per 64-byte iteration on the
 * read side (as in aligned_block_copy_pf64_sse2) plus non-temporal
 * movntdq stores on the write side (as in aligned_block_copy_nt_sse2).
 */
asm_function aligned_block_copy_nt_pf64_sse2
0:
prefetchnta [SRC + PREFETCH_DISTANCE]
movdqa xmm0, [SRC + 0]
movdqa xmm1, [SRC + 16]
movdqa xmm2, [SRC + 32]
movdqa xmm3, [SRC + 48]
movntdq [DST + 0], xmm0
movntdq [DST + 16], xmm1
movntdq [DST + 32], xmm2
movntdq [DST + 48], xmm3
add SRC, 64
add DST, 64
sub SIZE, 64
jg 0b
ret
.endfunc
/*
 * aligned_block_fill_sse2(dst, src, size)
 *
 * Fill 'size' bytes at dst, 64 per iteration, replicating the 16-byte
 * pattern loaded once from [SRC] with aligned movdqa stores.  dst and
 * src must be 16-byte aligned and 'size' a positive multiple of 64.
 */
asm_function aligned_block_fill_sse2
/* load the fill pattern once, before the loop */
movdqa xmm0, [SRC + 0]
0:
movdqa [DST + 0], xmm0
movdqa [DST + 16], xmm0
movdqa [DST + 32], xmm0
movdqa [DST + 48], xmm0
add DST, 64
sub SIZE, 64
jg 0b
ret
.endfunc
/*
 * aligned_block_fill_nt_sse2(dst, src, size)
 *
 * Same fill as aligned_block_fill_sse2, but using non-temporal movntdq
 * stores that bypass the cache.  Same alignment/size requirements
 * (16-byte aligned pointers, 'size' a positive multiple of 64).
 */
asm_function aligned_block_fill_nt_sse2
/* load the fill pattern once, before the loop */
movdqa xmm0, [SRC + 0]
0:
movntdq [DST + 0], xmm0
movntdq [DST + 16], xmm0
movntdq [DST + 32], xmm0
movntdq [DST + 48], xmm0
add DST, 64
sub SIZE, 64
jg 0b
ret
.endfunc
/*****************************************************************************/
#endif