https://github.com/stamatak/standard-RAxML
Tip revision: 933bc7364f74e790267a075a85825f0a9911e7b2, authored by stamatak on 19 February 2014, 16:05:19 UTC:
"added better error reporting for cases where RAxML runs out of memory"
File: ll_asm.h
/*
* Copyright (C) 2009, 2010, 2011 Lockless Inc., Steven Von Fuerst.
*
* This library is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Functions that require asm for efficiency, or to work at all...
*/
#if 1
#ifndef LL_ASM_H
#define LL_ASM_H
#include "compiler.h"
#include <stdint.h>
#ifdef GCC_ASM
#include <stdint.h>
#define atomic_or(P, V) __sync_or_and_fetch((P), (V))
#define atomic_and(P, V) __sync_and_and_fetch((P), (V))
#define atomic_add(P, V) __sync_add_and_fetch((P), (V))
#define atomic_xadd(P, V) __sync_fetch_and_add((P), (V))
#define atomic_cmpxchg_bool(P, O, N) __sync_bool_compare_and_swap((P), (O), (N))
#define atomic_access(V) (*(volatile typeof(V) *)&(V))
#if 0
/*
 * bts() - atomically set bit 'offset' in the bit array at mem and return
 * the bit's previous value (1 if it was already set, else 0).
 * Uses "asm goto" to branch directly on the carry flag that LOCK BTS
 * loads with the old bit value.
 * (Dead code: this definition sits inside an "#if 0" region.)
 */
static inline int bts(volatile void *mem, size_t offset)
{
asm goto (
"lock; bts %0, (%1)\n"
"jc %l[carry]\n"
:
: "r" (offset), "r" (mem)
: "memory", "cc"
: carry);
return 0;
carry:
return 1;
}
/*
 * btr() - atomically clear bit 'offset' in the bit array at mem and return
 * the bit's previous value (1 if it was set, else 0).
 * Mirrors bts() above but tests the inverted carry ("jnc") so the
 * fall-through path is the "bit was set" case.
 * (Dead code: this definition sits inside an "#if 0" region.)
 */
static inline int btr(volatile void *mem, size_t offset)
{
asm goto (
"lock; btr %0, (%1)\n"
"jnc %l[ncarry]\n"
:
: "r" (offset), "r" (mem)
: "memory", "cc"
: ncarry);
return 1;
ncarry:
return 0;
}
#endif
/*
 * ffsu() - 0-based index of the lowest set bit of x, via BSF.
 * The result is undefined when x == 0 (BSF leaves the destination
 * unmodified and only sets ZF in that case) -- callers must pass a
 * non-zero value.
 */
static inline int ffsu(unsigned x)
{
int result;
asm ("bsf %[x], %[result]"
: [result] "=r" (result)
: [x] "mr" (x)
:"cc");
return result;
}
/*
 * flsu() - 0-based index of the highest set bit of x, via BSR.
 * The result is undefined when x == 0.
 * NOTE(review): the destination is size_t (64-bit on x86-64) while the
 * source is 32-bit unsigned, mixing operand sizes in a single BSR --
 * confirm the chosen constraints assemble as intended on all toolchains.
 */
static inline size_t flsu(unsigned x)
{
size_t result;
asm ("bsr %[x], %[result]"
: [result] "=r" (result)
: [x] "mr" (x)
:"cc");
return result;
}
#ifdef __x86_64__
/*
 * ffsq() - 0-based index of the lowest set bit of a 64-bit value,
 * via BSFQ (both operands 64-bit).  Undefined result when x == 0.
 */
static inline size_t ffsq(size_t x)
{
size_t result;
asm ("bsfq %[x], %[result]"
: [result] "=r" (result)
: [x] "mr" (x)
:"cc");
return result;
}
/*
 * flsq() - 0-based index of the highest set bit of a 64-bit value,
 * via BSRQ (both operands 64-bit).  Undefined result when x == 0.
 */
static inline size_t flsq(size_t x)
{
size_t result;
asm ("bsrq %[x], %[result]"
: [result] "=r" (result)
: [x] "mr" (x)
:"cc");
return result;
}
#else
/*
 * ffsq() - 32-bit fallback: 0-based index of the lowest set bit of a
 * 64-bit value, built from two 32-bit scans.
 * The high word is scanned first into tmp and biased by +32; the low
 * word is then scanned into result.  The second BSF sets ZF when the
 * low word is zero, so CMOVE substitutes the high-word answer.
 * Undefined result when x == 0 (both scans leave their outputs
 * undefined in that case).
 */
static inline size_t ffsq(unsigned long long x)
{
size_t result;
unsigned xlo = x & 0xffffffff;
unsigned xhi = x >> 32;
unsigned tmp;
asm ("bsfl %[xhi], %[tmp]\n"
"addl $0x20, %[tmp]\n"
"bsfl %[xlo], %[result]\n"
"cmove %[tmp], %[result]\n"
:[result] "=r" (result), [tmp] "=&r" (tmp)
:[xlo] "rm" (xlo), [xhi] "rm" (xhi)
:"cc");
return result;
}
/*
 * flsq() - 32-bit fallback: 0-based index of the highest set bit of a
 * 64-bit value.
 * bsr(xlo) - 32 is staged in tmp; BSR of the high word sets ZF when
 * xhi == 0, letting CMOVE fall back to the staged low-word answer.
 * The final +32 then yields either bsr(xhi) + 32 or plain bsr(xlo).
 * Undefined result when x == 0.
 */
static inline size_t flsq(unsigned long long x)
{
size_t result;
unsigned xlo = x & 0xffffffff;
unsigned xhi = x >> 32;
unsigned tmp;
asm ("bsrl %[xlo], %[tmp]\n"
"addl $-0x20, %[tmp]\n"
"bsrl %[xhi], %[result]\n"
"cmove %[tmp], %[result]\n"
"addl $0x20, %[result]\n"
:[result] "=r" (result), [tmp] "=&r" (tmp)
:[xlo] "rm" (xlo), [xhi] "rm" (xhi)
:"cc");
return result;
}
#endif
/*
 * Atomically exchange the byte at *ptr with x, returning the old value.
 * x86 XCHG with a memory operand is implicitly locked, so no explicit
 * "lock;" prefix is needed.
 * The original used a cast-as-lvalue output operand
 * ("=r" ((unsigned char) x)), a legacy GCC extension that modern
 * compilers reject; the "+r"/"+m" read-write constraints below are the
 * supported spelling with identical codegen.
 */
static inline unsigned char xchg_8(void *ptr, unsigned char x)
{
	asm volatile("xchgb %0, %1"
		: "+r" (x), "+m" (*(volatile unsigned char *)ptr)
		:
		: "memory");
	return x;
}
/*
 * Atomically exchange the 16-bit value at *ptr with x, returning the old
 * value.  XCHG with a memory operand is implicitly locked.
 * Rewritten with "+r"/"+m" read-write constraints instead of the legacy
 * cast-as-lvalue output operand, which modern GCC rejects.
 */
static inline unsigned short xchg_16(void *ptr, unsigned short x)
{
	asm volatile("xchgw %0, %1"
		: "+r" (x), "+m" (*(volatile unsigned short *)ptr)
		:
		: "memory");
	return x;
}
/*
 * Atomically exchange the 32-bit value at *ptr with x, returning the old
 * value.  XCHG with a memory operand is implicitly locked.
 * Rewritten with "+r"/"+m" read-write constraints instead of the legacy
 * cast-as-lvalue output operand, which modern GCC rejects.
 */
static inline unsigned xchg_32(void *ptr, unsigned x)
{
	asm volatile("xchgl %0, %1"
		: "+r" (x), "+m" (*(volatile unsigned *)ptr)
		:
		: "memory");
	return x;
}
#ifdef __x86_64__
/*
 * Atomically exchange the 64-bit value at *ptr with x, returning the old
 * value (x86-64 only).  XCHG with a memory operand is implicitly locked.
 * Rewritten with "+r"/"+m" read-write constraints instead of the legacy
 * cast-as-lvalue output operand, which modern GCC rejects.
 */
static inline unsigned long long xchg_64(void *ptr, unsigned long long x)
{
	asm volatile("xchgq %0, %1"
		: "+r" (x), "+m" (*(volatile unsigned long long *)ptr)
		:
		: "memory");
	return x;
}
/*
 * Atomically exchange the pointer stored at *ptr with x, returning the
 * previous pointer (x86-64 variant; uintptr_t is 8 bytes here, matching
 * XCHGQ).  XCHG with a memory operand is implicitly locked.
 * Rewritten with "+r"/"+m" read-write constraints instead of the legacy
 * cast-as-lvalue output operand, which modern GCC rejects.
 */
static inline void *xchg_ptr(void *ptr, void *x)
{
	uintptr_t val = (uintptr_t) x;
	__asm__ __volatile__("xchgq %0, %1"
		: "+r" (val), "+m" (*(volatile uintptr_t *)ptr)
		:
		: "memory");
	return (void *) val;
}
#else
/*
 * Atomically exchange the pointer stored at *ptr with x, returning the
 * previous pointer (IA-32 variant: "%k0" forces the 32-bit register name,
 * matching the 4-byte uintptr_t).  XCHG with a memory operand is
 * implicitly locked.
 * Rewritten with "+r"/"+m" read-write constraints instead of the legacy
 * cast-as-lvalue output operand, which modern GCC rejects.
 */
static inline void *xchg_ptr(void *ptr, void *x)
{
	uintptr_t val = (uintptr_t) x;
	__asm__ __volatile__("xchgl %k0, %1"
		: "+r" (val), "+m" (*(volatile uintptr_t *)ptr)
		:
		: "memory");
	return (void *) val;
}
#endif
/*
 * Read the CPU time-stamp counter via RDTSC and return it as one 64-bit
 * value.  The instruction deposits the low half in EAX and the high half
 * in EDX, which the "=a"/"=d" constraints capture directly.
 */
static inline unsigned long long rdtsc(void)
{
	unsigned cycles_lo, cycles_hi;

	asm volatile ("rdtsc" : "=a" (cycles_lo), "=d" (cycles_hi));
	return ((unsigned long long) cycles_hi << 32) | cycles_lo;
}
#else /* GCC_ASM */
/*
 * ffsu() - 0-based index of the lowest set bit of x (MSVC intrinsic
 * branch).  x must be non-zero: __assume(x) promises this to the
 * optimizer, and _BitScanForward's output is meaningless for x == 0.
 */
static inline int ffsu(unsigned x)
{
	unsigned long bit;

	__assume(x);
	_BitScanForward(&bit, x);
	return (int) bit;
}
/*
 * flsu() - 0-based index of the highest set bit of x (MSVC intrinsic
 * branch).  x must be non-zero: __assume(x) promises this to the
 * optimizer, and _BitScanReverse's output is meaningless for x == 0.
 */
static inline int flsu(unsigned x)
{
	unsigned long bit;

	__assume(x);
	_BitScanReverse(&bit, x);
	return (int) bit;
}
static inline size_t ffsq(unsigned long long x)
{
unsigned long result;
__assume(x);
_BitScanForward64(&result, x);
return result;
}
/*
 * flsq() - 0-based index of the highest set bit of a 64-bit value (MSVC
 * intrinsic branch).  x must be non-zero; _BitScanReverse64's output is
 * meaningless for x == 0.
 *
 * The GCC_ASM branch of this header names this operation flsq(), but this
 * branch originally spelled it fflq() -- apparently a typo that would
 * break any portable caller of flsq() when built with the intrinsics.
 * Define flsq() as the primary name and keep fflq() as a thin alias so
 * existing callers of either spelling continue to work.
 */
static inline size_t flsq(unsigned long long x)
{
	unsigned long result;
	__assume(x);
	_BitScanReverse64(&result, x);
	return result;
}
static inline size_t fflq(unsigned long long x)
{
	return flsq(x);
}
#ifdef __x86_64__
static inline void *xchg_ptr(void *ptr, void *x)
{
return (void *) _InterlockedExchange64(ptr, (int64_t) x);
}
#else
static inline void *xchg_ptr(void *ptr, void *x)
{
return (void *) _InterlockedExchange(ptr, (long) x);
}
#endif
#endif /* GCC_ASM */
#endif /* LL_ASM_H */
#endif
#if 0
/*
 * Alternative ffsu() built on __builtin_ffs (dead code: under "#if 0").
 * __builtin_ffs returns 1 + index of the lowest set bit, or 0 for x == 0,
 * so subtracting 1 gives the 0-based index and -1 for zero input --
 * unlike the live BSF version, whose result is undefined for zero.
 */
static inline int ffsu(unsigned x)
{
unsigned long result = __builtin_ffs(x);
return result - 1;
}
/*
 * flsu() via _BitScanReverse -- duplicate of the intrinsic-branch version
 * above (dead code: under "#if 0").  Undefined output for x == 0.
 */
static inline int flsu(unsigned x)
{
unsigned long result;
__assume(x);
_BitScanReverse(&result, x);
return result;
}
/*
 * ffsq() via _BitScanForward64 -- duplicate of the intrinsic-branch
 * version above (dead code: under "#if 0").  Undefined output for x == 0.
 */
static inline size_t ffsq(unsigned long long x)
{
unsigned long result;
__assume(x);
_BitScanForward64(&result, x);
return result;
}
/*
 * fflq() via _BitScanReverse64 -- duplicate of the intrinsic-branch
 * version above (dead code: under "#if 0").  Undefined output for x == 0.
 * NOTE(review): the name looks like a typo for flsq(), the spelling the
 * GCC_ASM branch defines -- confirm against callers before reviving.
 */
static inline size_t fflq(unsigned long long x)
{
unsigned long result;
__assume(x);
_BitScanReverse64(&result, x);
return result;
}
#endif