Revision e91467ecd1ef381377fd327c0ded922835ec52ab authored by Christian Borntraeger on 05 August 2006, 19:13:52 UTC, committed by Linus Torvalds on 06 August 2006, 15:57:46 UTC
This patch adds a barrier() in futex unqueue_me to avoid aliasing of two
pointers.

On my s390x system I saw the following oops:

Unable to handle kernel pointer dereference at virtual kernel address
0000000000000000
Oops: 0004 [#1]
CPU:    0    Not tainted
Process mytool (pid: 13613, task: 000000003ecb6ac0, ksp: 00000000366bdbd8)
Krnl PSW : 0704d00180000000 00000000003c9ac2 (_spin_lock+0xe/0x30)
Krnl GPRS: 00000000ffffffff 000000003ecb6ac0 0000000000000000 0700000000000000
           0000000000000000 0000000000000000 000001fe00002028 00000000000c091f
           000001fe00002054 000001fe00002054 0000000000000000 00000000366bddc0
           00000000005ef8c0 00000000003d00e8 0000000000144f91 00000000366bdcb8
Krnl Code: ba 4e 20 00 12 44 b9 16 00 3e a7 84 00 08 e3 e0 f0 88 00 04
Call Trace:
([<0000000000144f90>] unqueue_me+0x40/0xe4)
 [<0000000000145a0c>] do_futex+0x33c/0xc40
 [<000000000014643e>] sys_futex+0x12e/0x144
 [<000000000010bb00>] sysc_noemu+0x10/0x16
 [<000002000003741c>] 0x2000003741c

The code in question is:

static int unqueue_me(struct futex_q *q)
{
        int ret = 0;
        spinlock_t *lock_ptr;

        /* In the common case we don't take the spinlock, which is nice. */
 retry:
        lock_ptr = q->lock_ptr;
        if (lock_ptr != 0) {
                spin_lock(lock_ptr);
		/*
                 * q->lock_ptr can change between reading it and
                 * spin_lock(), causing us to take the wrong lock.  This
                 * corrects the race condition.
[...]

and my compiler (gcc 4.1.0) makes the following out of it:

00000000000003c8 <unqueue_me>:
     3c8:       eb bf f0 70 00 24       stmg    %r11,%r15,112(%r15)
     3ce:       c0 d0 00 00 00 00       larl    %r13,3ce <unqueue_me+0x6>
                        3d0: R_390_PC32DBL      .rodata+0x2a
     3d4:       a7 f1 1e 00             tml     %r15,7680
     3d8:       a7 84 00 01             je      3da <unqueue_me+0x12>
     3dc:       b9 04 00 ef             lgr     %r14,%r15
     3e0:       a7 fb ff d0             aghi    %r15,-48
     3e4:       b9 04 00 b2             lgr     %r11,%r2
     3e8:       e3 e0 f0 98 00 24       stg     %r14,152(%r15)
     3ee:       e3 c0 b0 28 00 04       lg      %r12,40(%r11)
		/* write q->lock_ptr in r12 */
     3f4:       b9 02 00 cc             ltgr    %r12,%r12
     3f8:       a7 84 00 4b             je      48e <unqueue_me+0xc6>
		/* if r12 is zero then jump over the code.... */
     3fc:       e3 20 b0 28 00 04       lg      %r2,40(%r11)
		/* write q->lock_ptr in r2 */
     402:       c0 e5 00 00 00 00       brasl   %r14,402 <unqueue_me+0x3a>
                        404: R_390_PC32DBL      _spin_lock+0x2
		/* use r2 as parameter for spin_lock */

So the code becomes more or less:
if (q->lock_ptr != 0) spin_lock(q->lock_ptr)
instead of
if (lock_ptr != 0) spin_lock(lock_ptr)

Which caused the oops from above.
After adding a barrier gcc creates code without this problem:
[...] (the same)
     3ee:       e3 c0 b0 28 00 04       lg      %r12,40(%r11)
     3f4:       b9 02 00 cc             ltgr    %r12,%r12
     3f8:       b9 04 00 2c             lgr     %r2,%r12
     3fc:       a7 84 00 48             je      48c <unqueue_me+0xc4>
     400:       c0 e5 00 00 00 00       brasl   %r14,400 <unqueue_me+0x38>
                        402: R_390_PC32DBL      _spin_lock+0x2

As a general note, this code of unqueue_me seems a bit fishy. The retry logic
of unqueue_me only works if we can guarantee, that the original value of
q->lock_ptr is always a spinlock (Otherwise we overwrite kernel memory). We
know that q->lock_ptr can change. I dont know what happens with the original
spinlock, as I am not an expert with the futex code.

Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Ingo Molnar <mingo@redhat.com>
Cc: Thomas Gleixner <tglx@timesys.com>
Signed-off-by: Christian Borntraeger <borntrae@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
1 parent 72f0b4e
Raw File
bitops.h
#ifndef _H8300_BITOPS_H
#define _H8300_BITOPS_H

/*
 * Copyright 1992, Linus Torvalds.
 * Copyright 2002, Yoshinori Sato
 */

#include <linux/compiler.h>
#include <asm/system.h>

#ifdef __KERNEL__
/*
 * Function prototypes to keep gcc -Wall happy
 */

/*
 * ffz = Find First Zero in word. Undefined if no zero exists,
 * so code should check against ~0UL first..
 */
static __inline__ unsigned long ffz(unsigned long word)
{
	unsigned long result;

	result = -1;
	__asm__("1:\n\t"
		"shlr.l %2\n\t"
		"adds #1,%0\n\t"
		"bcs 1b"
		: "=r" (result)
		: "0"  (result),"r" (word));
	return result;
}

#define H8300_GEN_BITOP_CONST(OP,BIT)			    \
	case BIT:					    \
	__asm__(OP " #" #BIT ",@%0"::"r"(b_addr):"memory"); \
	break;

#define H8300_GEN_BITOP(FNAME,OP)				      \
static __inline__ void FNAME(int nr, volatile unsigned long* addr)    \
{								      \
	volatile unsigned char *b_addr;				      \
	b_addr = (volatile unsigned char *)addr + ((nr >> 3) ^ 3);    \
	if (__builtin_constant_p(nr)) {				      \
		switch(nr & 7) {				      \
			H8300_GEN_BITOP_CONST(OP,0)		      \
			H8300_GEN_BITOP_CONST(OP,1)		      \
			H8300_GEN_BITOP_CONST(OP,2)		      \
			H8300_GEN_BITOP_CONST(OP,3)		      \
			H8300_GEN_BITOP_CONST(OP,4)		      \
			H8300_GEN_BITOP_CONST(OP,5)		      \
			H8300_GEN_BITOP_CONST(OP,6)		      \
			H8300_GEN_BITOP_CONST(OP,7)		      \
		}						      \
	} else {						      \
		__asm__(OP " %w0,@%1"::"r"(nr),"r"(b_addr):"memory"); \
	}							      \
}

/*
 * clear_bit() doesn't provide any barrier for the compiler.
 */
#define smp_mb__before_clear_bit()	barrier()
#define smp_mb__after_clear_bit()	barrier()

H8300_GEN_BITOP(set_bit	  ,"bset")
H8300_GEN_BITOP(clear_bit ,"bclr")
H8300_GEN_BITOP(change_bit,"bnot")
#define __set_bit(nr,addr)    set_bit((nr),(addr))
#define __clear_bit(nr,addr)  clear_bit((nr),(addr))
#define __change_bit(nr,addr) change_bit((nr),(addr))

#undef H8300_GEN_BITOP
#undef H8300_GEN_BITOP_CONST

static __inline__ int test_bit(int nr, const unsigned long* addr)
{
	return (*((volatile unsigned char *)addr + 
               ((nr >> 3) ^ 3)) & (1UL << (nr & 7))) != 0;
}

#define __test_bit(nr, addr) test_bit(nr, addr)

#define H8300_GEN_TEST_BITOP_CONST_INT(OP,BIT)			     \
	case BIT:						     \
	__asm__("stc ccr,%w1\n\t"				     \
		"orc #0x80,ccr\n\t"				     \
		"bld #" #BIT ",@%4\n\t"				     \
		OP " #" #BIT ",@%4\n\t"				     \
		"rotxl.l %0\n\t"				     \
		"ldc %w1,ccr"					     \
		: "=r"(retval),"=&r"(ccrsave),"=m"(*b_addr)	     \
		: "0" (retval),"r" (b_addr)			     \
		: "memory");                                         \
        break;

#define H8300_GEN_TEST_BITOP_CONST(OP,BIT)			     \
	case BIT:						     \
	__asm__("bld #" #BIT ",@%3\n\t"				     \
		OP " #" #BIT ",@%3\n\t"				     \
		"rotxl.l %0\n\t"				     \
		: "=r"(retval),"=m"(*b_addr)			     \
		: "0" (retval),"r" (b_addr)			     \
		: "memory");                                         \
        break;

#define H8300_GEN_TEST_BITOP(FNNAME,OP)				     \
static __inline__ int FNNAME(int nr, volatile void * addr)	     \
{								     \
	int retval = 0;						     \
	char ccrsave;						     \
	volatile unsigned char *b_addr;				     \
	b_addr = (volatile unsigned char *)addr + ((nr >> 3) ^ 3);   \
	if (__builtin_constant_p(nr)) {				     \
		switch(nr & 7) {				     \
			H8300_GEN_TEST_BITOP_CONST_INT(OP,0)	     \
			H8300_GEN_TEST_BITOP_CONST_INT(OP,1)	     \
			H8300_GEN_TEST_BITOP_CONST_INT(OP,2)	     \
			H8300_GEN_TEST_BITOP_CONST_INT(OP,3)	     \
			H8300_GEN_TEST_BITOP_CONST_INT(OP,4)	     \
			H8300_GEN_TEST_BITOP_CONST_INT(OP,5)	     \
			H8300_GEN_TEST_BITOP_CONST_INT(OP,6)	     \
			H8300_GEN_TEST_BITOP_CONST_INT(OP,7)	     \
		}						     \
	} else {						     \
		__asm__("stc ccr,%w1\n\t"			     \
			"orc #0x80,ccr\n\t"			     \
			"btst %w5,@%4\n\t"			     \
			OP " %w5,@%4\n\t"			     \
			"beq 1f\n\t"				     \
			"inc.l #1,%0\n"				     \
			"1:\n\t"				     \
			"ldc %w1,ccr"				     \
			: "=r"(retval),"=&r"(ccrsave),"=m"(*b_addr)  \
			: "0" (retval),"r" (b_addr),"r"(nr)	     \
			: "memory");				     \
	}							     \
	return retval;						     \
}								     \
								     \
static __inline__ int __ ## FNNAME(int nr, volatile void * addr)     \
{								     \
	int retval = 0;						     \
	volatile unsigned char *b_addr;				     \
	b_addr = (volatile unsigned char *)addr + ((nr >> 3) ^ 3);   \
	if (__builtin_constant_p(nr)) {				     \
		switch(nr & 7) {				     \
			H8300_GEN_TEST_BITOP_CONST(OP,0) 	     \
			H8300_GEN_TEST_BITOP_CONST(OP,1) 	     \
			H8300_GEN_TEST_BITOP_CONST(OP,2) 	     \
			H8300_GEN_TEST_BITOP_CONST(OP,3) 	     \
			H8300_GEN_TEST_BITOP_CONST(OP,4) 	     \
			H8300_GEN_TEST_BITOP_CONST(OP,5) 	     \
			H8300_GEN_TEST_BITOP_CONST(OP,6) 	     \
			H8300_GEN_TEST_BITOP_CONST(OP,7) 	     \
		}						     \
	} else {						     \
		__asm__("btst %w4,@%3\n\t"			     \
			OP " %w4,@%3\n\t"			     \
			"beq 1f\n\t"				     \
			"inc.l #1,%0\n"				     \
			"1:"					     \
			: "=r"(retval),"=m"(*b_addr)		     \
			: "0" (retval),"r" (b_addr),"r"(nr)	     \
			: "memory");				     \
	}							     \
	return retval;						     \
}

H8300_GEN_TEST_BITOP(test_and_set_bit,	 "bset")
H8300_GEN_TEST_BITOP(test_and_clear_bit, "bclr")
H8300_GEN_TEST_BITOP(test_and_change_bit,"bnot")
#undef H8300_GEN_TEST_BITOP_CONST
#undef H8300_GEN_TEST_BITOP_CONST_INT
#undef H8300_GEN_TEST_BITOP

#include <asm-generic/bitops/ffs.h>

static __inline__ unsigned long __ffs(unsigned long word)
{
	unsigned long result;

	result = -1;
	__asm__("1:\n\t"
		"shlr.l %2\n\t"
		"adds #1,%0\n\t"
		"bcc 1b"
		: "=r" (result)
		: "0"(result),"r"(word));
	return result;
}

#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/ext2-non-atomic.h>
#include <asm-generic/bitops/ext2-atomic.h>
#include <asm-generic/bitops/minix.h>

#endif /* __KERNEL__ */

#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/fls64.h>

#endif /* _H8300_BITOPS_H */
back to top