Revision 471f0aa7fa64e23766a1473b32d9ec3f0718895a authored by Chang S. Bae on 21 October 2022, 18:58:44 UTC, committed by Dave Hansen on 21 October 2022, 22:22:09 UTC
When an extended state component is not present in fpstate but is present in
the init state, the function copies it from init_fpstate via copy_feature().

However, dynamic states are not present in init_fpstate, because their init
states are all zeros. Retrieving them from init_fpstate then explodes like this:

 BUG: kernel NULL pointer dereference, address: 0000000000000000
 ...
 RIP: 0010:memcpy_erms+0x6/0x10
  ? __copy_xstate_to_uabi_buf+0x381/0x870
  fpu_copy_guest_fpstate_to_uabi+0x28/0x80
  kvm_arch_vcpu_ioctl+0x14c/0x1460 [kvm]
  ? __this_cpu_preempt_check+0x13/0x20
  ? vmx_vcpu_put+0x2e/0x260 [kvm_intel]
  kvm_vcpu_ioctl+0xea/0x6b0 [kvm]
  ? kvm_vcpu_ioctl+0xea/0x6b0 [kvm]
  ? __fget_light+0xd4/0x130
  __x64_sys_ioctl+0xe3/0x910
  ? debug_smp_processor_id+0x17/0x20
  ? fpregs_assert_state_consistent+0x27/0x50
  do_syscall_64+0x3f/0x90
  entry_SYSCALL_64_after_hwframe+0x63/0xcd

Adjust the 'mask' to zero out the userspace buffer for the features that
are available from neither fpstate nor init_fpstate.

The dynamic features depend on the compacted XSAVE format. Ensure it is
enabled before reading XCOMP_BV in init_fpstate.

Fixes: 2308ee57d93d ("x86/fpu/amx: Enable the AMX feature in 64-bit mode")
Reported-by: Yuan Yao <yuan.yao@intel.com>
Suggested-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Tested-by: Yuan Yao <yuan.yao@intel.com>
Link: https://lore.kernel.org/lkml/BYAPR11MB3717EDEF2351C958F2C86EED95259@BYAPR11MB3717.namprd11.prod.outlook.com/
Link: https://lkml.kernel.org/r/20221021185844.13472-1-chang.seok.bae@intel.com
1 parent b329f5d
Raw File
filetable.h
// SPDX-License-Identifier: GPL-2.0
#ifndef IOU_FILE_TABLE_H
#define IOU_FILE_TABLE_H

#include <linux/file.h>
#include <linux/io_uring_types.h>

/*
 * Flag bits that share a word with a struct file pointer in a fixed file
 * slot's file_ptr. FFS_MASK clears them again so the pointer can be recovered
 * (see io_file_from_index()).
 *
 * FFS_SCM is only available on 64-bit archs. On 32-bit it is defined as 0 and
 * IO_URING_SCM_ALL is defined instead: in that case SCM is used for all files,
 * because the file cannot always be safely dereferenced once the task has
 * exited and ring cleanup is done. If a file is tracked and part of SCM, then
 * unix gc on process exit may reap it before __io_sqe_files_unregister() is
 * run.
 */
#define FFS_NOWAIT		0x1UL
#define FFS_ISREG		0x2UL
#if defined(CONFIG_64BIT)
#define FFS_SCM			0x4UL
#else
#define IO_URING_SCM_ALL
#define FFS_SCM			0x0UL
#endif
/* Fully parenthesized so the expansion is always a single operand. */
#define FFS_MASK		(~(FFS_NOWAIT|FFS_ISREG|FFS_SCM))

/*
 * Out-of-line file table API. Only the prototypes are visible in this header;
 * the notes below are reviewer assumptions to confirm against the definitions.
 */

/*
 * Set up / tear down a struct io_file_table.
 * NOTE(review): presumably a false return means the allocation failed — confirm.
 */
bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files);
void io_free_file_tables(struct io_file_table *table);

/*
 * Install @file into, or remove a file from, a fixed file slot.
 * NOTE(review): the int returns look like 0 / negative errno — confirm.
 */
int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
			struct file *file, unsigned int file_slot);
int __io_fixed_fd_install(struct io_ring_ctx *ctx, struct file *file,
				unsigned int file_slot);
int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset);

/* Takes a userspace pointer (@arg is __user) to a file index range. */
int io_register_file_alloc_range(struct io_ring_ctx *ctx,
				 struct io_uring_file_index_range __user *arg);

/*
 * Returns flag bits for @file; io_fixed_file_set() ORs the result into the
 * slot's file_ptr.
 */
unsigned int io_file_get_flags(struct file *file);

/*
 * Clear @bit in @table->bitmap and record @bit as the table's alloc_hint.
 * Uses the non-atomic __clear_bit().
 */
static inline void io_file_bitmap_clear(struct io_file_table *table, int bit)
{
	table->alloc_hint = bit;
	__clear_bit(bit, table->bitmap);
}

/*
 * Set @bit in @table->bitmap and move alloc_hint to the bit right after it.
 * Warns once if @bit was already set. Uses the non-atomic __set_bit().
 */
static inline void io_file_bitmap_set(struct io_file_table *table, int bit)
{
	const int next_hint = bit + 1;

	WARN_ON_ONCE(test_bit(bit, table->bitmap));
	__set_bit(bit, table->bitmap);
	table->alloc_hint = next_hint;
}

/* Return a pointer to entry @i of @table->files. No bounds check is done. */
static inline struct io_fixed_file *
io_fixed_file_slot(struct io_file_table *table, unsigned i)
{
	struct io_fixed_file *slot = &table->files[i];

	return slot;
}

static inline struct file *io_file_from_index(struct io_file_table *table,
					      int index)
{
	struct io_fixed_file *slot = io_fixed_file_slot(table, index);

	return (struct file *) (slot->file_ptr & FFS_MASK);
}

/*
 * Store @file in @file_slot: the pointer value is combined with the bits
 * returned by io_file_get_flags(@file) and written to file_slot->file_ptr.
 */
static inline void io_fixed_file_set(struct io_fixed_file *file_slot,
				     struct file *file)
{
	file_slot->file_ptr = (unsigned long) file | io_file_get_flags(file);
}

static inline void io_reset_alloc_hint(struct io_ring_ctx *ctx)
{
	ctx->file_table.alloc_hint = ctx->file_alloc_start;
}

/*
 * Set the allocation range of @ctx to [@off, @off + @len) and reset the
 * allocation hint to the new start. The sum is not checked for overflow here.
 */
static inline void io_file_table_set_alloc_range(struct io_ring_ctx *ctx,
						 unsigned off, unsigned len)
{
	const unsigned end = off + len;

	ctx->file_alloc_start = off;
	ctx->file_alloc_end = end;
	io_reset_alloc_hint(ctx);
}

#endif
back to top