/*
* kmsan_hooks.c - KMSAN hooks for kernel subsystems.
*
* These functions handle creation of KMSAN metadata for memory allocations.
*
* Copyright (C) 2018 Google, Inc
* Author: Alexander Potapenko <glider@google.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/slab.h>
#include "../slab.h"
#include "kmsan.h"
/* TODO(glider): do we need to export these symbols? */
/*
* These hooks may call back into instrumented code, which, in turn, may call
* them again. To avoid re-entrancy, we use __GFP_NO_KMSAN_SHADOW.
* Instrumented functions shouldn't be called under
* ENTER_RUNTIME()/LEAVE_RUNTIME(): while the runtime is entered, the effects
* of functions like memset() inside instrumented code are skipped.
*/
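/*
* Each hook below therefore follows roughly the same pattern (a sketch; the
* guard macros and kmsan_ready are presumably provided by kmsan.h):
*
*	if (!kmsan_ready || IN_RUNTIME())
*		return;
*	ENTER_RUNTIME(irq_flags);
*	... create or update shadow/origin metadata ...
*	LEAVE_RUNTIME(irq_flags);
*/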
/* Called from kernel/kthread.c, kernel/fork.c */
void kmsan_task_create(struct task_struct *task)
{
unsigned long irq_flags;
if (!task)
return;
ENTER_RUNTIME(irq_flags);
do_kmsan_task_create(task);
LEAVE_RUNTIME(irq_flags);
}
EXPORT_SYMBOL(kmsan_task_create);
/* Helper function to allocate page metadata. */
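/*
* For |1 << order| pages starting at |page|, allocate shadow and origin pages
* of the same order and link them via page->shadow and page->origin. If
* |actual_size| is non-zero, metadata is only linked for the first
* ALIGN(actual_size, PAGE_SIZE) >> PAGE_SHIFT struct pages. Returns 0 on
* success and -ENOMEM if either metadata allocation fails.
*/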
int kmsan_internal_alloc_meta_for_pages(struct page *page, unsigned int order,
unsigned int actual_size, gfp_t flags, int node)
{
struct page *shadow, *origin;
int pages = 1 << order;
int i;
bool initialized = (flags & __GFP_ZERO) || !kmsan_ready;
depot_stack_handle_t handle;
/*
* If |actual_size| is non-zero, we still allocate |1 << order| metadata
* pages, but only link metadata for the first
* ALIGN(actual_size, PAGE_SIZE) >> PAGE_SHIFT pages.
*/
if (actual_size)
pages = ALIGN(actual_size, PAGE_SIZE) >> PAGE_SHIFT;
if (flags & __GFP_NO_KMSAN_SHADOW) {
for (i = 0; i < pages; i++) {
page[i].shadow = NULL;
page[i].origin = NULL;
}
return 0;
}
flags = GFP_ATOMIC; /* TODO(glider) */
if (initialized)
flags |= __GFP_ZERO;
shadow = alloc_pages_node(node, flags | __GFP_NO_KMSAN_SHADOW, order);
if (!shadow) {
for (i = 0; i < pages; i++) {
page[i].shadow = NULL;
page[i].origin = NULL;
}
return -ENOMEM;
}
if (!initialized)
__memset(page_address(shadow), -1, PAGE_SIZE * pages);
origin = alloc_pages_node(node, flags | __GFP_NO_KMSAN_SHADOW, order);
/* If the origin allocation failed, free the shadow pages as well. */
if (!origin) {
__free_pages(shadow, order);
for (i = 0; i < pages; i++) {
page[i].shadow = NULL;
page[i].origin = NULL;
}
return -ENOMEM;
}
if (!initialized) {
handle = kmsan_save_stack_with_flags(flags);
/*
* Addresses are page-aligned and pages are contiguous, so it's ok
* to just fill the origin pages with |handle|.
*/
for (i = 0; i < PAGE_SIZE * pages / sizeof(handle); i++)
((depot_stack_handle_t *)page_address(origin))[i] = handle;
}
for (i = 0; i < pages; i++) {
/*
* TODO(glider): sometimes page[i].shadow and page[i].origin are
* already initialized; the corresponding checks are skipped for now.
*/
page[i].shadow = &shadow[i];
page[i].shadow->shadow = NULL;
page[i].shadow->origin = NULL;
/* page[i].origin is a struct page pointer as well. */
page[i].origin = &origin[i];
page[i].origin->shadow = NULL;
page[i].origin->origin = NULL;
}
return 0;
}
/* Called from kernel/exit.c */
void kmsan_task_exit(struct task_struct *task)
{
unsigned long irq_flags;
kmsan_task_state *state = &task->kmsan;
if (!kmsan_ready)
return;
if (IN_RUNTIME())
return;
ENTER_RUNTIME(irq_flags);
state->enabled = false;
state->allow_reporting = false;
state->is_reporting = false;
LEAVE_RUNTIME(irq_flags);
}
EXPORT_SYMBOL(kmsan_task_exit);
/* Called from mm/slab.c */
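/*
* Mark the memory of a freshly allocated slab page as uninitialized, unless
* the page was requested with __GFP_ZERO, in which case its shadow is cleared
* instead.
*/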
void kmsan_poison_slab(struct page *page, gfp_t flags)
{
unsigned long irq_flags;
if (!kmsan_ready || IN_RUNTIME())
return;
ENTER_RUNTIME(irq_flags);
if (flags & __GFP_ZERO) {
kmsan_internal_unpoison_shadow(
page_address(page), PAGE_SIZE << compound_order(page), /*checked*/true);
} else {
kmsan_internal_poison_shadow(
page_address(page), PAGE_SIZE << compound_order(page),
flags, /*checked*/true);
}
LEAVE_RUNTIME(irq_flags);
}
/* Called from mm/slab.c, mm/slub.c */
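/*
* Poison the shadow of a newly allocated object. __GFP_ZERO allocations are
* unpoisoned instead, and objects from caches with a constructor are left
* untouched, presumably because the constructor is expected to initialize
* them.
*/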
void kmsan_kmalloc(struct kmem_cache *cache, const void *object, size_t size,
gfp_t flags)
{
unsigned long irq_flags;
if (unlikely(object == NULL))
return;
if (!kmsan_ready)
return;
if (IN_RUNTIME())
return;
ENTER_RUNTIME(irq_flags);
if (flags & __GFP_ZERO) {
/* TODO(glider): do we poison by default? */
kmsan_internal_unpoison_shadow((void *)object, size, /*checked*/true);
} else {
if (!cache->ctor)
kmsan_internal_poison_shadow((void *)object, size,
flags, /*checked*/true);
}
LEAVE_RUNTIME(irq_flags);
}
/* Called from mm/slab.c, mm/slab.h */
void kmsan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags)
{
kmsan_kmalloc(s, object, s->object_size, flags);
}
/* Called from mm/slab.c, mm/slub.c */
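/*
* Poison the shadow of a freed object and return true. Objects from
* SLAB_TYPESAFE_BY_RCU caches may still be legally accessed until the end of
* the grace period, so they are left untouched and false is returned.
*/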
bool kmsan_slab_free(struct kmem_cache *s, void *object)
{
/* RCU slabs could be legally used after free within the RCU period */
if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU))
return false;
kmsan_internal_poison_shadow((void *)object, s->object_size,
GFP_KERNEL, /*checked*/true);
return true;
}
/* Called from mm/slub.c */
void kmsan_kmalloc_large(const void *ptr, size_t size, gfp_t flags)
{
unsigned long irq_flags;
if (unlikely(ptr == NULL))
return;
if (!kmsan_ready || IN_RUNTIME())
return;
ENTER_RUNTIME(irq_flags);
if (flags & __GFP_ZERO) {
/* TODO(glider): do we poison by default? */
kmsan_internal_unpoison_shadow((void *)ptr, size, /*checked*/true);
} else {
kmsan_internal_poison_shadow((void *)ptr, size, flags, /*checked*/true);
}
LEAVE_RUNTIME(irq_flags);
}
/* Called from mm/slub.c */
void kmsan_kfree_large(const void *ptr)
{
struct page *page;
unsigned long irq_flags;
if (IN_RUNTIME())
return;
ENTER_RUNTIME(irq_flags);
page = virt_to_page_or_null(ptr);
if (!page) {
LEAVE_RUNTIME(irq_flags);
return;
}
kmsan_internal_poison_shadow(
(void *)ptr, PAGE_SIZE << compound_order(page), GFP_KERNEL, /*checked*/true);
LEAVE_RUNTIME(irq_flags);
}
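/*
* Create shadow and origin vm areas mirroring |area| and map the shadow and
* origin pages of area->pages into them, so that the vmalloc()ed region gets
* addressable metadata. Returns false if any allocation or mapping step
* fails.
*/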
bool kmsan_vmalloc_area_node(struct vm_struct *area, gfp_t alloc_mask, gfp_t nested_gfp, gfp_t highmem_mask, pgprot_t prot, int node)
{
struct page **s_pages = NULL, **o_pages = NULL;
struct vm_struct *s_area, *o_area;
size_t area_size = get_vm_area_size(area);
unsigned int nr_pages = area->nr_pages;
unsigned int array_size = nr_pages * sizeof(struct page *);
int i;
if (!kmsan_ready || IN_RUNTIME())
return true;
if (alloc_mask & __GFP_NO_KMSAN_SHADOW)
return true;
s_area = get_vm_area(area_size, /*flags*/0);
o_area = get_vm_area(area_size, /*flags*/0);
if (!s_area || !o_area)
goto fail;
if (array_size > PAGE_SIZE) {
s_pages = __vmalloc_node_flags_caller(array_size, node, nested_gfp|highmem_mask|__GFP_NO_KMSAN_SHADOW, (void*)area->caller);
o_pages = __vmalloc_node_flags_caller(array_size, node, nested_gfp|highmem_mask|__GFP_NO_KMSAN_SHADOW, (void*)area->caller);
} else {
s_pages = kmalloc_node(array_size, nested_gfp | __GFP_NO_KMSAN_SHADOW, node);
o_pages = kmalloc_node(array_size, nested_gfp | __GFP_NO_KMSAN_SHADOW, node);
}
if (!s_pages || !o_pages)
goto fail;
for (i = 0; i < area->nr_pages; i++) {
s_pages[i] = area->pages[i]->shadow;
o_pages[i] = area->pages[i]->origin;
}
s_area->pages = s_pages;
o_area->pages = o_pages;
if (map_vm_area(s_area, prot, s_pages))
goto fail;
if (map_vm_area(o_area, prot, o_pages))
goto fail;
area->shadow = s_area;
area->origin = o_area;
return true;
fail:
if (s_area) {
remove_vm_area(s_area->addr);
kfree(s_area);
}
if (o_area) {
remove_vm_area(o_area->addr);
kfree(o_area);
}
kvfree(s_pages);
kvfree(o_pages);
return false;
}
/* Called from mm/vmalloc.c */
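/*
* For a region being vmap()ed from |pages|, create shadow and origin areas of
* the same size, map the metadata pages of |pages| into them and link the new
* areas via area->shadow and area->origin.
*/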
void kmsan_vmap(struct vm_struct *area,
struct page **pages, unsigned int count, unsigned long flags,
pgprot_t prot, void *caller)
{
struct vm_struct *shadow, *origin;
struct page **s_pages = NULL, **o_pages = NULL;
unsigned long size;
int i;
if (!kmsan_ready || IN_RUNTIME())
return;
if (flags & __GFP_NO_KMSAN_SHADOW)
return;
size = (unsigned long)count << PAGE_SHIFT;
/*
* It's important to call get_vm_area_caller() and kmalloc() outside the
* runtime: kmalloc() may allocate a new slab without corresponding shadow
* pages, and accesses to any subsequent allocation from that slab would
* crash the kernel.
*/
shadow = get_vm_area_caller(size, flags | __GFP_NO_KMSAN_SHADOW, caller);
origin = get_vm_area_caller(size, flags | __GFP_NO_KMSAN_SHADOW, caller);
if (!shadow || !origin)
goto err_free;
/* TODO(glider): __GFP_NO_KMSAN_SHADOW below indicates that kmalloc won't be
* calling KMSAN hooks again, but it cannot guarantee the allocation
* will be performed from an untracked page (we would need a separate
* kmalloc cache for that). To make sure the pages are unpoisoned, we also
* allocate with __GFP_ZERO.
*/
s_pages = kmalloc(count * sizeof(struct page *), GFP_KERNEL | __GFP_NO_KMSAN_SHADOW | __GFP_ZERO);
if (!s_pages)
goto err_free;
o_pages = kmalloc(count * sizeof(struct page *), GFP_KERNEL | __GFP_NO_KMSAN_SHADOW | __GFP_ZERO);
if (!o_pages)
goto err_free;
for (i = 0; i < count; i++) {
if (!pages[i]->shadow)
goto err_free;
s_pages[i] = pages[i]->shadow;
o_pages[i] = pages[i]->origin;
}
/* Don't enter the runtime when allocating memory with kmalloc(). */
if (map_vm_area(shadow, prot, s_pages) ||
map_vm_area(origin, prot, o_pages)) {
goto err_free;
}
shadow->pages = s_pages;
shadow->nr_pages = count;
shadow->shadow = NULL;
shadow->origin = NULL;
origin->pages = o_pages;
origin->nr_pages = count;
origin->shadow = NULL;
origin->origin = NULL;
area->shadow = shadow;
area->origin = origin;
return;
err_free:
kfree(s_pages);
kfree(o_pages);
if (shadow)
vunmap(shadow->addr);
if (origin)
vunmap(origin->addr);
}
/* Called from mm/vmalloc.c */
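/*
* Tear down the shadow and origin areas attached to |area|: unmap them, free
* the metadata pages and the page arrays set up by kmsan_vmap().
*/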
void kmsan_vunmap(const void *addr, struct vm_struct *area, int deallocate_pages)
{
struct vm_struct *shadow, *origin;
int i;
if (!kmsan_ready || IN_RUNTIME())
return;
if (!area || !area->shadow)
return;
shadow = area->shadow;
origin = area->origin;
vunmap(shadow->addr);
vunmap(origin->addr);
BUG_ON(shadow->nr_pages != origin->nr_pages);
for (i = 0; i < shadow->nr_pages; i++) {
BUG_ON(!shadow->pages[i]);
__free_pages(shadow->pages[i], 0);
BUG_ON(!origin->pages[i]);
__free_pages(origin->pages[i], 0);
}
kfree(shadow->pages);
kfree(origin->pages);
}
EXPORT_SYMBOL(kmsan_vunmap);
/* Called from mm/page_alloc.c, mm/slab.c */
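/*
* Allocate shadow and origin metadata for a freshly allocated block of
* |1 << order| pages; the node is -1, so the metadata may come from any NUMA
* node.
*/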
int kmsan_alloc_page(struct page *page, unsigned int order, gfp_t flags)
{
unsigned long irq_flags;
int ret;
if (IN_RUNTIME())
return 0;
ENTER_RUNTIME(irq_flags);
ret = kmsan_internal_alloc_meta_for_pages(
page, order, /*actual_size*/ 0, flags, -1);
LEAVE_RUNTIME(irq_flags);
return ret;
}
/* Called from mm/page_alloc.c, mm/slab.c */
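/*
* Detach and free the shadow and origin pages of a block of |1 << order|
* pages that is being returned to the page allocator.
*/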
void kmsan_free_page(struct page *page, unsigned int order)
{
struct page *shadow, *origin, *cur_page;
int pages = 1 << order;
int i;
unsigned long irq_flags;
if (!page->shadow) {
for (i = 0; i < pages; i++) {
cur_page = &page[i];
BUG_ON(cur_page->shadow);
}
return;
}
/* TODO(glider): order? */
if (!kmsan_ready) {
for (i = 0; i < pages; i++) {
cur_page = &page[i];
cur_page->shadow = NULL;
cur_page->origin = NULL;
}
return;
}
if (IN_RUNTIME()) {
/*
* TODO(glider): this is probably legitimate, as depot_save_stack()
* may itself call free_pages() while we are inside the runtime.
*/
return;
}
ENTER_RUNTIME(irq_flags);
if (!page[0].shadow) {
/* TODO(glider): can we free a page without a shadow?
* Maybe if it was allocated at boot time?
* Anyway, all shadow pages must be NULL then.
*/
for (i = 0; i < pages; i++)
if (page[i].shadow) {
current->kmsan.is_reporting = true;
for (i = 0; i < pages; i++)
kmsan_pr_err("page[%d].shadow=%px\n",
i, page[i].shadow);
current->kmsan.is_reporting = false;
break;
}
LEAVE_RUNTIME(irq_flags);
return;
}
shadow = page[0].shadow;
origin = page[0].origin;
/* TODO(glider): this is racy. */
for (i = 0; i < pages; i++) {
BUG_ON((page[i].shadow->shadow));
page[i].shadow = NULL;
BUG_ON(page[i].origin->shadow);
page[i].origin = NULL;
}
BUG_ON(shadow->shadow);
__free_pages(shadow, order);
BUG_ON(origin->shadow);
__free_pages(origin, order);
LEAVE_RUNTIME(irq_flags);
}
EXPORT_SYMBOL(kmsan_free_page);
/* Called from mm/page_alloc.c */
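/*
* When a high-order page is split into order-0 pages, split its shadow and
* origin pages the same way so that every resulting page keeps valid per-page
* metadata.
*/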
void kmsan_split_page(struct page *page, unsigned int order)
{
struct page *shadow, *origin;
unsigned long irq_flags;
if (!kmsan_ready)
return;
if (IN_RUNTIME())
return;
ENTER_RUNTIME(irq_flags);
if (!page[0].shadow) {
BUG_ON(page[0].origin);
LEAVE_RUNTIME(irq_flags);
return;
}
shadow = page[0].shadow;
split_page(shadow, order);
origin = page[0].origin;
split_page(origin, order);
LEAVE_RUNTIME(irq_flags);
}
EXPORT_SYMBOL(kmsan_split_page);
/* Called from drivers/acpi/osl.c */
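/*
* Allocate metadata for an ACPI mapping of |size| bytes starting at |vaddr|.
* The metadata is allocated with __GFP_ZERO, i.e. the mapped region is
* treated as initialized.
*/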
void kmsan_acpi_map(void *vaddr, unsigned long size)
{
struct page *page;
unsigned long irq_flags;
int order;
if (IN_RUNTIME())
return;
ENTER_RUNTIME(irq_flags);
page = vmalloc_to_page_or_null(vaddr);
if (!page) {
LEAVE_RUNTIME(irq_flags);
return;
}
order = order_from_size(size);
/*
* Although the address is virtual, the corresponding ACPI pages are
* physically contiguous, so we can allocate metadata for the whole
* range at once.
*/
kmsan_internal_alloc_meta_for_pages(page, order, size,
GFP_KERNEL | __GFP_ZERO, -1);
LEAVE_RUNTIME(irq_flags);
}
/* Called from drivers/acpi/osl.c */
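/*
* Free the metadata attached to an ACPI mapping created by kmsan_acpi_map().
* A |size| of -1 means "the whole vm area containing |vaddr|".
*/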
void kmsan_acpi_unmap(void *vaddr, unsigned long size)
{
struct page *page;
unsigned long irq_flags;
int order;
int pages, i;
if (IN_RUNTIME())
return;
ENTER_RUNTIME(irq_flags);
page = vmalloc_to_page_or_null(vaddr);
if (!page) {
LEAVE_RUNTIME(irq_flags);
return;
}
if (size == -1)
size = get_vm_area_size(find_vm_area(vaddr));
order = order_from_size(size);
if (page->shadow)
__free_pages(page->shadow, order);
if (page->origin)
__free_pages(page->origin, order);
pages = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT;
for (i = 0; i < pages; i++) {
page[i].shadow = NULL;
page[i].origin = NULL;
}
LEAVE_RUNTIME(irq_flags);
}
/* Called from mm/memory.c */
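/*
* Copy the shadow and origin of |src| into the metadata of |dst|, so that
* copying a page also transfers its initialization state.
*/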
void kmsan_copy_page_meta(struct page *dst, struct page *src)
{
unsigned long irq_flags;
if (!kmsan_ready)
return;
if (IN_RUNTIME())
return;
if (!src->shadow) {
/* TODO(glider): are we leaking pages here? */
dst->shadow = NULL;
dst->origin = NULL;
return;
}
if (!dst->shadow)
return;
ENTER_RUNTIME(irq_flags);
if (!src->shadow || !dst->shadow) {
kmsan_pr_err("Copying %px (page %px, shadow %px) "
"to %px (page %px, shadow %px)\n",
page_address(src), src, src->shadow,
page_address(dst), dst, dst->shadow);
BUG();
}
__memcpy(page_address(dst->shadow), page_address(src->shadow),
PAGE_SIZE);
BUG_ON(!src->origin || !dst->origin);
__memcpy(page_address(dst->origin), page_address(src->origin),
PAGE_SIZE);
LEAVE_RUNTIME(irq_flags);
}
EXPORT_SYMBOL(kmsan_copy_page_meta);
/* Called from include/linux/uaccess.h */
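/*
* |to_copy| is the number of bytes the caller asked to copy, |left| is the
* number of bytes copy_to_user() failed to copy. For copies to userspace the
* source is checked for uninitialized bits; for kernel-to-kernel copies done
* by the usercopy helpers only the metadata of the copied bytes is
* transferred.
*/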
void kmsan_copy_to_user(const void *to, const void *from,
size_t to_copy, size_t left)
{
void *shadow;
/* TODO(glider): at this point we've copied the memory already.
* Might be better to check it before copying.
*/
/* copy_to_user() may copy zero bytes. No need to check. */
if (!to_copy)
return;
/* Or maybe copy_to_user() failed to copy anything. */
if (to_copy == left)
return;
if ((u64)to < TASK_SIZE) {
/* This is a user memory access, check it. */
kmsan_internal_check_memory(from, to_copy - left, to,
REASON_COPY_TO_USER);
return;
}
/* Otherwise this is a kernel memory access. This happens when a compat
* syscall passes an argument allocated on the kernel stack to a real
* syscall.
* Don't check anything, just copy the shadow of the copied bytes.
*/
shadow = kmsan_get_metadata_or_null((u64)to, to_copy - left, /*origin*/false);
if (shadow) {
kmsan_memcpy_metadata(to, from, to_copy - left);
}
}
EXPORT_SYMBOL(kmsan_copy_to_user);