Revision 74c42e1baacf206338b1dd6b6199ac964512b5bb authored by Rongwei Wang on 28 October 2021, 21:36:27 UTC, committed by Linus Torvalds on 29 October 2021, 00:18:55 UTC
Currently collapse_file does not explicitly check PG_writeback, instead,
page_has_private and try_to_release_page are used to filter writeback
pages.  This does not work for xfs with blocksize equal to or larger
than pagesize, because in such case xfs has no page->private.

This makes collapse_file bail out early for writeback page.  Otherwise,
xfs end_page_writeback will panic as follows.

  page:fffffe00201bcc80 refcount:0 mapcount:0 mapping:ffff0003f88c86a8 index:0x0 pfn:0x84ef32
  aops:xfs_address_space_operations [xfs] ino:30000b7 dentry name:"libtest.so"
  flags: 0x57fffe0000008027(locked|referenced|uptodate|active|writeback)
  raw: 57fffe0000008027 ffff80001b48bc28 ffff80001b48bc28 ffff0003f88c86a8
  raw: 0000000000000000 0000000000000000 00000000ffffffff ffff0000c3e9a000
  page dumped because: VM_BUG_ON_PAGE(((unsigned int) page_ref_count(page) + 127u <= 127u))
  page->mem_cgroup:ffff0000c3e9a000
  ------------[ cut here ]------------
  kernel BUG at include/linux/mm.h:1212!
  Internal error: Oops - BUG: 0 [#1] SMP
  Modules linked in:
  BUG: Bad page state in process khugepaged  pfn:84ef32
   xfs(E)
  page:fffffe00201bcc80 refcount:0 mapcount:0 mapping:0 index:0x0 pfn:0x84ef32
   libcrc32c(E) rfkill(E) aes_ce_blk(E) crypto_simd(E) ...
  CPU: 25 PID: 0 Comm: swapper/25 Kdump: loaded Tainted: ...
  pstate: 60400005 (nZCv daif +PAN -UAO -TCO BTYPE=--)
  Call trace:
    end_page_writeback+0x1c0/0x214
    iomap_finish_page_writeback+0x13c/0x204
    iomap_finish_ioend+0xe8/0x19c
    iomap_writepage_end_bio+0x38/0x50
    bio_endio+0x168/0x1ec
    blk_update_request+0x278/0x3f0
    blk_mq_end_request+0x34/0x15c
    virtblk_request_done+0x38/0x74 [virtio_blk]
    blk_done_softirq+0xc4/0x110
    __do_softirq+0x128/0x38c
    __irq_exit_rcu+0x118/0x150
    irq_exit+0x1c/0x30
    __handle_domain_irq+0x8c/0xf0
    gic_handle_irq+0x84/0x108
    el1_irq+0xcc/0x180
    arch_cpu_idle+0x18/0x40
    default_idle_call+0x4c/0x1a0
    cpuidle_idle_call+0x168/0x1e0
    do_idle+0xb4/0x104
    cpu_startup_entry+0x30/0x9c
    secondary_start_kernel+0x104/0x180
  Code: d4210000 b0006161 910c8021 94013f4d (d4210000)
  ---[ end trace 4a88c6a074082f8c ]---
  Kernel panic - not syncing: Oops - BUG: Fatal exception in interrupt

Link: https://lkml.kernel.org/r/20211022023052.33114-1-rongwei.wang@linux.alibaba.com
Fixes: 99cb0dbd47a1 ("mm,thp: add read-only THP support for (non-shmem) FS")
Signed-off-by: Rongwei Wang <rongwei.wang@linux.alibaba.com>
Signed-off-by: Xu Yu <xuyu@linux.alibaba.com>
Suggested-by: Yang Shi <shy828301@gmail.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Yang Shi <shy828301@gmail.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Song Liu <song@kernel.org>
Cc: William Kucharski <william.kucharski@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent ffb29b1
Raw File
holder.c
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/genhd.h>

struct bd_holder_disk {
	struct list_head	list;
	struct block_device	*bdev;
	int			refcnt;
};

static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
						  struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	list_for_each_entry(holder, &disk->slave_bdevs, list)
		if (holder->bdev == bdev)
			return holder;
	return NULL;
}

static int add_symlink(struct kobject *from, struct kobject *to)
{
	return sysfs_create_link(from, to, kobject_name(to));
}

static void del_symlink(struct kobject *from, struct kobject *to)
{
	sysfs_remove_link(from, kobject_name(to));
}

static int __link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	int ret;

	ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
	if (ret)
		return ret;
	ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
	if (ret)
		del_symlink(disk->slave_dir, bdev_kobj(bdev));
	return ret;
}

/**
 * bd_link_disk_holder - create symlinks between holding disk and slave bdev
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * This functions creates the following sysfs symlinks.
 *
 * - from "slaves" directory of the holder @disk to the claimed @bdev
 * - from "holders" directory of the @bdev to the holder @disk
 *
 * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
 * passed to bd_link_disk_holder(), then:
 *
 *   /sys/block/dm-0/slaves/sda --> /sys/block/sda
 *   /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
 *
 * The caller must have claimed @bdev before calling this function and
 * ensure that both @bdev and @disk are valid during the creation and
 * lifetime of these symlinks.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;
	int ret = 0;

	mutex_lock(&disk->open_mutex);

	WARN_ON_ONCE(!bdev->bd_holder);

	/* FIXME: remove the following once add_disk() handles errors */
	if (WARN_ON(!bdev->bd_holder_dir))
		goto out_unlock;

	holder = bd_find_holder_disk(bdev, disk);
	if (holder) {
		holder->refcnt++;
		goto out_unlock;
	}

	holder = kzalloc(sizeof(*holder), GFP_KERNEL);
	if (!holder) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	INIT_LIST_HEAD(&holder->list);
	holder->bdev = bdev;
	holder->refcnt = 1;
	if (disk->slave_dir) {
		ret = __link_disk_holder(bdev, disk);
		if (ret) {
			kfree(holder);
			goto out_unlock;
		}
	}

	list_add(&holder->list, &disk->slave_bdevs);
	/*
	 * del_gendisk drops the initial reference to bd_holder_dir, so we need
	 * to keep our own here to allow for cleanup past that point.
	 */
	kobject_get(bdev->bd_holder_dir);

out_unlock:
	mutex_unlock(&disk->open_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);

static void __unlink_disk_holder(struct block_device *bdev,
		struct gendisk *disk)
{
	del_symlink(disk->slave_dir, bdev_kobj(bdev));
	del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
}

/**
 * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
 * @bdev: the calimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * CONTEXT:
 * Might sleep.
 */
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	mutex_lock(&disk->open_mutex);
	holder = bd_find_holder_disk(bdev, disk);
	if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
		if (disk->slave_dir)
			__unlink_disk_holder(bdev, disk);
		kobject_put(bdev->bd_holder_dir);
		list_del_init(&holder->list);
		kfree(holder);
	}
	mutex_unlock(&disk->open_mutex);
}
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);

int bd_register_pending_holders(struct gendisk *disk)
{
	struct bd_holder_disk *holder;
	int ret;

	mutex_lock(&disk->open_mutex);
	list_for_each_entry(holder, &disk->slave_bdevs, list) {
		ret = __link_disk_holder(holder->bdev, disk);
		if (ret)
			goto out_undo;
	}
	mutex_unlock(&disk->open_mutex);
	return 0;

out_undo:
	list_for_each_entry_continue_reverse(holder, &disk->slave_bdevs, list)
		__unlink_disk_holder(holder->bdev, disk);
	mutex_unlock(&disk->open_mutex);
	return ret;
}
back to top