https://github.com/torvalds/linux
Raw File
Tip revision: e8f897f4afef0031fe618a8e94127a0934896aba authored by Linus Torvalds on 10 March 2024, 20:38:09 UTC
Linux 6.8
Tip revision: e8f897f
mtty.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Mediated virtual PCI serial host device driver
 *
 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
 *     Author: Neo Jia <cjia@nvidia.com>
 *             Kirti Wankhede <kwankhede@nvidia.com>
 *
 * Sample driver that creates mdev device that simulates serial port over PCI
 * card.
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/cdev.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/vfio.h>
#include <linux/iommu.h>
#include <linux/sysfs.h>
#include <linux/ctype.h>
#include <linux/file.h>
#include <linux/mdev.h>
#include <linux/pci.h>
#include <linux/serial.h>
#include <uapi/linux/serial_reg.h>
#include <linux/eventfd.h>
#include <linux/anon_inodes.h>

/*
 * #defines
 */

#define VERSION_STRING  "0.1"
#define DRIVER_AUTHOR   "NVIDIA Corporation"

#define MTTY_CLASS_NAME "mtty"

#define MTTY_NAME       "mtty"

#define MTTY_STRING_LEN		16

#define MTTY_CONFIG_SPACE_SIZE  0xff
#define MTTY_IO_BAR_SIZE        0x8
#define MTTY_MMIO_BAR_SIZE      0x100000

#define STORE_LE16(addr, val)   (*(u16 *)addr = val)
#define STORE_LE32(addr, val)   (*(u32 *)addr = val)

#define MAX_FIFO_SIZE   16

#define CIRCULAR_BUF_INC_IDX(idx)    (idx = (idx + 1) & (MAX_FIFO_SIZE - 1))

#define MTTY_VFIO_PCI_OFFSET_SHIFT   40

#define MTTY_VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> MTTY_VFIO_PCI_OFFSET_SHIFT)
#define MTTY_VFIO_PCI_INDEX_TO_OFFSET(index) \
				((u64)(index) << MTTY_VFIO_PCI_OFFSET_SHIFT)
#define MTTY_VFIO_PCI_OFFSET_MASK    \
				(((u64)(1) << MTTY_VFIO_PCI_OFFSET_SHIFT) - 1)
#define MAX_MTTYS	24

/*
 * Global Structures
 */

static struct mtty_dev {
	dev_t		vd_devt;
	struct class	*vd_class;
	struct cdev	vd_cdev;
	struct idr	vd_idr;
	struct device	dev;
	struct mdev_parent parent;
} mtty_dev;

struct mdev_region_info {
	u64 start;
	u64 phys_start;
	u32 size;
	u64 vfio_offset;
};

#if defined(DEBUG_REGS)
static const char *wr_reg[] = {
	"TX",
	"IER",
	"FCR",
	"LCR",
	"MCR",
	"LSR",
	"MSR",
	"SCR"
};

static const char *rd_reg[] = {
	"RX",
	"IER",
	"IIR",
	"LCR",
	"MCR",
	"LSR",
	"MSR",
	"SCR"
};
#endif

/* loop back buffer */
struct rxtx {
	u8 fifo[MAX_FIFO_SIZE];
	u8 head, tail;
	u8 count;
};

struct serial_port {
	u8 uart_reg[8];         /* 8 registers */
	struct rxtx rxtx;       /* loop back buffer */
	bool dlab;
	bool overrun;
	u16 divisor;
	u8 fcr;                 /* FIFO control register */
	u8 max_fifo_size;
	u8 intr_trigger_level;  /* interrupt trigger level */
};

struct mtty_data {
	u64 magic;
#define MTTY_MAGIC 0x7e9d09898c3e2c4e /* Nothing clever, just random */
	u32 major_ver;
#define MTTY_MAJOR_VER 1
	u32 minor_ver;
#define MTTY_MINOR_VER 0
	u32 nr_ports;
	u32 flags;
	struct serial_port ports[2];
};

struct mdev_state;

struct mtty_migration_file {
	struct file *filp;
	struct mutex lock;
	struct mdev_state *mdev_state;
	struct mtty_data data;
	ssize_t filled_size;
	u8 disabled:1;
};

/* State of each mdev device */
struct mdev_state {
	struct vfio_device vdev;
	struct eventfd_ctx *intx_evtfd;
	struct eventfd_ctx *msi_evtfd;
	int irq_index;
	u8 *vconfig;
	struct mutex ops_lock;
	struct mdev_device *mdev;
	struct mdev_region_info region_info[VFIO_PCI_NUM_REGIONS];
	u32 bar_mask[VFIO_PCI_NUM_REGIONS];
	struct list_head next;
	struct serial_port s[2];
	struct mutex rxtx_lock;
	struct vfio_device_info dev_info;
	int nr_ports;
	enum vfio_device_mig_state state;
	struct mutex state_mutex;
	struct mutex reset_mutex;
	struct mtty_migration_file *saving_migf;
	struct mtty_migration_file *resuming_migf;
	u8 deferred_reset:1;
	u8 intx_mask:1;
};

static struct mtty_type {
	struct mdev_type type;
	int nr_ports;
} mtty_types[2] = {
	{ .nr_ports = 1, .type.sysfs_name = "1",
	  .type.pretty_name = "Single port serial" },
	{ .nr_ports = 2, .type.sysfs_name = "2",
	  .type.pretty_name = "Dual port serial" },
};

static struct mdev_type *mtty_mdev_types[] = {
	&mtty_types[0].type,
	&mtty_types[1].type,
};

static atomic_t mdev_avail_ports = ATOMIC_INIT(MAX_MTTYS);

static const struct file_operations vd_fops = {
	.owner          = THIS_MODULE,
};

static const struct vfio_device_ops mtty_dev_ops;

/* Helper functions */

static void dump_buffer(u8 *buf, uint32_t count)
{
#if defined(DEBUG)
	int i;

	pr_info("Buffer:\n");
	for (i = 0; i < count; i++) {
		pr_info("%2x ", *(buf + i));
		if ((i + 1) % 16 == 0)
			pr_info("\n");
	}
#endif
}

static bool is_intx(struct mdev_state *mdev_state)
{
	return mdev_state->irq_index == VFIO_PCI_INTX_IRQ_INDEX;
}

static bool is_msi(struct mdev_state *mdev_state)
{
	return mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX;
}

static bool is_noirq(struct mdev_state *mdev_state)
{
	return !is_intx(mdev_state) && !is_msi(mdev_state);
}

static void mtty_trigger_interrupt(struct mdev_state *mdev_state)
{
	lockdep_assert_held(&mdev_state->ops_lock);

	if (is_msi(mdev_state)) {
		if (mdev_state->msi_evtfd)
			eventfd_signal(mdev_state->msi_evtfd);
	} else if (is_intx(mdev_state)) {
		if (mdev_state->intx_evtfd && !mdev_state->intx_mask) {
			eventfd_signal(mdev_state->intx_evtfd);
			mdev_state->intx_mask = true;
		}
	}
}

static void mtty_create_config_space(struct mdev_state *mdev_state)
{
	/* PCI dev ID */
	STORE_LE32((u32 *) &mdev_state->vconfig[0x0], 0x32534348);

	/* Control: I/O+, Mem-, BusMaster- */
	STORE_LE16((u16 *) &mdev_state->vconfig[0x4], 0x0001);

	/* Status: capabilities list absent */
	STORE_LE16((u16 *) &mdev_state->vconfig[0x6], 0x0200);

	/* Rev ID */
	mdev_state->vconfig[0x8] =  0x10;

	/* programming interface class : 16550-compatible serial controller */
	mdev_state->vconfig[0x9] =  0x02;

	/* Sub class : 00 */
	mdev_state->vconfig[0xa] =  0x00;

	/* Base class : Simple Communication controllers */
	mdev_state->vconfig[0xb] =  0x07;

	/* base address registers */
	/* BAR0: IO space */
	STORE_LE32((u32 *) &mdev_state->vconfig[0x10], 0x000001);
	mdev_state->bar_mask[0] = ~(MTTY_IO_BAR_SIZE) + 1;

	if (mdev_state->nr_ports == 2) {
		/* BAR1: IO space */
		STORE_LE32((u32 *) &mdev_state->vconfig[0x14], 0x000001);
		mdev_state->bar_mask[1] = ~(MTTY_IO_BAR_SIZE) + 1;
	}

	/* Subsystem ID */
	STORE_LE32((u32 *) &mdev_state->vconfig[0x2c], 0x32534348);

	mdev_state->vconfig[0x34] =  0x00;   /* Cap Ptr */
	mdev_state->vconfig[0x3d] =  0x01;   /* interrupt pin (INTA#) */

	/* Vendor specific data */
	mdev_state->vconfig[0x40] =  0x23;
	mdev_state->vconfig[0x43] =  0x80;
	mdev_state->vconfig[0x44] =  0x23;
	mdev_state->vconfig[0x48] =  0x23;
	mdev_state->vconfig[0x4c] =  0x23;

	mdev_state->vconfig[0x60] =  0x50;
	mdev_state->vconfig[0x61] =  0x43;
	mdev_state->vconfig[0x62] =  0x49;
	mdev_state->vconfig[0x63] =  0x20;
	mdev_state->vconfig[0x64] =  0x53;
	mdev_state->vconfig[0x65] =  0x65;
	mdev_state->vconfig[0x66] =  0x72;
	mdev_state->vconfig[0x67] =  0x69;
	mdev_state->vconfig[0x68] =  0x61;
	mdev_state->vconfig[0x69] =  0x6c;
	mdev_state->vconfig[0x6a] =  0x2f;
	mdev_state->vconfig[0x6b] =  0x55;
	mdev_state->vconfig[0x6c] =  0x41;
	mdev_state->vconfig[0x6d] =  0x52;
	mdev_state->vconfig[0x6e] =  0x54;
}

static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset,
				 u8 *buf, u32 count)
{
	u32 cfg_addr, bar_mask, bar_index = 0;

	switch (offset) {
	case 0x04: /* device control */
	case 0x06: /* device status */
		/* do nothing */
		break;
	case 0x3c:  /* interrupt line */
		mdev_state->vconfig[0x3c] = buf[0];
		break;
	case 0x3d:
		/*
		 * Interrupt Pin is hardwired to INTA.
		 * This field is write protected by hardware
		 */
		break;
	case 0x10:  /* BAR0 */
	case 0x14:  /* BAR1 */
		if (offset == 0x10)
			bar_index = 0;
		else if (offset == 0x14)
			bar_index = 1;

		if ((mdev_state->nr_ports == 1) && (bar_index == 1)) {
			STORE_LE32(&mdev_state->vconfig[offset], 0);
			break;
		}

		cfg_addr = *(u32 *)buf;
		pr_info("BAR%d addr 0x%x\n", bar_index, cfg_addr);

		if (cfg_addr == 0xffffffff) {
			bar_mask = mdev_state->bar_mask[bar_index];
			cfg_addr = (cfg_addr & bar_mask);
		}

		cfg_addr |= (mdev_state->vconfig[offset] & 0x3ul);
		STORE_LE32(&mdev_state->vconfig[offset], cfg_addr);
		break;
	case 0x18:  /* BAR2 */
	case 0x1c:  /* BAR3 */
	case 0x20:  /* BAR4 */
		STORE_LE32(&mdev_state->vconfig[offset], 0);
		break;
	default:
		pr_info("PCI config write @0x%x of %d bytes not handled\n",
			offset, count);
		break;
	}
}

static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
				u16 offset, u8 *buf, u32 count)
{
	u8 data = *buf;

	/* Handle data written by guest */
	switch (offset) {
	case UART_TX:
		/* if DLAB set, data is LSB of divisor */
		if (mdev_state->s[index].dlab) {
			mdev_state->s[index].divisor |= data;
			break;
		}

		mutex_lock(&mdev_state->rxtx_lock);

		/* save in TX buffer */
		if (mdev_state->s[index].rxtx.count <
				mdev_state->s[index].max_fifo_size) {
			mdev_state->s[index].rxtx.fifo[
					mdev_state->s[index].rxtx.head] = data;
			mdev_state->s[index].rxtx.count++;
			CIRCULAR_BUF_INC_IDX(mdev_state->s[index].rxtx.head);
			mdev_state->s[index].overrun = false;

			/*
			 * Trigger interrupt if receive data interrupt is
			 * enabled and fifo reached trigger level
			 */
			if ((mdev_state->s[index].uart_reg[UART_IER] &
						UART_IER_RDI) &&
			   (mdev_state->s[index].rxtx.count ==
				    mdev_state->s[index].intr_trigger_level)) {
				/* trigger interrupt */
#if defined(DEBUG_INTR)
				pr_err("Serial port %d: Fifo level trigger\n",
					index);
#endif
				mtty_trigger_interrupt(mdev_state);
			}
		} else {
#if defined(DEBUG_INTR)
			pr_err("Serial port %d: Buffer Overflow\n", index);
#endif
			mdev_state->s[index].overrun = true;

			/*
			 * Trigger interrupt if receiver line status interrupt
			 * is enabled
			 */
			if (mdev_state->s[index].uart_reg[UART_IER] &
								UART_IER_RLSI)
				mtty_trigger_interrupt(mdev_state);
		}
		mutex_unlock(&mdev_state->rxtx_lock);
		break;

	case UART_IER:
		/* if DLAB set, data is MSB of divisor */
		if (mdev_state->s[index].dlab)
			mdev_state->s[index].divisor |= (u16)data << 8;
		else {
			mdev_state->s[index].uart_reg[offset] = data;
			mutex_lock(&mdev_state->rxtx_lock);
			if ((data & UART_IER_THRI) &&
			    (mdev_state->s[index].rxtx.head ==
					mdev_state->s[index].rxtx.tail)) {
#if defined(DEBUG_INTR)
				pr_err("Serial port %d: IER_THRI write\n",
					index);
#endif
				mtty_trigger_interrupt(mdev_state);
			}

			mutex_unlock(&mdev_state->rxtx_lock);
		}

		break;

	case UART_FCR:
		mdev_state->s[index].fcr = data;

		mutex_lock(&mdev_state->rxtx_lock);
		if (data & (UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT)) {
			/* clear loop back FIFO */
			mdev_state->s[index].rxtx.count = 0;
			mdev_state->s[index].rxtx.head = 0;
			mdev_state->s[index].rxtx.tail = 0;
		}
		mutex_unlock(&mdev_state->rxtx_lock);

		switch (data & UART_FCR_TRIGGER_MASK) {
		case UART_FCR_TRIGGER_1:
			mdev_state->s[index].intr_trigger_level = 1;
			break;

		case UART_FCR_TRIGGER_4:
			mdev_state->s[index].intr_trigger_level = 4;
			break;

		case UART_FCR_TRIGGER_8:
			mdev_state->s[index].intr_trigger_level = 8;
			break;

		case UART_FCR_TRIGGER_14:
			mdev_state->s[index].intr_trigger_level = 14;
			break;
		}

		/*
		 * Set trigger level to 1 otherwise or  implement timer with
		 * timeout of 4 characters and on expiring that timer set
		 * Recevice data timeout in IIR register
		 */
		mdev_state->s[index].intr_trigger_level = 1;
		if (data & UART_FCR_ENABLE_FIFO)
			mdev_state->s[index].max_fifo_size = MAX_FIFO_SIZE;
		else {
			mdev_state->s[index].max_fifo_size = 1;
			mdev_state->s[index].intr_trigger_level = 1;
		}

		break;

	case UART_LCR:
		if (data & UART_LCR_DLAB) {
			mdev_state->s[index].dlab = true;
			mdev_state->s[index].divisor = 0;
		} else
			mdev_state->s[index].dlab = false;

		mdev_state->s[index].uart_reg[offset] = data;
		break;

	case UART_MCR:
		mdev_state->s[index].uart_reg[offset] = data;

		if ((mdev_state->s[index].uart_reg[UART_IER] & UART_IER_MSI) &&
				(data & UART_MCR_OUT2)) {
#if defined(DEBUG_INTR)
			pr_err("Serial port %d: MCR_OUT2 write\n", index);
#endif
			mtty_trigger_interrupt(mdev_state);
		}

		if ((mdev_state->s[index].uart_reg[UART_IER] & UART_IER_MSI) &&
				(data & (UART_MCR_RTS | UART_MCR_DTR))) {
#if defined(DEBUG_INTR)
			pr_err("Serial port %d: MCR RTS/DTR write\n", index);
#endif
			mtty_trigger_interrupt(mdev_state);
		}
		break;

	case UART_LSR:
	case UART_MSR:
		/* do nothing */
		break;

	case UART_SCR:
		mdev_state->s[index].uart_reg[offset] = data;
		break;

	default:
		break;
	}
}

static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state,
			    u16 offset, u8 *buf, u32 count)
{
	/* Handle read requests by guest */
	switch (offset) {
	case UART_RX:
		/* if DLAB set, data is LSB of divisor */
		if (mdev_state->s[index].dlab) {
			*buf  = (u8)mdev_state->s[index].divisor;
			break;
		}

		mutex_lock(&mdev_state->rxtx_lock);
		/* return data in tx buffer */
		if (mdev_state->s[index].rxtx.head !=
				 mdev_state->s[index].rxtx.tail) {
			*buf = mdev_state->s[index].rxtx.fifo[
						mdev_state->s[index].rxtx.tail];
			mdev_state->s[index].rxtx.count--;
			CIRCULAR_BUF_INC_IDX(mdev_state->s[index].rxtx.tail);
		}

		if (mdev_state->s[index].rxtx.head ==
				mdev_state->s[index].rxtx.tail) {
		/*
		 *  Trigger interrupt if tx buffer empty interrupt is
		 *  enabled and fifo is empty
		 */
#if defined(DEBUG_INTR)
			pr_err("Serial port %d: Buffer Empty\n", index);
#endif
			if (mdev_state->s[index].uart_reg[UART_IER] &
							 UART_IER_THRI)
				mtty_trigger_interrupt(mdev_state);
		}
		mutex_unlock(&mdev_state->rxtx_lock);

		break;

	case UART_IER:
		if (mdev_state->s[index].dlab) {
			*buf = (u8)(mdev_state->s[index].divisor >> 8);
			break;
		}
		*buf = mdev_state->s[index].uart_reg[offset] & 0x0f;
		break;

	case UART_IIR:
	{
		u8 ier = mdev_state->s[index].uart_reg[UART_IER];
		*buf = 0;

		mutex_lock(&mdev_state->rxtx_lock);
		/* Interrupt priority 1: Parity, overrun, framing or break */
		if ((ier & UART_IER_RLSI) && mdev_state->s[index].overrun)
			*buf |= UART_IIR_RLSI;

		/* Interrupt priority 2: Fifo trigger level reached */
		if ((ier & UART_IER_RDI) &&
		    (mdev_state->s[index].rxtx.count >=
		      mdev_state->s[index].intr_trigger_level))
			*buf |= UART_IIR_RDI;

		/* Interrupt priotiry 3: transmitter holding register empty */
		if ((ier & UART_IER_THRI) &&
		    (mdev_state->s[index].rxtx.head ==
				mdev_state->s[index].rxtx.tail))
			*buf |= UART_IIR_THRI;

		/* Interrupt priotiry 4: Modem status: CTS, DSR, RI or DCD  */
		if ((ier & UART_IER_MSI) &&
		    (mdev_state->s[index].uart_reg[UART_MCR] &
				 (UART_MCR_RTS | UART_MCR_DTR)))
			*buf |= UART_IIR_MSI;

		/* bit0: 0=> interrupt pending, 1=> no interrupt is pending */
		if (*buf == 0)
			*buf = UART_IIR_NO_INT;

		/* set bit 6 & 7 to be 16550 compatible */
		*buf |= 0xC0;
		mutex_unlock(&mdev_state->rxtx_lock);
	}
	break;

	case UART_LCR:
	case UART_MCR:
		*buf = mdev_state->s[index].uart_reg[offset];
		break;

	case UART_LSR:
	{
		u8 lsr = 0;

		mutex_lock(&mdev_state->rxtx_lock);
		/* atleast one char in FIFO */
		if (mdev_state->s[index].rxtx.head !=
				 mdev_state->s[index].rxtx.tail)
			lsr |= UART_LSR_DR;

		/* if FIFO overrun */
		if (mdev_state->s[index].overrun)
			lsr |= UART_LSR_OE;

		/* transmit FIFO empty and tramsitter empty */
		if (mdev_state->s[index].rxtx.head ==
				 mdev_state->s[index].rxtx.tail)
			lsr |= UART_LSR_TEMT | UART_LSR_THRE;

		mutex_unlock(&mdev_state->rxtx_lock);
		*buf = lsr;
		break;
	}
	case UART_MSR:
		*buf = UART_MSR_DSR | UART_MSR_DDSR | UART_MSR_DCD;

		mutex_lock(&mdev_state->rxtx_lock);
		/* if AFE is 1 and FIFO have space, set CTS bit */
		if (mdev_state->s[index].uart_reg[UART_MCR] &
						 UART_MCR_AFE) {
			if (mdev_state->s[index].rxtx.count <
					mdev_state->s[index].max_fifo_size)
				*buf |= UART_MSR_CTS | UART_MSR_DCTS;
		} else
			*buf |= UART_MSR_CTS | UART_MSR_DCTS;
		mutex_unlock(&mdev_state->rxtx_lock);

		break;

	case UART_SCR:
		*buf = mdev_state->s[index].uart_reg[offset];
		break;

	default:
		break;
	}
}

static void mdev_read_base(struct mdev_state *mdev_state)
{
	int index, pos;
	u32 start_lo, start_hi;
	u32 mem_type;

	pos = PCI_BASE_ADDRESS_0;

	for (index = 0; index <= VFIO_PCI_BAR5_REGION_INDEX; index++) {

		if (!mdev_state->region_info[index].size)
			continue;

		start_lo = (*(u32 *)(mdev_state->vconfig + pos)) &
			PCI_BASE_ADDRESS_MEM_MASK;
		mem_type = (*(u32 *)(mdev_state->vconfig + pos)) &
			PCI_BASE_ADDRESS_MEM_TYPE_MASK;

		switch (mem_type) {
		case PCI_BASE_ADDRESS_MEM_TYPE_64:
			start_hi = (*(u32 *)(mdev_state->vconfig + pos + 4));
			pos += 4;
			break;
		case PCI_BASE_ADDRESS_MEM_TYPE_32:
		case PCI_BASE_ADDRESS_MEM_TYPE_1M:
			/* 1M mem BAR treated as 32-bit BAR */
		default:
			/* mem unknown type treated as 32-bit BAR */
			start_hi = 0;
			break;
		}
		pos += 4;
		mdev_state->region_info[index].start = ((u64)start_hi << 32) |
							start_lo;
	}
}

static ssize_t mdev_access(struct mdev_state *mdev_state, u8 *buf, size_t count,
			   loff_t pos, bool is_write)
{
	unsigned int index;
	loff_t offset;
	int ret = 0;

	if (!buf)
		return -EINVAL;

	mutex_lock(&mdev_state->ops_lock);

	index = MTTY_VFIO_PCI_OFFSET_TO_INDEX(pos);
	offset = pos & MTTY_VFIO_PCI_OFFSET_MASK;
	switch (index) {
	case VFIO_PCI_CONFIG_REGION_INDEX:

#if defined(DEBUG)
		pr_info("%s: PCI config space %s at offset 0x%llx\n",
			 __func__, is_write ? "write" : "read", offset);
#endif
		if (is_write) {
			dump_buffer(buf, count);
			handle_pci_cfg_write(mdev_state, offset, buf, count);
		} else {
			memcpy(buf, (mdev_state->vconfig + offset), count);
			dump_buffer(buf, count);
		}

		break;

	case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
		if (!mdev_state->region_info[index].start)
			mdev_read_base(mdev_state);

		if (is_write) {
			dump_buffer(buf, count);

#if defined(DEBUG_REGS)
			pr_info("%s: BAR%d  WR @0x%llx %s val:0x%02x dlab:%d\n",
				__func__, index, offset, wr_reg[offset],
				*buf, mdev_state->s[index].dlab);
#endif
			handle_bar_write(index, mdev_state, offset, buf, count);
		} else {
			handle_bar_read(index, mdev_state, offset, buf, count);
			dump_buffer(buf, count);

#if defined(DEBUG_REGS)
			pr_info("%s: BAR%d  RD @0x%llx %s val:0x%02x dlab:%d\n",
				__func__, index, offset, rd_reg[offset],
				*buf, mdev_state->s[index].dlab);
#endif
		}
		break;

	default:
		ret = -1;
		goto accessfailed;
	}

	ret = count;


accessfailed:
	mutex_unlock(&mdev_state->ops_lock);

	return ret;
}

static size_t mtty_data_size(struct mdev_state *mdev_state)
{
	return offsetof(struct mtty_data, ports) +
		(mdev_state->nr_ports * sizeof(struct serial_port));
}

static void mtty_disable_file(struct mtty_migration_file *migf)
{
	mutex_lock(&migf->lock);
	migf->disabled = true;
	migf->filled_size = 0;
	migf->filp->f_pos = 0;
	mutex_unlock(&migf->lock);
}

static void mtty_disable_files(struct mdev_state *mdev_state)
{
	if (mdev_state->saving_migf) {
		mtty_disable_file(mdev_state->saving_migf);
		fput(mdev_state->saving_migf->filp);
		mdev_state->saving_migf = NULL;
	}

	if (mdev_state->resuming_migf) {
		mtty_disable_file(mdev_state->resuming_migf);
		fput(mdev_state->resuming_migf->filp);
		mdev_state->resuming_migf = NULL;
	}
}

static void mtty_state_mutex_unlock(struct mdev_state *mdev_state)
{
again:
	mutex_lock(&mdev_state->reset_mutex);
	if (mdev_state->deferred_reset) {
		mdev_state->deferred_reset = false;
		mutex_unlock(&mdev_state->reset_mutex);
		mdev_state->state = VFIO_DEVICE_STATE_RUNNING;
		mtty_disable_files(mdev_state);
		goto again;
	}
	mutex_unlock(&mdev_state->state_mutex);
	mutex_unlock(&mdev_state->reset_mutex);
}

static int mtty_release_migf(struct inode *inode, struct file *filp)
{
	struct mtty_migration_file *migf = filp->private_data;

	mtty_disable_file(migf);
	mutex_destroy(&migf->lock);
	kfree(migf);

	return 0;
}

static long mtty_precopy_ioctl(struct file *filp, unsigned int cmd,
			       unsigned long arg)
{
	struct mtty_migration_file *migf = filp->private_data;
	struct mdev_state *mdev_state = migf->mdev_state;
	loff_t *pos = &filp->f_pos;
	struct vfio_precopy_info info = {};
	unsigned long minsz;
	int ret;

	if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
		return -ENOTTY;

	minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);

	if (copy_from_user(&info, (void __user *)arg, minsz))
		return -EFAULT;
	if (info.argsz < minsz)
		return -EINVAL;

	mutex_lock(&mdev_state->state_mutex);
	if (mdev_state->state != VFIO_DEVICE_STATE_PRE_COPY &&
	    mdev_state->state != VFIO_DEVICE_STATE_PRE_COPY_P2P) {
		ret = -EINVAL;
		goto unlock;
	}

	mutex_lock(&migf->lock);

	if (migf->disabled) {
		mutex_unlock(&migf->lock);
		ret = -ENODEV;
		goto unlock;
	}

	if (*pos > migf->filled_size) {
		mutex_unlock(&migf->lock);
		ret = -EINVAL;
		goto unlock;
	}

	info.dirty_bytes = 0;
	info.initial_bytes = migf->filled_size - *pos;
	mutex_unlock(&migf->lock);

	ret = copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
unlock:
	mtty_state_mutex_unlock(mdev_state);
	return ret;
}

static ssize_t mtty_save_read(struct file *filp, char __user *buf,
			      size_t len, loff_t *pos)
{
	struct mtty_migration_file *migf = filp->private_data;
	ssize_t ret = 0;

	if (pos)
		return -ESPIPE;

	pos = &filp->f_pos;

	mutex_lock(&migf->lock);

	dev_dbg(migf->mdev_state->vdev.dev, "%s ask %zu\n", __func__, len);

	if (migf->disabled) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (*pos > migf->filled_size) {
		ret = -EINVAL;
		goto out_unlock;
	}

	len = min_t(size_t, migf->filled_size - *pos, len);
	if (len) {
		if (copy_to_user(buf, (void *)&migf->data + *pos, len)) {
			ret = -EFAULT;
			goto out_unlock;
		}
		*pos += len;
		ret = len;
	}
out_unlock:
	dev_dbg(migf->mdev_state->vdev.dev, "%s read %zu\n", __func__, ret);
	mutex_unlock(&migf->lock);
	return ret;
}

static const struct file_operations mtty_save_fops = {
	.owner = THIS_MODULE,
	.read = mtty_save_read,
	.unlocked_ioctl = mtty_precopy_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.release = mtty_release_migf,
	.llseek = no_llseek,
};

static void mtty_save_state(struct mdev_state *mdev_state)
{
	struct mtty_migration_file *migf = mdev_state->saving_migf;
	int i;

	mutex_lock(&migf->lock);
	for (i = 0; i < mdev_state->nr_ports; i++) {
		memcpy(&migf->data.ports[i],
			&mdev_state->s[i], sizeof(struct serial_port));
		migf->filled_size += sizeof(struct serial_port);
	}
	dev_dbg(mdev_state->vdev.dev,
		"%s filled to %zu\n", __func__, migf->filled_size);
	mutex_unlock(&migf->lock);
}

static int mtty_load_state(struct mdev_state *mdev_state)
{
	struct mtty_migration_file *migf = mdev_state->resuming_migf;
	int i;

	mutex_lock(&migf->lock);
	/* magic and version already tested by resume write fn */
	if (migf->filled_size < mtty_data_size(mdev_state)) {
		dev_dbg(mdev_state->vdev.dev, "%s expected %zu, got %zu\n",
			__func__, mtty_data_size(mdev_state),
			migf->filled_size);
		mutex_unlock(&migf->lock);
		return -EINVAL;
	}

	for (i = 0; i < mdev_state->nr_ports; i++)
		memcpy(&mdev_state->s[i],
		       &migf->data.ports[i], sizeof(struct serial_port));

	mutex_unlock(&migf->lock);
	return 0;
}

static struct mtty_migration_file *
mtty_save_device_data(struct mdev_state *mdev_state,
		      enum vfio_device_mig_state state)
{
	struct mtty_migration_file *migf = mdev_state->saving_migf;
	struct mtty_migration_file *ret = NULL;

	if (migf) {
		if (state == VFIO_DEVICE_STATE_STOP_COPY)
			goto fill_data;
		return ret;
	}

	migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
	if (!migf)
		return ERR_PTR(-ENOMEM);

	migf->filp = anon_inode_getfile("mtty_mig", &mtty_save_fops,
					migf, O_RDONLY);
	if (IS_ERR(migf->filp)) {
		int rc = PTR_ERR(migf->filp);

		kfree(migf);
		return ERR_PTR(rc);
	}

	stream_open(migf->filp->f_inode, migf->filp);
	mutex_init(&migf->lock);
	migf->mdev_state = mdev_state;

	migf->data.magic = MTTY_MAGIC;
	migf->data.major_ver = MTTY_MAJOR_VER;
	migf->data.minor_ver = MTTY_MINOR_VER;
	migf->data.nr_ports = mdev_state->nr_ports;

	migf->filled_size = offsetof(struct mtty_data, ports);

	dev_dbg(mdev_state->vdev.dev, "%s filled header to %zu\n",
		__func__, migf->filled_size);

	ret = mdev_state->saving_migf = migf;

fill_data:
	if (state == VFIO_DEVICE_STATE_STOP_COPY)
		mtty_save_state(mdev_state);

	return ret;
}

static ssize_t mtty_resume_write(struct file *filp, const char __user *buf,
				 size_t len, loff_t *pos)
{
	struct mtty_migration_file *migf = filp->private_data;
	struct mdev_state *mdev_state = migf->mdev_state;
	loff_t requested_length;
	ssize_t ret = 0;

	if (pos)
		return -ESPIPE;

	pos = &filp->f_pos;

	if (*pos < 0 ||
	    check_add_overflow((loff_t)len, *pos, &requested_length))
		return -EINVAL;

	if (requested_length > mtty_data_size(mdev_state))
		return -ENOMEM;

	mutex_lock(&migf->lock);

	if (migf->disabled) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (copy_from_user((void *)&migf->data + *pos, buf, len)) {
		ret = -EFAULT;
		goto out_unlock;
	}

	*pos += len;
	ret = len;

	dev_dbg(migf->mdev_state->vdev.dev, "%s received %zu, total %zu\n",
		__func__, len, migf->filled_size + len);

	if (migf->filled_size < offsetof(struct mtty_data, ports) &&
	    migf->filled_size + len >= offsetof(struct mtty_data, ports)) {
		if (migf->data.magic != MTTY_MAGIC || migf->data.flags ||
		    migf->data.major_ver != MTTY_MAJOR_VER ||
		    migf->data.minor_ver != MTTY_MINOR_VER ||
		    migf->data.nr_ports != mdev_state->nr_ports) {
			dev_dbg(migf->mdev_state->vdev.dev,
				"%s failed validation\n", __func__);
			ret = -EFAULT;
		} else {
			dev_dbg(migf->mdev_state->vdev.dev,
				"%s header validated\n", __func__);
		}
	}

	migf->filled_size += len;

out_unlock:
	mutex_unlock(&migf->lock);
	return ret;
}

static const struct file_operations mtty_resume_fops = {
	.owner = THIS_MODULE,
	.write = mtty_resume_write,
	.release = mtty_release_migf,
	.llseek = no_llseek,
};

static struct mtty_migration_file *
mtty_resume_device_data(struct mdev_state *mdev_state)
{
	struct mtty_migration_file *migf;
	int ret;

	migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
	if (!migf)
		return ERR_PTR(-ENOMEM);

	migf->filp = anon_inode_getfile("mtty_mig", &mtty_resume_fops,
					migf, O_WRONLY);
	if (IS_ERR(migf->filp)) {
		ret = PTR_ERR(migf->filp);
		kfree(migf);
		return ERR_PTR(ret);
	}

	stream_open(migf->filp->f_inode, migf->filp);
	mutex_init(&migf->lock);
	migf->mdev_state = mdev_state;

	mdev_state->resuming_migf = migf;

	return migf;
}

static struct file *mtty_step_state(struct mdev_state *mdev_state,
				     enum vfio_device_mig_state new)
{
	enum vfio_device_mig_state cur = mdev_state->state;

	dev_dbg(mdev_state->vdev.dev, "%s: %d -> %d\n", __func__, cur, new);

	/*
	 * The following state transitions are no-op considering
	 * mtty does not do DMA nor require any explicit start/stop.
	 *
	 *         RUNNING -> RUNNING_P2P
	 *         RUNNING_P2P -> RUNNING
	 *         RUNNING_P2P -> STOP
	 *         PRE_COPY -> PRE_COPY_P2P
	 *         PRE_COPY_P2P -> PRE_COPY
	 *         STOP -> RUNNING_P2P
	 */
	if ((cur == VFIO_DEVICE_STATE_RUNNING &&
	     new == VFIO_DEVICE_STATE_RUNNING_P2P) ||
	    (cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
	     (new == VFIO_DEVICE_STATE_RUNNING ||
	      new == VFIO_DEVICE_STATE_STOP)) ||
	    (cur == VFIO_DEVICE_STATE_PRE_COPY &&
	     new == VFIO_DEVICE_STATE_PRE_COPY_P2P) ||
	    (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P &&
	     new == VFIO_DEVICE_STATE_PRE_COPY) ||
	    (cur == VFIO_DEVICE_STATE_STOP &&
	     new == VFIO_DEVICE_STATE_RUNNING_P2P))
		return NULL;

	/*
	 * The following state transitions simply close migration files,
	 * with the exception of RESUMING -> STOP, which needs to load
	 * the state first.
	 *
	 *         RESUMING -> STOP
	 *         PRE_COPY -> RUNNING
	 *         PRE_COPY_P2P -> RUNNING_P2P
	 *         STOP_COPY -> STOP
	 */
	if (cur == VFIO_DEVICE_STATE_RESUMING &&
	    new == VFIO_DEVICE_STATE_STOP) {
		int ret;

		ret = mtty_load_state(mdev_state);
		if (ret)
			return ERR_PTR(ret);
		mtty_disable_files(mdev_state);
		return NULL;
	}

	if ((cur == VFIO_DEVICE_STATE_PRE_COPY &&
	     new == VFIO_DEVICE_STATE_RUNNING) ||
	    (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P &&
	     new == VFIO_DEVICE_STATE_RUNNING_P2P) ||
	    (cur == VFIO_DEVICE_STATE_STOP_COPY &&
	     new == VFIO_DEVICE_STATE_STOP)) {
		mtty_disable_files(mdev_state);
		return NULL;
	}

	/*
	 * The following state transitions return migration files.
	 *
	 *         RUNNING -> PRE_COPY
	 *         RUNNING_P2P -> PRE_COPY_P2P
	 *         STOP -> STOP_COPY
	 *         STOP -> RESUMING
	 *         PRE_COPY_P2P -> STOP_COPY
	 */
	if ((cur == VFIO_DEVICE_STATE_RUNNING &&
	     new == VFIO_DEVICE_STATE_PRE_COPY) ||
	    (cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
	     new == VFIO_DEVICE_STATE_PRE_COPY_P2P) ||
	    (cur == VFIO_DEVICE_STATE_STOP &&
	     new == VFIO_DEVICE_STATE_STOP_COPY) ||
	    (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P &&
	     new == VFIO_DEVICE_STATE_STOP_COPY)) {
		struct mtty_migration_file *migf;

		migf = mtty_save_device_data(mdev_state, new);
		if (IS_ERR(migf))
			return ERR_CAST(migf);

		if (migf) {
			get_file(migf->filp);

			return migf->filp;
		}
		return NULL;
	}

	if (cur == VFIO_DEVICE_STATE_STOP &&
	    new == VFIO_DEVICE_STATE_RESUMING) {
		struct mtty_migration_file *migf;

		migf = mtty_resume_device_data(mdev_state);
		if (IS_ERR(migf))
			return ERR_CAST(migf);

		get_file(migf->filp);

		return migf->filp;
	}

	/* vfio_mig_get_next_state() does not use arcs other than the above */
	WARN_ON(true);
	return ERR_PTR(-EINVAL);
}

static struct file *mtty_set_state(struct vfio_device *vdev,
				   enum vfio_device_mig_state new_state)
{
	struct mdev_state *mdev_state =
		container_of(vdev, struct mdev_state, vdev);
	struct file *ret = NULL;

	dev_dbg(vdev->dev, "%s -> %d\n", __func__, new_state);

	mutex_lock(&mdev_state->state_mutex);
	while (mdev_state->state != new_state) {
		enum vfio_device_mig_state next_state;
		int rc = vfio_mig_get_next_state(vdev, mdev_state->state,
						 new_state, &next_state);
		if (rc) {
			ret = ERR_PTR(rc);
			break;
		}

		ret = mtty_step_state(mdev_state, next_state);
		if (IS_ERR(ret))
			break;

		mdev_state->state = next_state;

		if (WARN_ON(ret && new_state != next_state)) {
			fput(ret);
			ret = ERR_PTR(-EINVAL);
			break;
		}
	}
	mtty_state_mutex_unlock(mdev_state);
	return ret;
}

static int mtty_get_state(struct vfio_device *vdev,
			  enum vfio_device_mig_state *current_state)
{
	struct mdev_state *mdev_state =
		container_of(vdev, struct mdev_state, vdev);

	mutex_lock(&mdev_state->state_mutex);
	*current_state = mdev_state->state;
	mtty_state_mutex_unlock(mdev_state);
	return 0;
}

static int mtty_get_data_size(struct vfio_device *vdev,
			      unsigned long *stop_copy_length)
{
	struct mdev_state *mdev_state =
		container_of(vdev, struct mdev_state, vdev);

	*stop_copy_length = mtty_data_size(mdev_state);
	return 0;
}

static const struct vfio_migration_ops mtty_migration_ops = {
	.migration_set_state = mtty_set_state,
	.migration_get_state = mtty_get_state,
	.migration_get_data_size = mtty_get_data_size,
};

static int mtty_log_start(struct vfio_device *vdev,
			  struct rb_root_cached *ranges,
			  u32 nnodes, u64 *page_size)
{
	return 0;
}

static int mtty_log_stop(struct vfio_device *vdev)
{
	return 0;
}

static int mtty_log_read_and_clear(struct vfio_device *vdev,
				   unsigned long iova, unsigned long length,
				   struct iova_bitmap *dirty)
{
	return 0;
}

static const struct vfio_log_ops mtty_log_ops = {
	.log_start = mtty_log_start,
	.log_stop = mtty_log_stop,
	.log_read_and_clear = mtty_log_read_and_clear,
};

static int mtty_init_dev(struct vfio_device *vdev)
{
	struct mdev_state *mdev_state =
		container_of(vdev, struct mdev_state, vdev);
	struct mdev_device *mdev = to_mdev_device(vdev->dev);
	struct mtty_type *type =
		container_of(mdev->type, struct mtty_type, type);
	int avail_ports = atomic_read(&mdev_avail_ports);
	int ret;

	do {
		if (avail_ports < type->nr_ports)
			return -ENOSPC;
	} while (!atomic_try_cmpxchg(&mdev_avail_ports,
				     &avail_ports,
				     avail_ports - type->nr_ports));

	mdev_state->nr_ports = type->nr_ports;
	mdev_state->irq_index = -1;
	mdev_state->s[0].max_fifo_size = MAX_FIFO_SIZE;
	mdev_state->s[1].max_fifo_size = MAX_FIFO_SIZE;
	mutex_init(&mdev_state->rxtx_lock);

	mdev_state->vconfig = kzalloc(MTTY_CONFIG_SPACE_SIZE, GFP_KERNEL);
	if (!mdev_state->vconfig) {
		ret = -ENOMEM;
		goto err_nr_ports;
	}

	mutex_init(&mdev_state->ops_lock);
	mdev_state->mdev = mdev;
	mtty_create_config_space(mdev_state);

	mutex_init(&mdev_state->state_mutex);
	mutex_init(&mdev_state->reset_mutex);
	vdev->migration_flags = VFIO_MIGRATION_STOP_COPY |
				VFIO_MIGRATION_P2P |
				VFIO_MIGRATION_PRE_COPY;
	vdev->mig_ops = &mtty_migration_ops;
	vdev->log_ops = &mtty_log_ops;
	mdev_state->state = VFIO_DEVICE_STATE_RUNNING;

	return 0;

err_nr_ports:
	atomic_add(type->nr_ports, &mdev_avail_ports);
	return ret;
}

static int mtty_probe(struct mdev_device *mdev)
{
	struct mdev_state *mdev_state;
	int ret;

	mdev_state = vfio_alloc_device(mdev_state, vdev, &mdev->dev,
				       &mtty_dev_ops);
	if (IS_ERR(mdev_state))
		return PTR_ERR(mdev_state);

	ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev);
	if (ret)
		goto err_put_vdev;
	dev_set_drvdata(&mdev->dev, mdev_state);
	return 0;

err_put_vdev:
	vfio_put_device(&mdev_state->vdev);
	return ret;
}

static void mtty_release_dev(struct vfio_device *vdev)
{
	struct mdev_state *mdev_state =
		container_of(vdev, struct mdev_state, vdev);

	mutex_destroy(&mdev_state->reset_mutex);
	mutex_destroy(&mdev_state->state_mutex);
	atomic_add(mdev_state->nr_ports, &mdev_avail_ports);
	kfree(mdev_state->vconfig);
}

static void mtty_remove(struct mdev_device *mdev)
{
	struct mdev_state *mdev_state = dev_get_drvdata(&mdev->dev);

	vfio_unregister_group_dev(&mdev_state->vdev);
	vfio_put_device(&mdev_state->vdev);
}

static int mtty_reset(struct mdev_state *mdev_state)
{
	pr_info("%s: called\n", __func__);

	mutex_lock(&mdev_state->reset_mutex);
	mdev_state->deferred_reset = true;
	if (!mutex_trylock(&mdev_state->state_mutex)) {
		mutex_unlock(&mdev_state->reset_mutex);
		return 0;
	}
	mutex_unlock(&mdev_state->reset_mutex);
	mtty_state_mutex_unlock(mdev_state);

	return 0;
}

static ssize_t mtty_read(struct vfio_device *vdev, char __user *buf,
			 size_t count, loff_t *ppos)
{
	struct mdev_state *mdev_state =
		container_of(vdev, struct mdev_state, vdev);
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			ret =  mdev_access(mdev_state, (u8 *)&val, sizeof(val),
					   *ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val),
					  *ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 2;
		} else {
			u8 val;

			ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val),
					  *ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;

read_err:
	return -EFAULT;
}

static ssize_t mtty_write(struct vfio_device *vdev, const char __user *buf,
		   size_t count, loff_t *ppos)
{
	struct mdev_state *mdev_state =
		container_of(vdev, struct mdev_state, vdev);
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val),
					  *ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val),
					  *ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 2;
		} else {
			u8 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val),
					  *ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 1;
		}
		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;
write_err:
	return -EFAULT;
}

static void mtty_disable_intx(struct mdev_state *mdev_state)
{
	if (mdev_state->intx_evtfd) {
		eventfd_ctx_put(mdev_state->intx_evtfd);
		mdev_state->intx_evtfd = NULL;
		mdev_state->intx_mask = false;
		mdev_state->irq_index = -1;
	}
}

static void mtty_disable_msi(struct mdev_state *mdev_state)
{
	if (mdev_state->msi_evtfd) {
		eventfd_ctx_put(mdev_state->msi_evtfd);
		mdev_state->msi_evtfd = NULL;
		mdev_state->irq_index = -1;
	}
}

static int mtty_set_irqs(struct mdev_state *mdev_state, uint32_t flags,
			 unsigned int index, unsigned int start,
			 unsigned int count, void *data)
{
	int ret = 0;

	mutex_lock(&mdev_state->ops_lock);
	switch (index) {
	case VFIO_PCI_INTX_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
			if (!is_intx(mdev_state) || start != 0 || count != 1) {
				ret = -EINVAL;
				break;
			}

			if (flags & VFIO_IRQ_SET_DATA_NONE) {
				mdev_state->intx_mask = true;
			} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
				uint8_t mask = *(uint8_t *)data;

				if (mask)
					mdev_state->intx_mask = true;
			} else if (flags &  VFIO_IRQ_SET_DATA_EVENTFD) {
				ret = -ENOTTY; /* No support for mask fd */
			}
			break;
		case VFIO_IRQ_SET_ACTION_UNMASK:
			if (!is_intx(mdev_state) || start != 0 || count != 1) {
				ret = -EINVAL;
				break;
			}

			if (flags & VFIO_IRQ_SET_DATA_NONE) {
				mdev_state->intx_mask = false;
			} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
				uint8_t mask = *(uint8_t *)data;

				if (mask)
					mdev_state->intx_mask = false;
			} else if (flags &  VFIO_IRQ_SET_DATA_EVENTFD) {
				ret = -ENOTTY; /* No support for unmask fd */
			}
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			if (is_intx(mdev_state) && !count &&
			    (flags & VFIO_IRQ_SET_DATA_NONE)) {
				mtty_disable_intx(mdev_state);
				break;
			}

			if (!(is_intx(mdev_state) || is_noirq(mdev_state)) ||
			    start != 0 || count != 1) {
				ret = -EINVAL;
				break;
			}

			if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
				int fd = *(int *)data;
				struct eventfd_ctx *evt;

				mtty_disable_intx(mdev_state);

				if (fd < 0)
					break;

				evt = eventfd_ctx_fdget(fd);
				if (IS_ERR(evt)) {
					ret = PTR_ERR(evt);
					break;
				}
				mdev_state->intx_evtfd = evt;
				mdev_state->irq_index = index;
				break;
			}

			if (!is_intx(mdev_state)) {
				ret = -EINVAL;
				break;
			}

			if (flags & VFIO_IRQ_SET_DATA_NONE) {
				mtty_trigger_interrupt(mdev_state);
			} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
				uint8_t trigger = *(uint8_t *)data;

				if (trigger)
					mtty_trigger_interrupt(mdev_state);
			}
			break;
		}
		break;
	case VFIO_PCI_MSI_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
		case VFIO_IRQ_SET_ACTION_UNMASK:
			ret = -ENOTTY;
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			if (is_msi(mdev_state) && !count &&
			    (flags & VFIO_IRQ_SET_DATA_NONE)) {
				mtty_disable_msi(mdev_state);
				break;
			}

			if (!(is_msi(mdev_state) || is_noirq(mdev_state)) ||
			    start != 0 || count != 1) {
				ret = -EINVAL;
				break;
			}

			if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
				int fd = *(int *)data;
				struct eventfd_ctx *evt;

				mtty_disable_msi(mdev_state);

				if (fd < 0)
					break;

				evt = eventfd_ctx_fdget(fd);
				if (IS_ERR(evt)) {
					ret = PTR_ERR(evt);
					break;
				}
				mdev_state->msi_evtfd = evt;
				mdev_state->irq_index = index;
				break;
			}

			if (!is_msi(mdev_state)) {
				ret = -EINVAL;
				break;
			}

			if (flags & VFIO_IRQ_SET_DATA_NONE) {
				mtty_trigger_interrupt(mdev_state);
			} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
				uint8_t trigger = *(uint8_t *)data;

				if (trigger)
					mtty_trigger_interrupt(mdev_state);
			}
			break;
		}
		break;
	case VFIO_PCI_MSIX_IRQ_INDEX:
		dev_dbg(mdev_state->vdev.dev, "%s: MSIX_IRQ\n", __func__);
		ret = -ENOTTY;
		break;
	case VFIO_PCI_ERR_IRQ_INDEX:
		dev_dbg(mdev_state->vdev.dev, "%s: ERR_IRQ\n", __func__);
		ret = -ENOTTY;
		break;
	case VFIO_PCI_REQ_IRQ_INDEX:
		dev_dbg(mdev_state->vdev.dev, "%s: REQ_IRQ\n", __func__);
		ret = -ENOTTY;
		break;
	}

	mutex_unlock(&mdev_state->ops_lock);
	return ret;
}

static int mtty_get_region_info(struct mdev_state *mdev_state,
			 struct vfio_region_info *region_info,
			 u16 *cap_type_id, void **cap_type)
{
	unsigned int size = 0;
	u32 bar_index;

	bar_index = region_info->index;
	if (bar_index >= VFIO_PCI_NUM_REGIONS)
		return -EINVAL;

	mutex_lock(&mdev_state->ops_lock);

	switch (bar_index) {
	case VFIO_PCI_CONFIG_REGION_INDEX:
		size = MTTY_CONFIG_SPACE_SIZE;
		break;
	case VFIO_PCI_BAR0_REGION_INDEX:
		size = MTTY_IO_BAR_SIZE;
		break;
	case VFIO_PCI_BAR1_REGION_INDEX:
		if (mdev_state->nr_ports == 2)
			size = MTTY_IO_BAR_SIZE;
		break;
	default:
		size = 0;
		break;
	}

	mdev_state->region_info[bar_index].size = size;
	mdev_state->region_info[bar_index].vfio_offset =
		MTTY_VFIO_PCI_INDEX_TO_OFFSET(bar_index);

	region_info->size = size;
	region_info->offset = MTTY_VFIO_PCI_INDEX_TO_OFFSET(bar_index);
	region_info->flags = VFIO_REGION_INFO_FLAG_READ |
		VFIO_REGION_INFO_FLAG_WRITE;
	mutex_unlock(&mdev_state->ops_lock);
	return 0;
}

static int mtty_get_irq_info(struct vfio_irq_info *irq_info)
{
	if (irq_info->index != VFIO_PCI_INTX_IRQ_INDEX &&
	    irq_info->index != VFIO_PCI_MSI_IRQ_INDEX)
		return -EINVAL;

	irq_info->flags = VFIO_IRQ_INFO_EVENTFD;
	irq_info->count = 1;

	if (irq_info->index == VFIO_PCI_INTX_IRQ_INDEX)
		irq_info->flags |= VFIO_IRQ_INFO_MASKABLE |
				   VFIO_IRQ_INFO_AUTOMASKED;
	else
		irq_info->flags |= VFIO_IRQ_INFO_NORESIZE;

	return 0;
}

static int mtty_get_device_info(struct vfio_device_info *dev_info)
{
	dev_info->flags = VFIO_DEVICE_FLAGS_PCI;
	dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
	dev_info->num_irqs = VFIO_PCI_NUM_IRQS;

	return 0;
}

static long mtty_ioctl(struct vfio_device *vdev, unsigned int cmd,
			unsigned long arg)
{
	struct mdev_state *mdev_state =
		container_of(vdev, struct mdev_state, vdev);
	int ret = 0;
	unsigned long minsz;

	switch (cmd) {
	case VFIO_DEVICE_GET_INFO:
	{
		struct vfio_device_info info;

		minsz = offsetofend(struct vfio_device_info, num_irqs);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		ret = mtty_get_device_info(&info);
		if (ret)
			return ret;

		memcpy(&mdev_state->dev_info, &info, sizeof(info));

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_DEVICE_GET_REGION_INFO:
	{
		struct vfio_region_info info;
		u16 cap_type_id = 0;
		void *cap_type = NULL;

		minsz = offsetofend(struct vfio_region_info, offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		ret = mtty_get_region_info(mdev_state, &info, &cap_type_id,
					   &cap_type);
		if (ret)
			return ret;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}

	case VFIO_DEVICE_GET_IRQ_INFO:
	{
		struct vfio_irq_info info;

		minsz = offsetofend(struct vfio_irq_info, count);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if ((info.argsz < minsz) ||
		    (info.index >= mdev_state->dev_info.num_irqs))
			return -EINVAL;

		ret = mtty_get_irq_info(&info);
		if (ret)
			return ret;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_DEVICE_SET_IRQS:
	{
		struct vfio_irq_set hdr;
		u8 *data = NULL, *ptr = NULL;
		size_t data_size = 0;

		minsz = offsetofend(struct vfio_irq_set, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		ret = vfio_set_irqs_validate_and_prepare(&hdr,
						mdev_state->dev_info.num_irqs,
						VFIO_PCI_NUM_IRQS,
						&data_size);
		if (ret)
			return ret;

		if (data_size) {
			ptr = data = memdup_user((void __user *)(arg + minsz),
						 data_size);
			if (IS_ERR(data))
				return PTR_ERR(data);
		}

		ret = mtty_set_irqs(mdev_state, hdr.flags, hdr.index, hdr.start,
				    hdr.count, data);

		kfree(ptr);
		return ret;
	}
	case VFIO_DEVICE_RESET:
		return mtty_reset(mdev_state);
	}
	return -ENOTTY;
}

static ssize_t
sample_mdev_dev_show(struct device *dev, struct device_attribute *attr,
		     char *buf)
{
	return sprintf(buf, "This is MDEV %s\n", dev_name(dev));
}

static DEVICE_ATTR_RO(sample_mdev_dev);

static struct attribute *mdev_dev_attrs[] = {
	&dev_attr_sample_mdev_dev.attr,
	NULL,
};

static const struct attribute_group mdev_dev_group = {
	.name  = "vendor",
	.attrs = mdev_dev_attrs,
};

static const struct attribute_group *mdev_dev_groups[] = {
	&mdev_dev_group,
	NULL,
};

static unsigned int mtty_get_available(struct mdev_type *mtype)
{
	struct mtty_type *type = container_of(mtype, struct mtty_type, type);

	return atomic_read(&mdev_avail_ports) / type->nr_ports;
}

static void mtty_close(struct vfio_device *vdev)
{
	struct mdev_state *mdev_state =
				container_of(vdev, struct mdev_state, vdev);

	mtty_disable_files(mdev_state);
	mtty_disable_intx(mdev_state);
	mtty_disable_msi(mdev_state);
}

static const struct vfio_device_ops mtty_dev_ops = {
	.name = "vfio-mtty",
	.init = mtty_init_dev,
	.release = mtty_release_dev,
	.read = mtty_read,
	.write = mtty_write,
	.ioctl = mtty_ioctl,
	.bind_iommufd	= vfio_iommufd_emulated_bind,
	.unbind_iommufd	= vfio_iommufd_emulated_unbind,
	.attach_ioas	= vfio_iommufd_emulated_attach_ioas,
	.detach_ioas	= vfio_iommufd_emulated_detach_ioas,
	.close_device	= mtty_close,
};

static struct mdev_driver mtty_driver = {
	.device_api = VFIO_DEVICE_API_PCI_STRING,
	.driver = {
		.name = "mtty",
		.owner = THIS_MODULE,
		.mod_name = KBUILD_MODNAME,
		.dev_groups = mdev_dev_groups,
	},
	.probe = mtty_probe,
	.remove	= mtty_remove,
	.get_available = mtty_get_available,
};

static void mtty_device_release(struct device *dev)
{
	dev_dbg(dev, "mtty: released\n");
}

static int __init mtty_dev_init(void)
{
	int ret = 0;

	pr_info("mtty_dev: %s\n", __func__);

	memset(&mtty_dev, 0, sizeof(mtty_dev));

	idr_init(&mtty_dev.vd_idr);

	ret = alloc_chrdev_region(&mtty_dev.vd_devt, 0, MINORMASK + 1,
				  MTTY_NAME);

	if (ret < 0) {
		pr_err("Error: failed to register mtty_dev, err:%d\n", ret);
		return ret;
	}

	cdev_init(&mtty_dev.vd_cdev, &vd_fops);
	cdev_add(&mtty_dev.vd_cdev, mtty_dev.vd_devt, MINORMASK + 1);

	pr_info("major_number:%d\n", MAJOR(mtty_dev.vd_devt));

	ret = mdev_register_driver(&mtty_driver);
	if (ret)
		goto err_cdev;

	mtty_dev.vd_class = class_create(MTTY_CLASS_NAME);

	if (IS_ERR(mtty_dev.vd_class)) {
		pr_err("Error: failed to register mtty_dev class\n");
		ret = PTR_ERR(mtty_dev.vd_class);
		goto err_driver;
	}

	mtty_dev.dev.class = mtty_dev.vd_class;
	mtty_dev.dev.release = mtty_device_release;
	dev_set_name(&mtty_dev.dev, "%s", MTTY_NAME);

	ret = device_register(&mtty_dev.dev);
	if (ret)
		goto err_put;

	ret = mdev_register_parent(&mtty_dev.parent, &mtty_dev.dev,
				   &mtty_driver, mtty_mdev_types,
				   ARRAY_SIZE(mtty_mdev_types));
	if (ret)
		goto err_device;
	return 0;

err_device:
	device_del(&mtty_dev.dev);
err_put:
	put_device(&mtty_dev.dev);
	class_destroy(mtty_dev.vd_class);
err_driver:
	mdev_unregister_driver(&mtty_driver);
err_cdev:
	cdev_del(&mtty_dev.vd_cdev);
	unregister_chrdev_region(mtty_dev.vd_devt, MINORMASK + 1);
	return ret;
}

static void __exit mtty_dev_exit(void)
{
	mtty_dev.dev.bus = NULL;
	mdev_unregister_parent(&mtty_dev.parent);

	device_unregister(&mtty_dev.dev);
	idr_destroy(&mtty_dev.vd_idr);
	mdev_unregister_driver(&mtty_driver);
	cdev_del(&mtty_dev.vd_cdev);
	unregister_chrdev_region(mtty_dev.vd_devt, MINORMASK + 1);
	class_destroy(mtty_dev.vd_class);
	mtty_dev.vd_class = NULL;
	pr_info("mtty_dev: Unloaded!\n");
}

module_init(mtty_dev_init)
module_exit(mtty_dev_exit)

MODULE_LICENSE("GPL v2");
MODULE_INFO(supported, "Test driver that simulate serial port over PCI");
MODULE_VERSION(VERSION_STRING);
MODULE_AUTHOR(DRIVER_AUTHOR);
back to top