Revision 6e474083f3daf3a3546737f5d7d502ad12eb257c authored by Wei Xu on 01 December 2017, 10:10:36 UTC, committed by David S. Miller on 03 December 2017, 02:31:03 UTC
Matthew found a roughly 40% tcp throughput regression with commit
c67df11f(vhost_net: try batch dequing from skb array) as discussed
in the following thread:
https://www.mail-archive.com/netdev@vger.kernel.org/msg187936.html

Eventually we figured out that it was an skb leak in handle_rx()
when sending packets to the VM. This usually happens when the guest
cannot drain the vq as fast as vhost fills it; the resulting traffic
jam leaks skb(s), because vhost has no headcount left to send on
the vq.

This can be avoided by making sure we have enough headcount before
actually consuming an skb from the batched rx array while
transmitting, which is done simply by moving the zero-headcount
check a bit earlier.

Signed-off-by: Wei Xu <wexu@redhat.com>
Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent fa935ca
Raw File
hw-txe.h
/*
 *
 * Intel Management Engine Interface (Intel MEI) Linux driver
 * Copyright (c) 2013-2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#ifndef _MEI_HW_TXE_H_
#define _MEI_HW_TXE_H_

#include <linux/irqreturn.h>

#include "hw.h"
#include "hw-txe-regs.h"

#define MEI_TXI_RPM_TIMEOUT    500 /* ms */

/* Flatten Hierarchy interrupt cause */
#define TXE_INTR_READINESS_BIT  0 /* HISR_INT_0_STS */
#define TXE_INTR_READINESS      HISR_INT_0_STS
#define TXE_INTR_ALIVENESS_BIT  1 /* HISR_INT_1_STS */
#define TXE_INTR_ALIVENESS      HISR_INT_1_STS
#define TXE_INTR_OUT_DB_BIT     2 /* HISR_INT_2_STS */
#define TXE_INTR_OUT_DB         HISR_INT_2_STS
#define TXE_INTR_IN_READY_BIT   8 /* beyond HISR */
#define TXE_INTR_IN_READY       BIT(8)

/**
 * struct mei_txe_hw - txe hardware specifics
 *
 * Hardware-specific state kept by the TXE flavour of the MEI driver.
 *
 * @mem_addr:            SeC and BRIDGE bars; a pointer to const pointers
 *                       into __iomem space
 * @aliveness:           aliveness (power gating) state of the hardware
 * @readiness:           readiness state of the hardware
 * @slots:               number of empty slots
 * @wait_aliveness_resp: aliveness wait queue
 * @intr_cause:          translated interrupt cause
 */
struct mei_txe_hw {
	/* the bar pointers themselves cannot be modified through mem_addr */
	void __iomem * const *mem_addr;
	u32 aliveness;
	u32 readiness;
	u32 slots;

	wait_queue_head_t wait_aliveness_resp;

	/*
	 * NOTE(review): presumably a bitmask indexed by the TXE_INTR_*_BIT
	 * values - confirm against the interrupt handlers.
	 */
	unsigned long intr_cause;
};

/*
 * to_txe_hw - get the TXE hardware-specific data of a device
 *
 * Casts the hw member of @dev to struct mei_txe_hw *. The whole expansion
 * is parenthesized so that the result can be used inside a larger
 * expression, e.g. to_txe_hw(dev)->slots; without the outer parentheses
 * the cast would bind to the result of the member access instead.
 */
#define to_txe_hw(dev) ((struct mei_txe_hw *)((dev)->hw))

/*
 * hw_txe_to_mei - get the mei device that embeds the given TXE hw data
 *
 * @hw: pointer to the hw member of a struct mei_device
 *
 * Return: the enclosing struct mei_device
 */
static inline struct mei_device *hw_txe_to_mei(struct mei_txe_hw *hw)
{
	/* container_of() is handed an untyped pointer to the hw member */
	void *hw_member = hw;

	return container_of(hw_member, struct mei_device, hw);
}

/*
 * Functions below are only declared here; their definitions are not part
 * of this header.
 */

/* Takes a PCI device and returns a struct mei_device pointer. */
struct mei_device *mei_txe_dev_init(struct pci_dev *pdev);

/*
 * Interrupt handlers.
 * NOTE(review): presumably the quick (hard-irq) and threaded halves of a
 * threaded interrupt; dev_id is an opaque cookie - confirm at the
 * registration site.
 */
irqreturn_t mei_txe_irq_quick_handler(int irq, void *dev_id);
irqreturn_t mei_txe_irq_thread_handler(int irq, void *dev_id);

/*
 * NOTE(review): by its name, requests aliveness state @req and waits for
 * the result; the meaning of the int return is not visible here - confirm.
 */
int mei_txe_aliveness_set_sync(struct mei_device *dev, u32 req);

/*
 * NOTE(review): by its name, configures "satt2" with a physical address
 * and a range; exact semantics and return values are not visible here.
 */
int mei_txe_setup_satt2(struct mei_device *dev, phys_addr_t addr, u32 range);


#endif /* _MEI_HW_TXE_H_ */
back to top