Discussion:
[dpdk-dev] [PATCH v4 0/8] support reset of VF link
(too old to reply)
Zhe Tao
2016-06-07 06:53:02 UTC
Permalink
If the PF link is down and up, VF link will not work
accordingly.
This patch set addes the support of VF link reset. So, when VF
receices the messges of physical link down/up. APP can reset the
VF link and let it recover.

PS: This patch set is splitted from a previous patch set, *automatic
link recovery on ixgbe/igb VF*, and it's base on the patch set
*support mailbox interruption on ixgbe/igb VF*.

Wenzhuo Lu (6):
lib/librte_ether: support device reset
lib/librte_ether: defind RX/TX lock mode
ixgbe: RX/TX with lock on VF
ixgbe: implement device reset on VF
igb: RX/TX with lock on VF
igb: implement device reset on VF

Zhe Tao (2):
i40e: RX/TX with lock on VF
i40e: implement device reset on VF

v1:
Added the implementation for the VF reset functionality.
v2:
Changed the i40e related operations during VF reset.
v3:
Resent the patches because of the mail sent issue.
v4:
Removed some VF reset emulation code.

doc/guides/rel_notes/release_16_07.rst | 14 +++
drivers/net/e1000/e1000_ethdev.h | 126 +++++++++++++++++++++++++++
drivers/net/e1000/igb_ethdev.c | 118 ++++++++++++++++++++++++-
drivers/net/e1000/igb_rxtx.c | 148 +++++++++-----------------------
drivers/net/i40e/i40e_ethdev.c | 4 +-
drivers/net/i40e/i40e_ethdev.h | 5 ++
drivers/net/i40e/i40e_ethdev_vf.c | 152 ++++++++++++++++++++++++++++++++-
drivers/net/i40e/i40e_rxtx.c | 55 ++++++++----
drivers/net/i40e/i40e_rxtx.h | 34 ++++++++
drivers/net/ixgbe/ixgbe_ethdev.c | 120 +++++++++++++++++++++++++-
drivers/net/ixgbe/ixgbe_ethdev.h | 32 ++++++-
drivers/net/ixgbe/ixgbe_rxtx.c | 116 ++++++++++++++++++++++---
drivers/net/ixgbe/ixgbe_rxtx.h | 13 +++
drivers/net/ixgbe/ixgbe_rxtx_vec.c | 6 ++
lib/librte_ether/rte_ethdev.c | 17 ++++
lib/librte_ether/rte_ethdev.h | 76 +++++++++++++++++
lib/librte_ether/rte_ether_version.map | 7 ++
17 files changed, 895 insertions(+), 148 deletions(-)
--
2.1.4
Zhe Tao
2016-06-07 06:53:04 UTC
Permalink
Define lock mode for RX/TX queue. Because when resetting
the device we want the resetting thread to get the lock
of the RX/TX queue to make sure the RX/TX is stopped.

Using next ABI macro for this ABI change as it has too
much impact. 7 APIs and 1 global variable are impacted.

Signed-off-by: Wenzhuo Lu <***@intel.com>
Signed-off-by: Zhe Tao <***@intel.com>
---
lib/librte_ether/rte_ethdev.h | 62 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 62 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 74e895f..4efb5e9 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -354,7 +354,12 @@ struct rte_eth_rxmode {
jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */
hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */
enable_scatter : 1, /**< Enable scatter packets rx handler */
+#ifndef RTE_NEXT_ABI
enable_lro : 1; /**< Enable LRO */
+#else
+ enable_lro : 1, /**< Enable LRO */
+ lock_mode : 1; /**< Using lock path */
+#endif
};

/**
@@ -634,11 +639,68 @@ struct rte_eth_txmode {
/**< If set, reject sending out tagged pkts */
hw_vlan_reject_untagged : 1,
/**< If set, reject sending out untagged pkts */
+#ifndef RTE_NEXT_ABI
hw_vlan_insert_pvid : 1;
/**< If set, enable port based VLAN insertion */
+#else
+ hw_vlan_insert_pvid : 1,
+ /**< If set, enable port based VLAN insertion */
+ lock_mode : 1;
+ /**< If set, using lock path */
+#endif
};

/**
+ * The macros for the RX/TX lock mode functions
+ */
+#ifdef RTE_NEXT_ABI
+#define RX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.rxmode.lock_mode ? \
+ func ## _lock : func)
+
+#define TX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.txmode.lock_mode ? \
+ func ## _lock : func)
+#else
+#define RX_LOCK_FUNCTION(dev, func) func
+
+#define TX_LOCK_FUNCTION(dev, func) func
+#endif
+
+/* Add the lock RX/TX function for VF reset */
+#define GENERATE_RX_LOCK(func, nic) \
+uint16_t func ## _lock(void *rx_queue, \
+ struct rte_mbuf **rx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _rx_queue *rxq = rx_queue; \
+ uint16_t nb_rx = 0; \
+ \
+ if (rte_spinlock_trylock(&rxq->rx_lock)) { \
+ nb_rx = func(rx_queue, rx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&rxq->rx_lock); \
+ } \
+ \
+ return nb_rx; \
+}
+
+#define GENERATE_TX_LOCK(func, nic) \
+uint16_t func ## _lock(void *tx_queue, \
+ struct rte_mbuf **tx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _tx_queue *txq = tx_queue; \
+ uint16_t nb_tx = 0; \
+ \
+ if (rte_spinlock_trylock(&txq->tx_lock)) { \
+ nb_tx = func(tx_queue, tx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&txq->tx_lock); \
+ } \
+ \
+ return nb_tx; \
+}
+
+/**
* A structure used to configure an RX ring of an Ethernet port.
*/
struct rte_eth_rxconf {
--
2.1.4
Ananyev, Konstantin
2016-06-07 09:58:36 UTC
Permalink
Hi Zhe & Wenzhuo,

Please find my comments below.
BTW, for clarification - is that patch for 16.11?
I believe it's too late to introduce such significant change in 16.07.
Thanks
Konstantin
Post by Zhe Tao
Define lock mode for RX/TX queue. Because when resetting
the device we want the resetting thread to get the lock
of the RX/TX queue to make sure the RX/TX is stopped.
Using next ABI macro for this ABI change as it has too
much impact. 7 APIs and 1 global variable are impacted.
---
lib/librte_ether/rte_ethdev.h | 62 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 74e895f..4efb5e9 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -354,7 +354,12 @@ struct rte_eth_rxmode {
jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */
hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */
enable_scatter : 1, /**< Enable scatter packets rx handler */
+#ifndef RTE_NEXT_ABI
enable_lro : 1; /**< Enable LRO */
+#else
+ enable_lro : 1, /**< Enable LRO */
+ lock_mode : 1; /**< Using lock path */
+#endif
};
/**
@@ -634,11 +639,68 @@ struct rte_eth_txmode {
/**< If set, reject sending out tagged pkts */
hw_vlan_reject_untagged : 1,
/**< If set, reject sending out untagged pkts */
+#ifndef RTE_NEXT_ABI
hw_vlan_insert_pvid : 1;
/**< If set, enable port based VLAN insertion */
+#else
+ hw_vlan_insert_pvid : 1,
+ /**< If set, enable port based VLAN insertion */
+ lock_mode : 1;
+ /**< If set, using lock path */
+#endif
};
/**
+ * The macros for the RX/TX lock mode functions
+ */
+#ifdef RTE_NEXT_ABI
+#define RX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.rxmode.lock_mode ? \
+ func ## _lock : func)
+
+#define TX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.txmode.lock_mode ? \
+ func ## _lock : func)
+#else
+#define RX_LOCK_FUNCTION(dev, func) func
+
+#define TX_LOCK_FUNCTION(dev, func) func
+#endif
+
+/* Add the lock RX/TX function for VF reset */
+#define GENERATE_RX_LOCK(func, nic) \
+uint16_t func ## _lock(void *rx_queue, \
+ struct rte_mbuf **rx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _rx_queue *rxq = rx_queue; \
+ uint16_t nb_rx = 0; \
+ \
+ if (rte_spinlock_trylock(&rxq->rx_lock)) { \
+ nb_rx = func(rx_queue, rx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&rxq->rx_lock); \
+ } \
+ \
+ return nb_rx; \
+}
+
+#define GENERATE_TX_LOCK(func, nic) \
+uint16_t func ## _lock(void *tx_queue, \
+ struct rte_mbuf **tx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _tx_queue *txq = tx_queue; \
+ uint16_t nb_tx = 0; \
+ \
+ if (rte_spinlock_trylock(&txq->tx_lock)) { \
+ nb_tx = func(tx_queue, tx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&txq->tx_lock); \
+ } \
+ \
+ return nb_tx; \
+}
1. As I said in off-line dicussiion, I think this locking could
(and I think better be) impelented completely on rte_ethdev layer.
So actual PMD code will be unaffected.
Again that avoids us to introduce _lock version of every RX/Tx function
in each PMD.

2. Again, as discussed offline, I think it is better to have an explicit
rte_eth_(rx|tx)_burst_lock(sync?) API, instead of add new fileds into
RX/TX config strcutures.
Would help to avoid any confusion, I think.

3. I thought the plan was to introduce a locking in all appropriate control path
functions (dev_start/dev_stop etc.)
Without that locking version of RX/TX seems a bit useless.
Yes, I understand that you do use locking inside dev_reset, but I suppose
the plan was to have a generic solution, no?
Again, interrupt fire when user invokes dev_start/stop or so, so we still
need some synchronisation between them.

To be more specific, I thought about something like that:

static inline uint16_t
rte_eth_rx_burst_lock(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **rx_pkts, const uint16_t nb_pkts)
{
struct rte_eth_dev *dev = &rte_eth_devices[port_id];

#ifdef RTE_LIBRTE_ETHDEV_DEBUG
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0);

if (queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id);
return 0;
}
#endif

+ if (rte_spinlock_trylock(&dev->data->rx_queue_state[rx_queue_id].lock) == 0)
+ return 0;
+ else if (dev->data->rx_queue_state[rx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED)) {
+ rte_spinlock_unlock(&dev->data->rx_queue_state[rx_queue_id].unlock);
+ return 0;
+

nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
rx_pkts, nb_pkts);

+ rte_spinlock_unlock(&dev->data->rx_queue_state[rx_queue_id].unlock);

....

return nb_rx;
}

And inside queue_start:

int
rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id)
{
struct rte_eth_dev *dev;

RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
return -EINVAL;
}

RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP);

rte_spinlock_lock(&dev->data->rx_queue_state[rx_queue_id].lock)

if (dev->data->rx_queue_state[rx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) {
RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
" already started\n",
rx_queue_id, port_id);
ret = -EINVAL 0;
} else
ret = dev->dev_ops->rx_queue_start(dev, rx_queue_id);

rte_spinlock_unlock(&dev->data->rx_queue_state[rx_queue_id].unlock);

return ret;
}

Then again, we don't need to do explicit locking inside dev_reset().
Does it make sense to you guys?
Post by Zhe Tao
+
+/**
* A structure used to configure an RX ring of an Ethernet port.
*/
struct rte_eth_rxconf {
--
2.1.4
Lu, Wenzhuo
2016-06-08 07:24:01 UTC
Permalink
Hi Konstantin,
-----Original Message-----
From: Ananyev, Konstantin
Sent: Tuesday, June 7, 2016 5:59 PM
Cc: Lu, Wenzhuo; Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing;
Zhang, Helin
Subject: RE: [PATCH v4 2/8] lib/librte_ether: defind RX/TX lock mode
Hi Zhe & Wenzhuo,
Please find my comments below.
BTW, for clarification - is that patch for 16.11?
I believe it's too late to introduce such significant change in 16.07.
Thanks
Konstantin
Thanks for the comments.
Honestly, our purpose is 16.07. Realizing the big impact, we use NEXT_ABI to comment our change. So, I think although we want to merge it in 16.07 this change will become effective after we remove NEXT_ABI in 16.11.
Define lock mode for RX/TX queue. Because when resetting the device we
want the resetting thread to get the lock of the RX/TX queue to make
sure the RX/TX is stopped.
Using next ABI macro for this ABI change as it has too much impact. 7
APIs and 1 global variable are impacted.
---
lib/librte_ether/rte_ethdev.h | 62
+++++++++++++++++++++++++++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/lib/librte_ether/rte_ethdev.h
b/lib/librte_ether/rte_ethdev.h index 74e895f..4efb5e9 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -354,7 +354,12 @@ struct rte_eth_rxmode {
jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */
hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */
enable_scatter : 1, /**< Enable scatter packets rx handler */
+#ifndef RTE_NEXT_ABI
enable_lro : 1; /**< Enable LRO */
+#else
+ enable_lro : 1, /**< Enable LRO */
+ lock_mode : 1; /**< Using lock path */
+#endif
};
/**
@@ -634,11 +639,68 @@ struct rte_eth_txmode {
/**< If set, reject sending out tagged pkts */
hw_vlan_reject_untagged : 1,
/**< If set, reject sending out untagged pkts */
+#ifndef RTE_NEXT_ABI
hw_vlan_insert_pvid : 1;
/**< If set, enable port based VLAN insertion */
+#else
+ hw_vlan_insert_pvid : 1,
+ /**< If set, enable port based VLAN insertion */
+ lock_mode : 1;
+ /**< If set, using lock path */
+#endif
};
/**
+ * The macros for the RX/TX lock mode functions */ #ifdef
+RTE_NEXT_ABI #define RX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.rxmode.lock_mode ? \
+ func ## _lock : func)
+
+#define TX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.txmode.lock_mode ? \
+ func ## _lock : func)
+#else
+#define RX_LOCK_FUNCTION(dev, func) func
+
+#define TX_LOCK_FUNCTION(dev, func) func #endif
+
+/* Add the lock RX/TX function for VF reset */ #define
+GENERATE_RX_LOCK(func, nic) \ uint16_t func ## _lock(void *rx_queue,
+\
+ struct rte_mbuf **rx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _rx_queue *rxq = rx_queue; \
+ uint16_t nb_rx = 0; \
+ \
+ if (rte_spinlock_trylock(&rxq->rx_lock)) { \
+ nb_rx = func(rx_queue, rx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&rxq->rx_lock); \
+ } \
+ \
+ return nb_rx; \
+}
+
+#define GENERATE_TX_LOCK(func, nic) \ uint16_t func ## _lock(void
+*tx_queue, \
+ struct rte_mbuf **tx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _tx_queue *txq = tx_queue; \
+ uint16_t nb_tx = 0; \
+ \
+ if (rte_spinlock_trylock(&txq->tx_lock)) { \
+ nb_tx = func(tx_queue, tx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&txq->tx_lock); \
+ } \
+ \
+ return nb_tx; \
+}
1. As I said in off-line dicussiion, I think this locking could (and I think better be)
impelented completely on rte_ethdev layer.
So actual PMD code will be unaffected.
Again that avoids us to introduce _lock version of every RX/Tx function in each
PMD.
One purpose of implementing the lock in PMD layer is to avoid ABI change. But we introduce the field lock_mode in struct rte_eth_rx/txmode. So seems it's not a good reason now :)
The other purpose is we want to add a lock for every queue. But in rte layer the queue is void *, so we add the lock in the specific structures of the NICs. But as you mentioned below, we can add the lock as dev->data->rx_queue_state it the struct rte_eth_dev_data.
So, I prefer to add the lock in rte layer now.
2. Again, as discussed offline, I think it is better to have an explicit
rte_eth_(rx|tx)_burst_lock(sync?) API, instead of add new fileds into RX/TX
config strcutures.
Would help to avoid any confusion, I think.
We want the users to choose the rx/tx path without lock if they're sensitive to the performance and can handle the reset event in their APP. After introducing new fields of config struct, users can change the config to choose the different path.
If we introduce new API, it may be harder for the use to use it. I mean when users want to use lock mode, they may need to replace all the rte_eth_rx/tx_burst by rte_eth_rx/tx_burst_lock. So if we add the lock in rte layer, I still prefer adding lock_mode in the configuration, and the rte_eth_rx/tx_burst is changed like this,
rte_eth_rx/tx_burst
{
+ if lock_mode
+ try_lock
......
+ if lock_mode
+ release_lock
}
3. I thought the plan was to introduce a locking in all appropriate control path
functions (dev_start/dev_stop etc.) Without that locking version of RX/TX seems
a bit useless.
Yes, I understand that you do use locking inside dev_reset, but I suppose the
plan was to have a generic solution, no?
Again, interrupt fire when user invokes dev_start/stop or so, so we still need
some synchronisation between them.
static inline uint16_t
rte_eth_rx_burst_lock(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **rx_pkts, const uint16_t nb_pkts) {
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0);
if (queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id);
return 0;
}
#endif
+ if (rte_spinlock_trylock(&dev->data->rx_queue_state[rx_queue_id].lock) == 0)
+ return 0;
+ else if (dev->data->rx_queue_state[rx_queue_id] ==
RTE_ETH_QUEUE_STATE_STOPPED)) {
+ rte_spinlock_unlock(&dev->data->rx_queue_state[rx_queue_id].unlock);
+ return 0;
+
nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
rx_pkts, nb_pkts);
+ rte_spinlock_unlock(&dev->data->rx_queue_state[rx_queue_id].unlock);
....
return nb_rx;
}
int
rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id) {
struct rte_eth_dev *dev;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP);
rte_spinlock_lock(&dev->data->rx_queue_state[rx_queue_id].lock)
I think you add the lock here to stop the rx/tx.
But to my opinion, we should lock the rx/tx much earlier before starting the queue. For example, when stop the port, the resource of the queues may be released. The rx/tx cannot be executed. So I prefer to get the lock before stopping the ports. Maybe better to keep the spinlock in the dev_reset.
if (dev->data->rx_queue_state[rx_queue_id] !=
RTE_ETH_QUEUE_STATE_STOPPED) {
RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
" already started\n",
rx_queue_id, port_id);
ret = -EINVAL 0;
} else
ret = dev->dev_ops->rx_queue_start(dev, rx_queue_id);
rte_spinlock_unlock(&dev->data->rx_queue_state[rx_queue_id].unlock);
return ret;
}
Then again, we don't need to do explicit locking inside dev_reset().
Does it make sense to you guys?
Please see the answer above.
+
+/**
* A structure used to configure an RX ring of an Ethernet port.
*/
struct rte_eth_rxconf {
--
2.1.4
Ananyev, Konstantin
2016-06-08 09:19:45 UTC
Permalink
Post by Lu, Wenzhuo
Hi Konstantin,
-----Original Message-----
From: Ananyev, Konstantin
Sent: Tuesday, June 7, 2016 5:59 PM
Cc: Lu, Wenzhuo; Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing;
Zhang, Helin
Subject: RE: [PATCH v4 2/8] lib/librte_ether: defind RX/TX lock mode
Hi Zhe & Wenzhuo,
Please find my comments below.
BTW, for clarification - is that patch for 16.11?
I believe it's too late to introduce such significant change in 16.07.
Thanks
Konstantin
Thanks for the comments.
Honestly, our purpose is 16.07. Realizing the big impact, we use NEXT_ABI to comment our change. So, I think although we want to
merge it in 16.07 this change will become effective after we remove NEXT_ABI in 16.11.
I don't think it is achievable.
First I think your code is not in proper shape yet, right now.
Second, as you said, it is a significant change and I would like to hear opinions from the rest of the community.
Post by Lu, Wenzhuo
Define lock mode for RX/TX queue. Because when resetting the device we
want the resetting thread to get the lock of the RX/TX queue to make
sure the RX/TX is stopped.
Using next ABI macro for this ABI change as it has too much impact. 7
APIs and 1 global variable are impacted.
---
lib/librte_ether/rte_ethdev.h | 62
+++++++++++++++++++++++++++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/lib/librte_ether/rte_ethdev.h
b/lib/librte_ether/rte_ethdev.h index 74e895f..4efb5e9 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -354,7 +354,12 @@ struct rte_eth_rxmode {
jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */
hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */
enable_scatter : 1, /**< Enable scatter packets rx handler */
+#ifndef RTE_NEXT_ABI
enable_lro : 1; /**< Enable LRO */
+#else
+ enable_lro : 1, /**< Enable LRO */
+ lock_mode : 1; /**< Using lock path */
+#endif
};
/**
@@ -634,11 +639,68 @@ struct rte_eth_txmode {
/**< If set, reject sending out tagged pkts */
hw_vlan_reject_untagged : 1,
/**< If set, reject sending out untagged pkts */
+#ifndef RTE_NEXT_ABI
hw_vlan_insert_pvid : 1;
/**< If set, enable port based VLAN insertion */
+#else
+ hw_vlan_insert_pvid : 1,
+ /**< If set, enable port based VLAN insertion */
+ lock_mode : 1;
+ /**< If set, using lock path */
+#endif
};
/**
+ * The macros for the RX/TX lock mode functions */ #ifdef
+RTE_NEXT_ABI #define RX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.rxmode.lock_mode ? \
+ func ## _lock : func)
+
+#define TX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.txmode.lock_mode ? \
+ func ## _lock : func)
+#else
+#define RX_LOCK_FUNCTION(dev, func) func
+
+#define TX_LOCK_FUNCTION(dev, func) func #endif
+
+/* Add the lock RX/TX function for VF reset */ #define
+GENERATE_RX_LOCK(func, nic) \ uint16_t func ## _lock(void *rx_queue,
+\
+ struct rte_mbuf **rx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _rx_queue *rxq = rx_queue; \
+ uint16_t nb_rx = 0; \
+ \
+ if (rte_spinlock_trylock(&rxq->rx_lock)) { \
+ nb_rx = func(rx_queue, rx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&rxq->rx_lock); \
+ } \
+ \
+ return nb_rx; \
+}
+
+#define GENERATE_TX_LOCK(func, nic) \ uint16_t func ## _lock(void
+*tx_queue, \
+ struct rte_mbuf **tx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _tx_queue *txq = tx_queue; \
+ uint16_t nb_tx = 0; \
+ \
+ if (rte_spinlock_trylock(&txq->tx_lock)) { \
+ nb_tx = func(tx_queue, tx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&txq->tx_lock); \
+ } \
+ \
+ return nb_tx; \
+}
1. As I said in off-line dicussiion, I think this locking could (and I think better be)
impelented completely on rte_ethdev layer.
So actual PMD code will be unaffected.
Again that avoids us to introduce _lock version of every RX/Tx function in each
PMD.
One purpose of implementing the lock in PMD layer is to avoid ABI change. But we introduce the field lock_mode in struct
rte_eth_rx/txmode. So seems it's not a good reason now :)
The other purpose is we want to add a lock for every queue. But in rte layer the queue is void *, so we add the lock in the specific
structures of the NICs. But as you mentioned below, we can add the lock as dev->data->rx_queue_state it the struct
rte_eth_dev_data.
So, I prefer to add the lock in rte layer now.
OK.
Post by Lu, Wenzhuo
2. Again, as discussed offline, I think it is better to have an explicit
rte_eth_(rx|tx)_burst_lock(sync?) API, instead of add new fileds into RX/TX
config strcutures.
Would help to avoid any confusion, I think.
We want the users to choose the rx/tx path without lock if they're sensitive to the performance and can handle the reset event in
their APP. After introducing new fields of config struct, users can change the config to choose the different path.
I understand what you are doing.
Post by Lu, Wenzhuo
If we introduce new API, it may be harder for the use to use it. I mean when users want to use lock mode, they may need to replace
all the rte_eth_rx/tx_burst by rte_eth_rx/tx_burst_lock.
Yes, my opinion if users would like to use locking API they need to call it explicitly.
Post by Lu, Wenzhuo
So if we add the lock in rte layer, I still prefer adding lock_mode in the
configuration, and the rte_eth_rx/tx_burst is changed like this,
rte_eth_rx/tx_burst
{
+ if lock_mode
+ try_lock
......
+ if lock_mode
+ release_lock
}
My preference is to keep existing rx/tx_burst() functions unaffected by that patch.
At least for now.
I suppose that will minimise the risks and help users to avoid confusion what API
(locking/non-locking) is in use.
Post by Lu, Wenzhuo
3. I thought the plan was to introduce a locking in all appropriate control path
functions (dev_start/dev_stop etc.) Without that locking version of RX/TX seems
a bit useless.
Yes, I understand that you do use locking inside dev_reset, but I suppose the
plan was to have a generic solution, no?
Again, interrupt fire when user invokes dev_start/stop or so, so we still need
some synchronisation between them.
static inline uint16_t
rte_eth_rx_burst_lock(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **rx_pkts, const uint16_t nb_pkts) {
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0);
if (queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id);
return 0;
}
#endif
+ if (rte_spinlock_trylock(&dev->data->rx_queue_state[rx_queue_id].lock) == 0)
+ return 0;
+ else if (dev->data->rx_queue_state[rx_queue_id] ==
RTE_ETH_QUEUE_STATE_STOPPED)) {
+ rte_spinlock_unlock(&dev->data->rx_queue_state[rx_queue_id].unlock);
+ return 0;
+
nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
rx_pkts, nb_pkts);
+ rte_spinlock_unlock(&dev->data->rx_queue_state[rx_queue_id].unlock);
....
return nb_rx;
}
int
rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id) {
struct rte_eth_dev *dev;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP);
rte_spinlock_lock(&dev->data->rx_queue_state[rx_queue_id].lock)
I think you add the lock here to stop the rx/tx.
But to my opinion, we should lock the rx/tx much earlier before starting the queue. For example, when stop the port, the resource of
the queues may be released.
I didn't get you here...
Before releasing the queue resources, queue_stop() has to be executed, right?
Post by Lu, Wenzhuo
The rx/tx cannot be executed. So I prefer to get the lock before stopping the ports.
Might be I wasn't clear enough here.
What I think we need to have:
-To stop/start/rx/tx the queue (or do any other action that might change the queue internal structure)
you have to grab the lock.
After queue is stopped it's state has to be changed to QUEUE_STATE_STOPPED (whti queue lock grabbed),
so rx/tx_locked wouldn't proceed with that queue.
- dev_stop() - has to stop all its queues first, i.e. it needs to call queue_stop() for all of them.
So after dev_stop() had finished - all device queues have to be in QUEUE_STATE_STOPPED
Same about dev_start() - after it does all other things - it will call queue_start() for all it's queues.
that will bring them into QUEUE_STARTED.
After that rx/tx_locked can use them again.
Post by Lu, Wenzhuo
Maybe better to keep the spinlock in the dev_reset.
Might be not :)
Post by Lu, Wenzhuo
if (dev->data->rx_queue_state[rx_queue_id] !=
RTE_ETH_QUEUE_STATE_STOPPED) {
RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
" already started\n",
rx_queue_id, port_id);
ret = -EINVAL 0;
} else
ret = dev->dev_ops->rx_queue_start(dev, rx_queue_id);
rte_spinlock_unlock(&dev->data->rx_queue_state[rx_queue_id].unlock);
return ret;
}
Then again, we don't need to do explicit locking inside dev_reset().
Does it make sense to you guys?
Please see the answer above.
+
+/**
* A structure used to configure an RX ring of an Ethernet port.
*/
struct rte_eth_rxconf {
--
2.1.4
Lu, Wenzhuo
2016-06-12 02:00:39 UTC
Permalink
Hi Konstantin,
-----Original Message-----
From: Ananyev, Konstantin
Sent: Wednesday, June 8, 2016 5:20 PM
Cc: Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: RE: [PATCH v4 2/8] lib/librte_ether: defind RX/TX lock mode
Post by Lu, Wenzhuo
Hi Konstantin,
-----Original Message-----
From: Ananyev, Konstantin
Sent: Tuesday, June 7, 2016 5:59 PM
Cc: Lu, Wenzhuo; Richardson, Bruce; Chen, Jing D; Liang, Cunming;
Wu, Jingjing; Zhang, Helin
Subject: RE: [PATCH v4 2/8] lib/librte_ether: defind RX/TX lock mode
Hi Zhe & Wenzhuo,
Please find my comments below.
BTW, for clarification - is that patch for 16.11?
I believe it's too late to introduce such significant change in 16.07.
Thanks
Konstantin
Thanks for the comments.
Honestly, our purpose is 16.07. Realizing the big impact, we use
NEXT_ABI to comment our change. So, I think although we want to merge it in
16.07 this change will become effective after we remove NEXT_ABI in 16.11.
I don't think it is achievable.
First I think your code is not in proper shape yet, right now.
Second, as you said, it is a significant change and I would like to hear opinions
from the rest of the community.
Agree it should have risk. I mean our target is 16.07. But surely if it can be achieved depends on the feedback from the community.
Post by Lu, Wenzhuo
Define lock mode for RX/TX queue. Because when resetting the
device we want the resetting thread to get the lock of the RX/TX
queue to make sure the RX/TX is stopped.
Using next ABI macro for this ABI change as it has too much
impact. 7 APIs and 1 global variable are impacted.
---
lib/librte_ether/rte_ethdev.h | 62
+++++++++++++++++++++++++++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/lib/librte_ether/rte_ethdev.h
b/lib/librte_ether/rte_ethdev.h index 74e895f..4efb5e9 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -354,7 +354,12 @@ struct rte_eth_rxmode {
jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */
hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */
enable_scatter : 1, /**< Enable scatter packets rx handler */
+#ifndef RTE_NEXT_ABI
enable_lro : 1; /**< Enable LRO */
+#else
+ enable_lro : 1, /**< Enable LRO */
+ lock_mode : 1; /**< Using lock path */
+#endif
};
/**
@@ -634,11 +639,68 @@ struct rte_eth_txmode {
/**< If set, reject sending out tagged pkts */
hw_vlan_reject_untagged : 1,
/**< If set, reject sending out untagged pkts */
+#ifndef RTE_NEXT_ABI
hw_vlan_insert_pvid : 1;
/**< If set, enable port based VLAN insertion */
+#else
+ hw_vlan_insert_pvid : 1,
+ /**< If set, enable port based VLAN insertion */
+ lock_mode : 1;
+ /**< If set, using lock path */ #endif
};
/**
+ * The macros for the RX/TX lock mode functions */ #ifdef
+RTE_NEXT_ABI #define RX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.rxmode.lock_mode ? \
+ func ## _lock : func)
+
+#define TX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.txmode.lock_mode ? \
+ func ## _lock : func)
+#else
+#define RX_LOCK_FUNCTION(dev, func) func
+
+#define TX_LOCK_FUNCTION(dev, func) func #endif
+
+/* Add the lock RX/TX function for VF reset */ #define
+GENERATE_RX_LOCK(func, nic) \ uint16_t func ## _lock(void
+*rx_queue, \
+ struct rte_mbuf **rx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _rx_queue *rxq = rx_queue; \
+ uint16_t nb_rx = 0; \
+ \
+ if (rte_spinlock_trylock(&rxq->rx_lock)) { \
+ nb_rx = func(rx_queue, rx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&rxq->rx_lock); \
+ } \
+ \
+ return nb_rx; \
+}
+
+#define GENERATE_TX_LOCK(func, nic) \ uint16_t func ## _lock(void
+*tx_queue, \
+ struct rte_mbuf **tx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _tx_queue *txq = tx_queue; \
+ uint16_t nb_tx = 0; \
+ \
+ if (rte_spinlock_trylock(&txq->tx_lock)) { \
+ nb_tx = func(tx_queue, tx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&txq->tx_lock); \
+ } \
+ \
+ return nb_tx; \
+}
1. As I said in off-line dicussiion, I think this locking could (and
I think better be) impelented completely on rte_ethdev layer.
So actual PMD code will be unaffected.
Again that avoids us to introduce _lock version of every RX/Tx
function in each PMD.
One purpose of implementing the lock in PMD layer is to avoid ABI
change. But we introduce the field lock_mode in struct
rte_eth_rx/txmode. So seems it's not a good reason now :) The other
purpose is we want to add a lock for every queue. But in rte layer the
queue is void *, so we add the lock in the specific structures of the NICs. But as
you mentioned below, we can add the lock as dev->data->rx_queue_state it the
struct rte_eth_dev_data.
Post by Lu, Wenzhuo
So, I prefer to add the lock in rte layer now.
OK.
Post by Lu, Wenzhuo
2. Again, as discussed offline, I think it is better to have an explicit
rte_eth_(rx|tx)_burst_lock(sync?) API, instead of add new fileds
into RX/TX config strcutures.
Would help to avoid any confusion, I think.
We want the users to choose the rx/tx path without lock if they're
sensitive to the performance and can handle the reset event in their APP. After
introducing new fields of config struct, users can change the config to choose
the different path.
I understand what you are doing.
Post by Lu, Wenzhuo
If we introduce new API, it may be harder for the use to use it. I
mean when users want to use lock mode, they may need to replace all the
rte_eth_rx/tx_burst by rte_eth_rx/tx_burst_lock.
Yes, my opinion if users would like to use locking API they need to call it explicitly.
Post by Lu, Wenzhuo
So if we add the lock in rte layer, I still prefer adding lock_mode in
the configuration, and the rte_eth_rx/tx_burst is changed like this,
rte_eth_rx/tx_burst {
+ if lock_mode
+ try_lock
......
+ if lock_mode
+ release_lock
}
My preference is to keep existing rx/tx_burst() functions unaffected by that patch.
At least for now.
I suppose that will minimise the risks and help users to avoid confusion what API
(locking/non-locking) is in use.
OK. Let me add new APIs.
Post by Lu, Wenzhuo
3. I thought the plan was to introduce a locking in all appropriate
control path functions (dev_start/dev_stop etc.) Without that
locking version of RX/TX seems a bit useless.
Yes, I understand that you do use locking inside dev_reset, but I
suppose the plan was to have a generic solution, no?
Again, interrupt fire when user invokes dev_start/stop or so, so we
still need some synchronisation between them.
static inline uint16_t
rte_eth_rx_burst_lock(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **rx_pkts, const uint16_t nb_pkts) {
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0);
if (queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id);
return 0;
}
#endif
+ if (rte_spinlock_trylock(&dev->data->rx_queue_state[rx_queue_id].lock)
== 0)
Post by Lu, Wenzhuo
+ return 0;
+ else if (dev->data->rx_queue_state[rx_queue_id] ==
RTE_ETH_QUEUE_STATE_STOPPED)) {
+ rte_spinlock_unlock(&dev->data->rx_queue_state[rx_queue_id].unlock);
+ return 0;
+
nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
rx_pkts, nb_pkts);
+ rte_spinlock_unlock(&dev->data->rx_queue_state[rx_queue_id].unlock
+ );
....
return nb_rx;
}
int
rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id) {
struct rte_eth_dev *dev;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n",
rx_queue_id);
Post by Lu, Wenzhuo
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP);
rte_spinlock_lock(&dev->data->rx_queue_state[rx_queue_id].lock)
I think you add the lock here to stop the rx/tx.
But to my opinion, we should lock the rx/tx much earlier before
starting the queue. For example, when stop the port, the resource of the
queues may be released.
I didn't get you here...
Before releasing the queue resources, queue_stop() has to be executed, right?
Sorry, I saw your example with rte_eth_dev_rx_queue_start, I didn't know you also want to change rte_eth_dev_rx_queue_stop too.
Agree this should work it we call queue_start/stop when reset the port. But we will not call them. I find the queue_stop/start are per-queue functions and not supported by all NICs.
Our solution now is stop the whole port and restart the whole port. We will not stop/restart queue by queue.
Post by Lu, Wenzhuo
The rx/tx cannot be executed. So I prefer to get the lock before stopping the
ports.
Might be I wasn't clear enough here.
-To stop/start/rx/tx the queue (or do any other action that might change the
queue internal structure)
you have to grab the lock.
After queue is stopped it's state has to be changed to
QUEUE_STATE_STOPPED (whti queue lock grabbed),
so rx/tx_locked wouldn't proceed with that queue.
- dev_stop() - has to stop all its queues first, i.e. it needs to call queue_stop()
for all of them.
So after dev_stop() had finished - all device queues have to be in
QUEUE_STATE_STOPPED
Same about dev_start() - after it does all other things - it will call queue_start()
for all it's queues.
that will bring them into QUEUE_STARTED.
After that rx/tx_locked can use them again.
Post by Lu, Wenzhuo
Maybe better to keep the spinlock in the dev_reset.
Might be not :)
Post by Lu, Wenzhuo
if (dev->data->rx_queue_state[rx_queue_id] !=
RTE_ETH_QUEUE_STATE_STOPPED) {
RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with
port_id=%" PRIu8
" already started\n",
rx_queue_id, port_id);
ret = -EINVAL 0;
} else
ret = dev->dev_ops->rx_queue_start(dev, rx_queue_id);
rte_spinlock_unlock(&dev->data->rx_queue_state[rx_queue_id].unlock);
return ret;
}
Then again, we don't need to do explicit locking inside dev_reset().
Does it make sense to you guys?
Please see the answer above.
+
+/**
* A structure used to configure an RX ring of an Ethernet port.
*/
struct rte_eth_rxconf {
--
2.1.4
Ananyev, Konstantin
2016-06-12 23:16:45 UTC
Permalink
Hi Wenzhuo,
Post by Lu, Wenzhuo
Hi Konstantin,
-----Original Message-----
From: Ananyev, Konstantin
Sent: Wednesday, June 8, 2016 5:20 PM
Cc: Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: RE: [PATCH v4 2/8] lib/librte_ether: defind RX/TX lock mode
Post by Lu, Wenzhuo
Hi Konstantin,
-----Original Message-----
From: Ananyev, Konstantin
Sent: Tuesday, June 7, 2016 5:59 PM
Cc: Lu, Wenzhuo; Richardson, Bruce; Chen, Jing D; Liang, Cunming;
Wu, Jingjing; Zhang, Helin
Subject: RE: [PATCH v4 2/8] lib/librte_ether: defind RX/TX lock mode
Hi Zhe & Wenzhuo,
Please find my comments below.
BTW, for clarification - is that patch for 16.11?
I believe it's too late to introduce such significant change in 16.07.
Thanks
Konstantin
Thanks for the comments.
Honestly, our purpose is 16.07. Realizing the big impact, we use
NEXT_ABI to comment our change. So, I think although we want to merge it in
16.07 this change will become effective after we remove NEXT_ABI in 16.11.
I don't think it is achievable.
First I think your code is not in proper shape yet, right now.
Second, as you said, it is a significant change and I would like to hear opinions
from the rest of the community.
Agree it should have risk. I mean our target is 16.07. But surely if it can be achieved depends on the feedback from the community.
Post by Lu, Wenzhuo
Define lock mode for RX/TX queue. Because when resetting the
device we want the resetting thread to get the lock of the RX/TX
queue to make sure the RX/TX is stopped.
Using next ABI macro for this ABI change as it has too much
impact. 7 APIs and 1 global variable are impacted.
---
lib/librte_ether/rte_ethdev.h | 62
+++++++++++++++++++++++++++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/lib/librte_ether/rte_ethdev.h
b/lib/librte_ether/rte_ethdev.h index 74e895f..4efb5e9 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -354,7 +354,12 @@ struct rte_eth_rxmode {
jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */
hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */
enable_scatter : 1, /**< Enable scatter packets rx handler */
+#ifndef RTE_NEXT_ABI
enable_lro : 1; /**< Enable LRO */
+#else
+ enable_lro : 1, /**< Enable LRO */
+ lock_mode : 1; /**< Using lock path */
+#endif
};
/**
@@ -634,11 +639,68 @@ struct rte_eth_txmode {
/**< If set, reject sending out tagged pkts */
hw_vlan_reject_untagged : 1,
/**< If set, reject sending out untagged pkts */
+#ifndef RTE_NEXT_ABI
hw_vlan_insert_pvid : 1;
/**< If set, enable port based VLAN insertion */
+#else
+ hw_vlan_insert_pvid : 1,
+ /**< If set, enable port based VLAN insertion */
+ lock_mode : 1;
+ /**< If set, using lock path */ #endif
};
/**
+ * The macros for the RX/TX lock mode functions */ #ifdef
+RTE_NEXT_ABI #define RX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.rxmode.lock_mode ? \
+ func ## _lock : func)
+
+#define TX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.txmode.lock_mode ? \
+ func ## _lock : func)
+#else
+#define RX_LOCK_FUNCTION(dev, func) func
+
+#define TX_LOCK_FUNCTION(dev, func) func #endif
+
+/* Add the lock RX/TX function for VF reset */ #define
+GENERATE_RX_LOCK(func, nic) \ uint16_t func ## _lock(void
+*rx_queue, \
+ struct rte_mbuf **rx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _rx_queue *rxq = rx_queue; \
+ uint16_t nb_rx = 0; \
+ \
+ if (rte_spinlock_trylock(&rxq->rx_lock)) { \
+ nb_rx = func(rx_queue, rx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&rxq->rx_lock); \
+ } \
+ \
+ return nb_rx; \
+}
+
+#define GENERATE_TX_LOCK(func, nic) \ uint16_t func ## _lock(void
+*tx_queue, \
+ struct rte_mbuf **tx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _tx_queue *txq = tx_queue; \
+ uint16_t nb_tx = 0; \
+ \
+ if (rte_spinlock_trylock(&txq->tx_lock)) { \
+ nb_tx = func(tx_queue, tx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&txq->tx_lock); \
+ } \
+ \
+ return nb_tx; \
+}
1. As I said in off-line dicussiion, I think this locking could (and
I think better be) impelented completely on rte_ethdev layer.
So actual PMD code will be unaffected.
Again that avoids us to introduce _lock version of every RX/Tx
function in each PMD.
One purpose of implementing the lock in PMD layer is to avoid ABI
change. But we introduce the field lock_mode in struct
rte_eth_rx/txmode. So seems it's not a good reason now :) The other
purpose is we want to add a lock for every queue. But in rte layer the
queue is void *, so we add the lock in the specific structures of the NICs. But as
you mentioned below, we can add the lock as dev->data->rx_queue_state it the
struct rte_eth_dev_data.
Post by Lu, Wenzhuo
So, I prefer to add the lock in rte layer now.
OK.
Post by Lu, Wenzhuo
2. Again, as discussed offline, I think it is better to have an explicit
rte_eth_(rx|tx)_burst_lock(sync?) API, instead of add new fileds
into RX/TX config strcutures.
Would help to avoid any confusion, I think.
We want the users to choose the rx/tx path without lock if they're
sensitive to the performance and can handle the reset event in their APP. After
introducing new fields of config struct, users can change the config to choose
the different path.
I understand what you are doing.
Post by Lu, Wenzhuo
If we introduce new API, it may be harder for the use to use it. I
mean when users want to use lock mode, they may need to replace all the
rte_eth_rx/tx_burst by rte_eth_rx/tx_burst_lock.
Yes, my opinion if users would like to use locking API they need to call it explicitly.
Post by Lu, Wenzhuo
So if we add the lock in rte layer, I still prefer adding lock_mode in
the configuration, and the rte_eth_rx/tx_burst is changed like this,
rte_eth_rx/tx_burst {
+ if lock_mode
+ try_lock
......
+ if lock_mode
+ release_lock
}
My preference is to keep existing rx/tx_burst() functions unaffected by that patch.
At least for now.
I suppose that will minimise the risks and help users to avoid confusion what API
(locking/non-locking) is in use.
OK. Let me add new APIs.
Post by Lu, Wenzhuo
3. I thought the plan was to introduce a locking in all appropriate
control path functions (dev_start/dev_stop etc.) Without that
locking version of RX/TX seems a bit useless.
Yes, I understand that you do use locking inside dev_reset, but I
suppose the plan was to have a generic solution, no?
Again, interrupt fire when user invokes dev_start/stop or so, so we
still need some synchronisation between them.
static inline uint16_t
rte_eth_rx_burst_lock(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **rx_pkts, const uint16_t nb_pkts) {
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0);
if (queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id);
return 0;
}
#endif
+ if (rte_spinlock_trylock(&dev->data->rx_queue_state[rx_queue_id].lock)
== 0)
Post by Lu, Wenzhuo
+ return 0;
+ else if (dev->data->rx_queue_state[rx_queue_id] ==
RTE_ETH_QUEUE_STATE_STOPPED)) {
+ rte_spinlock_unlock(&dev->data->rx_queue_state[rx_queue_id].unlock);
+ return 0;
+
nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
rx_pkts, nb_pkts);
+ rte_spinlock_unlock(&dev->data->rx_queue_state[rx_queue_id].unlock
+ );
....
return nb_rx;
}
int
rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id) {
struct rte_eth_dev *dev;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n",
rx_queue_id);
Post by Lu, Wenzhuo
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP);
rte_spinlock_lock(&dev->data->rx_queue_state[rx_queue_id].lock)
I think you add the lock here to stop the rx/tx.
But to my opinion, we should lock the rx/tx much earlier before
starting the queue. For example, when stop the port, the resource of the
queues may be released.
I didn't get you here...
Before releasing the queue resources, queue_stop() has to be executed, right?
Sorry, I saw your example with rte_eth_dev_rx_queue_start, I didn't know you also want to change rte_eth_dev_rx_queue_stop
too.
Agree this should work it we call queue_start/stop when reset the port. But we will not call them. I find the queue_stop/start are per-
queue functions and not supported by all NICs.
But right now you do reset only for ixgbe/i40e.
For these devices we defiantly do support queue start/stop.
And again, it is not only about reset op.
If we want to add rx locked (synced), I think it should be in sync with all control API
that changes queue state.
As I said before it is a lot of work and a lot of hassle...
So probably the easiest (and might be safiest) way just leave things as there are right now:
we allow user to setup a callback on VF reset, and it is user responsibility to make
sure no RX/TX is active while reset operation is performed.
Pretty much what Olivier and Stephen suggested, as I understand.
Konstantin
Post by Lu, Wenzhuo
Our solution now is stop the whole port and restart the whole port. We will not stop/restart queue by queue.
Post by Lu, Wenzhuo
The rx/tx cannot be executed. So I prefer to get the lock before stopping the
ports.
Might be I wasn't clear enough here.
-To stop/start/rx/tx the queue (or do any other action that might change the
queue internal structure)
you have to grab the lock.
After queue is stopped it's state has to be changed to
QUEUE_STATE_STOPPED (whti queue lock grabbed),
so rx/tx_locked wouldn't proceed with that queue.
- dev_stop() - has to stop all its queues first, i.e. it needs to call queue_stop()
for all of them.
So after dev_stop() had finished - all device queues have to be in
QUEUE_STATE_STOPPED
Same about dev_start() - after it does all other things - it will call queue_start()
for all it's queues.
that will bring them into QUEUE_STARTED.
After that rx/tx_locked can use them again.
Post by Lu, Wenzhuo
Maybe better to keep the spinlock in the dev_reset.
Might be not :)
Post by Lu, Wenzhuo
if (dev->data->rx_queue_state[rx_queue_id] !=
RTE_ETH_QUEUE_STATE_STOPPED) {
RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with
port_id=%" PRIu8
" already started\n",
rx_queue_id, port_id);
ret = -EINVAL 0;
} else
ret = dev->dev_ops->rx_queue_start(dev, rx_queue_id);
rte_spinlock_unlock(&dev->data->rx_queue_state[rx_queue_id].unlock);
return ret;
}
Then again, we don't need to do explicit locking inside dev_reset().
Does it make sense to you guys?
Please see the answer above.
+
+/**
* A structure used to configure an RX ring of an Ethernet port.
*/
struct rte_eth_rxconf {
--
2.1.4
Lu, Wenzhuo
2016-06-13 01:06:54 UTC
Permalink
Hi Konstantin,
-----Original Message-----
From: Ananyev, Konstantin
Sent: Monday, June 13, 2016 7:17 AM
Cc: Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: RE: [PATCH v4 2/8] lib/librte_ether: defind RX/TX lock mode
Hi Wenzhuo,
Post by Lu, Wenzhuo
Hi Konstantin,
-----Original Message-----
From: Ananyev, Konstantin
Sent: Wednesday, June 8, 2016 5:20 PM
Cc: Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: RE: [PATCH v4 2/8] lib/librte_ether: defind RX/TX lock mode
Post by Lu, Wenzhuo
Hi Konstantin,
-----Original Message-----
From: Ananyev, Konstantin
Sent: Tuesday, June 7, 2016 5:59 PM
Cc: Lu, Wenzhuo; Richardson, Bruce; Chen, Jing D; Liang,
Cunming; Wu, Jingjing; Zhang, Helin
Subject: RE: [PATCH v4 2/8] lib/librte_ether: defind RX/TX lock mode
Hi Zhe & Wenzhuo,
Please find my comments below.
BTW, for clarification - is that patch for 16.11?
I believe it's too late to introduce such significant change in 16.07.
Thanks
Konstantin
Thanks for the comments.
Honestly, our purpose is 16.07. Realizing the big impact, we use
NEXT_ABI to comment our change. So, I think although we want to merge it in
16.07 this change will become effective after we remove NEXT_ABI in
16.11.
Post by Lu, Wenzhuo
I don't think it is achievable.
First I think your code is not in proper shape yet, right now.
Second, as you said, it is a significant change and I would like to
hear opinions from the rest of the community.
Agree it should have risk. I mean our target is 16.07. But surely if it can be
achieved depends on the feedback from the community.
Post by Lu, Wenzhuo
Post by Lu, Wenzhuo
Define lock mode for RX/TX queue. Because when resetting the
device we want the resetting thread to get the lock of the
RX/TX queue to make sure the RX/TX is stopped.
Using next ABI macro for this ABI change as it has too much
impact. 7 APIs and 1 global variable are impacted.
---
lib/librte_ether/rte_ethdev.h | 62
+++++++++++++++++++++++++++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/lib/librte_ether/rte_ethdev.h
b/lib/librte_ether/rte_ethdev.h index 74e895f..4efb5e9 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -354,7 +354,12 @@ struct rte_eth_rxmode {
jumbo_frame : 1, /**< Jumbo Frame Receipt
enable. */
Post by Lu, Wenzhuo
Post by Lu, Wenzhuo
hw_strip_crc : 1, /**< Enable CRC stripping by
hardware. */
Post by Lu, Wenzhuo
Post by Lu, Wenzhuo
enable_scatter : 1, /**< Enable scatter packets rx
handler */
Post by Lu, Wenzhuo
Post by Lu, Wenzhuo
+#ifndef RTE_NEXT_ABI
enable_lro : 1; /**< Enable LRO */
+#else
+ enable_lro : 1, /**< Enable LRO */
+ lock_mode : 1; /**< Using lock path */
+#endif
};
/**
@@ -634,11 +639,68 @@ struct rte_eth_txmode {
/**< If set, reject sending out tagged pkts */
hw_vlan_reject_untagged : 1,
/**< If set, reject sending out untagged pkts */
+#ifndef RTE_NEXT_ABI
hw_vlan_insert_pvid : 1;
/**< If set, enable port based VLAN insertion */
+#else
+ hw_vlan_insert_pvid : 1,
+ /**< If set, enable port based VLAN insertion */
+ lock_mode : 1;
+ /**< If set, using lock path */ #endif
};
/**
+ * The macros for the RX/TX lock mode functions */ #ifdef
+RTE_NEXT_ABI #define RX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.rxmode.lock_mode ? \
+ func ## _lock : func)
+
+#define TX_LOCK_FUNCTION(dev, func) \
+ (dev->data->dev_conf.txmode.lock_mode ? \
+ func ## _lock : func)
+#else
+#define RX_LOCK_FUNCTION(dev, func) func
+
+#define TX_LOCK_FUNCTION(dev, func) func #endif
+
+/* Add the lock RX/TX function for VF reset */ #define
+GENERATE_RX_LOCK(func, nic) \ uint16_t func ## _lock(void
+*rx_queue, \
+ struct rte_mbuf **rx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _rx_queue *rxq = rx_queue; \
+ uint16_t nb_rx = 0; \
+ \
+ if (rte_spinlock_trylock(&rxq->rx_lock)) { \
+ nb_rx = func(rx_queue, rx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&rxq->rx_lock); \
+ } \
+ \
+ return nb_rx; \
+}
+
+#define GENERATE_TX_LOCK(func, nic) \ uint16_t func ##
+_lock(void *tx_queue, \
+ struct rte_mbuf **tx_pkts, \
+ uint16_t nb_pkts) \
+{ \
+ struct nic ## _tx_queue *txq = tx_queue; \
+ uint16_t nb_tx = 0; \
+ \
+ if (rte_spinlock_trylock(&txq->tx_lock)) { \
+ nb_tx = func(tx_queue, tx_pkts, nb_pkts); \
+ rte_spinlock_unlock(&txq->tx_lock); \
+ } \
+ \
+ return nb_tx; \
+}
1. As I said in off-line dicussiion, I think this locking could
(and I think better be) impelented completely on rte_ethdev layer.
So actual PMD code will be unaffected.
Again that avoids us to introduce _lock version of every RX/Tx
function in each PMD.
One purpose of implementing the lock in PMD layer is to avoid ABI
change. But we introduce the field lock_mode in struct
rte_eth_rx/txmode. So seems it's not a good reason now :) The
other purpose is we want to add a lock for every queue. But in rte
layer the queue is void *, so we add the lock in the specific
structures of the NICs. But as
you mentioned below, we can add the lock as
dev->data->rx_queue_state it the struct rte_eth_dev_data.
Post by Lu, Wenzhuo
So, I prefer to add the lock in rte layer now.
OK.
Post by Lu, Wenzhuo
2. Again, as discussed offline, I think it is better to have an explicit
rte_eth_(rx|tx)_burst_lock(sync?) API, instead of add new fileds
into RX/TX config strcutures.
Would help to avoid any confusion, I think.
We want the users to choose the rx/tx path without lock if
they're sensitive to the performance and can handle the reset
event in their APP. After
introducing new fields of config struct, users can change the config
to choose the different path.
I understand what you are doing.
Post by Lu, Wenzhuo
If we introduce new API, it may be harder for the use to use it. I
mean when users want to use lock mode, they may need to replace all the
rte_eth_rx/tx_burst by rte_eth_rx/tx_burst_lock.
Yes, my opinion if users would like to use locking API they need to
call it explicitly.
Post by Lu, Wenzhuo
So if we add the lock in rte layer, I still prefer adding lock_mode
in the configuration, and the rte_eth_rx/tx_burst is changed like
this, rte_eth_rx/tx_burst {
+ if lock_mode
+ try_lock
......
+ if lock_mode
+ release_lock
}
My preference is to keep existing rx/tx_burst() functions unaffected by that patch.
At least for now.
I suppose that will minimise the risks and help users to avoid confusion what API
(locking/non-locking) is in use.
OK. Let me add new APIs.
Post by Lu, Wenzhuo
3. I thought the plan was to introduce a locking in all
appropriate control path functions (dev_start/dev_stop etc.)
Without that locking version of RX/TX seems a bit useless.
Yes, I understand that you do use locking inside dev_reset, but
I suppose the plan was to have a generic solution, no?
Again, interrupt fire when user invokes dev_start/stop or so, so
we still need some synchronisation between them.
static inline uint16_t
rte_eth_rx_burst_lock(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **rx_pkts, const uint16_t nb_pkts) {
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0);
if (queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n",
queue_id);
Post by Lu, Wenzhuo
Post by Lu, Wenzhuo
return 0;
}
#endif
+ if
+ (rte_spinlock_trylock(&dev->data->rx_queue_state[rx_queue_id].
+ lock)
== 0)
Post by Lu, Wenzhuo
+ return 0;
+ else if (dev->data->rx_queue_state[rx_queue_id] ==
RTE_ETH_QUEUE_STATE_STOPPED)) {
+ rte_spinlock_unlock(&dev->data-
rx_queue_state[rx_queue_id].unlock);
Post by Lu, Wenzhuo
+ return 0;
+
nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
rx_pkts, nb_pkts);
+ rte_spinlock_unlock(&dev->data-
rx_queue_state[rx_queue_id].un
Post by Lu, Wenzhuo
+ lock
+ );
....
return nb_rx;
}
int
rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id)
{
Post by Lu, Wenzhuo
Post by Lu, Wenzhuo
struct rte_eth_dev *dev;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n",
rx_queue_id);
Post by Lu, Wenzhuo
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP);
rte_spinlock_lock(&dev->data->rx_queue_state[rx_queue_id].lock)
I think you add the lock here to stop the rx/tx.
But to my opinion, we should lock the rx/tx much earlier before
starting the queue. For example, when stop the port, the resource of the
queues may be released.
I didn't get you here...
Before releasing the queue resources, queue_stop() has to be executed,
right?
Post by Lu, Wenzhuo
Sorry, I saw your example with rte_eth_dev_rx_queue_start, I didn't
know you also want to change rte_eth_dev_rx_queue_stop too.
Agree this should work it we call queue_start/stop when reset the
port. But we will not call them. I find the queue_stop/start are per- queue
functions and not supported by all NICs.
But right now you do reset only for ixgbe/i40e.
Not only for ixgbe/i40e. You forget igb, which doesn't support queue_start/stop :)
For these devices we defiantly do support queue start/stop.
And again, it is not only about reset op.
If we want to add rx locked (synced), I think it should be in sync with all
control API that changes queue state.
As I said before it is a lot of work and a lot of hassle...
So probably the easiest (and might be safiest) way just leave things as there
we allow user to setup a callback on VF reset, and it is user responsibility to
make sure no RX/TX is active while reset operation is performed.
Pretty much what Olivier and Stephen suggested, as I understand.
Agree. It's not a good way to add lock for just one feature. It could be tricky if we want to extend the lock to other features. A whole picture is needed.
We've sent another patch set to let the user setup a callback on VF reset. Depend on that, user can use existing rte APIs to reset the VF port. But how about your opinion if we add a specific rte_reset API? It may be easier for the user.
Konstantin
Post by Lu, Wenzhuo
Our solution now is stop the whole port and restart the whole port. We
will not stop/restart queue by queue.
Post by Lu, Wenzhuo
Post by Lu, Wenzhuo
The rx/tx cannot be executed. So I prefer to get the lock before stopping the
ports.
Might be I wasn't clear enough here.
-To stop/start/rx/tx the queue (or do any other action that might
change the queue internal structure)
you have to grab the lock.
After queue is stopped it's state has to be changed to
QUEUE_STATE_STOPPED (whti queue lock grabbed),
so rx/tx_locked wouldn't proceed with that queue.
- dev_stop() - has to stop all its queues first, i.e. it needs to
call queue_stop() for all of them.
So after dev_stop() had finished - all device queues have to be in
QUEUE_STATE_STOPPED Same about dev_start() - after it does all other
things - it will call queue_start() for all it's queues.
that will bring them into QUEUE_STARTED.
After that rx/tx_locked can use them again.
Post by Lu, Wenzhuo
Maybe better to keep the spinlock in the dev_reset.
Might be not :)
Post by Lu, Wenzhuo
if (dev->data->rx_queue_state[rx_queue_id] !=
RTE_ETH_QUEUE_STATE_STOPPED) {
RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device
with port_id=%" PRIu8
" already started\n",
rx_queue_id, port_id);
ret = -EINVAL 0;
} else
ret = dev->dev_ops->rx_queue_start(dev, rx_queue_id);
rte_spinlock_unlock(&dev->data-
rx_queue_state[rx_queue_id].unlo
Post by Lu, Wenzhuo
ck);
return ret;
}
Then again, we don't need to do explicit locking inside dev_reset().
Does it make sense to you guys?
Please see the answer above.
+
+/**
* A structure used to configure an RX ring of an Ethernet port.
*/
struct rte_eth_rxconf {
--
2.1.4
Ananyev, Konstantin
2016-06-13 10:47:35 UTC
Permalink
Hi Wenzhuo,
Post by Lu, Wenzhuo
Post by Zhe Tao
Post by Ananyev, Konstantin
Post by Ananyev, Konstantin
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
3. I thought the plan was to introduce a locking in all
appropriate control path functions (dev_start/dev_stop etc.)
Without that locking version of RX/TX seems a bit useless.
Yes, I understand that you do use locking inside dev_reset, but
I suppose the plan was to have a generic solution, no?
Again, interrupt fire when user invokes dev_start/stop or so, so
we still need some synchronisation between them.
static inline uint16_t
rte_eth_rx_burst_lock(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **rx_pkts, const uint16_t nb_pkts) {
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0);
if (queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n",
queue_id);
Post by Ananyev, Konstantin
Post by Ananyev, Konstantin
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
return 0;
}
#endif
+ if
+ (rte_spinlock_trylock(&dev->data->rx_queue_state[rx_queue_id].
+ lock)
== 0)
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
+ return 0;
+ else if (dev->data->rx_queue_state[rx_queue_id] ==
RTE_ETH_QUEUE_STATE_STOPPED)) {
+ rte_spinlock_unlock(&dev->data-
rx_queue_state[rx_queue_id].unlock);
Post by Ananyev, Konstantin
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
+ return 0;
+
nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
rx_pkts, nb_pkts);
+ rte_spinlock_unlock(&dev->data-
rx_queue_state[rx_queue_id].un
Post by Ananyev, Konstantin
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
+ lock
+ );
....
return nb_rx;
}
int
rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id)
{
Post by Ananyev, Konstantin
Post by Ananyev, Konstantin
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
struct rte_eth_dev *dev;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n",
rx_queue_id);
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start,
-ENOTSUP);
rte_spinlock_lock(&dev->data->rx_queue_state[rx_queue_id].lock)
I think you add the lock here to stop the rx/tx.
But to my opinion, we should lock the rx/tx much earlier before
starting the queue. For example, when stop the port, the resource of the
queues may be released.
I didn't get you here...
Before releasing the queue resources, queue_stop() has to be executed,
right?
Post by Ananyev, Konstantin
Sorry, I saw your example with rte_eth_dev_rx_queue_start, I didn't
know you also want to change rte_eth_dev_rx_queue_stop too.
Agree this should work it we call queue_start/stop when reset the
port. But we will not call them. I find the queue_stop/start are per- queue
functions and not supported by all NICs.
But right now you do reset only for ixgbe/i40e.
Not only for ixgbe/i40e. You forget igb, which doesn't support queue_start/stop :)
Post by Zhe Tao
For these devices we defiantly do support queue start/stop.
And again, it is not only about reset op.
If we want to add rx locked (synced), I think it should be in sync with all
control API that changes queue state.
As I said before it is a lot of work and a lot of hassle...
So probably the easiest (and might be safiest) way just leave things as there
we allow user to setup a callback on VF reset, and it is user responsibility to
make sure no RX/TX is active while reset operation is performed.
Pretty much what Olivier and Stephen suggested, as I understand.
Agree. It's not a good way to add lock for just one feature. It could be tricky if we want to extend the lock to other features. A whole
picture is needed.
We've sent another patch set to let the user setup a callback on VF reset. Depend on that, user can use existing rte APIs to reset the
VF port. But how about your opinion if we add a specific rte_reset API? It may be easier for the user.
You mean add rte_eth_dev_reset() without any locking inside?
So it when VF reset happens, it would be user responsibility to make sure all IO
over that device is stopped, and then he can call rte_eth_dev_reset(), correct?
Konstantin
Lu, Wenzhuo
2016-06-14 00:42:09 UTC
Permalink
Hi Konstantin,
-----Original Message-----
From: Ananyev, Konstantin
Sent: Monday, June 13, 2016 6:48 PM
Cc: Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: RE: [PATCH v4 2/8] lib/librte_ether: defind RX/TX lock mode
Hi Wenzhuo,
Post by Ananyev, Konstantin
Post by Zhe Tao
Post by Ananyev, Konstantin
Post by Ananyev, Konstantin
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
3. I thought the plan was to introduce a locking in all
appropriate control path functions (dev_start/dev_stop etc.)
Without that locking version of RX/TX seems a bit useless.
Yes, I understand that you do use locking inside dev_reset,
but I suppose the plan was to have a generic solution, no?
Again, interrupt fire when user invokes dev_start/stop or
so, so we still need some synchronisation between them.
static inline uint16_t
rte_eth_rx_burst_lock(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **rx_pkts, const uint16_t nb_pkts) {
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0);
if (queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX
queue_id=%d\n",
queue_id);
Post by Ananyev, Konstantin
Post by Ananyev, Konstantin
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
return 0;
}
#endif
+ if
+ (rte_spinlock_trylock(&dev->data-
rx_queue_state[rx_queue_id].
Post by Zhe Tao
Post by Ananyev, Konstantin
Post by Ananyev, Konstantin
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
+ lock)
== 0)
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
+ return 0;
+ else if (dev->data->rx_queue_state[rx_queue_id] ==
RTE_ETH_QUEUE_STATE_STOPPED)) {
+ rte_spinlock_unlock(&dev->data-
rx_queue_state[rx_queue_id].unlock);
Post by Ananyev, Konstantin
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
+ return 0;
+
nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
rx_pkts, nb_pkts);
+ rte_spinlock_unlock(&dev->data-
rx_queue_state[rx_queue_id].un
Post by Ananyev, Konstantin
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
+ lock
+ );
....
return nb_rx;
}
int
rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id)
{
Post by Ananyev, Konstantin
Post by Ananyev, Konstantin
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
struct rte_eth_dev *dev;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX
queue_id=%d\n",
rx_queue_id);
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP);
rte_spinlock_lock(&dev->data->rx_queue_state[rx_queue_id].lo
ck)
I think you add the lock here to stop the rx/tx.
But to my opinion, we should lock the rx/tx much earlier
before starting the queue. For example, when stop the port,
the resource of the
queues may be released.
I didn't get you here...
Before releasing the queue resources, queue_stop() has to be executed,
right?
Post by Ananyev, Konstantin
Sorry, I saw your example with rte_eth_dev_rx_queue_start, I
didn't know you also want to change rte_eth_dev_rx_queue_stop too.
Agree this should work it we call queue_start/stop when reset the
port. But we will not call them. I find the queue_stop/start are per- queue
functions and not supported by all NICs.
But right now you do reset only for ixgbe/i40e.
Not only for ixgbe/i40e. You forget igb, which doesn't support queue_start/stop :)
Post by Zhe Tao
For these devices we defiantly do support queue start/stop.
And again, it is not only about reset op.
If we want to add rx locked (synced), I think it should be in sync
with all control API that changes queue state.
As I said before it is a lot of work and a lot of hassle...
So probably the easiest (and might be safiest) way just leave things
we allow user to setup a callback on VF reset, and it is user
responsibility to make sure no RX/TX is active while reset operation is
performed.
Post by Ananyev, Konstantin
Post by Zhe Tao
Pretty much what Olivier and Stephen suggested, as I understand.
Agree. It's not a good way to add lock for just one feature. It could
be tricky if we want to extend the lock to other features. A whole picture
is needed.
Post by Ananyev, Konstantin
We've sent another patch set to let the user setup a callback on VF
reset. Depend on that, user can use existing rte APIs to reset the VF port.
But how about your opinion if we add a specific rte_reset API? It may be
easier for the user.
You mean add rte_eth_dev_reset() without any locking inside?
So it when VF reset happens, it would be user responsibility to make sure
all IO over that device is stopped, and then he can call rte_eth_dev_reset(),
correct?
Konstantin
Yes, that's exactly what I plan to do :)
Ananyev, Konstantin
2016-06-14 08:42:47 UTC
Permalink
Hi Wenzhuo,
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
Post by Ananyev, Konstantin
Post by Zhe Tao
Post by Ananyev, Konstantin
Post by Ananyev, Konstantin
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
3. I thought the plan was to introduce a locking in all
appropriate control path functions (dev_start/dev_stop etc.)
Without that locking version of RX/TX seems a bit useless.
Yes, I understand that you do use locking inside dev_reset,
but I suppose the plan was to have a generic solution, no?
Again, interrupt fire when user invokes dev_start/stop or
so, so we still need some synchronisation between them.
static inline uint16_t
rte_eth_rx_burst_lock(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **rx_pkts, const uint16_t nb_pkts) {
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0);
if (queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX
queue_id=%d\n",
queue_id);
Post by Ananyev, Konstantin
Post by Ananyev, Konstantin
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
return 0;
}
#endif
+ if
+ (rte_spinlock_trylock(&dev->data-
rx_queue_state[rx_queue_id].
Post by Zhe Tao
Post by Ananyev, Konstantin
Post by Ananyev, Konstantin
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
+ lock)
== 0)
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
+ return 0;
+ else if (dev->data->rx_queue_state[rx_queue_id] ==
RTE_ETH_QUEUE_STATE_STOPPED)) {
+ rte_spinlock_unlock(&dev->data-
rx_queue_state[rx_queue_id].unlock);
Post by Ananyev, Konstantin
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
+ return 0;
+
nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
rx_pkts, nb_pkts);
+ rte_spinlock_unlock(&dev->data-
rx_queue_state[rx_queue_id].un
Post by Ananyev, Konstantin
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
+ lock
+ );
....
return nb_rx;
}
int
rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t
rx_queue_id)
{
Post by Ananyev, Konstantin
Post by Ananyev, Konstantin
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
struct rte_eth_dev *dev;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
RTE_PMD_DEBUG_TRACE("Invalid RX
queue_id=%d\n",
rx_queue_id);
Post by Lu, Wenzhuo
Post by Ananyev, Konstantin
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start,
-ENOTSUP);
rte_spinlock_lock(&dev->data->rx_queue_state[rx_queue_id].lo
ck)
I think you add the lock here to stop the rx/tx.
But to my opinion, we should lock the rx/tx much earlier
before starting the queue. For example, when stop the port,
the resource of the
queues may be released.
I didn't get you here...
Before releasing the queue resources, queue_stop() has to be executed,
right?
Post by Ananyev, Konstantin
Sorry, I saw your example with rte_eth_dev_rx_queue_start, I
didn't know you also want to change rte_eth_dev_rx_queue_stop too.
Agree this should work it we call queue_start/stop when reset the
port. But we will not call them. I find the queue_stop/start are per- queue
functions and not supported by all NICs.
But right now you do reset only for ixgbe/i40e.
Not only for ixgbe/i40e. You forget igb, which doesn't support queue_start/stop :)
Post by Zhe Tao
For these devices we defiantly do support queue start/stop.
And again, it is not only about reset op.
If we want to add rx locked (synced), I think it should be in sync
with all control API that changes queue state.
As I said before it is a lot of work and a lot of hassle...
So probably the easiest (and might be safiest) way just leave things
we allow user to setup a callback on VF reset, and it is user
responsibility to make sure no RX/TX is active while reset operation is
performed.
Post by Ananyev, Konstantin
Post by Zhe Tao
Pretty much what Olivier and Stephen suggested, as I understand.
Agree. It's not a good way to add lock for just one feature. It could
be tricky if we want to extend the lock to other features. A whole picture
is needed.
Post by Ananyev, Konstantin
We've sent another patch set to let the user setup a callback on VF
reset. Depend on that, user can use existing rte APIs to reset the VF port.
But how about your opinion if we add a specific rte_reset API? It may be
easier for the user.
You mean add rte_eth_dev_reset() without any locking inside?
So it when VF reset happens, it would be user responsibility to make sure
all IO over that device is stopped, and then he can call rte_eth_dev_reset(),
correct?
Konstantin
Yes, that's exactly what I plan to do :)
Sounds reaosanable to me :)
Konstantin

Zhe Tao
2016-06-07 06:53:03 UTC
Permalink
Add an API to reset the device.
It's for VF device in this scenario, kernel PF + DPDK VF.
When the PF port down/up, APP should call this API to
reset VF port. Most likely, APP should call it in its
management thread and guarantee the thread safe.

Signed-off-by: Wenzhuo Lu <***@intel.com>
---
lib/librte_ether/rte_ethdev.c | 17 +++++++++++++++++
lib/librte_ether/rte_ethdev.h | 14 ++++++++++++++
lib/librte_ether/rte_ether_version.map | 7 +++++++
3 files changed, 38 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index e148028..e43dca9 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -3346,3 +3346,20 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
-ENOTSUP);
return (*dev->dev_ops->l2_tunnel_offload_set)(dev, l2_tunnel, mask, en);
}
+
+int
+rte_eth_dev_reset(uint8_t port_id)
+{
+ struct rte_eth_dev *dev;
+ int diag;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+ dev = &rte_eth_devices[port_id];
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_reset, -ENOTSUP);
+
+ diag = (*dev->dev_ops->dev_reset)(dev);
+
+ return diag;
+}
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 2757510..74e895f 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1318,6 +1318,9 @@ typedef int (*eth_l2_tunnel_offload_set_t)
uint8_t en);
/**< @internal enable/disable the l2 tunnel offload functions */

+typedef int (*eth_dev_reset_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to reset a configured Ethernet device. */
+
#ifdef RTE_NIC_BYPASS

enum {
@@ -1508,6 +1511,8 @@ struct eth_dev_ops {
eth_l2_tunnel_eth_type_conf_t l2_tunnel_eth_type_conf;
/** Enable/disable l2 tunnel offload functions */
eth_l2_tunnel_offload_set_t l2_tunnel_offload_set;
+ /** Reset device. */
+ eth_dev_reset_t dev_reset;
};

/**
@@ -4253,6 +4258,15 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
uint32_t mask,
uint8_t en);

+/**
+ * Reset an Ethernet device.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ */
+int
+rte_eth_dev_reset(uint8_t port_id);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index 214ecc7..c34207e 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -132,3 +132,10 @@ DPDK_16.04 {
rte_eth_tx_buffer_set_err_callback;

} DPDK_2.2;
+
+DPDK_16.07 {
+ global:
+
+ rte_eth_dev_reset;
+
+} DPDK_16.04;
--
2.1.4
Zhe Tao
2016-06-07 06:53:05 UTC
Permalink
Add RX/TX paths with lock for VF. It's used when
the function of link reset on VF is needed.
When the lock for RX/TX is added, the RX/TX can be
stopped. Then we have a chance to reset the VF link.

Please be aware there's performence drop if the lock
path is chosen.

Signed-off-by: Wenzhuo Lu <***@intel.com>
---
drivers/net/ixgbe/ixgbe_ethdev.c | 12 +++++--
drivers/net/ixgbe/ixgbe_ethdev.h | 20 +++++++++++
drivers/net/ixgbe/ixgbe_rxtx.c | 74 ++++++++++++++++++++++++++++++++------
drivers/net/ixgbe/ixgbe_rxtx.h | 13 +++++++
drivers/net/ixgbe/ixgbe_rxtx_vec.c | 6 ++++
5 files changed, 112 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 05f4f29..fd2682f 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -1325,8 +1325,8 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev)
PMD_INIT_FUNC_TRACE();

eth_dev->dev_ops = &ixgbevf_eth_dev_ops;
- eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
- eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+ eth_dev->rx_pkt_burst = RX_LOCK_FUNCTION(eth_dev, ixgbe_recv_pkts);
+ eth_dev->tx_pkt_burst = TX_LOCK_FUNCTION(eth_dev, ixgbe_xmit_pkts);

/* for secondary processes, we don't initialise any further as primary
* has already done this work. Only check we don't need a different
@@ -3012,7 +3012,15 @@ ixgbe_dev_supported_ptypes_get(struct rte_eth_dev *dev)
if (dev->rx_pkt_burst == ixgbe_recv_pkts ||
dev->rx_pkt_burst == ixgbe_recv_pkts_lro_single_alloc ||
dev->rx_pkt_burst == ixgbe_recv_pkts_lro_bulk_alloc ||
+#ifndef RTE_NEXT_ABI
dev->rx_pkt_burst == ixgbe_recv_pkts_bulk_alloc)
+#else
+ dev->rx_pkt_burst == ixgbe_recv_pkts_bulk_alloc ||
+ dev->rx_pkt_burst == ixgbe_recv_pkts_lock ||
+ dev->rx_pkt_burst == ixgbe_recv_pkts_lro_single_alloc_lock ||
+ dev->rx_pkt_burst == ixgbe_recv_pkts_lro_bulk_alloc_lock ||
+ dev->rx_pkt_burst == ixgbe_recv_pkts_bulk_alloc_lock)
+#endif
return ptypes;
return NULL;
}
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 4ff6338..701107b 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -390,12 +390,32 @@ uint16_t ixgbe_recv_pkts_lro_single_alloc(void *rx_queue,
uint16_t ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue,
struct rte_mbuf **rx_pkts, uint16_t nb_pkts);

+uint16_t ixgbe_recv_pkts_lock(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+uint16_t ixgbe_recv_pkts_bulk_alloc_lock(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+uint16_t ixgbe_recv_pkts_lro_single_alloc_lock(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+uint16_t ixgbe_recv_pkts_lro_bulk_alloc_lock(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+
uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t ixgbe_xmit_pkts_lock(void *tx_queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
+uint16_t ixgbe_xmit_pkts_simple_lock(void *tx_queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
+
int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
struct rte_eth_rss_conf *rss_conf);

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 9c6eaf2..a45d115 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -353,6 +353,8 @@ ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
return nb_tx;
}

+GENERATE_TX_LOCK(ixgbe_xmit_pkts_simple, ixgbe)
+
static inline void
ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
@@ -904,6 +906,8 @@ end_of_tx:
return nb_tx;
}

+GENERATE_TX_LOCK(ixgbe_xmit_pkts, ixgbe)
+
/*********************************************************************
*
* RX functions
@@ -1524,6 +1528,8 @@ ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
return nb_rx;
}

+GENERATE_RX_LOCK(ixgbe_recv_pkts_bulk_alloc, ixgbe)
+
uint16_t
ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts)
@@ -1712,6 +1718,8 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
return nb_rx;
}

+GENERATE_RX_LOCK(ixgbe_recv_pkts, ixgbe)
+
/**
* Detect an RSC descriptor.
*/
@@ -2071,6 +2079,8 @@ ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
}

+GENERATE_RX_LOCK(ixgbe_recv_pkts_lro_single_alloc, ixgbe)
+
uint16_t
ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts)
@@ -2078,6 +2088,8 @@ ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
}

+GENERATE_RX_LOCK(ixgbe_recv_pkts_lro_bulk_alloc, ixgbe)
+
/*********************************************************************
*
* Queue management functions
@@ -2186,10 +2198,12 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
(rte_eal_process_type() != RTE_PROC_PRIMARY ||
ixgbe_txq_vec_setup(txq) == 0)) {
PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
- dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
+ dev->tx_pkt_burst =
+ TX_LOCK_FUNCTION(dev, ixgbe_xmit_pkts_vec);
} else
#endif
- dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
+ dev->tx_pkt_burst =
+ TX_LOCK_FUNCTION(dev, ixgbe_xmit_pkts_simple);
} else {
PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
PMD_INIT_LOG(DEBUG,
@@ -2200,7 +2214,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
" - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
(unsigned long)txq->tx_rs_thresh,
(unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
- dev->tx_pkt_burst = ixgbe_xmit_pkts;
+ dev->tx_pkt_burst = TX_LOCK_FUNCTION(dev, ixgbe_xmit_pkts);
}
}

@@ -2347,6 +2361,7 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
txq->txq_flags = tx_conf->txq_flags;
txq->ops = &def_txq_ops;
txq->tx_deferred_start = tx_conf->tx_deferred_start;
+ rte_spinlock_init(&txq->tx_lock);

/*
* Modification to set VFTDT for virtual function if vf is detected
@@ -2625,6 +2640,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
0 : ETHER_CRC_LEN);
rxq->drop_en = rx_conf->rx_drop_en;
rxq->rx_deferred_start = rx_conf->rx_deferred_start;
+ rte_spinlock_init(&rxq->rx_lock);

/*
* The packet type in RX descriptor is different for different NICs.
@@ -4172,11 +4188,15 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
if (adapter->rx_bulk_alloc_allowed) {
PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
"allocation version");
- dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
+ dev->rx_pkt_burst =
+ RX_LOCK_FUNCTION(dev,
+ ixgbe_recv_pkts_lro_bulk_alloc);
} else {
PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
"allocation version");
- dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
+ dev->rx_pkt_burst =
+ RX_LOCK_FUNCTION(dev,
+ ixgbe_recv_pkts_lro_single_alloc);
}
} else if (dev->data->scattered_rx) {
/*
@@ -4188,12 +4208,16 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
"callback (port=%d).",
dev->data->port_id);

- dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
+ dev->rx_pkt_burst =
+ RX_LOCK_FUNCTION(dev,
+ ixgbe_recv_scattered_pkts_vec);
} else if (adapter->rx_bulk_alloc_allowed) {
PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
"allocation callback (port=%d).",
dev->data->port_id);
- dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
+ dev->rx_pkt_burst =
+ RX_LOCK_FUNCTION(dev,
+ ixgbe_recv_pkts_lro_bulk_alloc);
} else {
PMD_INIT_LOG(DEBUG, "Using Regualr (non-vector, "
"single allocation) "
@@ -4201,7 +4225,9 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
"(port=%d).",
dev->data->port_id);

- dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
+ dev->rx_pkt_burst =
+ RX_LOCK_FUNCTION(dev,
+ ixgbe_recv_pkts_lro_single_alloc);
}
/*
* Below we set "simple" callbacks according to port/queues parameters.
@@ -4217,28 +4243,36 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
RTE_IXGBE_DESCS_PER_LOOP,
dev->data->port_id);

- dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
+ dev->rx_pkt_burst = RX_LOCK_FUNCTION(dev, ixgbe_recv_pkts_vec);
} else if (adapter->rx_bulk_alloc_allowed) {
PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
"satisfied. Rx Burst Bulk Alloc function "
"will be used on port=%d.",
dev->data->port_id);

- dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
+ dev->rx_pkt_burst =
+ RX_LOCK_FUNCTION(dev,
+ ixgbe_recv_pkts_bulk_alloc);
} else {
PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
"satisfied, or Scattered Rx is requested "
"(port=%d).",
dev->data->port_id);

- dev->rx_pkt_burst = ixgbe_recv_pkts;
+ dev->rx_pkt_burst = RX_LOCK_FUNCTION(dev, ixgbe_recv_pkts);
}

/* Propagate information about RX function choice through all queues. */

rx_using_sse =
(dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
+#ifndef RTE_NEXT_ABI
dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
+#else
+ dev->rx_pkt_burst == ixgbe_recv_pkts_vec ||
+ dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec_lock ||
+ dev->rx_pkt_burst == ixgbe_recv_pkts_vec_lock);
+#endif

for (i = 0; i < dev->data->nb_rx_queues; i++) {
struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
@@ -5225,6 +5259,15 @@ ixgbe_recv_pkts_vec(
}

uint16_t __attribute__((weak))
+ixgbe_recv_pkts_vec_lock(
+ void __rte_unused *rx_queue,
+ struct rte_mbuf __rte_unused **rx_pkts,
+ uint16_t __rte_unused nb_pkts)
+{
+ return 0;
+}
+
+uint16_t __attribute__((weak))
ixgbe_recv_scattered_pkts_vec(
void __rte_unused *rx_queue,
struct rte_mbuf __rte_unused **rx_pkts,
@@ -5233,6 +5276,15 @@ ixgbe_recv_scattered_pkts_vec(
return 0;
}

+uint16_t __attribute__((weak))
+ixgbe_recv_scattered_pkts_vec_lock(
+ void __rte_unused *rx_queue,
+ struct rte_mbuf __rte_unused **rx_pkts,
+ uint16_t __rte_unused nb_pkts)
+{
+ return 0;
+}
+
int __attribute__((weak))
ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
{
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
index 3691a19..5f0ca1f 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx.h
@@ -34,6 +34,8 @@
#ifndef _IXGBE_RXTX_H_
#define _IXGBE_RXTX_H_

+#include <rte_spinlock.h>
+
/*
* Rings setup and release.
*
@@ -126,6 +128,7 @@ struct ixgbe_rx_queue {
struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
uint64_t mbuf_initializer; /**< value to init mbufs */
+ rte_spinlock_t rx_lock; /**< Lock for packet receiption. */
uint16_t nb_rx_desc; /**< number of RX descriptors. */
uint16_t rx_tail; /**< current value of RDT register. */
uint16_t nb_rx_hold; /**< number of held free RX desc. */
@@ -212,6 +215,7 @@ struct ixgbe_tx_queue {
struct ixgbe_tx_entry_v *sw_ring_v; /**< address of SW ring for vector PMD */
};
volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
+ rte_spinlock_t tx_lock; /**< Lock for packet transmission. */
uint16_t nb_tx_desc; /**< number of TX descriptors. */
uint16_t tx_tail; /**< current value of TDT reg. */
/**< Start freeing TX buffers if there are less free descriptors than
@@ -301,6 +305,12 @@ uint16_t ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
uint16_t ixgbe_recv_scattered_pkts_vec(void *rx_queue,
struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t ixgbe_recv_pkts_vec_lock(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+uint16_t ixgbe_recv_scattered_pkts_vec_lock(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
int ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev);
int ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq);
void ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq);
@@ -309,6 +319,9 @@ void ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq);

uint16_t ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
+uint16_t ixgbe_xmit_pkts_vec_lock(void *tx_queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
int ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq);

#endif /* RTE_IXGBE_INC_VECTOR */
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
index e97ea82..32ecbd2 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
@@ -420,6 +420,8 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
}

+GENERATE_RX_LOCK(ixgbe_recv_pkts_vec, ixgbe)
+
static inline uint16_t
reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
uint16_t nb_bufs, uint8_t *split_flags)
@@ -526,6 +528,8 @@ ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
&split_flags[i]);
}

+GENERATE_RX_LOCK(ixgbe_recv_scattered_pkts_vec, ixgbe)
+
static inline void
vtx1(volatile union ixgbe_adv_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags)
@@ -680,6 +684,8 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
return nb_pkts;
}

+GENERATE_TX_LOCK(ixgbe_xmit_pkts_vec, ixgbe)
+
static void __attribute__((cold))
ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue *txq)
{
--
2.1.4
Zhe Tao
2016-06-07 06:53:06 UTC
Permalink
Implement the device reset function.
1, Add the fake RX/TX functions.
2, The reset function tries to stop RX/TX by replacing
the RX/TX functions with the fake ones and getting the
locks to make sure the regular RX/TX finished.
3, After the RX/TX stopped, reset the VF port, and then
release the locks and restore the RX/TX functions.

Signed-off-by: Wenzhuo Lu <***@intel.com>
---
doc/guides/rel_notes/release_16_07.rst | 9 +++
drivers/net/ixgbe/ixgbe_ethdev.c | 108 ++++++++++++++++++++++++++++++++-
drivers/net/ixgbe/ixgbe_ethdev.h | 12 +++-
drivers/net/ixgbe/ixgbe_rxtx.c | 42 ++++++++++++-
4 files changed, 168 insertions(+), 3 deletions(-)

diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index a761e3c..d36c4b1 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -53,6 +53,15 @@ New Features
VF. To handle this link up/down event, add the mailbox interruption
support to receive the message.

+* **Added device reset support for ixgbe VF.**
+
+ Added the device reset API. APP can call this API to reset the VF port
+ when it's not working.
+ Based on the mailbox interruption support, when VF reseives the control
+ message from PF, it means the PF link state changes, VF uses the reset
+ callback in the message handler to notice the APP. APP need call the device
+ reset API to reset the VF port.
+

Resolved Issues
---------------
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index fd2682f..1e3520b 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -381,6 +381,8 @@ static int ixgbe_dev_udp_tunnel_port_add(struct rte_eth_dev *dev,
static int ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
struct rte_eth_udp_tunnel *udp_tunnel);

+static int ixgbevf_dev_reset(struct rte_eth_dev *dev);
+
/*
* Define VF Stats MACRO for Non "cleared on read" register
*/
@@ -586,6 +588,7 @@ static const struct eth_dev_ops ixgbevf_eth_dev_ops = {
.reta_query = ixgbe_dev_rss_reta_query,
.rss_hash_update = ixgbe_dev_rss_hash_update,
.rss_hash_conf_get = ixgbe_dev_rss_hash_conf_get,
+ .dev_reset = ixgbevf_dev_reset,
};

/* store statistics names and its offset in stats structure */
@@ -4060,7 +4063,8 @@ ixgbevf_dev_start(struct rte_eth_dev *dev)
ETH_VLAN_EXTEND_MASK;
ixgbevf_vlan_offload_set(dev, mask);

- ixgbevf_dev_rxtx_start(dev);
+ if (ixgbevf_dev_rxtx_start(dev))
+ return -1;

/* check and configure queue intr-vector mapping */
if (dev->data->dev_conf.intr_conf.rxq != 0) {
@@ -7193,6 +7197,108 @@ static void ixgbevf_mbx_process(struct rte_eth_dev *dev)
}

static int
+ixgbevf_dev_reset(struct rte_eth_dev *dev)
+{
+ struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct ixgbe_adapter *adapter =
+ (struct ixgbe_adapter *)dev->data->dev_private;
+ int diag = 0;
+ uint32_t vteiam;
+ uint16_t i;
+ struct ixgbe_rx_queue *rxq;
+ struct ixgbe_tx_queue *txq;
+
+ /* Nothing needs to be done if the device is not started. */
+ if (!dev->data->dev_started)
+ return 0;
+
+ PMD_DRV_LOG(DEBUG, "Link up/down event detected.");
+
+ /**
+ * Stop RX/TX by fake functions and locks.
+ * Fake functions are used to make RX/TX lock easier.
+ */
+ adapter->rx_backup = dev->rx_pkt_burst;
+ adapter->tx_backup = dev->tx_pkt_burst;
+ dev->rx_pkt_burst = ixgbevf_recv_pkts_fake;
+ dev->tx_pkt_burst = ixgbevf_xmit_pkts_fake;
+
+ if (dev->data->rx_queues)
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxq = dev->data->rx_queues[i];
+ rte_spinlock_lock(&rxq->rx_lock);
+ }
+
+ if (dev->data->tx_queues)
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+ rte_spinlock_lock(&txq->tx_lock);
+ }
+
+ /* Performance VF reset. */
+ do {
+ dev->data->dev_started = 0;
+ ixgbevf_dev_stop(dev);
+ if (dev->data->dev_conf.intr_conf.lsc == 0)
+ diag = ixgbe_dev_link_update(dev, 0);
+ if (diag) {
+ PMD_INIT_LOG(INFO, "Ixgbe VF reset: "
+ "Failed to update link.");
+ }
+ rte_delay_ms(1000);
+
+ diag = ixgbevf_dev_start(dev);
+ /*If fail to start the device, need to stop/start it again. */
+ if (diag) {
+ PMD_INIT_LOG(ERR, "Ixgbe VF reset: "
+ "Failed to start device.");
+ continue;
+ }
+ dev->data->dev_started = 1;
+ ixgbevf_dev_stats_reset(dev);
+ if (dev->data->dev_conf.intr_conf.lsc == 0)
+ diag = ixgbe_dev_link_update(dev, 0);
+ if (diag) {
+ PMD_INIT_LOG(INFO, "Ixgbe VF reset: "
+ "Failed to update link.");
+ diag = 0;
+ }
+
+ /**
+ * When the PF link is down, there has chance
+ * that VF cannot operate its registers. Will
+ * check if the registers is written
+ * successfully. If not, repeat stop/start until
+ * the PF link is up, in other words, until the
+ * registers can be written.
+ */
+ vteiam = IXGBE_READ_REG(hw, IXGBE_VTEIAM);
+ /* Reference ixgbevf_intr_enable when checking */
+ } while (diag || vteiam != IXGBE_VF_IRQ_ENABLE_MASK);
+
+ /**
+ * Release the locks for queues.
+ * Restore the RX/TX functions.
+ */
+ if (dev->data->rx_queues)
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxq = dev->data->rx_queues[i];
+ rte_spinlock_unlock(&rxq->rx_lock);
+ }
+
+ if (dev->data->tx_queues)
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+ rte_spinlock_unlock(&txq->tx_lock);
+ }
+
+ dev->rx_pkt_burst = adapter->rx_backup;
+ dev->tx_pkt_burst = adapter->tx_backup;
+
+ return 0;
+}
+
+static int
ixgbevf_dev_interrupt_get_status(struct rte_eth_dev *dev)
{
uint32_t eicr;
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 701107b..d50fad4 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -289,6 +289,8 @@ struct ixgbe_adapter {
struct rte_timecounter systime_tc;
struct rte_timecounter rx_tstamp_tc;
struct rte_timecounter tx_tstamp_tc;
+ eth_rx_burst_t rx_backup;
+ eth_tx_burst_t tx_backup;
};

#define IXGBE_DEV_PRIVATE_TO_HW(adapter)\
@@ -377,7 +379,7 @@ int ixgbevf_dev_rx_init(struct rte_eth_dev *dev);

void ixgbevf_dev_tx_init(struct rte_eth_dev *dev);

-void ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev);
+int ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev);

uint16_t ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
@@ -409,6 +411,14 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t ixgbevf_recv_pkts_fake(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+
+uint16_t ixgbevf_xmit_pkts_fake(void *tx_queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
+
uint16_t ixgbe_xmit_pkts_lock(void *tx_queue,
struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index a45d115..b4e7659 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -5181,7 +5181,7 @@ ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
/*
* [VF] Start Transmit and Receive Units.
*/
-void __attribute__((cold))
+int __attribute__((cold))
ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
{
struct ixgbe_hw *hw;
@@ -5218,7 +5218,15 @@ ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
} while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
if (!poll_ms)
+#ifndef RTE_NEXT_ABI
PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
+#else
+ {
+ PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
+ if (dev->data->dev_conf.txmode.lock_mode)
+ return -1;
+ }
+#endif
}
for (i = 0; i < dev->data->nb_rx_queues; i++) {

@@ -5235,11 +5243,21 @@ ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
} while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
if (!poll_ms)
+#ifndef RTE_NEXT_ABI
+ PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
+#else
+ {
PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
+ if (dev->data->dev_conf.rxmode.lock_mode)
+ return -1;
+ }
+#endif
rte_wmb();
IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);

}
+
+ return 0;
}

/* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
@@ -5290,3 +5308,25 @@ ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
{
return -1;
}
+
+/**
+ * A fake function to stop receiption.
+ */
+uint16_t
+ixgbevf_recv_pkts_fake(void __rte_unused *rx_queue,
+ struct rte_mbuf __rte_unused **rx_pkts,
+ uint16_t __rte_unused nb_pkts)
+{
+ return 0;
+}
+
+/**
+ * A fake function to stop transmission.
+ */
+uint16_t
+ixgbevf_xmit_pkts_fake(void __rte_unused *tx_queue,
+ struct rte_mbuf __rte_unused **tx_pkts,
+ uint16_t __rte_unused nb_pkts)
+{
+ return 0;
+}
--
2.1.4
Ananyev, Konstantin
2016-06-07 10:03:04 UTC
Permalink
-----Original Message-----
From: Tao, Zhe
Sent: Tuesday, June 07, 2016 7:53 AM
Cc: Lu, Wenzhuo; Tao, Zhe; Ananyev, Konstantin; Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: [PATCH v4 4/8] ixgbe: implement device reset on VF
Implement the device reset function.
1, Add the fake RX/TX functions.
2, The reset function tries to stop RX/TX by replacing
the RX/TX functions with the fake ones and getting the
locks to make sure the regular RX/TX finished.
3, After the RX/TX stopped, reset the VF port, and then
release the locks and restore the RX/TX functions.
---
doc/guides/rel_notes/release_16_07.rst | 9 +++
drivers/net/ixgbe/ixgbe_ethdev.c | 108 ++++++++++++++++++++++++++++++++-
drivers/net/ixgbe/ixgbe_ethdev.h | 12 +++-
drivers/net/ixgbe/ixgbe_rxtx.c | 42 ++++++++++++-
4 files changed, 168 insertions(+), 3 deletions(-)
diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index a761e3c..d36c4b1 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -53,6 +53,15 @@ New Features
VF. To handle this link up/down event, add the mailbox interruption
support to receive the message.
+* **Added device reset support for ixgbe VF.**
+
+ Added the device reset API. APP can call this API to reset the VF port
+ when it's not working.
+ Based on the mailbox interruption support, when VF reseives the control
+ message from PF, it means the PF link state changes, VF uses the reset
+ callback in the message handler to notice the APP. APP need call the device
+ reset API to reset the VF port.
+
Resolved Issues
---------------
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index fd2682f..1e3520b 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -381,6 +381,8 @@ static int ixgbe_dev_udp_tunnel_port_add(struct rte_eth_dev *dev,
static int ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
struct rte_eth_udp_tunnel *udp_tunnel);
+static int ixgbevf_dev_reset(struct rte_eth_dev *dev);
+
/*
* Define VF Stats MACRO for Non "cleared on read" register
*/
@@ -586,6 +588,7 @@ static const struct eth_dev_ops ixgbevf_eth_dev_ops = {
.reta_query = ixgbe_dev_rss_reta_query,
.rss_hash_update = ixgbe_dev_rss_hash_update,
.rss_hash_conf_get = ixgbe_dev_rss_hash_conf_get,
+ .dev_reset = ixgbevf_dev_reset,
};
/* store statistics names and its offset in stats structure */
@@ -4060,7 +4063,8 @@ ixgbevf_dev_start(struct rte_eth_dev *dev)
ETH_VLAN_EXTEND_MASK;
ixgbevf_vlan_offload_set(dev, mask);
- ixgbevf_dev_rxtx_start(dev);
+ if (ixgbevf_dev_rxtx_start(dev))
+ return -1;
/* check and configure queue intr-vector mapping */
if (dev->data->dev_conf.intr_conf.rxq != 0) {
@@ -7193,6 +7197,108 @@ static void ixgbevf_mbx_process(struct rte_eth_dev *dev)
}
static int
+ixgbevf_dev_reset(struct rte_eth_dev *dev)
+{
+ struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct ixgbe_adapter *adapter =
+ (struct ixgbe_adapter *)dev->data->dev_private;
+ int diag = 0;
+ uint32_t vteiam;
+ uint16_t i;
+ struct ixgbe_rx_queue *rxq;
+ struct ixgbe_tx_queue *txq;
+
+ /* Nothing needs to be done if the device is not started. */
+ if (!dev->data->dev_started)
+ return 0;
+
+ PMD_DRV_LOG(DEBUG, "Link up/down event detected.");
+
+ /**
+ * Stop RX/TX by fake functions and locks.
+ * Fake functions are used to make RX/TX lock easier.
+ */
+ adapter->rx_backup = dev->rx_pkt_burst;
+ adapter->tx_backup = dev->tx_pkt_burst;
+ dev->rx_pkt_burst = ixgbevf_recv_pkts_fake;
+ dev->tx_pkt_burst = ixgbevf_xmit_pkts_fake;
If you have locking over each queue underneath, why do you still need fake functions?
+
+ if (dev->data->rx_queues)
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxq = dev->data->rx_queues[i];
+ rte_spinlock_lock(&rxq->rx_lock);
+ }
+
+ if (dev->data->tx_queues)
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+ rte_spinlock_lock(&txq->tx_lock);
+ }
Probably worth to create a separate function for the lines above:
lock_all_queues(), unlock_all_queues.
But as I sadi in previous mail - I think that code better be in rte_ethdev.
+
+ /* Performance VF reset. */
+ do {
+ dev->data->dev_started = 0;
+ ixgbevf_dev_stop(dev);
+ if (dev->data->dev_conf.intr_conf.lsc == 0)
+ diag = ixgbe_dev_link_update(dev, 0);
+ if (diag) {
+ PMD_INIT_LOG(INFO, "Ixgbe VF reset: "
+ "Failed to update link.");
+ }
+ rte_delay_ms(1000);
+
+ diag = ixgbevf_dev_start(dev);
+ /*If fail to start the device, need to stop/start it again. */
+ if (diag) {
+ PMD_INIT_LOG(ERR, "Ixgbe VF reset: "
+ "Failed to start device.");
+ continue;
+ }
+ dev->data->dev_started = 1;
+ ixgbevf_dev_stats_reset(dev);
+ if (dev->data->dev_conf.intr_conf.lsc == 0)
+ diag = ixgbe_dev_link_update(dev, 0);
+ if (diag) {
+ PMD_INIT_LOG(INFO, "Ixgbe VF reset: "
+ "Failed to update link.");
+ diag = 0;
+ }
+
+ /**
+ * When the PF link is down, there has chance
+ * that VF cannot operate its registers. Will
+ * check if the registers is written
+ * successfully. If not, repeat stop/start until
+ * the PF link is up, in other words, until the
+ * registers can be written.
+ */
+ vteiam = IXGBE_READ_REG(hw, IXGBE_VTEIAM);
+ /* Reference ixgbevf_intr_enable when checking */
+ } while (diag || vteiam != IXGBE_VF_IRQ_ENABLE_MASK);
+
+ /**
+ * Release the locks for queues.
+ * Restore the RX/TX functions.
+ */
+ if (dev->data->rx_queues)
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxq = dev->data->rx_queues[i];
+ rte_spinlock_unlock(&rxq->rx_lock);
+ }
+
+ if (dev->data->tx_queues)
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+ rte_spinlock_unlock(&txq->tx_lock);
+ }
+
+ dev->rx_pkt_burst = adapter->rx_backup;
+ dev->tx_pkt_burst = adapter->tx_backup;
+
+ return 0;
+}
+
+static int
ixgbevf_dev_interrupt_get_status(struct rte_eth_dev *dev)
{
uint32_t eicr;
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 701107b..d50fad4 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -289,6 +289,8 @@ struct ixgbe_adapter {
struct rte_timecounter systime_tc;
struct rte_timecounter rx_tstamp_tc;
struct rte_timecounter tx_tstamp_tc;
+ eth_rx_burst_t rx_backup;
+ eth_tx_burst_t tx_backup;
};
#define IXGBE_DEV_PRIVATE_TO_HW(adapter)\
@@ -377,7 +379,7 @@ int ixgbevf_dev_rx_init(struct rte_eth_dev *dev);
void ixgbevf_dev_tx_init(struct rte_eth_dev *dev);
-void ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev);
+int ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev);
uint16_t ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
@@ -409,6 +411,14 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
+uint16_t ixgbevf_recv_pkts_fake(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+
+uint16_t ixgbevf_xmit_pkts_fake(void *tx_queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
+
uint16_t ixgbe_xmit_pkts_lock(void *tx_queue,
struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index a45d115..b4e7659 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -5181,7 +5181,7 @@ ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
/*
* [VF] Start Transmit and Receive Units.
*/
-void __attribute__((cold))
+int __attribute__((cold))
ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
{
struct ixgbe_hw *hw;
@@ -5218,7 +5218,15 @@ ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
} while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
if (!poll_ms)
+#ifndef RTE_NEXT_ABI
PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
+#else
+ {
+ PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
+ if (dev->data->dev_conf.txmode.lock_mode)
+ return -1;
+ }
+#endif
}
for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -5235,11 +5243,21 @@ ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
} while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
if (!poll_ms)
+#ifndef RTE_NEXT_ABI
+ PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
+#else
+ {
PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
+ if (dev->data->dev_conf.rxmode.lock_mode)
+ return -1;
+ }
+#endif
Why the code has to be different here?
Thanks
Konstantin
rte_wmb();
IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
}
+
+ return 0;
}
/* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
@@ -5290,3 +5308,25 @@ ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
{
return -1;
}
+
+/**
+ * A fake function to stop receiption.
+ */
+uint16_t
+ixgbevf_recv_pkts_fake(void __rte_unused *rx_queue,
+ struct rte_mbuf __rte_unused **rx_pkts,
+ uint16_t __rte_unused nb_pkts)
+{
+ return 0;
+}
+
+/**
+ * A fake function to stop transmission.
+ */
+uint16_t
+ixgbevf_xmit_pkts_fake(void __rte_unused *tx_queue,
+ struct rte_mbuf __rte_unused **tx_pkts,
+ uint16_t __rte_unused nb_pkts)
+{
+ return 0;
+}
--
2.1.4
Lu, Wenzhuo
2016-06-08 07:24:22 UTC
Permalink
Hi Konstantin,
-----Original Message-----
From: Ananyev, Konstantin
Sent: Tuesday, June 7, 2016 6:03 PM
Cc: Lu, Wenzhuo; Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing;
Zhang, Helin
Subject: RE: [PATCH v4 4/8] ixgbe: implement device reset on VF
-----Original Message-----
From: Tao, Zhe
Sent: Tuesday, June 07, 2016 7:53 AM
Cc: Lu, Wenzhuo; Tao, Zhe; Ananyev, Konstantin; Richardson, Bruce;
Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: [PATCH v4 4/8] ixgbe: implement device reset on VF
Implement the device reset function.
1, Add the fake RX/TX functions.
2, The reset function tries to stop RX/TX by replacing
the RX/TX functions with the fake ones and getting the
locks to make sure the regular RX/TX finished.
3, After the RX/TX stopped, reset the VF port, and then
release the locks and restore the RX/TX functions.
static int
+ixgbevf_dev_reset(struct rte_eth_dev *dev) {
+ struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data-
dev_private);
+ struct ixgbe_adapter *adapter =
+ (struct ixgbe_adapter *)dev->data->dev_private;
+ int diag = 0;
+ uint32_t vteiam;
+ uint16_t i;
+ struct ixgbe_rx_queue *rxq;
+ struct ixgbe_tx_queue *txq;
+
+ /* Nothing needs to be done if the device is not started. */
+ if (!dev->data->dev_started)
+ return 0;
+
+ PMD_DRV_LOG(DEBUG, "Link up/down event detected.");
+
+ /**
+ * Stop RX/TX by fake functions and locks.
+ * Fake functions are used to make RX/TX lock easier.
+ */
+ adapter->rx_backup = dev->rx_pkt_burst;
+ adapter->tx_backup = dev->tx_pkt_burst;
+ dev->rx_pkt_burst = ixgbevf_recv_pkts_fake;
+ dev->tx_pkt_burst = ixgbevf_xmit_pkts_fake;
If you have locking over each queue underneath, why do you still need fake functions?
The fake functions are used to help saving the time of waiting for the locks.
As you see, we want to lock every queue. If we don't use fake functions we have to wait for every queue.
But if the real functions are replaced by fake functions, ideally when we're waiting for the release of the first queue's lock,
the other queues will run into the fake functions. So we need not wait for them and get the locks directly.
+
+ if (dev->data->rx_queues)
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxq = dev->data->rx_queues[i];
+ rte_spinlock_lock(&rxq->rx_lock);
+ }
+
+ if (dev->data->tx_queues)
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+ rte_spinlock_lock(&txq->tx_lock);
+ }
lock_all_queues(), unlock_all_queues.
But as I sadi in previous mail - I think that code better be in rte_ethdev.
We're discussing it in the previous thread :)
@@ -5235,11 +5243,21 @@ ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
} while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
if (!poll_ms)
+#ifndef RTE_NEXT_ABI
+ PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
i); #else
+ {
PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
i);
+ if (dev->data->dev_conf.rxmode.lock_mode)
+ return -1;
+ }
+#endif
Why the code has to be different here?
As you see this rxtx_start may have chance to fail. I want to expose this failure, so the reset function can try again.
Thanks
Konstantin
Ananyev, Konstantin
2016-06-08 08:42:24 UTC
Permalink
-----Original Message-----
From: Lu, Wenzhuo
Sent: Wednesday, June 08, 2016 8:24 AM
Cc: Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: RE: [PATCH v4 4/8] ixgbe: implement device reset on VF
Hi Konstantin,
-----Original Message-----
From: Ananyev, Konstantin
Sent: Tuesday, June 7, 2016 6:03 PM
Cc: Lu, Wenzhuo; Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing;
Zhang, Helin
Subject: RE: [PATCH v4 4/8] ixgbe: implement device reset on VF
-----Original Message-----
From: Tao, Zhe
Sent: Tuesday, June 07, 2016 7:53 AM
Cc: Lu, Wenzhuo; Tao, Zhe; Ananyev, Konstantin; Richardson, Bruce;
Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: [PATCH v4 4/8] ixgbe: implement device reset on VF
Implement the device reset function.
1, Add the fake RX/TX functions.
2, The reset function tries to stop RX/TX by replacing
the RX/TX functions with the fake ones and getting the
locks to make sure the regular RX/TX finished.
3, After the RX/TX stopped, reset the VF port, and then
release the locks and restore the RX/TX functions.
static int
+ixgbevf_dev_reset(struct rte_eth_dev *dev) {
+ struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data-
dev_private);
+ struct ixgbe_adapter *adapter =
+ (struct ixgbe_adapter *)dev->data->dev_private;
+ int diag = 0;
+ uint32_t vteiam;
+ uint16_t i;
+ struct ixgbe_rx_queue *rxq;
+ struct ixgbe_tx_queue *txq;
+
+ /* Nothing needs to be done if the device is not started. */
+ if (!dev->data->dev_started)
+ return 0;
+
+ PMD_DRV_LOG(DEBUG, "Link up/down event detected.");
+
+ /**
+ * Stop RX/TX by fake functions and locks.
+ * Fake functions are used to make RX/TX lock easier.
+ */
+ adapter->rx_backup = dev->rx_pkt_burst;
+ adapter->tx_backup = dev->tx_pkt_burst;
+ dev->rx_pkt_burst = ixgbevf_recv_pkts_fake;
+ dev->tx_pkt_burst = ixgbevf_xmit_pkts_fake;
If you have locking over each queue underneath, why do you still need fake functions?
The fake functions are used to help saving the time of waiting for the locks.
As you see, we want to lock every queue. If we don't use fake functions we have to wait for every queue.
But if the real functions are replaced by fake functions, ideally when we're waiting for the release of the first queue's lock,
the other queues will run into the fake functions. So we need not wait for them and get the locks directly.
Well, data-path invokes only try_lock(), so it shouldn't be affected significantly, right?
Control path still have to spin on lock and grab it before it can proceed, if it'll spin a bit longer
I wouldn't see a big deal here.
What I am trying to say - if we'll go that way - introduce sync control/datapath API anyway,
we don't need any additional tricks here with rx/tx function replacement, correct?
So let's keep it clean and simple, after all it is a control path and not need to be lightning fast.
Konstantin
+
+ if (dev->data->rx_queues)
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxq = dev->data->rx_queues[i];
+ rte_spinlock_lock(&rxq->rx_lock);
+ }
+
+ if (dev->data->tx_queues)
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+ rte_spinlock_lock(&txq->tx_lock);
+ }
lock_all_queues(), unlock_all_queues.
But as I sadi in previous mail - I think that code better be in rte_ethdev.
We're discussing it in the previous thread :)
@@ -5235,11 +5243,21 @@ ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
} while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
if (!poll_ms)
+#ifndef RTE_NEXT_ABI
+ PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
i); #else
+ {
PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
i);
+ if (dev->data->dev_conf.rxmode.lock_mode)
+ return -1;
+ }
+#endif
Why the code has to be different here?
As you see this rxtx_start may have chance to fail. I want to expose this failure, so the reset function can try again.
Thanks
Konstantin
Ananyev, Konstantin
2016-06-08 09:22:25 UTC
Permalink
-----Original Message-----
Sent: Wednesday, June 08, 2016 9:42 AM
Cc: Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: Re: [dpdk-dev] [PATCH v4 4/8] ixgbe: implement device reset on VF
-----Original Message-----
From: Lu, Wenzhuo
Sent: Wednesday, June 08, 2016 8:24 AM
Cc: Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: RE: [PATCH v4 4/8] ixgbe: implement device reset on VF
Hi Konstantin,
-----Original Message-----
From: Ananyev, Konstantin
Sent: Tuesday, June 7, 2016 6:03 PM
Cc: Lu, Wenzhuo; Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing;
Zhang, Helin
Subject: RE: [PATCH v4 4/8] ixgbe: implement device reset on VF
-----Original Message-----
From: Tao, Zhe
Sent: Tuesday, June 07, 2016 7:53 AM
Cc: Lu, Wenzhuo; Tao, Zhe; Ananyev, Konstantin; Richardson, Bruce;
Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: [PATCH v4 4/8] ixgbe: implement device reset on VF
Implement the device reset function.
1, Add the fake RX/TX functions.
2, The reset function tries to stop RX/TX by replacing
the RX/TX functions with the fake ones and getting the
locks to make sure the regular RX/TX finished.
3, After the RX/TX stopped, reset the VF port, and then
release the locks and restore the RX/TX functions.
static int
+ixgbevf_dev_reset(struct rte_eth_dev *dev) {
+ struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data-
dev_private);
+ struct ixgbe_adapter *adapter =
+ (struct ixgbe_adapter *)dev->data->dev_private;
+ int diag = 0;
+ uint32_t vteiam;
+ uint16_t i;
+ struct ixgbe_rx_queue *rxq;
+ struct ixgbe_tx_queue *txq;
+
+ /* Nothing needs to be done if the device is not started. */
+ if (!dev->data->dev_started)
+ return 0;
+
+ PMD_DRV_LOG(DEBUG, "Link up/down event detected.");
+
+ /**
+ * Stop RX/TX by fake functions and locks.
+ * Fake functions are used to make RX/TX lock easier.
+ */
+ adapter->rx_backup = dev->rx_pkt_burst;
+ adapter->tx_backup = dev->tx_pkt_burst;
+ dev->rx_pkt_burst = ixgbevf_recv_pkts_fake;
+ dev->tx_pkt_burst = ixgbevf_xmit_pkts_fake;
If you have locking over each queue underneath, why do you still need fake
functions?
The fake functions are used to help saving the time of waiting for the locks.
As you see, we want to lock every queue. If we don't use fake functions we have to wait for every queue.
But if the real functions are replaced by fake functions, ideally when we're waiting for the release of the first queue's lock,
the other queues will run into the fake functions. So we need not wait for them and get the locks directly.
Well, data-path invokes only try_lock(), so it shouldn't be affected significantly, right?
Control path still have to spin on lock and grab it before it can proceed, if it'll spin a bit longer
I wouldn't see a big deal here.
What I am trying to say - if we'll go that way - introduce sync control/datapath API anyway,
we don't need any additional tricks here with rx/tx function replacement, correct?
So let's keep it clean and simple, after all it is a control path and not need to be lightning fast.
Konstantin
+
+ if (dev->data->rx_queues)
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxq = dev->data->rx_queues[i];
+ rte_spinlock_lock(&rxq->rx_lock);
+ }
+
+ if (dev->data->tx_queues)
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+ rte_spinlock_lock(&txq->tx_lock);
+ }
lock_all_queues(), unlock_all_queues.
But as I sadi in previous mail - I think that code better be in rte_ethdev.
We're discussing it in the previous thread :)
@@ -5235,11 +5243,21 @@ ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
} while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
if (!poll_ms)
+#ifndef RTE_NEXT_ABI
+ PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
i); #else
+ {
PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
i);
+ if (dev->data->dev_conf.rxmode.lock_mode)
+ return -1;
+ }
+#endif
Why the code has to be different here?
As you see this rxtx_start may have chance to fail. I want to expose this failure, so the reset function can try again.
Still not sure I understand what do you mean here...
If you think function should fail here, then why only for lcok enabled, why not to make that change generic?
Thanks
Konstantin
Lu, Wenzhuo
2016-06-12 01:03:12 UTC
Permalink
Hi Konstantin,
-----Original Message-----
From: Ananyev, Konstantin
Sent: Wednesday, June 8, 2016 5:22 PM
Cc: Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: RE: [PATCH v4 4/8] ixgbe: implement device reset on VF
-----Original Message-----
Sent: Wednesday, June 08, 2016 9:42 AM
Cc: Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: Re: [dpdk-dev] [PATCH v4 4/8] ixgbe: implement device reset on VF
-----Original Message-----
From: Lu, Wenzhuo
Sent: Wednesday, June 08, 2016 8:24 AM
Cc: Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: RE: [PATCH v4 4/8] ixgbe: implement device reset on VF
Hi Konstantin,
-----Original Message-----
From: Ananyev, Konstantin
Sent: Tuesday, June 7, 2016 6:03 PM
Cc: Lu, Wenzhuo; Richardson, Bruce; Chen, Jing D; Liang, Cunming;
Wu, Jingjing; Zhang, Helin
Subject: RE: [PATCH v4 4/8] ixgbe: implement device reset on VF
-----Original Message-----
From: Tao, Zhe
Sent: Tuesday, June 07, 2016 7:53 AM
Cc: Lu, Wenzhuo; Tao, Zhe; Ananyev, Konstantin; Richardson,
Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: [PATCH v4 4/8] ixgbe: implement device reset on VF
Implement the device reset function.
1, Add the fake RX/TX functions.
2, The reset function tries to stop RX/TX by replacing
the RX/TX functions with the fake ones and getting the
locks to make sure the regular RX/TX finished.
3, After the RX/TX stopped, reset the VF port, and then
release the locks and restore the RX/TX functions.
static int
+ixgbevf_dev_reset(struct rte_eth_dev *dev) {
+ struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev-
data-
dev_private);
+ struct ixgbe_adapter *adapter =
+ (struct ixgbe_adapter *)dev->data->dev_private;
+ int diag = 0;
+ uint32_t vteiam;
+ uint16_t i;
+ struct ixgbe_rx_queue *rxq;
+ struct ixgbe_tx_queue *txq;
+
+ /* Nothing needs to be done if the device is not started. */
+ if (!dev->data->dev_started)
+ return 0;
+
+ PMD_DRV_LOG(DEBUG, "Link up/down event detected.");
+
+ /**
+ * Stop RX/TX by fake functions and locks.
+ * Fake functions are used to make RX/TX lock easier.
+ */
+ adapter->rx_backup = dev->rx_pkt_burst;
+ adapter->tx_backup = dev->tx_pkt_burst;
+ dev->rx_pkt_burst = ixgbevf_recv_pkts_fake;
+ dev->tx_pkt_burst = ixgbevf_xmit_pkts_fake;
If you have locking over each queue underneath, why do you still
need fake functions?
The fake functions are used to help saving the time of waiting for the locks.
As you see, we want to lock every queue. If we don't use fake functions we
have to wait for every queue.
But if the real functions are replaced by fake functions, ideally
when we're waiting for the release of the first queue's lock, the other queues
will run into the fake functions. So we need not wait for them and get the locks
directly.
Well, data-path invokes only try_lock(), so it shouldn't be affected significantly,
right?
Control path still have to spin on lock and grab it before it can
proceed, if it'll spin a bit longer I wouldn't see a big deal here.
What I am trying to say - if we'll go that way - introduce sync
control/datapath API anyway, we don't need any additional tricks here with
rx/tx function replacement, correct?
So let's keep it clean and simple, after all it is a control path and not need to be
lightning fast.
Konstantin
+
+ if (dev->data->rx_queues)
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxq = dev->data->rx_queues[i];
+ rte_spinlock_lock(&rxq->rx_lock);
+ }
+
+ if (dev->data->tx_queues)
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+ rte_spinlock_lock(&txq->tx_lock);
+ }
lock_all_queues(), unlock_all_queues.
But as I sadi in previous mail - I think that code better be in rte_ethdev.
We're discussing it in the previous thread :)
@@ -5235,11 +5243,21 @@ ixgbevf_dev_rxtx_start(struct rte_eth_dev
*dev)
rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
} while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
if (!poll_ms)
+#ifndef RTE_NEXT_ABI
+ PMD_INIT_LOG(ERR, "Could not enable Rx
Queue %d",
i); #else
+ {
PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
i);
+ if (dev->data->dev_conf.rxmode.lock_mode)
+ return -1;
+ }
+#endif
Why the code has to be different here?
As you see this rxtx_start may have chance to fail. I want to expose this
failure, so the reset function can try again.
Still not sure I understand what do you mean here...
If you think function should fail here, then why only for lcok enabled, why not to
make that change generic?
O, I should make it generic. I'll change it.
Thanks
Konstantin
Lu, Wenzhuo
2016-06-12 00:58:05 UTC
Permalink
Hi Konstantin,
-----Original Message-----
From: Ananyev, Konstantin
Sent: Wednesday, June 8, 2016 4:42 PM
Cc: Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: RE: [PATCH v4 4/8] ixgbe: implement device reset on VF
-----Original Message-----
From: Lu, Wenzhuo
Sent: Wednesday, June 08, 2016 8:24 AM
Cc: Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: RE: [PATCH v4 4/8] ixgbe: implement device reset on VF
Hi Konstantin,
-----Original Message-----
From: Ananyev, Konstantin
Sent: Tuesday, June 7, 2016 6:03 PM
Cc: Lu, Wenzhuo; Richardson, Bruce; Chen, Jing D; Liang, Cunming;
Wu, Jingjing; Zhang, Helin
Subject: RE: [PATCH v4 4/8] ixgbe: implement device reset on VF
-----Original Message-----
From: Tao, Zhe
Sent: Tuesday, June 07, 2016 7:53 AM
Cc: Lu, Wenzhuo; Tao, Zhe; Ananyev, Konstantin; Richardson, Bruce;
Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
Subject: [PATCH v4 4/8] ixgbe: implement device reset on VF
Implement the device reset function.
1, Add the fake RX/TX functions.
2, The reset function tries to stop RX/TX by replacing
the RX/TX functions with the fake ones and getting the
locks to make sure the regular RX/TX finished.
3, After the RX/TX stopped, reset the VF port, and then
release the locks and restore the RX/TX functions.
static int
+ixgbevf_dev_reset(struct rte_eth_dev *dev) {
+ struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data-
dev_private);
+ struct ixgbe_adapter *adapter =
+ (struct ixgbe_adapter *)dev->data->dev_private;
+ int diag = 0;
+ uint32_t vteiam;
+ uint16_t i;
+ struct ixgbe_rx_queue *rxq;
+ struct ixgbe_tx_queue *txq;
+
+ /* Nothing needs to be done if the device is not started. */
+ if (!dev->data->dev_started)
+ return 0;
+
+ PMD_DRV_LOG(DEBUG, "Link up/down event detected.");
+
+ /**
+ * Stop RX/TX by fake functions and locks.
+ * Fake functions are used to make RX/TX lock easier.
+ */
+ adapter->rx_backup = dev->rx_pkt_burst;
+ adapter->tx_backup = dev->tx_pkt_burst;
+ dev->rx_pkt_burst = ixgbevf_recv_pkts_fake;
+ dev->tx_pkt_burst = ixgbevf_xmit_pkts_fake;
If you have locking over each queue underneath, why do you still
need fake functions?
The fake functions are used to help saving the time of waiting for the locks.
As you see, we want to lock every queue. If we don't use fake functions we
have to wait for every queue.
But if the real functions are replaced by fake functions, ideally when
we're waiting for the release of the first queue's lock, the other queues will run
into the fake functions. So we need not wait for them and get the locks directly.
Well, data-path invokes only try_lock(), so it shouldn't be affected significantly, right?
Control path still have to spin on lock and grab it before it can proceed, if it'll
spin a bit longer I wouldn't see a big deal here.
What I am trying to say - if we'll go that way - introduce sync control/datapath
API anyway, we don't need any additional tricks here with rx/tx function
replacement, correct?
So let's keep it clean and simple, after all it is a control path and not need to be
lightning fast.
Konstantin
Agree, it's not necessary to add the fake functions. I'll remove them to make it simple.
+
+ if (dev->data->rx_queues)
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxq = dev->data->rx_queues[i];
+ rte_spinlock_lock(&rxq->rx_lock);
+ }
+
+ if (dev->data->tx_queues)
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+ rte_spinlock_lock(&txq->tx_lock);
+ }
lock_all_queues(), unlock_all_queues.
But as I sadi in previous mail - I think that code better be in rte_ethdev.
We're discussing it in the previous thread :)
@@ -5235,11 +5243,21 @@ ixgbevf_dev_rxtx_start(struct rte_eth_dev
*dev)
rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
} while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
if (!poll_ms)
+#ifndef RTE_NEXT_ABI
+ PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
i); #else
+ {
PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
i);
+ if (dev->data->dev_conf.rxmode.lock_mode)
+ return -1;
+ }
+#endif
Why the code has to be different here?
As you see this rxtx_start may have chance to fail. I want to expose this failure,
so the reset function can try again.
Thanks
Konstantin
Zhe Tao
2016-06-07 06:53:07 UTC
Permalink
Add RX/TX paths with lock for VF. It's used when
the function of link reset on VF is needed.
When the lock for RX/TX is added, the RX/TX can be
stopped. Then we have a chance to reset the VF link.

Please be aware there's performence drop if the lock
path is chosen.

Signed-off-by: Wenzhuo Lu <***@intel.com>
---
drivers/net/e1000/e1000_ethdev.h | 10 ++++++++++
drivers/net/e1000/igb_ethdev.c | 14 +++++++++++---
drivers/net/e1000/igb_rxtx.c | 26 +++++++++++++++++++++-----
3 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index e8bf8da..6a42994 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -319,6 +319,16 @@ uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
uint16_t eth_igb_recv_scattered_pkts(void *rxq,
struct rte_mbuf **rx_pkts, uint16_t nb_pkts);

+uint16_t eth_igb_xmit_pkts_lock(void *txq,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
+uint16_t eth_igb_recv_pkts_lock(void *rxq,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+uint16_t eth_igb_recv_scattered_pkts_lock(void *rxq,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+
int eth_igb_rss_hash_update(struct rte_eth_dev *dev,
struct rte_eth_rss_conf *rss_conf);

diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index b0e5e6a..8aad741 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -909,15 +909,17 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev)
PMD_INIT_FUNC_TRACE();

eth_dev->dev_ops = &igbvf_eth_dev_ops;
- eth_dev->rx_pkt_burst = &eth_igb_recv_pkts;
- eth_dev->tx_pkt_burst = &eth_igb_xmit_pkts;
+ eth_dev->rx_pkt_burst = RX_LOCK_FUNCTION(eth_dev, eth_igb_recv_pkts);
+ eth_dev->tx_pkt_burst = TX_LOCK_FUNCTION(eth_dev, eth_igb_xmit_pkts);

/* for secondary processes, we don't initialise any further as primary
* has already done this work. Only check we don't need a different
* RX function */
if (rte_eal_process_type() != RTE_PROC_PRIMARY){
if (eth_dev->data->scattered_rx)
- eth_dev->rx_pkt_burst = &eth_igb_recv_scattered_pkts;
+ eth_dev->rx_pkt_burst =
+ RX_LOCK_FUNCTION(eth_dev,
+ eth_igb_recv_scattered_pkts);
return 0;
}

@@ -1999,7 +2001,13 @@ eth_igb_supported_ptypes_get(struct rte_eth_dev *dev)
};

if (dev->rx_pkt_burst == eth_igb_recv_pkts ||
+#ifndef RTE_NEXT_ABI
dev->rx_pkt_burst == eth_igb_recv_scattered_pkts)
+#else
+ dev->rx_pkt_burst == eth_igb_recv_scattered_pkts ||
+ dev->rx_pkt_burst == eth_igb_recv_pkts_lock ||
+ dev->rx_pkt_burst == eth_igb_recv_scattered_pkts_lock)
+#endif
return ptypes;
return NULL;
}
diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
index 18aeead..7e97330 100644
--- a/drivers/net/e1000/igb_rxtx.c
+++ b/drivers/net/e1000/igb_rxtx.c
@@ -67,6 +67,7 @@
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
+#include <rte_spinlock.h>

#include "e1000_logs.h"
#include "base/e1000_api.h"
@@ -107,6 +108,7 @@ struct igb_rx_queue {
struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
+ rte_spinlock_t rx_lock; /**< Lock for packet receiption. */
uint16_t nb_rx_desc; /**< number of RX descriptors. */
uint16_t rx_tail; /**< current value of RDT register. */
uint16_t nb_rx_hold; /**< number of held free RX desc. */
@@ -174,6 +176,7 @@ struct igb_tx_queue {
volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
+ rte_spinlock_t tx_lock; /**< Lock for packet transmission. */
volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
uint32_t txd_type; /**< Device-specific TXD type */
uint16_t nb_tx_desc; /**< number of TX descriptors. */
@@ -615,6 +618,8 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
return nb_tx;
}

+GENERATE_TX_LOCK(eth_igb_xmit_pkts, igb)
+
/*********************************************************************
*
* RX functions
@@ -931,6 +936,8 @@ eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
return nb_rx;
}

+GENERATE_RX_LOCK(eth_igb_recv_pkts, igb)
+
uint16_t
eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts)
@@ -1186,6 +1193,8 @@ eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
return nb_rx;
}

+GENERATE_RX_LOCK(eth_igb_recv_scattered_pkts, igb)
+
/*
* Maximum number of Ring Descriptors.
*
@@ -1344,6 +1353,7 @@ eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
txq->port_id = dev->data->port_id;
+ rte_spinlock_init(&txq->tx_lock);

txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
@@ -1361,7 +1371,7 @@ eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);

igb_reset_tx_queue(txq, dev);
- dev->tx_pkt_burst = eth_igb_xmit_pkts;
+ dev->tx_pkt_burst = TX_LOCK_FUNCTION(dev, eth_igb_xmit_pkts);
dev->data->tx_queues[queue_idx] = txq;

return 0;
@@ -1467,6 +1477,7 @@ eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
rxq->port_id = dev->data->port_id;
rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
ETHER_CRC_LEN);
+ rte_spinlock_init(&rxq->rx_lock);

/*
* Allocate RX ring hardware descriptors. A memzone large enough to
@@ -2323,7 +2334,7 @@ eth_igbvf_rx_init(struct rte_eth_dev *dev)

/* Configure and enable each RX queue. */
rctl_bsize = 0;
- dev->rx_pkt_burst = eth_igb_recv_pkts;
+ dev->rx_pkt_burst = RX_LOCK_FUNCTION(dev, eth_igb_recv_pkts);
for (i = 0; i < dev->data->nb_rx_queues; i++) {
uint64_t bus_addr;
uint32_t rxdctl;
@@ -2370,7 +2381,9 @@ eth_igbvf_rx_init(struct rte_eth_dev *dev)
if (!dev->data->scattered_rx)
PMD_INIT_LOG(DEBUG,
"forcing scatter mode");
- dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
+ dev->rx_pkt_burst =
+ RX_LOCK_FUNCTION(dev,
+ eth_igb_recv_scattered_pkts);
dev->data->scattered_rx = 1;
}
} else {
@@ -2381,7 +2394,9 @@ eth_igbvf_rx_init(struct rte_eth_dev *dev)
rctl_bsize = buf_size;
if (!dev->data->scattered_rx)
PMD_INIT_LOG(DEBUG, "forcing scatter mode");
- dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
+ dev->rx_pkt_burst =
+ RX_LOCK_FUNCTION(dev,
+ eth_igb_recv_scattered_pkts);
dev->data->scattered_rx = 1;
}

@@ -2414,7 +2429,8 @@ eth_igbvf_rx_init(struct rte_eth_dev *dev)
if (dev->data->dev_conf.rxmode.enable_scatter) {
if (!dev->data->scattered_rx)
PMD_INIT_LOG(DEBUG, "forcing scatter mode");
- dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
+ dev->rx_pkt_burst =
+ RX_LOCK_FUNCTION(dev, eth_igb_recv_scattered_pkts);
dev->data->scattered_rx = 1;
}
--
2.1.4
Zhe Tao
2016-06-07 06:53:08 UTC
Permalink
Implement the device reset function.
1, Add the fake RX/TX functions.
2, The reset function tries to stop RX/TX by replacing
the RX/TX functions with the fake ones and getting the
locks to make sure the regular RX/TX finished.
3, After the RX/TX stopped, reset the VF port, and then
release the locks and restore the RX/TX functions.

BTW: The definition of some structures are moved from .c
file to .h file.

Signed-off-by: Wenzhuo Lu <***@intel.com>
---
doc/guides/rel_notes/release_16_07.rst | 2 +-
drivers/net/e1000/e1000_ethdev.h | 116 ++++++++++++++++++++++++++++++
drivers/net/e1000/igb_ethdev.c | 104 +++++++++++++++++++++++++++
drivers/net/e1000/igb_rxtx.c | 128 ++++++---------------------------
4 files changed, 243 insertions(+), 107 deletions(-)

diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index d36c4b1..a4c0cc3 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -53,7 +53,7 @@ New Features
VF. To handle this link up/down event, add the mailbox interruption
support to receive the message.

-* **Added device reset support for ixgbe VF.**
+* **Added device reset support for ixgbe/igb VF.**

Added the device reset API. APP can call this API to reset the VF port
when it's not working.
diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 6a42994..4ae03ce 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -34,6 +34,7 @@
#ifndef _E1000_ETHDEV_H_
#define _E1000_ETHDEV_H_
#include <rte_time.h>
+#include <rte_spinlock.h>

/* need update link, bit flag */
#define E1000_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0)
@@ -261,6 +262,113 @@ struct e1000_adapter {
struct rte_timecounter systime_tc;
struct rte_timecounter rx_tstamp_tc;
struct rte_timecounter tx_tstamp_tc;
+ eth_rx_burst_t rx_backup;
+ eth_tx_burst_t tx_backup;
+};
+
+/**
+ * Structure associated with each descriptor of the RX ring of a RX queue.
+ */
+struct igb_rx_entry {
+ struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
+};
+
+/**
+ * Structure associated with each descriptor of the TX ring of a TX queue.
+ */
+struct igb_tx_entry {
+ struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
+ uint16_t next_id; /**< Index of next descriptor in ring. */
+ uint16_t last_id; /**< Index of last scattered descriptor. */
+};
+
+/**
+ * Hardware context number
+ */
+enum igb_advctx_num {
+ IGB_CTX_0 = 0, /**< CTX0 */
+ IGB_CTX_1 = 1, /**< CTX1 */
+ IGB_CTX_NUM = 2, /**< CTX_NUM */
+};
+
+/** Offload features */
+union igb_tx_offload {
+ uint64_t data;
+ struct {
+ uint64_t l3_len:9; /**< L3 (IP) Header Length. */
+ uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+ uint64_t vlan_tci:16; /**< VLAN Tag Control Identifier(CPU order). */
+ uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
+ uint64_t tso_segsz:16; /**< TCP TSO segment size. */
+
+ /* uint64_t unused:8; */
+ };
+};
+
+/**
+ * Strucutre to check if new context need be built
+ */
+struct igb_advctx_info {
+ uint64_t flags; /**< ol_flags related to context build. */
+ /** tx offload: vlan, tso, l2-l3-l4 lengths. */
+ union igb_tx_offload tx_offload;
+ /** compare mask for tx offload. */
+ union igb_tx_offload tx_offload_mask;
+};
+
+/**
+ * Structure associated with each RX queue.
+ */
+struct igb_rx_queue {
+ struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
+ volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
+ uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
+ volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
+ volatile uint32_t *rdh_reg_addr; /**< RDH register address. */
+ struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
+ struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
+ struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
+ rte_spinlock_t rx_lock; /**< Lock for packet receiption. */
+ uint16_t nb_rx_desc; /**< number of RX descriptors. */
+ uint16_t rx_tail; /**< current value of RDT register. */
+ uint16_t nb_rx_hold; /**< number of held free RX desc. */
+ uint16_t rx_free_thresh; /**< max free RX desc to hold. */
+ uint16_t queue_id; /**< RX queue index. */
+ uint16_t reg_idx; /**< RX queue register index. */
+ uint8_t port_id; /**< Device port identifier. */
+ uint8_t pthresh; /**< Prefetch threshold register. */
+ uint8_t hthresh; /**< Host threshold register. */
+ uint8_t wthresh; /**< Write-back threshold register. */
+ uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
+ uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
+};
+
+/**
+ * Structure associated with each TX queue.
+ */
+struct igb_tx_queue {
+ volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
+ uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
+ struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
+ volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
+ rte_spinlock_t tx_lock; /**< Lock for packet transmission. */
+ uint32_t txd_type; /**< Device-specific TXD type */
+ uint16_t nb_tx_desc; /**< number of TX descriptors. */
+ uint16_t tx_tail; /**< Current value of TDT register. */
+ uint16_t tx_head;
+ /**< Index of first used TX descriptor. */
+ uint16_t queue_id; /**< TX queue index. */
+ uint16_t reg_idx; /**< TX queue register index. */
+ uint8_t port_id; /**< Device port identifier. */
+ uint8_t pthresh; /**< Prefetch threshold register. */
+ uint8_t hthresh; /**< Host threshold register. */
+ uint8_t wthresh; /**< Write-back threshold register. */
+ uint32_t ctx_curr;
+ /**< Current used hardware descriptor. */
+ uint32_t ctx_start;
+ /**< Start context position for transmit queue. */
+ struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
+ /**< Hardware context history.*/
};

#define E1000_DEV_PRIVATE(adapter) \
@@ -316,6 +424,14 @@ uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts,
uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

+uint16_t eth_igbvf_xmit_pkts_fake(void *txq,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
+
+uint16_t eth_igbvf_recv_pkts_fake(void *rxq,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+
uint16_t eth_igb_recv_scattered_pkts(void *rxq,
struct rte_mbuf **rx_pkts, uint16_t nb_pkts);

diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index 8aad741..4b78a25 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -268,6 +268,7 @@ static void eth_igb_configure_msix_intr(struct rte_eth_dev *dev);
static void eth_igbvf_interrupt_handler(struct rte_intr_handle *handle,
void *param);
static void igbvf_mbx_process(struct rte_eth_dev *dev);
+static int igbvf_dev_reset(struct rte_eth_dev *dev);

/*
* Define VF Stats MACRO for Non "cleared on read" register
@@ -409,6 +410,7 @@ static const struct eth_dev_ops igbvf_eth_dev_ops = {
.mac_addr_set = igbvf_default_mac_addr_set,
.get_reg_length = igbvf_get_reg_length,
.get_reg = igbvf_get_regs,
+ .dev_reset = igbvf_dev_reset,
};

/* store statistics names and its offset in stats structure */
@@ -2663,6 +2665,108 @@ void igbvf_mbx_process(struct rte_eth_dev *dev)
}

static int
+igbvf_dev_reset(struct rte_eth_dev *dev)
+{
+ struct e1000_adapter *adapter =
+ (struct e1000_adapter *)dev->data->dev_private;
+ struct e1000_hw *hw =
+ E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ int diag = 0;
+ uint32_t eiam;
+ uint16_t i;
+ struct igb_rx_queue *rxq;
+ struct igb_tx_queue *txq;
+ /* Reference igbvf_intr_enable */
+ uint32_t eiam_mbx = 1 << E1000_VTIVAR_MISC_MAILBOX;
+
+ /* Nothing needs to be done if the device is not started. */
+ if (!dev->data->dev_started)
+ return 0;
+
+ PMD_DRV_LOG(DEBUG, "Link up/down event detected.");
+
+ /**
+ * Stop RX/TX by fake functions and locks.
+ * Fake functions are used to make RX/TX lock easier.
+ */
+ adapter->rx_backup = dev->rx_pkt_burst;
+ adapter->tx_backup = dev->tx_pkt_burst;
+ dev->rx_pkt_burst = eth_igbvf_recv_pkts_fake;
+ dev->tx_pkt_burst = eth_igbvf_xmit_pkts_fake;
+
+ if (dev->data->rx_queues)
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxq = dev->data->rx_queues[i];
+ rte_spinlock_lock(&rxq->rx_lock);
+ }
+
+ if (dev->data->tx_queues)
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+ rte_spinlock_lock(&txq->tx_lock);
+ }
+
+ /* Performance VF reset. */
+ do {
+ dev->data->dev_started = 0;
+ igbvf_dev_stop(dev);
+ if (dev->data->dev_conf.intr_conf.lsc == 0)
+ diag = eth_igb_link_update(dev, 0);
+ if (diag) {
+ PMD_INIT_LOG(INFO, "Igb VF reset: "
+ "Failed to update link.");
+ }
+ rte_delay_ms(1000);
+
+ diag = igbvf_dev_start(dev);
+ if (diag) {
+ PMD_INIT_LOG(ERR, "Igb VF reset: "
+ "Failed to start device.");
+ return diag;
+ }
+ dev->data->dev_started = 1;
+ eth_igbvf_stats_reset(dev);
+ if (dev->data->dev_conf.intr_conf.lsc == 0)
+ diag = eth_igb_link_update(dev, 0);
+ if (diag) {
+ PMD_INIT_LOG(INFO, "Igb VF reset: "
+ "Failed to update link.");
+ }
+
+ /**
+ * When the PF link is down, there has chance
+ * that VF cannot operate its registers. Will
+ * check if the registers is written
+ * successfully. If not, repeat stop/start until
+ * the PF link is up, in other words, until the
+ * registers can be written.
+ */
+ eiam = E1000_READ_REG(hw, E1000_EIAM);
+ } while (!(eiam & eiam_mbx));
+
+ /**
+ * Release the locks for queues.
+ * Restore the RX/TX functions.
+ */
+ if (dev->data->rx_queues)
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxq = dev->data->rx_queues[i];
+ rte_spinlock_unlock(&rxq->rx_lock);
+ }
+
+ if (dev->data->tx_queues)
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+ rte_spinlock_unlock(&txq->tx_lock);
+ }
+
+ dev->rx_pkt_burst = adapter->rx_backup;
+ dev->tx_pkt_burst = adapter->tx_backup;
+
+ return 0;
+}
+
+static int
eth_igbvf_interrupt_action(struct rte_eth_dev *dev)
{
struct e1000_interrupt *intr =
diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
index 7e97330..5af7173 100644
--- a/drivers/net/e1000/igb_rxtx.c
+++ b/drivers/net/e1000/igb_rxtx.c
@@ -67,7 +67,6 @@
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
-#include <rte_spinlock.h>

#include "e1000_logs.h"
#include "base/e1000_api.h"
@@ -80,72 +79,6 @@
PKT_TX_L4_MASK | \
PKT_TX_TCP_SEG)

-/**
- * Structure associated with each descriptor of the RX ring of a RX queue.
- */
-struct igb_rx_entry {
- struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
-};
-
-/**
- * Structure associated with each descriptor of the TX ring of a TX queue.
- */
-struct igb_tx_entry {
- struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
- uint16_t next_id; /**< Index of next descriptor in ring. */
- uint16_t last_id; /**< Index of last scattered descriptor. */
-};
-
-/**
- * Structure associated with each RX queue.
- */
-struct igb_rx_queue {
- struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
- volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
- uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
- volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
- volatile uint32_t *rdh_reg_addr; /**< RDH register address. */
- struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
- struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
- struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
- rte_spinlock_t rx_lock; /**< Lock for packet receiption. */
- uint16_t nb_rx_desc; /**< number of RX descriptors. */
- uint16_t rx_tail; /**< current value of RDT register. */
- uint16_t nb_rx_hold; /**< number of held free RX desc. */
- uint16_t rx_free_thresh; /**< max free RX desc to hold. */
- uint16_t queue_id; /**< RX queue index. */
- uint16_t reg_idx; /**< RX queue register index. */
- uint8_t port_id; /**< Device port identifier. */
- uint8_t pthresh; /**< Prefetch threshold register. */
- uint8_t hthresh; /**< Host threshold register. */
- uint8_t wthresh; /**< Write-back threshold register. */
- uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
- uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
-};
-
-/**
- * Hardware context number
- */
-enum igb_advctx_num {
- IGB_CTX_0 = 0, /**< CTX0 */
- IGB_CTX_1 = 1, /**< CTX1 */
- IGB_CTX_NUM = 2, /**< CTX_NUM */
-};
-
-/** Offload features */
-union igb_tx_offload {
- uint64_t data;
- struct {
- uint64_t l3_len:9; /**< L3 (IP) Header Length. */
- uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
- uint64_t vlan_tci:16; /**< VLAN Tag Control Identifier(CPU order). */
- uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
- uint64_t tso_segsz:16; /**< TCP TSO segment size. */
-
- /* uint64_t unused:8; */
- };
-};
-
/*
* Compare mask for igb_tx_offload.data,
* should be in sync with igb_tx_offload layout.
@@ -158,45 +91,6 @@ union igb_tx_offload {
#define TX_TSO_CMP_MASK \
(TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)

-/**
- * Strucutre to check if new context need be built
- */
-struct igb_advctx_info {
- uint64_t flags; /**< ol_flags related to context build. */
- /** tx offload: vlan, tso, l2-l3-l4 lengths. */
- union igb_tx_offload tx_offload;
- /** compare mask for tx offload. */
- union igb_tx_offload tx_offload_mask;
-};
-
-/**
- * Structure associated with each TX queue.
- */
-struct igb_tx_queue {
- volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
- uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
- struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
- rte_spinlock_t tx_lock; /**< Lock for packet transmission. */
- volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
- uint32_t txd_type; /**< Device-specific TXD type */
- uint16_t nb_tx_desc; /**< number of TX descriptors. */
- uint16_t tx_tail; /**< Current value of TDT register. */
- uint16_t tx_head;
- /**< Index of first used TX descriptor. */
- uint16_t queue_id; /**< TX queue index. */
- uint16_t reg_idx; /**< TX queue register index. */
- uint8_t port_id; /**< Device port identifier. */
- uint8_t pthresh; /**< Prefetch threshold register. */
- uint8_t hthresh; /**< Host threshold register. */
- uint8_t wthresh; /**< Write-back threshold register. */
- uint32_t ctx_curr;
- /**< Current used hardware descriptor. */
- uint32_t ctx_start;
- /**< Start context position for transmit queue. */
- struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
- /**< Hardware context history.*/
-};
-
#if 1
#define RTE_PMD_USE_PREFETCH
#endif
@@ -2530,3 +2424,25 @@ igb_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
qinfo->conf.tx_thresh.hthresh = txq->hthresh;
qinfo->conf.tx_thresh.wthresh = txq->wthresh;
}
+
+/**
+ * A fake function to stop transmission.
+ */
+uint16_t
+eth_igbvf_xmit_pkts_fake(void __rte_unused *tx_queue,
+ struct rte_mbuf __rte_unused **tx_pkts,
+ uint16_t __rte_unused nb_pkts)
+{
+ return 0;
+}
+
+/**
+ * A fake function to stop receiption.
+ */
+uint16_t
+eth_igbvf_recv_pkts_fake(void __rte_unused *rx_queue,
+ struct rte_mbuf __rte_unused **rx_pkts,
+ uint16_t __rte_unused nb_pkts)
+{
+ return 0;
+}
--
2.1.4
Zhe Tao
2016-06-07 06:53:09 UTC
Permalink
Add RX/TX paths with lock for VF. It's used when
the function of link reset on VF is needed.
When the lock for RX/TX is added, the RX/TX can be
stopped. Then we have a chance to reset the VF link.

Please be aware there's performence drop if the lock
path is chosen.

Signed-off-by: Zhe Tao <***@intel.com>
---
drivers/net/i40e/i40e_ethdev.c | 4 ++--
drivers/net/i40e/i40e_ethdev.h | 4 ++++
drivers/net/i40e/i40e_ethdev_vf.c | 4 ++--
drivers/net/i40e/i40e_rxtx.c | 45 +++++++++++++++++++++++++--------------
drivers/net/i40e/i40e_rxtx.h | 30 ++++++++++++++++++++++++++
5 files changed, 67 insertions(+), 20 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 24777d5..1380330 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -764,8 +764,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
PMD_INIT_FUNC_TRACE();

dev->dev_ops = &i40e_eth_dev_ops;
- dev->rx_pkt_burst = i40e_recv_pkts;
- dev->tx_pkt_burst = i40e_xmit_pkts;
+ dev->rx_pkt_burst = RX_LOCK_FUNCTION(dev, i40e_recv_pkts);
+ dev->tx_pkt_burst = TX_LOCK_FUNCTION(dev, i40e_xmit_pkts);

/* for secondary processes, we don't initialise any further as primary
* has already done this work. Only check we don't need a different
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index cfd2399..672d920 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -540,6 +540,10 @@ struct i40e_adapter {
struct rte_timecounter systime_tc;
struct rte_timecounter rx_tstamp_tc;
struct rte_timecounter tx_tstamp_tc;
+
+ /* For VF reset backup */
+ eth_rx_burst_t rx_backup;
+ eth_tx_burst_t tx_backup;
};

int i40e_dev_switch_queues(struct i40e_pf *pf, bool on);
diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c
index 90682ac..46d8a7c 100644
--- a/drivers/net/i40e/i40e_ethdev_vf.c
+++ b/drivers/net/i40e/i40e_ethdev_vf.c
@@ -1451,8 +1451,8 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev)

/* assign ops func pointer */
eth_dev->dev_ops = &i40evf_eth_dev_ops;
- eth_dev->rx_pkt_burst = &i40e_recv_pkts;
- eth_dev->tx_pkt_burst = &i40e_xmit_pkts;
+ eth_dev->rx_pkt_burst = RX_LOCK_FUNCTION(eth_dev, i40e_recv_pkts);
+ eth_dev->tx_pkt_burst = TX_LOCK_FUNCTION(eth_dev, i40e_xmit_pkts);

/*
* For secondary processes, we don't initialise any further as primary
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index c833aa3..0a6dcfb 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -79,10 +79,6 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

-static uint16_t i40e_xmit_pkts_simple(void *tx_queue,
- struct rte_mbuf **tx_pkts,
- uint16_t nb_pkts);
-
static inline void
i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp)
{
@@ -1144,7 +1140,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
return 0;
}

-static uint16_t
+uint16_t
i40e_recv_pkts_bulk_alloc(void *rx_queue,
struct rte_mbuf **rx_pkts,
uint16_t nb_pkts)
@@ -1169,7 +1165,7 @@ i40e_recv_pkts_bulk_alloc(void *rx_queue,
return nb_rx;
}
#else
-static uint16_t
+uint16_t
i40e_recv_pkts_bulk_alloc(void __rte_unused *rx_queue,
struct rte_mbuf __rte_unused **rx_pkts,
uint16_t __rte_unused nb_pkts)
@@ -1892,7 +1888,7 @@ tx_xmit_pkts(struct i40e_tx_queue *txq,
return nb_pkts;
}

-static uint16_t
+uint16_t
i40e_xmit_pkts_simple(void *tx_queue,
struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
@@ -2121,10 +2117,13 @@ i40e_dev_supported_ptypes_get(struct rte_eth_dev *dev)
};

if (dev->rx_pkt_burst == i40e_recv_pkts ||
+ dev->rx_pkt_burst == i40e_recv_pkts_lock ||
#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
dev->rx_pkt_burst == i40e_recv_pkts_bulk_alloc ||
+ dev->rx_pkt_burst == i40e_recv_pkts_bulk_alloc_lock ||
#endif
- dev->rx_pkt_burst == i40e_recv_scattered_pkts)
+ dev->rx_pkt_burst == i40e_recv_scattered_pkts ||
+ dev->rx_pkt_burst == i40e_recv_scattered_pkts_lock)
return ptypes;
return NULL;
}
@@ -2648,6 +2647,7 @@ i40e_reset_rx_queue(struct i40e_rx_queue *rxq)

rxq->rxrearm_start = 0;
rxq->rxrearm_nb = 0;
+ rte_spinlock_init(&rxq->rx_lock);
}

void
@@ -2704,6 +2704,7 @@ i40e_reset_tx_queue(struct i40e_tx_queue *txq)

txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
+ rte_spinlock_init(&txq->tx_lock);
}

/* Init the TX queue in hardware */
@@ -3155,12 +3156,12 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
"callback (port=%d).",
dev->data->port_id);

- dev->rx_pkt_burst = i40e_recv_scattered_pkts_vec;
+ dev->rx_pkt_burst = RX_LOCK_FUNCTION(dev, i40e_recv_scattered_pkts_vec);
} else {
PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
"allocation callback (port=%d).",
dev->data->port_id);
- dev->rx_pkt_burst = i40e_recv_scattered_pkts;
+ dev->rx_pkt_burst = RX_LOCK_FUNCTION(dev, i40e_recv_scattered_pkts);
}
/* If parameters allow we are going to choose between the following
* callbacks:
@@ -3174,27 +3175,29 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
RTE_I40E_DESCS_PER_LOOP,
dev->data->port_id);

- dev->rx_pkt_burst = i40e_recv_pkts_vec;
+ dev->rx_pkt_burst = RX_LOCK_FUNCTION(dev, i40e_recv_pkts_vec);
} else if (ad->rx_bulk_alloc_allowed) {
PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
"satisfied. Rx Burst Bulk Alloc function "
"will be used on port=%d.",
dev->data->port_id);

- dev->rx_pkt_burst = i40e_recv_pkts_bulk_alloc;
+ dev->rx_pkt_burst = RX_LOCK_FUNCTION(dev, i40e_recv_pkts_bulk_alloc);
} else {
PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
"satisfied, or Scattered Rx is requested "
"(port=%d).",
dev->data->port_id);

- dev->rx_pkt_burst = i40e_recv_pkts;
+ dev->rx_pkt_burst = RX_LOCK_FUNCTION(dev, i40e_recv_pkts);
}

/* Propagate information about RX function choice through all queues. */
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
rx_using_sse =
(dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec ||
+ dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec_lock ||
+ dev->rx_pkt_burst == i40e_recv_pkts_vec_lock ||
dev->rx_pkt_burst == i40e_recv_pkts_vec);

for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -3250,14 +3253,14 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
if (ad->tx_simple_allowed) {
if (ad->tx_vec_allowed) {
PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
- dev->tx_pkt_burst = i40e_xmit_pkts_vec;
+ dev->tx_pkt_burst = TX_LOCK_FUNCTION(dev, i40e_xmit_pkts_vec);
} else {
PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
- dev->tx_pkt_burst = i40e_xmit_pkts_simple;
+ dev->tx_pkt_burst = TX_LOCK_FUNCTION(dev, i40e_xmit_pkts_simple);
}
} else {
PMD_INIT_LOG(DEBUG, "Xmit tx finally be used.");
- dev->tx_pkt_burst = i40e_xmit_pkts;
+ dev->tx_pkt_burst = TX_LOCK_FUNCTION(dev, i40e_xmit_pkts);
}
}

@@ -3311,3 +3314,13 @@ i40e_xmit_pkts_vec(void __rte_unused *tx_queue,
{
return 0;
}
+
+GENERATE_RX_LOCK(i40e_recv_pkts, i40e)
+GENERATE_RX_LOCK(i40e_recv_pkts_vec, i40e)
+GENERATE_RX_LOCK(i40e_recv_pkts_bulk_alloc, i40e)
+GENERATE_RX_LOCK(i40e_recv_scattered_pkts, i40e)
+GENERATE_RX_LOCK(i40e_recv_scattered_pkts_vec, i40e)
+
+GENERATE_TX_LOCK(i40e_xmit_pkts, i40e)
+GENERATE_TX_LOCK(i40e_xmit_pkts_vec, i40e)
+GENERATE_TX_LOCK(i40e_xmit_pkts_simple, i40e)
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index 98179f0..a1c13b8 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -140,6 +140,7 @@ struct i40e_rx_queue {
bool rx_deferred_start; /**< don't start this queue in dev start */
uint16_t rx_using_sse; /**<flag indicate the usage of vPMD for rx */
uint8_t dcb_tc; /**< Traffic class of rx queue */
+ rte_spinlock_t rx_lock; /**< lock for rx path */
};

struct i40e_tx_entry {
@@ -181,6 +182,7 @@ struct i40e_tx_queue {
bool q_set; /**< indicate if tx queue has been configured */
bool tx_deferred_start; /**< don't start this queue in dev start */
uint8_t dcb_tc; /**< Traffic class of tx queue */
+ rte_spinlock_t tx_lock; /**< lock for tx path */
};

/** Offload features */
@@ -223,6 +225,27 @@ uint16_t i40e_recv_scattered_pkts(void *rx_queue,
uint16_t i40e_xmit_pkts(void *tx_queue,
struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
+uint16_t i40e_xmit_pkts_lock(void *tx_queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
+uint16_t i40e_xmit_pkts_simple(void *tx_queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
+uint16_t i40e_xmit_pkts_simple_lock(void *tx_queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
+uint16_t i40e_recv_pkts_lock(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+uint16_t i40e_recv_scattered_pkts_lock(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+uint16_t i40e_recv_pkts_bulk_alloc(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+uint16_t i40e_recv_pkts_bulk_alloc_lock(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
int i40e_tx_queue_init(struct i40e_tx_queue *txq);
int i40e_rx_queue_init(struct i40e_rx_queue *rxq);
void i40e_free_tx_resources(struct i40e_tx_queue *txq);
@@ -244,12 +267,19 @@ uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue,
struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
+uint16_t i40e_recv_pkts_vec_lock(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+uint16_t i40e_recv_scattered_pkts_vec_lock(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
int i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev);
int i40e_rxq_vec_setup(struct i40e_rx_queue *rxq);
int i40e_txq_vec_setup(struct i40e_tx_queue *txq);
void i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq);
uint16_t i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
+uint16_t i40e_xmit_pkts_vec_lock(void *tx_queue, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
void i40e_set_rx_function(struct rte_eth_dev *dev);
void i40e_set_tx_function_flag(struct rte_eth_dev *dev,
struct i40e_tx_queue *txq);
--
2.1.4
Zhe Tao
2016-06-07 06:53:10 UTC
Permalink
Implement the device reset function.
1, Add the fake RX/TX functions.
2, The reset function tries to stop RX/TX by replacing
the RX/TX functions with the fake ones and getting the
locks to make sure the regular RX/TX finished.
3, After the RX/TX stopped, reset the VF port, and then
release the locks.

Signed-off-by: Zhe Tao <***@intel.com>
---
doc/guides/rel_notes/release_16_07.rst | 5 ++
drivers/net/i40e/i40e_ethdev.h | 7 +-
drivers/net/i40e/i40e_ethdev_vf.c | 152 ++++++++++++++++++++++++++++++++-
drivers/net/i40e/i40e_rxtx.c | 10 +++
drivers/net/i40e/i40e_rxtx.h | 4 +
5 files changed, 172 insertions(+), 6 deletions(-)

diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index a4c0cc3..f43b867 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -62,6 +62,11 @@ New Features
callback in the message handler to notice the APP. APP need call the device
reset API to reset the VF port.

+* **Added VF reset support for i40e VF driver.**
+
+ Added a new implementaion to allow i40e VF driver to
+ reset the functionality and state of itself.
+

Resolved Issues
---------------
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 672d920..dcd6e0f 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -541,9 +541,8 @@ struct i40e_adapter {
struct rte_timecounter rx_tstamp_tc;
struct rte_timecounter tx_tstamp_tc;

- /* For VF reset backup */
- eth_rx_burst_t rx_backup;
- eth_tx_burst_t tx_backup;
+ /* For VF reset */
+ uint8_t reset_number;
};

int i40e_dev_switch_queues(struct i40e_pf *pf, bool on);
@@ -597,6 +596,8 @@ void i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
struct rte_eth_txq_info *qinfo);

+void i40evf_emulate_vf_reset(uint8_t port_id);
+
/* I40E_DEV_PRIVATE_TO */
#define I40E_DEV_PRIVATE_TO_PF(adapter) \
(&((struct i40e_adapter *)adapter)->pf)
diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c
index 46d8a7c..d873d2d 100644
--- a/drivers/net/i40e/i40e_ethdev_vf.c
+++ b/drivers/net/i40e/i40e_ethdev_vf.c
@@ -157,6 +157,12 @@ i40evf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
static void i40evf_handle_pf_event(__rte_unused struct rte_eth_dev *dev,
uint8_t *msg,
uint16_t msglen);
+static int i40evf_dev_uninit(struct rte_eth_dev *eth_dev);
+static int i40evf_dev_init(struct rte_eth_dev *eth_dev);
+static void i40evf_dev_close(struct rte_eth_dev *dev);
+static int i40evf_dev_start(struct rte_eth_dev *dev);
+static int i40evf_dev_configure(struct rte_eth_dev *dev);
+static int i40evf_handle_vf_reset(struct rte_eth_dev *dev);

/* Default hash key buffer for RSS */
static uint32_t rss_key_default[I40E_VFQF_HKEY_MAX_INDEX + 1];
@@ -223,6 +229,7 @@ static const struct eth_dev_ops i40evf_eth_dev_ops = {
.reta_query = i40evf_dev_rss_reta_query,
.rss_hash_update = i40evf_dev_rss_hash_update,
.rss_hash_conf_get = i40evf_dev_rss_hash_conf_get,
+ .dev_reset = i40evf_handle_vf_reset
};

/*
@@ -1309,6 +1316,140 @@ i40evf_uninit_vf(struct rte_eth_dev *dev)
}

static void
+i40e_vf_queue_reset(struct rte_eth_dev *dev)
+{
+ uint16_t i;
+
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
+
+ if (rxq->q_set) {
+ i40e_dev_rx_queue_setup(dev,
+ rxq->queue_id,
+ rxq->nb_rx_desc,
+ rxq->socket_id,
+ &rxq->rxconf,
+ rxq->mp);
+ }
+
+ rxq = dev->data->rx_queues[i];
+ rte_spinlock_trylock(&rxq->rx_lock);
+ }
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ struct i40e_tx_queue *txq = dev->data->tx_queues[i];
+
+ if (txq->q_set) {
+ i40e_dev_tx_queue_setup(dev,
+ txq->queue_id,
+ txq->nb_tx_desc,
+ txq->socket_id,
+ &txq->txconf);
+ }
+
+ txq = dev->data->tx_queues[i];
+ rte_spinlock_trylock(&txq->tx_lock);
+ }
+}
+
+static void
+i40e_vf_reset_dev(struct rte_eth_dev *dev)
+{
+ struct i40e_adapter *adapter =
+ I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+
+ i40evf_dev_close(dev);
+ PMD_DRV_LOG(DEBUG, "i40evf dev close complete");
+ i40evf_dev_uninit(dev);
+ PMD_DRV_LOG(DEBUG, "i40evf dev detached");
+ memset(dev->data->dev_private, 0,
+ (uint64_t)&adapter->reset_number - (uint64_t)adapter);
+
+ i40evf_dev_configure(dev);
+ i40evf_dev_init(dev);
+ PMD_DRV_LOG(DEBUG, "i40evf dev attached");
+ i40e_vf_queue_reset(dev);
+ PMD_DRV_LOG(DEBUG, "i40evf queue reset");
+ i40evf_dev_start(dev);
+ PMD_DRV_LOG(DEBUG, "i40evf dev restart");
+}
+
+static uint16_t
+i40evf_recv_pkts_detach(void __rte_unused *rx_queue,
+ struct rte_mbuf __rte_unused **rx_pkts,
+ uint16_t __rte_unused nb_pkts)
+{
+ return 0;
+}
+
+static uint16_t
+i40evf_xmit_pkts_detach(void __rte_unused *tx_queue,
+ struct rte_mbuf __rte_unused **tx_pkts,
+ uint16_t __rte_unused nb_pkts)
+{
+ return 0;
+}
+
+static int
+i40evf_handle_vf_reset(struct rte_eth_dev *dev)
+{
+ struct i40e_adapter *adapter =
+ I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+ uint16_t i = 0;
+ struct i40e_rx_queue *rxq;
+ struct i40e_tx_queue *txq;
+
+ if (!dev->data->dev_started)
+ return 0;
+
+ adapter->reset_number = 1;
+
+ /**
+ * Stop RX/TX by fake functions and locks.
+ * Fake functions are used to make RX/TX lock easier.
+ */
+ dev->rx_pkt_burst = i40evf_recv_pkts_detach;
+ dev->tx_pkt_burst = i40evf_xmit_pkts_detach;
+
+ if (dev->data->rx_queues)
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxq = dev->data->rx_queues[i];
+ rte_spinlock_lock(&rxq->rx_lock);
+ }
+
+ if (dev->data->tx_queues)
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+ rte_spinlock_lock(&txq->tx_lock);
+ }
+
+ i40e_vf_reset_dev(dev);
+
+ adapter->reset_number = 0;
+
+ if (dev->data->rx_queues)
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxq = dev->data->rx_queues[i];
+ rte_spinlock_unlock(&rxq->rx_lock);
+ }
+
+ if (dev->data->tx_queues)
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+ rte_spinlock_unlock(&txq->tx_lock);
+ }
+
+ return 0;
+}
+
+void
+i40evf_emulate_vf_reset(uint8_t port_id)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+ i40evf_handle_vf_reset(dev);
+}
+
+static void
i40evf_handle_pf_event(__rte_unused struct rte_eth_dev *dev,
uint8_t *msg,
__rte_unused uint16_t msglen)
@@ -1446,13 +1587,18 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev)
struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(\
eth_dev->data->dev_private);
struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+ struct i40e_adapter *adapter =
+ I40E_DEV_PRIVATE_TO_ADAPTER(eth_dev->data->dev_private);

PMD_INIT_FUNC_TRACE();
-
/* assign ops func pointer */
eth_dev->dev_ops = &i40evf_eth_dev_ops;
- eth_dev->rx_pkt_burst = RX_LOCK_FUNCTION(eth_dev, i40e_recv_pkts);
- eth_dev->tx_pkt_burst = TX_LOCK_FUNCTION(eth_dev, i40e_xmit_pkts);
+ if (adapter->reset_number) {
+ eth_dev->rx_pkt_burst =
+ RX_LOCK_FUNCTION(eth_dev, i40e_recv_pkts);
+ eth_dev->tx_pkt_burst =
+ TX_LOCK_FUNCTION(eth_dev, i40e_xmit_pkts);
+ }

/*
* For secondary processes, we don't initialise any further as primary
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 0a6dcfb..f6caf4e 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -2147,6 +2147,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
uint16_t len, i;
uint16_t base, bsf, tc_mapping;
int use_def_burst_func = 1;
+ struct rte_eth_rxconf conf = *rx_conf;

if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF) {
struct i40e_vf *vf =
@@ -2185,6 +2186,8 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
return -ENOMEM;
}
rxq->mp = mp;
+ rxq->socket_id = socket_id;
+ rxq->rxconf = conf;
rxq->nb_rx_desc = nb_desc;
rxq->rx_free_thresh = rx_conf->rx_free_thresh;
rxq->queue_id = queue_idx;
@@ -2364,6 +2367,7 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
uint32_t ring_size;
uint16_t tx_rs_thresh, tx_free_thresh;
uint16_t i, base, bsf, tc_mapping;
+ struct rte_eth_txconf conf = *tx_conf;

if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF) {
struct i40e_vf *vf =
@@ -2487,6 +2491,8 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
}

txq->nb_tx_desc = nb_desc;
+ txq->socket_id = socket_id;
+ txq->txconf = conf;
txq->tx_rs_thresh = tx_rs_thresh;
txq->tx_free_thresh = tx_free_thresh;
txq->pthresh = tx_conf->tx_thresh.pthresh;
@@ -2951,8 +2957,12 @@ void
i40e_dev_free_queues(struct rte_eth_dev *dev)
{
uint16_t i;
+ struct i40e_adapter *adapter =
+ I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);

PMD_INIT_FUNC_TRACE();
+ if (adapter->reset_number)
+ return;

for (i = 0; i < dev->data->nb_rx_queues; i++) {
i40e_dev_rx_queue_release(dev->data->rx_queues[i]);
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index a1c13b8..7ee33dc 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -141,6 +141,8 @@ struct i40e_rx_queue {
uint16_t rx_using_sse; /**<flag indicate the usage of vPMD for rx */
uint8_t dcb_tc; /**< Traffic class of rx queue */
rte_spinlock_t rx_lock; /**< lock for rx path */
+ uint8_t socket_id;
+ struct rte_eth_rxconf rxconf;
};

struct i40e_tx_entry {
@@ -183,6 +185,8 @@ struct i40e_tx_queue {
bool tx_deferred_start; /**< don't start this queue in dev start */
uint8_t dcb_tc; /**< Traffic class of tx queue */
rte_spinlock_t tx_lock; /**< lock for tx path */
+ uint8_t socket_id;
+ struct rte_eth_txconf txconf;
};

/** Offload features */
--
2.1.4
Continue reading on narkive:
Loading...