Discussion:
[dpdk-dev] [PATCH v3 06/11] net/failsafe: add flexible device definition
(too old to reply)
Gaetan Rivet
2017-05-24 15:20:04 UTC
Permalink
Add the "exec" device type.
The parameters given to this type of device will be executed in a shell.
The output of this command is then used as a definition for a device.

That command can be re-interpreted if the related device is not
plugged-in. It allows for a device definition to react to system
changes (e.g. changing PCI bus for a given device).

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/fail_safe.rst | 20 +++++++
drivers/net/failsafe/failsafe_args.c | 99 +++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ether.c | 7 +++
drivers/net/failsafe/failsafe_private.h | 4 ++
4 files changed, 130 insertions(+)

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index c04891a..1b6e110 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -91,6 +91,19 @@ Fail-safe command line parameters
additional sub-device parameters if need be. They will be passed on to the
sub-device.

+- **exec(<shell command>)** parameter
+
+ This parameter allows the user to provide a command to the fail-safe PMD to
+ execute and define a sub-device.
+ It is done within a regular shell context.
+ The first line of its output is read by the fail-safe PMD and otherwise
+ interpreted as if passed by the regular **dev** parameter.
+ Any other line is discarded.
+ If the command fail or output an incorrect string, the sub-device is not
+ initialized.
+ All commas within the ``shell command`` are replaced by spaces before
+ executing the command. This helps using scripts to specify devices.
+
- **mac** parameter [MAC address]

This parameter allows the user to set a default MAC address to the fail-safe
@@ -126,6 +139,13 @@ This section shows some example of using **testpmd** with a fail-safe PMD.
--vdev='net_failsafe0,mac=de:ad:be:ef:01:02,dev(84:00.0),dev(net_ring0,nodeaction=r1:0:CREATE)' -- \
-i

+#. Start testpmd using a flexible device definition
+
+ .. code-block:: console
+
+ $RTE_TARGET/build/app/testpmd -c 0xff -n 4 --no-pci \
+ --vdev='net_failsafe0,exec(echo 84:00.0)' -- -i
+
Using the Fail-safe PMD from an application
-------------------------------------------

diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
index 8f334aa..c723ca3 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -30,6 +30,8 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
+#include <assert.h>
#include <string.h>
#include <errno.h>

@@ -96,6 +98,75 @@ fs_parse_device(struct sub_device *sdev, char *args)
return 0;
}

+static void
+fs_sanitize_cmdline(char *args)
+{
+ size_t len;
+
+ len = strnlen(args, DEVARGS_MAXLEN);
+ args[len - 1] = '\0';
+}
+
+static int
+fs_execute_cmd(struct sub_device *sdev, char *cmdline)
+{
+ FILE *fp;
+ /* store possible newline as well */
+ char output[DEVARGS_MAXLEN + 1];
+ size_t len;
+ int old_err;
+ int ret;
+
+ assert(cmdline != NULL || sdev->cmdline != NULL);
+ if (sdev->cmdline == NULL) {
+ char *new_str;
+ size_t i;
+
+ len = strlen(cmdline) + 1;
+ new_str = rte_realloc(sdev->cmdline, len,
+ RTE_CACHE_LINE_SIZE);
+ if (new_str == NULL) {
+ ERROR("Command line allocation failed");
+ return -ENOMEM;
+ }
+ sdev->cmdline = new_str;
+ snprintf(sdev->cmdline, len, "%s", cmdline);
+ /* Replace all commas in the command line by spaces */
+ for (i = 0; i < len; i++)
+ if (sdev->cmdline[i] == ',')
+ sdev->cmdline[i] = ' ';
+ }
+ DEBUG("'%s'", sdev->cmdline);
+ old_err = errno;
+ fp = popen(sdev->cmdline, "r");
+ if (fp == NULL) {
+ ret = errno;
+ ERROR("popen: %s", strerror(errno));
+ errno = old_err;
+ return ret;
+ }
+ /* We only read one line */
+ if (fgets(output, sizeof(output) - 1, fp) == NULL) {
+ DEBUG("Could not read command output");
+ return -ENODEV;
+ }
+ fs_sanitize_cmdline(output);
+ ret = fs_parse_device(sdev, output);
+ if (ret) {
+ ERROR("Parsing device '%s' failed", output);
+ goto ret_pclose;
+ }
+ret_pclose:
+ ret = pclose(fp);
+ if (ret) {
+ ret = errno;
+ ERROR("pclose: %s", strerror(errno));
+ errno = old_err;
+ return ret;
+ }
+ return ret;
+}
+
static int
fs_parse_device_param(struct rte_eth_dev *dev, const char *param,
uint8_t head)
@@ -130,6 +201,14 @@ fs_parse_device_param(struct rte_eth_dev *dev, const char *param,
ret = fs_parse_device(sdev, args);
if (ret)
goto free_args;
+ } else if (strncmp(param, "exec", 4) == 0) {
+ ret = fs_execute_cmd(sdev, args);
+ if (ret == -ENODEV) {
+ DEBUG("Reading device info from command line failed");
+ ret = 0;
+ }
+ if (ret)
+ goto free_args;
} else {
ERROR("Unrecognized device type: %.*s", (int)b, param);
return -EINVAL;
@@ -331,6 +410,8 @@ failsafe_args_free(struct rte_eth_dev *dev)
uint8_t i;

FOREACH_SUBDEV(sdev, i, dev) {
+ rte_free(sdev->cmdline);
+ sdev->cmdline = NULL;
free(sdev->devargs.args);
sdev->devargs.args = NULL;
}
@@ -361,3 +442,21 @@ failsafe_args_count_subdevice(struct rte_eth_dev *dev,
return fs_parse_sub_devices(fs_count_device,
dev, params);
}
+
+int
+failsafe_args_parse_subs(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret = 0;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state >= DEV_PARSED)
+ continue;
+ if (sdev->cmdline)
+ ret = fs_execute_cmd(sdev, sdev->cmdline);
+ if (ret == 0)
+ sdev->state = DEV_PARSED;
+ }
+ return 0;
+}
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 7910952..2a1535e 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -188,6 +188,13 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
int ret;
uint8_t i;

+ if (PRIV(dev)->state < DEV_PARSED)
+ return 0;
+
+ ret = failsafe_args_parse_subs(dev);
+ if (ret)
+ return ret;
+
if (PRIV(dev)->state < DEV_PROBED)
return 0;
ret = failsafe_eal_init(dev);
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 8fb72fe..554d7a3 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -44,6 +44,7 @@
#define PMD_FAILSAFE_PLUG_IN_POLL_KVARG "hotplug_poll"
#define PMD_FAILSAFE_PARAM_STRING \
"dev(<ifc>)," \
+ "exec(<shell command>)," \
"mac=mac_addr," \
"hotplug_poll=u64" \
""
@@ -87,6 +88,8 @@ struct sub_device {
struct rte_eth_dev *edev;
/* Device state machine */
enum dev_state state;
+ /* Some device are defined as a command line */
+ char *cmdline;
};

struct fs_priv {
@@ -135,6 +138,7 @@ uint16_t failsafe_tx_burst(void *txq,
int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
void failsafe_args_free(struct rte_eth_dev *dev);
int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
+int failsafe_args_parse_subs(struct rte_eth_dev *dev);

/* EAL */
--
2.1.4
Gaetan Rivet
2017-05-24 15:20:06 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/features/failsafe.ini | 6 ++
drivers/net/failsafe/failsafe_ops.c | 131 +++++++++++++++++++++++++++++++++-
2 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
index 9167b59..257f579 100644
--- a/doc/guides/nics/features/failsafe.ini
+++ b/doc/guides/nics/features/failsafe.ini
@@ -14,6 +14,12 @@ Unicast MAC filter = Y
Multicast MAC filter = Y
VLAN filter = Y
Flow API = Y
+VLAN offload = Y
+QinQ offload = Y
+L3 checksum offload = Y
+L4 checksum offload = Y
+Inner L3 checksum = Y
+Inner L4 checksum = Y
Packet type parsing = Y
Basic stats = Y
Stats per queue = Y
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 4cb2e90..5fb0135 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -63,22 +63,149 @@ static struct rte_eth_dev_info default_infos = {
.nb_seg_max = UINT16_MAX,
.nb_mtu_seg_max = UINT16_MAX,
},
- /* Set of understood capabilities */
- .rx_offload_capa = 0x0,
+ /*
+ * Set of capabilities that can be verified upon
+ * configuring a sub-device.
+ */
+ .rx_offload_capa =
+ DEV_RX_OFFLOAD_VLAN_STRIP |
+ DEV_RX_OFFLOAD_QINQ_STRIP |
+ DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_LRO,
.tx_offload_capa = 0x0,
.flow_type_rss_offloads = 0x0,
};

+/**
+ * Check whether a specific offloading capability
+ * is supported by a sub_device.
+ *
+ * @return
+ * 0: all requested capabilities are supported by the sub_device
+ * positive value: This flag at least is not supported by the sub_device
+ */
+static int
+fs_port_offload_validate(struct rte_eth_dev *dev,
+ struct sub_device *sdev)
+{
+ struct rte_eth_dev_info infos = {0};
+ struct rte_eth_conf *cf;
+ uint32_t cap;
+
+ cf = &dev->data->dev_conf;
+ SUBOPS(sdev, dev_infos_get)(ETH(sdev), &infos);
+ /* RX capabilities */
+ cap = infos.rx_offload_capa;
+ if (cf->rxmode.hw_vlan_strip &&
+ ((cap & DEV_RX_OFFLOAD_VLAN_STRIP) == 0)) {
+ WARN("VLAN stripping offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_VLAN_STRIP;
+ }
+ if (cf->rxmode.hw_ip_checksum &&
+ ((cap & (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM)) !=
+ (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM))) {
+ WARN("IP checksum offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM;
+ }
+ if (cf->rxmode.enable_lro &&
+ ((cap & DEV_RX_OFFLOAD_TCP_LRO) == 0)) {
+ WARN("TCP LRO offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_TCP_LRO;
+ }
+ if (cf->rxmode.hw_vlan_extend &&
+ ((cap & DEV_RX_OFFLOAD_QINQ_STRIP) == 0)) {
+ WARN("Stacked VLAN stripping offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_QINQ_STRIP;
+ }
+ /* TX capabilities */
+ /* Nothing to do, no tx capa supported */
+ return 0;
+}
+
+/*
+ * Disable the dev_conf flag related to an offload capability flag
+ * within an ethdev configuration.
+ */
+static int
+fs_port_disable_offload(struct rte_eth_conf *cf,
+ uint32_t ol_cap)
+{
+ switch (ol_cap) {
+ case DEV_RX_OFFLOAD_VLAN_STRIP:
+ INFO("Disabling VLAN stripping offload");
+ cf->rxmode.hw_vlan_strip = 0;
+ break;
+ case DEV_RX_OFFLOAD_IPV4_CKSUM:
+ case DEV_RX_OFFLOAD_UDP_CKSUM:
+ case DEV_RX_OFFLOAD_TCP_CKSUM:
+ case (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM):
+ INFO("Disabling IP checksum offload");
+ cf->rxmode.hw_ip_checksum = 0;
+ break;
+ case DEV_RX_OFFLOAD_TCP_LRO:
+ INFO("Disabling TCP LRO offload");
+ cf->rxmode.enable_lro = 0;
+ break;
+ case DEV_RX_OFFLOAD_QINQ_STRIP:
+ INFO("Disabling stacked VLAN stripping offload");
+ cf->rxmode.hw_vlan_extend = 0;
+ break;
+ default:
+ DEBUG("Unable to disable offload capability: %" PRIx32,
+ ol_cap);
+ return -1;
+ }
+ return 0;
+}
+
static int
fs_dev_configure(struct rte_eth_dev *dev)
{
struct sub_device *sdev;
uint8_t i;
+ int capa_flag;
int ret;

FOREACH_SUBDEV(sdev, i, dev) {
if (sdev->state != DEV_PROBED)
continue;
+ DEBUG("Checking capabilities for sub_device %d", i);
+ while ((capa_flag = fs_port_offload_validate(dev, sdev))) {
+ /*
+ * Refuse to change configuration if multiple devices
+ * are present and we already have configured at least
+ * some of them.
+ */
+ if (PRIV(dev)->state >= DEV_ACTIVE &&
+ PRIV(dev)->subs_tail > 1) {
+ ERROR("device already configured, cannot fix live configuration");
+ return -1;
+ }
+ ret = fs_port_disable_offload(&dev->data->dev_conf,
+ capa_flag);
+ if (ret) {
+ ERROR("Unable to disable offload capability");
+ return ret;
+ }
+ }
+ }
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_PROBED)
+ continue;
DEBUG("Configuring sub-device %d", i);
ret = rte_eth_dev_configure(PORT_ID(sdev),
dev->data->nb_rx_queues,
--
2.1.4
Gaetan Rivet
2017-05-24 15:19:59 UTC
Permalink
Other configuration items (i.e. MAC addresses) are stored within
rte_eth_dev_data, but not this one.

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_ether/rte_ethdev.c | 19 ++++++++++++++++++-
lib/librte_ether/rte_ethdev.h | 10 ++++++++++
2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index ff4f5ab..c8c5e73 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1976,6 +1976,7 @@ int
rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
{
struct rte_eth_dev *dev;
+ int ret;

RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
dev = &rte_eth_devices[port_id];
@@ -1991,7 +1992,23 @@ rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_filter_set, -ENOTSUP);

- return (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on);
+ ret = (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on);
+ if (ret == 0) {
+ struct rte_vlan_filter_conf *vfc;
+ int vidx;
+ int vbit;
+
+ vfc = &dev->data->vlan_filter_conf;
+ vidx = vlan_id / 64;
+ vbit = vlan_id % 64;
+
+ if (on)
+ vfc->ids[vidx] |= UINT64_C(1) << vbit;
+ else
+ vfc->ids[vidx] &= ~(UINT64_C(1) << vbit);
+ }
+
+ return ret;
}

int
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 0f38b45..33cc48c 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -374,6 +374,14 @@ enum rte_vlan_type {
};

/**
+ * A structure used to describe a vlan filter.
+ * If the bit corresponding to a VID is set, such VID is on.
+ */
+struct rte_vlan_filter_conf {
+ uint64_t ids[64];
+};
+
+/**
* A structure used to configure the Receive Side Scaling (RSS) feature
* of an Ethernet port.
* If not NULL, the *rss_key* pointer of the *rss_conf* structure points
@@ -1738,6 +1746,8 @@ struct rte_eth_dev_data {
enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */
int numa_node; /**< NUMA node connection */
const char *drv_name; /**< Driver name */
+ struct rte_vlan_filter_conf vlan_filter_conf;
+ /**< VLAN filter configuration. */
};

/** Device supports hotplug detach */
--
2.1.4
Gaetan Rivet
2017-05-24 15:20:02 UTC
Permalink
Introduce the fail-safe poll mode driver initialization and enable its
build infrastructure.

This PMD allows for applications to benefit from true hot-plugging
support without having to implement it.

It intercepts and manages Ethernet device removal events issued by
slave PMDs and re-initializes them transparently when brought back.
It also allows defining a contingency to the removal of a device, by
designating a fail-over device that will take on transmitting operations
if the preferred device is removed.

Applications only see a fail-safe instance, without caring for
underlying activity ensuring their continued operations.

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
MAINTAINERS | 5 +
config/common_base | 6 +
doc/guides/nics/fail_safe.rst | 133 +++++++
doc/guides/nics/features/failsafe.ini | 24 ++
doc/guides/nics/index.rst | 1 +
drivers/net/Makefile | 2 +
drivers/net/failsafe/Makefile | 72 ++++
drivers/net/failsafe/failsafe.c | 231 +++++++++++
drivers/net/failsafe/failsafe_args.c | 331 ++++++++++++++++
drivers/net/failsafe/failsafe_eal.c | 160 ++++++++
drivers/net/failsafe/failsafe_ops.c | 663 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_private.h | 227 +++++++++++
drivers/net/failsafe/failsafe_rxtx.c | 107 ++++++
mk/rte.app.mk | 1 +
14 files changed, 1963 insertions(+)
create mode 100644 doc/guides/nics/fail_safe.rst
create mode 100644 doc/guides/nics/features/failsafe.ini
create mode 100644 drivers/net/failsafe/Makefile
create mode 100644 drivers/net/failsafe/failsafe.c
create mode 100644 drivers/net/failsafe/failsafe_args.c
create mode 100644 drivers/net/failsafe/failsafe_eal.c
create mode 100644 drivers/net/failsafe/failsafe_ops.c
create mode 100644 drivers/net/failsafe/failsafe_private.h
create mode 100644 drivers/net/failsafe/failsafe_rxtx.c

diff --git a/MAINTAINERS b/MAINTAINERS
index afb4cab..497a2cf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -333,6 +333,11 @@ F: drivers/net/enic/
F: doc/guides/nics/enic.rst
F: doc/guides/nics/features/enic.ini

+Fail-safe PMD
+M: Gaetan Rivet <***@6wind.com>
+F: drivers/net/failsafe/
+F: doc/guides/nics/fail_safe.rst
+
Intel e1000
M: Wenzhuo Lu <***@intel.com>
F: drivers/net/e1000/
diff --git a/config/common_base b/config/common_base
index 8907bea..ba3c879 100644
--- a/config/common_base
+++ b/config/common_base
@@ -425,6 +425,12 @@ CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
CONFIG_RTE_LIBRTE_PMD_NULL=y

#
+# Compile fail-safe PMD
+#
+CONFIG_RTE_LIBRTE_PMD_FAILSAFE=y
+CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG=n
+
+#
# Do prefetch of packet data within PMD driver receive function
#
CONFIG_RTE_PMD_PACKET_PREFETCH=y
diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
new file mode 100644
index 0000000..056f85f
--- /dev/null
+++ b/doc/guides/nics/fail_safe.rst
@@ -0,0 +1,133 @@
+.. BSD LICENSE
+ Copyright 2017 6WIND S.A.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of 6WIND S.A. nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Fail-safe poll mode driver library
+==================================
+
+The Fail-safe poll mode driver library (**librte_pmd_failsafe**) is a virtual
+device that allows using any device supporting hotplug (sudden device removal
+and plugging on its bus), without modifying other components relying on such
+device (application, other PMDs).
+
+Additionally to the Seamless Hotplug feature, the Fail-safe PMD offers the
+ability to redirect operations to secondary devices when the primary has been
+removed from the system.
+
+.. note::
+
+ The library is enabled by default. You can enable it or disable it manually
+ by setting the ``CONFIG_RTE_LIBRTE_PMD_FAILSAFE`` configuration option.
+
+Features
+--------
+
+The Fail-safe PMD only supports a limited set of features. If you plan to use a
+device underneath the Fail-safe PMD with a specific feature, this feature must
+be supported by the Fail-safe PMD to avoid throwing any error.
+
+Check the feature matrix for the complete set of supported features.
+
+Compilation options
+-------------------
+
+These options can be modified in the ``$RTE_TARGET/build/.config`` file.
+
+- ``CONFIG_RTE_LIBRTE_PMD_FAILSAFE`` (default **y**)
+
+ Toggle compiling librte_pmd_failsafe itself.
+
+- ``CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG`` (default **n**)
+
+ Toggle debugging code.
+
+Using the Fail-safe PMD from the EAL command line
+-------------------------------------------------
+
+The Fail-safe PMD can be used like most other DPDK virtual devices, by passing a
+``--vdev`` parameter to the EAL when starting the application. The device name
+must start with the *net_failsafe* prefix, followed by numbers or letters. This
+name must be unique for each device. Each fail-safe instance must have at least one
+sub-device, up to ``RTE_MAX_ETHPORTS-1``.
+
+A sub-device can be any legal DPDK device, including possibly another fail-safe
+instance.
+
+Fail-safe command line parameters
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- **dev(<iface>)** parameter
+
+ This parameter allows the user to define a sub-device. The ``<iface>`` part of
+ this parameter must be a valid device definition. It could be the argument
+ provided to a ``-w`` PCI device specification or the argument that would be
+ given to a ``--vdev`` parameter (including a fail-safe).
+ Enclosing the device definition within parenthesis here allows using
+ additional sub-device parameters if need be. They will be passed on to the
+ sub-device.
+
+- **mac** parameter [MAC address]
+
+ This parameter allows the user to set a default MAC address to the fail-safe
+ and all of its sub-devices.
+ If no default mac address is provided, the fail-safe PMD will read the MAC
+ address of the first of its sub-device to be successfully probed and use it as
+ its default MAC address, trying to set it to all of its other sub-devices.
+ If no sub-device was successfully probed at initialization, then a random MAC
+ address is generated, that will be subsequently applied to all sub-device once
+ they are probed.
+
+Usage example
+~~~~~~~~~~~~~
+
+This section shows some example of using **testpmd** with a fail-safe PMD.
+
+#. Request huge pages:
+
+ .. code-block:: console
+
+ echo 1024 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+#. Start testpmd
+
+ .. code-block:: console
+
+ $RTE_TARGET/build/app/testpmd -c 0xff -n 4 --no-pci \
+ --vdev='net_failsafe0,mac=de:ad:be:ef:01:02,dev(84:00.0),dev(net_ring0,nodeaction=r1:0:CREATE)' -- \
+ -i
+
+Using the Fail-safe PMD from an application
+-------------------------------------------
+
+This driver strives to be as seamless as possible to existing applications, in
+order to propose the hotplug functionality in the easiest way possible.
+
+Care must be taken, however, to respect the **ether** API concerning device
+access, and in particular, using the ``RTE_ETH_FOREACH_DEV`` macro to iterate
+over ethernet devices, instead of directly accessing them or by writing one's
+own device iterator.
diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
new file mode 100644
index 0000000..3c52823
--- /dev/null
+++ b/doc/guides/nics/features/failsafe.ini
@@ -0,0 +1,24 @@
+;
+; Supported features of the 'fail-safe' poll mode driver.
+;
+; Refer to default.ini for the full list of available PMD features.
+;
+[Features]
+Link status = Y
+Queue start/stop = Y
+MTU update = Y
+Jumbo frame = Y
+Promiscuous mode = Y
+Allmulticast mode = Y
+Unicast MAC filter = Y
+Multicast MAC filter = Y
+VLAN filter = Y
+Packet type parsing = Y
+Basic stats = Y
+Stats per queue = Y
+ARMv7 = Y
+ARMv8 = Y
+Power8 = Y
+x86-32 = Y
+x86-64 = Y
+Usage doc = Y
diff --git a/doc/guides/nics/index.rst b/doc/guides/nics/index.rst
index 240d082..17eaaf4 100644
--- a/doc/guides/nics/index.rst
+++ b/doc/guides/nics/index.rst
@@ -64,6 +64,7 @@ Network Interface Controller Drivers
vhost
vmxnet3
pcap_ring
+ fail_safe

**Figures**

diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 35ed813..d33c959 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -59,6 +59,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_ENA_PMD) += ena
DEPDIRS-ena = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic
DEPDIRS-enic = $(core-libs) librte_hash
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe
+DEPDIRS-failsafe = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
DEPDIRS-fm10k = $(core-libs) librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
new file mode 100644
index 0000000..06199ad
--- /dev/null
+++ b/drivers/net/failsafe/Makefile
@@ -0,0 +1,72 @@
+# BSD LICENSE
+#
+# Copyright 2017 6WIND S.A.
+# Copyright 2017 Mellanox.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of 6WIND S.A. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# Library name
+LIB = librte_pmd_failsafe.a
+
+# Sources are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_args.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
+
+# No exported include files
+
+# This lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_kvargs
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_mbuf
+
+ifneq ($(DEBUG),)
+CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG := y
+endif
+
+# Basic CFLAGS:
+CFLAGS += -std=gnu99 -Wall -Wextra
+CFLAGS += -I.
+CFLAGS += -D_BSD_SOURCE
+CFLAGS += -D_XOPEN_SOURCE=700
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -Wno-strict-prototypes
+CFLAGS += -pedantic -DPEDANTIC
+
+ifeq ($(CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG),y)
+CFLAGS += -g -UNDEBUG
+else
+CFLAGS += -O3
+CFLAGS += -DNDEBUG
+endif
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
new file mode 100644
index 0000000..7cf33e8
--- /dev/null
+++ b/drivers/net/failsafe/failsafe.c
@@ -0,0 +1,231 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <rte_alarm.h>
+#include <rte_malloc.h>
+#include <rte_ethdev.h>
+#include <rte_ethdev_vdev.h>
+#include <rte_devargs.h>
+#include <rte_kvargs.h>
+#include <rte_vdev.h>
+
+#include "failsafe_private.h"
+
+const char pmd_failsafe_driver_name[] = FAILSAFE_DRIVER_NAME;
+static const struct rte_eth_link eth_link = {
+ .link_speed = ETH_SPEED_NUM_10G,
+ .link_duplex = ETH_LINK_FULL_DUPLEX,
+ .link_status = ETH_LINK_UP,
+ .link_autoneg = ETH_LINK_SPEED_AUTONEG,
+};
+
+static int
+fs_sub_device_create(struct rte_eth_dev *dev,
+ const char *params)
+{
+ uint8_t nb_subs;
+ int ret;
+
+ ret = failsafe_args_count_subdevice(dev, params);
+ if (ret)
+ return ret;
+ if (PRIV(dev)->subs_tail > FAILSAFE_MAX_ETHPORTS) {
+ ERROR("Cannot allocate more than %d ports",
+ FAILSAFE_MAX_ETHPORTS);
+ return -ENOSPC;
+ }
+ nb_subs = PRIV(dev)->subs_tail;
+ PRIV(dev)->subs = rte_zmalloc(NULL,
+ sizeof(struct sub_device) * nb_subs,
+ RTE_CACHE_LINE_SIZE);
+ if (PRIV(dev)->subs == NULL) {
+ ERROR("Could not allocate sub_devices");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void
+fs_sub_device_free(struct rte_eth_dev *dev)
+{
+ rte_free(PRIV(dev)->subs);
+}
+
+static int
+fs_eth_dev_create(struct rte_vdev_device *vdev)
+{
+ struct rte_eth_dev *dev;
+ struct ether_addr *mac;
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ const char *params;
+ unsigned int socket_id;
+ uint8_t i;
+ int ret;
+
+ dev = NULL;
+ priv = NULL;
+ params = rte_vdev_device_args(vdev);
+ socket_id = rte_socket_id();
+ INFO("Creating fail-safe device on NUMA socket %u",
+ socket_id);
+ dev = rte_eth_vdev_allocate(vdev, sizeof(*priv));
+ if (dev == NULL) {
+ ERROR("Unable to allocate rte_eth_dev");
+ return -1;
+ }
+ priv = dev->data->dev_private;
+ PRIV(dev)->dev = dev;
+ dev->dev_ops = &failsafe_ops;
+ TAILQ_INIT(&dev->link_intr_cbs);
+ dev->data->dev_flags = 0x0;
+ dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0];
+ dev->data->dev_link = eth_link;
+ PRIV(dev)->nb_mac_addr = 1;
+ dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst;
+ dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst;
+ if (params == NULL) {
+ ERROR("This PMD requires sub-devices, none provided");
+ goto free_dev;
+ }
+ ret = fs_sub_device_create(dev, params);
+ if (ret) {
+ ERROR("Could not allocate sub_devices");
+ goto free_dev;
+ }
+ ret = failsafe_args_parse(dev, params);
+ if (ret)
+ goto free_subs;
+ ret = failsafe_eal_init(dev);
+ if (ret)
+ goto free_args;
+ mac = &dev->data->mac_addrs[0];
+ if (mac_from_arg) {
+ /*
+ * If MAC address was provided as a parameter,
+ * apply to all probed slaves.
+ */
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
+ mac);
+ if (ret) {
+ ERROR("Failed to set default MAC address");
+ goto free_args;
+ }
+ }
+ } else {
+ /*
+ * Use the ether_addr from first probed
+ * device, either preferred or fallback.
+ */
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state >= DEV_PROBED) {
+ ether_addr_copy(&ETH(sdev)->data->mac_addrs[0],
+ mac);
+ break;
+ }
+ /*
+ * If no device has been probed and no ether_addr
+ * has been provided on the command line, use a random
+ * valid one.
+ * It will be applied during future slave state syncs to
+ * probed slaves.
+ */
+ if (i == priv->subs_tail)
+ eth_random_addr(&mac->addr_bytes[0]);
+ }
+ INFO("MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
+ mac->addr_bytes[0], mac->addr_bytes[1],
+ mac->addr_bytes[2], mac->addr_bytes[3],
+ mac->addr_bytes[4], mac->addr_bytes[5]);
+ return 0;
+free_args:
+ failsafe_args_free(dev);
+free_subs:
+ fs_sub_device_free(dev);
+free_dev:
+ rte_eth_dev_release_port(dev);
+ return -1;
+}
+
+static int
+fs_rte_eth_free(const char *name)
+{
+ struct rte_eth_dev *dev;
+ int ret;
+
+ dev = rte_eth_dev_allocated(name);
+ if (dev == NULL)
+ return -ENODEV;
+ ret = failsafe_eal_uninit(dev);
+ if (ret)
+ ERROR("Error while uninitializing sub-EAL");
+ failsafe_args_free(dev);
+ fs_sub_device_free(dev);
+ rte_free(PRIV(dev));
+ rte_eth_dev_release_port(dev);
+ return ret;
+}
+
+static int
+rte_pmd_failsafe_probe(struct rte_vdev_device *vdev)
+{
+ const char *name;
+
+ name = rte_vdev_device_name(vdev);
+ if (vdev == NULL)
+ return -EINVAL;
+ INFO("Initializing " FAILSAFE_DRIVER_NAME " for %s",
+ name);
+ return fs_eth_dev_create(vdev);
+}
+
+static int
+rte_pmd_failsafe_remove(struct rte_vdev_device *vdev)
+{
+ const char *name;
+
+ name = rte_vdev_device_name(vdev);
+ if (name == NULL)
+ return -EINVAL;
+ INFO("Uninitializing " FAILSAFE_DRIVER_NAME " for %s", name);
+ return fs_rte_eth_free(name);
+}
+
+static struct rte_vdev_driver failsafe_drv = {
+ .probe = rte_pmd_failsafe_probe,
+ .remove = rte_pmd_failsafe_remove,
+};
+
+RTE_PMD_REGISTER_VDEV(net_failsafe, failsafe_drv);
+RTE_PMD_REGISTER_ALIAS(net_failsafe, eth_failsafe);
+RTE_PMD_REGISTER_PARAM_STRING(net_failsafe, PMD_FAILSAFE_PARAM_STRING);
diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
new file mode 100644
index 0000000..f07d26e
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -0,0 +1,331 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <errno.h>
+
+#include <rte_devargs.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+
+#include "failsafe_private.h"
+
+#define DEVARGS_MAXLEN 4096
+
+/* Callback used when a new device is found in devargs */
+typedef int (parse_cb)(struct rte_eth_dev *dev, const char *params,
+ uint8_t head);
+
+int mac_from_arg;
+
+const char *pmd_failsafe_init_parameters[] = {
+ PMD_FAILSAFE_MAC_KVARG,
+ NULL,
+};
+
+/*
+ * input: text.
+ * output: 0: if text[0] != '(',
+ * 0: if there are no corresponding ')'
+ * n: distance to corresponding ')' otherwise
+ */
+static size_t
+closing_paren(const char *text)
+{
+ int nb_open = 0;
+ size_t i = 0;
+
+ while (text[i] != '\0') {
+ if (text[i] == '(')
+ nb_open++;
+ if (text[i] == ')')
+ nb_open--;
+ if (nb_open == 0)
+ return i;
+ i++;
+ }
+ return 0;
+}
+
+static int
+fs_parse_device(struct sub_device *sdev, char *args)
+{
+ struct rte_devargs *d;
+ int ret;
+
+ d = &sdev->devargs;
+ DEBUG("%s", args);
+ ret = rte_eal_devargs_parse(args, d);
+ if (ret) {
+ DEBUG("devargs parsing failed with code %d", ret);
+ return ret;
+ }
+ sdev->bus = d->bus;
+ sdev->state = DEV_PARSED;
+ return 0;
+}
+
+static int
+fs_parse_device_param(struct rte_eth_dev *dev, const char *param,
+ uint8_t head)
+{
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ char *args = NULL;
+ size_t a, b;
+ int ret;
+
+ priv = PRIV(dev);
+ a = 0;
+ b = 0;
+ ret = 0;
+ while (param[b] != '(' &&
+ param[b] != '\0')
+ b++;
+ a = b;
+ b += closing_paren(&param[b]);
+ if (a == b) {
+ ERROR("Dangling parenthesis");
+ return -EINVAL;
+ }
+ a += 1;
+ args = strndup(&param[a], b - a);
+ if (args == NULL) {
+ ERROR("Not enough memory for parameter parsing");
+ return -ENOMEM;
+ }
+ sdev = &priv->subs[head];
+ if (strncmp(param, "dev", 3) == 0) {
+ ret = fs_parse_device(sdev, args);
+ if (ret)
+ goto free_args;
+ } else {
+ ERROR("Unrecognized device type: %.*s", (int)b, param);
+ return -EINVAL;
+ }
+free_args:
+ free(args);
+ return ret;
+}
+
+static int
+fs_parse_sub_devices(parse_cb *cb,
+ struct rte_eth_dev *dev, const char *params)
+{
+ size_t a, b;
+ uint8_t head;
+ int ret;
+
+ a = 0;
+ head = 0;
+ ret = 0;
+ while (params[a] != '\0') {
+ b = a;
+ while (params[b] != '(' &&
+ params[b] != ',' &&
+ params[b] != '\0')
+ b++;
+ if (b == a) {
+ ERROR("Invalid parameter");
+ return -EINVAL;
+ }
+ if (params[b] == ',') {
+ a = b + 1;
+ continue;
+ }
+ if (params[b] == '(') {
+ size_t start = b;
+
+ b += closing_paren(&params[b]);
+ if (b == start) {
+ ERROR("Dangling parenthesis");
+ return -EINVAL;
+ }
+ ret = (*cb)(dev, &params[a], head);
+ if (ret)
+ return ret;
+ head += 1;
+ b += 1;
+ if (params[b] == '\0')
+ return 0;
+ }
+ a = b + 1;
+ }
+ return 0;
+}
+
+static int
+fs_remove_sub_devices_definition(char params[DEVARGS_MAXLEN])
+{
+ char buffer[DEVARGS_MAXLEN] = {0};
+ size_t a, b;
+ int i;
+
+ a = 0;
+ i = 0;
+ while (params[a] != '\0') {
+ b = a;
+ while (params[b] != '(' &&
+ params[b] != ',' &&
+ params[b] != '\0')
+ b++;
+ if (b == a) {
+ ERROR("Invalid parameter");
+ return -EINVAL;
+ }
+ if (params[b] == ',' || params[b] == '\0')
+ i += snprintf(&buffer[i], b - a + 1, "%s", &params[a]);
+ if (params[b] == '(') {
+ size_t start = b;
+ b += closing_paren(&params[b]);
+ if (b == start)
+ return -EINVAL;
+ b += 1;
+ if (params[b] == '\0')
+ goto out;
+ }
+ a = b + 1;
+ }
+out:
+ snprintf(params, DEVARGS_MAXLEN, "%s", buffer);
+ return 0;
+}
+
+static int
+fs_get_mac_addr_arg(const char *key __rte_unused,
+ const char *value, void *out)
+{
+ struct ether_addr *ea = out;
+ int ret;
+
+ if ((value == NULL) || (out == NULL))
+ return -EINVAL;
+ ret = sscanf(value, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+ &ea->addr_bytes[0], &ea->addr_bytes[1],
+ &ea->addr_bytes[2], &ea->addr_bytes[3],
+ &ea->addr_bytes[4], &ea->addr_bytes[5]);
+ return ret != ETHER_ADDR_LEN;
+}
+
+int
+failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
+{
+ struct fs_priv *priv;
+ char mut_params[DEVARGS_MAXLEN] = "";
+ struct rte_kvargs *kvlist = NULL;
+ unsigned int arg_count;
+ size_t n;
+ int ret;
+
+ if (dev == NULL || params == NULL)
+ return -EINVAL;
+ priv = PRIV(dev);
+ ret = 0;
+ priv->subs_tx = FAILSAFE_MAX_ETHPORTS;
+ /* default parameters */
+ mac_from_arg = 0;
+ n = snprintf(mut_params, sizeof(mut_params), "%s", params);
+ if (n >= sizeof(mut_params)) {
+ ERROR("Parameter string too long (>=%zu)",
+ sizeof(mut_params));
+ return -ENOMEM;
+ }
+ ret = fs_parse_sub_devices(fs_parse_device_param,
+ dev, params);
+ if (ret < 0)
+ return ret;
+ ret = fs_remove_sub_devices_definition(mut_params);
+ if (ret < 0)
+ return ret;
+ if (strnlen(mut_params, sizeof(mut_params)) > 0) {
+ kvlist = rte_kvargs_parse(mut_params,
+ pmd_failsafe_init_parameters);
+ if (kvlist == NULL) {
+ ERROR("Error parsing parameters, usage:\n"
+ PMD_FAILSAFE_PARAM_STRING);
+ return -1;
+ }
+ /* MAC addr */
+ arg_count = rte_kvargs_count(kvlist,
+ PMD_FAILSAFE_MAC_KVARG);
+ if (arg_count == 1) {
+ ret = rte_kvargs_process(kvlist,
+ PMD_FAILSAFE_MAC_KVARG,
+ &fs_get_mac_addr_arg,
+ &dev->data->mac_addrs[0]);
+ if (ret < 0)
+ goto free_kvlist;
+ mac_from_arg = 1;
+ }
+ }
+free_kvlist:
+ rte_kvargs_free(kvlist);
+ return ret;
+}
+
+void
+failsafe_args_free(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ free(sdev->devargs.args);
+ sdev->devargs.args = NULL;
+ }
+}
+
+static int
+fs_count_device(struct rte_eth_dev *dev, const char *param,
+ uint8_t head __rte_unused)
+{
+ size_t b = 0;
+
+ while (param[b] != '(' &&
+ param[b] != '\0')
+ b++;
+ if (strncmp(param, "dev", b) &&
+ strncmp(param, "exec", b)) {
+ ERROR("Unrecognized device type: %.*s", (int)b, param);
+ return -EINVAL;
+ }
+ PRIV(dev)->subs_tail += 1;
+ return 0;
+}
+
+int
+failsafe_args_count_subdevice(struct rte_eth_dev *dev,
+ const char *params)
+{
+ return fs_parse_sub_devices(fs_count_device,
+ dev, params);
+}
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
new file mode 100644
index 0000000..087d4f3
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -0,0 +1,160 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_malloc.h>
+
+#include "failsafe_private.h"
+
+static struct rte_eth_dev *
+fs_eth_dev_from_name(const char *name)
+{
+ struct rte_eth_dev *dev;
+ uint8_t i;
+
+ RTE_ETH_FOREACH_DEV(i) {
+ dev = &rte_eth_devices[i];
+ if (!strcmp(dev->device->name, name))
+ return dev;
+ }
+ return NULL;
+}
+
+static int
+fs_bus_init(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ struct rte_device device;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_PARSED)
+ continue;
+ device.name = sdev->devargs.name;
+ device.devargs = &sdev->devargs;
+ if (!sdev->bus->attach) {
+ ERROR("Bus %s used for sub_device %d does not support hotplug, skipping",
+ sdev->bus->name, i);
+ return -EINVAL;
+ }
+ ret = sdev->bus->attach(&device);
+ if (ret) {
+ ERROR("sub_device %d probe failed %s%s%s", i,
+ errno ? "(" : "",
+ errno ? strerror(errno) : "",
+ errno ? ")" : "");
+ continue;
+ }
+ ETH(sdev) = fs_eth_dev_from_name(device.name);
+ if (ETH(sdev) == NULL) {
+ ERROR("sub_device %d init went wrong", i);
+ return -ENODEV;
+ }
+ sdev->dev = ETH(sdev)->device;
+ ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
+ sdev->state = DEV_PROBED;
+ }
+ return 0;
+}
+
+int
+failsafe_eal_init(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ ret = fs_bus_init(dev);
+ if (ret)
+ return ret;
+ /*
+ * We only update TX_SUBDEV if we are not started.
+ * If a sub_device is emitting, we will switch the TX_SUBDEV to the
+ * preferred port only upon starting it, so that the switch is smoother.
+ */
+ if (PREFERRED_SUBDEV(dev)->state >= DEV_PROBED) {
+ if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
+ (TX_SUBDEV(dev) == NULL ||
+ (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
+ DEBUG("Switching tx_dev to preferred sub_device");
+ PRIV(dev)->subs_tx = 0;
+ }
+ } else {
+ if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_PROBED) ||
+ TX_SUBDEV(dev) == NULL) {
+ /* Using first probed device */
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ DEBUG("Switching tx_dev to sub_device %d",
+ i);
+ PRIV(dev)->subs_tx = i;
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+static int
+fs_bus_uninit(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev = NULL;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ if (!sdev->bus->detach) {
+ ERROR("Bus does not support device removal for sub_device %u (%s)",
+ i, sdev->devargs.name);
+ continue;
+ }
+ ret = sdev->bus->detach(sdev->dev);
+ if (ret) {
+ ERROR("Failed to remove requested device %s",
+ sdev->devargs.name);
+ continue;
+ }
+ sdev->state = DEV_PROBED - 1;
+ }
+ return 0;
+}
+
+int
+failsafe_eal_uninit(struct rte_eth_dev *dev)
+{
+ int ret;
+
+ ret = fs_bus_uninit(dev);
+ if (ret)
+ return ret;
+ return 0;
+}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
new file mode 100644
index 0000000..693162e
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -0,0 +1,663 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+
+#include "failsafe_private.h"
+
+static struct rte_eth_dev_info default_infos = {
+ .driver_name = pmd_failsafe_driver_name,
+ /* Max possible number of elements */
+ .max_rx_pktlen = UINT32_MAX,
+ .max_rx_queues = RTE_MAX_QUEUES_PER_PORT,
+ .max_tx_queues = RTE_MAX_QUEUES_PER_PORT,
+ .max_mac_addrs = FAILSAFE_MAX_ETHADDR,
+ .max_hash_mac_addrs = UINT32_MAX,
+ .max_vfs = UINT16_MAX,
+ .max_vmdq_pools = UINT16_MAX,
+ .rx_desc_lim = {
+ .nb_max = UINT16_MAX,
+ .nb_min = 0,
+ .nb_align = 1,
+ .nb_seg_max = UINT16_MAX,
+ .nb_mtu_seg_max = UINT16_MAX,
+ },
+ .tx_desc_lim = {
+ .nb_max = UINT16_MAX,
+ .nb_min = 0,
+ .nb_align = 1,
+ .nb_seg_max = UINT16_MAX,
+ .nb_mtu_seg_max = UINT16_MAX,
+ },
+ /* Set of understood capabilities */
+ .rx_offload_capa = 0x0,
+ .tx_offload_capa = 0x0,
+ .flow_type_rss_offloads = 0x0,
+};
+
+static int
+fs_dev_configure(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_PROBED)
+ continue;
+ DEBUG("Configuring sub-device %d", i);
+ ret = rte_eth_dev_configure(PORT_ID(sdev),
+ dev->data->nb_rx_queues,
+ dev->data->nb_tx_queues,
+ &dev->data->dev_conf);
+ if (ret) {
+ ERROR("Could not configure sub_device %d", i);
+ return ret;
+ }
+ sdev->state = DEV_ACTIVE;
+ }
+ return 0;
+}
+
+static int
+fs_dev_start(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_ACTIVE)
+ continue;
+ DEBUG("Starting sub_device %d", i);
+ ret = rte_eth_dev_start(PORT_ID(sdev));
+ if (ret)
+ return ret;
+ sdev->state = DEV_STARTED;
+ }
+ if (PREFERRED_SUBDEV(dev)->state == DEV_STARTED) {
+ if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev)) {
+ DEBUG("Switching tx_dev to preferred sub_device");
+ PRIV(dev)->subs_tx = 0;
+ }
+ } else {
+ if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED) ||
+ TX_SUBDEV(dev) == NULL) {
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
+ DEBUG("Switching tx_dev to sub_device %d", i);
+ PRIV(dev)->subs_tx = i;
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+static void
+fs_dev_stop(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
+ rte_eth_dev_stop(PORT_ID(sdev));
+ sdev->state = DEV_STARTED - 1;
+ }
+}
+
+static int
+fs_dev_set_link_up(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
+ ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
+ if (ret) {
+ ERROR("Operation rte_eth_dev_set_link_up failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
+fs_dev_set_link_down(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
+ ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
+ if (ret) {
+ ERROR("Operation rte_eth_dev_set_link_down failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static void fs_dev_free_queues(struct rte_eth_dev *dev);
+static void
+fs_dev_close(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Closing sub_device %d", i);
+ rte_eth_dev_close(PORT_ID(sdev));
+ sdev->state = DEV_ACTIVE - 1;
+ }
+ fs_dev_free_queues(dev);
+}
+
+static void
+fs_rx_queue_release(void *queue)
+{
+ struct rte_eth_dev *dev;
+ struct sub_device *sdev;
+ uint8_t i;
+ struct rxq *rxq;
+
+ if (queue == NULL)
+ return;
+ rxq = queue;
+ dev = rxq->priv->dev;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ SUBOPS(sdev, rx_queue_release)
+ (ETH(sdev)->data->rx_queues[rxq->qid]);
+ dev->data->rx_queues[rxq->qid] = NULL;
+ rte_free(rxq);
+}
+
+static int
+fs_rx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t rx_queue_id,
+ uint16_t nb_rx_desc,
+ unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *mb_pool)
+{
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ uint8_t i;
+ int ret;
+
+ rxq = dev->data->rx_queues[rx_queue_id];
+ if (rxq != NULL) {
+ fs_rx_queue_release(rxq);
+ dev->data->rx_queues[rx_queue_id] = NULL;
+ }
+ rxq = rte_zmalloc(NULL, sizeof(*rxq),
+ RTE_CACHE_LINE_SIZE);
+ if (rxq == NULL)
+ return -ENOMEM;
+ rxq->qid = rx_queue_id;
+ rxq->socket_id = socket_id;
+ rxq->info.mp = mb_pool;
+ rxq->info.conf = *rx_conf;
+ rxq->info.nb_desc = nb_rx_desc;
+ rxq->priv = PRIV(dev);
+ dev->data->rx_queues[rx_queue_id] = rxq;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_rx_queue_setup(PORT_ID(sdev),
+ rx_queue_id,
+ nb_rx_desc, socket_id,
+ rx_conf, mb_pool);
+ if (ret) {
+ ERROR("RX queue setup failed for sub_device %d", i);
+ goto free_rxq;
+ }
+ }
+ return 0;
+free_rxq:
+ fs_rx_queue_release(rxq);
+ return ret;
+}
+
+static void
+fs_tx_queue_release(void *queue)
+{
+ struct rte_eth_dev *dev;
+ struct sub_device *sdev;
+ uint8_t i;
+ struct txq *txq;
+
+ if (queue == NULL)
+ return;
+ txq = queue;
+ dev = txq->priv->dev;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ SUBOPS(sdev, tx_queue_release)
+ (ETH(sdev)->data->tx_queues[txq->qid]);
+ dev->data->tx_queues[txq->qid] = NULL;
+ rte_free(txq);
+}
+
+static int
+fs_tx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t tx_queue_id,
+ uint16_t nb_tx_desc,
+ unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ uint8_t i;
+ int ret;
+
+ txq = dev->data->tx_queues[tx_queue_id];
+ if (txq != NULL) {
+ fs_tx_queue_release(txq);
+ dev->data->tx_queues[tx_queue_id] = NULL;
+ }
+ txq = rte_zmalloc("ethdev TX queue", sizeof(*txq),
+ RTE_CACHE_LINE_SIZE);
+ if (txq == NULL)
+ return -ENOMEM;
+ txq->qid = tx_queue_id;
+ txq->socket_id = socket_id;
+ txq->info.conf = *tx_conf;
+ txq->info.nb_desc = nb_tx_desc;
+ txq->priv = PRIV(dev);
+ dev->data->tx_queues[tx_queue_id] = txq;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_tx_queue_setup(PORT_ID(sdev),
+ tx_queue_id,
+ nb_tx_desc, socket_id,
+ tx_conf);
+ if (ret) {
+ ERROR("TX queue setup failed for sub_device %d", i);
+ goto free_txq;
+ }
+ }
+ return 0;
+free_txq:
+ fs_tx_queue_release(txq);
+ return ret;
+}
+
+static void
+fs_dev_free_queues(struct rte_eth_dev *dev)
+{
+ uint16_t i;
+
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ fs_rx_queue_release(dev->data->rx_queues[i]);
+ dev->data->rx_queues[i] = NULL;
+ }
+ dev->data->nb_rx_queues = 0;
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ fs_tx_queue_release(dev->data->tx_queues[i]);
+ dev->data->tx_queues[i] = NULL;
+ }
+ dev->data->nb_tx_queues = 0;
+}
+
+static void
+fs_promiscuous_enable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_promiscuous_enable(PORT_ID(sdev));
+}
+
+static void
+fs_promiscuous_disable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_promiscuous_disable(PORT_ID(sdev));
+}
+
+static void
+fs_allmulticast_enable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_allmulticast_enable(PORT_ID(sdev));
+}
+
+static void
+fs_allmulticast_disable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_allmulticast_disable(PORT_ID(sdev));
+}
+
+static int
+fs_link_update(struct rte_eth_dev *dev,
+ int wait_to_complete)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling link_update on sub_device %d", i);
+ ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
+ if (ret && ret != -1) {
+ ERROR("Link update failed for sub_device %d with error %d",
+ i, ret);
+ return ret;
+ }
+ }
+ if (TX_SUBDEV(dev)) {
+ struct rte_eth_link *l1;
+ struct rte_eth_link *l2;
+
+ l1 = &dev->data->dev_link;
+ l2 = &ETH(TX_SUBDEV(dev))->data->dev_link;
+ if (memcmp(l1, l2, sizeof(*l1))) {
+ *l1 = *l2;
+ return 0;
+ }
+ }
+ return -1;
+}
+
+static void
+fs_stats_get(struct rte_eth_dev *dev,
+ struct rte_eth_stats *stats)
+{
+ memset(stats, 0, sizeof(*stats));
+ if (TX_SUBDEV(dev) == NULL)
+ return;
+ rte_eth_stats_get(PORT_ID(TX_SUBDEV(dev)), stats);
+}
+
+static void
+fs_stats_reset(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_stats_reset(PORT_ID(sdev));
+}
+
+/**
+ * Fail-safe dev_infos_get rules:
+ *
+ * No sub_device:
+ * Numerables:
+ * Use the maximum possible values for any field, so as not
+ * to impede any further configuration effort.
+ * Capabilities:
+ * Limits capabilities to those that are understood by the
+ * fail-safe PMD. This understanding stems from the fail-safe
+ * being capable of verifying that the related capability is
+ * expressed within the device configuration (struct rte_eth_conf).
+ *
+ * At least one probed sub_device:
+ * Numerables:
+ * Uses values from the active probed sub_device
+ * The rationale here is that if any sub_device is less capable
+ * (for example concerning the number of queues) than the active
+ * sub_device, then its subsequent configuration will fail.
+ * It is impossible to foresee this failure when the failing sub_device
+ * is supposed to be plugged-in later on, so the configuration process
+ * is the single point of failure and error reporting.
+ * Capabilities:
+ * Uses a logical AND of RX capabilities among
+ * all sub_devices and the default capabilities.
+ * Uses a logical AND of TX capabilities among
+ * the active probed sub_device and the default capabilities.
+ *
+ */
+static void
+fs_dev_infos_get(struct rte_eth_dev *dev,
+ struct rte_eth_dev_info *infos)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL) {
+ DEBUG("No probed device, using default infos");
+ rte_memcpy(&PRIV(dev)->infos, &default_infos,
+ sizeof(default_infos));
+ } else {
+ uint32_t rx_offload_capa;
+
+ rx_offload_capa = default_infos.rx_offload_capa;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ rte_eth_dev_info_get(PORT_ID(sdev),
+ &PRIV(dev)->infos);
+ rx_offload_capa &= PRIV(dev)->infos.rx_offload_capa;
+ }
+ sdev = TX_SUBDEV(dev);
+ rte_eth_dev_info_get(PORT_ID(sdev), &PRIV(dev)->infos);
+ PRIV(dev)->infos.rx_offload_capa = rx_offload_capa;
+ PRIV(dev)->infos.tx_offload_capa &=
+ default_infos.tx_offload_capa;
+ PRIV(dev)->infos.flow_type_rss_offloads &=
+ default_infos.flow_type_rss_offloads;
+ }
+ rte_memcpy(infos, &PRIV(dev)->infos, sizeof(*infos));
+}
+
+static const uint32_t *
+fs_dev_supported_ptypes_get(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ struct rte_eth_dev *edev;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL)
+ return NULL;
+ edev = ETH(sdev);
+ /* ENOTSUP: counts as no supported ptypes */
+ if (SUBOPS(sdev, dev_supported_ptypes_get) == NULL)
+ return NULL;
+ /*
+ * The API does not permit to do a clean AND of all ptypes,
+ * It is also incomplete by design and we do not really care
+ * to have a best possible value in this context.
+ * We just return the ptypes of the device of highest
+ * priority, usually the PREFERRED device.
+ */
+ return SUBOPS(sdev, dev_supported_ptypes_get)(edev);
+}
+
+static int
+fs_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
+ ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_set_mtu failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
+fs_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
+ ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_vlan_filter failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
+fs_flow_ctrl_get(struct rte_eth_dev *dev,
+ struct rte_eth_fc_conf *fc_conf)
+{
+ struct sub_device *sdev;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL)
+ return 0;
+ if (SUBOPS(sdev, flow_ctrl_get) == NULL)
+ return -ENOTSUP;
+ return SUBOPS(sdev, flow_ctrl_get)(ETH(sdev), fc_conf);
+}
+
+static int
+fs_flow_ctrl_set(struct rte_eth_dev *dev,
+ struct rte_eth_fc_conf *fc_conf)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
+ ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_flow_ctrl_set failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static void
+fs_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ /* No check: already done within the rte_eth_dev_mac_addr_remove
+ * call for the fail-safe device.
+ */
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_dev_mac_addr_remove(PORT_ID(sdev),
+ &dev->data->mac_addrs[index]);
+ PRIV(dev)->mac_addr_pool[index] = 0;
+}
+
+static int
+fs_mac_addr_add(struct rte_eth_dev *dev,
+ struct ether_addr *mac_addr,
+ uint32_t index,
+ uint32_t vmdq)
+{
+ struct sub_device *sdev;
+ int ret;
+ uint8_t i;
+
+ assert(index < FAILSAFE_MAX_ETHADDR);
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
+ PRIu8 " with error %d", i, ret);
+ return ret;
+ }
+ }
+ if (index >= PRIV(dev)->nb_mac_addr) {
+ DEBUG("Growing mac_addrs array");
+ PRIV(dev)->nb_mac_addr = index;
+ }
+ PRIV(dev)->mac_addr_pool[index] = vmdq;
+ return 0;
+}
+
+static void
+fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
+}
+
+const struct eth_dev_ops failsafe_ops = {
+ .dev_configure = fs_dev_configure,
+ .dev_start = fs_dev_start,
+ .dev_stop = fs_dev_stop,
+ .dev_set_link_down = fs_dev_set_link_down,
+ .dev_set_link_up = fs_dev_set_link_up,
+ .dev_close = fs_dev_close,
+ .promiscuous_enable = fs_promiscuous_enable,
+ .promiscuous_disable = fs_promiscuous_disable,
+ .allmulticast_enable = fs_allmulticast_enable,
+ .allmulticast_disable = fs_allmulticast_disable,
+ .link_update = fs_link_update,
+ .stats_get = fs_stats_get,
+ .stats_reset = fs_stats_reset,
+ .dev_infos_get = fs_dev_infos_get,
+ .dev_supported_ptypes_get = fs_dev_supported_ptypes_get,
+ .mtu_set = fs_mtu_set,
+ .vlan_filter_set = fs_vlan_filter_set,
+ .rx_queue_setup = fs_rx_queue_setup,
+ .tx_queue_setup = fs_tx_queue_setup,
+ .rx_queue_release = fs_rx_queue_release,
+ .tx_queue_release = fs_tx_queue_release,
+ .flow_ctrl_get = fs_flow_ctrl_get,
+ .flow_ctrl_set = fs_flow_ctrl_set,
+ .mac_addr_remove = fs_mac_addr_remove,
+ .mac_addr_add = fs_mac_addr_add,
+ .mac_addr_set = fs_mac_addr_set,
+};
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
new file mode 100644
index 0000000..e7a7592
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -0,0 +1,227 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_
+#define _RTE_ETH_FAILSAFE_PRIVATE_H_
+
+#include <rte_dev.h>
+#include <rte_ethdev.h>
+#include <rte_devargs.h>
+
+#define FAILSAFE_DRIVER_NAME "Fail-safe PMD"
+
+#define PMD_FAILSAFE_MAC_KVARG "mac"
+#define PMD_FAILSAFE_PARAM_STRING \
+ "dev(<ifc>)," \
+ "mac=mac_addr" \
+ ""
+
+#define FAILSAFE_PLUGIN_DEFAULT_TIMEOUT_MS 2000
+
+#define FAILSAFE_MAX_ETHPORTS 2
+#define FAILSAFE_MAX_ETHADDR 128
+
+/* TYPES */
+
+struct rxq {
+ struct fs_priv *priv;
+ uint16_t qid;
+ /* id of last sub_device polled */
+ uint8_t last_polled;
+ unsigned int socket_id;
+ struct rte_eth_rxq_info info;
+};
+
+struct txq {
+ struct fs_priv *priv;
+ uint16_t qid;
+ unsigned int socket_id;
+ struct rte_eth_txq_info info;
+};
+
+enum dev_state {
+ DEV_UNDEFINED = 0,
+ DEV_PARSED,
+ DEV_PROBED,
+ DEV_ACTIVE,
+ DEV_STARTED,
+};
+
+struct sub_device {
+ /* Exhaustive DPDK device description */
+ struct rte_devargs devargs;
+ struct rte_bus *bus;
+ struct rte_device *dev;
+ struct rte_eth_dev *edev;
+ /* Device state machine */
+ enum dev_state state;
+};
+
+struct fs_priv {
+ struct rte_eth_dev *dev;
+ /*
+ * Set of sub_devices.
+ * subs[0] is the preferred device
+ * any other is just another slave
+ */
+ struct sub_device *subs;
+ uint8_t subs_head; /* if head == tail, no subs */
+ uint8_t subs_tail; /* first invalid */
+ uint8_t subs_tx; /* current emitting device */
+ uint8_t current_probed;
+ /* current number of mac_addr slots allocated. */
+ uint32_t nb_mac_addr;
+ struct ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR];
+ uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR];
+ /* current capabilities */
+ struct rte_eth_dev_info infos;
+};
+
+/* RX / TX */
+
+uint16_t failsafe_rx_burst(void *rxq,
+ struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t failsafe_tx_burst(void *txq,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
+/* ARGS */
+
+int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
+void failsafe_args_free(struct rte_eth_dev *dev);
+int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
+
+/* EAL */
+
+int failsafe_eal_init(struct rte_eth_dev *dev);
+int failsafe_eal_uninit(struct rte_eth_dev *dev);
+
+/* GLOBALS */
+
+extern const char pmd_failsafe_driver_name[];
+extern const struct eth_dev_ops failsafe_ops;
+extern int mac_from_arg;
+
+/* HELPERS */
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define PRIV(dev) \
+ ((struct fs_priv *)(dev)->data->dev_private)
+
+/* sdev: (struct sub_device *) */
+#define ETH(sdev) \
+ ((sdev)->edev)
+
+/* sdev: (struct sub_device *) */
+#define PORT_ID(sdev) \
+ (ETH(sdev)->data->port_id)
+
+/**
+ * Stateful iterator construct over fail-safe sub-devices:
+ * s: (struct sub_device *), iterator
+ * i: (uint8_t), increment
+ * dev: (struct rte_eth_dev *), fail-safe ethdev
+ * state: (enum dev_state), minimum acceptable device state
+ */
+#define FOREACH_SUBDEV_ST(s, i, dev, state) \
+ for (i = fs_find_next((dev), 0, state); \
+ i < PRIV(dev)->subs_tail && (s = &PRIV(dev)->subs[i]); \
+ i = fs_find_next((dev), i + 1, state))
+
+/**
+ * Iterator construct over fail-safe sub-devices:
+ * s: (struct sub_device *), iterator
+ * i: (uint8_t), increment
+ * dev: (struct rte_eth_dev *), fail-safe ethdev
+ */
+#define FOREACH_SUBDEV(s, i, dev) \
+ FOREACH_SUBDEV_ST(s, i, dev, DEV_UNDEFINED)
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define PREFERRED_SUBDEV(dev) \
+ (&PRIV(dev)->subs[0])
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define TX_SUBDEV(dev) \
+ (PRIV(dev)->subs_tx >= PRIV(dev)->subs_tail ? NULL \
+ : (PRIV(dev)->subs[PRIV(dev)->subs_tx].state < DEV_PROBED ? NULL \
+ : &PRIV(dev)->subs[PRIV(dev)->subs_tx]))
+
+/**
+ * s: (struct sub_device *)
+ * ops: (struct eth_dev_ops) member
+ */
+#define SUBOPS(s, ops) \
+ (ETH(s)->dev_ops->ops)
+
+#ifndef NDEBUG
+#include <stdio.h>
+#define DEBUG__(m, ...) \
+ (fprintf(stderr, "%s:%d: %s(): " m "%c", \
+ __FILE__, __LINE__, __func__, __VA_ARGS__), \
+ (void)0)
+#define DEBUG_(...) \
+ (errno = ((int []){ \
+ *(volatile int *)&errno, \
+ (DEBUG__(__VA_ARGS__), 0) \
+ })[0])
+#define DEBUG(...) DEBUG_(__VA_ARGS__, '\n')
+#define INFO(...) DEBUG(__VA_ARGS__)
+#define WARN(...) DEBUG(__VA_ARGS__)
+#define ERROR(...) DEBUG(__VA_ARGS__)
+#else
+#define DEBUG(...) ((void)0)
+#define LOG__(level, m, ...) \
+ RTE_LOG(level, PMD, "net_failsafe: " m "%c", __VA_ARGS__)
+#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
+#define INFO(...) LOG_(INFO, __VA_ARGS__)
+#define WARN(...) LOG_(WARNING, "WARNING: " __VA_ARGS__)
+#define ERROR(...) LOG_(ERR, "ERROR: " __VA_ARGS__)
+#endif
+
+/* inlined functions */
+
+static inline uint8_t
+fs_find_next(struct rte_eth_dev *dev, uint8_t sid,
+ enum dev_state min_state)
+{
+ while (sid < PRIV(dev)->subs_tail) {
+ if (PRIV(dev)->subs[sid].state >= min_state)
+ break;
+ sid++;
+ }
+ if (sid >= PRIV(dev)->subs_tail)
+ return PRIV(dev)->subs_tail;
+ return sid;
+}
+
+#endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */
diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c
new file mode 100644
index 0000000..a45b4e5
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_rxtx.c
@@ -0,0 +1,107 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+
+#include "failsafe_private.h"
+
+/*
+ * TODO: write fast version,
+ * without additional checks, to be activated once
+ * everything has been verified to comply.
+ */
+uint16_t
+failsafe_rx_burst(void *queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ void *sub_rxq;
+ uint16_t nb_rx;
+ uint8_t nb_polled, nb_subs;
+ uint8_t i;
+
+ rxq = queue;
+ priv = rxq->priv;
+ nb_subs = priv->subs_tail - priv->subs_head;
+ nb_polled = 0;
+ for (i = rxq->last_polled; nb_polled < nb_subs; nb_polled++) {
+ i++;
+ if (i == priv->subs_tail)
+ i = priv->subs_head;
+ sdev = &priv->subs[i];
+ if (unlikely(ETH(sdev) == NULL))
+ continue;
+ if (unlikely(ETH(sdev)->rx_pkt_burst == NULL))
+ continue;
+ if (unlikely(sdev->state != DEV_STARTED))
+ continue;
+ sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ nb_rx = ETH(sdev)->
+ rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ if (nb_rx) {
+ rxq->last_polled = i;
+ return nb_rx;
+ }
+ }
+ return 0;
+}
+
+/*
+ * TODO: write fast version,
+ * without additional checks, to be activated once
+ * everything has been verified to comply.
+ */
+uint16_t
+failsafe_tx_burst(void *queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ void *sub_txq;
+
+ txq = queue;
+ sdev = TX_SUBDEV(txq->priv->dev);
+ if (unlikely(sdev == NULL))
+ return 0;
+ if (unlikely(ETH(sdev) == NULL))
+ return 0;
+ if (unlikely(ETH(sdev)->tx_pkt_burst == NULL))
+ return 0;
+ sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
+ return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+}
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index bcaf1b3..e5743c4 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -120,6 +120,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += -lrte_pmd_e1000
_LDLIBS-$(CONFIG_RTE_LIBRTE_ENA_PMD) += -lrte_pmd_ena
_LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += -lrte_pmd_enic
_LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += -lrte_pmd_fm10k
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += -lrte_pmd_failsafe
_LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += -lrte_pmd_i40e
_LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += -lrte_pmd_ixgbe
ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
--
2.1.4
Gaetan Rivet
2017-05-24 15:20:09 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
---
drivers/net/failsafe/failsafe_ether.c | 12 ++++++++++++
drivers/net/failsafe/failsafe_flow.c | 29 +++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_private.h | 4 ++++
3 files changed, 45 insertions(+)

diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 3401a18..3a691a1 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -218,6 +218,18 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
DEBUG("VLAN filter already set");
}
/* rte_flow */
+ if (!PRIV(dev)->flow_isolated) {
+ DEBUG("Flow isolation already disabled");
+ } else {
+ DEBUG("Enabling flow isolation");
+ ret = rte_flow_isolate(PORT_ID(sdev),
+ PRIV(dev)->flow_isolated,
+ &ferror);
+ if (ret) {
+ fs_flow_complain(&ferror);
+ return ret;
+ }
+ }
if (TAILQ_EMPTY(&PRIV(dev)->flow_list)) {
DEBUG("rte_flow already set");
} else {
diff --git a/drivers/net/failsafe/failsafe_flow.c b/drivers/net/failsafe/failsafe_flow.c
index d8f59a1..a5598ae 100644
--- a/drivers/net/failsafe/failsafe_flow.c
+++ b/drivers/net/failsafe/failsafe_flow.c
@@ -207,10 +207,39 @@ fs_flow_query(struct rte_eth_dev *dev,
return -1;
}

+static int
+fs_flow_isolate(struct rte_eth_dev *dev,
+ int set,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state < DEV_PROBED)
+ continue;
+ DEBUG("Calling rte_flow_isolate on sub_device %d", i);
+ if (PRIV(dev)->flow_isolated != sdev->flow_isolated)
+ WARN("flow isolation mode of sub_device %d in incoherent state.",
+ i);
+ ret = rte_flow_isolate(PORT_ID(sdev), set, error);
+ if (ret) {
+ ERROR("Operation rte_flow_isolate failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ sdev->flow_isolated = set;
+ }
+ PRIV(dev)->flow_isolated = set;
+ return 0;
+}
+
const struct rte_flow_ops fs_flow_ops = {
.validate = fs_flow_validate,
.create = fs_flow_create,
.destroy = fs_flow_destroy,
.flush = fs_flow_flush,
.query = fs_flow_query,
+ .isolate = fs_flow_isolate,
};
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 66303cd..a3885e8 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -109,6 +109,8 @@ struct sub_device {
struct rte_eth_dev *fs_dev;
/* flag calling for recollection */
volatile unsigned int remove:1;
+ /* flow isolation state */
+ int flow_isolated:1;
};

struct fs_priv {
@@ -140,6 +142,8 @@ struct fs_priv {
*/
enum dev_state state;
unsigned int pending_alarm:1; /* An alarm is pending */
+ /* flow isolation state */
+ int flow_isolated:1;
};

/* MISC */
--
2.1.4
Gaetan Rivet
2017-05-24 15:20:01 UTC
Permalink
Make the rte_eth_dev_count() return the number of available devices even
after some are detached by the hotplug API or put in a deferred state.

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_ether/rte_ethdev.c | 16 +++++++++-------
lib/librte_ether/rte_ethdev.h | 13 ++++++-------
2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index f592643..172c8ec 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -72,7 +72,6 @@ static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
static struct rte_eth_dev_data *rte_eth_dev_data;
static uint8_t eth_dev_last_created_port;
-static uint8_t nb_ports;

/* spinlock for eth device callbacks */
static rte_spinlock_t rte_eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER;
@@ -207,7 +206,6 @@ eth_dev_get(uint8_t port_id)
TAILQ_INIT(&(eth_dev->link_intr_cbs));

eth_dev_last_created_port = port_id;
- nb_ports++;

return eth_dev;
}
@@ -280,7 +278,6 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
return -EINVAL;

eth_dev->state = RTE_ETH_DEV_UNUSED;
- nb_ports--;
return 0;
}

@@ -305,7 +302,15 @@ rte_eth_dev_socket_id(uint8_t port_id)
uint8_t
rte_eth_dev_count(void)
{
- return nb_ports;
+ uint8_t p;
+ uint8_t count;
+
+ count = 0;
+
+ RTE_ETH_FOREACH_DEV(p)
+ count++;
+
+ return count;
}

int
@@ -337,9 +342,6 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id)
return -EINVAL;
}

- if (!nb_ports)
- return -ENODEV;
-
*port_id = RTE_MAX_ETHPORTS;
RTE_ETH_FOREACH_DEV(i) {
if (!strncmp(name,
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 161b0ff..44f7e83 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1788,13 +1788,12 @@ uint8_t rte_eth_find_next(uint8_t port_id);

/**
* Get the total number of Ethernet devices that have been successfully
- * initialized by the [matching] Ethernet driver during the PCI probing phase.
- * All devices whose port identifier is in the range
- * [0, rte_eth_dev_count() - 1] can be operated on by network applications
- * immediately after invoking rte_eal_init().
- * If the application unplugs a port using hotplug function, The enabled port
- * numbers may be noncontiguous. In the case, the applications need to manage
- * enabled port by using the ``RTE_ETH_FOREACH_DEV()`` macro.
+ * initialized by the matching Ethernet driver during the PCI probing phase
+ * and that are available for applications to use. These devices must be
+ * accessed by using the ``RTE_ETH_FOREACH_DEV()`` macro to deal with
+ * non-contiguous ranges of devices.
+ * These non-contiguous ranges can be created by calls to hotplug functions or
+ * by some PMDs.
*
* @return
* - The total number of usable Ethernet devices.
--
2.1.4
Gaetan Rivet
2017-05-24 15:20:03 UTC
Permalink
Periodically check for the existence of a device.
If a device has not been initialized and exists on the system, then it
is probed and configured.

The configuration process strives to synchronize the states between the
plugged-in sub-device and the fail-safe device.

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/fail_safe.rst | 19 +++
drivers/net/failsafe/Makefile | 1 +
drivers/net/failsafe/failsafe.c | 71 ++++++++++
drivers/net/failsafe/failsafe_args.c | 32 +++++
drivers/net/failsafe/failsafe_eal.c | 30 +----
drivers/net/failsafe/failsafe_ether.c | 228 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ops.c | 25 ++--
drivers/net/failsafe/failsafe_private.h | 60 ++++++++-
8 files changed, 423 insertions(+), 43 deletions(-)
create mode 100644 drivers/net/failsafe/failsafe_ether.c

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index 056f85f..c04891a 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -102,6 +102,11 @@ Fail-safe command line parameters
address is generated, that will be subsequently applied to all sub-device once
they are probed.

+- **hotplug_poll** parameter [UINT64] (default **2000**)
+
+ This parameter allows the user to configure the amount of time in milliseconds
+ between two slave upkeep round.
+
Usage example
~~~~~~~~~~~~~

@@ -131,3 +136,17 @@ Care must be taken, however, to respect the **ether** API concerning device
access, and in particular, using the ``RTE_ETH_FOREACH_DEV`` macro to iterate
over ethernet devices, instead of directly accessing them or by writing one's
own device iterator.
+
+Plug-in feature
+---------------
+
+A sub-device can be defined without existing on the system when the fail-safe
+PMD is initialized. Upon probing this device, the fail-safe PMD will detect its
+absence and postpone its use. It will then register for a periodic check on any
+missing sub-device.
+
+During this time, the fail-safe PMD can be used normally, configured and told to
+emit and receive packets. It will store any applied configuration, and try to
+apply it upon the probing of its missing sub-device. After this configuration
+pass, the new sub-device will be synchronized with other sub-devices, i.e. be
+started if the fail-safe PMD has been started by the user before.
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
index 06199ad..4567961 100644
--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -40,6 +40,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_args.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ether.c

# No exported include files

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 7cf33e8..888f07b 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -80,6 +80,72 @@ fs_sub_device_free(struct rte_eth_dev *dev)
rte_free(PRIV(dev)->subs);
}

+static void fs_hotplug_alarm(void *arg);
+
+int
+failsafe_hotplug_alarm_install(struct rte_eth_dev *dev)
+{
+ int ret;
+
+ if (dev == NULL)
+ return -EINVAL;
+ if (PRIV(dev)->pending_alarm)
+ return 0;
+ ret = rte_eal_alarm_set(hotplug_poll * 1000,
+ fs_hotplug_alarm,
+ dev);
+ if (ret) {
+ ERROR("Could not set up plug-in event detection");
+ return ret;
+ }
+ PRIV(dev)->pending_alarm = 1;
+ return 0;
+}
+
+int
+failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev)
+{
+ int ret = 0;
+
+ if (PRIV(dev)->pending_alarm) {
+ rte_errno = 0;
+ rte_eal_alarm_cancel(fs_hotplug_alarm, dev);
+ if (rte_errno) {
+ ERROR("rte_eal_alarm_cancel failed (errno: %s)",
+ strerror(rte_errno));
+ ret = -rte_errno;
+ } else {
+ PRIV(dev)->pending_alarm = 0;
+ }
+ }
+ return ret;
+}
+
+static void
+fs_hotplug_alarm(void *arg)
+{
+ struct rte_eth_dev *dev = arg;
+ struct sub_device *sdev;
+ int ret;
+ uint8_t i;
+
+ if (!PRIV(dev)->pending_alarm)
+ return;
+ PRIV(dev)->pending_alarm = 0;
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state != PRIV(dev)->state)
+ break;
+ /* if we have non-probed device */
+ if (i != PRIV(dev)->subs_tail) {
+ ret = failsafe_eth_dev_state_sync(dev);
+ if (ret)
+ ERROR("Unable to synchronize sub_device state");
+ }
+ ret = failsafe_hotplug_alarm_install(dev);
+ if (ret)
+ ERROR("Unable to set up next alarm");
+}
+
static int
fs_eth_dev_create(struct rte_vdev_device *vdev)
{
@@ -128,6 +194,11 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
ret = failsafe_eal_init(dev);
if (ret)
goto free_args;
+ ret = failsafe_hotplug_alarm_install(dev);
+ if (ret) {
+ ERROR("Could not set up plug-in event detection");
+ goto free_args;
+ }
mac = &dev->data->mac_addrs[0];
if (mac_from_arg) {
/*
diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
index f07d26e..8f334aa 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -45,9 +45,11 @@
typedef int (parse_cb)(struct rte_eth_dev *dev, const char *params,
uint8_t head);

+uint64_t hotplug_poll;
int mac_from_arg;

const char *pmd_failsafe_init_parameters[] = {
+ PMD_FAILSAFE_PLUG_IN_POLL_KVARG,
PMD_FAILSAFE_MAC_KVARG,
NULL,
};
@@ -221,6 +223,24 @@ fs_remove_sub_devices_definition(char params[DEVARGS_MAXLEN])
}

static int
+fs_get_u64_arg(const char *key __rte_unused,
+ const char *value, void *out)
+{
+ uint64_t *u64 = out;
+ char *endptr = NULL;
+
+ if ((value == NULL) || (out == NULL))
+ return -EINVAL;
+ errno = 0;
+ *u64 = strtoull(value, &endptr, 0);
+ if (errno != 0)
+ return -errno;
+ if (endptr == value)
+ return -1;
+ return 0;
+}
+
+static int
fs_get_mac_addr_arg(const char *key __rte_unused,
const char *value, void *out)
{
@@ -252,6 +272,7 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
ret = 0;
priv->subs_tx = FAILSAFE_MAX_ETHPORTS;
/* default parameters */
+ hotplug_poll = FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS;
mac_from_arg = 0;
n = snprintf(mut_params, sizeof(mut_params), "%s", params);
if (n >= sizeof(mut_params)) {
@@ -274,6 +295,16 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
PMD_FAILSAFE_PARAM_STRING);
return -1;
}
+ /* PLUG_IN event poll timer */
+ arg_count = rte_kvargs_count(kvlist,
+ PMD_FAILSAFE_PLUG_IN_POLL_KVARG);
+ if (arg_count == 1) {
+ ret = rte_kvargs_process(kvlist,
+ PMD_FAILSAFE_PLUG_IN_POLL_KVARG,
+ &fs_get_u64_arg, &hotplug_poll);
+ if (ret < 0)
+ goto free_kvlist;
+ }
/* MAC addr */
arg_count = rte_kvargs_count(kvlist,
PMD_FAILSAFE_MAC_KVARG);
@@ -287,6 +318,7 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
mac_from_arg = 1;
}
}
+ PRIV(dev)->state = DEV_PARSED;
free_kvlist:
rte_kvargs_free(kvlist);
return ret;
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index 087d4f3..d7b12e2 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -90,37 +90,14 @@ fs_bus_init(struct rte_eth_dev *dev)
int
failsafe_eal_init(struct rte_eth_dev *dev)
{
- struct sub_device *sdev;
- uint8_t i;
int ret;

ret = fs_bus_init(dev);
if (ret)
return ret;
- /*
- * We only update TX_SUBDEV if we are not started.
- * If a sub_device is emitting, we will switch the TX_SUBDEV to the
- * preferred port only upon starting it, so that the switch is smoother.
- */
- if (PREFERRED_SUBDEV(dev)->state >= DEV_PROBED) {
- if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
- (TX_SUBDEV(dev) == NULL ||
- (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
- DEBUG("Switching tx_dev to preferred sub_device");
- PRIV(dev)->subs_tx = 0;
- }
- } else {
- if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_PROBED) ||
- TX_SUBDEV(dev) == NULL) {
- /* Using first probed device */
- FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
- DEBUG("Switching tx_dev to sub_device %d",
- i);
- PRIV(dev)->subs_tx = i;
- break;
- }
- }
- }
+ if (PRIV(dev)->state < DEV_PROBED)
+ PRIV(dev)->state = DEV_PROBED;
+ fs_switch_dev(dev);
return 0;
}

@@ -156,5 +133,6 @@ failsafe_eal_uninit(struct rte_eth_dev *dev)
ret = fs_bus_uninit(dev);
if (ret)
return ret;
+ PRIV(dev)->state = DEV_PROBED - 1;
return 0;
}
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
new file mode 100644
index 0000000..7910952
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -0,0 +1,228 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+
+#include "failsafe_private.h"
+
+static int
+fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
+ struct sub_device *sdev)
+{
+ struct rte_eth_dev *edev;
+ struct rte_vlan_filter_conf *vfc1;
+ struct rte_vlan_filter_conf *vfc2;
+ uint32_t i;
+ int ret;
+
+ edev = ETH(sdev);
+ /* RX queue setup */
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ struct rxq *rxq;
+
+ rxq = dev->data->rx_queues[i];
+ ret = rte_eth_rx_queue_setup(PORT_ID(sdev), i,
+ rxq->info.nb_desc, rxq->socket_id,
+ &rxq->info.conf, rxq->info.mp);
+ if (ret) {
+ ERROR("rx_queue_setup failed");
+ return ret;
+ }
+ }
+ /* TX queue setup */
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ struct txq *txq;
+
+ txq = dev->data->tx_queues[i];
+ ret = rte_eth_tx_queue_setup(PORT_ID(sdev), i,
+ txq->info.nb_desc, txq->socket_id,
+ &txq->info.conf);
+ if (ret) {
+ ERROR("tx_queue_setup failed");
+ return ret;
+ }
+ }
+ /* dev_link.link_status */
+ if (dev->data->dev_link.link_status !=
+ edev->data->dev_link.link_status) {
+ DEBUG("Configuring link_status");
+ if (dev->data->dev_link.link_status)
+ ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
+ else
+ ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
+ if (ret) {
+ ERROR("Failed to apply link_status");
+ return ret;
+ }
+ } else {
+ DEBUG("link_status already set");
+ }
+ /* promiscuous */
+ if (dev->data->promiscuous != edev->data->promiscuous) {
+ DEBUG("Configuring promiscuous");
+ if (dev->data->promiscuous)
+ rte_eth_promiscuous_enable(PORT_ID(sdev));
+ else
+ rte_eth_promiscuous_disable(PORT_ID(sdev));
+ } else {
+ DEBUG("promiscuous already set");
+ }
+ /* all_multicast */
+ if (dev->data->all_multicast != edev->data->all_multicast) {
+ DEBUG("Configuring all_multicast");
+ if (dev->data->all_multicast)
+ rte_eth_allmulticast_enable(PORT_ID(sdev));
+ else
+ rte_eth_allmulticast_disable(PORT_ID(sdev));
+ } else {
+ DEBUG("all_multicast already set");
+ }
+ /* MTU */
+ if (dev->data->mtu != edev->data->mtu) {
+ DEBUG("Configuring MTU");
+ ret = rte_eth_dev_set_mtu(PORT_ID(sdev), dev->data->mtu);
+ if (ret) {
+ ERROR("Failed to apply MTU");
+ return ret;
+ }
+ } else {
+ DEBUG("MTU already set");
+ }
+ /* default MAC */
+ DEBUG("Configuring default MAC address");
+ ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
+ &dev->data->mac_addrs[0]);
+ if (ret) {
+ ERROR("Setting default MAC address failed");
+ return ret;
+ }
+ /* additional MAC */
+ if (PRIV(dev)->nb_mac_addr > 1)
+ DEBUG("Configure additional MAC address%s",
+ (PRIV(dev)->nb_mac_addr > 2 ? "es" : ""));
+ for (i = 1; i < PRIV(dev)->nb_mac_addr; i++) {
+ struct ether_addr *ea;
+
+ ea = &dev->data->mac_addrs[i];
+ ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), ea,
+ PRIV(dev)->mac_addr_pool[i]);
+ if (ret) {
+ char ea_fmt[ETHER_ADDR_FMT_SIZE];
+
+ ether_format_addr(ea_fmt, ETHER_ADDR_FMT_SIZE, ea);
+ ERROR("Adding MAC address %s failed", ea_fmt);
+ }
+ }
+ /* VLAN filter */
+ vfc1 = &dev->data->vlan_filter_conf;
+ vfc2 = &edev->data->vlan_filter_conf;
+ if (memcmp(vfc1, vfc2, sizeof(struct rte_vlan_filter_conf))) {
+ uint64_t vbit;
+ uint64_t ids;
+ size_t i;
+ uint16_t vlan_id;
+
+ DEBUG("Configuring VLAN filter");
+ for (i = 0; i < RTE_DIM(vfc1->ids); i++) {
+ if (vfc1->ids[i] == 0)
+ continue;
+ ids = vfc1->ids[i];
+ while (ids) {
+ vlan_id = 64 * i;
+ /* count trailing zeroes */
+ vbit = ~ids & (ids - 1);
+ /* clear least significant bit set */
+ ids ^= (ids ^ (ids - 1)) ^ vbit;
+ for (; vbit; vlan_id++)
+ vbit >>= 1;
+ ret = rte_eth_dev_vlan_filter(
+ PORT_ID(sdev), vlan_id, 1);
+ if (ret) {
+ ERROR("Failed to apply VLAN filter %hu",
+ vlan_id);
+ return ret;
+ }
+ }
+ }
+ } else {
+ DEBUG("VLAN filter already set");
+ }
+ return 0;
+}
+
+int
+failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint32_t inactive;
+ int ret;
+ uint8_t i;
+
+ if (PRIV(dev)->state < DEV_PROBED)
+ return 0;
+ ret = failsafe_eal_init(dev);
+ if (ret)
+ return ret;
+ if (PRIV(dev)->state < DEV_ACTIVE)
+ return 0;
+ inactive = 0;
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state == DEV_PROBED)
+ inactive |= UINT32_C(1) << i;
+ ret = dev->dev_ops->dev_configure(dev);
+ if (ret)
+ return ret;
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (inactive & (UINT32_C(1) << i)) {
+ ret = fs_eth_dev_conf_apply(dev, sdev);
+ if (ret) {
+ ERROR("Could not apply configuration to sub_device %d",
+ i);
+ /* TODO: disable device */
+ return ret;
+ }
+ }
+ }
+ /*
+ * If new devices have been configured, check if
+ * the link state has changed.
+ */
+ if (inactive)
+ dev->dev_ops->link_update(dev, 1);
+ if (PRIV(dev)->state < DEV_STARTED)
+ return 0;
+ ret = dev->dev_ops->dev_start(dev);
+ if (ret)
+ return ret;
+ return 0;
+}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 693162e..4044473 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -89,6 +89,8 @@ fs_dev_configure(struct rte_eth_dev *dev)
}
sdev->state = DEV_ACTIVE;
}
+ if (PRIV(dev)->state < DEV_ACTIVE)
+ PRIV(dev)->state = DEV_ACTIVE;
return 0;
}

@@ -108,21 +110,9 @@ fs_dev_start(struct rte_eth_dev *dev)
return ret;
sdev->state = DEV_STARTED;
}
- if (PREFERRED_SUBDEV(dev)->state == DEV_STARTED) {
- if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev)) {
- DEBUG("Switching tx_dev to preferred sub_device");
- PRIV(dev)->subs_tx = 0;
- }
- } else {
- if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED) ||
- TX_SUBDEV(dev) == NULL) {
- FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
- DEBUG("Switching tx_dev to sub_device %d", i);
- PRIV(dev)->subs_tx = i;
- break;
- }
- }
- }
+ if (PRIV(dev)->state < DEV_STARTED)
+ PRIV(dev)->state = DEV_STARTED;
+ fs_switch_dev(dev);
return 0;
}

@@ -132,6 +122,7 @@ fs_dev_stop(struct rte_eth_dev *dev)
struct sub_device *sdev;
uint8_t i;

+ PRIV(dev)->state = DEV_STARTED - 1;
FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
rte_eth_dev_stop(PORT_ID(sdev));
sdev->state = DEV_STARTED - 1;
@@ -183,6 +174,10 @@ fs_dev_close(struct rte_eth_dev *dev)
struct sub_device *sdev;
uint8_t i;

+ failsafe_hotplug_alarm_cancel(dev);
+ if (PRIV(dev)->state == DEV_STARTED)
+ dev->dev_ops->dev_stop(dev);
+ PRIV(dev)->state = DEV_ACTIVE - 1;
FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
DEBUG("Closing sub_device %d", i);
rte_eth_dev_close(PORT_ID(sdev));
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index e7a7592..8fb72fe 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -41,12 +41,14 @@
#define FAILSAFE_DRIVER_NAME "Fail-safe PMD"

#define PMD_FAILSAFE_MAC_KVARG "mac"
+#define PMD_FAILSAFE_PLUG_IN_POLL_KVARG "hotplug_poll"
#define PMD_FAILSAFE_PARAM_STRING \
"dev(<ifc>)," \
- "mac=mac_addr" \
+ "mac=mac_addr," \
+ "hotplug_poll=u64" \
""

-#define FAILSAFE_PLUGIN_DEFAULT_TIMEOUT_MS 2000
+#define FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS 2000

#define FAILSAFE_MAX_ETHPORTS 2
#define FAILSAFE_MAX_ETHADDR 128
@@ -105,8 +107,22 @@ struct fs_priv {
uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR];
/* current capabilities */
struct rte_eth_dev_info infos;
+ /*
+ * Fail-safe state machine.
+ * This level will be tracking state of the EAL and eth
+ * layer at large as defined by the user application.
+ * It will then steer the sub_devices toward the same
+ * synchronized state.
+ */
+ enum dev_state state;
+ unsigned int pending_alarm:1; /* An alarm is pending */
};

+/* MISC */
+
+int failsafe_hotplug_alarm_install(struct rte_eth_dev *dev);
+int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev);
+
/* RX / TX */

uint16_t failsafe_rx_burst(void *rxq,
@@ -125,10 +141,15 @@ int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
int failsafe_eal_init(struct rte_eth_dev *dev);
int failsafe_eal_uninit(struct rte_eth_dev *dev);

+/* ETH_DEV */
+
+int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
+
/* GLOBALS */

extern const char pmd_failsafe_driver_name[];
extern const struct eth_dev_ops failsafe_ops;
+extern uint64_t hotplug_poll;
extern int mac_from_arg;

/* HELPERS */
@@ -224,4 +245,39 @@ fs_find_next(struct rte_eth_dev *dev, uint8_t sid,
return sid;
}

+static inline void
+fs_switch_dev(struct rte_eth_dev *dev)
+{
+ enum dev_state req_state;
+
+ req_state = PRIV(dev)->state;
+ if (PREFERRED_SUBDEV(dev)->state >= req_state) {
+ if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
+ (TX_SUBDEV(dev) == NULL ||
+ (req_state == DEV_STARTED) ||
+ (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
+ DEBUG("Switching tx_dev to preferred sub_device");
+ PRIV(dev)->subs_tx = 0;
+ }
+ } else if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) ||
+ TX_SUBDEV(dev) == NULL) {
+ struct sub_device *sdev;
+ uint8_t i;
+
+ /* Using acceptable device */
+ FOREACH_SUBDEV_ST(sdev, i, dev, req_state) {
+ DEBUG("Switching tx_dev to sub_device %d",
+ i);
+ PRIV(dev)->subs_tx = i;
+ break;
+ }
+ } else if (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) {
+ DEBUG("No device ready, deactivating tx_dev");
+ PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
+ } else {
+ return;
+ }
+ rte_wmb();
+}
+
#endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */
--
2.1.4
Gaetan Rivet
2017-05-24 15:20:07 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/fail_safe.rst | 14 ++++
drivers/net/failsafe/failsafe.c | 1 +
drivers/net/failsafe/failsafe_args.c | 13 +++
drivers/net/failsafe/failsafe_eal.c | 3 +-
drivers/net/failsafe/failsafe_ether.c | 93 +++++++++++++++++++--
drivers/net/failsafe/failsafe_ops.c | 38 ++++++++-
drivers/net/failsafe/failsafe_private.h | 81 +++++++++++++++++--
drivers/net/failsafe/failsafe_rxtx.c | 139 +++++++++++++++++++++++++++-----
8 files changed, 343 insertions(+), 39 deletions(-)

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index 1b6e110..4154f0a 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -51,6 +51,12 @@ The Fail-safe PMD only supports a limited set of features. If you plan to use a
device underneath the Fail-safe PMD with a specific feature, this feature must
be supported by the Fail-safe PMD to avoid throwing any error.

+A notable exception is the device removal feature. The fail-safe PMD being a
+virtual device, it cannot currently be removed in the sense of a specific bus
+hotplug, like for PCI for example. It will however enable this feature for its
+sub-device automatically, detecting those that are capable and register the
+relevant callback for such event.
+
Check the feature matrix for the complete set of supported features.

Compilation options
@@ -170,3 +176,11 @@ emit and receive packets. It will store any applied configuration, and try to
apply it upon the probing of its missing sub-device. After this configuration
pass, the new sub-device will be synchronized with other sub-devices, i.e. be
started if the fail-safe PMD has been started by the user before.
+
+Plug-out feature
+----------------
+
+A sub-device supporting the device removal event can be removed from its bus at
+any time. The fail-safe PMD will register a callback for such event and react
+accordingly. It will try to safely stop, close and uninit the sub-device having
+emitted this event, allowing it to free its eventual resources.
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 6557255..4d35860 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -132,6 +132,7 @@ fs_hotplug_alarm(void *arg)
if (!PRIV(dev)->pending_alarm)
return;
PRIV(dev)->pending_alarm = 0;
+ failsafe_dev_remove(dev);
FOREACH_SUBDEV(sdev, i, dev)
if (sdev->state != PRIV(dev)->state)
break;
diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
index c723ca3..dd55aaf 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -443,6 +443,17 @@ failsafe_args_count_subdevice(struct rte_eth_dev *dev,
dev, params);
}

+static int
+fs_parse_sub_device(struct sub_device *sdev)
+{
+ struct rte_devargs *da;
+ char devstr[DEVARGS_MAXLEN] = "";
+
+ da = &sdev->devargs;
+ snprintf(devstr, sizeof(devstr), "%s,%s", da->name, da->args);
+ return fs_parse_device(sdev, devstr);
+}
+
int
failsafe_args_parse_subs(struct rte_eth_dev *dev)
{
@@ -455,6 +466,8 @@ failsafe_args_parse_subs(struct rte_eth_dev *dev)
continue;
if (sdev->cmdline)
ret = fs_execute_cmd(sdev, sdev->cmdline);
+ else
+ ret = fs_parse_sub_device(sdev);
if (ret == 0)
sdev->state = DEV_PARSED;
}
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index 1d9ddab..4e5d70e 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -81,6 +81,7 @@ fs_bus_init(struct rte_eth_dev *dev)
return -ENODEV;
}
SUB_ID(sdev) = i;
+ sdev->fs_dev = dev;
sdev->dev = ETH(sdev)->device;
ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
sdev->state = DEV_PROBED;
@@ -98,7 +99,7 @@ failsafe_eal_init(struct rte_eth_dev *dev)
return ret;
if (PRIV(dev)->state < DEV_PROBED)
PRIV(dev)->state = DEV_PROBED;
- fs_switch_dev(dev);
+ fs_switch_dev(dev, NULL);
return 0;
}

diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 2958207..bb6fcff 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -250,6 +250,64 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
return 0;
}

+static void
+fs_dev_remove(struct sub_device *sdev)
+{
+ if (sdev == NULL)
+ return;
+ switch (sdev->state) {
+ case DEV_STARTED:
+ rte_eth_dev_stop(PORT_ID(sdev));
+ sdev->state = DEV_ACTIVE;
+ /* fallthrough */
+ case DEV_ACTIVE:
+ rte_eth_dev_close(PORT_ID(sdev));
+ sdev->state = DEV_PROBED;
+ /* fallthrough */
+ case DEV_PROBED:
+ if (sdev->bus->detach == NULL ||
+ sdev->bus->detach(sdev->dev)) {
+ ERROR("Bus detach failed for sub_device %u",
+ SUB_ID(sdev));
+ } else {
+ ETH(sdev)->state = RTE_ETH_DEV_UNUSED;
+ }
+ sdev->state = DEV_PARSED;
+ /* fallthrough */
+ case DEV_PARSED:
+ case DEV_UNDEFINED:
+ sdev->state = DEV_UNDEFINED;
+ /* the end */
+ break;
+ }
+ failsafe_hotplug_alarm_install(sdev->fs_dev);
+}
+
+static inline int
+fs_rxtx_clean(struct sub_device *sdev)
+{
+ uint16_t i;
+
+ for (i = 0; i < ETH(sdev)->data->nb_rx_queues; i++)
+ if (FS_ATOMIC_RX(sdev, i))
+ return 0;
+ for (i = 0; i < ETH(sdev)->data->nb_tx_queues; i++)
+ if (FS_ATOMIC_TX(sdev, i))
+ return 0;
+ return 1;
+}
+
+void
+failsafe_dev_remove(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->remove && fs_rxtx_clean(sdev))
+ fs_dev_remove(sdev);
+}
+
int
failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
{
@@ -263,13 +321,13 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)

ret = failsafe_args_parse_subs(dev);
if (ret)
- return ret;
+ goto err_remove;

if (PRIV(dev)->state < DEV_PROBED)
return 0;
ret = failsafe_eal_init(dev);
if (ret)
- return ret;
+ goto err_remove;
if (PRIV(dev)->state < DEV_ACTIVE)
return 0;
inactive = 0;
@@ -278,15 +336,14 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
inactive |= UINT32_C(1) << i;
ret = dev->dev_ops->dev_configure(dev);
if (ret)
- return ret;
+ goto err_remove;
FOREACH_SUBDEV(sdev, i, dev) {
if (inactive & (UINT32_C(1) << i)) {
ret = fs_eth_dev_conf_apply(dev, sdev);
if (ret) {
ERROR("Could not apply configuration to sub_device %d",
i);
- /* TODO: disable device */
- return ret;
+ goto err_remove;
}
}
}
@@ -300,6 +357,30 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
return 0;
ret = dev->dev_ops->dev_start(dev);
if (ret)
- return ret;
+ goto err_remove;
return 0;
+err_remove:
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state != PRIV(dev)->state)
+ sdev->remove = 1;
+ return ret;
+}
+
+void
+failsafe_eth_rmv_event_callback(uint8_t port_id __rte_unused,
+ enum rte_eth_event_type event __rte_unused,
+ void *cb_arg)
+{
+ struct sub_device *sdev = cb_arg;
+
+ /* Switch as soon as possible tx_dev. */
+ fs_switch_dev(sdev->fs_dev, sdev);
+ /* Use safe bursts in any case. */
+ set_burst_fn(sdev->fs_dev, 1);
+ usleep(FAILSAFE_PLUGOUT_ASYNC_RESCHED_US);
+ /*
+ * Async removal, the sub-PMD will try to unregister
+ * the callback at the source of the current thread context.
+ */
+ sdev->remove = 1;
}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 5fb0135..2e1c798 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -33,6 +33,8 @@

#include <assert.h>
#include <stdint.h>
+
+#include <rte_atomic.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>
#include <rte_flow.h>
@@ -204,9 +206,21 @@ fs_dev_configure(struct rte_eth_dev *dev)
}
}
FOREACH_SUBDEV(sdev, i, dev) {
+ int rmv_interrupt = 0;
+
if (sdev->state != DEV_PROBED)
continue;
+
+ rmv_interrupt = ETH(sdev)->data->dev_flags &
+ RTE_ETH_DEV_INTR_RMV;
+ if (rmv_interrupt) {
+ DEBUG("Enabling RMV interrupts for sub_device %d", i);
+ dev->data->dev_conf.intr_conf.rmv = 1;
+ } else {
+ DEBUG("sub_device %d does not support RMV event", i);
+ }
DEBUG("Configuring sub-device %d", i);
+ sdev->remove = 0;
ret = rte_eth_dev_configure(PORT_ID(sdev),
dev->data->nb_rx_queues,
dev->data->nb_tx_queues,
@@ -215,6 +229,16 @@ fs_dev_configure(struct rte_eth_dev *dev)
ERROR("Could not configure sub_device %d", i);
return ret;
}
+ if (rmv_interrupt) {
+ ret = rte_eth_dev_callback_register(PORT_ID(sdev),
+ RTE_ETH_EVENT_INTR_RMV,
+ failsafe_eth_rmv_event_callback,
+ sdev);
+ if (ret)
+ WARN("Failed to register RMV callback for sub_device %d",
+ SUB_ID(sdev));
+ }
+ dev->data->dev_conf.intr_conf.rmv = 0;
sdev->state = DEV_ACTIVE;
}
if (PRIV(dev)->state < DEV_ACTIVE)
@@ -240,7 +264,7 @@ fs_dev_start(struct rte_eth_dev *dev)
}
if (PRIV(dev)->state < DEV_STARTED)
PRIV(dev)->state = DEV_STARTED;
- fs_switch_dev(dev);
+ fs_switch_dev(dev, NULL);
return 0;
}

@@ -351,10 +375,14 @@ fs_rx_queue_setup(struct rte_eth_dev *dev,
fs_rx_queue_release(rxq);
dev->data->rx_queues[rx_queue_id] = NULL;
}
- rxq = rte_zmalloc(NULL, sizeof(*rxq),
+ rxq = rte_zmalloc(NULL,
+ sizeof(*rxq) +
+ sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
RTE_CACHE_LINE_SIZE);
if (rxq == NULL)
return -ENOMEM;
+ FOREACH_SUBDEV(sdev, i, dev)
+ rte_atomic64_init(&rxq->refcnt[i]);
rxq->qid = rx_queue_id;
rxq->socket_id = socket_id;
rxq->info.mp = mb_pool;
@@ -414,10 +442,14 @@ fs_tx_queue_setup(struct rte_eth_dev *dev,
fs_tx_queue_release(txq);
dev->data->tx_queues[tx_queue_id] = NULL;
}
- txq = rte_zmalloc("ethdev TX queue", sizeof(*txq),
+ txq = rte_zmalloc("ethdev TX queue",
+ sizeof(*txq) +
+ sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
RTE_CACHE_LINE_SIZE);
if (txq == NULL)
return -ENOMEM;
+ FOREACH_SUBDEV(sdev, i, dev)
+ rte_atomic64_init(&txq->refcnt[i]);
txq->qid = tx_queue_id;
txq->socket_id = socket_id;
txq->info.conf = *tx_conf;
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index f40ea2f..ca20109 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -36,6 +36,7 @@

#include <sys/queue.h>

+#include <rte_atomic.h>
#include <rte_dev.h>
#include <rte_ethdev.h>
#include <rte_devargs.h>
@@ -52,6 +53,7 @@
""

#define FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS 2000
+#define FAILSAFE_PLUGOUT_ASYNC_RESCHED_US 100

#define FAILSAFE_MAX_ETHPORTS 2
#define FAILSAFE_MAX_ETHADDR 128
@@ -65,6 +67,7 @@ struct rxq {
uint8_t last_polled;
unsigned int socket_id;
struct rte_eth_rxq_info info;
+ rte_atomic64_t refcnt[];
};

struct txq {
@@ -72,6 +75,7 @@ struct txq {
uint16_t qid;
unsigned int socket_id;
struct rte_eth_txq_info info;
+ rte_atomic64_t refcnt[];
};

struct rte_flow {
@@ -101,6 +105,10 @@ struct sub_device {
enum dev_state state;
/* Some device are defined as a command line */
char *cmdline;
+ /* fail-safe device backreference */
+ struct rte_eth_dev *fs_dev;
+ /* flag calling for recollection */
+ volatile unsigned int remove:1;
};

struct fs_priv {
@@ -141,11 +149,18 @@ int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev);

/* RX / TX */

+void set_burst_fn(struct rte_eth_dev *dev, int force_safe);
+
uint16_t failsafe_rx_burst(void *rxq,
struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
uint16_t failsafe_tx_burst(void *txq,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts);

+uint16_t failsafe_rx_burst_fast(void *rxq,
+ struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t failsafe_tx_burst_fast(void *txq,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
/* ARGS */

int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
@@ -161,6 +176,10 @@ int failsafe_eal_uninit(struct rte_eth_dev *dev);
/* ETH_DEV */

int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
+void failsafe_dev_remove(struct rte_eth_dev *dev);
+void failsafe_eth_rmv_event_callback(uint8_t port_id,
+ enum rte_eth_event_type type,
+ void *arg);

/* GLOBALS */

@@ -226,6 +245,39 @@ extern int mac_from_arg;
#define SUBOPS(s, ops) \
(ETH(s)->dev_ops->ops)

+/**
+ * Atomic guard
+ */
+
+/**
+ * a: (rte_atomic64_t *)
+ */
+#define FS_ATOMIC_P(a) \
+ rte_atomic64_add(&(a), 1)
+
+/**
+ * a: (rte_atomic64_t *)
+ */
+#define FS_ATOMIC_V(a) \
+ rte_atomic64_sub(&(a), 1)
+
+/**
+ * s: (struct sub_device *)
+ * i: uint16_t qid
+ */
+#define FS_ATOMIC_RX(s, i) \
+ rte_atomic64_read( \
+ &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] \
+ )
+/**
+ * s: (struct sub_device *)
+ * i: uint16_t qid
+ */
+#define FS_ATOMIC_TX(s, i) \
+ rte_atomic64_read( \
+ &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] \
+ )
+
#ifndef NDEBUG
#include <stdio.h>
#define DEBUG__(m, ...) \
@@ -267,38 +319,51 @@ fs_find_next(struct rte_eth_dev *dev, uint8_t sid,
return sid;
}

+/*
+ * Switch emitting device.
+ * If banned is set, banned must not be considered for
+ * the role of emitting device.
+ */
static inline void
-fs_switch_dev(struct rte_eth_dev *dev)
+fs_switch_dev(struct rte_eth_dev *dev,
+ struct sub_device *banned)
{
+ struct sub_device *txd;
enum dev_state req_state;

req_state = PRIV(dev)->state;
- if (PREFERRED_SUBDEV(dev)->state >= req_state) {
- if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
- (TX_SUBDEV(dev) == NULL ||
+ txd = TX_SUBDEV(dev);
+ if (PREFERRED_SUBDEV(dev)->state >= req_state &&
+ PREFERRED_SUBDEV(dev) != banned) {
+ if (txd != PREFERRED_SUBDEV(dev) &&
+ (txd == NULL ||
(req_state == DEV_STARTED) ||
- (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
+ (txd && txd->state < DEV_STARTED))) {
DEBUG("Switching tx_dev to preferred sub_device");
PRIV(dev)->subs_tx = 0;
}
- } else if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) ||
- TX_SUBDEV(dev) == NULL) {
+ } else if ((txd && txd->state < req_state) ||
+ txd == NULL ||
+ txd == banned) {
struct sub_device *sdev;
uint8_t i;

/* Using acceptable device */
FOREACH_SUBDEV_ST(sdev, i, dev, req_state) {
+ if (sdev == banned)
+ continue;
DEBUG("Switching tx_dev to sub_device %d",
i);
PRIV(dev)->subs_tx = i;
break;
}
- } else if (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) {
+ } else if (txd && txd->state < req_state) {
DEBUG("No device ready, deactivating tx_dev");
PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
} else {
return;
}
+ set_burst_fn(dev, 0);
rte_wmb();
}

diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c
index a45b4e5..18d5efd 100644
--- a/drivers/net/failsafe/failsafe_rxtx.c
+++ b/drivers/net/failsafe/failsafe_rxtx.c
@@ -31,16 +31,64 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

+#include <assert.h>
+
+#include <rte_atomic.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>

#include "failsafe_private.h"

-/*
- * TODO: write fast version,
- * without additional checks, to be activated once
- * everything has been verified to comply.
- */
+static inline int
+fs_sdev_rx_unsafe(struct sub_device *sdev)
+{
+ return (ETH(sdev) == NULL) ||
+ (ETH(sdev)->rx_pkt_burst == NULL) ||
+ (sdev->state != DEV_STARTED);
+}
+
+static inline int
+fs_sdev_tx_unsafe(struct sub_device *sdev)
+{
+ return (sdev == NULL) ||
+ (ETH(sdev) == NULL) ||
+ (ETH(sdev)->tx_pkt_burst == NULL) ||
+ (sdev->state != DEV_STARTED);
+}
+
+void
+set_burst_fn(struct rte_eth_dev *dev, int force_safe)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int need_safe;
+ int safe_set;
+
+ need_safe = force_safe;
+ FOREACH_SUBDEV(sdev, i, dev)
+ need_safe |= fs_sdev_rx_unsafe(sdev);
+ safe_set = (dev->rx_pkt_burst == &failsafe_rx_burst);
+ if (need_safe && !safe_set) {
+ DEBUG("Using safe RX bursts%s",
+ (force_safe ? " (forced)" : ""));
+ dev->rx_pkt_burst = &failsafe_rx_burst;
+ } else if (!need_safe && safe_set) {
+ DEBUG("Using fast RX bursts");
+ dev->rx_pkt_burst = &failsafe_rx_burst_fast;
+ }
+ need_safe = force_safe || fs_sdev_tx_unsafe(TX_SUBDEV(dev));
+ safe_set = (dev->tx_pkt_burst == &failsafe_tx_burst);
+ if (need_safe && !safe_set) {
+ DEBUG("Using safe TX bursts%s",
+ (force_safe ? " (forced)" : ""));
+ dev->tx_pkt_burst = &failsafe_tx_burst;
+ } else if (!need_safe && safe_set) {
+ DEBUG("Using fast TX bursts");
+ dev->tx_pkt_burst = &failsafe_tx_burst_fast;
+ }
+ rte_wmb();
+}
+
uint16_t
failsafe_rx_burst(void *queue,
struct rte_mbuf **rx_pkts,
@@ -63,15 +111,49 @@ failsafe_rx_burst(void *queue,
if (i == priv->subs_tail)
i = priv->subs_head;
sdev = &priv->subs[i];
- if (unlikely(ETH(sdev) == NULL))
- continue;
- if (unlikely(ETH(sdev)->rx_pkt_burst == NULL))
- continue;
- if (unlikely(sdev->state != DEV_STARTED))
+ if (unlikely(fs_sdev_rx_unsafe(sdev)))
continue;
sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ FS_ATOMIC_P(rxq->refcnt[sdev->sid]);
nb_rx = ETH(sdev)->
rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ FS_ATOMIC_V(rxq->refcnt[sdev->sid]);
+ if (nb_rx) {
+ rxq->last_polled = i;
+ return nb_rx;
+ }
+ }
+ return 0;
+}
+
+uint16_t
+failsafe_rx_burst_fast(void *queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ void *sub_rxq;
+ uint16_t nb_rx;
+ uint8_t nb_polled, nb_subs;
+ uint8_t i;
+
+ rxq = queue;
+ priv = rxq->priv;
+ nb_subs = priv->subs_tail - priv->subs_head;
+ nb_polled = 0;
+ for (i = rxq->last_polled; nb_polled < nb_subs; nb_polled++) {
+ i++;
+ if (i == priv->subs_tail)
+ i = priv->subs_head;
+ sdev = &priv->subs[i];
+ assert(!fs_sdev_rx_unsafe(sdev));
+ sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ FS_ATOMIC_P(rxq->refcnt[sdev->sid]);
+ nb_rx = ETH(sdev)->
+ rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ FS_ATOMIC_V(rxq->refcnt[sdev->sid]);
if (nb_rx) {
rxq->last_polled = i;
return nb_rx;
@@ -80,11 +162,6 @@ failsafe_rx_burst(void *queue,
return 0;
}

-/*
- * TODO: write fast version,
- * without additional checks, to be activated once
- * everything has been verified to comply.
- */
uint16_t
failsafe_tx_burst(void *queue,
struct rte_mbuf **tx_pkts,
@@ -93,15 +170,35 @@ failsafe_tx_burst(void *queue,
struct sub_device *sdev;
struct txq *txq;
void *sub_txq;
+ uint16_t nb_tx;

txq = queue;
sdev = TX_SUBDEV(txq->priv->dev);
- if (unlikely(sdev == NULL))
- return 0;
- if (unlikely(ETH(sdev) == NULL))
- return 0;
- if (unlikely(ETH(sdev)->tx_pkt_burst == NULL))
+ if (unlikely(fs_sdev_tx_unsafe(sdev)))
return 0;
sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
- return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+ FS_ATOMIC_P(txq->refcnt[sdev->sid]);
+ nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+ FS_ATOMIC_V(txq->refcnt[sdev->sid]);
+ return nb_tx;
+}
+
+uint16_t
+failsafe_tx_burst_fast(void *queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ void *sub_txq;
+ uint16_t nb_tx;
+
+ txq = queue;
+ sdev = TX_SUBDEV(txq->priv->dev);
+ assert(!fs_sdev_tx_unsafe(sdev));
+ sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
+ FS_ATOMIC_P(txq->refcnt[sdev->sid]);
+ nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+ FS_ATOMIC_V(txq->refcnt[sdev->sid]);
+ return nb_tx;
}
--
2.1.4
Gaetan Rivet
2017-05-24 15:20:00 UTC
Permalink
This device state means that the device is managed externally, by
whichever party has set this state (PMD or application).

Note: this new device state is only an information. The related device
structure and operators are still valid and can be used normally.

It is however made private by device management helpers within ethdev,
making the device invisible to applications.

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_ether/rte_ethdev.c | 3 ++-
lib/librte_ether/rte_ethdev.h | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index c8c5e73..f592643 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -288,7 +288,8 @@ int
rte_eth_dev_is_valid_port(uint8_t port_id)
{
if (port_id >= RTE_MAX_ETHPORTS ||
- rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED)
+ (rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED &&
+ rte_eth_devices[port_id].state != RTE_ETH_DEV_DEFERRED))
return 0;
else
return 1;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 33cc48c..161b0ff 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1652,6 +1652,7 @@ struct rte_eth_rxtx_callback {
enum rte_eth_dev_state {
RTE_ETH_DEV_UNUSED = 0,
RTE_ETH_DEV_ATTACHED,
+ RTE_ETH_DEV_DEFERRED,
};

/**
--
2.1.4
Gaetan Rivet
2017-05-24 15:20:08 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
---
doc/guides/nics/features/failsafe.ini | 1 +
drivers/net/failsafe/failsafe.c | 1 +
drivers/net/failsafe/failsafe_ether.c | 15 +++++++++++++++
drivers/net/failsafe/failsafe_ops.c | 23 +++++++++++++++++++++++
drivers/net/failsafe/failsafe_private.h | 3 +++
5 files changed, 43 insertions(+)

diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
index 257f579..251ce55 100644
--- a/doc/guides/nics/features/failsafe.ini
+++ b/doc/guides/nics/features/failsafe.ini
@@ -5,6 +5,7 @@
;
[Features]
Link status = Y
+Link status event = Y
Queue start/stop = Y
MTU update = Y
Jumbo frame = Y
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 4d35860..151f823 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -240,6 +240,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
mac->addr_bytes[0], mac->addr_bytes[1],
mac->addr_bytes[2], mac->addr_bytes[3],
mac->addr_bytes[4], mac->addr_bytes[5]);
+ dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
return 0;
free_args:
failsafe_args_free(dev);
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index bb6fcff..3401a18 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -384,3 +384,18 @@ failsafe_eth_rmv_event_callback(uint8_t port_id __rte_unused,
*/
sdev->remove = 1;
}
+
+void
+failsafe_eth_lsc_event_callback(uint8_t port_id __rte_unused,
+ enum rte_eth_event_type event __rte_unused,
+ void *cb_arg)
+{
+ struct rte_eth_dev *dev = cb_arg;
+ int ret;
+
+ ret = dev->dev_ops->link_update(dev, 0);
+ /* We must pass on the LSC event */
+ if (ret)
+ _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC,
+ NULL);
+}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 2e1c798..05221bc 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -207,6 +207,8 @@ fs_dev_configure(struct rte_eth_dev *dev)
}
FOREACH_SUBDEV(sdev, i, dev) {
int rmv_interrupt = 0;
+ int lsc_interrupt = 0;
+ int lsc_enabled;

if (sdev->state != DEV_PROBED)
continue;
@@ -219,6 +221,17 @@ fs_dev_configure(struct rte_eth_dev *dev)
} else {
DEBUG("sub_device %d does not support RMV event", i);
}
+ lsc_enabled = dev->data->dev_conf.intr_conf.lsc;
+ lsc_interrupt = lsc_enabled &&
+ (ETH(sdev)->data->dev_flags &
+ RTE_ETH_DEV_INTR_LSC);
+ if (lsc_interrupt) {
+ DEBUG("Enabling LSC interrupts for sub_device %d", i);
+ dev->data->dev_conf.intr_conf.lsc = 1;
+ } else if (lsc_enabled && !lsc_interrupt) {
+ DEBUG("Disabling LSC interrupts for sub_device %d", i);
+ dev->data->dev_conf.intr_conf.lsc = 0;
+ }
DEBUG("Configuring sub-device %d", i);
sdev->remove = 0;
ret = rte_eth_dev_configure(PORT_ID(sdev),
@@ -239,6 +252,16 @@ fs_dev_configure(struct rte_eth_dev *dev)
SUB_ID(sdev));
}
dev->data->dev_conf.intr_conf.rmv = 0;
+ if (lsc_interrupt) {
+ ret = rte_eth_dev_callback_register(PORT_ID(sdev),
+ RTE_ETH_EVENT_INTR_LSC,
+ failsafe_eth_lsc_event_callback,
+ dev);
+ if (ret)
+ WARN("Failed to register LSC callback for sub_device %d",
+ SUB_ID(sdev));
+ }
+ dev->data->dev_conf.intr_conf.lsc = lsc_enabled;
sdev->state = DEV_ACTIVE;
}
if (PRIV(dev)->state < DEV_ACTIVE)
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index ca20109..66303cd 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -180,6 +180,9 @@ void failsafe_dev_remove(struct rte_eth_dev *dev);
void failsafe_eth_rmv_event_callback(uint8_t port_id,
enum rte_eth_event_type type,
void *arg);
+void failsafe_eth_lsc_event_callback(uint8_t port_id,
+ enum rte_eth_event_type event,
+ void *cb_arg);

/* GLOBALS */
--
2.1.4
Gaetan Rivet
2017-05-24 15:20:05 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/features/failsafe.ini | 1 +
drivers/net/failsafe/Makefile | 1 +
drivers/net/failsafe/failsafe.c | 1 +
drivers/net/failsafe/failsafe_eal.c | 1 +
drivers/net/failsafe/failsafe_ether.c | 70 +++++++++++
drivers/net/failsafe/failsafe_flow.c | 216 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ops.c | 29 +++++
drivers/net/failsafe/failsafe_private.h | 18 +++
8 files changed, 337 insertions(+)
create mode 100644 drivers/net/failsafe/failsafe_flow.c

diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
index 3c52823..9167b59 100644
--- a/doc/guides/nics/features/failsafe.ini
+++ b/doc/guides/nics/features/failsafe.ini
@@ -13,6 +13,7 @@ Allmulticast mode = Y
Unicast MAC filter = Y
Multicast MAC filter = Y
VLAN filter = Y
+Flow API = Y
Packet type parsing = Y
Basic stats = Y
Stats per queue = Y
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
index 4567961..a53bb75 100644
--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -41,6 +41,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ether.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_flow.c

# No exported include files

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 888f07b..6557255 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -177,6 +177,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0];
dev->data->dev_link = eth_link;
PRIV(dev)->nb_mac_addr = 1;
+ TAILQ_INIT(&PRIV(dev)->flow_list);
dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst;
dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst;
if (params == NULL) {
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index d7b12e2..1d9ddab 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -80,6 +80,7 @@ fs_bus_init(struct rte_eth_dev *dev)
ERROR("sub_device %d init went wrong", i);
return -ENODEV;
}
+ SUB_ID(sdev) = i;
sdev->dev = ETH(sdev)->device;
ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
sdev->state = DEV_PROBED;
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 2a1535e..2958207 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -33,8 +33,46 @@

#include <unistd.h>

+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+
#include "failsafe_private.h"

+/** Print a message out of a flow error. */
+static int
+fs_flow_complain(struct rte_flow_error *error)
+{
+ static const char *const errstrlist[] = {
+ [RTE_FLOW_ERROR_TYPE_NONE] = "no error",
+ [RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified",
+ [RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)",
+ [RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
+ [RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
+ [RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
+ [RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
+ [RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
+ [RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
+ };
+ const char *errstr;
+ char buf[32];
+ int err = rte_errno;
+
+ if ((unsigned int)error->type >= RTE_DIM(errstrlist) ||
+ !errstrlist[error->type])
+ errstr = "unknown type";
+ else
+ errstr = errstrlist[error->type];
+ ERROR("Caught error type %d (%s): %s%s\n",
+ error->type, errstr,
+ error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ",
+ error->cause), buf) : "",
+ error->message ? error->message : "(no stated reason)");
+ return -err;
+}
+
static int
fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
struct sub_device *sdev)
@@ -42,6 +80,8 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
struct rte_eth_dev *edev;
struct rte_vlan_filter_conf *vfc1;
struct rte_vlan_filter_conf *vfc2;
+ struct rte_flow *flow;
+ struct rte_flow_error ferror;
uint32_t i;
int ret;

@@ -177,6 +217,36 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
} else {
DEBUG("VLAN filter already set");
}
+ /* rte_flow */
+ if (TAILQ_EMPTY(&PRIV(dev)->flow_list)) {
+ DEBUG("rte_flow already set");
+ } else {
+ DEBUG("Resetting rte_flow configuration");
+ ret = rte_flow_flush(PORT_ID(sdev), &ferror);
+ if (ret) {
+ fs_flow_complain(&ferror);
+ return ret;
+ }
+ i = 0;
+ rte_errno = 0;
+ DEBUG("Configuring rte_flow");
+ TAILQ_FOREACH(flow, &PRIV(dev)->flow_list, next) {
+ DEBUG("Creating flow #%" PRIu32, i++);
+ flow->flows[SUB_ID(sdev)] =
+ rte_flow_create(PORT_ID(sdev),
+ &flow->fd->attr,
+ flow->fd->items,
+ flow->fd->actions,
+ &ferror);
+ ret = rte_errno;
+ if (ret)
+ break;
+ }
+ if (ret) {
+ fs_flow_complain(&ferror);
+ return ret;
+ }
+ }
return 0;
}

diff --git a/drivers/net/failsafe/failsafe_flow.c b/drivers/net/failsafe/failsafe_flow.c
new file mode 100644
index 0000000..d8f59a1
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_flow.c
@@ -0,0 +1,216 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/queue.h>
+
+#include <rte_malloc.h>
+#include <rte_tailq.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+
+#include "failsafe_private.h"
+
+static struct rte_flow *
+fs_flow_allocate(const struct rte_flow_attr *attr,
+ const struct rte_flow_item *items,
+ const struct rte_flow_action *actions)
+{
+ struct rte_flow *flow;
+ size_t fdsz;
+
+ fdsz = rte_flow_copy(NULL, 0, attr, items, actions);
+ flow = rte_zmalloc(NULL,
+ sizeof(struct rte_flow) + fdsz,
+ RTE_CACHE_LINE_SIZE);
+ if (flow == NULL) {
+ ERROR("Could not allocate new flow");
+ return NULL;
+ }
+ flow->fd = (void *)((uintptr_t)flow + sizeof(*flow));
+ if (rte_flow_copy(flow->fd, fdsz, attr, items, actions) != fdsz) {
+ ERROR("Failed to copy flow description");
+ rte_free(flow);
+ return NULL;
+ }
+ return flow;
+}
+
+static void
+fs_flow_release(struct rte_flow **flow)
+{
+ rte_free((*flow)->fd);
+ rte_free(*flow);
+ *flow = NULL;
+}
+
+static int
+fs_flow_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item patterns[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_flow_validate on sub_device %d", i);
+ ret = rte_flow_validate(PORT_ID(sdev),
+ attr, patterns, actions, error);
+ if (ret) {
+ ERROR("Operation rte_flow_validate failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static struct rte_flow *
+fs_flow_create(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item patterns[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ struct rte_flow *flow;
+ uint8_t i;
+
+ flow = fs_flow_allocate(attr, patterns, actions);
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ flow->flows[i] = rte_flow_create(PORT_ID(sdev),
+ attr, patterns, actions, error);
+ if (flow->flows[i] == NULL) {
+ ERROR("Failed to create flow on sub_device %d",
+ i);
+ goto err;
+ }
+ }
+ TAILQ_INSERT_TAIL(&PRIV(dev)->flow_list, flow, next);
+ return flow;
+err:
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (flow->flows[i] != NULL)
+ rte_flow_destroy(PORT_ID(sdev),
+ flow->flows[i], error);
+ }
+ fs_flow_release(&flow);
+ return NULL;
+}
+
+static int
+fs_flow_destroy(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ if (flow == NULL) {
+ ERROR("Invalid flow");
+ return -EINVAL;
+ }
+ ret = 0;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ int local_ret;
+
+ if (flow->flows[i] == NULL)
+ continue;
+ local_ret = rte_flow_destroy(PORT_ID(sdev),
+ flow->flows[i], error);
+ if (local_ret) {
+ ERROR("Failed to destroy flow on sub_device %d: %d",
+ i, local_ret);
+ if (ret == 0)
+ ret = local_ret;
+ }
+ }
+ TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
+ rte_free(flow);
+ return ret;
+}
+
+static int
+fs_flow_flush(struct rte_eth_dev *dev,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ struct rte_flow *flow;
+ void *tmp;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_flow_flush on sub_device %d", i);
+ ret = rte_flow_flush(PORT_ID(sdev), error);
+ if (ret) {
+ ERROR("Operation rte_flow_flush failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ TAILQ_FOREACH_SAFE(flow, &PRIV(dev)->flow_list, next, tmp) {
+ TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
+ fs_flow_release(&flow);
+ }
+ return 0;
+}
+
+static int
+fs_flow_query(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ enum rte_flow_action_type type,
+ void *arg,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev != NULL) {
+ return rte_flow_query(PORT_ID(sdev),
+ flow->flows[SUB_ID(sdev)], type, arg, error);
+ }
+ WARN("No active sub_device to query about its flow");
+ return -1;
+}
+
+const struct rte_flow_ops fs_flow_ops = {
+ .validate = fs_flow_validate,
+ .create = fs_flow_create,
+ .destroy = fs_flow_destroy,
+ .flush = fs_flow_flush,
+ .query = fs_flow_query,
+};
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 4044473..4cb2e90 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -35,6 +35,7 @@
#include <stdint.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>
+#include <rte_flow.h>

#include "failsafe_private.h"

@@ -628,6 +629,33 @@ fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
}

+static int
+fs_filter_ctrl(struct rte_eth_dev *dev,
+ enum rte_filter_type type,
+ enum rte_filter_op op,
+ void *arg)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ if (type == RTE_ETH_FILTER_GENERIC &&
+ op == RTE_ETH_FILTER_GET) {
+ *(const void **)arg = &fs_flow_ops;
+ return 0;
+ }
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d", i);
+ ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_filter_ctrl failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
const struct eth_dev_ops failsafe_ops = {
.dev_configure = fs_dev_configure,
.dev_start = fs_dev_start,
@@ -655,4 +683,5 @@ const struct eth_dev_ops failsafe_ops = {
.mac_addr_remove = fs_mac_addr_remove,
.mac_addr_add = fs_mac_addr_add,
.mac_addr_set = fs_mac_addr_set,
+ .filter_ctrl = fs_filter_ctrl,
};
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 554d7a3..f40ea2f 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -34,6 +34,8 @@
#ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_
#define _RTE_ETH_FAILSAFE_PRIVATE_H_

+#include <sys/queue.h>
+
#include <rte_dev.h>
#include <rte_ethdev.h>
#include <rte_devargs.h>
@@ -72,6 +74,14 @@ struct txq {
struct rte_eth_txq_info info;
};

+struct rte_flow {
+ TAILQ_ENTRY(rte_flow) next;
+ /* sub_flows */
+ struct rte_flow *flows[FAILSAFE_MAX_ETHPORTS];
+ /* flow description for synchronization */
+ struct rte_flow_desc *fd;
+};
+
enum dev_state {
DEV_UNDEFINED = 0,
DEV_PARSED,
@@ -86,6 +96,7 @@ struct sub_device {
struct rte_bus *bus;
struct rte_device *dev;
struct rte_eth_dev *edev;
+ uint8_t sid;
/* Device state machine */
enum dev_state state;
/* Some device are defined as a command line */
@@ -104,6 +115,8 @@ struct fs_priv {
uint8_t subs_tail; /* first invalid */
uint8_t subs_tx; /* current emitting device */
uint8_t current_probed;
+ /* flow mapping */
+ TAILQ_HEAD(sub_flows, rte_flow) flow_list;
/* current number of mac_addr slots allocated. */
uint32_t nb_mac_addr;
struct ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR];
@@ -153,6 +166,7 @@ int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);

extern const char pmd_failsafe_driver_name[];
extern const struct eth_dev_ops failsafe_ops;
+extern const struct rte_flow_ops fs_flow_ops;
extern uint64_t hotplug_poll;
extern int mac_from_arg;

@@ -170,6 +184,10 @@ extern int mac_from_arg;
#define PORT_ID(sdev) \
(ETH(sdev)->data->port_id)

+/* sdev: (struct sub_device *) */
+#define SUB_ID(sdev) \
+ ((sdev)->sid)
+
/**
* Stateful iterator construct over fail-safe sub-devices:
* s: (struct sub_device *), iterator
--
2.1.4
Gaetan Rivet
2017-05-29 13:42:12 UTC
Permalink
This PMD intercepts and manages Ethernet device removal events issued by
slave PMDs and re-initializes them transparently when brought back so that
existing applications do not need to be modified to benefit from true
hot-plugging support.

The stacked PMD approach shares many similarities with the bonding PMD but
with a different purpose. While bonding provides the ability to group
several links into a single logical device for enhanced throughput and
supports fail-over at link level, this one manages the sudden disappearance
of the underlying device; it guarantees applications face a valid device in
working order at all times.

Each fail-safe instance is configured to run atop one or several
devices, with one defined as the preferred device. Hot-plug events are
handled on all of them, and Tx is always directed to the preferred device
if present or to the next available failover device (Rx is always performed
on all devices for simplicity).

Moreover, the configured slaves (preferred or failover) do not need to be
present at initialization time and may appear later.

Slaves configuration is continuously synchronized with that of the virtual
device, which exposes their common set of capabilities to the application.
Failure to apply the current configuration state to a slave for any reason
simply reschedules its initialization.

This patchset depends on:

eal: complete attach / detach support
http://dpdk.org/ml/archives/dev/2017-May/066366.html
http://dpdk.org/dev/patchwork/patch/24522/

ethdev: add flow API rule copy function
http://dpdk.org/ml/archives/dev/2017-May/066145.html
http://dpdk.org/dev/patchwork/patch/24406/

ethdev: add isolated mode to flow API
http://dpdk.org/ml/archives/dev/2017-April/064327.html
http://dpdk.org/dev/patchwork/patch/23741/

v1 --> v2:

- Wrote documentation
- Fixed commit logs, signed-off-by
- Added LSC event support
- A few minor fixes

v2 --> v3:

- Numerous bug fixes.
- Complete sub-EAL rework to follow new bus API.
- burst protection on sub removal.
- more flexible sub definition.
- flow isolated mode support.

v3 --> v4:

- Split back commits
net/failsafe: add fast burst functions
net/failsafe: support device removal
That were squashed by error during a rebase
- Fix segfault on port plugin
- Fix isolate mode support for MLX4 ports plugin

Gaetan Rivet (12):
ethdev: save VLAN filter setting
ethdev: add deferred intermediate device state
ethdev: count devices consistently
net/failsafe: add fail-safe PMD
net/failsafe: add plug-in support
net/failsafe: add flexible device definition
net/failsafe: support flow API
net/failsafe: support offload capabilities
net/failsafe: add fast burst functions
net/failsafe: support device removal
net/failsafe: support link status change event
net/failsafe: support flow API isolation mode

MAINTAINERS | 5 +
config/common_base | 6 +
doc/guides/nics/fail_safe.rst | 186 +++++++
doc/guides/nics/features/failsafe.ini | 32 ++
doc/guides/nics/index.rst | 1 +
drivers/net/Makefile | 2 +
drivers/net/failsafe/Makefile | 74 +++
drivers/net/failsafe/failsafe.c | 305 +++++++++++
drivers/net/failsafe/failsafe_args.c | 475 +++++++++++++++++
drivers/net/failsafe/failsafe_eal.c | 140 +++++
drivers/net/failsafe/failsafe_ether.c | 431 ++++++++++++++++
drivers/net/failsafe/failsafe_flow.c | 245 +++++++++
drivers/net/failsafe/failsafe_ops.c | 869 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_private.h | 377 ++++++++++++++
drivers/net/failsafe/failsafe_rxtx.c | 204 ++++++++
lib/librte_ether/rte_ethdev.c | 38 +-
lib/librte_ether/rte_ethdev.h | 24 +-
mk/rte.app.mk | 1 +
18 files changed, 3399 insertions(+), 16 deletions(-)
create mode 100644 doc/guides/nics/fail_safe.rst
create mode 100644 doc/guides/nics/features/failsafe.ini
create mode 100644 drivers/net/failsafe/Makefile
create mode 100644 drivers/net/failsafe/failsafe.c
create mode 100644 drivers/net/failsafe/failsafe_args.c
create mode 100644 drivers/net/failsafe/failsafe_eal.c
create mode 100644 drivers/net/failsafe/failsafe_ether.c
create mode 100644 drivers/net/failsafe/failsafe_flow.c
create mode 100644 drivers/net/failsafe/failsafe_ops.c
create mode 100644 drivers/net/failsafe/failsafe_private.h
create mode 100644 drivers/net/failsafe/failsafe_rxtx.c
--
2.1.4
Gaetan Rivet
2017-05-29 13:42:13 UTC
Permalink
Other configuration items (i.e. MAC addresses) are stored within
rte_eth_dev_data, but not this one.

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_ether/rte_ethdev.c | 19 ++++++++++++++++++-
lib/librte_ether/rte_ethdev.h | 10 ++++++++++
2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index ff4f5ab..c8c5e73 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1976,6 +1976,7 @@ int
rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
{
struct rte_eth_dev *dev;
+ int ret;

RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
dev = &rte_eth_devices[port_id];
@@ -1991,7 +1992,23 @@ rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_filter_set, -ENOTSUP);

- return (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on);
+ ret = (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on);
+ if (ret == 0) {
+ struct rte_vlan_filter_conf *vfc;
+ int vidx;
+ int vbit;
+
+ vfc = &dev->data->vlan_filter_conf;
+ vidx = vlan_id / 64;
+ vbit = vlan_id % 64;
+
+ if (on)
+ vfc->ids[vidx] |= UINT64_C(1) << vbit;
+ else
+ vfc->ids[vidx] &= ~(UINT64_C(1) << vbit);
+ }
+
+ return ret;
}

int
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 0f38b45..33cc48c 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -374,6 +374,14 @@ enum rte_vlan_type {
};

/**
+ * A structure used to describe a vlan filter.
+ * If the bit corresponding to a VID is set, such VID is on.
+ */
+struct rte_vlan_filter_conf {
+ uint64_t ids[64];
+};
+
+/**
* A structure used to configure the Receive Side Scaling (RSS) feature
* of an Ethernet port.
* If not NULL, the *rss_key* pointer of the *rss_conf* structure points
@@ -1738,6 +1746,8 @@ struct rte_eth_dev_data {
enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */
int numa_node; /**< NUMA node connection */
const char *drv_name; /**< Driver name */
+ struct rte_vlan_filter_conf vlan_filter_conf;
+ /**< VLAN filter configuration. */
};

/** Device supports hotplug detach */
--
2.1.4
Gaetan Rivet
2017-05-29 13:42:14 UTC
Permalink
This device state means that the device is managed externally, by
whichever party has set this state (PMD or application).

Note: this new device state is only an information. The related device
structure and operators are still valid and can be used normally.

It is however made private by device management helpers within ethdev,
making the device invisible to applications.

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_ether/rte_ethdev.c | 3 ++-
lib/librte_ether/rte_ethdev.h | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index c8c5e73..f592643 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -288,7 +288,8 @@ int
rte_eth_dev_is_valid_port(uint8_t port_id)
{
if (port_id >= RTE_MAX_ETHPORTS ||
- rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED)
+ (rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED &&
+ rte_eth_devices[port_id].state != RTE_ETH_DEV_DEFERRED))
return 0;
else
return 1;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 33cc48c..161b0ff 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1652,6 +1652,7 @@ struct rte_eth_rxtx_callback {
enum rte_eth_dev_state {
RTE_ETH_DEV_UNUSED = 0,
RTE_ETH_DEV_ATTACHED,
+ RTE_ETH_DEV_DEFERRED,
};

/**
--
2.1.4
Gaetan Rivet
2017-05-29 13:42:15 UTC
Permalink
Make the rte_eth_dev_count() return the number of available devices even
after some are detached by the hotplug API or put in a deferred state.

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_ether/rte_ethdev.c | 16 +++++++++-------
lib/librte_ether/rte_ethdev.h | 13 ++++++-------
2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index f592643..172c8ec 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -72,7 +72,6 @@ static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
static struct rte_eth_dev_data *rte_eth_dev_data;
static uint8_t eth_dev_last_created_port;
-static uint8_t nb_ports;

/* spinlock for eth device callbacks */
static rte_spinlock_t rte_eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER;
@@ -207,7 +206,6 @@ eth_dev_get(uint8_t port_id)
TAILQ_INIT(&(eth_dev->link_intr_cbs));

eth_dev_last_created_port = port_id;
- nb_ports++;

return eth_dev;
}
@@ -280,7 +278,6 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
return -EINVAL;

eth_dev->state = RTE_ETH_DEV_UNUSED;
- nb_ports--;
return 0;
}

@@ -305,7 +302,15 @@ rte_eth_dev_socket_id(uint8_t port_id)
uint8_t
rte_eth_dev_count(void)
{
- return nb_ports;
+ uint8_t p;
+ uint8_t count;
+
+ count = 0;
+
+ RTE_ETH_FOREACH_DEV(p)
+ count++;
+
+ return count;
}

int
@@ -337,9 +342,6 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id)
return -EINVAL;
}

- if (!nb_ports)
- return -ENODEV;
-
*port_id = RTE_MAX_ETHPORTS;
RTE_ETH_FOREACH_DEV(i) {
if (!strncmp(name,
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 161b0ff..44f7e83 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1788,13 +1788,12 @@ uint8_t rte_eth_find_next(uint8_t port_id);

/**
* Get the total number of Ethernet devices that have been successfully
- * initialized by the [matching] Ethernet driver during the PCI probing phase.
- * All devices whose port identifier is in the range
- * [0, rte_eth_dev_count() - 1] can be operated on by network applications
- * immediately after invoking rte_eal_init().
- * If the application unplugs a port using hotplug function, The enabled port
- * numbers may be noncontiguous. In the case, the applications need to manage
- * enabled port by using the ``RTE_ETH_FOREACH_DEV()`` macro.
+ * initialized by the matching Ethernet driver during the PCI probing phase
+ * and that are available for applications to use. These devices must be
+ * accessed by using the ``RTE_ETH_FOREACH_DEV()`` macro to deal with
+ * non-contiguous ranges of devices.
+ * These non-contiguous ranges can be created by calls to hotplug functions or
+ * by some PMDs.
*
* @return
* - The total number of usable Ethernet devices.
--
2.1.4
Gaetan Rivet
2017-05-29 13:42:16 UTC
Permalink
Introduce the fail-safe poll mode driver initialization and enable its
build infrastructure.

This PMD allows for applications to benefit from true hot-plugging
support without having to implement it.

It intercepts and manages Ethernet device removal events issued by
slave PMDs and re-initializes them transparently when brought back.
It also allows defining a contingency to the removal of a device, by
designating a fail-over device that will take on transmitting operations
if the preferred device is removed.

Applications only see a fail-safe instance, without caring for
underlying activity ensuring their continued operations.

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
MAINTAINERS | 5 +
config/common_base | 6 +
doc/guides/nics/fail_safe.rst | 133 +++++++
doc/guides/nics/features/failsafe.ini | 24 ++
doc/guides/nics/index.rst | 1 +
drivers/net/Makefile | 2 +
drivers/net/failsafe/Makefile | 72 ++++
drivers/net/failsafe/failsafe.c | 231 +++++++++++
drivers/net/failsafe/failsafe_args.c | 331 ++++++++++++++++
drivers/net/failsafe/failsafe_eal.c | 160 ++++++++
drivers/net/failsafe/failsafe_ops.c | 663 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_private.h | 227 +++++++++++
drivers/net/failsafe/failsafe_rxtx.c | 107 ++++++
mk/rte.app.mk | 1 +
14 files changed, 1963 insertions(+)
create mode 100644 doc/guides/nics/fail_safe.rst
create mode 100644 doc/guides/nics/features/failsafe.ini
create mode 100644 drivers/net/failsafe/Makefile
create mode 100644 drivers/net/failsafe/failsafe.c
create mode 100644 drivers/net/failsafe/failsafe_args.c
create mode 100644 drivers/net/failsafe/failsafe_eal.c
create mode 100644 drivers/net/failsafe/failsafe_ops.c
create mode 100644 drivers/net/failsafe/failsafe_private.h
create mode 100644 drivers/net/failsafe/failsafe_rxtx.c

diff --git a/MAINTAINERS b/MAINTAINERS
index afb4cab..497a2cf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -333,6 +333,11 @@ F: drivers/net/enic/
F: doc/guides/nics/enic.rst
F: doc/guides/nics/features/enic.ini

+Fail-safe PMD
+M: Gaetan Rivet <***@6wind.com>
+F: drivers/net/failsafe/
+F: doc/guides/nics/fail_safe.rst
+
Intel e1000
M: Wenzhuo Lu <***@intel.com>
F: drivers/net/e1000/
diff --git a/config/common_base b/config/common_base
index 8907bea..ba3c879 100644
--- a/config/common_base
+++ b/config/common_base
@@ -425,6 +425,12 @@ CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
CONFIG_RTE_LIBRTE_PMD_NULL=y

#
+# Compile fail-safe PMD
+#
+CONFIG_RTE_LIBRTE_PMD_FAILSAFE=y
+CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG=n
+
+#
# Do prefetch of packet data within PMD driver receive function
#
CONFIG_RTE_PMD_PACKET_PREFETCH=y
diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
new file mode 100644
index 0000000..056f85f
--- /dev/null
+++ b/doc/guides/nics/fail_safe.rst
@@ -0,0 +1,133 @@
+.. BSD LICENSE
+ Copyright 2017 6WIND S.A.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of 6WIND S.A. nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Fail-safe poll mode driver library
+==================================
+
+The Fail-safe poll mode driver library (**librte_pmd_failsafe**) is a virtual
+device that allows using any device supporting hotplug (sudden device removal
+and plugging on its bus), without modifying other components relying on such
+device (application, other PMDs).
+
+Additionally to the Seamless Hotplug feature, the Fail-safe PMD offers the
+ability to redirect operations to secondary devices when the primary has been
+removed from the system.
+
+.. note::
+
+ The library is enabled by default. You can enable it or disable it manually
+ by setting the ``CONFIG_RTE_LIBRTE_PMD_FAILSAFE`` configuration option.
+
+Features
+--------
+
+The Fail-safe PMD only supports a limited set of features. If you plan to use a
+device underneath the Fail-safe PMD with a specific feature, this feature must
+be supported by the Fail-safe PMD to avoid throwing any error.
+
+Check the feature matrix for the complete set of supported features.
+
+Compilation options
+-------------------
+
+These options can be modified in the ``$RTE_TARGET/build/.config`` file.
+
+- ``CONFIG_RTE_LIBRTE_PMD_FAILSAFE`` (default **y**)
+
+ Toggle compiling librte_pmd_failsafe itself.
+
+- ``CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG`` (default **n**)
+
+ Toggle debugging code.
+
+Using the Fail-safe PMD from the EAL command line
+-------------------------------------------------
+
+The Fail-safe PMD can be used like most other DPDK virtual devices, by passing a
+``--vdev`` parameter to the EAL when starting the application. The device name
+must start with the *net_failsafe* prefix, followed by numbers or letters. This
+name must be unique for each device. Each fail-safe instance must have at least one
+sub-device, up to ``RTE_MAX_ETHPORTS-1``.
+
+A sub-device can be any legal DPDK device, including possibly another fail-safe
+instance.
+
+Fail-safe command line parameters
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- **dev(<iface>)** parameter
+
+ This parameter allows the user to define a sub-device. The ``<iface>`` part of
+ this parameter must be a valid device definition. It could be the argument
+ provided to a ``-w`` PCI device specification or the argument that would be
+ given to a ``--vdev`` parameter (including a fail-safe).
+ Enclosing the device definition within parenthesis here allows using
+ additional sub-device parameters if need be. They will be passed on to the
+ sub-device.
+
+- **mac** parameter [MAC address]
+
+ This parameter allows the user to set a default MAC address to the fail-safe
+ and all of its sub-devices.
+ If no default mac address is provided, the fail-safe PMD will read the MAC
+ address of the first of its sub-device to be successfully probed and use it as
+ its default MAC address, trying to set it to all of its other sub-devices.
+ If no sub-device was successfully probed at initialization, then a random MAC
+ address is generated, that will be subsequently applied to all sub-device once
+ they are probed.
+
+Usage example
+~~~~~~~~~~~~~
+
+This section shows some example of using **testpmd** with a fail-safe PMD.
+
+#. Request huge pages:
+
+ .. code-block:: console
+
+ echo 1024 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+#. Start testpmd
+
+ .. code-block:: console
+
+ $RTE_TARGET/build/app/testpmd -c 0xff -n 4 --no-pci \
+ --vdev='net_failsafe0,mac=de:ad:be:ef:01:02,dev(84:00.0),dev(net_ring0,nodeaction=r1:0:CREATE)' -- \
+ -i
+
+Using the Fail-safe PMD from an application
+-------------------------------------------
+
+This driver strives to be as seamless as possible to existing applications, in
+order to propose the hotplug functionality in the easiest way possible.
+
+Care must be taken, however, to respect the **ether** API concerning device
+access, and in particular, using the ``RTE_ETH_FOREACH_DEV`` macro to iterate
+over ethernet devices, instead of directly accessing them or by writing one's
+own device iterator.
diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
new file mode 100644
index 0000000..3c52823
--- /dev/null
+++ b/doc/guides/nics/features/failsafe.ini
@@ -0,0 +1,24 @@
+;
+; Supported features of the 'fail-safe' poll mode driver.
+;
+; Refer to default.ini for the full list of available PMD features.
+;
+[Features]
+Link status = Y
+Queue start/stop = Y
+MTU update = Y
+Jumbo frame = Y
+Promiscuous mode = Y
+Allmulticast mode = Y
+Unicast MAC filter = Y
+Multicast MAC filter = Y
+VLAN filter = Y
+Packet type parsing = Y
+Basic stats = Y
+Stats per queue = Y
+ARMv7 = Y
+ARMv8 = Y
+Power8 = Y
+x86-32 = Y
+x86-64 = Y
+Usage doc = Y
diff --git a/doc/guides/nics/index.rst b/doc/guides/nics/index.rst
index 240d082..17eaaf4 100644
--- a/doc/guides/nics/index.rst
+++ b/doc/guides/nics/index.rst
@@ -64,6 +64,7 @@ Network Interface Controller Drivers
vhost
vmxnet3
pcap_ring
+ fail_safe

**Figures**

diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 35ed813..d33c959 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -59,6 +59,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_ENA_PMD) += ena
DEPDIRS-ena = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic
DEPDIRS-enic = $(core-libs) librte_hash
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe
+DEPDIRS-failsafe = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
DEPDIRS-fm10k = $(core-libs) librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
new file mode 100644
index 0000000..06199ad
--- /dev/null
+++ b/drivers/net/failsafe/Makefile
@@ -0,0 +1,72 @@
+# BSD LICENSE
+#
+# Copyright 2017 6WIND S.A.
+# Copyright 2017 Mellanox.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of 6WIND S.A. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# Library name
+LIB = librte_pmd_failsafe.a
+
+# Sources are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_args.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
+
+# No exported include files
+
+# This lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_kvargs
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_mbuf
+
+ifneq ($(DEBUG),)
+CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG := y
+endif
+
+# Basic CFLAGS:
+CFLAGS += -std=gnu99 -Wall -Wextra
+CFLAGS += -I.
+CFLAGS += -D_BSD_SOURCE
+CFLAGS += -D_XOPEN_SOURCE=700
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -Wno-strict-prototypes
+CFLAGS += -pedantic -DPEDANTIC
+
+ifeq ($(CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG),y)
+CFLAGS += -g -UNDEBUG
+else
+CFLAGS += -O3
+CFLAGS += -DNDEBUG
+endif
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
new file mode 100644
index 0000000..7cf33e8
--- /dev/null
+++ b/drivers/net/failsafe/failsafe.c
@@ -0,0 +1,231 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <rte_alarm.h>
+#include <rte_malloc.h>
+#include <rte_ethdev.h>
+#include <rte_ethdev_vdev.h>
+#include <rte_devargs.h>
+#include <rte_kvargs.h>
+#include <rte_vdev.h>
+
+#include "failsafe_private.h"
+
+const char pmd_failsafe_driver_name[] = FAILSAFE_DRIVER_NAME;
+static const struct rte_eth_link eth_link = {
+ .link_speed = ETH_SPEED_NUM_10G,
+ .link_duplex = ETH_LINK_FULL_DUPLEX,
+ .link_status = ETH_LINK_UP,
+ .link_autoneg = ETH_LINK_SPEED_AUTONEG,
+};
+
+static int
+fs_sub_device_create(struct rte_eth_dev *dev,
+ const char *params)
+{
+ uint8_t nb_subs;
+ int ret;
+
+ ret = failsafe_args_count_subdevice(dev, params);
+ if (ret)
+ return ret;
+ if (PRIV(dev)->subs_tail > FAILSAFE_MAX_ETHPORTS) {
+ ERROR("Cannot allocate more than %d ports",
+ FAILSAFE_MAX_ETHPORTS);
+ return -ENOSPC;
+ }
+ nb_subs = PRIV(dev)->subs_tail;
+ PRIV(dev)->subs = rte_zmalloc(NULL,
+ sizeof(struct sub_device) * nb_subs,
+ RTE_CACHE_LINE_SIZE);
+ if (PRIV(dev)->subs == NULL) {
+ ERROR("Could not allocate sub_devices");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void
+fs_sub_device_free(struct rte_eth_dev *dev)
+{
+ rte_free(PRIV(dev)->subs);
+}
+
+static int
+fs_eth_dev_create(struct rte_vdev_device *vdev)
+{
+ struct rte_eth_dev *dev;
+ struct ether_addr *mac;
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ const char *params;
+ unsigned int socket_id;
+ uint8_t i;
+ int ret;
+
+ dev = NULL;
+ priv = NULL;
+ params = rte_vdev_device_args(vdev);
+ socket_id = rte_socket_id();
+ INFO("Creating fail-safe device on NUMA socket %u",
+ socket_id);
+ dev = rte_eth_vdev_allocate(vdev, sizeof(*priv));
+ if (dev == NULL) {
+ ERROR("Unable to allocate rte_eth_dev");
+ return -1;
+ }
+ priv = dev->data->dev_private;
+ PRIV(dev)->dev = dev;
+ dev->dev_ops = &failsafe_ops;
+ TAILQ_INIT(&dev->link_intr_cbs);
+ dev->data->dev_flags = 0x0;
+ dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0];
+ dev->data->dev_link = eth_link;
+ PRIV(dev)->nb_mac_addr = 1;
+ dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst;
+ dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst;
+ if (params == NULL) {
+ ERROR("This PMD requires sub-devices, none provided");
+ goto free_dev;
+ }
+ ret = fs_sub_device_create(dev, params);
+ if (ret) {
+ ERROR("Could not allocate sub_devices");
+ goto free_dev;
+ }
+ ret = failsafe_args_parse(dev, params);
+ if (ret)
+ goto free_subs;
+ ret = failsafe_eal_init(dev);
+ if (ret)
+ goto free_args;
+ mac = &dev->data->mac_addrs[0];
+ if (mac_from_arg) {
+ /*
+ * If MAC address was provided as a parameter,
+ * apply to all probed slaves.
+ */
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
+ mac);
+ if (ret) {
+ ERROR("Failed to set default MAC address");
+ goto free_args;
+ }
+ }
+ } else {
+ /*
+ * Use the ether_addr from first probed
+ * device, either preferred or fallback.
+ */
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state >= DEV_PROBED) {
+ ether_addr_copy(&ETH(sdev)->data->mac_addrs[0],
+ mac);
+ break;
+ }
+ /*
+ * If no device has been probed and no ether_addr
+ * has been provided on the command line, use a random
+ * valid one.
+ * It will be applied during future slave state syncs to
+ * probed slaves.
+ */
+ if (i == priv->subs_tail)
+ eth_random_addr(&mac->addr_bytes[0]);
+ }
+ INFO("MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
+ mac->addr_bytes[0], mac->addr_bytes[1],
+ mac->addr_bytes[2], mac->addr_bytes[3],
+ mac->addr_bytes[4], mac->addr_bytes[5]);
+ return 0;
+free_args:
+ failsafe_args_free(dev);
+free_subs:
+ fs_sub_device_free(dev);
+free_dev:
+ rte_eth_dev_release_port(dev);
+ return -1;
+}
+
+static int
+fs_rte_eth_free(const char *name)
+{
+ struct rte_eth_dev *dev;
+ int ret;
+
+ dev = rte_eth_dev_allocated(name);
+ if (dev == NULL)
+ return -ENODEV;
+ ret = failsafe_eal_uninit(dev);
+ if (ret)
+ ERROR("Error while uninitializing sub-EAL");
+ failsafe_args_free(dev);
+ fs_sub_device_free(dev);
+ rte_free(PRIV(dev));
+ rte_eth_dev_release_port(dev);
+ return ret;
+}
+
+static int
+rte_pmd_failsafe_probe(struct rte_vdev_device *vdev)
+{
+ const char *name;
+
+ name = rte_vdev_device_name(vdev);
+ if (vdev == NULL)
+ return -EINVAL;
+ INFO("Initializing " FAILSAFE_DRIVER_NAME " for %s",
+ name);
+ return fs_eth_dev_create(vdev);
+}
+
+static int
+rte_pmd_failsafe_remove(struct rte_vdev_device *vdev)
+{
+ const char *name;
+
+ name = rte_vdev_device_name(vdev);
+ if (name == NULL)
+ return -EINVAL;
+ INFO("Uninitializing " FAILSAFE_DRIVER_NAME " for %s", name);
+ return fs_rte_eth_free(name);
+}
+
+static struct rte_vdev_driver failsafe_drv = {
+ .probe = rte_pmd_failsafe_probe,
+ .remove = rte_pmd_failsafe_remove,
+};
+
+RTE_PMD_REGISTER_VDEV(net_failsafe, failsafe_drv);
+RTE_PMD_REGISTER_ALIAS(net_failsafe, eth_failsafe);
+RTE_PMD_REGISTER_PARAM_STRING(net_failsafe, PMD_FAILSAFE_PARAM_STRING);
diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
new file mode 100644
index 0000000..f07d26e
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -0,0 +1,331 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <errno.h>
+
+#include <rte_devargs.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+
+#include "failsafe_private.h"
+
+#define DEVARGS_MAXLEN 4096
+
+/* Callback used when a new device is found in devargs */
+typedef int (parse_cb)(struct rte_eth_dev *dev, const char *params,
+ uint8_t head);
+
+int mac_from_arg;
+
+const char *pmd_failsafe_init_parameters[] = {
+ PMD_FAILSAFE_MAC_KVARG,
+ NULL,
+};
+
+/*
+ * input: text.
+ * output: 0: if text[0] != '(',
+ * 0: if there are no corresponding ')'
+ * n: distance to corresponding ')' otherwise
+ */
+static size_t
+closing_paren(const char *text)
+{
+ int nb_open = 0;
+ size_t i = 0;
+
+ while (text[i] != '\0') {
+ if (text[i] == '(')
+ nb_open++;
+ if (text[i] == ')')
+ nb_open--;
+ if (nb_open == 0)
+ return i;
+ i++;
+ }
+ return 0;
+}
+
+static int
+fs_parse_device(struct sub_device *sdev, char *args)
+{
+ struct rte_devargs *d;
+ int ret;
+
+ d = &sdev->devargs;
+ DEBUG("%s", args);
+ ret = rte_eal_devargs_parse(args, d);
+ if (ret) {
+ DEBUG("devargs parsing failed with code %d", ret);
+ return ret;
+ }
+ sdev->bus = d->bus;
+ sdev->state = DEV_PARSED;
+ return 0;
+}
+
+static int
+fs_parse_device_param(struct rte_eth_dev *dev, const char *param,
+ uint8_t head)
+{
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ char *args = NULL;
+ size_t a, b;
+ int ret;
+
+ priv = PRIV(dev);
+ a = 0;
+ b = 0;
+ ret = 0;
+ while (param[b] != '(' &&
+ param[b] != '\0')
+ b++;
+ a = b;
+ b += closing_paren(&param[b]);
+ if (a == b) {
+ ERROR("Dangling parenthesis");
+ return -EINVAL;
+ }
+ a += 1;
+ args = strndup(&param[a], b - a);
+ if (args == NULL) {
+ ERROR("Not enough memory for parameter parsing");
+ return -ENOMEM;
+ }
+ sdev = &priv->subs[head];
+ if (strncmp(param, "dev", 3) == 0) {
+ ret = fs_parse_device(sdev, args);
+ if (ret)
+ goto free_args;
+ } else {
+ ERROR("Unrecognized device type: %.*s", (int)b, param);
+ return -EINVAL;
+ }
+free_args:
+ free(args);
+ return ret;
+}
+
+static int
+fs_parse_sub_devices(parse_cb *cb,
+ struct rte_eth_dev *dev, const char *params)
+{
+ size_t a, b;
+ uint8_t head;
+ int ret;
+
+ a = 0;
+ head = 0;
+ ret = 0;
+ while (params[a] != '\0') {
+ b = a;
+ while (params[b] != '(' &&
+ params[b] != ',' &&
+ params[b] != '\0')
+ b++;
+ if (b == a) {
+ ERROR("Invalid parameter");
+ return -EINVAL;
+ }
+ if (params[b] == ',') {
+ a = b + 1;
+ continue;
+ }
+ if (params[b] == '(') {
+ size_t start = b;
+
+ b += closing_paren(&params[b]);
+ if (b == start) {
+ ERROR("Dangling parenthesis");
+ return -EINVAL;
+ }
+ ret = (*cb)(dev, &params[a], head);
+ if (ret)
+ return ret;
+ head += 1;
+ b += 1;
+ if (params[b] == '\0')
+ return 0;
+ }
+ a = b + 1;
+ }
+ return 0;
+}
+
+static int
+fs_remove_sub_devices_definition(char params[DEVARGS_MAXLEN])
+{
+ char buffer[DEVARGS_MAXLEN] = {0};
+ size_t a, b;
+ int i;
+
+ a = 0;
+ i = 0;
+ while (params[a] != '\0') {
+ b = a;
+ while (params[b] != '(' &&
+ params[b] != ',' &&
+ params[b] != '\0')
+ b++;
+ if (b == a) {
+ ERROR("Invalid parameter");
+ return -EINVAL;
+ }
+ if (params[b] == ',' || params[b] == '\0')
+ i += snprintf(&buffer[i], b - a + 1, "%s", &params[a]);
+ if (params[b] == '(') {
+ size_t start = b;
+ b += closing_paren(&params[b]);
+ if (b == start)
+ return -EINVAL;
+ b += 1;
+ if (params[b] == '\0')
+ goto out;
+ }
+ a = b + 1;
+ }
+out:
+ snprintf(params, DEVARGS_MAXLEN, "%s", buffer);
+ return 0;
+}
+
+static int
+fs_get_mac_addr_arg(const char *key __rte_unused,
+ const char *value, void *out)
+{
+ struct ether_addr *ea = out;
+ int ret;
+
+ if ((value == NULL) || (out == NULL))
+ return -EINVAL;
+ ret = sscanf(value, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+ &ea->addr_bytes[0], &ea->addr_bytes[1],
+ &ea->addr_bytes[2], &ea->addr_bytes[3],
+ &ea->addr_bytes[4], &ea->addr_bytes[5]);
+ return ret != ETHER_ADDR_LEN;
+}
+
+int
+failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
+{
+ struct fs_priv *priv;
+ char mut_params[DEVARGS_MAXLEN] = "";
+ struct rte_kvargs *kvlist = NULL;
+ unsigned int arg_count;
+ size_t n;
+ int ret;
+
+ if (dev == NULL || params == NULL)
+ return -EINVAL;
+ priv = PRIV(dev);
+ ret = 0;
+ priv->subs_tx = FAILSAFE_MAX_ETHPORTS;
+ /* default parameters */
+ mac_from_arg = 0;
+ n = snprintf(mut_params, sizeof(mut_params), "%s", params);
+ if (n >= sizeof(mut_params)) {
+ ERROR("Parameter string too long (>=%zu)",
+ sizeof(mut_params));
+ return -ENOMEM;
+ }
+ ret = fs_parse_sub_devices(fs_parse_device_param,
+ dev, params);
+ if (ret < 0)
+ return ret;
+ ret = fs_remove_sub_devices_definition(mut_params);
+ if (ret < 0)
+ return ret;
+ if (strnlen(mut_params, sizeof(mut_params)) > 0) {
+ kvlist = rte_kvargs_parse(mut_params,
+ pmd_failsafe_init_parameters);
+ if (kvlist == NULL) {
+ ERROR("Error parsing parameters, usage:\n"
+ PMD_FAILSAFE_PARAM_STRING);
+ return -1;
+ }
+ /* MAC addr */
+ arg_count = rte_kvargs_count(kvlist,
+ PMD_FAILSAFE_MAC_KVARG);
+ if (arg_count == 1) {
+ ret = rte_kvargs_process(kvlist,
+ PMD_FAILSAFE_MAC_KVARG,
+ &fs_get_mac_addr_arg,
+ &dev->data->mac_addrs[0]);
+ if (ret < 0)
+ goto free_kvlist;
+ mac_from_arg = 1;
+ }
+ }
+free_kvlist:
+ rte_kvargs_free(kvlist);
+ return ret;
+}
+
+void
+failsafe_args_free(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ free(sdev->devargs.args);
+ sdev->devargs.args = NULL;
+ }
+}
+
+static int
+fs_count_device(struct rte_eth_dev *dev, const char *param,
+ uint8_t head __rte_unused)
+{
+ size_t b = 0;
+
+ while (param[b] != '(' &&
+ param[b] != '\0')
+ b++;
+ if (strncmp(param, "dev", b) &&
+ strncmp(param, "exec", b)) {
+ ERROR("Unrecognized device type: %.*s", (int)b, param);
+ return -EINVAL;
+ }
+ PRIV(dev)->subs_tail += 1;
+ return 0;
+}
+
+int
+failsafe_args_count_subdevice(struct rte_eth_dev *dev,
+ const char *params)
+{
+ return fs_parse_sub_devices(fs_count_device,
+ dev, params);
+}
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
new file mode 100644
index 0000000..087d4f3
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -0,0 +1,160 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_malloc.h>
+
+#include "failsafe_private.h"
+
+static struct rte_eth_dev *
+fs_eth_dev_from_name(const char *name)
+{
+ struct rte_eth_dev *dev;
+ uint8_t i;
+
+ RTE_ETH_FOREACH_DEV(i) {
+ dev = &rte_eth_devices[i];
+ if (!strcmp(dev->device->name, name))
+ return dev;
+ }
+ return NULL;
+}
+
+static int
+fs_bus_init(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ struct rte_device device;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_PARSED)
+ continue;
+ device.name = sdev->devargs.name;
+ device.devargs = &sdev->devargs;
+ if (!sdev->bus->attach) {
+ ERROR("Bus %s used for sub_device %d does not support hotplug, skipping",
+ sdev->bus->name, i);
+ return -EINVAL;
+ }
+ ret = sdev->bus->attach(&device);
+ if (ret) {
+ ERROR("sub_device %d probe failed %s%s%s", i,
+ errno ? "(" : "",
+ errno ? strerror(errno) : "",
+ errno ? ")" : "");
+ continue;
+ }
+ ETH(sdev) = fs_eth_dev_from_name(device.name);
+ if (ETH(sdev) == NULL) {
+ ERROR("sub_device %d init went wrong", i);
+ return -ENODEV;
+ }
+ sdev->dev = ETH(sdev)->device;
+ ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
+ sdev->state = DEV_PROBED;
+ }
+ return 0;
+}
+
+int
+failsafe_eal_init(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ ret = fs_bus_init(dev);
+ if (ret)
+ return ret;
+ /*
+ * We only update TX_SUBDEV if we are not started.
+ * If a sub_device is emitting, we will switch the TX_SUBDEV to the
+ * preferred port only upon starting it, so that the switch is smoother.
+ */
+ if (PREFERRED_SUBDEV(dev)->state >= DEV_PROBED) {
+ if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
+ (TX_SUBDEV(dev) == NULL ||
+ (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
+ DEBUG("Switching tx_dev to preferred sub_device");
+ PRIV(dev)->subs_tx = 0;
+ }
+ } else {
+ if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_PROBED) ||
+ TX_SUBDEV(dev) == NULL) {
+ /* Using first probed device */
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ DEBUG("Switching tx_dev to sub_device %d",
+ i);
+ PRIV(dev)->subs_tx = i;
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+static int
+fs_bus_uninit(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev = NULL;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ if (!sdev->bus->detach) {
+ ERROR("Bus does not support device removal for sub_device %u (%s)",
+ i, sdev->devargs.name);
+ continue;
+ }
+ ret = sdev->bus->detach(sdev->dev);
+ if (ret) {
+ ERROR("Failed to remove requested device %s",
+ sdev->devargs.name);
+ continue;
+ }
+ sdev->state = DEV_PROBED - 1;
+ }
+ return 0;
+}
+
+int
+failsafe_eal_uninit(struct rte_eth_dev *dev)
+{
+ int ret;
+
+ ret = fs_bus_uninit(dev);
+ if (ret)
+ return ret;
+ return 0;
+}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
new file mode 100644
index 0000000..693162e
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -0,0 +1,663 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+
+#include "failsafe_private.h"
+
+static struct rte_eth_dev_info default_infos = {
+ .driver_name = pmd_failsafe_driver_name,
+ /* Max possible number of elements */
+ .max_rx_pktlen = UINT32_MAX,
+ .max_rx_queues = RTE_MAX_QUEUES_PER_PORT,
+ .max_tx_queues = RTE_MAX_QUEUES_PER_PORT,
+ .max_mac_addrs = FAILSAFE_MAX_ETHADDR,
+ .max_hash_mac_addrs = UINT32_MAX,
+ .max_vfs = UINT16_MAX,
+ .max_vmdq_pools = UINT16_MAX,
+ .rx_desc_lim = {
+ .nb_max = UINT16_MAX,
+ .nb_min = 0,
+ .nb_align = 1,
+ .nb_seg_max = UINT16_MAX,
+ .nb_mtu_seg_max = UINT16_MAX,
+ },
+ .tx_desc_lim = {
+ .nb_max = UINT16_MAX,
+ .nb_min = 0,
+ .nb_align = 1,
+ .nb_seg_max = UINT16_MAX,
+ .nb_mtu_seg_max = UINT16_MAX,
+ },
+ /* Set of understood capabilities */
+ .rx_offload_capa = 0x0,
+ .tx_offload_capa = 0x0,
+ .flow_type_rss_offloads = 0x0,
+};
+
+static int
+fs_dev_configure(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_PROBED)
+ continue;
+ DEBUG("Configuring sub-device %d", i);
+ ret = rte_eth_dev_configure(PORT_ID(sdev),
+ dev->data->nb_rx_queues,
+ dev->data->nb_tx_queues,
+ &dev->data->dev_conf);
+ if (ret) {
+ ERROR("Could not configure sub_device %d", i);
+ return ret;
+ }
+ sdev->state = DEV_ACTIVE;
+ }
+ return 0;
+}
+
+static int
+fs_dev_start(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_ACTIVE)
+ continue;
+ DEBUG("Starting sub_device %d", i);
+ ret = rte_eth_dev_start(PORT_ID(sdev));
+ if (ret)
+ return ret;
+ sdev->state = DEV_STARTED;
+ }
+ if (PREFERRED_SUBDEV(dev)->state == DEV_STARTED) {
+ if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev)) {
+ DEBUG("Switching tx_dev to preferred sub_device");
+ PRIV(dev)->subs_tx = 0;
+ }
+ } else {
+ if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED) ||
+ TX_SUBDEV(dev) == NULL) {
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
+ DEBUG("Switching tx_dev to sub_device %d", i);
+ PRIV(dev)->subs_tx = i;
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+static void
+fs_dev_stop(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
+ rte_eth_dev_stop(PORT_ID(sdev));
+ sdev->state = DEV_STARTED - 1;
+ }
+}
+
+static int
+fs_dev_set_link_up(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
+ ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
+ if (ret) {
+ ERROR("Operation rte_eth_dev_set_link_up failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
+fs_dev_set_link_down(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
+ ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
+ if (ret) {
+ ERROR("Operation rte_eth_dev_set_link_down failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static void fs_dev_free_queues(struct rte_eth_dev *dev);
+static void
+fs_dev_close(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Closing sub_device %d", i);
+ rte_eth_dev_close(PORT_ID(sdev));
+ sdev->state = DEV_ACTIVE - 1;
+ }
+ fs_dev_free_queues(dev);
+}
+
+static void
+fs_rx_queue_release(void *queue)
+{
+ struct rte_eth_dev *dev;
+ struct sub_device *sdev;
+ uint8_t i;
+ struct rxq *rxq;
+
+ if (queue == NULL)
+ return;
+ rxq = queue;
+ dev = rxq->priv->dev;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ SUBOPS(sdev, rx_queue_release)
+ (ETH(sdev)->data->rx_queues[rxq->qid]);
+ dev->data->rx_queues[rxq->qid] = NULL;
+ rte_free(rxq);
+}
+
+static int
+fs_rx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t rx_queue_id,
+ uint16_t nb_rx_desc,
+ unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *mb_pool)
+{
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ uint8_t i;
+ int ret;
+
+ rxq = dev->data->rx_queues[rx_queue_id];
+ if (rxq != NULL) {
+ fs_rx_queue_release(rxq);
+ dev->data->rx_queues[rx_queue_id] = NULL;
+ }
+ rxq = rte_zmalloc(NULL, sizeof(*rxq),
+ RTE_CACHE_LINE_SIZE);
+ if (rxq == NULL)
+ return -ENOMEM;
+ rxq->qid = rx_queue_id;
+ rxq->socket_id = socket_id;
+ rxq->info.mp = mb_pool;
+ rxq->info.conf = *rx_conf;
+ rxq->info.nb_desc = nb_rx_desc;
+ rxq->priv = PRIV(dev);
+ dev->data->rx_queues[rx_queue_id] = rxq;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_rx_queue_setup(PORT_ID(sdev),
+ rx_queue_id,
+ nb_rx_desc, socket_id,
+ rx_conf, mb_pool);
+ if (ret) {
+ ERROR("RX queue setup failed for sub_device %d", i);
+ goto free_rxq;
+ }
+ }
+ return 0;
+free_rxq:
+ fs_rx_queue_release(rxq);
+ return ret;
+}
+
+static void
+fs_tx_queue_release(void *queue)
+{
+ struct rte_eth_dev *dev;
+ struct sub_device *sdev;
+ uint8_t i;
+ struct txq *txq;
+
+ if (queue == NULL)
+ return;
+ txq = queue;
+ dev = txq->priv->dev;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ SUBOPS(sdev, tx_queue_release)
+ (ETH(sdev)->data->tx_queues[txq->qid]);
+ dev->data->tx_queues[txq->qid] = NULL;
+ rte_free(txq);
+}
+
+static int
+fs_tx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t tx_queue_id,
+ uint16_t nb_tx_desc,
+ unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ uint8_t i;
+ int ret;
+
+ txq = dev->data->tx_queues[tx_queue_id];
+ if (txq != NULL) {
+ fs_tx_queue_release(txq);
+ dev->data->tx_queues[tx_queue_id] = NULL;
+ }
+ txq = rte_zmalloc("ethdev TX queue", sizeof(*txq),
+ RTE_CACHE_LINE_SIZE);
+ if (txq == NULL)
+ return -ENOMEM;
+ txq->qid = tx_queue_id;
+ txq->socket_id = socket_id;
+ txq->info.conf = *tx_conf;
+ txq->info.nb_desc = nb_tx_desc;
+ txq->priv = PRIV(dev);
+ dev->data->tx_queues[tx_queue_id] = txq;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_tx_queue_setup(PORT_ID(sdev),
+ tx_queue_id,
+ nb_tx_desc, socket_id,
+ tx_conf);
+ if (ret) {
+ ERROR("TX queue setup failed for sub_device %d", i);
+ goto free_txq;
+ }
+ }
+ return 0;
+free_txq:
+ fs_tx_queue_release(txq);
+ return ret;
+}
+
+static void
+fs_dev_free_queues(struct rte_eth_dev *dev)
+{
+ uint16_t i;
+
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ fs_rx_queue_release(dev->data->rx_queues[i]);
+ dev->data->rx_queues[i] = NULL;
+ }
+ dev->data->nb_rx_queues = 0;
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ fs_tx_queue_release(dev->data->tx_queues[i]);
+ dev->data->tx_queues[i] = NULL;
+ }
+ dev->data->nb_tx_queues = 0;
+}
+
+static void
+fs_promiscuous_enable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_promiscuous_enable(PORT_ID(sdev));
+}
+
+static void
+fs_promiscuous_disable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_promiscuous_disable(PORT_ID(sdev));
+}
+
+static void
+fs_allmulticast_enable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_allmulticast_enable(PORT_ID(sdev));
+}
+
+static void
+fs_allmulticast_disable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_allmulticast_disable(PORT_ID(sdev));
+}
+
+static int
+fs_link_update(struct rte_eth_dev *dev,
+ int wait_to_complete)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling link_update on sub_device %d", i);
+ ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
+ if (ret && ret != -1) {
+ ERROR("Link update failed for sub_device %d with error %d",
+ i, ret);
+ return ret;
+ }
+ }
+ if (TX_SUBDEV(dev)) {
+ struct rte_eth_link *l1;
+ struct rte_eth_link *l2;
+
+ l1 = &dev->data->dev_link;
+ l2 = &ETH(TX_SUBDEV(dev))->data->dev_link;
+ if (memcmp(l1, l2, sizeof(*l1))) {
+ *l1 = *l2;
+ return 0;
+ }
+ }
+ return -1;
+}
+
+static void
+fs_stats_get(struct rte_eth_dev *dev,
+ struct rte_eth_stats *stats)
+{
+ memset(stats, 0, sizeof(*stats));
+ if (TX_SUBDEV(dev) == NULL)
+ return;
+ rte_eth_stats_get(PORT_ID(TX_SUBDEV(dev)), stats);
+}
+
+static void
+fs_stats_reset(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_stats_reset(PORT_ID(sdev));
+}
+
+/**
+ * Fail-safe dev_infos_get rules:
+ *
+ * No sub_device:
+ * Numerables:
+ * Use the maximum possible values for any field, so as not
+ * to impede any further configuration effort.
+ * Capabilities:
+ * Limits capabilities to those that are understood by the
+ * fail-safe PMD. This understanding stems from the fail-safe
+ * being capable of verifying that the related capability is
+ * expressed within the device configuration (struct rte_eth_conf).
+ *
+ * At least one probed sub_device:
+ * Numerables:
+ * Uses values from the active probed sub_device
+ * The rationale here is that if any sub_device is less capable
+ * (for example concerning the number of queues) than the active
+ * sub_device, then its subsequent configuration will fail.
+ * It is impossible to foresee this failure when the failing sub_device
+ * is supposed to be plugged-in later on, so the configuration process
+ * is the single point of failure and error reporting.
+ * Capabilities:
+ * Uses a logical AND of RX capabilities among
+ * all sub_devices and the default capabilities.
+ * Uses a logical AND of TX capabilities among
+ * the active probed sub_device and the default capabilities.
+ *
+ */
+static void
+fs_dev_infos_get(struct rte_eth_dev *dev,
+ struct rte_eth_dev_info *infos)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL) {
+ DEBUG("No probed device, using default infos");
+ rte_memcpy(&PRIV(dev)->infos, &default_infos,
+ sizeof(default_infos));
+ } else {
+ uint32_t rx_offload_capa;
+
+ rx_offload_capa = default_infos.rx_offload_capa;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ rte_eth_dev_info_get(PORT_ID(sdev),
+ &PRIV(dev)->infos);
+ rx_offload_capa &= PRIV(dev)->infos.rx_offload_capa;
+ }
+ sdev = TX_SUBDEV(dev);
+ rte_eth_dev_info_get(PORT_ID(sdev), &PRIV(dev)->infos);
+ PRIV(dev)->infos.rx_offload_capa = rx_offload_capa;
+ PRIV(dev)->infos.tx_offload_capa &=
+ default_infos.tx_offload_capa;
+ PRIV(dev)->infos.flow_type_rss_offloads &=
+ default_infos.flow_type_rss_offloads;
+ }
+ rte_memcpy(infos, &PRIV(dev)->infos, sizeof(*infos));
+}
+
+static const uint32_t *
+fs_dev_supported_ptypes_get(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ struct rte_eth_dev *edev;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL)
+ return NULL;
+ edev = ETH(sdev);
+ /* ENOTSUP: counts as no supported ptypes */
+ if (SUBOPS(sdev, dev_supported_ptypes_get) == NULL)
+ return NULL;
+ /*
+ * The API does not permit to do a clean AND of all ptypes,
+ * It is also incomplete by design and we do not really care
+ * to have a best possible value in this context.
+ * We just return the ptypes of the device of highest
+ * priority, usually the PREFERRED device.
+ */
+ return SUBOPS(sdev, dev_supported_ptypes_get)(edev);
+}
+
+static int
+fs_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
+ ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_set_mtu failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
+fs_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
+ ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_vlan_filter failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
+fs_flow_ctrl_get(struct rte_eth_dev *dev,
+ struct rte_eth_fc_conf *fc_conf)
+{
+ struct sub_device *sdev;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL)
+ return 0;
+ if (SUBOPS(sdev, flow_ctrl_get) == NULL)
+ return -ENOTSUP;
+ return SUBOPS(sdev, flow_ctrl_get)(ETH(sdev), fc_conf);
+}
+
+static int
+fs_flow_ctrl_set(struct rte_eth_dev *dev,
+ struct rte_eth_fc_conf *fc_conf)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
+ ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_flow_ctrl_set failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static void
+fs_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ /* No check: already done within the rte_eth_dev_mac_addr_remove
+ * call for the fail-safe device.
+ */
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_dev_mac_addr_remove(PORT_ID(sdev),
+ &dev->data->mac_addrs[index]);
+ PRIV(dev)->mac_addr_pool[index] = 0;
+}
+
+static int
+fs_mac_addr_add(struct rte_eth_dev *dev,
+ struct ether_addr *mac_addr,
+ uint32_t index,
+ uint32_t vmdq)
+{
+ struct sub_device *sdev;
+ int ret;
+ uint8_t i;
+
+ assert(index < FAILSAFE_MAX_ETHADDR);
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
+ PRIu8 " with error %d", i, ret);
+ return ret;
+ }
+ }
+ if (index >= PRIV(dev)->nb_mac_addr) {
+ DEBUG("Growing mac_addrs array");
+ PRIV(dev)->nb_mac_addr = index;
+ }
+ PRIV(dev)->mac_addr_pool[index] = vmdq;
+ return 0;
+}
+
+static void
+fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
+}
+
+const struct eth_dev_ops failsafe_ops = {
+ .dev_configure = fs_dev_configure,
+ .dev_start = fs_dev_start,
+ .dev_stop = fs_dev_stop,
+ .dev_set_link_down = fs_dev_set_link_down,
+ .dev_set_link_up = fs_dev_set_link_up,
+ .dev_close = fs_dev_close,
+ .promiscuous_enable = fs_promiscuous_enable,
+ .promiscuous_disable = fs_promiscuous_disable,
+ .allmulticast_enable = fs_allmulticast_enable,
+ .allmulticast_disable = fs_allmulticast_disable,
+ .link_update = fs_link_update,
+ .stats_get = fs_stats_get,
+ .stats_reset = fs_stats_reset,
+ .dev_infos_get = fs_dev_infos_get,
+ .dev_supported_ptypes_get = fs_dev_supported_ptypes_get,
+ .mtu_set = fs_mtu_set,
+ .vlan_filter_set = fs_vlan_filter_set,
+ .rx_queue_setup = fs_rx_queue_setup,
+ .tx_queue_setup = fs_tx_queue_setup,
+ .rx_queue_release = fs_rx_queue_release,
+ .tx_queue_release = fs_tx_queue_release,
+ .flow_ctrl_get = fs_flow_ctrl_get,
+ .flow_ctrl_set = fs_flow_ctrl_set,
+ .mac_addr_remove = fs_mac_addr_remove,
+ .mac_addr_add = fs_mac_addr_add,
+ .mac_addr_set = fs_mac_addr_set,
+};
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
new file mode 100644
index 0000000..e7a7592
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -0,0 +1,227 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_
+#define _RTE_ETH_FAILSAFE_PRIVATE_H_
+
+#include <rte_dev.h>
+#include <rte_ethdev.h>
+#include <rte_devargs.h>
+
+#define FAILSAFE_DRIVER_NAME "Fail-safe PMD"
+
+#define PMD_FAILSAFE_MAC_KVARG "mac"
+#define PMD_FAILSAFE_PARAM_STRING \
+ "dev(<ifc>)," \
+ "mac=mac_addr" \
+ ""
+
+#define FAILSAFE_PLUGIN_DEFAULT_TIMEOUT_MS 2000
+
+#define FAILSAFE_MAX_ETHPORTS 2
+#define FAILSAFE_MAX_ETHADDR 128
+
+/* TYPES */
+
+struct rxq {
+ struct fs_priv *priv;
+ uint16_t qid;
+ /* id of last sub_device polled */
+ uint8_t last_polled;
+ unsigned int socket_id;
+ struct rte_eth_rxq_info info;
+};
+
+struct txq {
+ struct fs_priv *priv;
+ uint16_t qid;
+ unsigned int socket_id;
+ struct rte_eth_txq_info info;
+};
+
+enum dev_state {
+ DEV_UNDEFINED = 0,
+ DEV_PARSED,
+ DEV_PROBED,
+ DEV_ACTIVE,
+ DEV_STARTED,
+};
+
+struct sub_device {
+ /* Exhaustive DPDK device description */
+ struct rte_devargs devargs;
+ struct rte_bus *bus;
+ struct rte_device *dev;
+ struct rte_eth_dev *edev;
+ /* Device state machine */
+ enum dev_state state;
+};
+
+struct fs_priv {
+ struct rte_eth_dev *dev;
+ /*
+ * Set of sub_devices.
+ * subs[0] is the preferred device
+ * any other is just another slave
+ */
+ struct sub_device *subs;
+ uint8_t subs_head; /* if head == tail, no subs */
+ uint8_t subs_tail; /* first invalid */
+ uint8_t subs_tx; /* current emitting device */
+ uint8_t current_probed;
+ /* current number of mac_addr slots allocated. */
+ uint32_t nb_mac_addr;
+ struct ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR];
+ uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR];
+ /* current capabilities */
+ struct rte_eth_dev_info infos;
+};
+
+/* RX / TX */
+
+uint16_t failsafe_rx_burst(void *rxq,
+ struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t failsafe_tx_burst(void *txq,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
+/* ARGS */
+
+int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
+void failsafe_args_free(struct rte_eth_dev *dev);
+int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
+
+/* EAL */
+
+int failsafe_eal_init(struct rte_eth_dev *dev);
+int failsafe_eal_uninit(struct rte_eth_dev *dev);
+
+/* GLOBALS */
+
+extern const char pmd_failsafe_driver_name[];
+extern const struct eth_dev_ops failsafe_ops;
+extern int mac_from_arg;
+
+/* HELPERS */
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define PRIV(dev) \
+ ((struct fs_priv *)(dev)->data->dev_private)
+
+/* sdev: (struct sub_device *) */
+#define ETH(sdev) \
+ ((sdev)->edev)
+
+/* sdev: (struct sub_device *) */
+#define PORT_ID(sdev) \
+ (ETH(sdev)->data->port_id)
+
+/**
+ * Stateful iterator construct over fail-safe sub-devices:
+ * s: (struct sub_device *), iterator
+ * i: (uint8_t), increment
+ * dev: (struct rte_eth_dev *), fail-safe ethdev
+ * state: (enum dev_state), minimum acceptable device state
+ */
+#define FOREACH_SUBDEV_ST(s, i, dev, state) \
+ for (i = fs_find_next((dev), 0, state); \
+ i < PRIV(dev)->subs_tail && (s = &PRIV(dev)->subs[i]); \
+ i = fs_find_next((dev), i + 1, state))
+
+/**
+ * Iterator construct over fail-safe sub-devices:
+ * s: (struct sub_device *), iterator
+ * i: (uint8_t), increment
+ * dev: (struct rte_eth_dev *), fail-safe ethdev
+ */
+#define FOREACH_SUBDEV(s, i, dev) \
+ FOREACH_SUBDEV_ST(s, i, dev, DEV_UNDEFINED)
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define PREFERRED_SUBDEV(dev) \
+ (&PRIV(dev)->subs[0])
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define TX_SUBDEV(dev) \
+ (PRIV(dev)->subs_tx >= PRIV(dev)->subs_tail ? NULL \
+ : (PRIV(dev)->subs[PRIV(dev)->subs_tx].state < DEV_PROBED ? NULL \
+ : &PRIV(dev)->subs[PRIV(dev)->subs_tx]))
+
+/**
+ * s: (struct sub_device *)
+ * ops: (struct eth_dev_ops) member
+ */
+#define SUBOPS(s, ops) \
+ (ETH(s)->dev_ops->ops)
+
+#ifndef NDEBUG
+#include <stdio.h>
+#define DEBUG__(m, ...) \
+ (fprintf(stderr, "%s:%d: %s(): " m "%c", \
+ __FILE__, __LINE__, __func__, __VA_ARGS__), \
+ (void)0)
+#define DEBUG_(...) \
+ (errno = ((int []){ \
+ *(volatile int *)&errno, \
+ (DEBUG__(__VA_ARGS__), 0) \
+ })[0])
+#define DEBUG(...) DEBUG_(__VA_ARGS__, '\n')
+#define INFO(...) DEBUG(__VA_ARGS__)
+#define WARN(...) DEBUG(__VA_ARGS__)
+#define ERROR(...) DEBUG(__VA_ARGS__)
+#else
+#define DEBUG(...) ((void)0)
+#define LOG__(level, m, ...) \
+ RTE_LOG(level, PMD, "net_failsafe: " m "%c", __VA_ARGS__)
+#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
+#define INFO(...) LOG_(INFO, __VA_ARGS__)
+#define WARN(...) LOG_(WARNING, "WARNING: " __VA_ARGS__)
+#define ERROR(...) LOG_(ERR, "ERROR: " __VA_ARGS__)
+#endif
+
+/* inlined functions */
+
+static inline uint8_t
+fs_find_next(struct rte_eth_dev *dev, uint8_t sid,
+ enum dev_state min_state)
+{
+ while (sid < PRIV(dev)->subs_tail) {
+ if (PRIV(dev)->subs[sid].state >= min_state)
+ break;
+ sid++;
+ }
+ if (sid >= PRIV(dev)->subs_tail)
+ return PRIV(dev)->subs_tail;
+ return sid;
+}
+
+#endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */
diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c
new file mode 100644
index 0000000..a45b4e5
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_rxtx.c
@@ -0,0 +1,107 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+
+#include "failsafe_private.h"
+
+/*
+ * TODO: write fast version,
+ * without additional checks, to be activated once
+ * everything has been verified to comply.
+ */
+uint16_t
+failsafe_rx_burst(void *queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ void *sub_rxq;
+ uint16_t nb_rx;
+ uint8_t nb_polled, nb_subs;
+ uint8_t i;
+
+ rxq = queue;
+ priv = rxq->priv;
+ nb_subs = priv->subs_tail - priv->subs_head;
+ nb_polled = 0;
+ for (i = rxq->last_polled; nb_polled < nb_subs; nb_polled++) {
+ i++;
+ if (i == priv->subs_tail)
+ i = priv->subs_head;
+ sdev = &priv->subs[i];
+ if (unlikely(ETH(sdev) == NULL))
+ continue;
+ if (unlikely(ETH(sdev)->rx_pkt_burst == NULL))
+ continue;
+ if (unlikely(sdev->state != DEV_STARTED))
+ continue;
+ sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ nb_rx = ETH(sdev)->
+ rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ if (nb_rx) {
+ rxq->last_polled = i;
+ return nb_rx;
+ }
+ }
+ return 0;
+}
+
+/*
+ * TODO: write fast version,
+ * without additional checks, to be activated once
+ * everything has been verified to comply.
+ */
+uint16_t
+failsafe_tx_burst(void *queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ void *sub_txq;
+
+ txq = queue;
+ sdev = TX_SUBDEV(txq->priv->dev);
+ if (unlikely(sdev == NULL))
+ return 0;
+ if (unlikely(ETH(sdev) == NULL))
+ return 0;
+ if (unlikely(ETH(sdev)->tx_pkt_burst == NULL))
+ return 0;
+ sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
+ return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+}
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index bcaf1b3..e5743c4 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -120,6 +120,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += -lrte_pmd_e1000
_LDLIBS-$(CONFIG_RTE_LIBRTE_ENA_PMD) += -lrte_pmd_ena
_LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += -lrte_pmd_enic
_LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += -lrte_pmd_fm10k
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += -lrte_pmd_failsafe
_LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += -lrte_pmd_i40e
_LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += -lrte_pmd_ixgbe
ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
--
2.1.4
Stephen Hemminger
2017-05-31 15:13:53 UTC
Permalink
On Mon, 29 May 2017 15:42:16 +0200
Post by Gaetan Rivet
+Fail-safe poll mode driver library
+==================================
+
+The Fail-safe poll mode driver library (**librte_pmd_failsafe**) is a virtual
+device that allows using any device supporting hotplug (sudden device removal
+and plugging on its bus), without modifying other components relying on such
+device (application, other PMDs).
What about the case of Hyper-V where the components of the Fail Safe PMD may
arrive later. An example would be a NFV server that starts on boot. The synthetic
device will be present at boot, but the associated VF device may be plugged
in later (by checking SR-IOV on host console) or removed (by unchecking).

There doesn't appear to be a way to manage slave devices that get added
and removed through CLI management model.
Post by Gaetan Rivet
+Using the Fail-safe PMD from the EAL command line
+-------------------------------------------------
+
+The Fail-safe PMD can be used like most other DPDK virtual devices, by passing a
+``--vdev`` parameter to the EAL when starting the application. The device name
+must start with the *net_failsafe* prefix, followed by numbers or letters. This
+name must be unique for each device. Each fail-safe instance must have at least one
+sub-device, up to ``RTE_MAX_ETHPORTS-1``.
+
+A sub-device can be any legal DPDK device, including possibly another fail-safe
+instance.
Configuring fail-safe (or any other device) from command line is difficult in a real
world application. The EAL command line is difficult API to manipulate programmatically.
Why not have a real API?
Post by Gaetan Rivet
+static int
+fs_link_update(struct rte_eth_dev *dev,
+ int wait_to_complete)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling link_update on sub_device %d", i);
+ ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
+ if (ret && ret != -1) {
+ ERROR("Link update failed for sub_device %d with error %d",
+ i, ret);
+ return ret;
+ }
+ }
+ if (TX_SUBDEV(dev)) {
+ struct rte_eth_link *l1;
+ struct rte_eth_link *l2;
+
+ l1 = &dev->data->dev_link;
+ l2 = &ETH(TX_SUBDEV(dev))->data->dev_link;
+ if (memcmp(l1, l2, sizeof(*l1))) {
+ *l1 = *l2;
+ return 0;
+ }
+ }
+ return -1;
+}
memcmp here is a potential problem since rte_eth_link maybe padded and have holes.
Why compare anyway? if *l1 == *l2 the assignment would be a nop.
What if links are down?
Post by Gaetan Rivet
+static void
+fs_stats_get(struct rte_eth_dev *dev,
+ struct rte_eth_stats *stats)
+{
+ memset(stats, 0, sizeof(*stats));
memset here is unnecessary, already done by rte_eth_stats_get
Gaëtan Rivet
2017-06-01 14:01:37 UTC
Permalink
Post by Stephen Hemminger
On Mon, 29 May 2017 15:42:16 +0200
Post by Gaetan Rivet
+Fail-safe poll mode driver library
+==================================
+
+The Fail-safe poll mode driver library (**librte_pmd_failsafe**) is a virtual
+device that allows using any device supporting hotplug (sudden device removal
+and plugging on its bus), without modifying other components relying on such
+device (application, other PMDs).
What about the case of Hyper-V where the components of the Fail Safe PMD may
arrive later. An example would be a NFV server that starts on boot. The synthetic
device will be present at boot, but the associated VF device may be plugged
in later (by checking SR-IOV on host console) or removed (by unchecking).
There doesn't appear to be a way to manage slave devices that get added
and removed through CLI management model.
The VF and the synthetic path (SP) should both be declared as slaves to the
fail-safe. The SP is probed while the process fails for the VF.

The fail-safe then continues as usual, getting his infos (MAC address,
capabilities) from the SP. More on that later, as you have evocated the
subject in another thread.

The fail-safe detects that not all his slaves are probed and enables its
plugin poll, meaning that it will detect when the VF arrives.

As the VF appears later, there is no way to know which PCI address it
will be at. Thus the need for the "exec" slave declaration, which allows
complex logic for slave detection.

What is necessary is a common piece of info (it can be MAC address, a
class Id, anything else) that allows a script to detect that the right
device has been plugged in. As long as the NFV server allows determinism
here, the user will be able to use its VF.
Post by Stephen Hemminger
Post by Gaetan Rivet
+Using the Fail-safe PMD from the EAL command line
+-------------------------------------------------
+
+The Fail-safe PMD can be used like most other DPDK virtual devices, by passing a
+``--vdev`` parameter to the EAL when starting the application. The device name
+must start with the *net_failsafe* prefix, followed by numbers or letters. This
+name must be unique for each device. Each fail-safe instance must have at least one
+sub-device, up to ``RTE_MAX_ETHPORTS-1``.
+
+A sub-device can be any legal DPDK device, including possibly another fail-safe
+instance.
Configuring fail-safe (or any other device) from command line is difficult in a real
world application. The EAL command line is difficult API to manipulate programmatically.
Why not have a real API?
The real API is proposed through the standard DPDK layers.
You can already create a virtual device on the fly with arbitrary
parameters. You can thus create a fail-safe device with several slaves.

The requirement to be able to do this however is that the bus of the
slave supports the plug / unplug API. This is the case for the virtual
and PCI buses.

You can try it on testpmd, using a command such as

testpmd> port attach net_failsafe0,dev(net_ring0),dev(net_ring1)

Should create a fail-safe instance with two slaves.

Finally, in a recent patchset, I introduced an rte_devargs parsing
helper that should ease the creation of devices in this way. It takes a
"name,devargs" string and builds an rte_devargs, that can be used in any
plug/unplug implementation worth its salt.

{
struct rte_devargs da;

rte_eal_devargs_parse("net_failsafe0,dev(net_ring0)", &da);
da.bus->plug(&da);
}

And you are set.
Post by Stephen Hemminger
Post by Gaetan Rivet
+static int
+fs_link_update(struct rte_eth_dev *dev,
+ int wait_to_complete)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling link_update on sub_device %d", i);
+ ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
+ if (ret && ret != -1) {
+ ERROR("Link update failed for sub_device %d with error %d",
+ i, ret);
+ return ret;
+ }
+ }
+ if (TX_SUBDEV(dev)) {
+ struct rte_eth_link *l1;
+ struct rte_eth_link *l2;
+
+ l1 = &dev->data->dev_link;
+ l2 = &ETH(TX_SUBDEV(dev))->data->dev_link;
+ if (memcmp(l1, l2, sizeof(*l1))) {
+ *l1 = *l2;
+ return 0;
+ }
+ }
+ return -1;
+}
memcmp here is a potential problem since rte_eth_link maybe padded and have holes.
Why compare anyway? if *l1 == *l2 the assignment would be a nop.
What if links are down?
Post by Gaetan Rivet
+static void
+fs_stats_get(struct rte_eth_dev *dev,
+ struct rte_eth_stats *stats)
+{
+ memset(stats, 0, sizeof(*stats));
memset here is unnecessary, already done by rte_eth_stats_get
--
Gaëtan Rivet
6WIND
Stephen Hemminger
2017-06-01 17:57:22 UTC
Permalink
On Thu, 1 Jun 2017 16:01:37 +0200
Post by Gaëtan Rivet
What is necessary is a common piece of info (it can be MAC address, a
class Id, anything else) that allows a script to detect that the right
device has been plugged in. As long as the NFV server allows determinism
here, the user will be able to use its VF.
There is a serial number property (GUID) on vmbus that is what Windows uses
to associate VF and SF device. It maybe visible over sysfs.

But it make senses to put that kind of system dependent matching rule in
some other place (DPDK bus logic? or external daemon).
Gaëtan Rivet
2017-06-04 23:04:38 UTC
Permalink
Post by Stephen Hemminger
On Thu, 1 Jun 2017 16:01:37 +0200
Post by Gaëtan Rivet
What is necessary is a common piece of info (it can be MAC address, a
class Id, anything else) that allows a script to detect that the right
device has been plugged in. As long as the NFV server allows determinism
here, the user will be able to use its VF.
There is a serial number property (GUID) on vmbus that is what Windows uses
to associate VF and SF device. It maybe visible over sysfs.
But it make senses to put that kind of system dependent matching rule in
some other place (DPDK bus logic? or external daemon).
I think it might make sense to have expanded matching feature in
rte_buses. For example, being able to match devices using metadatas specific
to a bus, while allowing wildcards-like behavior.

Such as expressing "Probe all devices having a vendor:device id pair
in such ranges", and having an "INTR_ADD" event added to mirror the
"INTR_RMV" already existing.

Other possible grammar elements for device matching could be thought up,
not limited to NFV servers. Once the buses have somewhat stabilized in
this release or the next.

But the exec stanza allows the fail-safe to have answers for this configuration
and possibly many others, at least while the buses are not capable
enough.
--
Gaëtan Rivet
6WIND
Gaetan Rivet
2017-05-29 13:42:17 UTC
Permalink
Periodically check for the existence of a device.
If a device has not been initialized and exists on the system, then it
is probed and configured.

The configuration process strives to synchronize the states between the
plugged-in sub-device and the fail-safe device.

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/fail_safe.rst | 19 +++
drivers/net/failsafe/Makefile | 1 +
drivers/net/failsafe/failsafe.c | 71 ++++++++++
drivers/net/failsafe/failsafe_args.c | 32 +++++
drivers/net/failsafe/failsafe_eal.c | 30 +----
drivers/net/failsafe/failsafe_ether.c | 228 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ops.c | 25 ++--
drivers/net/failsafe/failsafe_private.h | 60 ++++++++-
8 files changed, 423 insertions(+), 43 deletions(-)
create mode 100644 drivers/net/failsafe/failsafe_ether.c

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index 056f85f..c04891a 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -102,6 +102,11 @@ Fail-safe command line parameters
address is generated, that will be subsequently applied to all sub-device once
they are probed.

+- **hotplug_poll** parameter [UINT64] (default **2000**)
+
+ This parameter allows the user to configure the amount of time in milliseconds
+ between two slave upkeep round.
+
Usage example
~~~~~~~~~~~~~

@@ -131,3 +136,17 @@ Care must be taken, however, to respect the **ether** API concerning device
access, and in particular, using the ``RTE_ETH_FOREACH_DEV`` macro to iterate
over ethernet devices, instead of directly accessing them or by writing one's
own device iterator.
+
+Plug-in feature
+---------------
+
+A sub-device can be defined without existing on the system when the fail-safe
+PMD is initialized. Upon probing this device, the fail-safe PMD will detect its
+absence and postpone its use. It will then register for a periodic check on any
+missing sub-device.
+
+During this time, the fail-safe PMD can be used normally, configured and told to
+emit and receive packets. It will store any applied configuration, and try to
+apply it upon the probing of its missing sub-device. After this configuration
+pass, the new sub-device will be synchronized with other sub-devices, i.e. be
+started if the fail-safe PMD has been started by the user before.
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
index 06199ad..4567961 100644
--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -40,6 +40,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_args.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ether.c

# No exported include files

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 7cf33e8..888f07b 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -80,6 +80,72 @@ fs_sub_device_free(struct rte_eth_dev *dev)
rte_free(PRIV(dev)->subs);
}

+static void fs_hotplug_alarm(void *arg);
+
+int
+failsafe_hotplug_alarm_install(struct rte_eth_dev *dev)
+{
+ int ret;
+
+ if (dev == NULL)
+ return -EINVAL;
+ if (PRIV(dev)->pending_alarm)
+ return 0;
+ ret = rte_eal_alarm_set(hotplug_poll * 1000,
+ fs_hotplug_alarm,
+ dev);
+ if (ret) {
+ ERROR("Could not set up plug-in event detection");
+ return ret;
+ }
+ PRIV(dev)->pending_alarm = 1;
+ return 0;
+}
+
+int
+failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev)
+{
+ int ret = 0;
+
+ if (PRIV(dev)->pending_alarm) {
+ rte_errno = 0;
+ rte_eal_alarm_cancel(fs_hotplug_alarm, dev);
+ if (rte_errno) {
+ ERROR("rte_eal_alarm_cancel failed (errno: %s)",
+ strerror(rte_errno));
+ ret = -rte_errno;
+ } else {
+ PRIV(dev)->pending_alarm = 0;
+ }
+ }
+ return ret;
+}
+
+static void
+fs_hotplug_alarm(void *arg)
+{
+ struct rte_eth_dev *dev = arg;
+ struct sub_device *sdev;
+ int ret;
+ uint8_t i;
+
+ if (!PRIV(dev)->pending_alarm)
+ return;
+ PRIV(dev)->pending_alarm = 0;
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state != PRIV(dev)->state)
+ break;
+ /* if we have non-probed device */
+ if (i != PRIV(dev)->subs_tail) {
+ ret = failsafe_eth_dev_state_sync(dev);
+ if (ret)
+ ERROR("Unable to synchronize sub_device state");
+ }
+ ret = failsafe_hotplug_alarm_install(dev);
+ if (ret)
+ ERROR("Unable to set up next alarm");
+}
+
static int
fs_eth_dev_create(struct rte_vdev_device *vdev)
{
@@ -128,6 +194,11 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
ret = failsafe_eal_init(dev);
if (ret)
goto free_args;
+ ret = failsafe_hotplug_alarm_install(dev);
+ if (ret) {
+ ERROR("Could not set up plug-in event detection");
+ goto free_args;
+ }
mac = &dev->data->mac_addrs[0];
if (mac_from_arg) {
/*
diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
index f07d26e..8f334aa 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -45,9 +45,11 @@
typedef int (parse_cb)(struct rte_eth_dev *dev, const char *params,
uint8_t head);

+uint64_t hotplug_poll;
int mac_from_arg;

const char *pmd_failsafe_init_parameters[] = {
+ PMD_FAILSAFE_PLUG_IN_POLL_KVARG,
PMD_FAILSAFE_MAC_KVARG,
NULL,
};
@@ -221,6 +223,24 @@ fs_remove_sub_devices_definition(char params[DEVARGS_MAXLEN])
}

static int
+fs_get_u64_arg(const char *key __rte_unused,
+ const char *value, void *out)
+{
+ uint64_t *u64 = out;
+ char *endptr = NULL;
+
+ if ((value == NULL) || (out == NULL))
+ return -EINVAL;
+ errno = 0;
+ *u64 = strtoull(value, &endptr, 0);
+ if (errno != 0)
+ return -errno;
+ if (endptr == value)
+ return -1;
+ return 0;
+}
+
+static int
fs_get_mac_addr_arg(const char *key __rte_unused,
const char *value, void *out)
{
@@ -252,6 +272,7 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
ret = 0;
priv->subs_tx = FAILSAFE_MAX_ETHPORTS;
/* default parameters */
+ hotplug_poll = FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS;
mac_from_arg = 0;
n = snprintf(mut_params, sizeof(mut_params), "%s", params);
if (n >= sizeof(mut_params)) {
@@ -274,6 +295,16 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
PMD_FAILSAFE_PARAM_STRING);
return -1;
}
+ /* PLUG_IN event poll timer */
+ arg_count = rte_kvargs_count(kvlist,
+ PMD_FAILSAFE_PLUG_IN_POLL_KVARG);
+ if (arg_count == 1) {
+ ret = rte_kvargs_process(kvlist,
+ PMD_FAILSAFE_PLUG_IN_POLL_KVARG,
+ &fs_get_u64_arg, &hotplug_poll);
+ if (ret < 0)
+ goto free_kvlist;
+ }
/* MAC addr */
arg_count = rte_kvargs_count(kvlist,
PMD_FAILSAFE_MAC_KVARG);
@@ -287,6 +318,7 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
mac_from_arg = 1;
}
}
+ PRIV(dev)->state = DEV_PARSED;
free_kvlist:
rte_kvargs_free(kvlist);
return ret;
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index 087d4f3..d7b12e2 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -90,37 +90,14 @@ fs_bus_init(struct rte_eth_dev *dev)
int
failsafe_eal_init(struct rte_eth_dev *dev)
{
- struct sub_device *sdev;
- uint8_t i;
int ret;

ret = fs_bus_init(dev);
if (ret)
return ret;
- /*
- * We only update TX_SUBDEV if we are not started.
- * If a sub_device is emitting, we will switch the TX_SUBDEV to the
- * preferred port only upon starting it, so that the switch is smoother.
- */
- if (PREFERRED_SUBDEV(dev)->state >= DEV_PROBED) {
- if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
- (TX_SUBDEV(dev) == NULL ||
- (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
- DEBUG("Switching tx_dev to preferred sub_device");
- PRIV(dev)->subs_tx = 0;
- }
- } else {
- if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_PROBED) ||
- TX_SUBDEV(dev) == NULL) {
- /* Using first probed device */
- FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
- DEBUG("Switching tx_dev to sub_device %d",
- i);
- PRIV(dev)->subs_tx = i;
- break;
- }
- }
- }
+ if (PRIV(dev)->state < DEV_PROBED)
+ PRIV(dev)->state = DEV_PROBED;
+ fs_switch_dev(dev);
return 0;
}

@@ -156,5 +133,6 @@ failsafe_eal_uninit(struct rte_eth_dev *dev)
ret = fs_bus_uninit(dev);
if (ret)
return ret;
+ PRIV(dev)->state = DEV_PROBED - 1;
return 0;
}
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
new file mode 100644
index 0000000..7910952
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -0,0 +1,228 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+
+#include "failsafe_private.h"
+
+static int
+fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
+ struct sub_device *sdev)
+{
+ struct rte_eth_dev *edev;
+ struct rte_vlan_filter_conf *vfc1;
+ struct rte_vlan_filter_conf *vfc2;
+ uint32_t i;
+ int ret;
+
+ edev = ETH(sdev);
+ /* RX queue setup */
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ struct rxq *rxq;
+
+ rxq = dev->data->rx_queues[i];
+ ret = rte_eth_rx_queue_setup(PORT_ID(sdev), i,
+ rxq->info.nb_desc, rxq->socket_id,
+ &rxq->info.conf, rxq->info.mp);
+ if (ret) {
+ ERROR("rx_queue_setup failed");
+ return ret;
+ }
+ }
+ /* TX queue setup */
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ struct txq *txq;
+
+ txq = dev->data->tx_queues[i];
+ ret = rte_eth_tx_queue_setup(PORT_ID(sdev), i,
+ txq->info.nb_desc, txq->socket_id,
+ &txq->info.conf);
+ if (ret) {
+ ERROR("tx_queue_setup failed");
+ return ret;
+ }
+ }
+ /* dev_link.link_status */
+ if (dev->data->dev_link.link_status !=
+ edev->data->dev_link.link_status) {
+ DEBUG("Configuring link_status");
+ if (dev->data->dev_link.link_status)
+ ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
+ else
+ ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
+ if (ret) {
+ ERROR("Failed to apply link_status");
+ return ret;
+ }
+ } else {
+ DEBUG("link_status already set");
+ }
+ /* promiscuous */
+ if (dev->data->promiscuous != edev->data->promiscuous) {
+ DEBUG("Configuring promiscuous");
+ if (dev->data->promiscuous)
+ rte_eth_promiscuous_enable(PORT_ID(sdev));
+ else
+ rte_eth_promiscuous_disable(PORT_ID(sdev));
+ } else {
+ DEBUG("promiscuous already set");
+ }
+ /* all_multicast */
+ if (dev->data->all_multicast != edev->data->all_multicast) {
+ DEBUG("Configuring all_multicast");
+ if (dev->data->all_multicast)
+ rte_eth_allmulticast_enable(PORT_ID(sdev));
+ else
+ rte_eth_allmulticast_disable(PORT_ID(sdev));
+ } else {
+ DEBUG("all_multicast already set");
+ }
+ /* MTU */
+ if (dev->data->mtu != edev->data->mtu) {
+ DEBUG("Configuring MTU");
+ ret = rte_eth_dev_set_mtu(PORT_ID(sdev), dev->data->mtu);
+ if (ret) {
+ ERROR("Failed to apply MTU");
+ return ret;
+ }
+ } else {
+ DEBUG("MTU already set");
+ }
+ /* default MAC */
+ DEBUG("Configuring default MAC address");
+ ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
+ &dev->data->mac_addrs[0]);
+ if (ret) {
+ ERROR("Setting default MAC address failed");
+ return ret;
+ }
+ /* additional MAC */
+ if (PRIV(dev)->nb_mac_addr > 1)
+ DEBUG("Configure additional MAC address%s",
+ (PRIV(dev)->nb_mac_addr > 2 ? "es" : ""));
+ for (i = 1; i < PRIV(dev)->nb_mac_addr; i++) {
+ struct ether_addr *ea;
+
+ ea = &dev->data->mac_addrs[i];
+ ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), ea,
+ PRIV(dev)->mac_addr_pool[i]);
+ if (ret) {
+ char ea_fmt[ETHER_ADDR_FMT_SIZE];
+
+ ether_format_addr(ea_fmt, ETHER_ADDR_FMT_SIZE, ea);
+ ERROR("Adding MAC address %s failed", ea_fmt);
+ }
+ }
+ /* VLAN filter */
+ vfc1 = &dev->data->vlan_filter_conf;
+ vfc2 = &edev->data->vlan_filter_conf;
+ if (memcmp(vfc1, vfc2, sizeof(struct rte_vlan_filter_conf))) {
+ uint64_t vbit;
+ uint64_t ids;
+ size_t i;
+ uint16_t vlan_id;
+
+ DEBUG("Configuring VLAN filter");
+ for (i = 0; i < RTE_DIM(vfc1->ids); i++) {
+ if (vfc1->ids[i] == 0)
+ continue;
+ ids = vfc1->ids[i];
+ while (ids) {
+ vlan_id = 64 * i;
+ /* count trailing zeroes */
+ vbit = ~ids & (ids - 1);
+ /* clear least significant bit set */
+ ids ^= (ids ^ (ids - 1)) ^ vbit;
+ for (; vbit; vlan_id++)
+ vbit >>= 1;
+ ret = rte_eth_dev_vlan_filter(
+ PORT_ID(sdev), vlan_id, 1);
+ if (ret) {
+ ERROR("Failed to apply VLAN filter %hu",
+ vlan_id);
+ return ret;
+ }
+ }
+ }
+ } else {
+ DEBUG("VLAN filter already set");
+ }
+ return 0;
+}
+
+int
+failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint32_t inactive;
+ int ret;
+ uint8_t i;
+
+ if (PRIV(dev)->state < DEV_PROBED)
+ return 0;
+ ret = failsafe_eal_init(dev);
+ if (ret)
+ return ret;
+ if (PRIV(dev)->state < DEV_ACTIVE)
+ return 0;
+ inactive = 0;
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state == DEV_PROBED)
+ inactive |= UINT32_C(1) << i;
+ ret = dev->dev_ops->dev_configure(dev);
+ if (ret)
+ return ret;
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (inactive & (UINT32_C(1) << i)) {
+ ret = fs_eth_dev_conf_apply(dev, sdev);
+ if (ret) {
+ ERROR("Could not apply configuration to sub_device %d",
+ i);
+ /* TODO: disable device */
+ return ret;
+ }
+ }
+ }
+ /*
+ * If new devices have been configured, check if
+ * the link state has changed.
+ */
+ if (inactive)
+ dev->dev_ops->link_update(dev, 1);
+ if (PRIV(dev)->state < DEV_STARTED)
+ return 0;
+ ret = dev->dev_ops->dev_start(dev);
+ if (ret)
+ return ret;
+ return 0;
+}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 693162e..4044473 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -89,6 +89,8 @@ fs_dev_configure(struct rte_eth_dev *dev)
}
sdev->state = DEV_ACTIVE;
}
+ if (PRIV(dev)->state < DEV_ACTIVE)
+ PRIV(dev)->state = DEV_ACTIVE;
return 0;
}

@@ -108,21 +110,9 @@ fs_dev_start(struct rte_eth_dev *dev)
return ret;
sdev->state = DEV_STARTED;
}
- if (PREFERRED_SUBDEV(dev)->state == DEV_STARTED) {
- if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev)) {
- DEBUG("Switching tx_dev to preferred sub_device");
- PRIV(dev)->subs_tx = 0;
- }
- } else {
- if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED) ||
- TX_SUBDEV(dev) == NULL) {
- FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
- DEBUG("Switching tx_dev to sub_device %d", i);
- PRIV(dev)->subs_tx = i;
- break;
- }
- }
- }
+ if (PRIV(dev)->state < DEV_STARTED)
+ PRIV(dev)->state = DEV_STARTED;
+ fs_switch_dev(dev);
return 0;
}

@@ -132,6 +122,7 @@ fs_dev_stop(struct rte_eth_dev *dev)
struct sub_device *sdev;
uint8_t i;

+ PRIV(dev)->state = DEV_STARTED - 1;
FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
rte_eth_dev_stop(PORT_ID(sdev));
sdev->state = DEV_STARTED - 1;
@@ -183,6 +174,10 @@ fs_dev_close(struct rte_eth_dev *dev)
struct sub_device *sdev;
uint8_t i;

+ failsafe_hotplug_alarm_cancel(dev);
+ if (PRIV(dev)->state == DEV_STARTED)
+ dev->dev_ops->dev_stop(dev);
+ PRIV(dev)->state = DEV_ACTIVE - 1;
FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
DEBUG("Closing sub_device %d", i);
rte_eth_dev_close(PORT_ID(sdev));
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index e7a7592..8fb72fe 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -41,12 +41,14 @@
#define FAILSAFE_DRIVER_NAME "Fail-safe PMD"

#define PMD_FAILSAFE_MAC_KVARG "mac"
+#define PMD_FAILSAFE_PLUG_IN_POLL_KVARG "hotplug_poll"
#define PMD_FAILSAFE_PARAM_STRING \
"dev(<ifc>)," \
- "mac=mac_addr" \
+ "mac=mac_addr," \
+ "hotplug_poll=u64" \
""

-#define FAILSAFE_PLUGIN_DEFAULT_TIMEOUT_MS 2000
+#define FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS 2000

#define FAILSAFE_MAX_ETHPORTS 2
#define FAILSAFE_MAX_ETHADDR 128
@@ -105,8 +107,22 @@ struct fs_priv {
uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR];
/* current capabilities */
struct rte_eth_dev_info infos;
+ /*
+ * Fail-safe state machine.
+ * This level will be tracking state of the EAL and eth
+ * layer at large as defined by the user application.
+ * It will then steer the sub_devices toward the same
+ * synchronized state.
+ */
+ enum dev_state state;
+ unsigned int pending_alarm:1; /* An alarm is pending */
};

+/* MISC */
+
+int failsafe_hotplug_alarm_install(struct rte_eth_dev *dev);
+int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev);
+
/* RX / TX */

uint16_t failsafe_rx_burst(void *rxq,
@@ -125,10 +141,15 @@ int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
int failsafe_eal_init(struct rte_eth_dev *dev);
int failsafe_eal_uninit(struct rte_eth_dev *dev);

+/* ETH_DEV */
+
+int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
+
/* GLOBALS */

extern const char pmd_failsafe_driver_name[];
extern const struct eth_dev_ops failsafe_ops;
+extern uint64_t hotplug_poll;
extern int mac_from_arg;

/* HELPERS */
@@ -224,4 +245,39 @@ fs_find_next(struct rte_eth_dev *dev, uint8_t sid,
return sid;
}

+static inline void
+fs_switch_dev(struct rte_eth_dev *dev)
+{
+ enum dev_state req_state;
+
+ req_state = PRIV(dev)->state;
+ if (PREFERRED_SUBDEV(dev)->state >= req_state) {
+ if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
+ (TX_SUBDEV(dev) == NULL ||
+ (req_state == DEV_STARTED) ||
+ (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
+ DEBUG("Switching tx_dev to preferred sub_device");
+ PRIV(dev)->subs_tx = 0;
+ }
+ } else if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) ||
+ TX_SUBDEV(dev) == NULL) {
+ struct sub_device *sdev;
+ uint8_t i;
+
+ /* Using acceptable device */
+ FOREACH_SUBDEV_ST(sdev, i, dev, req_state) {
+ DEBUG("Switching tx_dev to sub_device %d",
+ i);
+ PRIV(dev)->subs_tx = i;
+ break;
+ }
+ } else if (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) {
+ DEBUG("No device ready, deactivating tx_dev");
+ PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
+ } else {
+ return;
+ }
+ rte_wmb();
+}
+
#endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */
--
2.1.4
Stephen Hemminger
2017-05-31 15:15:26 UTC
Permalink
On Mon, 29 May 2017 15:42:17 +0200
Post by Gaetan Rivet
Periodically check for the existence of a device.
If a device has not been initialized and exists on the system, then it
is probed and configured.
The configuration process strives to synchronize the states between the
plugged-in sub-device and the fail-safe device.
There are existing event models (udev and netlink) that could be used to
do plug-in support without polling. Polling relies on application doing
rte_alarms and many don't.
Gaëtan Rivet
2017-06-01 14:12:41 UTC
Permalink
Post by Stephen Hemminger
On Mon, 29 May 2017 15:42:17 +0200
Post by Gaetan Rivet
Periodically check for the existence of a device.
If a device has not been initialized and exists on the system, then it
is probed and configured.
The configuration process strives to synchronize the states between the
plugged-in sub-device and the fail-safe device.
There are existing event models (udev and netlink) that could be used to
do plug-in support without polling. Polling relies on application doing
rte_alarms and many don't.
Indeed. This possibility arose during development.

The main issue with it however is that it introduces an asynchronous
design, which the DPDK and PMDs underneath are not well-suited to
interact with. It goes against the grain in a way.

The polling is simple. It can work with all models of device and is
independent of event models specific to any architecture.

It also allows to simplify the contexts in which probing and
removal are done. Currently there is only one, the interrupt thread.
This solves a few possible race conditions without having to resort to
critical sections.

The only dependency is on another DPDK subsystem, rte_alarm.
I used alarms here because rte_timers need regular rte_timer_manage()
calls and there is little way to guarantee the frequency of the calls.

rte_alarms do not force any externalities on applications, thus allowing a
seamless use of the fail-safe.
--
Gaëtan Rivet
6WIND
Stephen Hemminger
2017-06-01 18:00:10 UTC
Permalink
On Thu, 1 Jun 2017 16:12:41 +0200
Post by Gaëtan Rivet
Post by Stephen Hemminger
On Mon, 29 May 2017 15:42:17 +0200
Post by Gaetan Rivet
Periodically check for the existence of a device.
If a device has not been initialized and exists on the system, then it
is probed and configured.
The configuration process strives to synchronize the states between the
plugged-in sub-device and the fail-safe device.
There are existing event models (udev and netlink) that could be used to
do plug-in support without polling. Polling relies on application doing
rte_alarms and many don't.
Indeed. This possibility arose during development.
The main issue with it however is that it introduces an asynchronous
design, which the DPDK and PMDs underneath are not well-suited to
interact with. It goes against the grain in a way.
The polling is simple. It can work with all models of device and is
independent of event models specific to any architecture.
It also allows to simplify the contexts in which probing and
removal are done. Currently there is only one, the interrupt thread.
This solves a few possible race conditions without having to resort to
critical sections.
The only dependency is on another DPDK subsystem, rte_alarm.
I used alarms here because rte_timers need regular rte_timer_manage()
calls and there is little way to guarantee the frequency of the calls.
rte_alarms do not force any externalities on applications, thus allowing a
seamless use of the fail-safe.
The issue with rte_alarm and also with LSC interrupt callbacks is that
they don't run on a normal DPDK EAL application thread. These callbacks
run on a DPDK internal pthread. I remember having to do some application
hacks like having the callback generate an internal event on a pipe.
Gaëtan Rivet
2017-06-04 23:09:19 UTC
Permalink
Post by Stephen Hemminger
On Thu, 1 Jun 2017 16:12:41 +0200
Post by Gaëtan Rivet
Post by Stephen Hemminger
On Mon, 29 May 2017 15:42:17 +0200
Post by Gaetan Rivet
Periodically check for the existence of a device.
If a device has not been initialized and exists on the system, then it
is probed and configured.
The configuration process strives to synchronize the states between the
plugged-in sub-device and the fail-safe device.
There are existing event models (udev and netlink) that could be used to
do plug-in support without polling. Polling relies on application doing
rte_alarms and many don't.
Indeed. This possibility arose during development.
The main issue with it however is that it introduces an asynchronous
design, which the DPDK and PMDs underneath are not well-suited to
interact with. It goes against the grain in a way.
The polling is simple. It can work with all models of device and is
independent of event models specific to any architecture.
It also allows to simplify the contexts in which probing and
removal are done. Currently there is only one, the interrupt thread.
This solves a few possible race conditions without having to resort to
critical sections.
The only dependency is on another DPDK subsystem, rte_alarm.
I used alarms here because rte_timers need regular rte_timer_manage()
calls and there is little way to guarantee the frequency of the calls.
rte_alarms do not force any externalities on applications, thus allowing a
seamless use of the fail-safe.
The issue with rte_alarm and also with LSC interrupt callbacks is that
they don't run on a normal DPDK EAL application thread. These callbacks
run on a DPDK internal pthread. I remember having to do some application
hacks like having the callback generate an internal event on a pipe.
On the other hand, not all applications would make use of those hacks,
and adding those would impose architecture elements on users. While
convenient, this goes somewhat against the tool-box ethos of DPDK.

In the end, I had to leverage the existing tools. Interrupts in DPDK are
a known weak point, but they are at least working and not too heavy
conceptually on applications (clean threading model, no need for signal
masks, etc). Better implementation might crop up at some point, if those
hurdles are too much and shared by many.
--
Gaëtan Rivet
6WIND
Stephen Hemminger
2017-06-05 15:25:22 UTC
Permalink
On Mon, 5 Jun 2017 01:09:19 +0200
Post by Gaëtan Rivet
Post by Stephen Hemminger
On Thu, 1 Jun 2017 16:12:41 +0200
Post by Gaëtan Rivet
Post by Stephen Hemminger
On Mon, 29 May 2017 15:42:17 +0200
Post by Gaetan Rivet
Periodically check for the existence of a device.
If a device has not been initialized and exists on the system, then it
is probed and configured.
The configuration process strives to synchronize the states between the
plugged-in sub-device and the fail-safe device.
There are existing event models (udev and netlink) that could be used to
do plug-in support without polling. Polling relies on application doing
rte_alarms and many don't.
Indeed. This possibility arose during development.
The main issue with it however is that it introduces an asynchronous
design, which the DPDK and PMDs underneath are not well-suited to
interact with. It goes against the grain in a way.
The polling is simple. It can work with all models of device and is
independent of event models specific to any architecture.
It also allows to simplify the contexts in which probing and
removal are done. Currently there is only one, the interrupt thread.
This solves a few possible race conditions without having to resort to
critical sections.
The only dependency is on another DPDK subsystem, rte_alarm.
I used alarms here because rte_timers need regular rte_timer_manage()
calls and there is little way to guarantee the frequency of the calls.
rte_alarms do not force any externalities on applications, thus allowing a
seamless use of the fail-safe.
The issue with rte_alarm and also with LSC interrupt callbacks is that
they don't run on a normal DPDK EAL application thread. These callbacks
run on a DPDK internal pthread. I remember having to do some application
hacks like having the callback generate an internal event on a pipe.
On the other hand, not all applications would make use of those hacks,
and adding those would impose architecture elements on users. While
convenient, this goes somewhat against the tool-box ethos of DPDK.
In the end, I had to leverage the existing tools. Interrupts in DPDK are
a known weak point, but they are at least working and not too heavy
conceptually on applications (clean threading model, no need for signal
masks, etc). Better implementation might crop up at some point, if those
hurdles are too much and shared by many.
The alarm solution is a good intermediate step. But eventually in the spirit
of the DPDK there should be option to have an event driven model. Maybe the event
library will help.

For me the litmus test is can the known open source heavy weight DPDK applications
like VPP work?
Gaetan Rivet
2017-05-29 13:42:18 UTC
Permalink
Add the "exec" device type.
The parameters given to this type of device will be executed in a shell.
The output of this command is then used as a definition for a device.

That command can be re-interpreted if the related device is not
plugged-in. It allows for a device definition to react to system
changes (e.g. changing PCI bus for a given device).

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/fail_safe.rst | 20 +++++++
drivers/net/failsafe/failsafe_args.c | 99 +++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ether.c | 7 +++
drivers/net/failsafe/failsafe_private.h | 4 ++
4 files changed, 130 insertions(+)

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index c04891a..1b6e110 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -91,6 +91,19 @@ Fail-safe command line parameters
additional sub-device parameters if need be. They will be passed on to the
sub-device.

+- **exec(<shell command>)** parameter
+
+ This parameter allows the user to provide a command to the fail-safe PMD to
+ execute and define a sub-device.
+ It is done within a regular shell context.
+ The first line of its output is read by the fail-safe PMD and otherwise
+ interpreted as if passed by the regular **dev** parameter.
+ Any other line is discarded.
+ If the command fail or output an incorrect string, the sub-device is not
+ initialized.
+ All commas within the ``shell command`` are replaced by spaces before
+ executing the command. This helps using scripts to specify devices.
+
- **mac** parameter [MAC address]

This parameter allows the user to set a default MAC address to the fail-safe
@@ -126,6 +139,13 @@ This section shows some example of using **testpmd** with a fail-safe PMD.
--vdev='net_failsafe0,mac=de:ad:be:ef:01:02,dev(84:00.0),dev(net_ring0,nodeaction=r1:0:CREATE)' -- \
-i

+#. Start testpmd using a flexible device definition
+
+ .. code-block:: console
+
+ $RTE_TARGET/build/app/testpmd -c 0xff -n 4 --no-pci \
+ --vdev='net_failsafe0,exec(echo 84:00.0)' -- -i
+
Using the Fail-safe PMD from an application
-------------------------------------------

diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
index 8f334aa..c723ca3 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -30,6 +30,8 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
+#include <assert.h>
#include <string.h>
#include <errno.h>

@@ -96,6 +98,75 @@ fs_parse_device(struct sub_device *sdev, char *args)
return 0;
}

+static void
+fs_sanitize_cmdline(char *args)
+{
+ size_t len;
+
+ len = strnlen(args, DEVARGS_MAXLEN);
+ args[len - 1] = '\0';
+}
+
+static int
+fs_execute_cmd(struct sub_device *sdev, char *cmdline)
+{
+ FILE *fp;
+ /* store possible newline as well */
+ char output[DEVARGS_MAXLEN + 1];
+ size_t len;
+ int old_err;
+ int ret;
+
+ assert(cmdline != NULL || sdev->cmdline != NULL);
+ if (sdev->cmdline == NULL) {
+ char *new_str;
+ size_t i;
+
+ len = strlen(cmdline) + 1;
+ new_str = rte_realloc(sdev->cmdline, len,
+ RTE_CACHE_LINE_SIZE);
+ if (new_str == NULL) {
+ ERROR("Command line allocation failed");
+ return -ENOMEM;
+ }
+ sdev->cmdline = new_str;
+ snprintf(sdev->cmdline, len, "%s", cmdline);
+ /* Replace all commas in the command line by spaces */
+ for (i = 0; i < len; i++)
+ if (sdev->cmdline[i] == ',')
+ sdev->cmdline[i] = ' ';
+ }
+ DEBUG("'%s'", sdev->cmdline);
+ old_err = errno;
+ fp = popen(sdev->cmdline, "r");
+ if (fp == NULL) {
+ ret = errno;
+ ERROR("popen: %s", strerror(errno));
+ errno = old_err;
+ return ret;
+ }
+ /* We only read one line */
+ if (fgets(output, sizeof(output) - 1, fp) == NULL) {
+ DEBUG("Could not read command output");
+ return -ENODEV;
+ }
+ fs_sanitize_cmdline(output);
+ ret = fs_parse_device(sdev, output);
+ if (ret) {
+ ERROR("Parsing device '%s' failed", output);
+ goto ret_pclose;
+ }
+ret_pclose:
+ ret = pclose(fp);
+ if (ret) {
+ ret = errno;
+ ERROR("pclose: %s", strerror(errno));
+ errno = old_err;
+ return ret;
+ }
+ return ret;
+}
+
static int
fs_parse_device_param(struct rte_eth_dev *dev, const char *param,
uint8_t head)
@@ -130,6 +201,14 @@ fs_parse_device_param(struct rte_eth_dev *dev, const char *param,
ret = fs_parse_device(sdev, args);
if (ret)
goto free_args;
+ } else if (strncmp(param, "exec", 4) == 0) {
+ ret = fs_execute_cmd(sdev, args);
+ if (ret == -ENODEV) {
+ DEBUG("Reading device info from command line failed");
+ ret = 0;
+ }
+ if (ret)
+ goto free_args;
} else {
ERROR("Unrecognized device type: %.*s", (int)b, param);
return -EINVAL;
@@ -331,6 +410,8 @@ failsafe_args_free(struct rte_eth_dev *dev)
uint8_t i;

FOREACH_SUBDEV(sdev, i, dev) {
+ rte_free(sdev->cmdline);
+ sdev->cmdline = NULL;
free(sdev->devargs.args);
sdev->devargs.args = NULL;
}
@@ -361,3 +442,21 @@ failsafe_args_count_subdevice(struct rte_eth_dev *dev,
return fs_parse_sub_devices(fs_count_device,
dev, params);
}
+
+int
+failsafe_args_parse_subs(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret = 0;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state >= DEV_PARSED)
+ continue;
+ if (sdev->cmdline)
+ ret = fs_execute_cmd(sdev, sdev->cmdline);
+ if (ret == 0)
+ sdev->state = DEV_PARSED;
+ }
+ return 0;
+}
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 7910952..2a1535e 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -188,6 +188,13 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
int ret;
uint8_t i;

+ if (PRIV(dev)->state < DEV_PARSED)
+ return 0;
+
+ ret = failsafe_args_parse_subs(dev);
+ if (ret)
+ return ret;
+
if (PRIV(dev)->state < DEV_PROBED)
return 0;
ret = failsafe_eal_init(dev);
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 8fb72fe..554d7a3 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -44,6 +44,7 @@
#define PMD_FAILSAFE_PLUG_IN_POLL_KVARG "hotplug_poll"
#define PMD_FAILSAFE_PARAM_STRING \
"dev(<ifc>)," \
+ "exec(<shell command>)," \
"mac=mac_addr," \
"hotplug_poll=u64" \
""
@@ -87,6 +88,8 @@ struct sub_device {
struct rte_eth_dev *edev;
/* Device state machine */
enum dev_state state;
+ /* Some device are defined as a command line */
+ char *cmdline;
};

struct fs_priv {
@@ -135,6 +138,7 @@ uint16_t failsafe_tx_burst(void *txq,
int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
void failsafe_args_free(struct rte_eth_dev *dev);
int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
+int failsafe_args_parse_subs(struct rte_eth_dev *dev);

/* EAL */
--
2.1.4
Stephen Hemminger
2017-05-31 15:19:36 UTC
Permalink
On Mon, 29 May 2017 15:42:18 +0200
Post by Gaetan Rivet
+- **exec(<shell command>)** parameter
+
+ This parameter allows the user to provide a command to the fail-safe PMD to
+ execute and define a sub-device.
+ It is done within a regular shell context.
+ The first line of its output is read by the fail-safe PMD and otherwise
+ interpreted as if passed by the regular **dev** parameter.
+ Any other line is discarded.
+ If the command fail or output an incorrect string, the sub-device is not
+ initialized.
+ All commas within the ``shell command`` are replaced by spaces before
+ executing the command. This helps using scripts to specify devices.
+
Exec from a DPDK application seems like possible security hole since most DPDK applications
have to run as root.
Post by Gaetan Rivet
static int
+fs_execute_cmd(struct sub_device *sdev, char *cmdline)
+{
+ FILE *fp;
+ /* store possible newline as well */
+ char output[DEVARGS_MAXLEN + 1];
+ size_t len;
+ int old_err;
+ int ret;
+
+ assert(cmdline != NULL || sdev->cmdline != NULL);
+ if (sdev->cmdline == NULL) {
+ char *new_str;
+ size_t i;
+
+ len = strlen(cmdline) + 1;
+ new_str = rte_realloc(sdev->cmdline, len,
+ RTE_CACHE_LINE_SIZE);
+ if (new_str == NULL) {
+ ERROR("Command line allocation failed");
+ return -ENOMEM;
+ }
Using rte_malloc for cmdline is way over optimizing. rte_malloc comes from huge page area
which is limited. The only reason to use it is if the memory needs to be shared by primary/slave.
Also rte_malloc has much less protection (memleak checkers, guards etc) compared to regular malloc.
Gaëtan Rivet
2017-06-01 14:24:21 UTC
Permalink
Post by Stephen Hemminger
On Mon, 29 May 2017 15:42:18 +0200
Post by Gaetan Rivet
+- **exec(<shell command>)** parameter
+
+ This parameter allows the user to provide a command to the fail-safe PMD to
+ execute and define a sub-device.
+ It is done within a regular shell context.
+ The first line of its output is read by the fail-safe PMD and otherwise
+ interpreted as if passed by the regular **dev** parameter.
+ Any other line is discarded.
+ If the command fail or output an incorrect string, the sub-device is not
+ initialized.
+ All commas within the ``shell command`` are replaced by spaces before
+ executing the command. This helps using scripts to specify devices.
+
Exec from a DPDK application seems like possible security hole since most DPDK applications
have to run as root.
Users will run scripts or other programs that will launch fail-safe
instances. If a user launches a script over the fail-safe to configure
it or under it to detect devices, security seems at the same level?
Post by Stephen Hemminger
Post by Gaetan Rivet
static int
+fs_execute_cmd(struct sub_device *sdev, char *cmdline)
+{
+ FILE *fp;
+ /* store possible newline as well */
+ char output[DEVARGS_MAXLEN + 1];
+ size_t len;
+ int old_err;
+ int ret;
+
+ assert(cmdline != NULL || sdev->cmdline != NULL);
+ if (sdev->cmdline == NULL) {
+ char *new_str;
+ size_t i;
+
+ len = strlen(cmdline) + 1;
+ new_str = rte_realloc(sdev->cmdline, len,
+ RTE_CACHE_LINE_SIZE);
+ if (new_str == NULL) {
+ ERROR("Command line allocation failed");
+ return -ENOMEM;
+ }
Using rte_malloc for cmdline is way over optimizing. rte_malloc comes from huge page area
which is limited. The only reason to use it is if the memory needs to be shared by primary/slave.
Also rte_malloc has much less protection (memleak checkers, guards etc) compared to regular malloc.
I agree, it should be changed.
--
Gaëtan Rivet
6WIND
Gaetan Rivet
2017-05-29 13:42:19 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/features/failsafe.ini | 1 +
drivers/net/failsafe/Makefile | 1 +
drivers/net/failsafe/failsafe.c | 1 +
drivers/net/failsafe/failsafe_eal.c | 1 +
drivers/net/failsafe/failsafe_ether.c | 70 +++++++++++
drivers/net/failsafe/failsafe_flow.c | 216 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ops.c | 29 +++++
drivers/net/failsafe/failsafe_private.h | 18 +++
8 files changed, 337 insertions(+)
create mode 100644 drivers/net/failsafe/failsafe_flow.c

diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
index 3c52823..9167b59 100644
--- a/doc/guides/nics/features/failsafe.ini
+++ b/doc/guides/nics/features/failsafe.ini
@@ -13,6 +13,7 @@ Allmulticast mode = Y
Unicast MAC filter = Y
Multicast MAC filter = Y
VLAN filter = Y
+Flow API = Y
Packet type parsing = Y
Basic stats = Y
Stats per queue = Y
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
index 4567961..a53bb75 100644
--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -41,6 +41,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ether.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_flow.c

# No exported include files

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 888f07b..6557255 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -177,6 +177,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0];
dev->data->dev_link = eth_link;
PRIV(dev)->nb_mac_addr = 1;
+ TAILQ_INIT(&PRIV(dev)->flow_list);
dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst;
dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst;
if (params == NULL) {
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index d7b12e2..1d9ddab 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -80,6 +80,7 @@ fs_bus_init(struct rte_eth_dev *dev)
ERROR("sub_device %d init went wrong", i);
return -ENODEV;
}
+ SUB_ID(sdev) = i;
sdev->dev = ETH(sdev)->device;
ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
sdev->state = DEV_PROBED;
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 2a1535e..2958207 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -33,8 +33,46 @@

#include <unistd.h>

+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+
#include "failsafe_private.h"

+/** Print a message out of a flow error. */
+static int
+fs_flow_complain(struct rte_flow_error *error)
+{
+ static const char *const errstrlist[] = {
+ [RTE_FLOW_ERROR_TYPE_NONE] = "no error",
+ [RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified",
+ [RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)",
+ [RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
+ [RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
+ [RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
+ [RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
+ [RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
+ [RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
+ };
+ const char *errstr;
+ char buf[32];
+ int err = rte_errno;
+
+ if ((unsigned int)error->type >= RTE_DIM(errstrlist) ||
+ !errstrlist[error->type])
+ errstr = "unknown type";
+ else
+ errstr = errstrlist[error->type];
+ ERROR("Caught error type %d (%s): %s%s\n",
+ error->type, errstr,
+ error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ",
+ error->cause), buf) : "",
+ error->message ? error->message : "(no stated reason)");
+ return -err;
+}
+
static int
fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
struct sub_device *sdev)
@@ -42,6 +80,8 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
struct rte_eth_dev *edev;
struct rte_vlan_filter_conf *vfc1;
struct rte_vlan_filter_conf *vfc2;
+ struct rte_flow *flow;
+ struct rte_flow_error ferror;
uint32_t i;
int ret;

@@ -177,6 +217,36 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
} else {
DEBUG("VLAN filter already set");
}
+ /* rte_flow */
+ if (TAILQ_EMPTY(&PRIV(dev)->flow_list)) {
+ DEBUG("rte_flow already set");
+ } else {
+ DEBUG("Resetting rte_flow configuration");
+ ret = rte_flow_flush(PORT_ID(sdev), &ferror);
+ if (ret) {
+ fs_flow_complain(&ferror);
+ return ret;
+ }
+ i = 0;
+ rte_errno = 0;
+ DEBUG("Configuring rte_flow");
+ TAILQ_FOREACH(flow, &PRIV(dev)->flow_list, next) {
+ DEBUG("Creating flow #%" PRIu32, i++);
+ flow->flows[SUB_ID(sdev)] =
+ rte_flow_create(PORT_ID(sdev),
+ &flow->fd->attr,
+ flow->fd->items,
+ flow->fd->actions,
+ &ferror);
+ ret = rte_errno;
+ if (ret)
+ break;
+ }
+ if (ret) {
+ fs_flow_complain(&ferror);
+ return ret;
+ }
+ }
return 0;
}

diff --git a/drivers/net/failsafe/failsafe_flow.c b/drivers/net/failsafe/failsafe_flow.c
new file mode 100644
index 0000000..d8f59a1
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_flow.c
@@ -0,0 +1,216 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/queue.h>
+
+#include <rte_malloc.h>
+#include <rte_tailq.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+
+#include "failsafe_private.h"
+
+static struct rte_flow *
+fs_flow_allocate(const struct rte_flow_attr *attr,
+ const struct rte_flow_item *items,
+ const struct rte_flow_action *actions)
+{
+ struct rte_flow *flow;
+ size_t fdsz;
+
+ fdsz = rte_flow_copy(NULL, 0, attr, items, actions);
+ flow = rte_zmalloc(NULL,
+ sizeof(struct rte_flow) + fdsz,
+ RTE_CACHE_LINE_SIZE);
+ if (flow == NULL) {
+ ERROR("Could not allocate new flow");
+ return NULL;
+ }
+ flow->fd = (void *)((uintptr_t)flow + sizeof(*flow));
+ if (rte_flow_copy(flow->fd, fdsz, attr, items, actions) != fdsz) {
+ ERROR("Failed to copy flow description");
+ rte_free(flow);
+ return NULL;
+ }
+ return flow;
+}
+
+static void
+fs_flow_release(struct rte_flow **flow)
+{
+ rte_free((*flow)->fd);
+ rte_free(*flow);
+ *flow = NULL;
+}
+
+static int
+fs_flow_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item patterns[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_flow_validate on sub_device %d", i);
+ ret = rte_flow_validate(PORT_ID(sdev),
+ attr, patterns, actions, error);
+ if (ret) {
+ ERROR("Operation rte_flow_validate failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static struct rte_flow *
+fs_flow_create(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item patterns[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ struct rte_flow *flow;
+ uint8_t i;
+
+ flow = fs_flow_allocate(attr, patterns, actions);
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ flow->flows[i] = rte_flow_create(PORT_ID(sdev),
+ attr, patterns, actions, error);
+ if (flow->flows[i] == NULL) {
+ ERROR("Failed to create flow on sub_device %d",
+ i);
+ goto err;
+ }
+ }
+ TAILQ_INSERT_TAIL(&PRIV(dev)->flow_list, flow, next);
+ return flow;
+err:
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (flow->flows[i] != NULL)
+ rte_flow_destroy(PORT_ID(sdev),
+ flow->flows[i], error);
+ }
+ fs_flow_release(&flow);
+ return NULL;
+}
+
+static int
+fs_flow_destroy(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ if (flow == NULL) {
+ ERROR("Invalid flow");
+ return -EINVAL;
+ }
+ ret = 0;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ int local_ret;
+
+ if (flow->flows[i] == NULL)
+ continue;
+ local_ret = rte_flow_destroy(PORT_ID(sdev),
+ flow->flows[i], error);
+ if (local_ret) {
+ ERROR("Failed to destroy flow on sub_device %d: %d",
+ i, local_ret);
+ if (ret == 0)
+ ret = local_ret;
+ }
+ }
+ TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
+ rte_free(flow);
+ return ret;
+}
+
+static int
+fs_flow_flush(struct rte_eth_dev *dev,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ struct rte_flow *flow;
+ void *tmp;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_flow_flush on sub_device %d", i);
+ ret = rte_flow_flush(PORT_ID(sdev), error);
+ if (ret) {
+ ERROR("Operation rte_flow_flush failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ TAILQ_FOREACH_SAFE(flow, &PRIV(dev)->flow_list, next, tmp) {
+ TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
+ fs_flow_release(&flow);
+ }
+ return 0;
+}
+
+static int
+fs_flow_query(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ enum rte_flow_action_type type,
+ void *arg,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev != NULL) {
+ return rte_flow_query(PORT_ID(sdev),
+ flow->flows[SUB_ID(sdev)], type, arg, error);
+ }
+ WARN("No active sub_device to query about its flow");
+ return -1;
+}
+
+const struct rte_flow_ops fs_flow_ops = {
+ .validate = fs_flow_validate,
+ .create = fs_flow_create,
+ .destroy = fs_flow_destroy,
+ .flush = fs_flow_flush,
+ .query = fs_flow_query,
+};
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 4044473..4cb2e90 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -35,6 +35,7 @@
#include <stdint.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>
+#include <rte_flow.h>

#include "failsafe_private.h"

@@ -628,6 +629,33 @@ fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
}

+static int
+fs_filter_ctrl(struct rte_eth_dev *dev,
+ enum rte_filter_type type,
+ enum rte_filter_op op,
+ void *arg)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ if (type == RTE_ETH_FILTER_GENERIC &&
+ op == RTE_ETH_FILTER_GET) {
+ *(const void **)arg = &fs_flow_ops;
+ return 0;
+ }
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d", i);
+ ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_filter_ctrl failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
const struct eth_dev_ops failsafe_ops = {
.dev_configure = fs_dev_configure,
.dev_start = fs_dev_start,
@@ -655,4 +683,5 @@ const struct eth_dev_ops failsafe_ops = {
.mac_addr_remove = fs_mac_addr_remove,
.mac_addr_add = fs_mac_addr_add,
.mac_addr_set = fs_mac_addr_set,
+ .filter_ctrl = fs_filter_ctrl,
};
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 554d7a3..f40ea2f 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -34,6 +34,8 @@
#ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_
#define _RTE_ETH_FAILSAFE_PRIVATE_H_

+#include <sys/queue.h>
+
#include <rte_dev.h>
#include <rte_ethdev.h>
#include <rte_devargs.h>
@@ -72,6 +74,14 @@ struct txq {
struct rte_eth_txq_info info;
};

+struct rte_flow {
+ TAILQ_ENTRY(rte_flow) next;
+ /* sub_flows */
+ struct rte_flow *flows[FAILSAFE_MAX_ETHPORTS];
+ /* flow description for synchronization */
+ struct rte_flow_desc *fd;
+};
+
enum dev_state {
DEV_UNDEFINED = 0,
DEV_PARSED,
@@ -86,6 +96,7 @@ struct sub_device {
struct rte_bus *bus;
struct rte_device *dev;
struct rte_eth_dev *edev;
+ uint8_t sid;
/* Device state machine */
enum dev_state state;
/* Some device are defined as a command line */
@@ -104,6 +115,8 @@ struct fs_priv {
uint8_t subs_tail; /* first invalid */
uint8_t subs_tx; /* current emitting device */
uint8_t current_probed;
+ /* flow mapping */
+ TAILQ_HEAD(sub_flows, rte_flow) flow_list;
/* current number of mac_addr slots allocated. */
uint32_t nb_mac_addr;
struct ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR];
@@ -153,6 +166,7 @@ int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);

extern const char pmd_failsafe_driver_name[];
extern const struct eth_dev_ops failsafe_ops;
+extern const struct rte_flow_ops fs_flow_ops;
extern uint64_t hotplug_poll;
extern int mac_from_arg;

@@ -170,6 +184,10 @@ extern int mac_from_arg;
#define PORT_ID(sdev) \
(ETH(sdev)->data->port_id)

+/* sdev: (struct sub_device *) */
+#define SUB_ID(sdev) \
+ ((sdev)->sid)
+
/**
* Stateful iterator construct over fail-safe sub-devices:
* s: (struct sub_device *), iterator
--
2.1.4
Stephen Hemminger
2017-05-31 15:21:39 UTC
Permalink
On Mon, 29 May 2017 15:42:19 +0200
Post by Gaetan Rivet
---
doc/guides/nics/features/failsafe.ini | 1 +
drivers/net/failsafe/Makefile | 1 +
drivers/net/failsafe/failsafe.c | 1 +
drivers/net/failsafe/failsafe_eal.c | 1 +
drivers/net/failsafe/failsafe_ether.c | 70 +++++++++++
drivers/net/failsafe/failsafe_flow.c | 216 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ops.c | 29 +++++
drivers/net/failsafe/failsafe_private.h | 18 +++
8 files changed, 337 insertions(+)
create mode 100644 drivers/net/failsafe/failsafe_flow.c
How does this interact with typical case of VF and dumb virtual device?
The VF has flow API but dumb virtual device does not.

How does this work with late binding plugin? If VF arrives later is
the flow table reprogrammed to the VF?
Gaëtan Rivet
2017-06-01 14:28:13 UTC
Permalink
Post by Stephen Hemminger
On Mon, 29 May 2017 15:42:19 +0200
Post by Gaetan Rivet
---
doc/guides/nics/features/failsafe.ini | 1 +
drivers/net/failsafe/Makefile | 1 +
drivers/net/failsafe/failsafe.c | 1 +
drivers/net/failsafe/failsafe_eal.c | 1 +
drivers/net/failsafe/failsafe_ether.c | 70 +++++++++++
drivers/net/failsafe/failsafe_flow.c | 216 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ops.c | 29 +++++
drivers/net/failsafe/failsafe_private.h | 18 +++
8 files changed, 337 insertions(+)
create mode 100644 drivers/net/failsafe/failsafe_flow.c
How does this interact with typical case of VF and dumb virtual device?
The VF has flow API but dumb virtual device does not.
The fail-safe requires capabilities to be the same on all its slave. If
a capability must be supported on the VF, then is should be as well on
the synthetic path.

But the TAP PMD that can be used to capture traffic from a synthetic
path supports rte_flow in the same capacity as other NICs.
Post by Stephen Hemminger
How does this work with late binding plugin? If VF arrives later is
the flow table reprogrammed to the VF?
The fail-safe stores an internal representation of rte_flows. These are
replayed in the same order upon plugin, so the flow table is
reprogrammed in the same way to the VF.
--
Gaëtan Rivet
6WIND
Stephen Hemminger
2017-06-01 18:02:22 UTC
Permalink
On Thu, 1 Jun 2017 16:28:13 +0200
Post by Gaëtan Rivet
Post by Stephen Hemminger
On Mon, 29 May 2017 15:42:19 +0200
Post by Gaetan Rivet
---
doc/guides/nics/features/failsafe.ini | 1 +
drivers/net/failsafe/Makefile | 1 +
drivers/net/failsafe/failsafe.c | 1 +
drivers/net/failsafe/failsafe_eal.c | 1 +
drivers/net/failsafe/failsafe_ether.c | 70 +++++++++++
drivers/net/failsafe/failsafe_flow.c | 216 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ops.c | 29 +++++
drivers/net/failsafe/failsafe_private.h | 18 +++
8 files changed, 337 insertions(+)
create mode 100644 drivers/net/failsafe/failsafe_flow.c
How does this interact with typical case of VF and dumb virtual device?
The VF has flow API but dumb virtual device does not.
The fail-safe requires capabilities to be the same on all its slave. If
a capability must be supported on the VF, then is should be as well on
the synthetic path.
But the TAP PMD that can be used to capture traffic from a synthetic
path supports rte_flow in the same capacity as other NICs.
Post by Stephen Hemminger
How does this work with late binding plugin? If VF arrives later is
the flow table reprogrammed to the VF?
The fail-safe stores an internal representation of rte_flows. These are
replayed in the same order upon plugin, so the flow table is
reprogrammed in the same way to the VF.
The synthetic path can't do flow direction in netvsc (and via tap). Therefore this whole
flow direction part is uninteresting for the use case of Hyper-V/Azure.
Gaetan Rivet
2017-05-29 13:42:20 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/features/failsafe.ini | 6 ++
drivers/net/failsafe/failsafe_ops.c | 131 +++++++++++++++++++++++++++++++++-
2 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
index 9167b59..257f579 100644
--- a/doc/guides/nics/features/failsafe.ini
+++ b/doc/guides/nics/features/failsafe.ini
@@ -14,6 +14,12 @@ Unicast MAC filter = Y
Multicast MAC filter = Y
VLAN filter = Y
Flow API = Y
+VLAN offload = Y
+QinQ offload = Y
+L3 checksum offload = Y
+L4 checksum offload = Y
+Inner L3 checksum = Y
+Inner L4 checksum = Y
Packet type parsing = Y
Basic stats = Y
Stats per queue = Y
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 4cb2e90..5fb0135 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -63,22 +63,149 @@ static struct rte_eth_dev_info default_infos = {
.nb_seg_max = UINT16_MAX,
.nb_mtu_seg_max = UINT16_MAX,
},
- /* Set of understood capabilities */
- .rx_offload_capa = 0x0,
+ /*
+ * Set of capabilities that can be verified upon
+ * configuring a sub-device.
+ */
+ .rx_offload_capa =
+ DEV_RX_OFFLOAD_VLAN_STRIP |
+ DEV_RX_OFFLOAD_QINQ_STRIP |
+ DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_LRO,
.tx_offload_capa = 0x0,
.flow_type_rss_offloads = 0x0,
};

+/**
+ * Check whether a specific offloading capability
+ * is supported by a sub_device.
+ *
+ * @return
+ * 0: all requested capabilities are supported by the sub_device
+ * positive value: This flag at least is not supported by the sub_device
+ */
+static int
+fs_port_offload_validate(struct rte_eth_dev *dev,
+ struct sub_device *sdev)
+{
+ struct rte_eth_dev_info infos = {0};
+ struct rte_eth_conf *cf;
+ uint32_t cap;
+
+ cf = &dev->data->dev_conf;
+ SUBOPS(sdev, dev_infos_get)(ETH(sdev), &infos);
+ /* RX capabilities */
+ cap = infos.rx_offload_capa;
+ if (cf->rxmode.hw_vlan_strip &&
+ ((cap & DEV_RX_OFFLOAD_VLAN_STRIP) == 0)) {
+ WARN("VLAN stripping offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_VLAN_STRIP;
+ }
+ if (cf->rxmode.hw_ip_checksum &&
+ ((cap & (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM)) !=
+ (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM))) {
+ WARN("IP checksum offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM;
+ }
+ if (cf->rxmode.enable_lro &&
+ ((cap & DEV_RX_OFFLOAD_TCP_LRO) == 0)) {
+ WARN("TCP LRO offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_TCP_LRO;
+ }
+ if (cf->rxmode.hw_vlan_extend &&
+ ((cap & DEV_RX_OFFLOAD_QINQ_STRIP) == 0)) {
+ WARN("Stacked VLAN stripping offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_QINQ_STRIP;
+ }
+ /* TX capabilities */
+ /* Nothing to do, no tx capa supported */
+ return 0;
+}
+
+/*
+ * Disable the dev_conf flag related to an offload capability flag
+ * within an ethdev configuration.
+ */
+static int
+fs_port_disable_offload(struct rte_eth_conf *cf,
+ uint32_t ol_cap)
+{
+ switch (ol_cap) {
+ case DEV_RX_OFFLOAD_VLAN_STRIP:
+ INFO("Disabling VLAN stripping offload");
+ cf->rxmode.hw_vlan_strip = 0;
+ break;
+ case DEV_RX_OFFLOAD_IPV4_CKSUM:
+ case DEV_RX_OFFLOAD_UDP_CKSUM:
+ case DEV_RX_OFFLOAD_TCP_CKSUM:
+ case (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM):
+ INFO("Disabling IP checksum offload");
+ cf->rxmode.hw_ip_checksum = 0;
+ break;
+ case DEV_RX_OFFLOAD_TCP_LRO:
+ INFO("Disabling TCP LRO offload");
+ cf->rxmode.enable_lro = 0;
+ break;
+ case DEV_RX_OFFLOAD_QINQ_STRIP:
+ INFO("Disabling stacked VLAN stripping offload");
+ cf->rxmode.hw_vlan_extend = 0;
+ break;
+ default:
+ DEBUG("Unable to disable offload capability: %" PRIx32,
+ ol_cap);
+ return -1;
+ }
+ return 0;
+}
+
static int
fs_dev_configure(struct rte_eth_dev *dev)
{
struct sub_device *sdev;
uint8_t i;
+ int capa_flag;
int ret;

FOREACH_SUBDEV(sdev, i, dev) {
if (sdev->state != DEV_PROBED)
continue;
+ DEBUG("Checking capabilities for sub_device %d", i);
+ while ((capa_flag = fs_port_offload_validate(dev, sdev))) {
+ /*
+ * Refuse to change configuration if multiple devices
+ * are present and we already have configured at least
+ * some of them.
+ */
+ if (PRIV(dev)->state >= DEV_ACTIVE &&
+ PRIV(dev)->subs_tail > 1) {
+ ERROR("device already configured, cannot fix live configuration");
+ return -1;
+ }
+ ret = fs_port_disable_offload(&dev->data->dev_conf,
+ capa_flag);
+ if (ret) {
+ ERROR("Unable to disable offload capability");
+ return ret;
+ }
+ }
+ }
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_PROBED)
+ continue;
DEBUG("Configuring sub-device %d", i);
ret = rte_eth_dev_configure(PORT_ID(sdev),
dev->data->nb_rx_queues,
--
2.1.4
Stephen Hemminger
2017-05-31 15:23:09 UTC
Permalink
On Mon, 29 May 2017 15:42:20 +0200
Post by Gaetan Rivet
---
doc/guides/nics/features/failsafe.ini | 6 ++
drivers/net/failsafe/failsafe_ops.c | 131 +++++++++++++++++++++++++++++++++-
2 files changed, 135 insertions(+), 2 deletions(-)
Once again what about case of dumb synthetic NIC combined with SR-IOV VF?
The VF has offloads the virtual NIC does not.

What about late plugin. how do you program the offloads of the later arriving
VF device.
Gaëtan Rivet
2017-06-01 14:38:12 UTC
Permalink
Post by Stephen Hemminger
On Mon, 29 May 2017 15:42:20 +0200
Post by Gaetan Rivet
---
doc/guides/nics/features/failsafe.ini | 6 ++
drivers/net/failsafe/failsafe_ops.c | 131 +++++++++++++++++++++++++++++++++-
2 files changed, 135 insertions(+), 2 deletions(-)
Once again what about case of dumb synthetic NIC combined with SR-IOV VF?
The VF has offloads the virtual NIC does not.
The rules for capabilities are a little complicated.
In the case both VF and the virtual NIC are present at launch, then the
logical AND is done both their capabilities sets.
If one has additional capabilities that the user is requesting, and the
fail-safe recognize them (currently, all RX offloads, as TX offloads
were not yet expressed by flags), and this capability is not supported
by one slave, then this offload is disabled in the configuration.
Post by Stephen Hemminger
What about late plugin. how do you program the offloads of the later arriving
VF device.
If the VF is not present at launch, then the fail-safe reads only the
set of capabilities from the fallback device. It does not have to do any
AND-ing of the flags.

The consequence is that upon plugin of the VF, the latter has to respect
the current running configuration. Probing will actually fail if some
capability is not supported (depending on PMDs), and the running
configuration is not updated as it is considered "live".

There are only two solutions to this, either:

* Complicate a lot the fail-safe design and the rules applied in
the decision made on NIC configuration. The user then has bad
surprises upon seeing that his performance have been degraded
for arcane reasons.

* Emulate in software the offloads and try to advertize as many as
possible. This is done for example in the TAP PMD for some flags,
allowing those offloads to be used with hardware NICs.
The user then has a clear view of the available offloads by comparing
both sets of capabilities.
--
Gaëtan Rivet
6WIND
Olga Shern
2017-06-01 14:55:54 UTC
Permalink
L;w
-----Original Message-----
Sent: Thursday, June 01, 2017 5:38 PM
Subject: Re: [dpdk-dev] [PATCH v4 08/12] net/failsafe: support offload
capabilities
Post by Stephen Hemminger
On Mon, 29 May 2017 15:42:20 +0200
Post by Gaetan Rivet
---
doc/guides/nics/features/failsafe.ini | 6 ++
drivers/net/failsafe/failsafe_ops.c | 131
+++++++++++++++++++++++++++++++++-
Post by Stephen Hemminger
Post by Gaetan Rivet
2 files changed, 135 insertions(+), 2 deletions(-)
Once again what about case of dumb synthetic NIC combined with SR-IOV
VF?
Post by Stephen Hemminger
The VF has offloads the virtual NIC does not.
The rules for capabilities are a little complicated.
In the case both VF and the virtual NIC are present at launch, then the logical
AND is done both their capabilities sets.
If one has additional capabilities that the user is requesting, and the fail-safe
recognize them (currently, all RX offloads, as TX offloads were not yet
expressed by flags), and this capability is not supported by one slave, then
this offload is disabled in the configuration.
Post by Stephen Hemminger
What about late plugin. how do you program the offloads of the later
arriving VF device.
If the VF is not present at launch, then the fail-safe reads only the set of
capabilities from the fallback device. It does not have to do any AND-ing of
the flags.
The consequence is that upon plugin of the VF, the latter has to respect the
current running configuration. Probing will actually fail if some capability is not
supported (depending on PMDs), and the running configuration is not
updated as it is considered "live".
* Complicate a lot the fail-safe design and the rules applied in
the decision made on NIC configuration. The user then has bad
surprises upon seeing that his performance have been degraded
for arcane reasons.
* Emulate in software the offloads and try to advertize as many as
possible. This is done for example in the TAP PMD for some flags,
allowing those offloads to be used with hardware NICs.
The user then has a clear view of the available offloads by comparing
both sets of capabilities.
--
Gaëtan Rivet
6WIND
Gaetan Rivet
2017-05-29 13:42:21 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
drivers/net/failsafe/failsafe_private.h | 8 +++
drivers/net/failsafe/failsafe_rxtx.c | 124 ++++++++++++++++++++++++++------
2 files changed, 112 insertions(+), 20 deletions(-)

diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index f40ea2f..25a4dac 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -141,11 +141,18 @@ int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev);

/* RX / TX */

+void set_burst_fn(struct rte_eth_dev *dev, int force_safe);
+
uint16_t failsafe_rx_burst(void *rxq,
struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
uint16_t failsafe_tx_burst(void *txq,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts);

+uint16_t failsafe_rx_burst_fast(void *rxq,
+ struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t failsafe_tx_burst_fast(void *txq,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
/* ARGS */

int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
@@ -299,6 +306,7 @@ fs_switch_dev(struct rte_eth_dev *dev)
} else {
return;
}
+ set_burst_fn(dev, 0);
rte_wmb();
}

diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c
index a45b4e5..c15025f 100644
--- a/drivers/net/failsafe/failsafe_rxtx.c
+++ b/drivers/net/failsafe/failsafe_rxtx.c
@@ -31,16 +31,63 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

+#include <assert.h>
+
#include <rte_mbuf.h>
#include <rte_ethdev.h>

#include "failsafe_private.h"

-/*
- * TODO: write fast version,
- * without additional checks, to be activated once
- * everything has been verified to comply.
- */
+static inline int
+fs_rx_unsafe(struct sub_device *sdev)
+{
+ return (ETH(sdev) == NULL) ||
+ (ETH(sdev)->rx_pkt_burst == NULL) ||
+ (sdev->state != DEV_STARTED);
+}
+
+static inline int
+fs_tx_unsafe(struct sub_device *sdev)
+{
+ return (sdev == NULL) ||
+ (ETH(sdev) == NULL) ||
+ (ETH(sdev)->tx_pkt_burst == NULL) ||
+ (sdev->state != DEV_STARTED);
+}
+
+void
+set_burst_fn(struct rte_eth_dev *dev, int force_safe)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int need_safe;
+ int safe_set;
+
+ need_safe = force_safe;
+ FOREACH_SUBDEV(sdev, i, dev)
+ need_safe |= fs_rx_unsafe(sdev);
+ safe_set = (dev->rx_pkt_burst == &failsafe_rx_burst);
+ if (need_safe && !safe_set) {
+ DEBUG("Using safe RX bursts%s",
+ (force_safe ? " (forced)" : ""));
+ dev->rx_pkt_burst = &failsafe_rx_burst;
+ } else if (!need_safe && safe_set) {
+ DEBUG("Using fast RX bursts");
+ dev->rx_pkt_burst = &failsafe_rx_burst_fast;
+ }
+ need_safe = force_safe || fs_tx_unsafe(TX_SUBDEV(dev));
+ safe_set = (dev->tx_pkt_burst == &failsafe_tx_burst);
+ if (need_safe && !safe_set) {
+ DEBUG("Using safe TX bursts%s",
+ (force_safe ? " (forced)" : ""));
+ dev->tx_pkt_burst = &failsafe_tx_burst;
+ } else if (!need_safe && safe_set) {
+ DEBUG("Using fast TX bursts");
+ dev->tx_pkt_burst = &failsafe_tx_burst_fast;
+ }
+ rte_wmb();
+}
+
uint16_t
failsafe_rx_burst(void *queue,
struct rte_mbuf **rx_pkts,
@@ -63,11 +110,7 @@ failsafe_rx_burst(void *queue,
if (i == priv->subs_tail)
i = priv->subs_head;
sdev = &priv->subs[i];
- if (unlikely(ETH(sdev) == NULL))
- continue;
- if (unlikely(ETH(sdev)->rx_pkt_burst == NULL))
- continue;
- if (unlikely(sdev->state != DEV_STARTED))
+ if (unlikely(fs_rx_unsafe(sdev)))
continue;
sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
nb_rx = ETH(sdev)->
@@ -80,11 +123,40 @@ failsafe_rx_burst(void *queue,
return 0;
}

-/*
- * TODO: write fast version,
- * without additional checks, to be activated once
- * everything has been verified to comply.
- */
+uint16_t
+failsafe_rx_burst_fast(void *queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ void *sub_rxq;
+ uint16_t nb_rx;
+ uint8_t nb_polled, nb_subs;
+ uint8_t i;
+
+ rxq = queue;
+ priv = rxq->priv;
+ nb_subs = priv->subs_tail - priv->subs_head;
+ nb_polled = 0;
+ for (i = rxq->last_polled; nb_polled < nb_subs; nb_polled++) {
+ i++;
+ if (i == priv->subs_tail)
+ i = priv->subs_head;
+ sdev = &priv->subs[i];
+ assert(!fs_rx_unsafe(sdev));
+ sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ nb_rx = ETH(sdev)->
+ rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ if (nb_rx) {
+ rxq->last_polled = i;
+ return nb_rx;
+ }
+ }
+ return 0;
+}
+
uint16_t
failsafe_tx_burst(void *queue,
struct rte_mbuf **tx_pkts,
@@ -96,12 +168,24 @@ failsafe_tx_burst(void *queue,

txq = queue;
sdev = TX_SUBDEV(txq->priv->dev);
- if (unlikely(sdev == NULL))
- return 0;
- if (unlikely(ETH(sdev) == NULL))
- return 0;
- if (unlikely(ETH(sdev)->tx_pkt_burst == NULL))
+ if (unlikely(fs_tx_unsafe(sdev)))
return 0;
sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
}
+
+uint16_t
+failsafe_tx_burst_fast(void *queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ void *sub_txq;
+
+ txq = queue;
+ sdev = TX_SUBDEV(txq->priv->dev);
+ assert(!fs_tx_unsafe(sdev));
+ sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
+ return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+}
--
2.1.4
Gaetan Rivet
2017-05-29 13:42:22 UTC
Permalink
Listen to INTR_RMV events issued by slaves.
Add atomic flags on slave queues to detect use of slave bursts function.
If a removal is detected, set the recollection flag on this slave.

During a slave upkeep round, if its recollection flag is set and its
burst functions are not in use by any thread, remove that slave.

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/fail_safe.rst | 14 +++++
drivers/net/failsafe/failsafe.c | 1 +
drivers/net/failsafe/failsafe_args.c | 13 +++++
drivers/net/failsafe/failsafe_eal.c | 3 +-
drivers/net/failsafe/failsafe_ether.c | 93 ++++++++++++++++++++++++++++++---
drivers/net/failsafe/failsafe_ops.c | 38 ++++++++++++--
drivers/net/failsafe/failsafe_private.h | 73 +++++++++++++++++++++++---
drivers/net/failsafe/failsafe_rxtx.c | 17 +++++-
8 files changed, 232 insertions(+), 20 deletions(-)

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index 1b6e110..4154f0a 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -51,6 +51,12 @@ The Fail-safe PMD only supports a limited set of features. If you plan to use a
device underneath the Fail-safe PMD with a specific feature, this feature must
be supported by the Fail-safe PMD to avoid throwing any error.

+A notable exception is the device removal feature. The fail-safe PMD being a
+virtual device, it cannot currently be removed in the sense of a specific bus
+hotplug, like for PCI for example. It will however enable this feature for its
+sub-device automatically, detecting those that are capable and register the
+relevant callback for such event.
+
Check the feature matrix for the complete set of supported features.

Compilation options
@@ -170,3 +176,11 @@ emit and receive packets. It will store any applied configuration, and try to
apply it upon the probing of its missing sub-device. After this configuration
pass, the new sub-device will be synchronized with other sub-devices, i.e. be
started if the fail-safe PMD has been started by the user before.
+
+Plug-out feature
+----------------
+
+A sub-device supporting the device removal event can be removed from its bus at
+any time. The fail-safe PMD will register a callback for such event and react
+accordingly. It will try to safely stop, close and uninit the sub-device having
+emitted this event, allowing it to free its eventual resources.
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 6557255..4d35860 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -132,6 +132,7 @@ fs_hotplug_alarm(void *arg)
if (!PRIV(dev)->pending_alarm)
return;
PRIV(dev)->pending_alarm = 0;
+ failsafe_dev_remove(dev);
FOREACH_SUBDEV(sdev, i, dev)
if (sdev->state != PRIV(dev)->state)
break;
diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
index c723ca3..dd55aaf 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -443,6 +443,17 @@ failsafe_args_count_subdevice(struct rte_eth_dev *dev,
dev, params);
}

+static int
+fs_parse_sub_device(struct sub_device *sdev)
+{
+ struct rte_devargs *da;
+ char devstr[DEVARGS_MAXLEN] = "";
+
+ da = &sdev->devargs;
+ snprintf(devstr, sizeof(devstr), "%s,%s", da->name, da->args);
+ return fs_parse_device(sdev, devstr);
+}
+
int
failsafe_args_parse_subs(struct rte_eth_dev *dev)
{
@@ -455,6 +466,8 @@ failsafe_args_parse_subs(struct rte_eth_dev *dev)
continue;
if (sdev->cmdline)
ret = fs_execute_cmd(sdev, sdev->cmdline);
+ else
+ ret = fs_parse_sub_device(sdev);
if (ret == 0)
sdev->state = DEV_PARSED;
}
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index 1d9ddab..4e5d70e 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -81,6 +81,7 @@ fs_bus_init(struct rte_eth_dev *dev)
return -ENODEV;
}
SUB_ID(sdev) = i;
+ sdev->fs_dev = dev;
sdev->dev = ETH(sdev)->device;
ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
sdev->state = DEV_PROBED;
@@ -98,7 +99,7 @@ failsafe_eal_init(struct rte_eth_dev *dev)
return ret;
if (PRIV(dev)->state < DEV_PROBED)
PRIV(dev)->state = DEV_PROBED;
- fs_switch_dev(dev);
+ fs_switch_dev(dev, NULL);
return 0;
}

diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 2958207..bb6fcff 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -250,6 +250,64 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
return 0;
}

+static void
+fs_dev_remove(struct sub_device *sdev)
+{
+ if (sdev == NULL)
+ return;
+ switch (sdev->state) {
+ case DEV_STARTED:
+ rte_eth_dev_stop(PORT_ID(sdev));
+ sdev->state = DEV_ACTIVE;
+ /* fallthrough */
+ case DEV_ACTIVE:
+ rte_eth_dev_close(PORT_ID(sdev));
+ sdev->state = DEV_PROBED;
+ /* fallthrough */
+ case DEV_PROBED:
+ if (sdev->bus->detach == NULL ||
+ sdev->bus->detach(sdev->dev)) {
+ ERROR("Bus detach failed for sub_device %u",
+ SUB_ID(sdev));
+ } else {
+ ETH(sdev)->state = RTE_ETH_DEV_UNUSED;
+ }
+ sdev->state = DEV_PARSED;
+ /* fallthrough */
+ case DEV_PARSED:
+ case DEV_UNDEFINED:
+ sdev->state = DEV_UNDEFINED;
+ /* the end */
+ break;
+ }
+ failsafe_hotplug_alarm_install(sdev->fs_dev);
+}
+
+static inline int
+fs_rxtx_clean(struct sub_device *sdev)
+{
+ uint16_t i;
+
+ for (i = 0; i < ETH(sdev)->data->nb_rx_queues; i++)
+ if (FS_ATOMIC_RX(sdev, i))
+ return 0;
+ for (i = 0; i < ETH(sdev)->data->nb_tx_queues; i++)
+ if (FS_ATOMIC_TX(sdev, i))
+ return 0;
+ return 1;
+}
+
+void
+failsafe_dev_remove(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->remove && fs_rxtx_clean(sdev))
+ fs_dev_remove(sdev);
+}
+
int
failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
{
@@ -263,13 +321,13 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)

ret = failsafe_args_parse_subs(dev);
if (ret)
- return ret;
+ goto err_remove;

if (PRIV(dev)->state < DEV_PROBED)
return 0;
ret = failsafe_eal_init(dev);
if (ret)
- return ret;
+ goto err_remove;
if (PRIV(dev)->state < DEV_ACTIVE)
return 0;
inactive = 0;
@@ -278,15 +336,14 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
inactive |= UINT32_C(1) << i;
ret = dev->dev_ops->dev_configure(dev);
if (ret)
- return ret;
+ goto err_remove;
FOREACH_SUBDEV(sdev, i, dev) {
if (inactive & (UINT32_C(1) << i)) {
ret = fs_eth_dev_conf_apply(dev, sdev);
if (ret) {
ERROR("Could not apply configuration to sub_device %d",
i);
- /* TODO: disable device */
- return ret;
+ goto err_remove;
}
}
}
@@ -300,6 +357,30 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
return 0;
ret = dev->dev_ops->dev_start(dev);
if (ret)
- return ret;
+ goto err_remove;
return 0;
+err_remove:
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state != PRIV(dev)->state)
+ sdev->remove = 1;
+ return ret;
+}
+
+void
+failsafe_eth_rmv_event_callback(uint8_t port_id __rte_unused,
+ enum rte_eth_event_type event __rte_unused,
+ void *cb_arg)
+{
+ struct sub_device *sdev = cb_arg;
+
+ /* Switch as soon as possible tx_dev. */
+ fs_switch_dev(sdev->fs_dev, sdev);
+ /* Use safe bursts in any case. */
+ set_burst_fn(sdev->fs_dev, 1);
+ usleep(FAILSAFE_PLUGOUT_ASYNC_RESCHED_US);
+ /*
+ * Async removal, the sub-PMD will try to unregister
+ * the callback at the source of the current thread context.
+ */
+ sdev->remove = 1;
}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 5fb0135..2e1c798 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -33,6 +33,8 @@

#include <assert.h>
#include <stdint.h>
+
+#include <rte_atomic.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>
#include <rte_flow.h>
@@ -204,9 +206,21 @@ fs_dev_configure(struct rte_eth_dev *dev)
}
}
FOREACH_SUBDEV(sdev, i, dev) {
+ int rmv_interrupt = 0;
+
if (sdev->state != DEV_PROBED)
continue;
+
+ rmv_interrupt = ETH(sdev)->data->dev_flags &
+ RTE_ETH_DEV_INTR_RMV;
+ if (rmv_interrupt) {
+ DEBUG("Enabling RMV interrupts for sub_device %d", i);
+ dev->data->dev_conf.intr_conf.rmv = 1;
+ } else {
+ DEBUG("sub_device %d does not support RMV event", i);
+ }
DEBUG("Configuring sub-device %d", i);
+ sdev->remove = 0;
ret = rte_eth_dev_configure(PORT_ID(sdev),
dev->data->nb_rx_queues,
dev->data->nb_tx_queues,
@@ -215,6 +229,16 @@ fs_dev_configure(struct rte_eth_dev *dev)
ERROR("Could not configure sub_device %d", i);
return ret;
}
+ if (rmv_interrupt) {
+ ret = rte_eth_dev_callback_register(PORT_ID(sdev),
+ RTE_ETH_EVENT_INTR_RMV,
+ failsafe_eth_rmv_event_callback,
+ sdev);
+ if (ret)
+ WARN("Failed to register RMV callback for sub_device %d",
+ SUB_ID(sdev));
+ }
+ dev->data->dev_conf.intr_conf.rmv = 0;
sdev->state = DEV_ACTIVE;
}
if (PRIV(dev)->state < DEV_ACTIVE)
@@ -240,7 +264,7 @@ fs_dev_start(struct rte_eth_dev *dev)
}
if (PRIV(dev)->state < DEV_STARTED)
PRIV(dev)->state = DEV_STARTED;
- fs_switch_dev(dev);
+ fs_switch_dev(dev, NULL);
return 0;
}

@@ -351,10 +375,14 @@ fs_rx_queue_setup(struct rte_eth_dev *dev,
fs_rx_queue_release(rxq);
dev->data->rx_queues[rx_queue_id] = NULL;
}
- rxq = rte_zmalloc(NULL, sizeof(*rxq),
+ rxq = rte_zmalloc(NULL,
+ sizeof(*rxq) +
+ sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
RTE_CACHE_LINE_SIZE);
if (rxq == NULL)
return -ENOMEM;
+ FOREACH_SUBDEV(sdev, i, dev)
+ rte_atomic64_init(&rxq->refcnt[i]);
rxq->qid = rx_queue_id;
rxq->socket_id = socket_id;
rxq->info.mp = mb_pool;
@@ -414,10 +442,14 @@ fs_tx_queue_setup(struct rte_eth_dev *dev,
fs_tx_queue_release(txq);
dev->data->tx_queues[tx_queue_id] = NULL;
}
- txq = rte_zmalloc("ethdev TX queue", sizeof(*txq),
+ txq = rte_zmalloc("ethdev TX queue",
+ sizeof(*txq) +
+ sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
RTE_CACHE_LINE_SIZE);
if (txq == NULL)
return -ENOMEM;
+ FOREACH_SUBDEV(sdev, i, dev)
+ rte_atomic64_init(&txq->refcnt[i]);
txq->qid = tx_queue_id;
txq->socket_id = socket_id;
txq->info.conf = *tx_conf;
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 25a4dac..ca20109 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -36,6 +36,7 @@

#include <sys/queue.h>

+#include <rte_atomic.h>
#include <rte_dev.h>
#include <rte_ethdev.h>
#include <rte_devargs.h>
@@ -52,6 +53,7 @@
""

#define FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS 2000
+#define FAILSAFE_PLUGOUT_ASYNC_RESCHED_US 100

#define FAILSAFE_MAX_ETHPORTS 2
#define FAILSAFE_MAX_ETHADDR 128
@@ -65,6 +67,7 @@ struct rxq {
uint8_t last_polled;
unsigned int socket_id;
struct rte_eth_rxq_info info;
+ rte_atomic64_t refcnt[];
};

struct txq {
@@ -72,6 +75,7 @@ struct txq {
uint16_t qid;
unsigned int socket_id;
struct rte_eth_txq_info info;
+ rte_atomic64_t refcnt[];
};

struct rte_flow {
@@ -101,6 +105,10 @@ struct sub_device {
enum dev_state state;
/* Some device are defined as a command line */
char *cmdline;
+ /* fail-safe device backreference */
+ struct rte_eth_dev *fs_dev;
+ /* flag calling for recollection */
+ volatile unsigned int remove:1;
};

struct fs_priv {
@@ -168,6 +176,10 @@ int failsafe_eal_uninit(struct rte_eth_dev *dev);
/* ETH_DEV */

int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
+void failsafe_dev_remove(struct rte_eth_dev *dev);
+void failsafe_eth_rmv_event_callback(uint8_t port_id,
+ enum rte_eth_event_type type,
+ void *arg);

/* GLOBALS */

@@ -233,6 +245,39 @@ extern int mac_from_arg;
#define SUBOPS(s, ops) \
(ETH(s)->dev_ops->ops)

+/**
+ * Atomic guard
+ */
+
+/**
+ * a: (rte_atomic64_t *)
+ */
+#define FS_ATOMIC_P(a) \
+ rte_atomic64_add(&(a), 1)
+
+/**
+ * a: (rte_atomic64_t *)
+ */
+#define FS_ATOMIC_V(a) \
+ rte_atomic64_sub(&(a), 1)
+
+/**
+ * s: (struct sub_device *)
+ * i: uint16_t qid
+ */
+#define FS_ATOMIC_RX(s, i) \
+ rte_atomic64_read( \
+ &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] \
+ )
+/**
+ * s: (struct sub_device *)
+ * i: uint16_t qid
+ */
+#define FS_ATOMIC_TX(s, i) \
+ rte_atomic64_read( \
+ &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] \
+ )
+
#ifndef NDEBUG
#include <stdio.h>
#define DEBUG__(m, ...) \
@@ -274,33 +319,45 @@ fs_find_next(struct rte_eth_dev *dev, uint8_t sid,
return sid;
}

+/*
+ * Switch emitting device.
+ * If banned is set, banned must not be considered for
+ * the role of emitting device.
+ */
static inline void
-fs_switch_dev(struct rte_eth_dev *dev)
+fs_switch_dev(struct rte_eth_dev *dev,
+ struct sub_device *banned)
{
+ struct sub_device *txd;
enum dev_state req_state;

req_state = PRIV(dev)->state;
- if (PREFERRED_SUBDEV(dev)->state >= req_state) {
- if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
- (TX_SUBDEV(dev) == NULL ||
+ txd = TX_SUBDEV(dev);
+ if (PREFERRED_SUBDEV(dev)->state >= req_state &&
+ PREFERRED_SUBDEV(dev) != banned) {
+ if (txd != PREFERRED_SUBDEV(dev) &&
+ (txd == NULL ||
(req_state == DEV_STARTED) ||
- (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
+ (txd && txd->state < DEV_STARTED))) {
DEBUG("Switching tx_dev to preferred sub_device");
PRIV(dev)->subs_tx = 0;
}
- } else if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) ||
- TX_SUBDEV(dev) == NULL) {
+ } else if ((txd && txd->state < req_state) ||
+ txd == NULL ||
+ txd == banned) {
struct sub_device *sdev;
uint8_t i;

/* Using acceptable device */
FOREACH_SUBDEV_ST(sdev, i, dev, req_state) {
+ if (sdev == banned)
+ continue;
DEBUG("Switching tx_dev to sub_device %d",
i);
PRIV(dev)->subs_tx = i;
break;
}
- } else if (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) {
+ } else if (txd && txd->state < req_state) {
DEBUG("No device ready, deactivating tx_dev");
PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
} else {
diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c
index c15025f..82a8c4e 100644
--- a/drivers/net/failsafe/failsafe_rxtx.c
+++ b/drivers/net/failsafe/failsafe_rxtx.c
@@ -33,6 +33,7 @@

#include <assert.h>

+#include <rte_atomic.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>

@@ -113,8 +114,10 @@ failsafe_rx_burst(void *queue,
if (unlikely(fs_rx_unsafe(sdev)))
continue;
sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ FS_ATOMIC_P(rxq->refcnt[sdev->sid]);
nb_rx = ETH(sdev)->
rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ FS_ATOMIC_V(rxq->refcnt[sdev->sid]);
if (nb_rx) {
rxq->last_polled = i;
return nb_rx;
@@ -147,8 +150,10 @@ failsafe_rx_burst_fast(void *queue,
sdev = &priv->subs[i];
assert(!fs_rx_unsafe(sdev));
sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ FS_ATOMIC_P(rxq->refcnt[sdev->sid]);
nb_rx = ETH(sdev)->
rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ FS_ATOMIC_V(rxq->refcnt[sdev->sid]);
if (nb_rx) {
rxq->last_polled = i;
return nb_rx;
@@ -165,13 +170,17 @@ failsafe_tx_burst(void *queue,
struct sub_device *sdev;
struct txq *txq;
void *sub_txq;
+ uint16_t nb_tx;

txq = queue;
sdev = TX_SUBDEV(txq->priv->dev);
if (unlikely(fs_tx_unsafe(sdev)))
return 0;
sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
- return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+ FS_ATOMIC_P(txq->refcnt[sdev->sid]);
+ nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+ FS_ATOMIC_V(txq->refcnt[sdev->sid]);
+ return nb_tx;
}

uint16_t
@@ -182,10 +191,14 @@ failsafe_tx_burst_fast(void *queue,
struct sub_device *sdev;
struct txq *txq;
void *sub_txq;
+ uint16_t nb_tx;

txq = queue;
sdev = TX_SUBDEV(txq->priv->dev);
assert(!fs_tx_unsafe(sdev));
sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
- return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+ FS_ATOMIC_P(txq->refcnt[sdev->sid]);
+ nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+ FS_ATOMIC_V(txq->refcnt[sdev->sid]);
+ return nb_tx;
}
--
2.1.4
Gaetan Rivet
2017-05-29 13:42:23 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
---
doc/guides/nics/features/failsafe.ini | 1 +
drivers/net/failsafe/failsafe.c | 1 +
drivers/net/failsafe/failsafe_ether.c | 15 +++++++++++++++
drivers/net/failsafe/failsafe_ops.c | 23 +++++++++++++++++++++++
drivers/net/failsafe/failsafe_private.h | 3 +++
5 files changed, 43 insertions(+)

diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
index 257f579..251ce55 100644
--- a/doc/guides/nics/features/failsafe.ini
+++ b/doc/guides/nics/features/failsafe.ini
@@ -5,6 +5,7 @@
;
[Features]
Link status = Y
+Link status event = Y
Queue start/stop = Y
MTU update = Y
Jumbo frame = Y
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 4d35860..151f823 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -240,6 +240,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
mac->addr_bytes[0], mac->addr_bytes[1],
mac->addr_bytes[2], mac->addr_bytes[3],
mac->addr_bytes[4], mac->addr_bytes[5]);
+ dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
return 0;
free_args:
failsafe_args_free(dev);
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index bb6fcff..3401a18 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -384,3 +384,18 @@ failsafe_eth_rmv_event_callback(uint8_t port_id __rte_unused,
*/
sdev->remove = 1;
}
+
+void
+failsafe_eth_lsc_event_callback(uint8_t port_id __rte_unused,
+ enum rte_eth_event_type event __rte_unused,
+ void *cb_arg)
+{
+ struct rte_eth_dev *dev = cb_arg;
+ int ret;
+
+ ret = dev->dev_ops->link_update(dev, 0);
+ /* We must pass on the LSC event */
+ if (ret)
+ _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC,
+ NULL);
+}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 2e1c798..05221bc 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -207,6 +207,8 @@ fs_dev_configure(struct rte_eth_dev *dev)
}
FOREACH_SUBDEV(sdev, i, dev) {
int rmv_interrupt = 0;
+ int lsc_interrupt = 0;
+ int lsc_enabled;

if (sdev->state != DEV_PROBED)
continue;
@@ -219,6 +221,17 @@ fs_dev_configure(struct rte_eth_dev *dev)
} else {
DEBUG("sub_device %d does not support RMV event", i);
}
+ lsc_enabled = dev->data->dev_conf.intr_conf.lsc;
+ lsc_interrupt = lsc_enabled &&
+ (ETH(sdev)->data->dev_flags &
+ RTE_ETH_DEV_INTR_LSC);
+ if (lsc_interrupt) {
+ DEBUG("Enabling LSC interrupts for sub_device %d", i);
+ dev->data->dev_conf.intr_conf.lsc = 1;
+ } else if (lsc_enabled && !lsc_interrupt) {
+ DEBUG("Disabling LSC interrupts for sub_device %d", i);
+ dev->data->dev_conf.intr_conf.lsc = 0;
+ }
DEBUG("Configuring sub-device %d", i);
sdev->remove = 0;
ret = rte_eth_dev_configure(PORT_ID(sdev),
@@ -239,6 +252,16 @@ fs_dev_configure(struct rte_eth_dev *dev)
SUB_ID(sdev));
}
dev->data->dev_conf.intr_conf.rmv = 0;
+ if (lsc_interrupt) {
+ ret = rte_eth_dev_callback_register(PORT_ID(sdev),
+ RTE_ETH_EVENT_INTR_LSC,
+ failsafe_eth_lsc_event_callback,
+ dev);
+ if (ret)
+ WARN("Failed to register LSC callback for sub_device %d",
+ SUB_ID(sdev));
+ }
+ dev->data->dev_conf.intr_conf.lsc = lsc_enabled;
sdev->state = DEV_ACTIVE;
}
if (PRIV(dev)->state < DEV_ACTIVE)
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index ca20109..66303cd 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -180,6 +180,9 @@ void failsafe_dev_remove(struct rte_eth_dev *dev);
void failsafe_eth_rmv_event_callback(uint8_t port_id,
enum rte_eth_event_type type,
void *arg);
+void failsafe_eth_lsc_event_callback(uint8_t port_id,
+ enum rte_eth_event_type event,
+ void *cb_arg);

/* GLOBALS */
--
2.1.4
Gaetan Rivet
2017-05-29 13:42:24 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
---
drivers/net/failsafe/failsafe_ether.c | 34 +++++++++++++++++++++++++++++++--
drivers/net/failsafe/failsafe_flow.c | 29 ++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_private.h | 4 ++++
3 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 3401a18..b7ec262 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -74,6 +74,28 @@ fs_flow_complain(struct rte_flow_error *error)
}

static int
+eth_dev_flow_isolate_set(struct rte_eth_dev *dev,
+ struct sub_device *sdev)
+{
+ struct rte_flow_error ferror;
+ int ret;
+
+ if (!PRIV(dev)->flow_isolated) {
+ DEBUG("Flow isolation already disabled");
+ } else {
+ DEBUG("Enabling flow isolation");
+ ret = rte_flow_isolate(PORT_ID(sdev),
+ PRIV(dev)->flow_isolated,
+ &ferror);
+ if (ret) {
+ fs_flow_complain(&ferror);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
struct sub_device *sdev)
{
@@ -331,9 +353,17 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
if (PRIV(dev)->state < DEV_ACTIVE)
return 0;
inactive = 0;
- FOREACH_SUBDEV(sdev, i, dev)
- if (sdev->state == DEV_PROBED)
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state == DEV_PROBED) {
inactive |= UINT32_C(1) << i;
+ ret = eth_dev_flow_isolate_set(dev, sdev);
+ if (ret) {
+ ERROR("Could not apply configuration to sub_device %d",
+ i);
+ goto err_remove;
+ }
+ }
+ }
ret = dev->dev_ops->dev_configure(dev);
if (ret)
goto err_remove;
diff --git a/drivers/net/failsafe/failsafe_flow.c b/drivers/net/failsafe/failsafe_flow.c
index d8f59a1..a5598ae 100644
--- a/drivers/net/failsafe/failsafe_flow.c
+++ b/drivers/net/failsafe/failsafe_flow.c
@@ -207,10 +207,39 @@ fs_flow_query(struct rte_eth_dev *dev,
return -1;
}

+static int
+fs_flow_isolate(struct rte_eth_dev *dev,
+ int set,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state < DEV_PROBED)
+ continue;
+ DEBUG("Calling rte_flow_isolate on sub_device %d", i);
+ if (PRIV(dev)->flow_isolated != sdev->flow_isolated)
+ WARN("flow isolation mode of sub_device %d in incoherent state.",
+ i);
+ ret = rte_flow_isolate(PORT_ID(sdev), set, error);
+ if (ret) {
+ ERROR("Operation rte_flow_isolate failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ sdev->flow_isolated = set;
+ }
+ PRIV(dev)->flow_isolated = set;
+ return 0;
+}
+
const struct rte_flow_ops fs_flow_ops = {
.validate = fs_flow_validate,
.create = fs_flow_create,
.destroy = fs_flow_destroy,
.flush = fs_flow_flush,
.query = fs_flow_query,
+ .isolate = fs_flow_isolate,
};
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 66303cd..a3885e8 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -109,6 +109,8 @@ struct sub_device {
struct rte_eth_dev *fs_dev;
/* flag calling for recollection */
volatile unsigned int remove:1;
+ /* flow isolation state */
+ int flow_isolated:1;
};

struct fs_priv {
@@ -140,6 +142,8 @@ struct fs_priv {
*/
enum dev_state state;
unsigned int pending_alarm:1; /* An alarm is pending */
+ /* flow isolation state */
+ int flow_isolated:1;
};

/* MISC */
--
2.1.4
Ferruh Yigit
2017-05-29 14:03:40 UTC
Permalink
Post by Gaetan Rivet
This PMD intercepts and manages Ethernet device removal events issued by
slave PMDs and re-initializes them transparently when brought back so that
existing applications do not need to be modified to benefit from true
hot-plugging support.
The stacked PMD approach shares many similarities with the bonding PMD but
with a different purpose. While bonding provides the ability to group
several links into a single logical device for enhanced throughput and
supports fail-over at link level, this one manages the sudden disappearance
of the underlying device; it guarantees applications face a valid device in
working order at all times.
Each fail-safe instance is configured to run atop one or several
devices, with one defined as the preferred device. Hot-plug events are
handled on all of them, and Tx is always directed to the preferred device
if present or to the next available failover device (Rx is always performed
on all devices for simplicity).
Moreover, the configured slaves (preferred or failover) do not need to be
present at initialization time and may appear later.
Slaves configuration is continuously synchronized with that of the virtual
device, which exposes their common set of capabilities to the application.
Failure to apply the current configuration state to a slave for any reason
simply reschedules its initialization.
eal: complete attach / detach support
http://dpdk.org/ml/archives/dev/2017-May/066366.html
http://dpdk.org/dev/patchwork/patch/24522/
ethdev: add flow API rule copy function
http://dpdk.org/ml/archives/dev/2017-May/066145.html
http://dpdk.org/dev/patchwork/patch/24406/
ethdev: add isolated mode to flow API
http://dpdk.org/ml/archives/dev/2017-April/064327.html
http://dpdk.org/dev/patchwork/patch/23741/
- Wrote documentation
- Fixed commit logs, signed-off-by
- Added LSC event support
- A few minor fixes
- Numerous bug fixes.
- Complete sub-EAL rework to follow new bus API.
- burst protection on sub removal.
- more flexible sub definition.
- flow isolated mode support.
- Split back commits
net/failsafe: add fast burst functions
net/failsafe: support device removal
That were squashed by error during a rebase
- Fix segfault on port plugin
- Fix isolate mode support for MLX4 ports plugin
ethdev: save VLAN filter setting
ethdev: add deferred intermediate device state
ethdev: count devices consistently
net/failsafe: add fail-safe PMD
net/failsafe: add plug-in support
net/failsafe: add flexible device definition
net/failsafe: support flow API
net/failsafe: support offload capabilities
net/failsafe: add fast burst functions
net/failsafe: support device removal
net/failsafe: support link status change event
net/failsafe: support flow API isolation mode
Bruce, Neil,

Any comment on to the latest version of the PMD?

Thomas,

Should this discussed in the tech-board, again for this release?

Thanks,
ferruh
Stephen Hemminger
2017-05-31 15:24:56 UTC
Permalink
On Mon, 29 May 2017 15:03:40 +0100
Post by Ferruh Yigit
Should this discussed in the tech-board, again for this release?
The primary discussion area should be the mailing list not the tech board.
Comments in public please.
Stephen Hemminger
2017-06-07 17:30:11 UTC
Permalink
On Mon, 29 May 2017 15:42:12 +0200
Post by Gaetan Rivet
This PMD intercepts and manages Ethernet device removal events issued by
slave PMDs and re-initializes them transparently when brought back so that
existing applications do not need to be modified to benefit from true
hot-plugging support.
The stacked PMD approach shares many similarities with the bonding PMD but
with a different purpose. While bonding provides the ability to group
several links into a single logical device for enhanced throughput and
supports fail-over at link level, this one manages the sudden disappearance
of the underlying device; it guarantees applications face a valid device in
working order at all times.
Each fail-safe instance is configured to run atop one or several
devices, with one defined as the preferred device. Hot-plug events are
handled on all of them, and Tx is always directed to the preferred device
if present or to the next available failover device (Rx is always performed
on all devices for simplicity).
Moreover, the configured slaves (preferred or failover) do not need to be
present at initialization time and may appear later.
Slaves configuration is continuously synchronized with that of the virtual
device, which exposes their common set of capabilities to the application.
Failure to apply the current configuration state to a slave for any reason
simply reschedules its initialization.
eal: complete attach / detach support
http://dpdk.org/ml/archives/dev/2017-May/066366.html
http://dpdk.org/dev/patchwork/patch/24522/
ethdev: add flow API rule copy function
http://dpdk.org/ml/archives/dev/2017-May/066145.html
http://dpdk.org/dev/patchwork/patch/24406/
ethdev: add isolated mode to flow API
http://dpdk.org/ml/archives/dev/2017-April/064327.html
http://dpdk.org/dev/patchwork/patch/23741/
- Wrote documentation
- Fixed commit logs, signed-off-by
- Added LSC event support
- A few minor fixes
- Numerous bug fixes.
- Complete sub-EAL rework to follow new bus API.
- burst protection on sub removal.
- more flexible sub definition.
- flow isolated mode support.
- Split back commits
net/failsafe: add fast burst functions
net/failsafe: support device removal
That were squashed by error during a rebase
- Fix segfault on port plugin
- Fix isolate mode support for MLX4 ports plugin
ethdev: save VLAN filter setting
ethdev: add deferred intermediate device state
ethdev: count devices consistently
net/failsafe: add fail-safe PMD
net/failsafe: add plug-in support
net/failsafe: add flexible device definition
net/failsafe: support flow API
net/failsafe: support offload capabilities
net/failsafe: add fast burst functions
net/failsafe: support device removal
net/failsafe: support link status change event
net/failsafe: support flow API isolation mode
MAINTAINERS | 5 +
config/common_base | 6 +
doc/guides/nics/fail_safe.rst | 186 +++++++
doc/guides/nics/features/failsafe.ini | 32 ++
doc/guides/nics/index.rst | 1 +
drivers/net/Makefile | 2 +
drivers/net/failsafe/Makefile | 74 +++
drivers/net/failsafe/failsafe.c | 305 +++++++++++
drivers/net/failsafe/failsafe_args.c | 475 +++++++++++++++++
drivers/net/failsafe/failsafe_eal.c | 140 +++++
drivers/net/failsafe/failsafe_ether.c | 431 ++++++++++++++++
drivers/net/failsafe/failsafe_flow.c | 245 +++++++++
drivers/net/failsafe/failsafe_ops.c | 869 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_private.h | 377 ++++++++++++++
drivers/net/failsafe/failsafe_rxtx.c | 204 ++++++++
lib/librte_ether/rte_ethdev.c | 38 +-
lib/librte_ether/rte_ethdev.h | 24 +-
mk/rte.app.mk | 1 +
18 files changed, 3399 insertions(+), 16 deletions(-)
create mode 100644 doc/guides/nics/fail_safe.rst
create mode 100644 doc/guides/nics/features/failsafe.ini
create mode 100644 drivers/net/failsafe/Makefile
create mode 100644 drivers/net/failsafe/failsafe.c
create mode 100644 drivers/net/failsafe/failsafe_args.c
create mode 100644 drivers/net/failsafe/failsafe_eal.c
create mode 100644 drivers/net/failsafe/failsafe_ether.c
create mode 100644 drivers/net/failsafe/failsafe_flow.c
create mode 100644 drivers/net/failsafe/failsafe_ops.c
create mode 100644 drivers/net/failsafe/failsafe_private.h
create mode 100644 drivers/net/failsafe/failsafe_rxtx.c
Discussed in tech-board meeting and accepted for inclusion in 17.08 release.
Longer term this driver would benefit from better hot plug support

Reviewed-by: Stephen Hemminger <***@networkplumber.org>
Gaetan Rivet
2017-06-07 23:59:50 UTC
Permalink
This PMD intercepts and manages Ethernet device removal events issued by
slave PMDs and re-initializes them transparently when brought back so that
existing applications do not need to be modified to benefit from true
hot-plugging support.

The stacked PMD approach shares many similarities with the bonding PMD but
with a different purpose. While bonding provides the ability to group
several links into a single logical device for enhanced throughput and
supports fail-over at link level, this one manages the sudden disappearance
of the underlying device; it guarantees applications face a valid device in
working order at all times.

Each fail-safe instance is configured to run atop one or several
devices, with one defined as the preferred device. Hot-plug events are
handled on all of them, and Tx is always directed to the preferred device
if present or to the next available failover device (Rx is always performed
on all devices for simplicity).

Moreover, the configured slaves (preferred or failover) do not need to be
present at initialization time and may appear later.

Slaves configuration is continuously synchronized with that of the virtual
device, which exposes their common set of capabilities to the application.
Failure to apply the current configuration state to a slave for any reason
simply reschedules its initialization.

This patchset depends on:

eal: complete attach / detach support
http://dpdk.org/ml/archives/dev/2017-May/066366.html
http://dpdk.org/dev/patchwork/patch/24522/

ethdev: add flow API rule copy function
http://dpdk.org/ml/archives/dev/2017-May/066145.html
http://dpdk.org/dev/patchwork/patch/24406/

ethdev: add isolated mode to flow API
http://dpdk.org/ml/archives/dev/2017-April/064327.html
http://dpdk.org/dev/patchwork/patch/23741/

v1 --> v2:

- Wrote documentation
- Fixed commit logs, signed-off-by
- Added LSC event support
- A few minor fixes

v2 --> v3:

- Numerous bug fixes.
- Complete sub-EAL rework to follow new bus API.
- burst protection on sub removal.
- more flexible sub definition.
- flow isolated mode support.

v3 --> v4:

- Split back commits
net/failsafe: add fast burst functions
net/failsafe: support device removal
That were squashed by error during a rebase
- Fix segfault on port plugin
- Fix isolate mode support for MLX4 ports plugin

v4 --> v5:

- Follow new plug / unplug API.

Gaetan Rivet (12):
ethdev: save VLAN filter setting
ethdev: add deferred intermediate device state
ethdev: count devices consistently
net/failsafe: add fail-safe PMD
net/failsafe: add plug-in support
net/failsafe: add flexible device definition
net/failsafe: support flow API
net/failsafe: support offload capabilities
net/failsafe: add fast burst functions
net/failsafe: support device removal
net/failsafe: support link status change event
net/failsafe: support flow API isolation mode

MAINTAINERS | 5 +
config/common_base | 6 +
doc/guides/nics/fail_safe.rst | 186 +++++++
doc/guides/nics/features/failsafe.ini | 32 ++
doc/guides/nics/index.rst | 1 +
drivers/net/Makefile | 2 +
drivers/net/failsafe/Makefile | 74 +++
drivers/net/failsafe/failsafe.c | 305 +++++++++++
drivers/net/failsafe/failsafe_args.c | 475 +++++++++++++++++
drivers/net/failsafe/failsafe_eal.c | 143 ++++++
drivers/net/failsafe/failsafe_ether.c | 430 ++++++++++++++++
drivers/net/failsafe/failsafe_flow.c | 245 +++++++++
drivers/net/failsafe/failsafe_ops.c | 869 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_private.h | 376 ++++++++++++++
drivers/net/failsafe/failsafe_rxtx.c | 204 ++++++++
lib/librte_ether/rte_ethdev.c | 38 +-
lib/librte_ether/rte_ethdev.h | 24 +-
mk/rte.app.mk | 1 +
18 files changed, 3400 insertions(+), 16 deletions(-)
create mode 100644 doc/guides/nics/fail_safe.rst
create mode 100644 doc/guides/nics/features/failsafe.ini
create mode 100644 drivers/net/failsafe/Makefile
create mode 100644 drivers/net/failsafe/failsafe.c
create mode 100644 drivers/net/failsafe/failsafe_args.c
create mode 100644 drivers/net/failsafe/failsafe_eal.c
create mode 100644 drivers/net/failsafe/failsafe_ether.c
create mode 100644 drivers/net/failsafe/failsafe_flow.c
create mode 100644 drivers/net/failsafe/failsafe_ops.c
create mode 100644 drivers/net/failsafe/failsafe_private.h
create mode 100644 drivers/net/failsafe/failsafe_rxtx.c
--
2.1.4
Gaetan Rivet
2017-06-07 23:59:51 UTC
Permalink
Other configuration items (i.e. MAC addresses) are stored within
rte_eth_dev_data, but not this one.

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_ether/rte_ethdev.c | 19 ++++++++++++++++++-
lib/librte_ether/rte_ethdev.h | 10 ++++++++++
2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 2446ed9..1922adf 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1963,6 +1963,7 @@ int
rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
{
struct rte_eth_dev *dev;
+ int ret;

RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
dev = &rte_eth_devices[port_id];
@@ -1978,7 +1979,23 @@ rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_filter_set, -ENOTSUP);

- return (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on);
+ ret = (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on);
+ if (ret == 0) {
+ struct rte_vlan_filter_conf *vfc;
+ int vidx;
+ int vbit;
+
+ vfc = &dev->data->vlan_filter_conf;
+ vidx = vlan_id / 64;
+ vbit = vlan_id % 64;
+
+ if (on)
+ vfc->ids[vidx] |= UINT64_C(1) << vbit;
+ else
+ vfc->ids[vidx] &= ~(UINT64_C(1) << vbit);
+ }
+
+ return ret;
}

int
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index b9fd8d3..6584842 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -374,6 +374,14 @@ enum rte_vlan_type {
};

/**
+ * A structure used to describe a vlan filter.
+ * If the bit corresponding to a VID is set, such VID is on.
+ */
+struct rte_vlan_filter_conf {
+ uint64_t ids[64];
+};
+
+/**
* A structure used to configure the Receive Side Scaling (RSS) feature
* of an Ethernet port.
* If not NULL, the *rss_key* pointer of the *rss_conf* structure points
@@ -1675,6 +1683,8 @@ struct rte_eth_dev_data {
enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */
int numa_node; /**< NUMA node connection */
const char *drv_name; /**< Driver name */
+ struct rte_vlan_filter_conf vlan_filter_conf;
+ /**< VLAN filter configuration. */
};

/** Device supports hotplug detach */
--
2.1.4
Gaetan Rivet
2017-06-07 23:59:52 UTC
Permalink
This device state means that the device is managed externally, by
whichever party has set this state (PMD or application).

Note: this new device state is only an information. The related device
structure and operators are still valid and can be used normally.

It is however made private by device management helpers within ethdev,
making the device invisible to applications.

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_ether/rte_ethdev.c | 3 ++-
lib/librte_ether/rte_ethdev.h | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 1922adf..2729fc4 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -287,7 +287,8 @@ int
rte_eth_dev_is_valid_port(uint8_t port_id)
{
if (port_id >= RTE_MAX_ETHPORTS ||
- rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED)
+ (rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED &&
+ rte_eth_devices[port_id].state != RTE_ETH_DEV_DEFERRED))
return 0;
else
return 1;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 6584842..c09f88e 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1589,6 +1589,7 @@ struct rte_eth_rxtx_callback {
enum rte_eth_dev_state {
RTE_ETH_DEV_UNUSED = 0,
RTE_ETH_DEV_ATTACHED,
+ RTE_ETH_DEV_DEFERRED,
};

/**
--
2.1.4
Gaetan Rivet
2017-06-07 23:59:53 UTC
Permalink
Make the rte_eth_dev_count() return the number of available devices even
after some are detached by the hotplug API or put in a deferred state.

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_ether/rte_ethdev.c | 16 +++++++++-------
lib/librte_ether/rte_ethdev.h | 13 ++++++-------
2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 2729fc4..6977458 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -71,7 +71,6 @@ static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
static struct rte_eth_dev_data *rte_eth_dev_data;
static uint8_t eth_dev_last_created_port;
-static uint8_t nb_ports;

/* spinlock for eth device callbacks */
static rte_spinlock_t rte_eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER;
@@ -206,7 +205,6 @@ eth_dev_get(uint8_t port_id)
TAILQ_INIT(&(eth_dev->link_intr_cbs));

eth_dev_last_created_port = port_id;
- nb_ports++;

return eth_dev;
}
@@ -279,7 +277,6 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
return -EINVAL;

eth_dev->state = RTE_ETH_DEV_UNUSED;
- nb_ports--;
return 0;
}

@@ -304,7 +301,15 @@ rte_eth_dev_socket_id(uint8_t port_id)
uint8_t
rte_eth_dev_count(void)
{
- return nb_ports;
+ uint8_t p;
+ uint8_t count;
+
+ count = 0;
+
+ RTE_ETH_FOREACH_DEV(p)
+ count++;
+
+ return count;
}

int
@@ -336,9 +341,6 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id)
return -EINVAL;
}

- if (!nb_ports)
- return -ENODEV;
-
*port_id = RTE_MAX_ETHPORTS;
RTE_ETH_FOREACH_DEV(i) {
if (!strncmp(name,
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index c09f88e..6786111 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1725,13 +1725,12 @@ uint8_t rte_eth_find_next(uint8_t port_id);

/**
* Get the total number of Ethernet devices that have been successfully
- * initialized by the [matching] Ethernet driver during the PCI probing phase.
- * All devices whose port identifier is in the range
- * [0, rte_eth_dev_count() - 1] can be operated on by network applications
- * immediately after invoking rte_eal_init().
- * If the application unplugs a port using hotplug function, The enabled port
- * numbers may be noncontiguous. In the case, the applications need to manage
- * enabled port by using the ``RTE_ETH_FOREACH_DEV()`` macro.
+ * initialized by the matching Ethernet driver during the PCI probing phase
+ * and that are available for applications to use. These devices must be
+ * accessed by using the ``RTE_ETH_FOREACH_DEV()`` macro to deal with
+ * non-contiguous ranges of devices.
+ * These non-contiguous ranges can be created by calls to hotplug functions or
+ * by some PMDs.
*
* @return
* - The total number of usable Ethernet devices.
--
2.1.4
Gaetan Rivet
2017-06-07 23:59:54 UTC
Permalink
Introduce the fail-safe poll mode driver initialization and enable its
build infrastructure.

This PMD allows for applications to benefit from true hot-plugging
support without having to implement it.

It intercepts and manages Ethernet device removal events issued by
slave PMDs and re-initializes them transparently when brought back.
It also allows defining a contingency to the removal of a device, by
designating a fail-over device that will take on transmitting operations
if the preferred device is removed.

Applications only see a fail-safe instance, without caring for
underlying activity ensuring their continued operations.

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
MAINTAINERS | 5 +
config/common_base | 6 +
doc/guides/nics/fail_safe.rst | 133 +++++++
doc/guides/nics/features/failsafe.ini | 24 ++
doc/guides/nics/index.rst | 1 +
drivers/net/Makefile | 2 +
drivers/net/failsafe/Makefile | 72 ++++
drivers/net/failsafe/failsafe.c | 231 +++++++++++
drivers/net/failsafe/failsafe_args.c | 331 ++++++++++++++++
drivers/net/failsafe/failsafe_eal.c | 163 ++++++++
drivers/net/failsafe/failsafe_ops.c | 663 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_private.h | 227 +++++++++++
drivers/net/failsafe/failsafe_rxtx.c | 107 ++++++
mk/rte.app.mk | 1 +
14 files changed, 1966 insertions(+)
create mode 100644 doc/guides/nics/fail_safe.rst
create mode 100644 doc/guides/nics/features/failsafe.ini
create mode 100644 drivers/net/failsafe/Makefile
create mode 100644 drivers/net/failsafe/failsafe.c
create mode 100644 drivers/net/failsafe/failsafe_args.c
create mode 100644 drivers/net/failsafe/failsafe_eal.c
create mode 100644 drivers/net/failsafe/failsafe_ops.c
create mode 100644 drivers/net/failsafe/failsafe_private.h
create mode 100644 drivers/net/failsafe/failsafe_rxtx.c

diff --git a/MAINTAINERS b/MAINTAINERS
index c8c57cb..6d23022 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -333,6 +333,11 @@ F: drivers/net/enic/
F: doc/guides/nics/enic.rst
F: doc/guides/nics/features/enic.ini

+Fail-safe PMD
+M: Gaetan Rivet <***@6wind.com>
+F: drivers/net/failsafe/
+F: doc/guides/nics/fail_safe.rst
+
Intel e1000
M: Wenzhuo Lu <***@intel.com>
F: drivers/net/e1000/
diff --git a/config/common_base b/config/common_base
index 1813f39..d53e489 100644
--- a/config/common_base
+++ b/config/common_base
@@ -433,6 +433,12 @@ CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
CONFIG_RTE_LIBRTE_PMD_NULL=y

#
+# Compile fail-safe PMD
+#
+CONFIG_RTE_LIBRTE_PMD_FAILSAFE=y
+CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG=n
+
+#
# Do prefetch of packet data within PMD driver receive function
#
CONFIG_RTE_PMD_PACKET_PREFETCH=y
diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
new file mode 100644
index 0000000..056f85f
--- /dev/null
+++ b/doc/guides/nics/fail_safe.rst
@@ -0,0 +1,133 @@
+.. BSD LICENSE
+ Copyright 2017 6WIND S.A.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of 6WIND S.A. nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Fail-safe poll mode driver library
+==================================
+
+The Fail-safe poll mode driver library (**librte_pmd_failsafe**) is a virtual
+device that allows using any device supporting hotplug (sudden device removal
+and plugging on its bus), without modifying other components relying on such
+device (application, other PMDs).
+
+Additionally to the Seamless Hotplug feature, the Fail-safe PMD offers the
+ability to redirect operations to secondary devices when the primary has been
+removed from the system.
+
+.. note::
+
+ The library is enabled by default. You can enable it or disable it manually
+ by setting the ``CONFIG_RTE_LIBRTE_PMD_FAILSAFE`` configuration option.
+
+Features
+--------
+
+The Fail-safe PMD only supports a limited set of features. If you plan to use a
+device underneath the Fail-safe PMD with a specific feature, this feature must
+be supported by the Fail-safe PMD to avoid throwing any error.
+
+Check the feature matrix for the complete set of supported features.
+
+Compilation options
+-------------------
+
+These options can be modified in the ``$RTE_TARGET/build/.config`` file.
+
+- ``CONFIG_RTE_LIBRTE_PMD_FAILSAFE`` (default **y**)
+
+ Toggle compiling librte_pmd_failsafe itself.
+
+- ``CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG`` (default **n**)
+
+ Toggle debugging code.
+
+Using the Fail-safe PMD from the EAL command line
+-------------------------------------------------
+
+The Fail-safe PMD can be used like most other DPDK virtual devices, by passing a
+``--vdev`` parameter to the EAL when starting the application. The device name
+must start with the *net_failsafe* prefix, followed by numbers or letters. This
+name must be unique for each device. Each fail-safe instance must have at least one
+sub-device, up to ``RTE_MAX_ETHPORTS-1``.
+
+A sub-device can be any legal DPDK device, including possibly another fail-safe
+instance.
+
+Fail-safe command line parameters
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- **dev(<iface>)** parameter
+
+ This parameter allows the user to define a sub-device. The ``<iface>`` part of
+ this parameter must be a valid device definition. It could be the argument
+ provided to a ``-w`` PCI device specification or the argument that would be
+ given to a ``--vdev`` parameter (including a fail-safe).
+ Enclosing the device definition within parenthesis here allows using
+ additional sub-device parameters if need be. They will be passed on to the
+ sub-device.
+
+- **mac** parameter [MAC address]
+
+ This parameter allows the user to set a default MAC address to the fail-safe
+ and all of its sub-devices.
+ If no default mac address is provided, the fail-safe PMD will read the MAC
+ address of the first of its sub-device to be successfully probed and use it as
+ its default MAC address, trying to set it to all of its other sub-devices.
+ If no sub-device was successfully probed at initialization, then a random MAC
+ address is generated, that will be subsequently applied to all sub-device once
+ they are probed.
+
+Usage example
+~~~~~~~~~~~~~
+
+This section shows some example of using **testpmd** with a fail-safe PMD.
+
+#. Request huge pages:
+
+ .. code-block:: console
+
+ echo 1024 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+#. Start testpmd
+
+ .. code-block:: console
+
+ $RTE_TARGET/build/app/testpmd -c 0xff -n 4 --no-pci \
+ --vdev='net_failsafe0,mac=de:ad:be:ef:01:02,dev(84:00.0),dev(net_ring0,nodeaction=r1:0:CREATE)' -- \
+ -i
+
+Using the Fail-safe PMD from an application
+-------------------------------------------
+
+This driver strives to be as seamless as possible to existing applications, in
+order to propose the hotplug functionality in the easiest way possible.
+
+Care must be taken, however, to respect the **ether** API concerning device
+access, and in particular, using the ``RTE_ETH_FOREACH_DEV`` macro to iterate
+over ethernet devices, instead of directly accessing them or by writing one's
+own device iterator.
diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
new file mode 100644
index 0000000..3c52823
--- /dev/null
+++ b/doc/guides/nics/features/failsafe.ini
@@ -0,0 +1,24 @@
+;
+; Supported features of the 'fail-safe' poll mode driver.
+;
+; Refer to default.ini for the full list of available PMD features.
+;
+[Features]
+Link status = Y
+Queue start/stop = Y
+MTU update = Y
+Jumbo frame = Y
+Promiscuous mode = Y
+Allmulticast mode = Y
+Unicast MAC filter = Y
+Multicast MAC filter = Y
+VLAN filter = Y
+Packet type parsing = Y
+Basic stats = Y
+Stats per queue = Y
+ARMv7 = Y
+ARMv8 = Y
+Power8 = Y
+x86-32 = Y
+x86-64 = Y
+Usage doc = Y
diff --git a/doc/guides/nics/index.rst b/doc/guides/nics/index.rst
index 240d082..17eaaf4 100644
--- a/doc/guides/nics/index.rst
+++ b/doc/guides/nics/index.rst
@@ -64,6 +64,7 @@ Network Interface Controller Drivers
vhost
vmxnet3
pcap_ring
+ fail_safe

**Figures**

diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 35ed813..d33c959 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -59,6 +59,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_ENA_PMD) += ena
DEPDIRS-ena = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic
DEPDIRS-enic = $(core-libs) librte_hash
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe
+DEPDIRS-failsafe = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
DEPDIRS-fm10k = $(core-libs) librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
new file mode 100644
index 0000000..06199ad
--- /dev/null
+++ b/drivers/net/failsafe/Makefile
@@ -0,0 +1,72 @@
+# BSD LICENSE
+#
+# Copyright 2017 6WIND S.A.
+# Copyright 2017 Mellanox.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of 6WIND S.A. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# Library name
+LIB = librte_pmd_failsafe.a
+
+# Sources are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_args.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
+
+# No exported include files
+
+# This lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_kvargs
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_mbuf
+
+ifneq ($(DEBUG),)
+CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG := y
+endif
+
+# Basic CFLAGS:
+CFLAGS += -std=gnu99 -Wall -Wextra
+CFLAGS += -I.
+CFLAGS += -D_BSD_SOURCE
+CFLAGS += -D_XOPEN_SOURCE=700
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -Wno-strict-prototypes
+CFLAGS += -pedantic -DPEDANTIC
+
+ifeq ($(CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG),y)
+CFLAGS += -g -UNDEBUG
+else
+CFLAGS += -O3
+CFLAGS += -DNDEBUG
+endif
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
new file mode 100644
index 0000000..7cf33e8
--- /dev/null
+++ b/drivers/net/failsafe/failsafe.c
@@ -0,0 +1,231 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <rte_alarm.h>
+#include <rte_malloc.h>
+#include <rte_ethdev.h>
+#include <rte_ethdev_vdev.h>
+#include <rte_devargs.h>
+#include <rte_kvargs.h>
+#include <rte_vdev.h>
+
+#include "failsafe_private.h"
+
+const char pmd_failsafe_driver_name[] = FAILSAFE_DRIVER_NAME;
+static const struct rte_eth_link eth_link = {
+ .link_speed = ETH_SPEED_NUM_10G,
+ .link_duplex = ETH_LINK_FULL_DUPLEX,
+ .link_status = ETH_LINK_UP,
+ .link_autoneg = ETH_LINK_SPEED_AUTONEG,
+};
+
+static int
+fs_sub_device_create(struct rte_eth_dev *dev,
+ const char *params)
+{
+ uint8_t nb_subs;
+ int ret;
+
+ ret = failsafe_args_count_subdevice(dev, params);
+ if (ret)
+ return ret;
+ if (PRIV(dev)->subs_tail > FAILSAFE_MAX_ETHPORTS) {
+ ERROR("Cannot allocate more than %d ports",
+ FAILSAFE_MAX_ETHPORTS);
+ return -ENOSPC;
+ }
+ nb_subs = PRIV(dev)->subs_tail;
+ PRIV(dev)->subs = rte_zmalloc(NULL,
+ sizeof(struct sub_device) * nb_subs,
+ RTE_CACHE_LINE_SIZE);
+ if (PRIV(dev)->subs == NULL) {
+ ERROR("Could not allocate sub_devices");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void
+fs_sub_device_free(struct rte_eth_dev *dev)
+{
+ rte_free(PRIV(dev)->subs);
+}
+
+static int
+fs_eth_dev_create(struct rte_vdev_device *vdev)
+{
+ struct rte_eth_dev *dev;
+ struct ether_addr *mac;
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ const char *params;
+ unsigned int socket_id;
+ uint8_t i;
+ int ret;
+
+ dev = NULL;
+ priv = NULL;
+ params = rte_vdev_device_args(vdev);
+ socket_id = rte_socket_id();
+ INFO("Creating fail-safe device on NUMA socket %u",
+ socket_id);
+ dev = rte_eth_vdev_allocate(vdev, sizeof(*priv));
+ if (dev == NULL) {
+ ERROR("Unable to allocate rte_eth_dev");
+ return -1;
+ }
+ priv = dev->data->dev_private;
+ PRIV(dev)->dev = dev;
+ dev->dev_ops = &failsafe_ops;
+ TAILQ_INIT(&dev->link_intr_cbs);
+ dev->data->dev_flags = 0x0;
+ dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0];
+ dev->data->dev_link = eth_link;
+ PRIV(dev)->nb_mac_addr = 1;
+ dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst;
+ dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst;
+ if (params == NULL) {
+ ERROR("This PMD requires sub-devices, none provided");
+ goto free_dev;
+ }
+ ret = fs_sub_device_create(dev, params);
+ if (ret) {
+ ERROR("Could not allocate sub_devices");
+ goto free_dev;
+ }
+ ret = failsafe_args_parse(dev, params);
+ if (ret)
+ goto free_subs;
+ ret = failsafe_eal_init(dev);
+ if (ret)
+ goto free_args;
+ mac = &dev->data->mac_addrs[0];
+ if (mac_from_arg) {
+ /*
+ * If MAC address was provided as a parameter,
+ * apply to all probed slaves.
+ */
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
+ mac);
+ if (ret) {
+ ERROR("Failed to set default MAC address");
+ goto free_args;
+ }
+ }
+ } else {
+ /*
+ * Use the ether_addr from first probed
+ * device, either preferred or fallback.
+ */
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state >= DEV_PROBED) {
+ ether_addr_copy(&ETH(sdev)->data->mac_addrs[0],
+ mac);
+ break;
+ }
+ /*
+ * If no device has been probed and no ether_addr
+ * has been provided on the command line, use a random
+ * valid one.
+ * It will be applied during future slave state syncs to
+ * probed slaves.
+ */
+ if (i == priv->subs_tail)
+ eth_random_addr(&mac->addr_bytes[0]);
+ }
+ INFO("MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
+ mac->addr_bytes[0], mac->addr_bytes[1],
+ mac->addr_bytes[2], mac->addr_bytes[3],
+ mac->addr_bytes[4], mac->addr_bytes[5]);
+ return 0;
+free_args:
+ failsafe_args_free(dev);
+free_subs:
+ fs_sub_device_free(dev);
+free_dev:
+ rte_eth_dev_release_port(dev);
+ return -1;
+}
+
+static int
+fs_rte_eth_free(const char *name)
+{
+ struct rte_eth_dev *dev;
+ int ret;
+
+ dev = rte_eth_dev_allocated(name);
+ if (dev == NULL)
+ return -ENODEV;
+ ret = failsafe_eal_uninit(dev);
+ if (ret)
+ ERROR("Error while uninitializing sub-EAL");
+ failsafe_args_free(dev);
+ fs_sub_device_free(dev);
+ rte_free(PRIV(dev));
+ rte_eth_dev_release_port(dev);
+ return ret;
+}
+
+static int
+rte_pmd_failsafe_probe(struct rte_vdev_device *vdev)
+{
+ const char *name;
+
+ name = rte_vdev_device_name(vdev);
+ if (vdev == NULL)
+ return -EINVAL;
+ INFO("Initializing " FAILSAFE_DRIVER_NAME " for %s",
+ name);
+ return fs_eth_dev_create(vdev);
+}
+
+static int
+rte_pmd_failsafe_remove(struct rte_vdev_device *vdev)
+{
+ const char *name;
+
+ name = rte_vdev_device_name(vdev);
+ if (name == NULL)
+ return -EINVAL;
+ INFO("Uninitializing " FAILSAFE_DRIVER_NAME " for %s", name);
+ return fs_rte_eth_free(name);
+}
+
+static struct rte_vdev_driver failsafe_drv = {
+ .probe = rte_pmd_failsafe_probe,
+ .remove = rte_pmd_failsafe_remove,
+};
+
+RTE_PMD_REGISTER_VDEV(net_failsafe, failsafe_drv);
+RTE_PMD_REGISTER_ALIAS(net_failsafe, eth_failsafe);
+RTE_PMD_REGISTER_PARAM_STRING(net_failsafe, PMD_FAILSAFE_PARAM_STRING);
diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
new file mode 100644
index 0000000..f07d26e
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -0,0 +1,331 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <errno.h>
+
+#include <rte_devargs.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+
+#include "failsafe_private.h"
+
+#define DEVARGS_MAXLEN 4096
+
+/* Callback used when a new device is found in devargs */
+typedef int (parse_cb)(struct rte_eth_dev *dev, const char *params,
+ uint8_t head);
+
+int mac_from_arg;
+
+const char *pmd_failsafe_init_parameters[] = {
+ PMD_FAILSAFE_MAC_KVARG,
+ NULL,
+};
+
+/*
+ * input: text.
+ * output: 0: if text[0] != '(',
+ * 0: if there are no corresponding ')'
+ * n: distance to corresponding ')' otherwise
+ */
+static size_t
+closing_paren(const char *text)
+{
+ int nb_open = 0;
+ size_t i = 0;
+
+ while (text[i] != '\0') {
+ if (text[i] == '(')
+ nb_open++;
+ if (text[i] == ')')
+ nb_open--;
+ if (nb_open == 0)
+ return i;
+ i++;
+ }
+ return 0;
+}
+
+static int
+fs_parse_device(struct sub_device *sdev, char *args)
+{
+ struct rte_devargs *d;
+ int ret;
+
+ d = &sdev->devargs;
+ DEBUG("%s", args);
+ ret = rte_eal_devargs_parse(args, d);
+ if (ret) {
+ DEBUG("devargs parsing failed with code %d", ret);
+ return ret;
+ }
+ sdev->bus = d->bus;
+ sdev->state = DEV_PARSED;
+ return 0;
+}
+
+static int
+fs_parse_device_param(struct rte_eth_dev *dev, const char *param,
+ uint8_t head)
+{
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ char *args = NULL;
+ size_t a, b;
+ int ret;
+
+ priv = PRIV(dev);
+ a = 0;
+ b = 0;
+ ret = 0;
+ while (param[b] != '(' &&
+ param[b] != '\0')
+ b++;
+ a = b;
+ b += closing_paren(&param[b]);
+ if (a == b) {
+ ERROR("Dangling parenthesis");
+ return -EINVAL;
+ }
+ a += 1;
+ args = strndup(&param[a], b - a);
+ if (args == NULL) {
+ ERROR("Not enough memory for parameter parsing");
+ return -ENOMEM;
+ }
+ sdev = &priv->subs[head];
+ if (strncmp(param, "dev", 3) == 0) {
+ ret = fs_parse_device(sdev, args);
+ if (ret)
+ goto free_args;
+ } else {
+ ERROR("Unrecognized device type: %.*s", (int)b, param);
+ return -EINVAL;
+ }
+free_args:
+ free(args);
+ return ret;
+}
+
+static int
+fs_parse_sub_devices(parse_cb *cb,
+ struct rte_eth_dev *dev, const char *params)
+{
+ size_t a, b;
+ uint8_t head;
+ int ret;
+
+ a = 0;
+ head = 0;
+ ret = 0;
+ while (params[a] != '\0') {
+ b = a;
+ while (params[b] != '(' &&
+ params[b] != ',' &&
+ params[b] != '\0')
+ b++;
+ if (b == a) {
+ ERROR("Invalid parameter");
+ return -EINVAL;
+ }
+ if (params[b] == ',') {
+ a = b + 1;
+ continue;
+ }
+ if (params[b] == '(') {
+ size_t start = b;
+
+ b += closing_paren(&params[b]);
+ if (b == start) {
+ ERROR("Dangling parenthesis");
+ return -EINVAL;
+ }
+ ret = (*cb)(dev, &params[a], head);
+ if (ret)
+ return ret;
+ head += 1;
+ b += 1;
+ if (params[b] == '\0')
+ return 0;
+ }
+ a = b + 1;
+ }
+ return 0;
+}
+
+static int
+fs_remove_sub_devices_definition(char params[DEVARGS_MAXLEN])
+{
+ char buffer[DEVARGS_MAXLEN] = {0};
+ size_t a, b;
+ int i;
+
+ a = 0;
+ i = 0;
+ while (params[a] != '\0') {
+ b = a;
+ while (params[b] != '(' &&
+ params[b] != ',' &&
+ params[b] != '\0')
+ b++;
+ if (b == a) {
+ ERROR("Invalid parameter");
+ return -EINVAL;
+ }
+ if (params[b] == ',' || params[b] == '\0')
+ i += snprintf(&buffer[i], b - a + 1, "%s", &params[a]);
+ if (params[b] == '(') {
+ size_t start = b;
+ b += closing_paren(&params[b]);
+ if (b == start)
+ return -EINVAL;
+ b += 1;
+ if (params[b] == '\0')
+ goto out;
+ }
+ a = b + 1;
+ }
+out:
+ snprintf(params, DEVARGS_MAXLEN, "%s", buffer);
+ return 0;
+}
+
+static int
+fs_get_mac_addr_arg(const char *key __rte_unused,
+ const char *value, void *out)
+{
+ struct ether_addr *ea = out;
+ int ret;
+
+ if ((value == NULL) || (out == NULL))
+ return -EINVAL;
+ ret = sscanf(value, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+ &ea->addr_bytes[0], &ea->addr_bytes[1],
+ &ea->addr_bytes[2], &ea->addr_bytes[3],
+ &ea->addr_bytes[4], &ea->addr_bytes[5]);
+ return ret != ETHER_ADDR_LEN;
+}
+
+int
+failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
+{
+ struct fs_priv *priv;
+ char mut_params[DEVARGS_MAXLEN] = "";
+ struct rte_kvargs *kvlist = NULL;
+ unsigned int arg_count;
+ size_t n;
+ int ret;
+
+ if (dev == NULL || params == NULL)
+ return -EINVAL;
+ priv = PRIV(dev);
+ ret = 0;
+ priv->subs_tx = FAILSAFE_MAX_ETHPORTS;
+ /* default parameters */
+ mac_from_arg = 0;
+ n = snprintf(mut_params, sizeof(mut_params), "%s", params);
+ if (n >= sizeof(mut_params)) {
+ ERROR("Parameter string too long (>=%zu)",
+ sizeof(mut_params));
+ return -ENOMEM;
+ }
+ ret = fs_parse_sub_devices(fs_parse_device_param,
+ dev, params);
+ if (ret < 0)
+ return ret;
+ ret = fs_remove_sub_devices_definition(mut_params);
+ if (ret < 0)
+ return ret;
+ if (strnlen(mut_params, sizeof(mut_params)) > 0) {
+ kvlist = rte_kvargs_parse(mut_params,
+ pmd_failsafe_init_parameters);
+ if (kvlist == NULL) {
+ ERROR("Error parsing parameters, usage:\n"
+ PMD_FAILSAFE_PARAM_STRING);
+ return -1;
+ }
+ /* MAC addr */
+ arg_count = rte_kvargs_count(kvlist,
+ PMD_FAILSAFE_MAC_KVARG);
+ if (arg_count == 1) {
+ ret = rte_kvargs_process(kvlist,
+ PMD_FAILSAFE_MAC_KVARG,
+ &fs_get_mac_addr_arg,
+ &dev->data->mac_addrs[0]);
+ if (ret < 0)
+ goto free_kvlist;
+ mac_from_arg = 1;
+ }
+ }
+free_kvlist:
+ rte_kvargs_free(kvlist);
+ return ret;
+}
+
+void
+failsafe_args_free(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ free(sdev->devargs.args);
+ sdev->devargs.args = NULL;
+ }
+}
+
+static int
+fs_count_device(struct rte_eth_dev *dev, const char *param,
+ uint8_t head __rte_unused)
+{
+ size_t b = 0;
+
+ while (param[b] != '(' &&
+ param[b] != '\0')
+ b++;
+ if (strncmp(param, "dev", b) &&
+ strncmp(param, "exec", b)) {
+ ERROR("Unrecognized device type: %.*s", (int)b, param);
+ return -EINVAL;
+ }
+ PRIV(dev)->subs_tail += 1;
+ return 0;
+}
+
+int
+failsafe_args_count_subdevice(struct rte_eth_dev *dev,
+ const char *params)
+{
+ return fs_parse_sub_devices(fs_count_device,
+ dev, params);
+}
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
new file mode 100644
index 0000000..87d2c89
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -0,0 +1,163 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_malloc.h>
+
+#include "failsafe_private.h"
+
+static struct rte_eth_dev *
+fs_find_ethdev(const struct rte_device *dev)
+{
+ struct rte_eth_dev *eth_dev;
+ uint8_t i;
+
+ RTE_ETH_FOREACH_DEV(i) {
+ eth_dev = &rte_eth_devices[i];
+ if (eth_dev->device == dev)
+ return eth_dev;
+ }
+ return NULL;
+}
+
+static int
+fs_bus_init(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ struct rte_device *rdev;
+ struct rte_devargs *da;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_PARSED)
+ continue;
+ da = &sdev->devargs;
+ if (!sdev->bus->plug) {
+ ERROR("Bus %s used for sub_device %d does not support hotplug, skipping",
+ sdev->bus->name, i);
+ return -EINVAL;
+ }
+ rdev = sdev->bus->plug(da);
+ ret = rdev ? 0 : -rte_errno;
+ if (ret) {
+ ERROR("sub_device %d probe failed %s%s%s", i,
+ errno ? "(" : "",
+ errno ? strerror(errno) : "",
+ errno ? ")" : "");
+ continue;
+ }
+ ETH(sdev) = fs_find_ethdev(rdev);
+ if (ETH(sdev) == NULL) {
+ ERROR("sub_device %d init went wrong", i);
+ return -ENODEV;
+ }
+ sdev->dev = ETH(sdev)->device;
+ ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
+ sdev->state = DEV_PROBED;
+ }
+ return 0;
+}
+
+int
+failsafe_eal_init(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ ret = fs_bus_init(dev);
+ if (ret)
+ return ret;
+ /*
+ * We only update TX_SUBDEV if we are not started.
+ * If a sub_device is emitting, we will switch the TX_SUBDEV to the
+ * preferred port only upon starting it, so that the switch is smoother.
+ */
+ if (PREFERRED_SUBDEV(dev)->state >= DEV_PROBED) {
+ if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
+ (TX_SUBDEV(dev) == NULL ||
+ (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
+ DEBUG("Switching tx_dev to preferred sub_device");
+ PRIV(dev)->subs_tx = 0;
+ }
+ } else {
+ if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_PROBED) ||
+ TX_SUBDEV(dev) == NULL) {
+ /* Using first probed device */
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ DEBUG("Switching tx_dev to sub_device %d",
+ i);
+ PRIV(dev)->subs_tx = i;
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+static int
+fs_bus_uninit(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev = NULL;
+ struct rte_device *rdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ rdev = sdev->dev;
+ if (!sdev->bus->unplug) {
+ ERROR("Bus does not support device removal for sub_device %u (%s)",
+ i, rdev->name);
+ continue;
+ }
+ ret = sdev->bus->unplug(rdev);
+ if (ret) {
+ ERROR("Failed to remove requested device %s",
+ rdev->name);
+ continue;
+ }
+ sdev->state = DEV_PROBED - 1;
+ }
+ return 0;
+}
+
+int
+failsafe_eal_uninit(struct rte_eth_dev *dev)
+{
+ int ret;
+
+ ret = fs_bus_uninit(dev);
+ if (ret)
+ return ret;
+ return 0;
+}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
new file mode 100644
index 0000000..693162e
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -0,0 +1,663 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+
+#include "failsafe_private.h"
+
+static struct rte_eth_dev_info default_infos = {
+ .driver_name = pmd_failsafe_driver_name,
+ /* Max possible number of elements */
+ .max_rx_pktlen = UINT32_MAX,
+ .max_rx_queues = RTE_MAX_QUEUES_PER_PORT,
+ .max_tx_queues = RTE_MAX_QUEUES_PER_PORT,
+ .max_mac_addrs = FAILSAFE_MAX_ETHADDR,
+ .max_hash_mac_addrs = UINT32_MAX,
+ .max_vfs = UINT16_MAX,
+ .max_vmdq_pools = UINT16_MAX,
+ .rx_desc_lim = {
+ .nb_max = UINT16_MAX,
+ .nb_min = 0,
+ .nb_align = 1,
+ .nb_seg_max = UINT16_MAX,
+ .nb_mtu_seg_max = UINT16_MAX,
+ },
+ .tx_desc_lim = {
+ .nb_max = UINT16_MAX,
+ .nb_min = 0,
+ .nb_align = 1,
+ .nb_seg_max = UINT16_MAX,
+ .nb_mtu_seg_max = UINT16_MAX,
+ },
+ /* Set of understood capabilities */
+ .rx_offload_capa = 0x0,
+ .tx_offload_capa = 0x0,
+ .flow_type_rss_offloads = 0x0,
+};
+
+static int
+fs_dev_configure(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_PROBED)
+ continue;
+ DEBUG("Configuring sub-device %d", i);
+ ret = rte_eth_dev_configure(PORT_ID(sdev),
+ dev->data->nb_rx_queues,
+ dev->data->nb_tx_queues,
+ &dev->data->dev_conf);
+ if (ret) {
+ ERROR("Could not configure sub_device %d", i);
+ return ret;
+ }
+ sdev->state = DEV_ACTIVE;
+ }
+ return 0;
+}
+
+static int
+fs_dev_start(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_ACTIVE)
+ continue;
+ DEBUG("Starting sub_device %d", i);
+ ret = rte_eth_dev_start(PORT_ID(sdev));
+ if (ret)
+ return ret;
+ sdev->state = DEV_STARTED;
+ }
+ if (PREFERRED_SUBDEV(dev)->state == DEV_STARTED) {
+ if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev)) {
+ DEBUG("Switching tx_dev to preferred sub_device");
+ PRIV(dev)->subs_tx = 0;
+ }
+ } else {
+ if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED) ||
+ TX_SUBDEV(dev) == NULL) {
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
+ DEBUG("Switching tx_dev to sub_device %d", i);
+ PRIV(dev)->subs_tx = i;
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+static void
+fs_dev_stop(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
+ rte_eth_dev_stop(PORT_ID(sdev));
+ sdev->state = DEV_STARTED - 1;
+ }
+}
+
+static int
+fs_dev_set_link_up(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
+ ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
+ if (ret) {
+ ERROR("Operation rte_eth_dev_set_link_up failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
+fs_dev_set_link_down(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
+ ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
+ if (ret) {
+ ERROR("Operation rte_eth_dev_set_link_down failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static void fs_dev_free_queues(struct rte_eth_dev *dev);
+static void
+fs_dev_close(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Closing sub_device %d", i);
+ rte_eth_dev_close(PORT_ID(sdev));
+ sdev->state = DEV_ACTIVE - 1;
+ }
+ fs_dev_free_queues(dev);
+}
+
+static void
+fs_rx_queue_release(void *queue)
+{
+ struct rte_eth_dev *dev;
+ struct sub_device *sdev;
+ uint8_t i;
+ struct rxq *rxq;
+
+ if (queue == NULL)
+ return;
+ rxq = queue;
+ dev = rxq->priv->dev;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ SUBOPS(sdev, rx_queue_release)
+ (ETH(sdev)->data->rx_queues[rxq->qid]);
+ dev->data->rx_queues[rxq->qid] = NULL;
+ rte_free(rxq);
+}
+
+static int
+fs_rx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t rx_queue_id,
+ uint16_t nb_rx_desc,
+ unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *mb_pool)
+{
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ uint8_t i;
+ int ret;
+
+ rxq = dev->data->rx_queues[rx_queue_id];
+ if (rxq != NULL) {
+ fs_rx_queue_release(rxq);
+ dev->data->rx_queues[rx_queue_id] = NULL;
+ }
+ rxq = rte_zmalloc(NULL, sizeof(*rxq),
+ RTE_CACHE_LINE_SIZE);
+ if (rxq == NULL)
+ return -ENOMEM;
+ rxq->qid = rx_queue_id;
+ rxq->socket_id = socket_id;
+ rxq->info.mp = mb_pool;
+ rxq->info.conf = *rx_conf;
+ rxq->info.nb_desc = nb_rx_desc;
+ rxq->priv = PRIV(dev);
+ dev->data->rx_queues[rx_queue_id] = rxq;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_rx_queue_setup(PORT_ID(sdev),
+ rx_queue_id,
+ nb_rx_desc, socket_id,
+ rx_conf, mb_pool);
+ if (ret) {
+ ERROR("RX queue setup failed for sub_device %d", i);
+ goto free_rxq;
+ }
+ }
+ return 0;
+free_rxq:
+ fs_rx_queue_release(rxq);
+ return ret;
+}
+
+static void
+fs_tx_queue_release(void *queue)
+{
+ struct rte_eth_dev *dev;
+ struct sub_device *sdev;
+ uint8_t i;
+ struct txq *txq;
+
+ if (queue == NULL)
+ return;
+ txq = queue;
+ dev = txq->priv->dev;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ SUBOPS(sdev, tx_queue_release)
+ (ETH(sdev)->data->tx_queues[txq->qid]);
+ dev->data->tx_queues[txq->qid] = NULL;
+ rte_free(txq);
+}
+
+static int
+fs_tx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t tx_queue_id,
+ uint16_t nb_tx_desc,
+ unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ uint8_t i;
+ int ret;
+
+ txq = dev->data->tx_queues[tx_queue_id];
+ if (txq != NULL) {
+ fs_tx_queue_release(txq);
+ dev->data->tx_queues[tx_queue_id] = NULL;
+ }
+ txq = rte_zmalloc("ethdev TX queue", sizeof(*txq),
+ RTE_CACHE_LINE_SIZE);
+ if (txq == NULL)
+ return -ENOMEM;
+ txq->qid = tx_queue_id;
+ txq->socket_id = socket_id;
+ txq->info.conf = *tx_conf;
+ txq->info.nb_desc = nb_tx_desc;
+ txq->priv = PRIV(dev);
+ dev->data->tx_queues[tx_queue_id] = txq;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_tx_queue_setup(PORT_ID(sdev),
+ tx_queue_id,
+ nb_tx_desc, socket_id,
+ tx_conf);
+ if (ret) {
+ ERROR("TX queue setup failed for sub_device %d", i);
+ goto free_txq;
+ }
+ }
+ return 0;
+free_txq:
+ fs_tx_queue_release(txq);
+ return ret;
+}
+
+static void
+fs_dev_free_queues(struct rte_eth_dev *dev)
+{
+ uint16_t i;
+
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ fs_rx_queue_release(dev->data->rx_queues[i]);
+ dev->data->rx_queues[i] = NULL;
+ }
+ dev->data->nb_rx_queues = 0;
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ fs_tx_queue_release(dev->data->tx_queues[i]);
+ dev->data->tx_queues[i] = NULL;
+ }
+ dev->data->nb_tx_queues = 0;
+}
+
+static void
+fs_promiscuous_enable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_promiscuous_enable(PORT_ID(sdev));
+}
+
+static void
+fs_promiscuous_disable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_promiscuous_disable(PORT_ID(sdev));
+}
+
+static void
+fs_allmulticast_enable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_allmulticast_enable(PORT_ID(sdev));
+}
+
+static void
+fs_allmulticast_disable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_allmulticast_disable(PORT_ID(sdev));
+}
+
+static int
+fs_link_update(struct rte_eth_dev *dev,
+ int wait_to_complete)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling link_update on sub_device %d", i);
+ ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
+ if (ret && ret != -1) {
+ ERROR("Link update failed for sub_device %d with error %d",
+ i, ret);
+ return ret;
+ }
+ }
+ if (TX_SUBDEV(dev)) {
+ struct rte_eth_link *l1;
+ struct rte_eth_link *l2;
+
+ l1 = &dev->data->dev_link;
+ l2 = &ETH(TX_SUBDEV(dev))->data->dev_link;
+ if (memcmp(l1, l2, sizeof(*l1))) {
+ *l1 = *l2;
+ return 0;
+ }
+ }
+ return -1;
+}
+
+static void
+fs_stats_get(struct rte_eth_dev *dev,
+ struct rte_eth_stats *stats)
+{
+ memset(stats, 0, sizeof(*stats));
+ if (TX_SUBDEV(dev) == NULL)
+ return;
+ rte_eth_stats_get(PORT_ID(TX_SUBDEV(dev)), stats);
+}
+
+static void
+fs_stats_reset(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_stats_reset(PORT_ID(sdev));
+}
+
+/**
+ * Fail-safe dev_infos_get rules:
+ *
+ * No sub_device:
+ * Numerables:
+ * Use the maximum possible values for any field, so as not
+ * to impede any further configuration effort.
+ * Capabilities:
+ * Limits capabilities to those that are understood by the
+ * fail-safe PMD. This understanding stems from the fail-safe
+ * being capable of verifying that the related capability is
+ * expressed within the device configuration (struct rte_eth_conf).
+ *
+ * At least one probed sub_device:
+ * Numerables:
+ * Uses values from the active probed sub_device
+ * The rationale here is that if any sub_device is less capable
+ * (for example concerning the number of queues) than the active
+ * sub_device, then its subsequent configuration will fail.
+ * It is impossible to foresee this failure when the failing sub_device
+ * is supposed to be plugged-in later on, so the configuration process
+ * is the single point of failure and error reporting.
+ * Capabilities:
+ * Uses a logical AND of RX capabilities among
+ * all sub_devices and the default capabilities.
+ * Uses a logical AND of TX capabilities among
+ * the active probed sub_device and the default capabilities.
+ *
+ */
+static void
+fs_dev_infos_get(struct rte_eth_dev *dev,
+ struct rte_eth_dev_info *infos)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL) {
+ DEBUG("No probed device, using default infos");
+ rte_memcpy(&PRIV(dev)->infos, &default_infos,
+ sizeof(default_infos));
+ } else {
+ uint32_t rx_offload_capa;
+
+ rx_offload_capa = default_infos.rx_offload_capa;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ rte_eth_dev_info_get(PORT_ID(sdev),
+ &PRIV(dev)->infos);
+ rx_offload_capa &= PRIV(dev)->infos.rx_offload_capa;
+ }
+ sdev = TX_SUBDEV(dev);
+ rte_eth_dev_info_get(PORT_ID(sdev), &PRIV(dev)->infos);
+ PRIV(dev)->infos.rx_offload_capa = rx_offload_capa;
+ PRIV(dev)->infos.tx_offload_capa &=
+ default_infos.tx_offload_capa;
+ PRIV(dev)->infos.flow_type_rss_offloads &=
+ default_infos.flow_type_rss_offloads;
+ }
+ rte_memcpy(infos, &PRIV(dev)->infos, sizeof(*infos));
+}
+
+static const uint32_t *
+fs_dev_supported_ptypes_get(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ struct rte_eth_dev *edev;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL)
+ return NULL;
+ edev = ETH(sdev);
+ /* ENOTSUP: counts as no supported ptypes */
+ if (SUBOPS(sdev, dev_supported_ptypes_get) == NULL)
+ return NULL;
+ /*
+ * The API does not permit to do a clean AND of all ptypes,
+ * It is also incomplete by design and we do not really care
+ * to have a best possible value in this context.
+ * We just return the ptypes of the device of highest
+ * priority, usually the PREFERRED device.
+ */
+ return SUBOPS(sdev, dev_supported_ptypes_get)(edev);
+}
+
+static int
+fs_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
+ ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_set_mtu failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
+fs_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
+ ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_vlan_filter failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
+fs_flow_ctrl_get(struct rte_eth_dev *dev,
+ struct rte_eth_fc_conf *fc_conf)
+{
+ struct sub_device *sdev;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL)
+ return 0;
+ if (SUBOPS(sdev, flow_ctrl_get) == NULL)
+ return -ENOTSUP;
+ return SUBOPS(sdev, flow_ctrl_get)(ETH(sdev), fc_conf);
+}
+
+static int
+fs_flow_ctrl_set(struct rte_eth_dev *dev,
+ struct rte_eth_fc_conf *fc_conf)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
+ ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_flow_ctrl_set failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static void
+fs_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ /* No check: already done within the rte_eth_dev_mac_addr_remove
+ * call for the fail-safe device.
+ */
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_dev_mac_addr_remove(PORT_ID(sdev),
+ &dev->data->mac_addrs[index]);
+ PRIV(dev)->mac_addr_pool[index] = 0;
+}
+
+static int
+fs_mac_addr_add(struct rte_eth_dev *dev,
+ struct ether_addr *mac_addr,
+ uint32_t index,
+ uint32_t vmdq)
+{
+ struct sub_device *sdev;
+ int ret;
+ uint8_t i;
+
+ assert(index < FAILSAFE_MAX_ETHADDR);
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
+ PRIu8 " with error %d", i, ret);
+ return ret;
+ }
+ }
+ if (index >= PRIV(dev)->nb_mac_addr) {
+ DEBUG("Growing mac_addrs array");
+ PRIV(dev)->nb_mac_addr = index;
+ }
+ PRIV(dev)->mac_addr_pool[index] = vmdq;
+ return 0;
+}
+
+static void
+fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
+}
+
+const struct eth_dev_ops failsafe_ops = {
+ .dev_configure = fs_dev_configure,
+ .dev_start = fs_dev_start,
+ .dev_stop = fs_dev_stop,
+ .dev_set_link_down = fs_dev_set_link_down,
+ .dev_set_link_up = fs_dev_set_link_up,
+ .dev_close = fs_dev_close,
+ .promiscuous_enable = fs_promiscuous_enable,
+ .promiscuous_disable = fs_promiscuous_disable,
+ .allmulticast_enable = fs_allmulticast_enable,
+ .allmulticast_disable = fs_allmulticast_disable,
+ .link_update = fs_link_update,
+ .stats_get = fs_stats_get,
+ .stats_reset = fs_stats_reset,
+ .dev_infos_get = fs_dev_infos_get,
+ .dev_supported_ptypes_get = fs_dev_supported_ptypes_get,
+ .mtu_set = fs_mtu_set,
+ .vlan_filter_set = fs_vlan_filter_set,
+ .rx_queue_setup = fs_rx_queue_setup,
+ .tx_queue_setup = fs_tx_queue_setup,
+ .rx_queue_release = fs_rx_queue_release,
+ .tx_queue_release = fs_tx_queue_release,
+ .flow_ctrl_get = fs_flow_ctrl_get,
+ .flow_ctrl_set = fs_flow_ctrl_set,
+ .mac_addr_remove = fs_mac_addr_remove,
+ .mac_addr_add = fs_mac_addr_add,
+ .mac_addr_set = fs_mac_addr_set,
+};
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
new file mode 100644
index 0000000..e7a7592
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -0,0 +1,227 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_
+#define _RTE_ETH_FAILSAFE_PRIVATE_H_
+
+#include <rte_dev.h>
+#include <rte_ethdev.h>
+#include <rte_devargs.h>
+
+#define FAILSAFE_DRIVER_NAME "Fail-safe PMD"
+
+#define PMD_FAILSAFE_MAC_KVARG "mac"
+#define PMD_FAILSAFE_PARAM_STRING \
+ "dev(<ifc>)," \
+ "mac=mac_addr" \
+ ""
+
+#define FAILSAFE_PLUGIN_DEFAULT_TIMEOUT_MS 2000
+
+#define FAILSAFE_MAX_ETHPORTS 2
+#define FAILSAFE_MAX_ETHADDR 128
+
+/* TYPES */
+
+struct rxq {
+ struct fs_priv *priv;
+ uint16_t qid;
+ /* id of last sub_device polled */
+ uint8_t last_polled;
+ unsigned int socket_id;
+ struct rte_eth_rxq_info info;
+};
+
+struct txq {
+ struct fs_priv *priv;
+ uint16_t qid;
+ unsigned int socket_id;
+ struct rte_eth_txq_info info;
+};
+
+enum dev_state {
+ DEV_UNDEFINED = 0,
+ DEV_PARSED,
+ DEV_PROBED,
+ DEV_ACTIVE,
+ DEV_STARTED,
+};
+
+struct sub_device {
+ /* Exhaustive DPDK device description */
+ struct rte_devargs devargs;
+ struct rte_bus *bus;
+ struct rte_device *dev;
+ struct rte_eth_dev *edev;
+ /* Device state machine */
+ enum dev_state state;
+};
+
+struct fs_priv {
+ struct rte_eth_dev *dev;
+ /*
+ * Set of sub_devices.
+ * subs[0] is the preferred device
+ * any other is just another slave
+ */
+ struct sub_device *subs;
+ uint8_t subs_head; /* if head == tail, no subs */
+ uint8_t subs_tail; /* first invalid */
+ uint8_t subs_tx; /* current emitting device */
+ uint8_t current_probed;
+ /* current number of mac_addr slots allocated. */
+ uint32_t nb_mac_addr;
+ struct ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR];
+ uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR];
+ /* current capabilities */
+ struct rte_eth_dev_info infos;
+};
+
+/* RX / TX */
+
+uint16_t failsafe_rx_burst(void *rxq,
+ struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t failsafe_tx_burst(void *txq,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
+/* ARGS */
+
+int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
+void failsafe_args_free(struct rte_eth_dev *dev);
+int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
+
+/* EAL */
+
+int failsafe_eal_init(struct rte_eth_dev *dev);
+int failsafe_eal_uninit(struct rte_eth_dev *dev);
+
+/* GLOBALS */
+
+extern const char pmd_failsafe_driver_name[];
+extern const struct eth_dev_ops failsafe_ops;
+extern int mac_from_arg;
+
+/* HELPERS */
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define PRIV(dev) \
+ ((struct fs_priv *)(dev)->data->dev_private)
+
+/* sdev: (struct sub_device *) */
+#define ETH(sdev) \
+ ((sdev)->edev)
+
+/* sdev: (struct sub_device *) */
+#define PORT_ID(sdev) \
+ (ETH(sdev)->data->port_id)
+
+/**
+ * Stateful iterator construct over fail-safe sub-devices:
+ * s: (struct sub_device *), iterator
+ * i: (uint8_t), increment
+ * dev: (struct rte_eth_dev *), fail-safe ethdev
+ * state: (enum dev_state), minimum acceptable device state
+ */
+#define FOREACH_SUBDEV_ST(s, i, dev, state) \
+ for (i = fs_find_next((dev), 0, state); \
+ i < PRIV(dev)->subs_tail && (s = &PRIV(dev)->subs[i]); \
+ i = fs_find_next((dev), i + 1, state))
+
+/**
+ * Iterator construct over fail-safe sub-devices:
+ * s: (struct sub_device *), iterator
+ * i: (uint8_t), increment
+ * dev: (struct rte_eth_dev *), fail-safe ethdev
+ */
+#define FOREACH_SUBDEV(s, i, dev) \
+ FOREACH_SUBDEV_ST(s, i, dev, DEV_UNDEFINED)
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define PREFERRED_SUBDEV(dev) \
+ (&PRIV(dev)->subs[0])
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define TX_SUBDEV(dev) \
+ (PRIV(dev)->subs_tx >= PRIV(dev)->subs_tail ? NULL \
+ : (PRIV(dev)->subs[PRIV(dev)->subs_tx].state < DEV_PROBED ? NULL \
+ : &PRIV(dev)->subs[PRIV(dev)->subs_tx]))
+
+/**
+ * s: (struct sub_device *)
+ * ops: (struct eth_dev_ops) member
+ */
+#define SUBOPS(s, ops) \
+ (ETH(s)->dev_ops->ops)
+
+#ifndef NDEBUG
+#include <stdio.h>
+#define DEBUG__(m, ...) \
+ (fprintf(stderr, "%s:%d: %s(): " m "%c", \
+ __FILE__, __LINE__, __func__, __VA_ARGS__), \
+ (void)0)
+#define DEBUG_(...) \
+ (errno = ((int []){ \
+ *(volatile int *)&errno, \
+ (DEBUG__(__VA_ARGS__), 0) \
+ })[0])
+#define DEBUG(...) DEBUG_(__VA_ARGS__, '\n')
+#define INFO(...) DEBUG(__VA_ARGS__)
+#define WARN(...) DEBUG(__VA_ARGS__)
+#define ERROR(...) DEBUG(__VA_ARGS__)
+#else
+#define DEBUG(...) ((void)0)
+#define LOG__(level, m, ...) \
+ RTE_LOG(level, PMD, "net_failsafe: " m "%c", __VA_ARGS__)
+#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
+#define INFO(...) LOG_(INFO, __VA_ARGS__)
+#define WARN(...) LOG_(WARNING, "WARNING: " __VA_ARGS__)
+#define ERROR(...) LOG_(ERR, "ERROR: " __VA_ARGS__)
+#endif
+
+/* inlined functions */
+
+static inline uint8_t
+fs_find_next(struct rte_eth_dev *dev, uint8_t sid,
+ enum dev_state min_state)
+{
+ while (sid < PRIV(dev)->subs_tail) {
+ if (PRIV(dev)->subs[sid].state >= min_state)
+ break;
+ sid++;
+ }
+ if (sid >= PRIV(dev)->subs_tail)
+ return PRIV(dev)->subs_tail;
+ return sid;
+}
+
+#endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */
diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c
new file mode 100644
index 0000000..a45b4e5
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_rxtx.c
@@ -0,0 +1,107 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+
+#include "failsafe_private.h"
+
+/*
+ * TODO: write fast version,
+ * without additional checks, to be activated once
+ * everything has been verified to comply.
+ */
+uint16_t
+failsafe_rx_burst(void *queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ void *sub_rxq;
+ uint16_t nb_rx;
+ uint8_t nb_polled, nb_subs;
+ uint8_t i;
+
+ rxq = queue;
+ priv = rxq->priv;
+ nb_subs = priv->subs_tail - priv->subs_head;
+ nb_polled = 0;
+ for (i = rxq->last_polled; nb_polled < nb_subs; nb_polled++) {
+ i++;
+ if (i == priv->subs_tail)
+ i = priv->subs_head;
+ sdev = &priv->subs[i];
+ if (unlikely(ETH(sdev) == NULL))
+ continue;
+ if (unlikely(ETH(sdev)->rx_pkt_burst == NULL))
+ continue;
+ if (unlikely(sdev->state != DEV_STARTED))
+ continue;
+ sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ nb_rx = ETH(sdev)->
+ rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ if (nb_rx) {
+ rxq->last_polled = i;
+ return nb_rx;
+ }
+ }
+ return 0;
+}
+
+/*
+ * TODO: write fast version,
+ * without additional checks, to be activated once
+ * everything has been verified to comply.
+ */
+uint16_t
+failsafe_tx_burst(void *queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ void *sub_txq;
+
+ txq = queue;
+ sdev = TX_SUBDEV(txq->priv->dev);
+ if (unlikely(sdev == NULL))
+ return 0;
+ if (unlikely(ETH(sdev) == NULL))
+ return 0;
+ if (unlikely(ETH(sdev)->tx_pkt_burst == NULL))
+ return 0;
+ sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
+ return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+}
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index d476068..d536a20 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -122,6 +122,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += -lrte_pmd_e1000
_LDLIBS-$(CONFIG_RTE_LIBRTE_ENA_PMD) += -lrte_pmd_ena
_LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += -lrte_pmd_enic
_LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += -lrte_pmd_fm10k
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += -lrte_pmd_failsafe
_LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += -lrte_pmd_i40e
_LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += -lrte_pmd_ixgbe
ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
--
2.1.4
Thomas Monjalon
2017-06-28 09:18:05 UTC
Permalink
You need this patch for shared library build:

--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -34,6 +34,10 @@ include $(RTE_SDK)/mk/rte.vars.mk
# Library name
LIB = librte_pmd_failsafe.a

+EXPORT_MAP := rte_pmd_failsafe_version.map
+
+LIBABIVER := 1
+
# Sources are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_args.c

--- /dev/null
+++ b/drivers/net/failsafe/rte_pmd_failsafe_version.map
@@ -0,0 +1,4 @@
+DPDK_17.08 {
+
+ local: *;
+};
Gaetan Rivet
2017-06-07 23:59:55 UTC
Permalink
Periodically check for the existence of a device.
If a device has not been initialized and exists on the system, then it
is probed and configured.

The configuration process strives to synchronize the states between the
plugged-in sub-device and the fail-safe device.

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/fail_safe.rst | 19 +++
drivers/net/failsafe/Makefile | 1 +
drivers/net/failsafe/failsafe.c | 71 ++++++++++
drivers/net/failsafe/failsafe_args.c | 32 +++++
drivers/net/failsafe/failsafe_eal.c | 30 +----
drivers/net/failsafe/failsafe_ether.c | 228 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ops.c | 25 ++--
drivers/net/failsafe/failsafe_private.h | 60 ++++++++-
8 files changed, 423 insertions(+), 43 deletions(-)
create mode 100644 drivers/net/failsafe/failsafe_ether.c

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index 056f85f..c04891a 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -102,6 +102,11 @@ Fail-safe command line parameters
address is generated, that will be subsequently applied to all sub-device once
they are probed.

+- **hotplug_poll** parameter [UINT64] (default **2000**)
+
+ This parameter allows the user to configure the amount of time in milliseconds
+ between two slave upkeep round.
+
Usage example
~~~~~~~~~~~~~

@@ -131,3 +136,17 @@ Care must be taken, however, to respect the **ether** API concerning device
access, and in particular, using the ``RTE_ETH_FOREACH_DEV`` macro to iterate
over ethernet devices, instead of directly accessing them or by writing one's
own device iterator.
+
+Plug-in feature
+---------------
+
+A sub-device can be defined without existing on the system when the fail-safe
+PMD is initialized. Upon probing this device, the fail-safe PMD will detect its
+absence and postpone its use. It will then register for a periodic check on any
+missing sub-device.
+
+During this time, the fail-safe PMD can be used normally, configured and told to
+emit and receive packets. It will store any applied configuration, and try to
+apply it upon the probing of its missing sub-device. After this configuration
+pass, the new sub-device will be synchronized with other sub-devices, i.e. be
+started if the fail-safe PMD has been started by the user before.
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
index 06199ad..4567961 100644
--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -40,6 +40,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_args.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ether.c

# No exported include files

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 7cf33e8..888f07b 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -80,6 +80,72 @@ fs_sub_device_free(struct rte_eth_dev *dev)
rte_free(PRIV(dev)->subs);
}

+static void fs_hotplug_alarm(void *arg);
+
+int
+failsafe_hotplug_alarm_install(struct rte_eth_dev *dev)
+{
+ int ret;
+
+ if (dev == NULL)
+ return -EINVAL;
+ if (PRIV(dev)->pending_alarm)
+ return 0;
+ ret = rte_eal_alarm_set(hotplug_poll * 1000,
+ fs_hotplug_alarm,
+ dev);
+ if (ret) {
+ ERROR("Could not set up plug-in event detection");
+ return ret;
+ }
+ PRIV(dev)->pending_alarm = 1;
+ return 0;
+}
+
+int
+failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev)
+{
+ int ret = 0;
+
+ if (PRIV(dev)->pending_alarm) {
+ rte_errno = 0;
+ rte_eal_alarm_cancel(fs_hotplug_alarm, dev);
+ if (rte_errno) {
+ ERROR("rte_eal_alarm_cancel failed (errno: %s)",
+ strerror(rte_errno));
+ ret = -rte_errno;
+ } else {
+ PRIV(dev)->pending_alarm = 0;
+ }
+ }
+ return ret;
+}
+
+static void
+fs_hotplug_alarm(void *arg)
+{
+ struct rte_eth_dev *dev = arg;
+ struct sub_device *sdev;
+ int ret;
+ uint8_t i;
+
+ if (!PRIV(dev)->pending_alarm)
+ return;
+ PRIV(dev)->pending_alarm = 0;
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state != PRIV(dev)->state)
+ break;
+ /* if we have non-probed device */
+ if (i != PRIV(dev)->subs_tail) {
+ ret = failsafe_eth_dev_state_sync(dev);
+ if (ret)
+ ERROR("Unable to synchronize sub_device state");
+ }
+ ret = failsafe_hotplug_alarm_install(dev);
+ if (ret)
+ ERROR("Unable to set up next alarm");
+}
+
static int
fs_eth_dev_create(struct rte_vdev_device *vdev)
{
@@ -128,6 +194,11 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
ret = failsafe_eal_init(dev);
if (ret)
goto free_args;
+ ret = failsafe_hotplug_alarm_install(dev);
+ if (ret) {
+ ERROR("Could not set up plug-in event detection");
+ goto free_args;
+ }
mac = &dev->data->mac_addrs[0];
if (mac_from_arg) {
/*
diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
index f07d26e..8f334aa 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -45,9 +45,11 @@
typedef int (parse_cb)(struct rte_eth_dev *dev, const char *params,
uint8_t head);

+uint64_t hotplug_poll;
int mac_from_arg;

const char *pmd_failsafe_init_parameters[] = {
+ PMD_FAILSAFE_PLUG_IN_POLL_KVARG,
PMD_FAILSAFE_MAC_KVARG,
NULL,
};
@@ -221,6 +223,24 @@ fs_remove_sub_devices_definition(char params[DEVARGS_MAXLEN])
}

static int
+fs_get_u64_arg(const char *key __rte_unused,
+ const char *value, void *out)
+{
+ uint64_t *u64 = out;
+ char *endptr = NULL;
+
+ if ((value == NULL) || (out == NULL))
+ return -EINVAL;
+ errno = 0;
+ *u64 = strtoull(value, &endptr, 0);
+ if (errno != 0)
+ return -errno;
+ if (endptr == value)
+ return -1;
+ return 0;
+}
+
+static int
fs_get_mac_addr_arg(const char *key __rte_unused,
const char *value, void *out)
{
@@ -252,6 +272,7 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
ret = 0;
priv->subs_tx = FAILSAFE_MAX_ETHPORTS;
/* default parameters */
+ hotplug_poll = FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS;
mac_from_arg = 0;
n = snprintf(mut_params, sizeof(mut_params), "%s", params);
if (n >= sizeof(mut_params)) {
@@ -274,6 +295,16 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
PMD_FAILSAFE_PARAM_STRING);
return -1;
}
+ /* PLUG_IN event poll timer */
+ arg_count = rte_kvargs_count(kvlist,
+ PMD_FAILSAFE_PLUG_IN_POLL_KVARG);
+ if (arg_count == 1) {
+ ret = rte_kvargs_process(kvlist,
+ PMD_FAILSAFE_PLUG_IN_POLL_KVARG,
+ &fs_get_u64_arg, &hotplug_poll);
+ if (ret < 0)
+ goto free_kvlist;
+ }
/* MAC addr */
arg_count = rte_kvargs_count(kvlist,
PMD_FAILSAFE_MAC_KVARG);
@@ -287,6 +318,7 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
mac_from_arg = 1;
}
}
+ PRIV(dev)->state = DEV_PARSED;
free_kvlist:
rte_kvargs_free(kvlist);
return ret;
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index 87d2c89..ec0f221 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -91,37 +91,14 @@ fs_bus_init(struct rte_eth_dev *dev)
int
failsafe_eal_init(struct rte_eth_dev *dev)
{
- struct sub_device *sdev;
- uint8_t i;
int ret;

ret = fs_bus_init(dev);
if (ret)
return ret;
- /*
- * We only update TX_SUBDEV if we are not started.
- * If a sub_device is emitting, we will switch the TX_SUBDEV to the
- * preferred port only upon starting it, so that the switch is smoother.
- */
- if (PREFERRED_SUBDEV(dev)->state >= DEV_PROBED) {
- if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
- (TX_SUBDEV(dev) == NULL ||
- (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
- DEBUG("Switching tx_dev to preferred sub_device");
- PRIV(dev)->subs_tx = 0;
- }
- } else {
- if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_PROBED) ||
- TX_SUBDEV(dev) == NULL) {
- /* Using first probed device */
- FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
- DEBUG("Switching tx_dev to sub_device %d",
- i);
- PRIV(dev)->subs_tx = i;
- break;
- }
- }
- }
+ if (PRIV(dev)->state < DEV_PROBED)
+ PRIV(dev)->state = DEV_PROBED;
+ fs_switch_dev(dev);
return 0;
}

@@ -159,5 +136,6 @@ failsafe_eal_uninit(struct rte_eth_dev *dev)
ret = fs_bus_uninit(dev);
if (ret)
return ret;
+ PRIV(dev)->state = DEV_PROBED - 1;
return 0;
}
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
new file mode 100644
index 0000000..7910952
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -0,0 +1,228 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+
+#include "failsafe_private.h"
+
+static int
+fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
+ struct sub_device *sdev)
+{
+ struct rte_eth_dev *edev;
+ struct rte_vlan_filter_conf *vfc1;
+ struct rte_vlan_filter_conf *vfc2;
+ uint32_t i;
+ int ret;
+
+ edev = ETH(sdev);
+ /* RX queue setup */
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ struct rxq *rxq;
+
+ rxq = dev->data->rx_queues[i];
+ ret = rte_eth_rx_queue_setup(PORT_ID(sdev), i,
+ rxq->info.nb_desc, rxq->socket_id,
+ &rxq->info.conf, rxq->info.mp);
+ if (ret) {
+ ERROR("rx_queue_setup failed");
+ return ret;
+ }
+ }
+ /* TX queue setup */
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ struct txq *txq;
+
+ txq = dev->data->tx_queues[i];
+ ret = rte_eth_tx_queue_setup(PORT_ID(sdev), i,
+ txq->info.nb_desc, txq->socket_id,
+ &txq->info.conf);
+ if (ret) {
+ ERROR("tx_queue_setup failed");
+ return ret;
+ }
+ }
+ /* dev_link.link_status */
+ if (dev->data->dev_link.link_status !=
+ edev->data->dev_link.link_status) {
+ DEBUG("Configuring link_status");
+ if (dev->data->dev_link.link_status)
+ ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
+ else
+ ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
+ if (ret) {
+ ERROR("Failed to apply link_status");
+ return ret;
+ }
+ } else {
+ DEBUG("link_status already set");
+ }
+ /* promiscuous */
+ if (dev->data->promiscuous != edev->data->promiscuous) {
+ DEBUG("Configuring promiscuous");
+ if (dev->data->promiscuous)
+ rte_eth_promiscuous_enable(PORT_ID(sdev));
+ else
+ rte_eth_promiscuous_disable(PORT_ID(sdev));
+ } else {
+ DEBUG("promiscuous already set");
+ }
+ /* all_multicast */
+ if (dev->data->all_multicast != edev->data->all_multicast) {
+ DEBUG("Configuring all_multicast");
+ if (dev->data->all_multicast)
+ rte_eth_allmulticast_enable(PORT_ID(sdev));
+ else
+ rte_eth_allmulticast_disable(PORT_ID(sdev));
+ } else {
+ DEBUG("all_multicast already set");
+ }
+ /* MTU */
+ if (dev->data->mtu != edev->data->mtu) {
+ DEBUG("Configuring MTU");
+ ret = rte_eth_dev_set_mtu(PORT_ID(sdev), dev->data->mtu);
+ if (ret) {
+ ERROR("Failed to apply MTU");
+ return ret;
+ }
+ } else {
+ DEBUG("MTU already set");
+ }
+ /* default MAC */
+ DEBUG("Configuring default MAC address");
+ ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
+ &dev->data->mac_addrs[0]);
+ if (ret) {
+ ERROR("Setting default MAC address failed");
+ return ret;
+ }
+ /* additional MAC */
+ if (PRIV(dev)->nb_mac_addr > 1)
+ DEBUG("Configure additional MAC address%s",
+ (PRIV(dev)->nb_mac_addr > 2 ? "es" : ""));
+ for (i = 1; i < PRIV(dev)->nb_mac_addr; i++) {
+ struct ether_addr *ea;
+
+ ea = &dev->data->mac_addrs[i];
+ ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), ea,
+ PRIV(dev)->mac_addr_pool[i]);
+ if (ret) {
+ char ea_fmt[ETHER_ADDR_FMT_SIZE];
+
+ ether_format_addr(ea_fmt, ETHER_ADDR_FMT_SIZE, ea);
+ ERROR("Adding MAC address %s failed", ea_fmt);
+ }
+ }
+ /* VLAN filter */
+ vfc1 = &dev->data->vlan_filter_conf;
+ vfc2 = &edev->data->vlan_filter_conf;
+ if (memcmp(vfc1, vfc2, sizeof(struct rte_vlan_filter_conf))) {
+ uint64_t vbit;
+ uint64_t ids;
+ size_t i;
+ uint16_t vlan_id;
+
+ DEBUG("Configuring VLAN filter");
+ for (i = 0; i < RTE_DIM(vfc1->ids); i++) {
+ if (vfc1->ids[i] == 0)
+ continue;
+ ids = vfc1->ids[i];
+ while (ids) {
+ vlan_id = 64 * i;
+ /* count trailing zeroes */
+ vbit = ~ids & (ids - 1);
+ /* clear least significant bit set */
+ ids ^= (ids ^ (ids - 1)) ^ vbit;
+ for (; vbit; vlan_id++)
+ vbit >>= 1;
+ ret = rte_eth_dev_vlan_filter(
+ PORT_ID(sdev), vlan_id, 1);
+ if (ret) {
+ ERROR("Failed to apply VLAN filter %hu",
+ vlan_id);
+ return ret;
+ }
+ }
+ }
+ } else {
+ DEBUG("VLAN filter already set");
+ }
+ return 0;
+}
+
+int
+failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint32_t inactive;
+ int ret;
+ uint8_t i;
+
+ if (PRIV(dev)->state < DEV_PROBED)
+ return 0;
+ ret = failsafe_eal_init(dev);
+ if (ret)
+ return ret;
+ if (PRIV(dev)->state < DEV_ACTIVE)
+ return 0;
+ inactive = 0;
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state == DEV_PROBED)
+ inactive |= UINT32_C(1) << i;
+ ret = dev->dev_ops->dev_configure(dev);
+ if (ret)
+ return ret;
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (inactive & (UINT32_C(1) << i)) {
+ ret = fs_eth_dev_conf_apply(dev, sdev);
+ if (ret) {
+ ERROR("Could not apply configuration to sub_device %d",
+ i);
+ /* TODO: disable device */
+ return ret;
+ }
+ }
+ }
+ /*
+ * If new devices have been configured, check if
+ * the link state has changed.
+ */
+ if (inactive)
+ dev->dev_ops->link_update(dev, 1);
+ if (PRIV(dev)->state < DEV_STARTED)
+ return 0;
+ ret = dev->dev_ops->dev_start(dev);
+ if (ret)
+ return ret;
+ return 0;
+}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 693162e..4044473 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -89,6 +89,8 @@ fs_dev_configure(struct rte_eth_dev *dev)
}
sdev->state = DEV_ACTIVE;
}
+ if (PRIV(dev)->state < DEV_ACTIVE)
+ PRIV(dev)->state = DEV_ACTIVE;
return 0;
}

@@ -108,21 +110,9 @@ fs_dev_start(struct rte_eth_dev *dev)
return ret;
sdev->state = DEV_STARTED;
}
- if (PREFERRED_SUBDEV(dev)->state == DEV_STARTED) {
- if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev)) {
- DEBUG("Switching tx_dev to preferred sub_device");
- PRIV(dev)->subs_tx = 0;
- }
- } else {
- if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED) ||
- TX_SUBDEV(dev) == NULL) {
- FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
- DEBUG("Switching tx_dev to sub_device %d", i);
- PRIV(dev)->subs_tx = i;
- break;
- }
- }
- }
+ if (PRIV(dev)->state < DEV_STARTED)
+ PRIV(dev)->state = DEV_STARTED;
+ fs_switch_dev(dev);
return 0;
}

@@ -132,6 +122,7 @@ fs_dev_stop(struct rte_eth_dev *dev)
struct sub_device *sdev;
uint8_t i;

+ PRIV(dev)->state = DEV_STARTED - 1;
FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
rte_eth_dev_stop(PORT_ID(sdev));
sdev->state = DEV_STARTED - 1;
@@ -183,6 +174,10 @@ fs_dev_close(struct rte_eth_dev *dev)
struct sub_device *sdev;
uint8_t i;

+ failsafe_hotplug_alarm_cancel(dev);
+ if (PRIV(dev)->state == DEV_STARTED)
+ dev->dev_ops->dev_stop(dev);
+ PRIV(dev)->state = DEV_ACTIVE - 1;
FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
DEBUG("Closing sub_device %d", i);
rte_eth_dev_close(PORT_ID(sdev));
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index e7a7592..8fb72fe 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -41,12 +41,14 @@
#define FAILSAFE_DRIVER_NAME "Fail-safe PMD"

#define PMD_FAILSAFE_MAC_KVARG "mac"
+#define PMD_FAILSAFE_PLUG_IN_POLL_KVARG "hotplug_poll"
#define PMD_FAILSAFE_PARAM_STRING \
"dev(<ifc>)," \
- "mac=mac_addr" \
+ "mac=mac_addr," \
+ "hotplug_poll=u64" \
""

-#define FAILSAFE_PLUGIN_DEFAULT_TIMEOUT_MS 2000
+#define FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS 2000

#define FAILSAFE_MAX_ETHPORTS 2
#define FAILSAFE_MAX_ETHADDR 128
@@ -105,8 +107,22 @@ struct fs_priv {
uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR];
/* current capabilities */
struct rte_eth_dev_info infos;
+ /*
+ * Fail-safe state machine.
+ * This level will be tracking state of the EAL and eth
+ * layer at large as defined by the user application.
+ * It will then steer the sub_devices toward the same
+ * synchronized state.
+ */
+ enum dev_state state;
+ unsigned int pending_alarm:1; /* An alarm is pending */
};

+/* MISC */
+
+int failsafe_hotplug_alarm_install(struct rte_eth_dev *dev);
+int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev);
+
/* RX / TX */

uint16_t failsafe_rx_burst(void *rxq,
@@ -125,10 +141,15 @@ int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
int failsafe_eal_init(struct rte_eth_dev *dev);
int failsafe_eal_uninit(struct rte_eth_dev *dev);

+/* ETH_DEV */
+
+int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
+
/* GLOBALS */

extern const char pmd_failsafe_driver_name[];
extern const struct eth_dev_ops failsafe_ops;
+extern uint64_t hotplug_poll;
extern int mac_from_arg;

/* HELPERS */
@@ -224,4 +245,39 @@ fs_find_next(struct rte_eth_dev *dev, uint8_t sid,
return sid;
}

+static inline void
+fs_switch_dev(struct rte_eth_dev *dev)
+{
+ enum dev_state req_state;
+
+ req_state = PRIV(dev)->state;
+ if (PREFERRED_SUBDEV(dev)->state >= req_state) {
+ if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
+ (TX_SUBDEV(dev) == NULL ||
+ (req_state == DEV_STARTED) ||
+ (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
+ DEBUG("Switching tx_dev to preferred sub_device");
+ PRIV(dev)->subs_tx = 0;
+ }
+ } else if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) ||
+ TX_SUBDEV(dev) == NULL) {
+ struct sub_device *sdev;
+ uint8_t i;
+
+ /* Using acceptable device */
+ FOREACH_SUBDEV_ST(sdev, i, dev, req_state) {
+ DEBUG("Switching tx_dev to sub_device %d",
+ i);
+ PRIV(dev)->subs_tx = i;
+ break;
+ }
+ } else if (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) {
+ DEBUG("No device ready, deactivating tx_dev");
+ PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
+ } else {
+ return;
+ }
+ rte_wmb();
+}
+
#endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */
--
2.1.4
Gaetan Rivet
2017-06-07 23:59:56 UTC
Permalink
Add the "exec" device type.
The parameters given to this type of device will be executed in a shell.
The output of this command is then used as a definition for a device.

That command can be re-interpreted if the related device is not
plugged-in. It allows for a device definition to react to system
changes (e.g. changing PCI bus for a given device).

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/fail_safe.rst | 20 +++++++
drivers/net/failsafe/failsafe_args.c | 99 +++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ether.c | 7 +++
drivers/net/failsafe/failsafe_private.h | 4 ++
4 files changed, 130 insertions(+)

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index c04891a..1b6e110 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -91,6 +91,19 @@ Fail-safe command line parameters
additional sub-device parameters if need be. They will be passed on to the
sub-device.

+- **exec(<shell command>)** parameter
+
+ This parameter allows the user to provide a command to the fail-safe PMD to
+ execute and define a sub-device.
+ It is done within a regular shell context.
+ The first line of its output is read by the fail-safe PMD and otherwise
+ interpreted as if passed by the regular **dev** parameter.
+ Any other line is discarded.
+ If the command fail or output an incorrect string, the sub-device is not
+ initialized.
+ All commas within the ``shell command`` are replaced by spaces before
+ executing the command. This helps using scripts to specify devices.
+
- **mac** parameter [MAC address]

This parameter allows the user to set a default MAC address to the fail-safe
@@ -126,6 +139,13 @@ This section shows some example of using **testpmd** with a fail-safe PMD.
--vdev='net_failsafe0,mac=de:ad:be:ef:01:02,dev(84:00.0),dev(net_ring0,nodeaction=r1:0:CREATE)' -- \
-i

+#. Start testpmd using a flexible device definition
+
+ .. code-block:: console
+
+ $RTE_TARGET/build/app/testpmd -c 0xff -n 4 --no-pci \
+ --vdev='net_failsafe0,exec(echo 84:00.0)' -- -i
+
Using the Fail-safe PMD from an application
-------------------------------------------

diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
index 8f334aa..c723ca3 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -30,6 +30,8 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
+#include <assert.h>
#include <string.h>
#include <errno.h>

@@ -96,6 +98,75 @@ fs_parse_device(struct sub_device *sdev, char *args)
return 0;
}

+static void
+fs_sanitize_cmdline(char *args)
+{
+ size_t len;
+
+ len = strnlen(args, DEVARGS_MAXLEN);
+ args[len - 1] = '\0';
+}
+
+static int
+fs_execute_cmd(struct sub_device *sdev, char *cmdline)
+{
+ FILE *fp;
+ /* store possible newline as well */
+ char output[DEVARGS_MAXLEN + 1];
+ size_t len;
+ int old_err;
+ int ret;
+
+ assert(cmdline != NULL || sdev->cmdline != NULL);
+ if (sdev->cmdline == NULL) {
+ char *new_str;
+ size_t i;
+
+ len = strlen(cmdline) + 1;
+ new_str = rte_realloc(sdev->cmdline, len,
+ RTE_CACHE_LINE_SIZE);
+ if (new_str == NULL) {
+ ERROR("Command line allocation failed");
+ return -ENOMEM;
+ }
+ sdev->cmdline = new_str;
+ snprintf(sdev->cmdline, len, "%s", cmdline);
+ /* Replace all commas in the command line by spaces */
+ for (i = 0; i < len; i++)
+ if (sdev->cmdline[i] == ',')
+ sdev->cmdline[i] = ' ';
+ }
+ DEBUG("'%s'", sdev->cmdline);
+ old_err = errno;
+ fp = popen(sdev->cmdline, "r");
+ if (fp == NULL) {
+ ret = errno;
+ ERROR("popen: %s", strerror(errno));
+ errno = old_err;
+ return ret;
+ }
+ /* We only read one line */
+ if (fgets(output, sizeof(output) - 1, fp) == NULL) {
+ DEBUG("Could not read command output");
+ return -ENODEV;
+ }
+ fs_sanitize_cmdline(output);
+ ret = fs_parse_device(sdev, output);
+ if (ret) {
+ ERROR("Parsing device '%s' failed", output);
+ goto ret_pclose;
+ }
+ret_pclose:
+ ret = pclose(fp);
+ if (ret) {
+ ret = errno;
+ ERROR("pclose: %s", strerror(errno));
+ errno = old_err;
+ return ret;
+ }
+ return ret;
+}
+
static int
fs_parse_device_param(struct rte_eth_dev *dev, const char *param,
uint8_t head)
@@ -130,6 +201,14 @@ fs_parse_device_param(struct rte_eth_dev *dev, const char *param,
ret = fs_parse_device(sdev, args);
if (ret)
goto free_args;
+ } else if (strncmp(param, "exec", 4) == 0) {
+ ret = fs_execute_cmd(sdev, args);
+ if (ret == -ENODEV) {
+ DEBUG("Reading device info from command line failed");
+ ret = 0;
+ }
+ if (ret)
+ goto free_args;
} else {
ERROR("Unrecognized device type: %.*s", (int)b, param);
return -EINVAL;
@@ -331,6 +410,8 @@ failsafe_args_free(struct rte_eth_dev *dev)
uint8_t i;

FOREACH_SUBDEV(sdev, i, dev) {
+ rte_free(sdev->cmdline);
+ sdev->cmdline = NULL;
free(sdev->devargs.args);
sdev->devargs.args = NULL;
}
@@ -361,3 +442,21 @@ failsafe_args_count_subdevice(struct rte_eth_dev *dev,
return fs_parse_sub_devices(fs_count_device,
dev, params);
}
+
+int
+failsafe_args_parse_subs(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret = 0;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state >= DEV_PARSED)
+ continue;
+ if (sdev->cmdline)
+ ret = fs_execute_cmd(sdev, sdev->cmdline);
+ if (ret == 0)
+ sdev->state = DEV_PARSED;
+ }
+ return 0;
+}
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 7910952..2a1535e 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -188,6 +188,13 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
int ret;
uint8_t i;

+ if (PRIV(dev)->state < DEV_PARSED)
+ return 0;
+
+ ret = failsafe_args_parse_subs(dev);
+ if (ret)
+ return ret;
+
if (PRIV(dev)->state < DEV_PROBED)
return 0;
ret = failsafe_eal_init(dev);
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 8fb72fe..554d7a3 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -44,6 +44,7 @@
#define PMD_FAILSAFE_PLUG_IN_POLL_KVARG "hotplug_poll"
#define PMD_FAILSAFE_PARAM_STRING \
"dev(<ifc>)," \
+ "exec(<shell command>)," \
"mac=mac_addr," \
"hotplug_poll=u64" \
""
@@ -87,6 +88,8 @@ struct sub_device {
struct rte_eth_dev *edev;
/* Device state machine */
enum dev_state state;
+ /* Some device are defined as a command line */
+ char *cmdline;
};

struct fs_priv {
@@ -135,6 +138,7 @@ uint16_t failsafe_tx_burst(void *txq,
int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
void failsafe_args_free(struct rte_eth_dev *dev);
int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
+int failsafe_args_parse_subs(struct rte_eth_dev *dev);

/* EAL */
--
2.1.4
Gaetan Rivet
2017-06-07 23:59:57 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/features/failsafe.ini | 1 +
drivers/net/failsafe/Makefile | 1 +
drivers/net/failsafe/failsafe.c | 1 +
drivers/net/failsafe/failsafe_eal.c | 1 +
drivers/net/failsafe/failsafe_ether.c | 70 +++++++++++
drivers/net/failsafe/failsafe_flow.c | 216 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ops.c | 29 +++++
drivers/net/failsafe/failsafe_private.h | 18 +++
8 files changed, 337 insertions(+)
create mode 100644 drivers/net/failsafe/failsafe_flow.c

diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
index 3c52823..9167b59 100644
--- a/doc/guides/nics/features/failsafe.ini
+++ b/doc/guides/nics/features/failsafe.ini
@@ -13,6 +13,7 @@ Allmulticast mode = Y
Unicast MAC filter = Y
Multicast MAC filter = Y
VLAN filter = Y
+Flow API = Y
Packet type parsing = Y
Basic stats = Y
Stats per queue = Y
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
index 4567961..a53bb75 100644
--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -41,6 +41,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ether.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_flow.c

# No exported include files

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 888f07b..6557255 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -177,6 +177,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0];
dev->data->dev_link = eth_link;
PRIV(dev)->nb_mac_addr = 1;
+ TAILQ_INIT(&PRIV(dev)->flow_list);
dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst;
dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst;
if (params == NULL) {
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index ec0f221..4ae99e0 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -81,6 +81,7 @@ fs_bus_init(struct rte_eth_dev *dev)
ERROR("sub_device %d init went wrong", i);
return -ENODEV;
}
+ SUB_ID(sdev) = i;
sdev->dev = ETH(sdev)->device;
ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
sdev->state = DEV_PROBED;
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 2a1535e..2958207 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -33,8 +33,46 @@

#include <unistd.h>

+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+
#include "failsafe_private.h"

+/** Print a message out of a flow error. */
+static int
+fs_flow_complain(struct rte_flow_error *error)
+{
+ static const char *const errstrlist[] = {
+ [RTE_FLOW_ERROR_TYPE_NONE] = "no error",
+ [RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified",
+ [RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)",
+ [RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
+ [RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
+ [RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
+ [RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
+ [RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
+ [RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
+ };
+ const char *errstr;
+ char buf[32];
+ int err = rte_errno;
+
+ if ((unsigned int)error->type >= RTE_DIM(errstrlist) ||
+ !errstrlist[error->type])
+ errstr = "unknown type";
+ else
+ errstr = errstrlist[error->type];
+ ERROR("Caught error type %d (%s): %s%s\n",
+ error->type, errstr,
+ error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ",
+ error->cause), buf) : "",
+ error->message ? error->message : "(no stated reason)");
+ return -err;
+}
+
static int
fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
struct sub_device *sdev)
@@ -42,6 +80,8 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
struct rte_eth_dev *edev;
struct rte_vlan_filter_conf *vfc1;
struct rte_vlan_filter_conf *vfc2;
+ struct rte_flow *flow;
+ struct rte_flow_error ferror;
uint32_t i;
int ret;

@@ -177,6 +217,36 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
} else {
DEBUG("VLAN filter already set");
}
+ /* rte_flow */
+ if (TAILQ_EMPTY(&PRIV(dev)->flow_list)) {
+ DEBUG("rte_flow already set");
+ } else {
+ DEBUG("Resetting rte_flow configuration");
+ ret = rte_flow_flush(PORT_ID(sdev), &ferror);
+ if (ret) {
+ fs_flow_complain(&ferror);
+ return ret;
+ }
+ i = 0;
+ rte_errno = 0;
+ DEBUG("Configuring rte_flow");
+ TAILQ_FOREACH(flow, &PRIV(dev)->flow_list, next) {
+ DEBUG("Creating flow #%" PRIu32, i++);
+ flow->flows[SUB_ID(sdev)] =
+ rte_flow_create(PORT_ID(sdev),
+ &flow->fd->attr,
+ flow->fd->items,
+ flow->fd->actions,
+ &ferror);
+ ret = rte_errno;
+ if (ret)
+ break;
+ }
+ if (ret) {
+ fs_flow_complain(&ferror);
+ return ret;
+ }
+ }
return 0;
}

diff --git a/drivers/net/failsafe/failsafe_flow.c b/drivers/net/failsafe/failsafe_flow.c
new file mode 100644
index 0000000..d8f59a1
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_flow.c
@@ -0,0 +1,216 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/queue.h>
+
+#include <rte_malloc.h>
+#include <rte_tailq.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+
+#include "failsafe_private.h"
+
+static struct rte_flow *
+fs_flow_allocate(const struct rte_flow_attr *attr,
+ const struct rte_flow_item *items,
+ const struct rte_flow_action *actions)
+{
+ struct rte_flow *flow;
+ size_t fdsz;
+
+ fdsz = rte_flow_copy(NULL, 0, attr, items, actions);
+ flow = rte_zmalloc(NULL,
+ sizeof(struct rte_flow) + fdsz,
+ RTE_CACHE_LINE_SIZE);
+ if (flow == NULL) {
+ ERROR("Could not allocate new flow");
+ return NULL;
+ }
+ flow->fd = (void *)((uintptr_t)flow + sizeof(*flow));
+ if (rte_flow_copy(flow->fd, fdsz, attr, items, actions) != fdsz) {
+ ERROR("Failed to copy flow description");
+ rte_free(flow);
+ return NULL;
+ }
+ return flow;
+}
+
+static void
+fs_flow_release(struct rte_flow **flow)
+{
+ rte_free((*flow)->fd);
+ rte_free(*flow);
+ *flow = NULL;
+}
+
+static int
+fs_flow_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item patterns[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_flow_validate on sub_device %d", i);
+ ret = rte_flow_validate(PORT_ID(sdev),
+ attr, patterns, actions, error);
+ if (ret) {
+ ERROR("Operation rte_flow_validate failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static struct rte_flow *
+fs_flow_create(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item patterns[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ struct rte_flow *flow;
+ uint8_t i;
+
+ flow = fs_flow_allocate(attr, patterns, actions);
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ flow->flows[i] = rte_flow_create(PORT_ID(sdev),
+ attr, patterns, actions, error);
+ if (flow->flows[i] == NULL) {
+ ERROR("Failed to create flow on sub_device %d",
+ i);
+ goto err;
+ }
+ }
+ TAILQ_INSERT_TAIL(&PRIV(dev)->flow_list, flow, next);
+ return flow;
+err:
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (flow->flows[i] != NULL)
+ rte_flow_destroy(PORT_ID(sdev),
+ flow->flows[i], error);
+ }
+ fs_flow_release(&flow);
+ return NULL;
+}
+
+static int
+fs_flow_destroy(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ if (flow == NULL) {
+ ERROR("Invalid flow");
+ return -EINVAL;
+ }
+ ret = 0;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ int local_ret;
+
+ if (flow->flows[i] == NULL)
+ continue;
+ local_ret = rte_flow_destroy(PORT_ID(sdev),
+ flow->flows[i], error);
+ if (local_ret) {
+ ERROR("Failed to destroy flow on sub_device %d: %d",
+ i, local_ret);
+ if (ret == 0)
+ ret = local_ret;
+ }
+ }
+ TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
+ rte_free(flow);
+ return ret;
+}
+
+static int
+fs_flow_flush(struct rte_eth_dev *dev,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ struct rte_flow *flow;
+ void *tmp;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_flow_flush on sub_device %d", i);
+ ret = rte_flow_flush(PORT_ID(sdev), error);
+ if (ret) {
+ ERROR("Operation rte_flow_flush failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ TAILQ_FOREACH_SAFE(flow, &PRIV(dev)->flow_list, next, tmp) {
+ TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
+ fs_flow_release(&flow);
+ }
+ return 0;
+}
+
+static int
+fs_flow_query(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ enum rte_flow_action_type type,
+ void *arg,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev != NULL) {
+ return rte_flow_query(PORT_ID(sdev),
+ flow->flows[SUB_ID(sdev)], type, arg, error);
+ }
+ WARN("No active sub_device to query about its flow");
+ return -1;
+}
+
+const struct rte_flow_ops fs_flow_ops = {
+ .validate = fs_flow_validate,
+ .create = fs_flow_create,
+ .destroy = fs_flow_destroy,
+ .flush = fs_flow_flush,
+ .query = fs_flow_query,
+};
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 4044473..4cb2e90 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -35,6 +35,7 @@
#include <stdint.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>
+#include <rte_flow.h>

#include "failsafe_private.h"

@@ -628,6 +629,33 @@ fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
}

+static int
+fs_filter_ctrl(struct rte_eth_dev *dev,
+ enum rte_filter_type type,
+ enum rte_filter_op op,
+ void *arg)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ if (type == RTE_ETH_FILTER_GENERIC &&
+ op == RTE_ETH_FILTER_GET) {
+ *(const void **)arg = &fs_flow_ops;
+ return 0;
+ }
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d", i);
+ ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_filter_ctrl failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
const struct eth_dev_ops failsafe_ops = {
.dev_configure = fs_dev_configure,
.dev_start = fs_dev_start,
@@ -655,4 +683,5 @@ const struct eth_dev_ops failsafe_ops = {
.mac_addr_remove = fs_mac_addr_remove,
.mac_addr_add = fs_mac_addr_add,
.mac_addr_set = fs_mac_addr_set,
+ .filter_ctrl = fs_filter_ctrl,
};
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 554d7a3..f40ea2f 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -34,6 +34,8 @@
#ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_
#define _RTE_ETH_FAILSAFE_PRIVATE_H_

+#include <sys/queue.h>
+
#include <rte_dev.h>
#include <rte_ethdev.h>
#include <rte_devargs.h>
@@ -72,6 +74,14 @@ struct txq {
struct rte_eth_txq_info info;
};

+struct rte_flow {
+ TAILQ_ENTRY(rte_flow) next;
+ /* sub_flows */
+ struct rte_flow *flows[FAILSAFE_MAX_ETHPORTS];
+ /* flow description for synchronization */
+ struct rte_flow_desc *fd;
+};
+
enum dev_state {
DEV_UNDEFINED = 0,
DEV_PARSED,
@@ -86,6 +96,7 @@ struct sub_device {
struct rte_bus *bus;
struct rte_device *dev;
struct rte_eth_dev *edev;
+ uint8_t sid;
/* Device state machine */
enum dev_state state;
/* Some device are defined as a command line */
@@ -104,6 +115,8 @@ struct fs_priv {
uint8_t subs_tail; /* first invalid */
uint8_t subs_tx; /* current emitting device */
uint8_t current_probed;
+ /* flow mapping */
+ TAILQ_HEAD(sub_flows, rte_flow) flow_list;
/* current number of mac_addr slots allocated. */
uint32_t nb_mac_addr;
struct ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR];
@@ -153,6 +166,7 @@ int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);

extern const char pmd_failsafe_driver_name[];
extern const struct eth_dev_ops failsafe_ops;
+extern const struct rte_flow_ops fs_flow_ops;
extern uint64_t hotplug_poll;
extern int mac_from_arg;

@@ -170,6 +184,10 @@ extern int mac_from_arg;
#define PORT_ID(sdev) \
(ETH(sdev)->data->port_id)

+/* sdev: (struct sub_device *) */
+#define SUB_ID(sdev) \
+ ((sdev)->sid)
+
/**
* Stateful iterator construct over fail-safe sub-devices:
* s: (struct sub_device *), iterator
--
2.1.4
Gaetan Rivet
2017-06-07 23:59:58 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/features/failsafe.ini | 6 ++
drivers/net/failsafe/failsafe_ops.c | 131 +++++++++++++++++++++++++++++++++-
2 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
index 9167b59..257f579 100644
--- a/doc/guides/nics/features/failsafe.ini
+++ b/doc/guides/nics/features/failsafe.ini
@@ -14,6 +14,12 @@ Unicast MAC filter = Y
Multicast MAC filter = Y
VLAN filter = Y
Flow API = Y
+VLAN offload = Y
+QinQ offload = Y
+L3 checksum offload = Y
+L4 checksum offload = Y
+Inner L3 checksum = Y
+Inner L4 checksum = Y
Packet type parsing = Y
Basic stats = Y
Stats per queue = Y
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 4cb2e90..5fb0135 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -63,22 +63,149 @@ static struct rte_eth_dev_info default_infos = {
.nb_seg_max = UINT16_MAX,
.nb_mtu_seg_max = UINT16_MAX,
},
- /* Set of understood capabilities */
- .rx_offload_capa = 0x0,
+ /*
+ * Set of capabilities that can be verified upon
+ * configuring a sub-device.
+ */
+ .rx_offload_capa =
+ DEV_RX_OFFLOAD_VLAN_STRIP |
+ DEV_RX_OFFLOAD_QINQ_STRIP |
+ DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_LRO,
.tx_offload_capa = 0x0,
.flow_type_rss_offloads = 0x0,
};

+/**
+ * Check whether a specific offloading capability
+ * is supported by a sub_device.
+ *
+ * @return
+ * 0: all requested capabilities are supported by the sub_device
+ * positive value: This flag at least is not supported by the sub_device
+ */
+static int
+fs_port_offload_validate(struct rte_eth_dev *dev,
+ struct sub_device *sdev)
+{
+ struct rte_eth_dev_info infos = {0};
+ struct rte_eth_conf *cf;
+ uint32_t cap;
+
+ cf = &dev->data->dev_conf;
+ SUBOPS(sdev, dev_infos_get)(ETH(sdev), &infos);
+ /* RX capabilities */
+ cap = infos.rx_offload_capa;
+ if (cf->rxmode.hw_vlan_strip &&
+ ((cap & DEV_RX_OFFLOAD_VLAN_STRIP) == 0)) {
+ WARN("VLAN stripping offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_VLAN_STRIP;
+ }
+ if (cf->rxmode.hw_ip_checksum &&
+ ((cap & (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM)) !=
+ (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM))) {
+ WARN("IP checksum offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM;
+ }
+ if (cf->rxmode.enable_lro &&
+ ((cap & DEV_RX_OFFLOAD_TCP_LRO) == 0)) {
+ WARN("TCP LRO offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_TCP_LRO;
+ }
+ if (cf->rxmode.hw_vlan_extend &&
+ ((cap & DEV_RX_OFFLOAD_QINQ_STRIP) == 0)) {
+ WARN("Stacked VLAN stripping offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_QINQ_STRIP;
+ }
+ /* TX capabilities */
+ /* Nothing to do, no tx capa supported */
+ return 0;
+}
+
+/*
+ * Disable the dev_conf flag related to an offload capability flag
+ * within an ethdev configuration.
+ */
+static int
+fs_port_disable_offload(struct rte_eth_conf *cf,
+ uint32_t ol_cap)
+{
+ switch (ol_cap) {
+ case DEV_RX_OFFLOAD_VLAN_STRIP:
+ INFO("Disabling VLAN stripping offload");
+ cf->rxmode.hw_vlan_strip = 0;
+ break;
+ case DEV_RX_OFFLOAD_IPV4_CKSUM:
+ case DEV_RX_OFFLOAD_UDP_CKSUM:
+ case DEV_RX_OFFLOAD_TCP_CKSUM:
+ case (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM):
+ INFO("Disabling IP checksum offload");
+ cf->rxmode.hw_ip_checksum = 0;
+ break;
+ case DEV_RX_OFFLOAD_TCP_LRO:
+ INFO("Disabling TCP LRO offload");
+ cf->rxmode.enable_lro = 0;
+ break;
+ case DEV_RX_OFFLOAD_QINQ_STRIP:
+ INFO("Disabling stacked VLAN stripping offload");
+ cf->rxmode.hw_vlan_extend = 0;
+ break;
+ default:
+ DEBUG("Unable to disable offload capability: %" PRIx32,
+ ol_cap);
+ return -1;
+ }
+ return 0;
+}
+
static int
fs_dev_configure(struct rte_eth_dev *dev)
{
struct sub_device *sdev;
uint8_t i;
+ int capa_flag;
int ret;

FOREACH_SUBDEV(sdev, i, dev) {
if (sdev->state != DEV_PROBED)
continue;
+ DEBUG("Checking capabilities for sub_device %d", i);
+ while ((capa_flag = fs_port_offload_validate(dev, sdev))) {
+ /*
+ * Refuse to change configuration if multiple devices
+ * are present and we already have configured at least
+ * some of them.
+ */
+ if (PRIV(dev)->state >= DEV_ACTIVE &&
+ PRIV(dev)->subs_tail > 1) {
+ ERROR("device already configured, cannot fix live configuration");
+ return -1;
+ }
+ ret = fs_port_disable_offload(&dev->data->dev_conf,
+ capa_flag);
+ if (ret) {
+ ERROR("Unable to disable offload capability");
+ return ret;
+ }
+ }
+ }
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_PROBED)
+ continue;
DEBUG("Configuring sub-device %d", i);
ret = rte_eth_dev_configure(PORT_ID(sdev),
dev->data->nb_rx_queues,
--
2.1.4
Gaetan Rivet
2017-06-07 23:59:59 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
drivers/net/failsafe/failsafe_private.h | 8 +++
drivers/net/failsafe/failsafe_rxtx.c | 124 ++++++++++++++++++++++++++------
2 files changed, 112 insertions(+), 20 deletions(-)

diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index f40ea2f..25a4dac 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -141,11 +141,18 @@ int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev);

/* RX / TX */

+void set_burst_fn(struct rte_eth_dev *dev, int force_safe);
+
uint16_t failsafe_rx_burst(void *rxq,
struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
uint16_t failsafe_tx_burst(void *txq,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts);

+uint16_t failsafe_rx_burst_fast(void *rxq,
+ struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t failsafe_tx_burst_fast(void *txq,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
/* ARGS */

int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
@@ -299,6 +306,7 @@ fs_switch_dev(struct rte_eth_dev *dev)
} else {
return;
}
+ set_burst_fn(dev, 0);
rte_wmb();
}

diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c
index a45b4e5..c15025f 100644
--- a/drivers/net/failsafe/failsafe_rxtx.c
+++ b/drivers/net/failsafe/failsafe_rxtx.c
@@ -31,16 +31,63 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

+#include <assert.h>
+
#include <rte_mbuf.h>
#include <rte_ethdev.h>

#include "failsafe_private.h"

-/*
- * TODO: write fast version,
- * without additional checks, to be activated once
- * everything has been verified to comply.
- */
+static inline int
+fs_rx_unsafe(struct sub_device *sdev)
+{
+ return (ETH(sdev) == NULL) ||
+ (ETH(sdev)->rx_pkt_burst == NULL) ||
+ (sdev->state != DEV_STARTED);
+}
+
+static inline int
+fs_tx_unsafe(struct sub_device *sdev)
+{
+ return (sdev == NULL) ||
+ (ETH(sdev) == NULL) ||
+ (ETH(sdev)->tx_pkt_burst == NULL) ||
+ (sdev->state != DEV_STARTED);
+}
+
+void
+set_burst_fn(struct rte_eth_dev *dev, int force_safe)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int need_safe;
+ int safe_set;
+
+ need_safe = force_safe;
+ FOREACH_SUBDEV(sdev, i, dev)
+ need_safe |= fs_rx_unsafe(sdev);
+ safe_set = (dev->rx_pkt_burst == &failsafe_rx_burst);
+ if (need_safe && !safe_set) {
+ DEBUG("Using safe RX bursts%s",
+ (force_safe ? " (forced)" : ""));
+ dev->rx_pkt_burst = &failsafe_rx_burst;
+ } else if (!need_safe && safe_set) {
+ DEBUG("Using fast RX bursts");
+ dev->rx_pkt_burst = &failsafe_rx_burst_fast;
+ }
+ need_safe = force_safe || fs_tx_unsafe(TX_SUBDEV(dev));
+ safe_set = (dev->tx_pkt_burst == &failsafe_tx_burst);
+ if (need_safe && !safe_set) {
+ DEBUG("Using safe TX bursts%s",
+ (force_safe ? " (forced)" : ""));
+ dev->tx_pkt_burst = &failsafe_tx_burst;
+ } else if (!need_safe && safe_set) {
+ DEBUG("Using fast TX bursts");
+ dev->tx_pkt_burst = &failsafe_tx_burst_fast;
+ }
+ rte_wmb();
+}
+
uint16_t
failsafe_rx_burst(void *queue,
struct rte_mbuf **rx_pkts,
@@ -63,11 +110,7 @@ failsafe_rx_burst(void *queue,
if (i == priv->subs_tail)
i = priv->subs_head;
sdev = &priv->subs[i];
- if (unlikely(ETH(sdev) == NULL))
- continue;
- if (unlikely(ETH(sdev)->rx_pkt_burst == NULL))
- continue;
- if (unlikely(sdev->state != DEV_STARTED))
+ if (unlikely(fs_rx_unsafe(sdev)))
continue;
sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
nb_rx = ETH(sdev)->
@@ -80,11 +123,40 @@ failsafe_rx_burst(void *queue,
return 0;
}

-/*
- * TODO: write fast version,
- * without additional checks, to be activated once
- * everything has been verified to comply.
- */
+uint16_t
+failsafe_rx_burst_fast(void *queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ void *sub_rxq;
+ uint16_t nb_rx;
+ uint8_t nb_polled, nb_subs;
+ uint8_t i;
+
+ rxq = queue;
+ priv = rxq->priv;
+ nb_subs = priv->subs_tail - priv->subs_head;
+ nb_polled = 0;
+ for (i = rxq->last_polled; nb_polled < nb_subs; nb_polled++) {
+ i++;
+ if (i == priv->subs_tail)
+ i = priv->subs_head;
+ sdev = &priv->subs[i];
+ assert(!fs_rx_unsafe(sdev));
+ sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ nb_rx = ETH(sdev)->
+ rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ if (nb_rx) {
+ rxq->last_polled = i;
+ return nb_rx;
+ }
+ }
+ return 0;
+}
+
uint16_t
failsafe_tx_burst(void *queue,
struct rte_mbuf **tx_pkts,
@@ -96,12 +168,24 @@ failsafe_tx_burst(void *queue,

txq = queue;
sdev = TX_SUBDEV(txq->priv->dev);
- if (unlikely(sdev == NULL))
- return 0;
- if (unlikely(ETH(sdev) == NULL))
- return 0;
- if (unlikely(ETH(sdev)->tx_pkt_burst == NULL))
+ if (unlikely(fs_tx_unsafe(sdev)))
return 0;
sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
}
+
+uint16_t
+failsafe_tx_burst_fast(void *queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ void *sub_txq;
+
+ txq = queue;
+ sdev = TX_SUBDEV(txq->priv->dev);
+ assert(!fs_tx_unsafe(sdev));
+ sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
+ return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+}
--
2.1.4
Gaetan Rivet
2017-06-08 00:00:00 UTC
Permalink
Listen to INTR_RMV events issued by slaves.
Add atomic flags on slave queues to detect use of slave bursts function.
If a removal is detected, set the recollection flag on this slave.

During a slave upkeep round, if its recollection flag is set and its
burst functions are not in use by any thread, remove that slave.

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/fail_safe.rst | 14 +++++
drivers/net/failsafe/failsafe.c | 1 +
drivers/net/failsafe/failsafe_args.c | 13 +++++
drivers/net/failsafe/failsafe_eal.c | 3 +-
drivers/net/failsafe/failsafe_ether.c | 92 ++++++++++++++++++++++++++++++---
drivers/net/failsafe/failsafe_ops.c | 38 ++++++++++++--
drivers/net/failsafe/failsafe_private.h | 72 +++++++++++++++++++++++---
drivers/net/failsafe/failsafe_rxtx.c | 17 +++++-
8 files changed, 230 insertions(+), 20 deletions(-)

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index 1b6e110..4154f0a 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -51,6 +51,12 @@ The Fail-safe PMD only supports a limited set of features. If you plan to use a
device underneath the Fail-safe PMD with a specific feature, this feature must
be supported by the Fail-safe PMD to avoid throwing any error.

+A notable exception is the device removal feature. The fail-safe PMD being a
+virtual device, it cannot currently be removed in the sense of a specific bus
+hotplug, like for PCI for example. It will however enable this feature for its
+sub-device automatically, detecting those that are capable and register the
+relevant callback for such event.
+
Check the feature matrix for the complete set of supported features.

Compilation options
@@ -170,3 +176,11 @@ emit and receive packets. It will store any applied configuration, and try to
apply it upon the probing of its missing sub-device. After this configuration
pass, the new sub-device will be synchronized with other sub-devices, i.e. be
started if the fail-safe PMD has been started by the user before.
+
+Plug-out feature
+----------------
+
+A sub-device supporting the device removal event can be removed from its bus at
+any time. The fail-safe PMD will register a callback for such event and react
+accordingly. It will try to safely stop, close and uninit the sub-device having
+emitted this event, allowing it to free its eventual resources.
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 6557255..4d35860 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -132,6 +132,7 @@ fs_hotplug_alarm(void *arg)
if (!PRIV(dev)->pending_alarm)
return;
PRIV(dev)->pending_alarm = 0;
+ failsafe_dev_remove(dev);
FOREACH_SUBDEV(sdev, i, dev)
if (sdev->state != PRIV(dev)->state)
break;
diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
index c723ca3..dd55aaf 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -443,6 +443,17 @@ failsafe_args_count_subdevice(struct rte_eth_dev *dev,
dev, params);
}

+static int
+fs_parse_sub_device(struct sub_device *sdev)
+{
+ struct rte_devargs *da;
+ char devstr[DEVARGS_MAXLEN] = "";
+
+ da = &sdev->devargs;
+ snprintf(devstr, sizeof(devstr), "%s,%s", da->name, da->args);
+ return fs_parse_device(sdev, devstr);
+}
+
int
failsafe_args_parse_subs(struct rte_eth_dev *dev)
{
@@ -455,6 +466,8 @@ failsafe_args_parse_subs(struct rte_eth_dev *dev)
continue;
if (sdev->cmdline)
ret = fs_execute_cmd(sdev, sdev->cmdline);
+ else
+ ret = fs_parse_sub_device(sdev);
if (ret == 0)
sdev->state = DEV_PARSED;
}
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index 4ae99e0..3eefbf8 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -82,6 +82,7 @@ fs_bus_init(struct rte_eth_dev *dev)
return -ENODEV;
}
SUB_ID(sdev) = i;
+ sdev->fs_dev = dev;
sdev->dev = ETH(sdev)->device;
ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
sdev->state = DEV_PROBED;
@@ -99,7 +100,7 @@ failsafe_eal_init(struct rte_eth_dev *dev)
return ret;
if (PRIV(dev)->state < DEV_PROBED)
PRIV(dev)->state = DEV_PROBED;
- fs_switch_dev(dev);
+ fs_switch_dev(dev, NULL);
return 0;
}

diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 2958207..4456d21 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -250,6 +250,64 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
return 0;
}

+static void
+fs_dev_remove(struct sub_device *sdev)
+{
+ if (sdev == NULL)
+ return;
+ switch (sdev->state) {
+ case DEV_STARTED:
+ rte_eth_dev_stop(PORT_ID(sdev));
+ sdev->state = DEV_ACTIVE;
+ /* fallthrough */
+ case DEV_ACTIVE:
+ rte_eth_dev_close(PORT_ID(sdev));
+ sdev->state = DEV_PROBED;
+ /* fallthrough */
+ case DEV_PROBED:
+ if (sdev->bus->unplug == NULL ||
+ sdev->bus->unplug(sdev->dev)) {
+ ERROR("Bus detach failed for sub_device %u",
+ SUB_ID(sdev));
+ } else {
+ ETH(sdev)->state = RTE_ETH_DEV_UNUSED;
+ }
+ sdev->state = DEV_PARSED;
+ /* fallthrough */
+ case DEV_PARSED:
+ case DEV_UNDEFINED:
+ sdev->state = DEV_UNDEFINED;
+ /* the end */
+ break;
+ }
+ failsafe_hotplug_alarm_install(sdev->fs_dev);
+}
+
+static inline int
+fs_rxtx_clean(struct sub_device *sdev)
+{
+ uint16_t i;
+
+ for (i = 0; i < ETH(sdev)->data->nb_rx_queues; i++)
+ if (FS_ATOMIC_RX(sdev, i))
+ return 0;
+ for (i = 0; i < ETH(sdev)->data->nb_tx_queues; i++)
+ if (FS_ATOMIC_TX(sdev, i))
+ return 0;
+ return 1;
+}
+
+void
+failsafe_dev_remove(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ if (sdev->remove && fs_rxtx_clean(sdev))
+ fs_dev_remove(sdev);
+}
+
int
failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
{
@@ -263,13 +321,13 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)

ret = failsafe_args_parse_subs(dev);
if (ret)
- return ret;
+ goto err_remove;

if (PRIV(dev)->state < DEV_PROBED)
return 0;
ret = failsafe_eal_init(dev);
if (ret)
- return ret;
+ goto err_remove;
if (PRIV(dev)->state < DEV_ACTIVE)
return 0;
inactive = 0;
@@ -278,15 +336,14 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
inactive |= UINT32_C(1) << i;
ret = dev->dev_ops->dev_configure(dev);
if (ret)
- return ret;
+ goto err_remove;
FOREACH_SUBDEV(sdev, i, dev) {
if (inactive & (UINT32_C(1) << i)) {
ret = fs_eth_dev_conf_apply(dev, sdev);
if (ret) {
ERROR("Could not apply configuration to sub_device %d",
i);
- /* TODO: disable device */
- return ret;
+ goto err_remove;
}
}
}
@@ -300,6 +357,29 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
return 0;
ret = dev->dev_ops->dev_start(dev);
if (ret)
- return ret;
+ goto err_remove;
return 0;
+err_remove:
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state != PRIV(dev)->state)
+ sdev->remove = 1;
+ return ret;
+}
+
+void
+failsafe_eth_rmv_event_callback(uint8_t port_id __rte_unused,
+ enum rte_eth_event_type event __rte_unused,
+ void *cb_arg)
+{
+ struct sub_device *sdev = cb_arg;
+
+ /* Switch as soon as possible tx_dev. */
+ fs_switch_dev(sdev->fs_dev, sdev);
+ /* Use safe bursts in any case. */
+ set_burst_fn(sdev->fs_dev, 1);
+ /*
+ * Async removal, the sub-PMD will try to unregister
+ * the callback at the source of the current thread context.
+ */
+ sdev->remove = 1;
}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 5fb0135..2e1c798 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -33,6 +33,8 @@

#include <assert.h>
#include <stdint.h>
+
+#include <rte_atomic.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>
#include <rte_flow.h>
@@ -204,9 +206,21 @@ fs_dev_configure(struct rte_eth_dev *dev)
}
}
FOREACH_SUBDEV(sdev, i, dev) {
+ int rmv_interrupt = 0;
+
if (sdev->state != DEV_PROBED)
continue;
+
+ rmv_interrupt = ETH(sdev)->data->dev_flags &
+ RTE_ETH_DEV_INTR_RMV;
+ if (rmv_interrupt) {
+ DEBUG("Enabling RMV interrupts for sub_device %d", i);
+ dev->data->dev_conf.intr_conf.rmv = 1;
+ } else {
+ DEBUG("sub_device %d does not support RMV event", i);
+ }
DEBUG("Configuring sub-device %d", i);
+ sdev->remove = 0;
ret = rte_eth_dev_configure(PORT_ID(sdev),
dev->data->nb_rx_queues,
dev->data->nb_tx_queues,
@@ -215,6 +229,16 @@ fs_dev_configure(struct rte_eth_dev *dev)
ERROR("Could not configure sub_device %d", i);
return ret;
}
+ if (rmv_interrupt) {
+ ret = rte_eth_dev_callback_register(PORT_ID(sdev),
+ RTE_ETH_EVENT_INTR_RMV,
+ failsafe_eth_rmv_event_callback,
+ sdev);
+ if (ret)
+ WARN("Failed to register RMV callback for sub_device %d",
+ SUB_ID(sdev));
+ }
+ dev->data->dev_conf.intr_conf.rmv = 0;
sdev->state = DEV_ACTIVE;
}
if (PRIV(dev)->state < DEV_ACTIVE)
@@ -240,7 +264,7 @@ fs_dev_start(struct rte_eth_dev *dev)
}
if (PRIV(dev)->state < DEV_STARTED)
PRIV(dev)->state = DEV_STARTED;
- fs_switch_dev(dev);
+ fs_switch_dev(dev, NULL);
return 0;
}

@@ -351,10 +375,14 @@ fs_rx_queue_setup(struct rte_eth_dev *dev,
fs_rx_queue_release(rxq);
dev->data->rx_queues[rx_queue_id] = NULL;
}
- rxq = rte_zmalloc(NULL, sizeof(*rxq),
+ rxq = rte_zmalloc(NULL,
+ sizeof(*rxq) +
+ sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
RTE_CACHE_LINE_SIZE);
if (rxq == NULL)
return -ENOMEM;
+ FOREACH_SUBDEV(sdev, i, dev)
+ rte_atomic64_init(&rxq->refcnt[i]);
rxq->qid = rx_queue_id;
rxq->socket_id = socket_id;
rxq->info.mp = mb_pool;
@@ -414,10 +442,14 @@ fs_tx_queue_setup(struct rte_eth_dev *dev,
fs_tx_queue_release(txq);
dev->data->tx_queues[tx_queue_id] = NULL;
}
- txq = rte_zmalloc("ethdev TX queue", sizeof(*txq),
+ txq = rte_zmalloc("ethdev TX queue",
+ sizeof(*txq) +
+ sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
RTE_CACHE_LINE_SIZE);
if (txq == NULL)
return -ENOMEM;
+ FOREACH_SUBDEV(sdev, i, dev)
+ rte_atomic64_init(&txq->refcnt[i]);
txq->qid = tx_queue_id;
txq->socket_id = socket_id;
txq->info.conf = *tx_conf;
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 25a4dac..8f10d61 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -36,6 +36,7 @@

#include <sys/queue.h>

+#include <rte_atomic.h>
#include <rte_dev.h>
#include <rte_ethdev.h>
#include <rte_devargs.h>
@@ -65,6 +66,7 @@ struct rxq {
uint8_t last_polled;
unsigned int socket_id;
struct rte_eth_rxq_info info;
+ rte_atomic64_t refcnt[];
};

struct txq {
@@ -72,6 +74,7 @@ struct txq {
uint16_t qid;
unsigned int socket_id;
struct rte_eth_txq_info info;
+ rte_atomic64_t refcnt[];
};

struct rte_flow {
@@ -101,6 +104,10 @@ struct sub_device {
enum dev_state state;
/* Some device are defined as a command line */
char *cmdline;
+ /* fail-safe device backreference */
+ struct rte_eth_dev *fs_dev;
+ /* flag calling for recollection */
+ volatile unsigned int remove:1;
};

struct fs_priv {
@@ -168,6 +175,10 @@ int failsafe_eal_uninit(struct rte_eth_dev *dev);
/* ETH_DEV */

int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
+void failsafe_dev_remove(struct rte_eth_dev *dev);
+void failsafe_eth_rmv_event_callback(uint8_t port_id,
+ enum rte_eth_event_type type,
+ void *arg);

/* GLOBALS */

@@ -233,6 +244,39 @@ extern int mac_from_arg;
#define SUBOPS(s, ops) \
(ETH(s)->dev_ops->ops)

+/**
+ * Atomic guard
+ */
+
+/**
+ * a: (rte_atomic64_t *)
+ */
+#define FS_ATOMIC_P(a) \
+ rte_atomic64_add(&(a), 1)
+
+/**
+ * a: (rte_atomic64_t *)
+ */
+#define FS_ATOMIC_V(a) \
+ rte_atomic64_sub(&(a), 1)
+
+/**
+ * s: (struct sub_device *)
+ * i: uint16_t qid
+ */
+#define FS_ATOMIC_RX(s, i) \
+ rte_atomic64_read( \
+ &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] \
+ )
+/**
+ * s: (struct sub_device *)
+ * i: uint16_t qid
+ */
+#define FS_ATOMIC_TX(s, i) \
+ rte_atomic64_read( \
+ &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] \
+ )
+
#ifndef NDEBUG
#include <stdio.h>
#define DEBUG__(m, ...) \
@@ -274,33 +318,45 @@ fs_find_next(struct rte_eth_dev *dev, uint8_t sid,
return sid;
}

+/*
+ * Switch emitting device.
+ * If banned is set, banned must not be considered for
+ * the role of emitting device.
+ */
static inline void
-fs_switch_dev(struct rte_eth_dev *dev)
+fs_switch_dev(struct rte_eth_dev *dev,
+ struct sub_device *banned)
{
+ struct sub_device *txd;
enum dev_state req_state;

req_state = PRIV(dev)->state;
- if (PREFERRED_SUBDEV(dev)->state >= req_state) {
- if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
- (TX_SUBDEV(dev) == NULL ||
+ txd = TX_SUBDEV(dev);
+ if (PREFERRED_SUBDEV(dev)->state >= req_state &&
+ PREFERRED_SUBDEV(dev) != banned) {
+ if (txd != PREFERRED_SUBDEV(dev) &&
+ (txd == NULL ||
(req_state == DEV_STARTED) ||
- (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
+ (txd && txd->state < DEV_STARTED))) {
DEBUG("Switching tx_dev to preferred sub_device");
PRIV(dev)->subs_tx = 0;
}
- } else if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) ||
- TX_SUBDEV(dev) == NULL) {
+ } else if ((txd && txd->state < req_state) ||
+ txd == NULL ||
+ txd == banned) {
struct sub_device *sdev;
uint8_t i;

/* Using acceptable device */
FOREACH_SUBDEV_ST(sdev, i, dev, req_state) {
+ if (sdev == banned)
+ continue;
DEBUG("Switching tx_dev to sub_device %d",
i);
PRIV(dev)->subs_tx = i;
break;
}
- } else if (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) {
+ } else if (txd && txd->state < req_state) {
DEBUG("No device ready, deactivating tx_dev");
PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
} else {
diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c
index c15025f..82a8c4e 100644
--- a/drivers/net/failsafe/failsafe_rxtx.c
+++ b/drivers/net/failsafe/failsafe_rxtx.c
@@ -33,6 +33,7 @@

#include <assert.h>

+#include <rte_atomic.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>

@@ -113,8 +114,10 @@ failsafe_rx_burst(void *queue,
if (unlikely(fs_rx_unsafe(sdev)))
continue;
sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ FS_ATOMIC_P(rxq->refcnt[sdev->sid]);
nb_rx = ETH(sdev)->
rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ FS_ATOMIC_V(rxq->refcnt[sdev->sid]);
if (nb_rx) {
rxq->last_polled = i;
return nb_rx;
@@ -147,8 +150,10 @@ failsafe_rx_burst_fast(void *queue,
sdev = &priv->subs[i];
assert(!fs_rx_unsafe(sdev));
sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ FS_ATOMIC_P(rxq->refcnt[sdev->sid]);
nb_rx = ETH(sdev)->
rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ FS_ATOMIC_V(rxq->refcnt[sdev->sid]);
if (nb_rx) {
rxq->last_polled = i;
return nb_rx;
@@ -165,13 +170,17 @@ failsafe_tx_burst(void *queue,
struct sub_device *sdev;
struct txq *txq;
void *sub_txq;
+ uint16_t nb_tx;

txq = queue;
sdev = TX_SUBDEV(txq->priv->dev);
if (unlikely(fs_tx_unsafe(sdev)))
return 0;
sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
- return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+ FS_ATOMIC_P(txq->refcnt[sdev->sid]);
+ nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+ FS_ATOMIC_V(txq->refcnt[sdev->sid]);
+ return nb_tx;
}

uint16_t
@@ -182,10 +191,14 @@ failsafe_tx_burst_fast(void *queue,
struct sub_device *sdev;
struct txq *txq;
void *sub_txq;
+ uint16_t nb_tx;

txq = queue;
sdev = TX_SUBDEV(txq->priv->dev);
assert(!fs_tx_unsafe(sdev));
sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
- return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+ FS_ATOMIC_P(txq->refcnt[sdev->sid]);
+ nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+ FS_ATOMIC_V(txq->refcnt[sdev->sid]);
+ return nb_tx;
}
--
2.1.4
Thomas Monjalon
2017-07-05 06:42:41 UTC
Permalink
This patch needs to be rebased with this change:

--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -399,7 +399,7 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
void
failsafe_eth_rmv_event_callback(uint8_t port_id __rte_unused,
enum rte_eth_event_type event __rte_unused,
- void *cb_arg)
+ void *cb_arg, void *ret __rte_unused)
{
struct sub_device *sdev = cb_arg;

--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -182,7 +182,7 @@ int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
void failsafe_dev_remove(struct rte_eth_dev *dev);
void failsafe_eth_rmv_event_callback(uint8_t port_id,
enum rte_eth_event_type type,
- void *arg);
+ void *arg, void *ret);
Gaetan Rivet
2017-06-08 00:00:01 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
---
doc/guides/nics/features/failsafe.ini | 1 +
drivers/net/failsafe/failsafe.c | 1 +
drivers/net/failsafe/failsafe_ether.c | 15 +++++++++++++++
drivers/net/failsafe/failsafe_ops.c | 23 +++++++++++++++++++++++
drivers/net/failsafe/failsafe_private.h | 3 +++
5 files changed, 43 insertions(+)

diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
index 257f579..251ce55 100644
--- a/doc/guides/nics/features/failsafe.ini
+++ b/doc/guides/nics/features/failsafe.ini
@@ -5,6 +5,7 @@
;
[Features]
Link status = Y
+Link status event = Y
Queue start/stop = Y
MTU update = Y
Jumbo frame = Y
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 4d35860..151f823 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -240,6 +240,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
mac->addr_bytes[0], mac->addr_bytes[1],
mac->addr_bytes[2], mac->addr_bytes[3],
mac->addr_bytes[4], mac->addr_bytes[5]);
+ dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
return 0;
free_args:
failsafe_args_free(dev);
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 4456d21..06f950b 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -383,3 +383,18 @@ failsafe_eth_rmv_event_callback(uint8_t port_id __rte_unused,
*/
sdev->remove = 1;
}
+
+void
+failsafe_eth_lsc_event_callback(uint8_t port_id __rte_unused,
+ enum rte_eth_event_type event __rte_unused,
+ void *cb_arg)
+{
+ struct rte_eth_dev *dev = cb_arg;
+ int ret;
+
+ ret = dev->dev_ops->link_update(dev, 0);
+ /* We must pass on the LSC event */
+ if (ret)
+ _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC,
+ NULL);
+}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 2e1c798..05221bc 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -207,6 +207,8 @@ fs_dev_configure(struct rte_eth_dev *dev)
}
FOREACH_SUBDEV(sdev, i, dev) {
int rmv_interrupt = 0;
+ int lsc_interrupt = 0;
+ int lsc_enabled;

if (sdev->state != DEV_PROBED)
continue;
@@ -219,6 +221,17 @@ fs_dev_configure(struct rte_eth_dev *dev)
} else {
DEBUG("sub_device %d does not support RMV event", i);
}
+ lsc_enabled = dev->data->dev_conf.intr_conf.lsc;
+ lsc_interrupt = lsc_enabled &&
+ (ETH(sdev)->data->dev_flags &
+ RTE_ETH_DEV_INTR_LSC);
+ if (lsc_interrupt) {
+ DEBUG("Enabling LSC interrupts for sub_device %d", i);
+ dev->data->dev_conf.intr_conf.lsc = 1;
+ } else if (lsc_enabled && !lsc_interrupt) {
+ DEBUG("Disabling LSC interrupts for sub_device %d", i);
+ dev->data->dev_conf.intr_conf.lsc = 0;
+ }
DEBUG("Configuring sub-device %d", i);
sdev->remove = 0;
ret = rte_eth_dev_configure(PORT_ID(sdev),
@@ -239,6 +252,16 @@ fs_dev_configure(struct rte_eth_dev *dev)
SUB_ID(sdev));
}
dev->data->dev_conf.intr_conf.rmv = 0;
+ if (lsc_interrupt) {
+ ret = rte_eth_dev_callback_register(PORT_ID(sdev),
+ RTE_ETH_EVENT_INTR_LSC,
+ failsafe_eth_lsc_event_callback,
+ dev);
+ if (ret)
+ WARN("Failed to register LSC callback for sub_device %d",
+ SUB_ID(sdev));
+ }
+ dev->data->dev_conf.intr_conf.lsc = lsc_enabled;
sdev->state = DEV_ACTIVE;
}
if (PRIV(dev)->state < DEV_ACTIVE)
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 8f10d61..cb8fad2 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -179,6 +179,9 @@ void failsafe_dev_remove(struct rte_eth_dev *dev);
void failsafe_eth_rmv_event_callback(uint8_t port_id,
enum rte_eth_event_type type,
void *arg);
+void failsafe_eth_lsc_event_callback(uint8_t port_id,
+ enum rte_eth_event_type event,
+ void *cb_arg);

/* GLOBALS */
--
2.1.4
Thomas Monjalon
2017-07-05 06:43:33 UTC
Permalink
This patch need to be rebased with this change:

--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -417,7 +417,7 @@ failsafe_eth_rmv_event_callback(uint8_t port_id __rte_unused,
void
failsafe_eth_lsc_event_callback(uint8_t port_id __rte_unused,
enum rte_eth_event_type event __rte_unused,
- void *cb_arg)
+ void *cb_arg, void *ret __rte_unused)
{
struct rte_eth_dev *dev = cb_arg;
int ret;
@@ -426,5 +426,5 @@ failsafe_eth_lsc_event_callback(uint8_t port_id __rte_unused,
/* We must pass on the LSC event */
if (ret)
_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC,
- NULL);
+ NULL, NULL);
}

--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
void failsafe_eth_lsc_event_callback(uint8_t port_id,
enum rte_eth_event_type event,
- void *cb_arg);
+ void *cb_arg, void *ret);
Gaetan Rivet
2017-06-08 00:00:02 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
---
drivers/net/failsafe/failsafe_ether.c | 34 +++++++++++++++++++++++++++++++--
drivers/net/failsafe/failsafe_flow.c | 29 ++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_private.h | 4 ++++
3 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 06f950b..fb64111 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -74,6 +74,28 @@ fs_flow_complain(struct rte_flow_error *error)
}

static int
+eth_dev_flow_isolate_set(struct rte_eth_dev *dev,
+ struct sub_device *sdev)
+{
+ struct rte_flow_error ferror;
+ int ret;
+
+ if (!PRIV(dev)->flow_isolated) {
+ DEBUG("Flow isolation already disabled");
+ } else {
+ DEBUG("Enabling flow isolation");
+ ret = rte_flow_isolate(PORT_ID(sdev),
+ PRIV(dev)->flow_isolated,
+ &ferror);
+ if (ret) {
+ fs_flow_complain(&ferror);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
struct sub_device *sdev)
{
@@ -331,9 +353,17 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
if (PRIV(dev)->state < DEV_ACTIVE)
return 0;
inactive = 0;
- FOREACH_SUBDEV(sdev, i, dev)
- if (sdev->state == DEV_PROBED)
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state == DEV_PROBED) {
inactive |= UINT32_C(1) << i;
+ ret = eth_dev_flow_isolate_set(dev, sdev);
+ if (ret) {
+ ERROR("Could not apply configuration to sub_device %d",
+ i);
+ goto err_remove;
+ }
+ }
+ }
ret = dev->dev_ops->dev_configure(dev);
if (ret)
goto err_remove;
diff --git a/drivers/net/failsafe/failsafe_flow.c b/drivers/net/failsafe/failsafe_flow.c
index d8f59a1..a5598ae 100644
--- a/drivers/net/failsafe/failsafe_flow.c
+++ b/drivers/net/failsafe/failsafe_flow.c
@@ -207,10 +207,39 @@ fs_flow_query(struct rte_eth_dev *dev,
return -1;
}

+static int
+fs_flow_isolate(struct rte_eth_dev *dev,
+ int set,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state < DEV_PROBED)
+ continue;
+ DEBUG("Calling rte_flow_isolate on sub_device %d", i);
+ if (PRIV(dev)->flow_isolated != sdev->flow_isolated)
+ WARN("flow isolation mode of sub_device %d in incoherent state.",
+ i);
+ ret = rte_flow_isolate(PORT_ID(sdev), set, error);
+ if (ret) {
+ ERROR("Operation rte_flow_isolate failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ sdev->flow_isolated = set;
+ }
+ PRIV(dev)->flow_isolated = set;
+ return 0;
+}
+
const struct rte_flow_ops fs_flow_ops = {
.validate = fs_flow_validate,
.create = fs_flow_create,
.destroy = fs_flow_destroy,
.flush = fs_flow_flush,
.query = fs_flow_query,
+ .isolate = fs_flow_isolate,
};
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index cb8fad2..c029633 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -108,6 +108,8 @@ struct sub_device {
struct rte_eth_dev *fs_dev;
/* flag calling for recollection */
volatile unsigned int remove:1;
+ /* flow isolation state */
+ int flow_isolated:1;
};

struct fs_priv {
@@ -139,6 +141,8 @@ struct fs_priv {
*/
enum dev_state state;
unsigned int pending_alarm:1; /* An alarm is pending */
+ /* flow isolation state */
+ int flow_isolated:1;
};

/* MISC */
--
2.1.4
Ferruh Yigit
2017-06-08 10:56:37 UTC
Permalink
Post by Gaetan Rivet
eal: complete attach / detach support
http://dpdk.org/ml/archives/dev/2017-May/066366.html
http://dpdk.org/dev/patchwork/patch/24522/
ethdev: add flow API rule copy function
http://dpdk.org/ml/archives/dev/2017-May/066145.html
http://dpdk.org/dev/patchwork/patch/24406/
ethdev: add isolated mode to flow API
http://dpdk.org/ml/archives/dev/2017-April/064327.html
http://dpdk.org/dev/patchwork/patch/23741/
Hi Gaetan,

Above dependencies are not enough to apply the PMD, it seems above ones
also has dependencies to other patchsets, specially eal one that I am
not able trace its dependencies from mails.

Since most of the eal patches are also from you, can you please list
dependency tree to apply the PMD?

Thanks,
ferruh
Gaëtan Rivet
2017-06-08 13:30:04 UTC
Permalink
Post by Ferruh Yigit
Post by Gaetan Rivet
eal: complete attach / detach support
http://dpdk.org/ml/archives/dev/2017-May/066366.html
http://dpdk.org/dev/patchwork/patch/24522/
ethdev: add flow API rule copy function
http://dpdk.org/ml/archives/dev/2017-May/066145.html
http://dpdk.org/dev/patchwork/patch/24406/
ethdev: add isolated mode to flow API
http://dpdk.org/ml/archives/dev/2017-April/064327.html
http://dpdk.org/dev/patchwork/patch/23741/
Hi Gaetan,
Above dependencies are not enough to apply the PMD, it seems above ones
also has dependencies to other patchsets, specially eal one that I am
not able trace its dependencies from mails.
Since most of the eal patches are also from you, can you please list
dependency tree to apply the PMD?
Thanks,
ferruh
Hi Ferruh,

here is the full list of patchsets that I have currently in my tree, in
order, up until the fail-safe PMD:

---

[0] ethdev: add isolated mode to flow API
http://dpdk.org/ml/archives/dev/2017-April/064327.html

[1] ethdev: add flow API rule copy function
http://dpdk.org/ml/archives/dev/2017-May/066145.html

[2] Remove PCI and VDEV dependency from eventdev library
http://dpdk.org/ml/archives/dev/2017-June/067402.html

[3] Remove cryptodev driver
http://dpdk.org/ml/archives/dev/2017-May/066382.html

[4] bus: attach / detach API
http://dpdk.org/ml/archives/dev/2017-June/067474.html

[5] pci: implement find_device bus operation
http://dpdk.org/ml/archives/dev/2017-June/067485.html

[6] rte_bus parse API
http://dpdk.org/ml/archives/dev/2017-June/067486.html

[7] Generic devargs parsing
http://dpdk.org/ml/archives/dev/2017-June/067496.html

[8] eal: complete attach / detach support
http://dpdk.org/ml/archives/dev/2017-June/067516.html

[9] bus/pci: remove PCI bus from EAL
http://dpdk.org/ml/archives/dev/2017-June/067520.html

[10] introduce fail-safe PMD
http://dpdk.org/ml/archives/dev/2017-June/067533.html

---

[2] has a slight compilation issue that I fixed with
https://github.com/grivet/dpdk/commit/1af1272bd3129e90ae7c4ab005e85d5f8a2f95fe
However Jerin has not yet answered to this issue (see:
http://dpdk.org/ml/archives/dev/2017-June/067412.html )

The failsafe PMD only depends on 0, 1, 4, 5, 6, 7, 8. All others are
left there for consistency.
--
Gaëtan Rivet
6WIND
Ferruh Yigit
2017-06-08 14:20:07 UTC
Permalink
Post by Gaëtan Rivet
Post by Ferruh Yigit
Post by Gaetan Rivet
eal: complete attach / detach support
http://dpdk.org/ml/archives/dev/2017-May/066366.html
http://dpdk.org/dev/patchwork/patch/24522/
ethdev: add flow API rule copy function
http://dpdk.org/ml/archives/dev/2017-May/066145.html
http://dpdk.org/dev/patchwork/patch/24406/
ethdev: add isolated mode to flow API
http://dpdk.org/ml/archives/dev/2017-April/064327.html
http://dpdk.org/dev/patchwork/patch/23741/
Hi Gaetan,
Above dependencies are not enough to apply the PMD, it seems above ones
also has dependencies to other patchsets, specially eal one that I am
not able trace its dependencies from mails.
Since most of the eal patches are also from you, can you please list
dependency tree to apply the PMD?
Thanks,
ferruh
Hi Ferruh,
here is the full list of patchsets that I have currently in my tree, in
---
[0] ethdev: add isolated mode to flow API
http://dpdk.org/ml/archives/dev/2017-April/064327.html
[1] ethdev: add flow API rule copy function
http://dpdk.org/ml/archives/dev/2017-May/066145.html
[2] Remove PCI and VDEV dependency from eventdev library
http://dpdk.org/ml/archives/dev/2017-June/067402.html
[3] Remove cryptodev driver
http://dpdk.org/ml/archives/dev/2017-May/066382.html
[4] bus: attach / detach API
http://dpdk.org/ml/archives/dev/2017-June/067474.html
[5] pci: implement find_device bus operation
http://dpdk.org/ml/archives/dev/2017-June/067485.html
[6] rte_bus parse API
http://dpdk.org/ml/archives/dev/2017-June/067486.html
[7] Generic devargs parsing
http://dpdk.org/ml/archives/dev/2017-June/067496.html
[8] eal: complete attach / detach support
http://dpdk.org/ml/archives/dev/2017-June/067516.html
[9] bus/pci: remove PCI bus from EAL
http://dpdk.org/ml/archives/dev/2017-June/067520.html
[10] introduce fail-safe PMD
http://dpdk.org/ml/archives/dev/2017-June/067533.html
---
[2] has a slight compilation issue that I fixed with
https://github.com/grivet/dpdk/commit/1af1272bd3129e90ae7c4ab005e85d5f8a2f95fe
http://dpdk.org/ml/archives/dev/2017-June/067412.html )
The failsafe PMD only depends on 0, 1, 4, 5, 6, 7, 8. All others are
left there for consistency.
Thanks Gaetan,

I did able to apply the patchset, I think it is good idea to wait for
PMD until things settle down in dependent patches side.


btw,getting build error [1], CFLAGS += -D_DEFAULT_SOURCE may help.

[1]
/usr/include/features.h:148:3: error: #warning "_BSD_SOURCE and
_SVID_SOURCE are deprecated, use _DEFAULT_SOURCE" [-Werror=cpp]
# warning "_BSD_SOURCE and _SVID_SOURCE are deprecated, use
_DEFAULT_SOURCE"
Ferruh Yigit
2017-06-08 14:25:03 UTC
Permalink
Post by Ferruh Yigit
Post by Gaëtan Rivet
Post by Ferruh Yigit
Post by Gaetan Rivet
eal: complete attach / detach support
http://dpdk.org/ml/archives/dev/2017-May/066366.html
http://dpdk.org/dev/patchwork/patch/24522/
ethdev: add flow API rule copy function
http://dpdk.org/ml/archives/dev/2017-May/066145.html
http://dpdk.org/dev/patchwork/patch/24406/
ethdev: add isolated mode to flow API
http://dpdk.org/ml/archives/dev/2017-April/064327.html
http://dpdk.org/dev/patchwork/patch/23741/
Hi Gaetan,
Above dependencies are not enough to apply the PMD, it seems above ones
also has dependencies to other patchsets, specially eal one that I am
not able trace its dependencies from mails.
Since most of the eal patches are also from you, can you please list
dependency tree to apply the PMD?
Thanks,
ferruh
Hi Ferruh,
here is the full list of patchsets that I have currently in my tree, in
---
[0] ethdev: add isolated mode to flow API
http://dpdk.org/ml/archives/dev/2017-April/064327.html
[1] ethdev: add flow API rule copy function
http://dpdk.org/ml/archives/dev/2017-May/066145.html
[2] Remove PCI and VDEV dependency from eventdev library
http://dpdk.org/ml/archives/dev/2017-June/067402.html
[3] Remove cryptodev driver
http://dpdk.org/ml/archives/dev/2017-May/066382.html
[4] bus: attach / detach API
http://dpdk.org/ml/archives/dev/2017-June/067474.html
[5] pci: implement find_device bus operation
http://dpdk.org/ml/archives/dev/2017-June/067485.html
[6] rte_bus parse API
http://dpdk.org/ml/archives/dev/2017-June/067486.html
[7] Generic devargs parsing
http://dpdk.org/ml/archives/dev/2017-June/067496.html
[8] eal: complete attach / detach support
http://dpdk.org/ml/archives/dev/2017-June/067516.html
[9] bus/pci: remove PCI bus from EAL
http://dpdk.org/ml/archives/dev/2017-June/067520.html
[10] introduce fail-safe PMD
http://dpdk.org/ml/archives/dev/2017-June/067533.html
---
[2] has a slight compilation issue that I fixed with
https://github.com/grivet/dpdk/commit/1af1272bd3129e90ae7c4ab005e85d5f8a2f95fe
http://dpdk.org/ml/archives/dev/2017-June/067412.html )
The failsafe PMD only depends on 0, 1, 4, 5, 6, 7, 8. All others are
left there for consistency.
Thanks Gaetan,
I did able to apply the patchset, I think it is good idea to wait for
PMD until things settle down in dependent patches side.
btw, first three patches of the failsafe patchset are more generic
ethdev layer patches. I think they shouldn't be merged as part of PMD
patchset.

What do you think separating them into separate patchset (or perhaps
individual patches, since they looks independent from eachother)?

Thanks,
ferruh
Post by Ferruh Yigit
btw,getting build error [1], CFLAGS += -D_DEFAULT_SOURCE may help.
[1]
/usr/include/features.h:148:3: error: #warning "_BSD_SOURCE and
_SVID_SOURCE are deprecated, use _DEFAULT_SOURCE" [-Werror=cpp]
# warning "_BSD_SOURCE and _SVID_SOURCE are deprecated, use
_DEFAULT_SOURCE"
Gaëtan Rivet
2017-06-08 15:02:13 UTC
Permalink
Post by Ferruh Yigit
Post by Ferruh Yigit
Post by Gaëtan Rivet
Post by Ferruh Yigit
Post by Gaetan Rivet
eal: complete attach / detach support
http://dpdk.org/ml/archives/dev/2017-May/066366.html
http://dpdk.org/dev/patchwork/patch/24522/
ethdev: add flow API rule copy function
http://dpdk.org/ml/archives/dev/2017-May/066145.html
http://dpdk.org/dev/patchwork/patch/24406/
ethdev: add isolated mode to flow API
http://dpdk.org/ml/archives/dev/2017-April/064327.html
http://dpdk.org/dev/patchwork/patch/23741/
Hi Gaetan,
Above dependencies are not enough to apply the PMD, it seems above ones
also has dependencies to other patchsets, specially eal one that I am
not able trace its dependencies from mails.
Since most of the eal patches are also from you, can you please list
dependency tree to apply the PMD?
Thanks,
ferruh
Hi Ferruh,
here is the full list of patchsets that I have currently in my tree, in
---
[0] ethdev: add isolated mode to flow API
http://dpdk.org/ml/archives/dev/2017-April/064327.html
[1] ethdev: add flow API rule copy function
http://dpdk.org/ml/archives/dev/2017-May/066145.html
[2] Remove PCI and VDEV dependency from eventdev library
http://dpdk.org/ml/archives/dev/2017-June/067402.html
[3] Remove cryptodev driver
http://dpdk.org/ml/archives/dev/2017-May/066382.html
[4] bus: attach / detach API
http://dpdk.org/ml/archives/dev/2017-June/067474.html
[5] pci: implement find_device bus operation
http://dpdk.org/ml/archives/dev/2017-June/067485.html
[6] rte_bus parse API
http://dpdk.org/ml/archives/dev/2017-June/067486.html
[7] Generic devargs parsing
http://dpdk.org/ml/archives/dev/2017-June/067496.html
[8] eal: complete attach / detach support
http://dpdk.org/ml/archives/dev/2017-June/067516.html
[9] bus/pci: remove PCI bus from EAL
http://dpdk.org/ml/archives/dev/2017-June/067520.html
[10] introduce fail-safe PMD
http://dpdk.org/ml/archives/dev/2017-June/067533.html
---
[2] has a slight compilation issue that I fixed with
https://github.com/grivet/dpdk/commit/1af1272bd3129e90ae7c4ab005e85d5f8a2f95fe
http://dpdk.org/ml/archives/dev/2017-June/067412.html )
The failsafe PMD only depends on 0, 1, 4, 5, 6, 7, 8. All others are
left there for consistency.
Thanks Gaetan,
I did able to apply the patchset, I think it is good idea to wait for
PMD until things settle down in dependent patches side.
Sure, it won't compile without it anyway.
Post by Ferruh Yigit
btw, first three patches of the failsafe patchset are more generic
ethdev layer patches. I think they shouldn't be merged as part of PMD
patchset.
What do you think separating them into separate patchset (or perhaps
individual patches, since they looks independent from eachother)?
Why not, but I am juggling (read: struggling) with a few patchsets already.
I will wait for the dust to settle and look into it.
Post by Ferruh Yigit
Thanks,
ferruh
Post by Ferruh Yigit
btw,getting build error [1], CFLAGS += -D_DEFAULT_SOURCE may help.
[1]
/usr/include/features.h:148:3: error: #warning "_BSD_SOURCE and
_SVID_SOURCE are deprecated, use _DEFAULT_SOURCE" [-Werror=cpp]
# warning "_BSD_SOURCE and _SVID_SOURCE are deprecated, use
_DEFAULT_SOURCE"
--
Gaëtan Rivet
6WIND
Gaetan Rivet
2017-07-07 00:09:07 UTC
Permalink
This PMD intercepts and manages Ethernet device removal events issued by
slave PMDs and re-initializes them transparently when brought back so that
existing applications do not need to be modified to benefit from true
hot-plugging support.

The stacked PMD approach shares many similarities with the bonding PMD but
with a different purpose. While bonding provides the ability to group
several links into a single logical device for enhanced throughput and
supports fail-over at link level, this one manages the sudden disappearance
of the underlying device; it guarantees applications face a valid device in
working order at all times.

Each fail-safe instance is configured to run atop one or several
devices, with one defined as the preferred device. Hot-plug events are
handled on all of them, and Tx is always directed to the preferred device
if present or to the next available failover device (Rx is always performed
on all devices for simplicity).

Moreover, the configured slaves (preferred or failover) do not need to be
present at initialization time and may appear later.

Slaves configuration is continuously synchronized with that of the virtual
device, which exposes their common set of capabilities to the application.
Failure to apply the current configuration state to a slave for any reason
simply reschedules its initialization.

This patchset depends on:

eal: complete attach / detach support
http://dpdk.org/ml/archives/dev/2017-May/066366.html
http://dpdk.org/dev/patchwork/patch/24522/

ethdev: add flow API rule copy function
http://dpdk.org/ml/archives/dev/2017-May/066145.html
http://dpdk.org/dev/patchwork/patch/24406/

ethdev: add isolated mode to flow API
http://dpdk.org/ml/archives/dev/2017-April/064327.html
http://dpdk.org/dev/patchwork/patch/23741/

v1 --> v2:

- Wrote documentation
- Fixed commit logs, signed-off-by
- Added LSC event support
- A few minor fixes

v2 --> v3:

- Numerous bug fixes.
- Complete sub-EAL rework to follow new bus API.
- burst protection on sub removal.
- more flexible sub definition.
- flow isolated mode support.

v3 --> v4:

- Split back commits
net/failsafe: add fast burst functions
net/failsafe: support device removal
That were squashed by error during a rebase
- Fix segfault on port plugin
- Fix isolate mode support for MLX4 ports plugin

v4 --> v5:

- Follow new plug / unplug API.

v5 --> v6:

- Follow new hotplug API.
- Improve usability of hotplug API.
- Fix rte_dev hotplug API implementation.
- Introduce rte_eal_devargs_rmv API as EXPERIMENTAL.
- Use it to clean up resources on hotplug_remove.
- Fix hotplug implementation and support un pci bus.
The scan was not idempotent, nor clean.
Neither were the device fields.
- Implement plug operation for vdev bus.
This is needed for hotplug support and to make the EAL
independent from vdev-specific API.
- Remove useless parameters from plug / unplug API.

This patchset is fairly big and complex. The hotplug API has been rushed and
has never been tested outside of the special case of vdev bus.

These evolutions are proposed alongside this PMD as only this PMD allows to test
this API at the moment, and without those evolutions this PMD cannot be used.

Gaetan Rivet (22):
eal: return device handle upon plugin
eal: fix hotplug add
devargs: introduce removal function
eal: release devargs on device removal
pci: use given name as generic name
pci: fix generic driver pointer on probe error
pci: fix hotplug operations
vdev: add dev to vdev macro
vdev: implement plug operation
bus: remove useless plug parameter
ethdev: save VLAN filter setting
ethdev: add deferred intermediate device state
ethdev: count devices consistently
net/failsafe: add fail-safe PMD
net/failsafe: add plug-in support
net/failsafe: add flexible device definition
net/failsafe: support flow API
net/failsafe: support offload capabilities
net/failsafe: add fast burst functions
net/failsafe: support device removal
net/failsafe: support link status change event
net/failsafe: support flow API isolation mode

MAINTAINERS | 5 +
config/common_base | 6 +
doc/guides/nics/fail_safe.rst | 186 +++++
doc/guides/nics/features/failsafe.ini | 32 +
doc/guides/nics/index.rst | 1 +
drivers/net/Makefile | 2 +
drivers/net/failsafe/Makefile | 78 ++
drivers/net/failsafe/failsafe.c | 305 ++++++++
drivers/net/failsafe/failsafe_args.c | 475 ++++++++++++
drivers/net/failsafe/failsafe_eal.c | 134 ++++
drivers/net/failsafe/failsafe_ether.c | 437 +++++++++++
drivers/net/failsafe/failsafe_flow.c | 245 ++++++
drivers/net/failsafe/failsafe_ops.c | 869 ++++++++++++++++++++++
drivers/net/failsafe/failsafe_private.h | 376 ++++++++++
drivers/net/failsafe/failsafe_rxtx.c | 204 +++++
drivers/net/failsafe/rte_pmd_failsafe_version.map | 4 +
lib/librte_eal/bsdapp/eal/eal_pci.c | 4 +-
lib/librte_eal/bsdapp/eal/rte_eal_version.map | 1 +
lib/librte_eal/common/eal_common_dev.c | 83 ++-
lib/librte_eal/common/eal_common_devargs.c | 18 +
lib/librte_eal/common/eal_common_pci.c | 49 +-
lib/librte_eal/common/eal_common_vdev.c | 12 +-
lib/librte_eal/common/eal_private.h | 5 +
lib/librte_eal/common/include/rte_bus.h | 6 +-
lib/librte_eal/common/include/rte_dev.h | 10 +-
lib/librte_eal/common/include/rte_devargs.h | 18 +
lib/librte_eal/common/include/rte_vdev.h | 7 +
lib/librte_eal/linuxapp/eal/eal_pci.c | 4 +-
lib/librte_eal/linuxapp/eal/rte_eal_version.map | 1 +
lib/librte_ether/rte_ethdev.c | 38 +-
lib/librte_ether/rte_ethdev.h | 24 +-
mk/rte.app.mk | 1 +
32 files changed, 3568 insertions(+), 72 deletions(-)
create mode 100644 doc/guides/nics/fail_safe.rst
create mode 100644 doc/guides/nics/features/failsafe.ini
create mode 100644 drivers/net/failsafe/Makefile
create mode 100644 drivers/net/failsafe/failsafe.c
create mode 100644 drivers/net/failsafe/failsafe_args.c
create mode 100644 drivers/net/failsafe/failsafe_eal.c
create mode 100644 drivers/net/failsafe/failsafe_ether.c
create mode 100644 drivers/net/failsafe/failsafe_flow.c
create mode 100644 drivers/net/failsafe/failsafe_ops.c
create mode 100644 drivers/net/failsafe/failsafe_private.h
create mode 100644 drivers/net/failsafe/failsafe_rxtx.c
create mode 100644 drivers/net/failsafe/rte_pmd_failsafe_version.map
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:08 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_eal/common/eal_common_dev.c | 40 +++++++++++++++++++++++----------
lib/librte_eal/common/include/rte_dev.h | 10 +++++----
2 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index 32e12b5..292fefe 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -42,6 +42,7 @@
#include <rte_devargs.h>
#include <rte_debug.h>
#include <rte_devargs.h>
+#include <rte_errno.h>
#include <rte_log.h>

#include "eal_private.h"
@@ -67,6 +68,7 @@ static int cmp_dev_name(const struct rte_device *dev, const void *_name)

int rte_eal_dev_attach(const char *name, const char *devargs)
{
+ struct rte_device *dev;
int ret;

if (name == NULL || devargs == NULL) {
@@ -74,9 +76,9 @@ int rte_eal_dev_attach(const char *name, const char *devargs)
return -EINVAL;
}

- ret = rte_eal_hotplug_add("pci", name, devargs);
- if (ret && ret != -EINVAL)
- return ret;
+ dev = rte_eal_hotplug_add("pci", name, devargs);
+ if (dev == NULL && rte_errno != EINVAL)
+ return -rte_errno;

/*
* If we haven't found a bus device the user meant to "hotplug" a
@@ -118,7 +120,8 @@ int rte_eal_dev_detach(struct rte_device *dev)
return ret;
}

-int rte_eal_hotplug_add(const char *busname, const char *devname,
+struct rte_device *
+rte_eal_hotplug_add(const char *busname, const char *devname,
const char *devargs)
{
struct rte_bus *bus;
@@ -128,31 +131,39 @@ int rte_eal_hotplug_add(const char *busname, const char *devname,
bus = rte_bus_find_by_name(busname);
if (bus == NULL) {
RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", busname);
- return -ENOENT;
+ rte_errno = ENOENT;
+ return NULL;
}

if (bus->plug == NULL) {
RTE_LOG(ERR, EAL, "Function plug not supported by bus (%s)\n",
bus->name);
- return -ENOTSUP;
+ rte_errno = ENOTSUP;
+ return NULL;
}

ret = bus->scan();
- if (ret)
- return ret;
+ if (ret) {
+ rte_errno = -ret;
+ return NULL;
+ }

dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
if (dev == NULL) {
RTE_LOG(ERR, EAL, "Cannot find unplugged device (%s)\n",
devname);
- return -EINVAL;
+ rte_errno = EINVAL;
+ return NULL;
}

ret = bus->plug(dev, devargs);
- if (ret)
+ if (ret) {
RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
dev->name);
- return ret;
+ rte_errno = -ret;
+ return NULL;
+ }
+ return dev;
}

int rte_eal_hotplug_remove(const char *busname, const char *devname)
@@ -164,24 +175,29 @@ int rte_eal_hotplug_remove(const char *busname, const char *devname)
bus = rte_bus_find_by_name(busname);
if (bus == NULL) {
RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", busname);
+ rte_errno = ENOENT;
return -ENOENT;
}

if (bus->unplug == NULL) {
RTE_LOG(ERR, EAL, "Function unplug not supported by bus (%s)\n",
bus->name);
+ rte_errno = ENOTSUP;
return -ENOTSUP;
}

dev = bus->find_device(NULL, cmp_dev_name, devname);
if (dev == NULL) {
RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", devname);
+ rte_errno = EINVAL;
return -EINVAL;
}

ret = bus->unplug(dev);
- if (ret)
+ if (ret) {
RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
dev->name);
+ rte_errno = -ret;
+ }
return ret;
}
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index bcd8b1e..30d1f2e 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -218,10 +218,12 @@ int rte_eal_dev_detach(struct rte_device *dev);
* @param devargs
* Device arguments to be passed to the driver.
* @return
- * 0 on success, negative on error.
+ * The pointer to the plugged rte_device on success.
+ * NULL on error. rte_errno is then set.
*/
-int rte_eal_hotplug_add(const char *busname, const char *devname,
- const char *devargs);
+struct rte_device *rte_eal_hotplug_add(const char *busname,
+ const char *devname,
+ const char *devargs);

/**
* @warning
@@ -234,7 +236,7 @@ int rte_eal_hotplug_add(const char *busname, const char *devname,
* @param devname
* The device name being removed.
* @return
- * 0 on success, negative on error.
+ * 0 on success, negative on error. rte_errno is then set.
*/
int rte_eal_hotplug_remove(const char *busname, const char *devname);
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:09 UTC
Permalink
New device should be represented by an rte_devarg prior to being
plugged.

Fixes: a3ee360f4440 ("eal: add hotplug add/remove device")

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_eal/common/eal_common_dev.c | 43 +++++++++++++++++++++++++++++++---
1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index 292fefe..708c8e9 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -120,12 +120,32 @@ int rte_eal_dev_detach(struct rte_device *dev)
return ret;
}

+static char *
+full_dev_name(const char *bus, const char *dev, const char *args)
+{
+ char *name;
+ size_t len;
+
+ len = strlen(bus) + 1 +
+ strlen(dev) + 1 +
+ strlen(args) + 1;
+ name = calloc(1, len);
+ if (name == NULL) {
+ RTE_LOG(ERR, EAL, "Could not allocate full device name\n");
+ return NULL;
+ }
+ snprintf(name, len, "%s:%s,%s", bus, dev,
+ args ? args : "");
+ return name;
+}
+
struct rte_device *
rte_eal_hotplug_add(const char *busname, const char *devname,
const char *devargs)
{
struct rte_bus *bus;
struct rte_device *dev;
+ char *name;
int ret;

bus = rte_bus_find_by_name(busname);
@@ -142,10 +162,22 @@ rte_eal_hotplug_add(const char *busname, const char *devname,
return NULL;
}

+ name = full_dev_name(busname, devname, devargs);
+ if (name == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ ret = rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED, name);
+ if (ret) {
+ rte_errno = EINVAL;
+ goto err_name;
+ }
+
ret = bus->scan();
if (ret) {
rte_errno = -ret;
- return NULL;
+ goto err_name;
}

dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
@@ -153,7 +185,7 @@ rte_eal_hotplug_add(const char *busname, const char *devname,
RTE_LOG(ERR, EAL, "Cannot find unplugged device (%s)\n",
devname);
rte_errno = EINVAL;
- return NULL;
+ goto err_name;
}

ret = bus->plug(dev, devargs);
@@ -161,9 +193,14 @@ rte_eal_hotplug_add(const char *busname, const char *devname,
RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
dev->name);
rte_errno = -ret;
- return NULL;
+ goto err_name;
}
+ free(name);
return dev;
+
+err_name:
+ free(name);
+ return NULL;
}

int rte_eal_hotplug_remove(const char *busname, const char *devname)
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:10 UTC
Permalink
Hotplug support introduces the possibility of removing devices from the
system. Allocated resources must be freed.

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_eal/bsdapp/eal/rte_eal_version.map | 1 +
lib/librte_eal/common/eal_common_devargs.c | 18 ++++++++++++++++++
lib/librte_eal/common/include/rte_devargs.h | 18 ++++++++++++++++++
lib/librte_eal/linuxapp/eal/rte_eal_version.map | 1 +
4 files changed, 38 insertions(+)

diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index 381f895..d3cf1ae 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -207,6 +207,7 @@ EXPERIMENTAL {
global:

rte_eal_devargs_parse;
+ rte_eal_devargs_rmv;
rte_eal_hotplug_add;
rte_eal_hotplug_remove;

diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
index 3ea070d..2f29e89 100644
--- a/lib/librte_eal/common/eal_common_devargs.c
+++ b/lib/librte_eal/common/eal_common_devargs.c
@@ -41,6 +41,7 @@
#include <string.h>

#include <rte_devargs.h>
+#include <rte_tailq.h>
#include "eal_private.h"

/** Global list of user devices */
@@ -185,6 +186,23 @@ rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
return -1;
}

+int
+rte_eal_devargs_rmv(const char *busname, const char *devname)
+{
+ struct rte_devargs *d;
+ void *tmp;
+
+ TAILQ_FOREACH_SAFE(d, &devargs_list, next, tmp) {
+ if (strcmp(d->bus->name, busname) == 0 &&
+ strcmp(d->name, devname) == 0) {
+ TAILQ_REMOVE(&devargs_list, d, next);
+ free(d->args);
+ free(d);
+ }
+ }
+ return 1;
+}
+
/* count the number of devices of a specified type */
unsigned int
rte_eal_devargs_type_count(enum rte_devtype devtype)
diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
index a0427cd..89679bb 100644
--- a/lib/librte_eal/common/include/rte_devargs.h
+++ b/lib/librte_eal/common/include/rte_devargs.h
@@ -163,6 +163,24 @@ rte_eal_devargs_parse(const char *dev,
int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str);

/**
+ * Remove a device from the user device list.
+ * Its resources are freed.
+ * If the devargs cannot be found, nothing happens.
+ *
+ * @param busname
+ * bus name of the devargs to remove.
+ *
+ * @param devname
+ * device name of the devargs to remove.
+ *
+ * @return
+ * 0 on success.
+ * <0 on error.
+ * >0 if the devargs was not within the user device list.
+ */
+int rte_eal_devargs_rmv(const char *busname, const char *devname);
+
+/**
* Count the number of user devices of a specified type
*
* @param devtype
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 0f9e009..d59308a 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -212,6 +212,7 @@ EXPERIMENTAL {
global:

rte_eal_devargs_parse;
+ rte_eal_devargs_rmv;
rte_eal_hotplug_add;
rte_eal_hotplug_remove;
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:11 UTC
Permalink
Release resources allocated for devargs when the device is being
removed.

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_eal/common/eal_common_dev.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index 708c8e9..143c231 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -177,7 +177,7 @@ rte_eal_hotplug_add(const char *busname, const char *devname,
ret = bus->scan();
if (ret) {
rte_errno = -ret;
- goto err_name;
+ goto err_devarg;
}

dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
@@ -185,7 +185,7 @@ rte_eal_hotplug_add(const char *busname, const char *devname,
RTE_LOG(ERR, EAL, "Cannot find unplugged device (%s)\n",
devname);
rte_errno = EINVAL;
- goto err_name;
+ goto err_devarg;
}

ret = bus->plug(dev, devargs);
@@ -193,11 +193,13 @@ rte_eal_hotplug_add(const char *busname, const char *devname,
RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
dev->name);
rte_errno = -ret;
- goto err_name;
+ goto err_devarg;
}
free(name);
return dev;

+err_devarg:
+ rte_eal_devargs_rmv(busname, devname);
err_name:
free(name);
return NULL;
@@ -230,6 +232,8 @@ int rte_eal_hotplug_remove(const char *busname, const char *devname)
return -EINVAL;
}

+ rte_eal_devargs_rmv(busname, devname);
+ dev->devargs = NULL;
ret = bus->unplug(dev);
if (ret) {
RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:12 UTC
Permalink
The PCI device is referenced by other DPDK systems by the name of its
parameter, not by the system name.

Fixes: beec692c5157 ("eal: add name field to generic device")
Cc: ***@dpdk.org

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_eal/bsdapp/eal/eal_pci.c | 4 ++--
lib/librte_eal/common/eal_common_pci.c | 21 ++++++++++++++++-----
lib/librte_eal/common/eal_private.h | 5 +++++
lib/librte_eal/linuxapp/eal/eal_pci.c | 4 ++--
4 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c
index e321461..97a88ec 100644
--- a/lib/librte_eal/bsdapp/eal/eal_pci.c
+++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
@@ -282,8 +282,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
/* FreeBSD has no NUMA support (yet) */
dev->device.numa_node = 0;

- rte_pci_device_name(&dev->addr, dev->name, sizeof(dev->name));
- dev->device.name = dev->name;
+ pci_name_set(dev);

/* FreeBSD has only one pass through driver */
dev->kdrv = RTE_KDRV_NIC_UIO;
@@ -334,6 +333,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
} else { /* already registered */
dev2->kdrv = dev->kdrv;
dev2->max_vfs = dev->max_vfs;
+ pci_name_set(dev2);
memmove(dev2->mem_resource,
dev->mem_resource,
sizeof(dev->mem_resource));
diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
index e03a407..ab01749 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -88,6 +88,21 @@ static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
return NULL;
}

+void
+pci_name_set(struct rte_pci_device *dev)
+{
+ struct rte_devargs *devargs;
+
+ rte_pci_device_name(&dev->addr,
+ dev->name, sizeof(dev->name));
+ devargs = pci_devargs_lookup(dev);
+ dev->device.devargs = devargs;
+ if (devargs != NULL)
+ dev->device.name = dev->device.devargs->name;
+ else
+ dev->device.name = dev->name;
+}
+
/* map a particular resource from a file */
void *
pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
@@ -396,11 +411,7 @@ rte_pci_probe(void)
FOREACH_DEVICE_ON_PCIBUS(dev) {
probed++;

- /* set devargs in PCI structure */
- devargs = pci_devargs_lookup(dev);
- if (devargs != NULL)
- dev->device.devargs = devargs;
-
+ devargs = dev->device.devargs;
/* probe all or only whitelisted devices */
if (probe_all)
ret = pci_probe_all_drivers(dev);
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 0836339..597d82e 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -113,6 +113,11 @@ struct rte_pci_driver;
struct rte_pci_device;

/**
+ * Find the name of a PCI device.
+ */
+void pci_name_set(struct rte_pci_device *dev);
+
+/**
* Add a PCI device to the PCI Bus (append to PCI Device list). This function
* also updates the bus references of the PCI Device (and the generic device
* object embedded within.
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 7d9e1a9..556ae2c 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -324,8 +324,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr)
dev->device.numa_node = 0;
}

- rte_pci_device_name(addr, dev->name, sizeof(dev->name));
- dev->device.name = dev->name;
+ pci_name_set(dev);

/* parse resources */
snprintf(filename, sizeof(filename), "%s/resource", dirname);
@@ -373,6 +372,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr)
} else { /* already registered */
dev2->kdrv = dev->kdrv;
dev2->max_vfs = dev->max_vfs;
+ pci_name_set(dev2);
memmove(dev2->mem_resource, dev->mem_resource,
sizeof(dev->mem_resource));
free(dev);
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:13 UTC
Permalink
The field is set but never resetted on error.

Fixes: 7917d5f5ea46 ("pci: initialize generic driver pointer")
Cc: ***@dpdk.org

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_eal/common/eal_common_pci.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
index ab01749..fbdc356 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -237,6 +237,7 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
ret = dr->probe(dr, dev);
if (ret) {
dev->driver = NULL;
+ dev->device.driver = NULL;
if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) &&
/* Don't unmap if device is unsupported and
* driver needs mapped resources.
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:14 UTC
Permalink
The device handle is already known and does not have to be infered from
the PCI address.

Additionally, rte_memcpy.h was erroneously included.

Fixes: 00e62aae69c0 ("bus/pci: implement plug/unplug operations")

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_eal/common/eal_common_pci.c | 25 ++++++-------------------
1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
index fbdc356..a9a04db 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -47,7 +47,6 @@
#include <rte_pci.h>
#include <rte_per_lcore.h>
#include <rte_memory.h>
-#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_string_fns.h>
@@ -538,32 +537,20 @@ pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
static int
pci_plug(struct rte_device *dev, const char *devargs __rte_unused)
{
- struct rte_pci_device *pdev;
- struct rte_pci_addr *addr;
-
- addr = &RTE_DEV_TO_PCI(dev)->addr;
-
- /* Find the current device holding this address in the bus. */
- FOREACH_DEVICE_ON_PCIBUS(pdev) {
- if (rte_eal_compare_pci_addr(&pdev->addr, addr) == 0)
- return rte_pci_probe_one(addr);
- }
-
- rte_errno = ENODEV;
- return -1;
+ return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev));
}

static int
pci_unplug(struct rte_device *dev)
{
struct rte_pci_device *pdev;
+ int ret;

pdev = RTE_DEV_TO_PCI(dev);
- if (rte_pci_detach(&pdev->addr) != 0) {
- rte_errno = ENODEV;
- return -1;
- }
- return 0;
+ ret = rte_pci_detach_dev(pdev);
+ rte_pci_remove_device(pdev);
+ free(pdev);
+ return ret;
}

struct rte_pci_bus rte_pci_bus = {
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:15 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_eal/common/include/rte_vdev.h | 7 +++++++
1 file changed, 7 insertions(+)

diff --git a/lib/librte_eal/common/include/rte_vdev.h b/lib/librte_eal/common/include/rte_vdev.h
index 3c07b76..639e6d6 100644
--- a/lib/librte_eal/common/include/rte_vdev.h
+++ b/lib/librte_eal/common/include/rte_vdev.h
@@ -46,6 +46,13 @@ struct rte_vdev_device {
struct rte_device device; /**< Inherit core device */
};

+/**
+ * @internal
+ * Helper macro for drivers that need to convert to struct rte_vdev_device.
+ */
+#define RTE_DEV_TO_VDEV(ptr) \
+ container_of(ptr, struct rte_vdev_device, device)
+
static inline const char *
rte_vdev_device_name(const struct rte_vdev_device *dev)
{
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:16 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_eal/common/eal_common_vdev.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_vdev.c b/lib/librte_eal/common/eal_common_vdev.c
index 517a9f3..5f1c224 100644
--- a/lib/librte_eal/common/eal_common_vdev.c
+++ b/lib/librte_eal/common/eal_common_vdev.c
@@ -356,12 +356,14 @@ vdev_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
}

static int
+vdev_plug(struct rte_device *dev, const char *args __rte_unused)
+{
+ return vdev_probe_all_drivers(RTE_DEV_TO_VDEV(dev));
+}
+
+static int
vdev_unplug(struct rte_device *dev)
{
- /*
- * The virtual bus doesn't support 'unattached' devices so this is
- * actually equal to hotplugging removal of it.
- */
return rte_vdev_uninit(dev->name);
}

@@ -369,7 +371,7 @@ static struct rte_bus rte_vdev_bus = {
.scan = vdev_scan,
.probe = vdev_probe,
.find_device = vdev_find_device,
- /* .plug = NULL, see comment on vdev_unplug */
+ .plug = vdev_plug,
.unplug = vdev_unplug,
.parse = vdev_parse,
};
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:17 UTC
Permalink
The prior scan should link the relevant rte_devargs to the newly
allocated rte_device. As such, it is useless to pass device arguments to
the plug callback.

Fixes: 7c8810f43f6e ("bus: introduce device plug/unplug")
Fixes: 00e62aae69c0 ("bus/pci: implement plug/unplug operations")
Fixes: a3ee360f4440 ("eal: add hotplug add/remove device")

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_eal/common/eal_common_dev.c | 2 +-
lib/librte_eal/common/eal_common_pci.c | 2 +-
lib/librte_eal/common/eal_common_vdev.c | 2 +-
lib/librte_eal/common/include/rte_bus.h | 6 +-----
4 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index 143c231..42e91db 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -188,7 +188,7 @@ rte_eal_hotplug_add(const char *busname, const char *devname,
goto err_devarg;
}

- ret = bus->plug(dev, devargs);
+ ret = bus->plug(dev);
if (ret) {
RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
dev->name);
diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
index a9a04db..adfe644 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -535,7 +535,7 @@ pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
}

static int
-pci_plug(struct rte_device *dev, const char *devargs __rte_unused)
+pci_plug(struct rte_device *dev)
{
return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev));
}
diff --git a/lib/librte_eal/common/eal_common_vdev.c b/lib/librte_eal/common/eal_common_vdev.c
index 5f1c224..ea3917a 100644
--- a/lib/librte_eal/common/eal_common_vdev.c
+++ b/lib/librte_eal/common/eal_common_vdev.c
@@ -356,7 +356,7 @@ vdev_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
}

static int
-vdev_plug(struct rte_device *dev, const char *args __rte_unused)
+vdev_plug(struct rte_device *dev)
{
return vdev_probe_all_drivers(RTE_DEV_TO_VDEV(dev));
}
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
index 37cc230..167635a 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -114,15 +114,11 @@ typedef struct rte_device *
* @param dev
* Device pointer that was returned by a previous call to find_device.
*
- * @param devargs
- * Device declaration.
- *
* @return
* 0 on success.
* !0 on error.
*/
-typedef int (*rte_bus_plug_t)(struct rte_device *dev,
- const char *devargs);
+typedef int (*rte_bus_plug_t)(struct rte_device *dev);

/**
* Implementation specific remove function which is responsible for unlinking
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:18 UTC
Permalink
This allows PMDs and applications to save flow rules in their generic
format for later processing. This is useful when rules cannot be applied
immediately, such as when the device is not properly initialized.

Signed-off-by: Gaetan Rivet <***@6wind.com>
Signed-off-by: Adrien Mazarguil <***@6wind.com>
---
lib/librte_ether/rte_ether_version.map | 1 +
lib/librte_ether/rte_flow.c | 227 +++++++++++++++++++++++++++++++++
lib/librte_ether/rte_flow.h | 40 ++++++
3 files changed, 268 insertions(+)

diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index 019a93d..6f65f83 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -152,6 +152,7 @@ DPDK_17.08 {
global:

_rte_eth_dev_callback_process;
+ rte_flow_copy;
rte_flow_isolate;

} DPDK_17.05;
diff --git a/lib/librte_ether/rte_flow.c b/lib/librte_ether/rte_flow.c
index c1de31b..884e4f6 100644
--- a/lib/librte_ether/rte_flow.c
+++ b/lib/librte_ether/rte_flow.c
@@ -31,14 +31,81 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

+#include <errno.h>
+#include <stddef.h>
#include <stdint.h>
+#include <string.h>

+#include <rte_common.h>
#include <rte_errno.h>
#include <rte_branch_prediction.h>
#include "rte_ethdev.h"
#include "rte_flow_driver.h"
#include "rte_flow.h"

+/**
+ * Flow elements description tables.
+ */
+struct rte_flow_desc_data {
+ const char *name;
+ size_t size;
+};
+
+/** Generate flow_item[] entry. */
+#define MK_FLOW_ITEM(t, s) \
+ [RTE_FLOW_ITEM_TYPE_ ## t] = { \
+ .name = # t, \
+ .size = s, \
+ }
+
+/** Information about known flow pattern items. */
+static const struct rte_flow_desc_data rte_flow_desc_item[] = {
+ MK_FLOW_ITEM(END, 0),
+ MK_FLOW_ITEM(VOID, 0),
+ MK_FLOW_ITEM(INVERT, 0),
+ MK_FLOW_ITEM(ANY, sizeof(struct rte_flow_item_any)),
+ MK_FLOW_ITEM(PF, 0),
+ MK_FLOW_ITEM(VF, sizeof(struct rte_flow_item_vf)),
+ MK_FLOW_ITEM(PORT, sizeof(struct rte_flow_item_port)),
+ MK_FLOW_ITEM(RAW, sizeof(struct rte_flow_item_raw)), /* +pattern[] */
+ MK_FLOW_ITEM(ETH, sizeof(struct rte_flow_item_eth)),
+ MK_FLOW_ITEM(VLAN, sizeof(struct rte_flow_item_vlan)),
+ MK_FLOW_ITEM(IPV4, sizeof(struct rte_flow_item_ipv4)),
+ MK_FLOW_ITEM(IPV6, sizeof(struct rte_flow_item_ipv6)),
+ MK_FLOW_ITEM(ICMP, sizeof(struct rte_flow_item_icmp)),
+ MK_FLOW_ITEM(UDP, sizeof(struct rte_flow_item_udp)),
+ MK_FLOW_ITEM(TCP, sizeof(struct rte_flow_item_tcp)),
+ MK_FLOW_ITEM(SCTP, sizeof(struct rte_flow_item_sctp)),
+ MK_FLOW_ITEM(VXLAN, sizeof(struct rte_flow_item_vxlan)),
+ MK_FLOW_ITEM(MPLS, sizeof(struct rte_flow_item_mpls)),
+ MK_FLOW_ITEM(GRE, sizeof(struct rte_flow_item_gre)),
+ MK_FLOW_ITEM(E_TAG, sizeof(struct rte_flow_item_e_tag)),
+ MK_FLOW_ITEM(NVGRE, sizeof(struct rte_flow_item_nvgre)),
+};
+
+/** Generate flow_action[] entry. */
+#define MK_FLOW_ACTION(t, s) \
+ [RTE_FLOW_ACTION_TYPE_ ## t] = { \
+ .name = # t, \
+ .size = s, \
+ }
+
+/** Information about known flow actions. */
+static const struct rte_flow_desc_data rte_flow_desc_action[] = {
+ MK_FLOW_ACTION(END, 0),
+ MK_FLOW_ACTION(VOID, 0),
+ MK_FLOW_ACTION(PASSTHRU, 0),
+ MK_FLOW_ACTION(MARK, sizeof(struct rte_flow_action_mark)),
+ MK_FLOW_ACTION(FLAG, 0),
+ MK_FLOW_ACTION(QUEUE, sizeof(struct rte_flow_action_queue)),
+ MK_FLOW_ACTION(DROP, 0),
+ MK_FLOW_ACTION(COUNT, 0),
+ MK_FLOW_ACTION(DUP, sizeof(struct rte_flow_action_dup)),
+ MK_FLOW_ACTION(RSS, sizeof(struct rte_flow_action_rss)), /* +queue[] */
+ MK_FLOW_ACTION(PF, 0),
+ MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)),
+};
+
/* Get generic flow operations structure from a port. */
const struct rte_flow_ops *
rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error)
@@ -175,3 +242,163 @@ rte_flow_isolate(uint8_t port_id,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
NULL, rte_strerror(ENOSYS));
}
+
+/** Compute storage space needed by item specification. */
+static void
+flow_item_spec_size(const struct rte_flow_item *item,
+ size_t *size, size_t *pad)
+{
+ if (!item->spec)
+ goto empty;
+ switch (item->type) {
+ union {
+ const struct rte_flow_item_raw *raw;
+ } spec;
+
+ /* Not a fall-through */
+ case RTE_FLOW_ITEM_TYPE_RAW:
+ spec.raw = item->spec;
+ *size = offsetof(struct rte_flow_item_raw, pattern) +
+ spec.raw->length * sizeof(*spec.raw->pattern);
+ break;
+ default:
+empty:
+ *size = 0;
+ break;
+ }
+ *pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size;
+}
+
+/** Compute storage space needed by action configuration. */
+static void
+flow_action_conf_size(const struct rte_flow_action *action,
+ size_t *size, size_t *pad)
+{
+ if (!action->conf)
+ goto empty;
+ switch (action->type) {
+ union {
+ const struct rte_flow_action_rss *rss;
+ } conf;
+
+ /* Not a fall-through. */
+ case RTE_FLOW_ACTION_TYPE_RSS:
+ conf.rss = action->conf;
+ *size = offsetof(struct rte_flow_action_rss, queue) +
+ conf.rss->num * sizeof(*conf.rss->queue);
+ break;
+ default:
+empty:
+ *size = 0;
+ break;
+ }
+ *pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size;
+}
+
+/** Store a full rte_flow description. */
+size_t
+rte_flow_copy(struct rte_flow_desc *desc, size_t len,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item *items,
+ const struct rte_flow_action *actions)
+{
+ struct rte_flow_desc *fd = NULL;
+ size_t tmp;
+ size_t pad;
+ size_t off1 = 0;
+ size_t off2 = 0;
+ size_t size = 0;
+
+store:
+ if (items) {
+ const struct rte_flow_item *item;
+
+ item = items;
+ if (fd)
+ fd->items = (void *)&fd->data[off1];
+ do {
+ struct rte_flow_item *dst = NULL;
+
+ if ((size_t)item->type >=
+ RTE_DIM(rte_flow_desc_item) ||
+ !rte_flow_desc_item[item->type].name) {
+ rte_errno = ENOTSUP;
+ return 0;
+ }
+ if (fd)
+ dst = memcpy(fd->data + off1, item,
+ sizeof(*item));
+ off1 += sizeof(*item);
+ flow_item_spec_size(item, &tmp, &pad);
+ if (item->spec) {
+ if (fd)
+ dst->spec = memcpy(fd->data + off2,
+ item->spec, tmp);
+ off2 += tmp + pad;
+ }
+ if (item->last) {
+ if (fd)
+ dst->last = memcpy(fd->data + off2,
+ item->last, tmp);
+ off2 += tmp + pad;
+ }
+ if (item->mask) {
+ if (fd)
+ dst->mask = memcpy(fd->data + off2,
+ item->mask, tmp);
+ off2 += tmp + pad;
+ }
+ off2 = RTE_ALIGN_CEIL(off2, sizeof(double));
+ } while ((item++)->type != RTE_FLOW_ITEM_TYPE_END);
+ off1 = RTE_ALIGN_CEIL(off1, sizeof(double));
+ }
+ if (actions) {
+ const struct rte_flow_action *action;
+
+ action = actions;
+ if (fd)
+ fd->actions = (void *)&fd->data[off1];
+ do {
+ struct rte_flow_action *dst = NULL;
+
+ if ((size_t)action->type >=
+ RTE_DIM(rte_flow_desc_action) ||
+ !rte_flow_desc_action[action->type].name) {
+ rte_errno = ENOTSUP;
+ return 0;
+ }
+ if (fd)
+ dst = memcpy(fd->data + off1, action,
+ sizeof(*action));
+ off1 += sizeof(*action);
+ flow_action_conf_size(action, &tmp, &pad);
+ if (action->conf) {
+ if (fd)
+ dst->conf = memcpy(fd->data + off2,
+ action->conf, tmp);
+ off2 += tmp + pad;
+ }
+ off2 = RTE_ALIGN_CEIL(off2, sizeof(double));
+ } while ((action++)->type != RTE_FLOW_ACTION_TYPE_END);
+ }
+ if (fd != NULL)
+ return size;
+ off1 = RTE_ALIGN_CEIL(off1, sizeof(double));
+ tmp = RTE_ALIGN_CEIL(offsetof(struct rte_flow_desc, data),
+ sizeof(double));
+ size = tmp + off1 + off2;
+ if (size > len)
+ return size;
+ fd = desc;
+ if (fd != NULL) {
+ *fd = (const struct rte_flow_desc) {
+ .size = size,
+ .attr = *attr,
+ };
+ tmp -= offsetof(struct rte_flow_desc, data);
+ off2 = tmp + off1;
+ off1 = tmp;
+ goto store;
+ }
+ return 0;
+}
diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h
index cfbed30..6ac7cdb 100644
--- a/lib/librte_ether/rte_flow.h
+++ b/lib/librte_ether/rte_flow.h
@@ -1266,6 +1266,46 @@ rte_flow_query(uint8_t port_id,
int
rte_flow_isolate(uint8_t port_id, int set, struct rte_flow_error *error);

+/**
+ * Generic flow representation.
+ *
+ * This form is sufficient to describe an rte_flow independently from any
+ * PMD implementation and allows for replayability and identification.
+ */
+struct rte_flow_desc {
+ size_t size; /**< Allocated space including data[]. */
+ struct rte_flow_attr attr; /**< Attributes. */
+ struct rte_flow_item *items; /**< Items. */
+ struct rte_flow_action *actions; /**< Actions. */
+ uint8_t data[]; /**< Storage for items/actions. */
+};
+
+/**
+ * Copy an rte_flow rule description.
+ *
+ * @param[in] fd
+ * Flow rule description.
+ * @param[in] len
+ * Total size of allocated data for the flow description.
+ * @param[in] attr
+ * Flow rule attributes.
+ * @param[in] items
+ * Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ * Associated actions (list terminated by the END action).
+ *
+ * @return
+ * If len is greater or equal to the size of the flow, the total size of the
+ * flow description and its data.
+ * If len is lower than the size of the flow, the number of bytes that would
+ * have been written to desc had it been sufficient. Nothing is written.
+ */
+size_t
+rte_flow_copy(struct rte_flow_desc *fd, size_t len,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item *items,
+ const struct rte_flow_action *actions);
+
#ifdef __cplusplus
}
#endif
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:19 UTC
Permalink
Other configuration items (i.e. MAC addresses) are stored within
rte_eth_dev_data, but not this one.

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_ether/rte_ethdev.c | 19 ++++++++++++++++++-
lib/librte_ether/rte_ethdev.h | 10 ++++++++++
2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 957ae2a..38c5ad4 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1981,6 +1981,7 @@ int
rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
{
struct rte_eth_dev *dev;
+ int ret;

RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
dev = &rte_eth_devices[port_id];
@@ -1996,7 +1997,23 @@ rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_filter_set, -ENOTSUP);

- return (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on);
+ ret = (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on);
+ if (ret == 0) {
+ struct rte_vlan_filter_conf *vfc;
+ int vidx;
+ int vbit;
+
+ vfc = &dev->data->vlan_filter_conf;
+ vidx = vlan_id / 64;
+ vbit = vlan_id % 64;
+
+ if (on)
+ vfc->ids[vidx] |= UINT64_C(1) << vbit;
+ else
+ vfc->ids[vidx] &= ~(UINT64_C(1) << vbit);
+ }
+
+ return ret;
}

int
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index d1076c8..acbbbb6 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -374,6 +374,14 @@ enum rte_vlan_type {
};

/**
+ * A structure used to describe a vlan filter.
+ * If the bit corresponding to a VID is set, such VID is on.
+ */
+struct rte_vlan_filter_conf {
+ uint64_t ids[64];
+};
+
+/**
* A structure used to configure the Receive Side Scaling (RSS) feature
* of an Ethernet port.
* If not NULL, the *rss_key* pointer of the *rss_conf* structure points
@@ -1674,6 +1682,8 @@ struct rte_eth_dev_data {
uint32_t dev_flags; /**< Capabilities */
enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */
int numa_node; /**< NUMA node connection */
+ struct rte_vlan_filter_conf vlan_filter_conf;
+ /**< VLAN filter configuration. */
};

/** Device supports hotplug detach */
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:20 UTC
Permalink
This device state means that the device is managed externally, by
whichever party has set this state (PMD or application).

Note: this new device state is only an information. The related device
structure and operators are still valid and can be used normally.

It is however made private by device management helpers within ethdev,
making the device invisible to applications.

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_ether/rte_ethdev.c | 3 ++-
lib/librte_ether/rte_ethdev.h | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 38c5ad4..0377b97 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -291,7 +291,8 @@ int
rte_eth_dev_is_valid_port(uint8_t port_id)
{
if (port_id >= RTE_MAX_ETHPORTS ||
- rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED)
+ (rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED &&
+ rte_eth_devices[port_id].state != RTE_ETH_DEV_DEFERRED))
return 0;
else
return 1;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index acbbbb6..4b485f7 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1589,6 +1589,7 @@ struct rte_eth_rxtx_callback {
enum rte_eth_dev_state {
RTE_ETH_DEV_UNUSED = 0,
RTE_ETH_DEV_ATTACHED,
+ RTE_ETH_DEV_DEFERRED,
};

/**
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:21 UTC
Permalink
Other configuration items (i.e. MAC addresses) are stored within
rte_eth_dev_data, but not this one.

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_ether/rte_ethdev.c | 19 ++++++++++++++++++-
lib/librte_ether/rte_ethdev.h | 10 ++++++++++
2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 957ae2a..38c5ad4 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1981,6 +1981,7 @@ int
rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
{
struct rte_eth_dev *dev;
+ int ret;

RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
dev = &rte_eth_devices[port_id];
@@ -1996,7 +1997,23 @@ rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_filter_set, -ENOTSUP);

- return (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on);
+ ret = (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on);
+ if (ret == 0) {
+ struct rte_vlan_filter_conf *vfc;
+ int vidx;
+ int vbit;
+
+ vfc = &dev->data->vlan_filter_conf;
+ vidx = vlan_id / 64;
+ vbit = vlan_id % 64;
+
+ if (on)
+ vfc->ids[vidx] |= UINT64_C(1) << vbit;
+ else
+ vfc->ids[vidx] &= ~(UINT64_C(1) << vbit);
+ }
+
+ return ret;
}

int
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index d1076c8..acbbbb6 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -374,6 +374,14 @@ enum rte_vlan_type {
};

/**
+ * A structure used to describe a vlan filter.
+ * If the bit corresponding to a VID is set, such VID is on.
+ */
+struct rte_vlan_filter_conf {
+ uint64_t ids[64];
+};
+
+/**
* A structure used to configure the Receive Side Scaling (RSS) feature
* of an Ethernet port.
* If not NULL, the *rss_key* pointer of the *rss_conf* structure points
@@ -1674,6 +1682,8 @@ struct rte_eth_dev_data {
uint32_t dev_flags; /**< Capabilities */
enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */
int numa_node; /**< NUMA node connection */
+ struct rte_vlan_filter_conf vlan_filter_conf;
+ /**< VLAN filter configuration. */
};

/** Device supports hotplug detach */
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:22 UTC
Permalink
This device state means that the device is managed externally, by
whichever party has set this state (PMD or application).

Note: this new device state is only an information. The related device
structure and operators are still valid and can be used normally.

It is however made private by device management helpers within ethdev,
making the device invisible to applications.

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_ether/rte_ethdev.c | 3 ++-
lib/librte_ether/rte_ethdev.h | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 38c5ad4..0377b97 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -291,7 +291,8 @@ int
rte_eth_dev_is_valid_port(uint8_t port_id)
{
if (port_id >= RTE_MAX_ETHPORTS ||
- rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED)
+ (rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED &&
+ rte_eth_devices[port_id].state != RTE_ETH_DEV_DEFERRED))
return 0;
else
return 1;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index acbbbb6..4b485f7 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1589,6 +1589,7 @@ struct rte_eth_rxtx_callback {
enum rte_eth_dev_state {
RTE_ETH_DEV_UNUSED = 0,
RTE_ETH_DEV_ATTACHED,
+ RTE_ETH_DEV_DEFERRED,
};

/**
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:23 UTC
Permalink
Make the rte_eth_dev_count() return the number of available devices even
after some are detached by the hotplug API or put in a deferred state.

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_ether/rte_ethdev.c | 16 +++++++++-------
lib/librte_ether/rte_ethdev.h | 13 ++++++-------
2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 0377b97..012950a 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -72,7 +72,6 @@ static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
static struct rte_eth_dev_data *rte_eth_dev_data;
static uint8_t eth_dev_last_created_port;
-static uint8_t nb_ports;

/* spinlock for eth device callbacks */
static rte_spinlock_t rte_eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER;
@@ -210,7 +209,6 @@ eth_dev_get(uint8_t port_id)
TAILQ_INIT(&(eth_dev->link_intr_cbs));

eth_dev_last_created_port = port_id;
- nb_ports++;

return eth_dev;
}
@@ -283,7 +281,6 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
return -EINVAL;

eth_dev->state = RTE_ETH_DEV_UNUSED;
- nb_ports--;
return 0;
}

@@ -308,7 +305,15 @@ rte_eth_dev_socket_id(uint8_t port_id)
uint8_t
rte_eth_dev_count(void)
{
- return nb_ports;
+ uint8_t p;
+ uint8_t count;
+
+ count = 0;
+
+ RTE_ETH_FOREACH_DEV(p)
+ count++;
+
+ return count;
}

int
@@ -341,9 +346,6 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id)
return -EINVAL;
}

- if (!nb_ports)
- return -ENODEV;
-
RTE_ETH_FOREACH_DEV(i) {
if (!rte_eth_devices[i].device)
continue;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 4b485f7..abac320 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1724,13 +1724,12 @@ uint8_t rte_eth_find_next(uint8_t port_id);

/**
* Get the total number of Ethernet devices that have been successfully
- * initialized by the [matching] Ethernet driver during the PCI probing phase.
- * All devices whose port identifier is in the range
- * [0, rte_eth_dev_count() - 1] can be operated on by network applications
- * immediately after invoking rte_eal_init().
- * If the application unplugs a port using hotplug function, The enabled port
- * numbers may be noncontiguous. In the case, the applications need to manage
- * enabled port by using the ``RTE_ETH_FOREACH_DEV()`` macro.
+ * initialized by the matching Ethernet driver during the PCI probing phase
+ * and that are available for applications to use. These devices must be
+ * accessed by using the ``RTE_ETH_FOREACH_DEV()`` macro to deal with
+ * non-contiguous ranges of devices.
+ * These non-contiguous ranges can be created by calls to hotplug functions or
+ * by some PMDs.
*
* @return
* - The total number of usable Ethernet devices.
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:24 UTC
Permalink
Make the rte_eth_dev_count() return the number of available devices even
after some are detached by the hotplug API or put in a deferred state.

Signed-off-by: Gaetan Rivet <***@6wind.com>
---
lib/librte_ether/rte_ethdev.c | 16 +++++++++-------
lib/librte_ether/rte_ethdev.h | 13 ++++++-------
2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 0377b97..012950a 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -72,7 +72,6 @@ static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
static struct rte_eth_dev_data *rte_eth_dev_data;
static uint8_t eth_dev_last_created_port;
-static uint8_t nb_ports;

/* spinlock for eth device callbacks */
static rte_spinlock_t rte_eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER;
@@ -210,7 +209,6 @@ eth_dev_get(uint8_t port_id)
TAILQ_INIT(&(eth_dev->link_intr_cbs));

eth_dev_last_created_port = port_id;
- nb_ports++;

return eth_dev;
}
@@ -283,7 +281,6 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
return -EINVAL;

eth_dev->state = RTE_ETH_DEV_UNUSED;
- nb_ports--;
return 0;
}

@@ -308,7 +305,15 @@ rte_eth_dev_socket_id(uint8_t port_id)
uint8_t
rte_eth_dev_count(void)
{
- return nb_ports;
+ uint8_t p;
+ uint8_t count;
+
+ count = 0;
+
+ RTE_ETH_FOREACH_DEV(p)
+ count++;
+
+ return count;
}

int
@@ -341,9 +346,6 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id)
return -EINVAL;
}

- if (!nb_ports)
- return -ENODEV;
-
RTE_ETH_FOREACH_DEV(i) {
if (!rte_eth_devices[i].device)
continue;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 4b485f7..abac320 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1724,13 +1724,12 @@ uint8_t rte_eth_find_next(uint8_t port_id);

/**
* Get the total number of Ethernet devices that have been successfully
- * initialized by the [matching] Ethernet driver during the PCI probing phase.
- * All devices whose port identifier is in the range
- * [0, rte_eth_dev_count() - 1] can be operated on by network applications
- * immediately after invoking rte_eal_init().
- * If the application unplugs a port using hotplug function, The enabled port
- * numbers may be noncontiguous. In the case, the applications need to manage
- * enabled port by using the ``RTE_ETH_FOREACH_DEV()`` macro.
+ * initialized by the matching Ethernet driver during the PCI probing phase
+ * and that are available for applications to use. These devices must be
+ * accessed by using the ``RTE_ETH_FOREACH_DEV()`` macro to deal with
+ * non-contiguous ranges of devices.
+ * These non-contiguous ranges can be created by calls to hotplug functions or
+ * by some PMDs.
*
* @return
* - The total number of usable Ethernet devices.
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:25 UTC
Permalink
Introduce the fail-safe poll mode driver initialization and enable its
build infrastructure.

This PMD allows for applications to benefit from true hot-plugging
support without having to implement it.

It intercepts and manages Ethernet device removal events issued by
slave PMDs and re-initializes them transparently when brought back.
It also allows defining a contingency to the removal of a device, by
designating a fail-over device that will take on transmitting operations
if the preferred device is removed.

Applications only see a fail-safe instance, without caring for
underlying activity ensuring their continued operations.

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
MAINTAINERS | 5 +
config/common_base | 6 +
doc/guides/nics/fail_safe.rst | 133 +++++
doc/guides/nics/features/failsafe.ini | 24 +
doc/guides/nics/index.rst | 1 +
drivers/net/Makefile | 2 +
drivers/net/failsafe/Makefile | 76 +++
drivers/net/failsafe/failsafe.c | 231 ++++++++
drivers/net/failsafe/failsafe_args.c | 331 +++++++++++
drivers/net/failsafe/failsafe_eal.c | 154 +++++
drivers/net/failsafe/failsafe_ops.c | 663 ++++++++++++++++++++++
drivers/net/failsafe/failsafe_private.h | 227 ++++++++
drivers/net/failsafe/failsafe_rxtx.c | 107 ++++
drivers/net/failsafe/rte_pmd_failsafe_version.map | 4 +
mk/rte.app.mk | 1 +
15 files changed, 1965 insertions(+)
create mode 100644 doc/guides/nics/fail_safe.rst
create mode 100644 doc/guides/nics/features/failsafe.ini
create mode 100644 drivers/net/failsafe/Makefile
create mode 100644 drivers/net/failsafe/failsafe.c
create mode 100644 drivers/net/failsafe/failsafe_args.c
create mode 100644 drivers/net/failsafe/failsafe_eal.c
create mode 100644 drivers/net/failsafe/failsafe_ops.c
create mode 100644 drivers/net/failsafe/failsafe_private.h
create mode 100644 drivers/net/failsafe/failsafe_rxtx.c
create mode 100644 drivers/net/failsafe/rte_pmd_failsafe_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index b4424ea..5ae007f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -336,6 +336,11 @@ F: drivers/net/enic/
F: doc/guides/nics/enic.rst
F: doc/guides/nics/features/enic.ini

+Fail-safe PMD
+M: Gaetan Rivet <***@6wind.com>
+F: drivers/net/failsafe/
+F: doc/guides/nics/fail_safe.rst
+
Intel e1000
M: Wenzhuo Lu <***@intel.com>
F: drivers/net/e1000/
diff --git a/config/common_base b/config/common_base
index 660588a..f402c4b 100644
--- a/config/common_base
+++ b/config/common_base
@@ -420,6 +420,12 @@ CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
CONFIG_RTE_LIBRTE_PMD_NULL=y

#
+# Compile fail-safe PMD
+#
+CONFIG_RTE_LIBRTE_PMD_FAILSAFE=y
+CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG=n
+
+#
# Do prefetch of packet data within PMD driver receive function
#
CONFIG_RTE_PMD_PACKET_PREFETCH=y
diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
new file mode 100644
index 0000000..056f85f
--- /dev/null
+++ b/doc/guides/nics/fail_safe.rst
@@ -0,0 +1,133 @@
+.. BSD LICENSE
+ Copyright 2017 6WIND S.A.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of 6WIND S.A. nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Fail-safe poll mode driver library
+==================================
+
+The Fail-safe poll mode driver library (**librte_pmd_failsafe**) is a virtual
+device that allows using any device supporting hotplug (sudden device removal
+and plugging on its bus), without modifying other components relying on such
+device (application, other PMDs).
+
+Additionally to the Seamless Hotplug feature, the Fail-safe PMD offers the
+ability to redirect operations to secondary devices when the primary has been
+removed from the system.
+
+.. note::
+
+ The library is enabled by default. You can enable it or disable it manually
+ by setting the ``CONFIG_RTE_LIBRTE_PMD_FAILSAFE`` configuration option.
+
+Features
+--------
+
+The Fail-safe PMD only supports a limited set of features. If you plan to use a
+device underneath the Fail-safe PMD with a specific feature, this feature must
+be supported by the Fail-safe PMD to avoid throwing any error.
+
+Check the feature matrix for the complete set of supported features.
+
+Compilation options
+-------------------
+
+These options can be modified in the ``$RTE_TARGET/build/.config`` file.
+
+- ``CONFIG_RTE_LIBRTE_PMD_FAILSAFE`` (default **y**)
+
+ Toggle compiling librte_pmd_failsafe itself.
+
+- ``CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG`` (default **n**)
+
+ Toggle debugging code.
+
+Using the Fail-safe PMD from the EAL command line
+-------------------------------------------------
+
+The Fail-safe PMD can be used like most other DPDK virtual devices, by passing a
+``--vdev`` parameter to the EAL when starting the application. The device name
+must start with the *net_failsafe* prefix, followed by numbers or letters. This
+name must be unique for each device. Each fail-safe instance must have at least one
+sub-device, up to ``RTE_MAX_ETHPORTS-1``.
+
+A sub-device can be any legal DPDK device, including possibly another fail-safe
+instance.
+
+Fail-safe command line parameters
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- **dev(<iface>)** parameter
+
+ This parameter allows the user to define a sub-device. The ``<iface>`` part of
+ this parameter must be a valid device definition. It could be the argument
+ provided to a ``-w`` PCI device specification or the argument that would be
+ given to a ``--vdev`` parameter (including a fail-safe).
+ Enclosing the device definition within parenthesis here allows using
+ additional sub-device parameters if need be. They will be passed on to the
+ sub-device.
+
+- **mac** parameter [MAC address]
+
+ This parameter allows the user to set a default MAC address to the fail-safe
+ and all of its sub-devices.
+ If no default mac address is provided, the fail-safe PMD will read the MAC
+ address of the first of its sub-device to be successfully probed and use it as
+ its default MAC address, trying to set it to all of its other sub-devices.
+ If no sub-device was successfully probed at initialization, then a random MAC
+ address is generated, that will be subsequently applied to all sub-device once
+ they are probed.
+
+Usage example
+~~~~~~~~~~~~~
+
+This section shows some example of using **testpmd** with a fail-safe PMD.
+
+#. Request huge pages:
+
+ .. code-block:: console
+
+ echo 1024 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+#. Start testpmd
+
+ .. code-block:: console
+
+ $RTE_TARGET/build/app/testpmd -c 0xff -n 4 --no-pci \
+ --vdev='net_failsafe0,mac=de:ad:be:ef:01:02,dev(84:00.0),dev(net_ring0,nodeaction=r1:0:CREATE)' -- \
+ -i
+
+Using the Fail-safe PMD from an application
+-------------------------------------------
+
+This driver strives to be as seamless as possible to existing applications, in
+order to propose the hotplug functionality in the easiest way possible.
+
+Care must be taken, however, to respect the **ether** API concerning device
+access, and in particular, using the ``RTE_ETH_FOREACH_DEV`` macro to iterate
+over ethernet devices, instead of directly accessing them or by writing one's
+own device iterator.
diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
new file mode 100644
index 0000000..3c52823
--- /dev/null
+++ b/doc/guides/nics/features/failsafe.ini
@@ -0,0 +1,24 @@
+;
+; Supported features of the 'fail-safe' poll mode driver.
+;
+; Refer to default.ini for the full list of available PMD features.
+;
+[Features]
+Link status = Y
+Queue start/stop = Y
+MTU update = Y
+Jumbo frame = Y
+Promiscuous mode = Y
+Allmulticast mode = Y
+Unicast MAC filter = Y
+Multicast MAC filter = Y
+VLAN filter = Y
+Packet type parsing = Y
+Basic stats = Y
+Stats per queue = Y
+ARMv7 = Y
+ARMv8 = Y
+Power8 = Y
+x86-32 = Y
+x86-64 = Y
+Usage doc = Y
diff --git a/doc/guides/nics/index.rst b/doc/guides/nics/index.rst
index 240d082..17eaaf4 100644
--- a/doc/guides/nics/index.rst
+++ b/doc/guides/nics/index.rst
@@ -64,6 +64,7 @@ Network Interface Controller Drivers
vhost
vmxnet3
pcap_ring
+ fail_safe

**Figures**

diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 35ed813..d33c959 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -59,6 +59,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_ENA_PMD) += ena
DEPDIRS-ena = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic
DEPDIRS-enic = $(core-libs) librte_hash
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe
+DEPDIRS-failsafe = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
DEPDIRS-fm10k = $(core-libs) librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
new file mode 100644
index 0000000..c759035
--- /dev/null
+++ b/drivers/net/failsafe/Makefile
@@ -0,0 +1,76 @@
+# BSD LICENSE
+#
+# Copyright 2017 6WIND S.A.
+# Copyright 2017 Mellanox.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of 6WIND S.A. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# Library name
+LIB = librte_pmd_failsafe.a
+
+EXPORT_MAP := rte_pmd_failsafe_version.map
+
+LIBABIVER := 1
+
+# Sources are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_args.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
+
+# No exported include files
+
+# This lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_kvargs
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_mbuf
+
+ifneq ($(DEBUG),)
+CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG := y
+endif
+
+# Basic CFLAGS:
+CFLAGS += -std=gnu99 -Wall -Wextra
+CFLAGS += -I.
+CFLAGS += -D_DEFAULT_SOURCE
+CFLAGS += -D_XOPEN_SOURCE=700
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -Wno-strict-prototypes
+CFLAGS += -pedantic -DPEDANTIC
+
+ifeq ($(CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG),y)
+CFLAGS += -g -UNDEBUG
+else
+CFLAGS += -O3
+CFLAGS += -DNDEBUG
+endif
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
new file mode 100644
index 0000000..7cf33e8
--- /dev/null
+++ b/drivers/net/failsafe/failsafe.c
@@ -0,0 +1,231 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <rte_alarm.h>
+#include <rte_malloc.h>
+#include <rte_ethdev.h>
+#include <rte_ethdev_vdev.h>
+#include <rte_devargs.h>
+#include <rte_kvargs.h>
+#include <rte_vdev.h>
+
+#include "failsafe_private.h"
+
+const char pmd_failsafe_driver_name[] = FAILSAFE_DRIVER_NAME;
+static const struct rte_eth_link eth_link = {
+ .link_speed = ETH_SPEED_NUM_10G,
+ .link_duplex = ETH_LINK_FULL_DUPLEX,
+ .link_status = ETH_LINK_UP,
+ .link_autoneg = ETH_LINK_SPEED_AUTONEG,
+};
+
+static int
+fs_sub_device_create(struct rte_eth_dev *dev,
+ const char *params)
+{
+ uint8_t nb_subs;
+ int ret;
+
+ ret = failsafe_args_count_subdevice(dev, params);
+ if (ret)
+ return ret;
+ if (PRIV(dev)->subs_tail > FAILSAFE_MAX_ETHPORTS) {
+ ERROR("Cannot allocate more than %d ports",
+ FAILSAFE_MAX_ETHPORTS);
+ return -ENOSPC;
+ }
+ nb_subs = PRIV(dev)->subs_tail;
+ PRIV(dev)->subs = rte_zmalloc(NULL,
+ sizeof(struct sub_device) * nb_subs,
+ RTE_CACHE_LINE_SIZE);
+ if (PRIV(dev)->subs == NULL) {
+ ERROR("Could not allocate sub_devices");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void
+fs_sub_device_free(struct rte_eth_dev *dev)
+{
+ rte_free(PRIV(dev)->subs);
+}
+
+static int
+fs_eth_dev_create(struct rte_vdev_device *vdev)
+{
+ struct rte_eth_dev *dev;
+ struct ether_addr *mac;
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ const char *params;
+ unsigned int socket_id;
+ uint8_t i;
+ int ret;
+
+ dev = NULL;
+ priv = NULL;
+ params = rte_vdev_device_args(vdev);
+ socket_id = rte_socket_id();
+ INFO("Creating fail-safe device on NUMA socket %u",
+ socket_id);
+ dev = rte_eth_vdev_allocate(vdev, sizeof(*priv));
+ if (dev == NULL) {
+ ERROR("Unable to allocate rte_eth_dev");
+ return -1;
+ }
+ priv = dev->data->dev_private;
+ PRIV(dev)->dev = dev;
+ dev->dev_ops = &failsafe_ops;
+ TAILQ_INIT(&dev->link_intr_cbs);
+ dev->data->dev_flags = 0x0;
+ dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0];
+ dev->data->dev_link = eth_link;
+ PRIV(dev)->nb_mac_addr = 1;
+ dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst;
+ dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst;
+ if (params == NULL) {
+ ERROR("This PMD requires sub-devices, none provided");
+ goto free_dev;
+ }
+ ret = fs_sub_device_create(dev, params);
+ if (ret) {
+ ERROR("Could not allocate sub_devices");
+ goto free_dev;
+ }
+ ret = failsafe_args_parse(dev, params);
+ if (ret)
+ goto free_subs;
+ ret = failsafe_eal_init(dev);
+ if (ret)
+ goto free_args;
+ mac = &dev->data->mac_addrs[0];
+ if (mac_from_arg) {
+ /*
+ * If MAC address was provided as a parameter,
+ * apply to all probed slaves.
+ */
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
+ mac);
+ if (ret) {
+ ERROR("Failed to set default MAC address");
+ goto free_args;
+ }
+ }
+ } else {
+ /*
+ * Use the ether_addr from first probed
+ * device, either preferred or fallback.
+ */
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state >= DEV_PROBED) {
+ ether_addr_copy(&ETH(sdev)->data->mac_addrs[0],
+ mac);
+ break;
+ }
+ /*
+ * If no device has been probed and no ether_addr
+ * has been provided on the command line, use a random
+ * valid one.
+ * It will be applied during future slave state syncs to
+ * probed slaves.
+ */
+ if (i == priv->subs_tail)
+ eth_random_addr(&mac->addr_bytes[0]);
+ }
+ INFO("MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
+ mac->addr_bytes[0], mac->addr_bytes[1],
+ mac->addr_bytes[2], mac->addr_bytes[3],
+ mac->addr_bytes[4], mac->addr_bytes[5]);
+ return 0;
+free_args:
+ failsafe_args_free(dev);
+free_subs:
+ fs_sub_device_free(dev);
+free_dev:
+ rte_eth_dev_release_port(dev);
+ return -1;
+}
+
+static int
+fs_rte_eth_free(const char *name)
+{
+ struct rte_eth_dev *dev;
+ int ret;
+
+ dev = rte_eth_dev_allocated(name);
+ if (dev == NULL)
+ return -ENODEV;
+ ret = failsafe_eal_uninit(dev);
+ if (ret)
+ ERROR("Error while uninitializing sub-EAL");
+ failsafe_args_free(dev);
+ fs_sub_device_free(dev);
+ rte_free(PRIV(dev));
+ rte_eth_dev_release_port(dev);
+ return ret;
+}
+
+static int
+rte_pmd_failsafe_probe(struct rte_vdev_device *vdev)
+{
+ const char *name;
+
+ name = rte_vdev_device_name(vdev);
+ if (vdev == NULL)
+ return -EINVAL;
+ INFO("Initializing " FAILSAFE_DRIVER_NAME " for %s",
+ name);
+ return fs_eth_dev_create(vdev);
+}
+
+static int
+rte_pmd_failsafe_remove(struct rte_vdev_device *vdev)
+{
+ const char *name;
+
+ name = rte_vdev_device_name(vdev);
+ if (name == NULL)
+ return -EINVAL;
+ INFO("Uninitializing " FAILSAFE_DRIVER_NAME " for %s", name);
+ return fs_rte_eth_free(name);
+}
+
+static struct rte_vdev_driver failsafe_drv = {
+ .probe = rte_pmd_failsafe_probe,
+ .remove = rte_pmd_failsafe_remove,
+};
+
+RTE_PMD_REGISTER_VDEV(net_failsafe, failsafe_drv);
+RTE_PMD_REGISTER_ALIAS(net_failsafe, eth_failsafe);
+RTE_PMD_REGISTER_PARAM_STRING(net_failsafe, PMD_FAILSAFE_PARAM_STRING);
diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
new file mode 100644
index 0000000..f07d26e
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -0,0 +1,331 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <errno.h>
+
+#include <rte_devargs.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+
+#include "failsafe_private.h"
+
+#define DEVARGS_MAXLEN 4096
+
+/* Callback used when a new device is found in devargs */
+typedef int (parse_cb)(struct rte_eth_dev *dev, const char *params,
+ uint8_t head);
+
+int mac_from_arg;
+
+const char *pmd_failsafe_init_parameters[] = {
+ PMD_FAILSAFE_MAC_KVARG,
+ NULL,
+};
+
+/*
+ * input: text.
+ * output: 0: if text[0] != '(',
+ * 0: if there are no corresponding ')'
+ * n: distance to corresponding ')' otherwise
+ */
+static size_t
+closing_paren(const char *text)
+{
+ int nb_open = 0;
+ size_t i = 0;
+
+ while (text[i] != '\0') {
+ if (text[i] == '(')
+ nb_open++;
+ if (text[i] == ')')
+ nb_open--;
+ if (nb_open == 0)
+ return i;
+ i++;
+ }
+ return 0;
+}
+
+static int
+fs_parse_device(struct sub_device *sdev, char *args)
+{
+ struct rte_devargs *d;
+ int ret;
+
+ d = &sdev->devargs;
+ DEBUG("%s", args);
+ ret = rte_eal_devargs_parse(args, d);
+ if (ret) {
+ DEBUG("devargs parsing failed with code %d", ret);
+ return ret;
+ }
+ sdev->bus = d->bus;
+ sdev->state = DEV_PARSED;
+ return 0;
+}
+
+static int
+fs_parse_device_param(struct rte_eth_dev *dev, const char *param,
+ uint8_t head)
+{
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ char *args = NULL;
+ size_t a, b;
+ int ret;
+
+ priv = PRIV(dev);
+ a = 0;
+ b = 0;
+ ret = 0;
+ while (param[b] != '(' &&
+ param[b] != '\0')
+ b++;
+ a = b;
+ b += closing_paren(&param[b]);
+ if (a == b) {
+ ERROR("Dangling parenthesis");
+ return -EINVAL;
+ }
+ a += 1;
+ args = strndup(&param[a], b - a);
+ if (args == NULL) {
+ ERROR("Not enough memory for parameter parsing");
+ return -ENOMEM;
+ }
+ sdev = &priv->subs[head];
+ if (strncmp(param, "dev", 3) == 0) {
+ ret = fs_parse_device(sdev, args);
+ if (ret)
+ goto free_args;
+ } else {
+ ERROR("Unrecognized device type: %.*s", (int)b, param);
+ return -EINVAL;
+ }
+free_args:
+ free(args);
+ return ret;
+}
+
+static int
+fs_parse_sub_devices(parse_cb *cb,
+ struct rte_eth_dev *dev, const char *params)
+{
+ size_t a, b;
+ uint8_t head;
+ int ret;
+
+ a = 0;
+ head = 0;
+ ret = 0;
+ while (params[a] != '\0') {
+ b = a;
+ while (params[b] != '(' &&
+ params[b] != ',' &&
+ params[b] != '\0')
+ b++;
+ if (b == a) {
+ ERROR("Invalid parameter");
+ return -EINVAL;
+ }
+ if (params[b] == ',') {
+ a = b + 1;
+ continue;
+ }
+ if (params[b] == '(') {
+ size_t start = b;
+
+ b += closing_paren(&params[b]);
+ if (b == start) {
+ ERROR("Dangling parenthesis");
+ return -EINVAL;
+ }
+ ret = (*cb)(dev, &params[a], head);
+ if (ret)
+ return ret;
+ head += 1;
+ b += 1;
+ if (params[b] == '\0')
+ return 0;
+ }
+ a = b + 1;
+ }
+ return 0;
+}
+
+static int
+fs_remove_sub_devices_definition(char params[DEVARGS_MAXLEN])
+{
+ char buffer[DEVARGS_MAXLEN] = {0};
+ size_t a, b;
+ int i;
+
+ a = 0;
+ i = 0;
+ while (params[a] != '\0') {
+ b = a;
+ while (params[b] != '(' &&
+ params[b] != ',' &&
+ params[b] != '\0')
+ b++;
+ if (b == a) {
+ ERROR("Invalid parameter");
+ return -EINVAL;
+ }
+ if (params[b] == ',' || params[b] == '\0')
+ i += snprintf(&buffer[i], b - a + 1, "%s", &params[a]);
+ if (params[b] == '(') {
+ size_t start = b;
+ b += closing_paren(&params[b]);
+ if (b == start)
+ return -EINVAL;
+ b += 1;
+ if (params[b] == '\0')
+ goto out;
+ }
+ a = b + 1;
+ }
+out:
+ snprintf(params, DEVARGS_MAXLEN, "%s", buffer);
+ return 0;
+}
+
+static int
+fs_get_mac_addr_arg(const char *key __rte_unused,
+ const char *value, void *out)
+{
+ struct ether_addr *ea = out;
+ int ret;
+
+ if ((value == NULL) || (out == NULL))
+ return -EINVAL;
+ ret = sscanf(value, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+ &ea->addr_bytes[0], &ea->addr_bytes[1],
+ &ea->addr_bytes[2], &ea->addr_bytes[3],
+ &ea->addr_bytes[4], &ea->addr_bytes[5]);
+ return ret != ETHER_ADDR_LEN;
+}
+
+int
+failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
+{
+ struct fs_priv *priv;
+ char mut_params[DEVARGS_MAXLEN] = "";
+ struct rte_kvargs *kvlist = NULL;
+ unsigned int arg_count;
+ size_t n;
+ int ret;
+
+ if (dev == NULL || params == NULL)
+ return -EINVAL;
+ priv = PRIV(dev);
+ ret = 0;
+ priv->subs_tx = FAILSAFE_MAX_ETHPORTS;
+ /* default parameters */
+ mac_from_arg = 0;
+ n = snprintf(mut_params, sizeof(mut_params), "%s", params);
+ if (n >= sizeof(mut_params)) {
+ ERROR("Parameter string too long (>=%zu)",
+ sizeof(mut_params));
+ return -ENOMEM;
+ }
+ ret = fs_parse_sub_devices(fs_parse_device_param,
+ dev, params);
+ if (ret < 0)
+ return ret;
+ ret = fs_remove_sub_devices_definition(mut_params);
+ if (ret < 0)
+ return ret;
+ if (strnlen(mut_params, sizeof(mut_params)) > 0) {
+ kvlist = rte_kvargs_parse(mut_params,
+ pmd_failsafe_init_parameters);
+ if (kvlist == NULL) {
+ ERROR("Error parsing parameters, usage:\n"
+ PMD_FAILSAFE_PARAM_STRING);
+ return -1;
+ }
+ /* MAC addr */
+ arg_count = rte_kvargs_count(kvlist,
+ PMD_FAILSAFE_MAC_KVARG);
+ if (arg_count == 1) {
+ ret = rte_kvargs_process(kvlist,
+ PMD_FAILSAFE_MAC_KVARG,
+ &fs_get_mac_addr_arg,
+ &dev->data->mac_addrs[0]);
+ if (ret < 0)
+ goto free_kvlist;
+ mac_from_arg = 1;
+ }
+ }
+free_kvlist:
+ rte_kvargs_free(kvlist);
+ return ret;
+}
+
+void
+failsafe_args_free(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ free(sdev->devargs.args);
+ sdev->devargs.args = NULL;
+ }
+}
+
+static int
+fs_count_device(struct rte_eth_dev *dev, const char *param,
+ uint8_t head __rte_unused)
+{
+ size_t b = 0;
+
+ while (param[b] != '(' &&
+ param[b] != '\0')
+ b++;
+ if (strncmp(param, "dev", b) &&
+ strncmp(param, "exec", b)) {
+ ERROR("Unrecognized device type: %.*s", (int)b, param);
+ return -EINVAL;
+ }
+ PRIV(dev)->subs_tail += 1;
+ return 0;
+}
+
+int
+failsafe_args_count_subdevice(struct rte_eth_dev *dev,
+ const char *params)
+{
+ return fs_parse_sub_devices(fs_count_device,
+ dev, params);
+}
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
new file mode 100644
index 0000000..6c3a811
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -0,0 +1,154 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_malloc.h>
+
+#include "failsafe_private.h"
+
+static struct rte_eth_dev *
+fs_find_ethdev(const struct rte_device *dev)
+{
+ struct rte_eth_dev *eth_dev;
+ uint8_t i;
+
+ RTE_ETH_FOREACH_DEV(i) {
+ eth_dev = &rte_eth_devices[i];
+ if (eth_dev->device == dev)
+ return eth_dev;
+ }
+ return NULL;
+}
+
+static int
+fs_bus_init(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ struct rte_device *rdev;
+ struct rte_devargs *da;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_PARSED)
+ continue;
+ da = &sdev->devargs;
+ rdev = rte_eal_hotplug_add(da->bus->name,
+ da->name,
+ da->args);
+ ret = rdev ? 0 : -rte_errno;
+ if (ret) {
+ ERROR("sub_device %d probe failed %s%s%s", i,
+ errno ? "(" : "",
+ errno ? strerror(rte_errno) : "",
+ errno ? ")" : "");
+ continue;
+ }
+ ETH(sdev) = fs_find_ethdev(rdev);
+ if (ETH(sdev) == NULL) {
+ ERROR("sub_device %d init went wrong", i);
+ return -ENODEV;
+ }
+ sdev->dev = ETH(sdev)->device;
+ ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
+ sdev->state = DEV_PROBED;
+ }
+ return 0;
+}
+
+int
+failsafe_eal_init(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ ret = fs_bus_init(dev);
+ if (ret)
+ return ret;
+ /*
+ * We only update TX_SUBDEV if we are not started.
+ * If a sub_device is emitting, we will switch the TX_SUBDEV to the
+ * preferred port only upon starting it, so that the switch is smoother.
+ */
+ if (PREFERRED_SUBDEV(dev)->state >= DEV_PROBED) {
+ if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
+ (TX_SUBDEV(dev) == NULL ||
+ (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
+ DEBUG("Switching tx_dev to preferred sub_device");
+ PRIV(dev)->subs_tx = 0;
+ }
+ } else {
+ if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_PROBED) ||
+ TX_SUBDEV(dev) == NULL) {
+ /* Using first probed device */
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ DEBUG("Switching tx_dev to sub_device %d",
+ i);
+ PRIV(dev)->subs_tx = i;
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+static int
+fs_bus_uninit(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev = NULL;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ ret = rte_eal_hotplug_remove(sdev->bus->name,
+ sdev->dev->name);
+ if (ret) {
+ ERROR("Failed to remove requested device %s",
+ sdev->dev->name);
+ continue;
+ }
+ sdev->state = DEV_PROBED - 1;
+ }
+ return 0;
+}
+
+int
+failsafe_eal_uninit(struct rte_eth_dev *dev)
+{
+ int ret;
+
+ ret = fs_bus_uninit(dev);
+ if (ret)
+ return ret;
+ return 0;
+}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
new file mode 100644
index 0000000..693162e
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -0,0 +1,663 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+
+#include "failsafe_private.h"
+
+static struct rte_eth_dev_info default_infos = {
+ .driver_name = pmd_failsafe_driver_name,
+ /* Max possible number of elements */
+ .max_rx_pktlen = UINT32_MAX,
+ .max_rx_queues = RTE_MAX_QUEUES_PER_PORT,
+ .max_tx_queues = RTE_MAX_QUEUES_PER_PORT,
+ .max_mac_addrs = FAILSAFE_MAX_ETHADDR,
+ .max_hash_mac_addrs = UINT32_MAX,
+ .max_vfs = UINT16_MAX,
+ .max_vmdq_pools = UINT16_MAX,
+ .rx_desc_lim = {
+ .nb_max = UINT16_MAX,
+ .nb_min = 0,
+ .nb_align = 1,
+ .nb_seg_max = UINT16_MAX,
+ .nb_mtu_seg_max = UINT16_MAX,
+ },
+ .tx_desc_lim = {
+ .nb_max = UINT16_MAX,
+ .nb_min = 0,
+ .nb_align = 1,
+ .nb_seg_max = UINT16_MAX,
+ .nb_mtu_seg_max = UINT16_MAX,
+ },
+ /* Set of understood capabilities */
+ .rx_offload_capa = 0x0,
+ .tx_offload_capa = 0x0,
+ .flow_type_rss_offloads = 0x0,
+};
+
+static int
+fs_dev_configure(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_PROBED)
+ continue;
+ DEBUG("Configuring sub-device %d", i);
+ ret = rte_eth_dev_configure(PORT_ID(sdev),
+ dev->data->nb_rx_queues,
+ dev->data->nb_tx_queues,
+ &dev->data->dev_conf);
+ if (ret) {
+ ERROR("Could not configure sub_device %d", i);
+ return ret;
+ }
+ sdev->state = DEV_ACTIVE;
+ }
+ return 0;
+}
+
+static int
+fs_dev_start(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_ACTIVE)
+ continue;
+ DEBUG("Starting sub_device %d", i);
+ ret = rte_eth_dev_start(PORT_ID(sdev));
+ if (ret)
+ return ret;
+ sdev->state = DEV_STARTED;
+ }
+ if (PREFERRED_SUBDEV(dev)->state == DEV_STARTED) {
+ if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev)) {
+ DEBUG("Switching tx_dev to preferred sub_device");
+ PRIV(dev)->subs_tx = 0;
+ }
+ } else {
+ if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED) ||
+ TX_SUBDEV(dev) == NULL) {
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
+ DEBUG("Switching tx_dev to sub_device %d", i);
+ PRIV(dev)->subs_tx = i;
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+static void
+fs_dev_stop(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
+ rte_eth_dev_stop(PORT_ID(sdev));
+ sdev->state = DEV_STARTED - 1;
+ }
+}
+
+static int
+fs_dev_set_link_up(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
+ ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
+ if (ret) {
+ ERROR("Operation rte_eth_dev_set_link_up failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
+fs_dev_set_link_down(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
+ ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
+ if (ret) {
+ ERROR("Operation rte_eth_dev_set_link_down failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static void fs_dev_free_queues(struct rte_eth_dev *dev);
+static void
+fs_dev_close(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Closing sub_device %d", i);
+ rte_eth_dev_close(PORT_ID(sdev));
+ sdev->state = DEV_ACTIVE - 1;
+ }
+ fs_dev_free_queues(dev);
+}
+
+static void
+fs_rx_queue_release(void *queue)
+{
+ struct rte_eth_dev *dev;
+ struct sub_device *sdev;
+ uint8_t i;
+ struct rxq *rxq;
+
+ if (queue == NULL)
+ return;
+ rxq = queue;
+ dev = rxq->priv->dev;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ SUBOPS(sdev, rx_queue_release)
+ (ETH(sdev)->data->rx_queues[rxq->qid]);
+ dev->data->rx_queues[rxq->qid] = NULL;
+ rte_free(rxq);
+}
+
+static int
+fs_rx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t rx_queue_id,
+ uint16_t nb_rx_desc,
+ unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *mb_pool)
+{
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ uint8_t i;
+ int ret;
+
+ rxq = dev->data->rx_queues[rx_queue_id];
+ if (rxq != NULL) {
+ fs_rx_queue_release(rxq);
+ dev->data->rx_queues[rx_queue_id] = NULL;
+ }
+ rxq = rte_zmalloc(NULL, sizeof(*rxq),
+ RTE_CACHE_LINE_SIZE);
+ if (rxq == NULL)
+ return -ENOMEM;
+ rxq->qid = rx_queue_id;
+ rxq->socket_id = socket_id;
+ rxq->info.mp = mb_pool;
+ rxq->info.conf = *rx_conf;
+ rxq->info.nb_desc = nb_rx_desc;
+ rxq->priv = PRIV(dev);
+ dev->data->rx_queues[rx_queue_id] = rxq;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_rx_queue_setup(PORT_ID(sdev),
+ rx_queue_id,
+ nb_rx_desc, socket_id,
+ rx_conf, mb_pool);
+ if (ret) {
+ ERROR("RX queue setup failed for sub_device %d", i);
+ goto free_rxq;
+ }
+ }
+ return 0;
+free_rxq:
+ fs_rx_queue_release(rxq);
+ return ret;
+}
+
+static void
+fs_tx_queue_release(void *queue)
+{
+ struct rte_eth_dev *dev;
+ struct sub_device *sdev;
+ uint8_t i;
+ struct txq *txq;
+
+ if (queue == NULL)
+ return;
+ txq = queue;
+ dev = txq->priv->dev;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ SUBOPS(sdev, tx_queue_release)
+ (ETH(sdev)->data->tx_queues[txq->qid]);
+ dev->data->tx_queues[txq->qid] = NULL;
+ rte_free(txq);
+}
+
+static int
+fs_tx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t tx_queue_id,
+ uint16_t nb_tx_desc,
+ unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ uint8_t i;
+ int ret;
+
+ txq = dev->data->tx_queues[tx_queue_id];
+ if (txq != NULL) {
+ fs_tx_queue_release(txq);
+ dev->data->tx_queues[tx_queue_id] = NULL;
+ }
+ txq = rte_zmalloc("ethdev TX queue", sizeof(*txq),
+ RTE_CACHE_LINE_SIZE);
+ if (txq == NULL)
+ return -ENOMEM;
+ txq->qid = tx_queue_id;
+ txq->socket_id = socket_id;
+ txq->info.conf = *tx_conf;
+ txq->info.nb_desc = nb_tx_desc;
+ txq->priv = PRIV(dev);
+ dev->data->tx_queues[tx_queue_id] = txq;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_tx_queue_setup(PORT_ID(sdev),
+ tx_queue_id,
+ nb_tx_desc, socket_id,
+ tx_conf);
+ if (ret) {
+ ERROR("TX queue setup failed for sub_device %d", i);
+ goto free_txq;
+ }
+ }
+ return 0;
+free_txq:
+ fs_tx_queue_release(txq);
+ return ret;
+}
+
+static void
+fs_dev_free_queues(struct rte_eth_dev *dev)
+{
+ uint16_t i;
+
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ fs_rx_queue_release(dev->data->rx_queues[i]);
+ dev->data->rx_queues[i] = NULL;
+ }
+ dev->data->nb_rx_queues = 0;
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ fs_tx_queue_release(dev->data->tx_queues[i]);
+ dev->data->tx_queues[i] = NULL;
+ }
+ dev->data->nb_tx_queues = 0;
+}
+
+static void
+fs_promiscuous_enable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_promiscuous_enable(PORT_ID(sdev));
+}
+
+static void
+fs_promiscuous_disable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_promiscuous_disable(PORT_ID(sdev));
+}
+
+static void
+fs_allmulticast_enable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_allmulticast_enable(PORT_ID(sdev));
+}
+
+static void
+fs_allmulticast_disable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_allmulticast_disable(PORT_ID(sdev));
+}
+
+static int
+fs_link_update(struct rte_eth_dev *dev,
+ int wait_to_complete)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling link_update on sub_device %d", i);
+ ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
+ if (ret && ret != -1) {
+ ERROR("Link update failed for sub_device %d with error %d",
+ i, ret);
+ return ret;
+ }
+ }
+ if (TX_SUBDEV(dev)) {
+ struct rte_eth_link *l1;
+ struct rte_eth_link *l2;
+
+ l1 = &dev->data->dev_link;
+ l2 = &ETH(TX_SUBDEV(dev))->data->dev_link;
+ if (memcmp(l1, l2, sizeof(*l1))) {
+ *l1 = *l2;
+ return 0;
+ }
+ }
+ return -1;
+}
+
+static void
+fs_stats_get(struct rte_eth_dev *dev,
+ struct rte_eth_stats *stats)
+{
+ memset(stats, 0, sizeof(*stats));
+ if (TX_SUBDEV(dev) == NULL)
+ return;
+ rte_eth_stats_get(PORT_ID(TX_SUBDEV(dev)), stats);
+}
+
+static void
+fs_stats_reset(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_stats_reset(PORT_ID(sdev));
+}
+
+/**
+ * Fail-safe dev_infos_get rules:
+ *
+ * No sub_device:
+ * Numerables:
+ * Use the maximum possible values for any field, so as not
+ * to impede any further configuration effort.
+ * Capabilities:
+ * Limits capabilities to those that are understood by the
+ * fail-safe PMD. This understanding stems from the fail-safe
+ * being capable of verifying that the related capability is
+ * expressed within the device configuration (struct rte_eth_conf).
+ *
+ * At least one probed sub_device:
+ * Numerables:
+ * Uses values from the active probed sub_device
+ * The rationale here is that if any sub_device is less capable
+ * (for example concerning the number of queues) than the active
+ * sub_device, then its subsequent configuration will fail.
+ * It is impossible to foresee this failure when the failing sub_device
+ * is supposed to be plugged-in later on, so the configuration process
+ * is the single point of failure and error reporting.
+ * Capabilities:
+ * Uses a logical AND of RX capabilities among
+ * all sub_devices and the default capabilities.
+ * Uses a logical AND of TX capabilities among
+ * the active probed sub_device and the default capabilities.
+ *
+ */
+static void
+fs_dev_infos_get(struct rte_eth_dev *dev,
+ struct rte_eth_dev_info *infos)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL) {
+ DEBUG("No probed device, using default infos");
+ rte_memcpy(&PRIV(dev)->infos, &default_infos,
+ sizeof(default_infos));
+ } else {
+ uint32_t rx_offload_capa;
+
+ rx_offload_capa = default_infos.rx_offload_capa;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ rte_eth_dev_info_get(PORT_ID(sdev),
+ &PRIV(dev)->infos);
+ rx_offload_capa &= PRIV(dev)->infos.rx_offload_capa;
+ }
+ sdev = TX_SUBDEV(dev);
+ rte_eth_dev_info_get(PORT_ID(sdev), &PRIV(dev)->infos);
+ PRIV(dev)->infos.rx_offload_capa = rx_offload_capa;
+ PRIV(dev)->infos.tx_offload_capa &=
+ default_infos.tx_offload_capa;
+ PRIV(dev)->infos.flow_type_rss_offloads &=
+ default_infos.flow_type_rss_offloads;
+ }
+ rte_memcpy(infos, &PRIV(dev)->infos, sizeof(*infos));
+}
+
+static const uint32_t *
+fs_dev_supported_ptypes_get(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ struct rte_eth_dev *edev;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL)
+ return NULL;
+ edev = ETH(sdev);
+ /* ENOTSUP: counts as no supported ptypes */
+ if (SUBOPS(sdev, dev_supported_ptypes_get) == NULL)
+ return NULL;
+ /*
+ * The API does not permit to do a clean AND of all ptypes,
+ * It is also incomplete by design and we do not really care
+ * to have a best possible value in this context.
+ * We just return the ptypes of the device of highest
+ * priority, usually the PREFERRED device.
+ */
+ return SUBOPS(sdev, dev_supported_ptypes_get)(edev);
+}
+
+static int
+fs_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
+ ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_set_mtu failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
+fs_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
+ ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_vlan_filter failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
+fs_flow_ctrl_get(struct rte_eth_dev *dev,
+ struct rte_eth_fc_conf *fc_conf)
+{
+ struct sub_device *sdev;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL)
+ return 0;
+ if (SUBOPS(sdev, flow_ctrl_get) == NULL)
+ return -ENOTSUP;
+ return SUBOPS(sdev, flow_ctrl_get)(ETH(sdev), fc_conf);
+}
+
+static int
+fs_flow_ctrl_set(struct rte_eth_dev *dev,
+ struct rte_eth_fc_conf *fc_conf)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
+ ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_flow_ctrl_set failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static void
+fs_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ /* No check: already done within the rte_eth_dev_mac_addr_remove
+ * call for the fail-safe device.
+ */
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_dev_mac_addr_remove(PORT_ID(sdev),
+ &dev->data->mac_addrs[index]);
+ PRIV(dev)->mac_addr_pool[index] = 0;
+}
+
+static int
+fs_mac_addr_add(struct rte_eth_dev *dev,
+ struct ether_addr *mac_addr,
+ uint32_t index,
+ uint32_t vmdq)
+{
+ struct sub_device *sdev;
+ int ret;
+ uint8_t i;
+
+ assert(index < FAILSAFE_MAX_ETHADDR);
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
+ PRIu8 " with error %d", i, ret);
+ return ret;
+ }
+ }
+ if (index >= PRIV(dev)->nb_mac_addr) {
+ DEBUG("Growing mac_addrs array");
+ PRIV(dev)->nb_mac_addr = index;
+ }
+ PRIV(dev)->mac_addr_pool[index] = vmdq;
+ return 0;
+}
+
+static void
+fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
+}
+
+const struct eth_dev_ops failsafe_ops = {
+ .dev_configure = fs_dev_configure,
+ .dev_start = fs_dev_start,
+ .dev_stop = fs_dev_stop,
+ .dev_set_link_down = fs_dev_set_link_down,
+ .dev_set_link_up = fs_dev_set_link_up,
+ .dev_close = fs_dev_close,
+ .promiscuous_enable = fs_promiscuous_enable,
+ .promiscuous_disable = fs_promiscuous_disable,
+ .allmulticast_enable = fs_allmulticast_enable,
+ .allmulticast_disable = fs_allmulticast_disable,
+ .link_update = fs_link_update,
+ .stats_get = fs_stats_get,
+ .stats_reset = fs_stats_reset,
+ .dev_infos_get = fs_dev_infos_get,
+ .dev_supported_ptypes_get = fs_dev_supported_ptypes_get,
+ .mtu_set = fs_mtu_set,
+ .vlan_filter_set = fs_vlan_filter_set,
+ .rx_queue_setup = fs_rx_queue_setup,
+ .tx_queue_setup = fs_tx_queue_setup,
+ .rx_queue_release = fs_rx_queue_release,
+ .tx_queue_release = fs_tx_queue_release,
+ .flow_ctrl_get = fs_flow_ctrl_get,
+ .flow_ctrl_set = fs_flow_ctrl_set,
+ .mac_addr_remove = fs_mac_addr_remove,
+ .mac_addr_add = fs_mac_addr_add,
+ .mac_addr_set = fs_mac_addr_set,
+};
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
new file mode 100644
index 0000000..e7a7592
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -0,0 +1,227 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_
+#define _RTE_ETH_FAILSAFE_PRIVATE_H_
+
+#include <rte_dev.h>
+#include <rte_ethdev.h>
+#include <rte_devargs.h>
+
+#define FAILSAFE_DRIVER_NAME "Fail-safe PMD"
+
+#define PMD_FAILSAFE_MAC_KVARG "mac"
+#define PMD_FAILSAFE_PARAM_STRING \
+ "dev(<ifc>)," \
+ "mac=mac_addr" \
+ ""
+
+#define FAILSAFE_PLUGIN_DEFAULT_TIMEOUT_MS 2000
+
+#define FAILSAFE_MAX_ETHPORTS 2
+#define FAILSAFE_MAX_ETHADDR 128
+
+/* TYPES */
+
+struct rxq {
+ struct fs_priv *priv;
+ uint16_t qid;
+ /* id of last sub_device polled */
+ uint8_t last_polled;
+ unsigned int socket_id;
+ struct rte_eth_rxq_info info;
+};
+
+struct txq {
+ struct fs_priv *priv;
+ uint16_t qid;
+ unsigned int socket_id;
+ struct rte_eth_txq_info info;
+};
+
+enum dev_state {
+ DEV_UNDEFINED = 0,
+ DEV_PARSED,
+ DEV_PROBED,
+ DEV_ACTIVE,
+ DEV_STARTED,
+};
+
+struct sub_device {
+ /* Exhaustive DPDK device description */
+ struct rte_devargs devargs;
+ struct rte_bus *bus;
+ struct rte_device *dev;
+ struct rte_eth_dev *edev;
+ /* Device state machine */
+ enum dev_state state;
+};
+
+struct fs_priv {
+ struct rte_eth_dev *dev;
+ /*
+ * Set of sub_devices.
+ * subs[0] is the preferred device
+ * any other is just another slave
+ */
+ struct sub_device *subs;
+ uint8_t subs_head; /* if head == tail, no subs */
+ uint8_t subs_tail; /* first invalid */
+ uint8_t subs_tx; /* current emitting device */
+ uint8_t current_probed;
+ /* current number of mac_addr slots allocated. */
+ uint32_t nb_mac_addr;
+ struct ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR];
+ uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR];
+ /* current capabilities */
+ struct rte_eth_dev_info infos;
+};
+
+/* RX / TX */
+
+uint16_t failsafe_rx_burst(void *rxq,
+ struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t failsafe_tx_burst(void *txq,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
+/* ARGS */
+
+int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
+void failsafe_args_free(struct rte_eth_dev *dev);
+int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
+
+/* EAL */
+
+int failsafe_eal_init(struct rte_eth_dev *dev);
+int failsafe_eal_uninit(struct rte_eth_dev *dev);
+
+/* GLOBALS */
+
+extern const char pmd_failsafe_driver_name[];
+extern const struct eth_dev_ops failsafe_ops;
+extern int mac_from_arg;
+
+/* HELPERS */
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define PRIV(dev) \
+ ((struct fs_priv *)(dev)->data->dev_private)
+
+/* sdev: (struct sub_device *) */
+#define ETH(sdev) \
+ ((sdev)->edev)
+
+/* sdev: (struct sub_device *) */
+#define PORT_ID(sdev) \
+ (ETH(sdev)->data->port_id)
+
+/**
+ * Stateful iterator construct over fail-safe sub-devices:
+ * s: (struct sub_device *), iterator
+ * i: (uint8_t), increment
+ * dev: (struct rte_eth_dev *), fail-safe ethdev
+ * state: (enum dev_state), minimum acceptable device state
+ */
+#define FOREACH_SUBDEV_ST(s, i, dev, state) \
+ for (i = fs_find_next((dev), 0, state); \
+ i < PRIV(dev)->subs_tail && (s = &PRIV(dev)->subs[i]); \
+ i = fs_find_next((dev), i + 1, state))
+
+/**
+ * Iterator construct over fail-safe sub-devices:
+ * s: (struct sub_device *), iterator
+ * i: (uint8_t), increment
+ * dev: (struct rte_eth_dev *), fail-safe ethdev
+ */
+#define FOREACH_SUBDEV(s, i, dev) \
+ FOREACH_SUBDEV_ST(s, i, dev, DEV_UNDEFINED)
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define PREFERRED_SUBDEV(dev) \
+ (&PRIV(dev)->subs[0])
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define TX_SUBDEV(dev) \
+ (PRIV(dev)->subs_tx >= PRIV(dev)->subs_tail ? NULL \
+ : (PRIV(dev)->subs[PRIV(dev)->subs_tx].state < DEV_PROBED ? NULL \
+ : &PRIV(dev)->subs[PRIV(dev)->subs_tx]))
+
+/**
+ * s: (struct sub_device *)
+ * ops: (struct eth_dev_ops) member
+ */
+#define SUBOPS(s, ops) \
+ (ETH(s)->dev_ops->ops)
+
+#ifndef NDEBUG
+#include <stdio.h>
+#define DEBUG__(m, ...) \
+ (fprintf(stderr, "%s:%d: %s(): " m "%c", \
+ __FILE__, __LINE__, __func__, __VA_ARGS__), \
+ (void)0)
+#define DEBUG_(...) \
+ (errno = ((int []){ \
+ *(volatile int *)&errno, \
+ (DEBUG__(__VA_ARGS__), 0) \
+ })[0])
+#define DEBUG(...) DEBUG_(__VA_ARGS__, '\n')
+#define INFO(...) DEBUG(__VA_ARGS__)
+#define WARN(...) DEBUG(__VA_ARGS__)
+#define ERROR(...) DEBUG(__VA_ARGS__)
+#else
+#define DEBUG(...) ((void)0)
+#define LOG__(level, m, ...) \
+ RTE_LOG(level, PMD, "net_failsafe: " m "%c", __VA_ARGS__)
+#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
+#define INFO(...) LOG_(INFO, __VA_ARGS__)
+#define WARN(...) LOG_(WARNING, "WARNING: " __VA_ARGS__)
+#define ERROR(...) LOG_(ERR, "ERROR: " __VA_ARGS__)
+#endif
+
+/* inlined functions */
+
+static inline uint8_t
+fs_find_next(struct rte_eth_dev *dev, uint8_t sid,
+ enum dev_state min_state)
+{
+ while (sid < PRIV(dev)->subs_tail) {
+ if (PRIV(dev)->subs[sid].state >= min_state)
+ break;
+ sid++;
+ }
+ if (sid >= PRIV(dev)->subs_tail)
+ return PRIV(dev)->subs_tail;
+ return sid;
+}
+
+#endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */
diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c
new file mode 100644
index 0000000..a45b4e5
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_rxtx.c
@@ -0,0 +1,107 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+
+#include "failsafe_private.h"
+
+/*
+ * TODO: write fast version,
+ * without additional checks, to be activated once
+ * everything has been verified to comply.
+ */
+uint16_t
+failsafe_rx_burst(void *queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ void *sub_rxq;
+ uint16_t nb_rx;
+ uint8_t nb_polled, nb_subs;
+ uint8_t i;
+
+ rxq = queue;
+ priv = rxq->priv;
+ nb_subs = priv->subs_tail - priv->subs_head;
+ nb_polled = 0;
+ for (i = rxq->last_polled; nb_polled < nb_subs; nb_polled++) {
+ i++;
+ if (i == priv->subs_tail)
+ i = priv->subs_head;
+ sdev = &priv->subs[i];
+ if (unlikely(ETH(sdev) == NULL))
+ continue;
+ if (unlikely(ETH(sdev)->rx_pkt_burst == NULL))
+ continue;
+ if (unlikely(sdev->state != DEV_STARTED))
+ continue;
+ sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ nb_rx = ETH(sdev)->
+ rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ if (nb_rx) {
+ rxq->last_polled = i;
+ return nb_rx;
+ }
+ }
+ return 0;
+}
+
+/*
+ * TODO: write fast version,
+ * without additional checks, to be activated once
+ * everything has been verified to comply.
+ */
+uint16_t
+failsafe_tx_burst(void *queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ void *sub_txq;
+
+ txq = queue;
+ sdev = TX_SUBDEV(txq->priv->dev);
+ if (unlikely(sdev == NULL))
+ return 0;
+ if (unlikely(ETH(sdev) == NULL))
+ return 0;
+ if (unlikely(ETH(sdev)->tx_pkt_burst == NULL))
+ return 0;
+ sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
+ return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+}
diff --git a/drivers/net/failsafe/rte_pmd_failsafe_version.map b/drivers/net/failsafe/rte_pmd_failsafe_version.map
new file mode 100644
index 0000000..b6d2840
--- /dev/null
+++ b/drivers/net/failsafe/rte_pmd_failsafe_version.map
@@ -0,0 +1,4 @@
+DPDK_17.08 {
+
+ local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 7d71a49..ffc089c 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -120,6 +120,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += -lrte_pmd_e1000
_LDLIBS-$(CONFIG_RTE_LIBRTE_ENA_PMD) += -lrte_pmd_ena
_LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += -lrte_pmd_enic
_LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += -lrte_pmd_fm10k
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += -lrte_pmd_failsafe
_LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += -lrte_pmd_i40e
_LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += -lrte_pmd_ixgbe
ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:26 UTC
Permalink
Introduce the fail-safe poll mode driver initialization and enable its
build infrastructure.

This PMD allows for applications to benefit from true hot-plugging
support without having to implement it.

It intercepts and manages Ethernet device removal events issued by
slave PMDs and re-initializes them transparently when brought back.
It also allows defining a contingency to the removal of a device, by
designating a fail-over device that will take on transmitting operations
if the preferred device is removed.

Applications only see a fail-safe instance, without caring for
underlying activity ensuring their continued operations.

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
MAINTAINERS | 5 +
config/common_base | 6 +
doc/guides/nics/fail_safe.rst | 133 +++++
doc/guides/nics/features/failsafe.ini | 24 +
doc/guides/nics/index.rst | 1 +
drivers/net/Makefile | 2 +
drivers/net/failsafe/Makefile | 76 +++
drivers/net/failsafe/failsafe.c | 231 ++++++++
drivers/net/failsafe/failsafe_args.c | 331 +++++++++++
drivers/net/failsafe/failsafe_eal.c | 154 +++++
drivers/net/failsafe/failsafe_ops.c | 663 ++++++++++++++++++++++
drivers/net/failsafe/failsafe_private.h | 227 ++++++++
drivers/net/failsafe/failsafe_rxtx.c | 107 ++++
drivers/net/failsafe/rte_pmd_failsafe_version.map | 4 +
mk/rte.app.mk | 1 +
15 files changed, 1965 insertions(+)
create mode 100644 doc/guides/nics/fail_safe.rst
create mode 100644 doc/guides/nics/features/failsafe.ini
create mode 100644 drivers/net/failsafe/Makefile
create mode 100644 drivers/net/failsafe/failsafe.c
create mode 100644 drivers/net/failsafe/failsafe_args.c
create mode 100644 drivers/net/failsafe/failsafe_eal.c
create mode 100644 drivers/net/failsafe/failsafe_ops.c
create mode 100644 drivers/net/failsafe/failsafe_private.h
create mode 100644 drivers/net/failsafe/failsafe_rxtx.c
create mode 100644 drivers/net/failsafe/rte_pmd_failsafe_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index b4424ea..5ae007f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -336,6 +336,11 @@ F: drivers/net/enic/
F: doc/guides/nics/enic.rst
F: doc/guides/nics/features/enic.ini

+Fail-safe PMD
+M: Gaetan Rivet <***@6wind.com>
+F: drivers/net/failsafe/
+F: doc/guides/nics/fail_safe.rst
+
Intel e1000
M: Wenzhuo Lu <***@intel.com>
F: drivers/net/e1000/
diff --git a/config/common_base b/config/common_base
index 660588a..f402c4b 100644
--- a/config/common_base
+++ b/config/common_base
@@ -420,6 +420,12 @@ CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
CONFIG_RTE_LIBRTE_PMD_NULL=y

#
+# Compile fail-safe PMD
+#
+CONFIG_RTE_LIBRTE_PMD_FAILSAFE=y
+CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG=n
+
+#
# Do prefetch of packet data within PMD driver receive function
#
CONFIG_RTE_PMD_PACKET_PREFETCH=y
diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
new file mode 100644
index 0000000..056f85f
--- /dev/null
+++ b/doc/guides/nics/fail_safe.rst
@@ -0,0 +1,133 @@
+.. BSD LICENSE
+ Copyright 2017 6WIND S.A.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of 6WIND S.A. nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Fail-safe poll mode driver library
+==================================
+
+The Fail-safe poll mode driver library (**librte_pmd_failsafe**) is a virtual
+device that allows using any device supporting hotplug (sudden device removal
+and plugging on its bus), without modifying other components relying on such
+device (application, other PMDs).
+
+Additionally to the Seamless Hotplug feature, the Fail-safe PMD offers the
+ability to redirect operations to secondary devices when the primary has been
+removed from the system.
+
+.. note::
+
+ The library is enabled by default. You can enable it or disable it manually
+ by setting the ``CONFIG_RTE_LIBRTE_PMD_FAILSAFE`` configuration option.
+
+Features
+--------
+
+The Fail-safe PMD only supports a limited set of features. If you plan to use a
+device underneath the Fail-safe PMD with a specific feature, this feature must
+be supported by the Fail-safe PMD to avoid throwing any error.
+
+Check the feature matrix for the complete set of supported features.
+
+Compilation options
+-------------------
+
+These options can be modified in the ``$RTE_TARGET/build/.config`` file.
+
+- ``CONFIG_RTE_LIBRTE_PMD_FAILSAFE`` (default **y**)
+
+ Toggle compiling librte_pmd_failsafe itself.
+
+- ``CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG`` (default **n**)
+
+ Toggle debugging code.
+
+Using the Fail-safe PMD from the EAL command line
+-------------------------------------------------
+
+The Fail-safe PMD can be used like most other DPDK virtual devices, by passing a
+``--vdev`` parameter to the EAL when starting the application. The device name
+must start with the *net_failsafe* prefix, followed by numbers or letters. This
+name must be unique for each device. Each fail-safe instance must have at least one
+sub-device, up to ``RTE_MAX_ETHPORTS-1``.
+
+A sub-device can be any legal DPDK device, including possibly another fail-safe
+instance.
+
+Fail-safe command line parameters
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- **dev(<iface>)** parameter
+
+ This parameter allows the user to define a sub-device. The ``<iface>`` part of
+ this parameter must be a valid device definition. It could be the argument
+ provided to a ``-w`` PCI device specification or the argument that would be
+ given to a ``--vdev`` parameter (including a fail-safe).
+ Enclosing the device definition within parenthesis here allows using
+ additional sub-device parameters if need be. They will be passed on to the
+ sub-device.
+
+- **mac** parameter [MAC address]
+
+ This parameter allows the user to set a default MAC address to the fail-safe
+ and all of its sub-devices.
+ If no default mac address is provided, the fail-safe PMD will read the MAC
+ address of the first of its sub-device to be successfully probed and use it as
+ its default MAC address, trying to set it to all of its other sub-devices.
+ If no sub-device was successfully probed at initialization, then a random MAC
+ address is generated, that will be subsequently applied to all sub-device once
+ they are probed.
+
+Usage example
+~~~~~~~~~~~~~
+
+This section shows some example of using **testpmd** with a fail-safe PMD.
+
+#. Request huge pages:
+
+ .. code-block:: console
+
+ echo 1024 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+#. Start testpmd
+
+ .. code-block:: console
+
+ $RTE_TARGET/build/app/testpmd -c 0xff -n 4 --no-pci \
+ --vdev='net_failsafe0,mac=de:ad:be:ef:01:02,dev(84:00.0),dev(net_ring0,nodeaction=r1:0:CREATE)' -- \
+ -i
+
+Using the Fail-safe PMD from an application
+-------------------------------------------
+
+This driver strives to be as seamless as possible to existing applications, in
+order to propose the hotplug functionality in the easiest way possible.
+
+Care must be taken, however, to respect the **ether** API concerning device
+access, and in particular, using the ``RTE_ETH_FOREACH_DEV`` macro to iterate
+over ethernet devices, instead of directly accessing them or by writing one's
+own device iterator.
diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
new file mode 100644
index 0000000..3c52823
--- /dev/null
+++ b/doc/guides/nics/features/failsafe.ini
@@ -0,0 +1,24 @@
+;
+; Supported features of the 'fail-safe' poll mode driver.
+;
+; Refer to default.ini for the full list of available PMD features.
+;
+[Features]
+Link status = Y
+Queue start/stop = Y
+MTU update = Y
+Jumbo frame = Y
+Promiscuous mode = Y
+Allmulticast mode = Y
+Unicast MAC filter = Y
+Multicast MAC filter = Y
+VLAN filter = Y
+Packet type parsing = Y
+Basic stats = Y
+Stats per queue = Y
+ARMv7 = Y
+ARMv8 = Y
+Power8 = Y
+x86-32 = Y
+x86-64 = Y
+Usage doc = Y
diff --git a/doc/guides/nics/index.rst b/doc/guides/nics/index.rst
index 240d082..17eaaf4 100644
--- a/doc/guides/nics/index.rst
+++ b/doc/guides/nics/index.rst
@@ -64,6 +64,7 @@ Network Interface Controller Drivers
vhost
vmxnet3
pcap_ring
+ fail_safe

**Figures**

diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 35ed813..d33c959 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -59,6 +59,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_ENA_PMD) += ena
DEPDIRS-ena = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic
DEPDIRS-enic = $(core-libs) librte_hash
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe
+DEPDIRS-failsafe = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
DEPDIRS-fm10k = $(core-libs) librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
new file mode 100644
index 0000000..c759035
--- /dev/null
+++ b/drivers/net/failsafe/Makefile
@@ -0,0 +1,76 @@
+# BSD LICENSE
+#
+# Copyright 2017 6WIND S.A.
+# Copyright 2017 Mellanox.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of 6WIND S.A. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# Library name
+LIB = librte_pmd_failsafe.a
+
+EXPORT_MAP := rte_pmd_failsafe_version.map
+
+LIBABIVER := 1
+
+# Sources are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_args.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
+
+# No exported include files
+
+# This lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_kvargs
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += lib/librte_mbuf
+
+ifneq ($(DEBUG),)
+CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG := y
+endif
+
+# Basic CFLAGS:
+CFLAGS += -std=gnu99 -Wall -Wextra
+CFLAGS += -I.
+CFLAGS += -D_DEFAULT_SOURCE
+CFLAGS += -D_XOPEN_SOURCE=700
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -Wno-strict-prototypes
+CFLAGS += -pedantic -DPEDANTIC
+
+ifeq ($(CONFIG_RTE_LIBRTE_PMD_FAILSAFE_DEBUG),y)
+CFLAGS += -g -UNDEBUG
+else
+CFLAGS += -O3
+CFLAGS += -DNDEBUG
+endif
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
new file mode 100644
index 0000000..7cf33e8
--- /dev/null
+++ b/drivers/net/failsafe/failsafe.c
@@ -0,0 +1,231 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <rte_alarm.h>
+#include <rte_malloc.h>
+#include <rte_ethdev.h>
+#include <rte_ethdev_vdev.h>
+#include <rte_devargs.h>
+#include <rte_kvargs.h>
+#include <rte_vdev.h>
+
+#include "failsafe_private.h"
+
+const char pmd_failsafe_driver_name[] = FAILSAFE_DRIVER_NAME;
+static const struct rte_eth_link eth_link = {
+ .link_speed = ETH_SPEED_NUM_10G,
+ .link_duplex = ETH_LINK_FULL_DUPLEX,
+ .link_status = ETH_LINK_UP,
+ .link_autoneg = ETH_LINK_SPEED_AUTONEG,
+};
+
+static int
+fs_sub_device_create(struct rte_eth_dev *dev,
+ const char *params)
+{
+ uint8_t nb_subs;
+ int ret;
+
+ ret = failsafe_args_count_subdevice(dev, params);
+ if (ret)
+ return ret;
+ if (PRIV(dev)->subs_tail > FAILSAFE_MAX_ETHPORTS) {
+ ERROR("Cannot allocate more than %d ports",
+ FAILSAFE_MAX_ETHPORTS);
+ return -ENOSPC;
+ }
+ nb_subs = PRIV(dev)->subs_tail;
+ PRIV(dev)->subs = rte_zmalloc(NULL,
+ sizeof(struct sub_device) * nb_subs,
+ RTE_CACHE_LINE_SIZE);
+ if (PRIV(dev)->subs == NULL) {
+ ERROR("Could not allocate sub_devices");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void
+fs_sub_device_free(struct rte_eth_dev *dev)
+{
+ rte_free(PRIV(dev)->subs);
+}
+
+static int
+fs_eth_dev_create(struct rte_vdev_device *vdev)
+{
+ struct rte_eth_dev *dev;
+ struct ether_addr *mac;
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ const char *params;
+ unsigned int socket_id;
+ uint8_t i;
+ int ret;
+
+ dev = NULL;
+ priv = NULL;
+ params = rte_vdev_device_args(vdev);
+ socket_id = rte_socket_id();
+ INFO("Creating fail-safe device on NUMA socket %u",
+ socket_id);
+ dev = rte_eth_vdev_allocate(vdev, sizeof(*priv));
+ if (dev == NULL) {
+ ERROR("Unable to allocate rte_eth_dev");
+ return -1;
+ }
+ priv = dev->data->dev_private;
+ PRIV(dev)->dev = dev;
+ dev->dev_ops = &failsafe_ops;
+ TAILQ_INIT(&dev->link_intr_cbs);
+ dev->data->dev_flags = 0x0;
+ dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0];
+ dev->data->dev_link = eth_link;
+ PRIV(dev)->nb_mac_addr = 1;
+ dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst;
+ dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst;
+ if (params == NULL) {
+ ERROR("This PMD requires sub-devices, none provided");
+ goto free_dev;
+ }
+ ret = fs_sub_device_create(dev, params);
+ if (ret) {
+ ERROR("Could not allocate sub_devices");
+ goto free_dev;
+ }
+ ret = failsafe_args_parse(dev, params);
+ if (ret)
+ goto free_subs;
+ ret = failsafe_eal_init(dev);
+ if (ret)
+ goto free_args;
+ mac = &dev->data->mac_addrs[0];
+ if (mac_from_arg) {
+ /*
+ * If MAC address was provided as a parameter,
+ * apply to all probed slaves.
+ */
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
+ mac);
+ if (ret) {
+ ERROR("Failed to set default MAC address");
+ goto free_args;
+ }
+ }
+ } else {
+ /*
+ * Use the ether_addr from first probed
+ * device, either preferred or fallback.
+ */
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state >= DEV_PROBED) {
+ ether_addr_copy(&ETH(sdev)->data->mac_addrs[0],
+ mac);
+ break;
+ }
+ /*
+ * If no device has been probed and no ether_addr
+ * has been provided on the command line, use a random
+ * valid one.
+ * It will be applied during future slave state syncs to
+ * probed slaves.
+ */
+ if (i == priv->subs_tail)
+ eth_random_addr(&mac->addr_bytes[0]);
+ }
+ INFO("MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
+ mac->addr_bytes[0], mac->addr_bytes[1],
+ mac->addr_bytes[2], mac->addr_bytes[3],
+ mac->addr_bytes[4], mac->addr_bytes[5]);
+ return 0;
+free_args:
+ failsafe_args_free(dev);
+free_subs:
+ fs_sub_device_free(dev);
+free_dev:
+ rte_eth_dev_release_port(dev);
+ return -1;
+}
+
+static int
+fs_rte_eth_free(const char *name)
+{
+ struct rte_eth_dev *dev;
+ int ret;
+
+ dev = rte_eth_dev_allocated(name);
+ if (dev == NULL)
+ return -ENODEV;
+ ret = failsafe_eal_uninit(dev);
+ if (ret)
+ ERROR("Error while uninitializing sub-EAL");
+ failsafe_args_free(dev);
+ fs_sub_device_free(dev);
+ rte_free(PRIV(dev));
+ rte_eth_dev_release_port(dev);
+ return ret;
+}
+
+static int
+rte_pmd_failsafe_probe(struct rte_vdev_device *vdev)
+{
+ const char *name;
+
+ name = rte_vdev_device_name(vdev);
+ if (vdev == NULL)
+ return -EINVAL;
+ INFO("Initializing " FAILSAFE_DRIVER_NAME " for %s",
+ name);
+ return fs_eth_dev_create(vdev);
+}
+
+static int
+rte_pmd_failsafe_remove(struct rte_vdev_device *vdev)
+{
+ const char *name;
+
+ name = rte_vdev_device_name(vdev);
+ if (name == NULL)
+ return -EINVAL;
+ INFO("Uninitializing " FAILSAFE_DRIVER_NAME " for %s", name);
+ return fs_rte_eth_free(name);
+}
+
+static struct rte_vdev_driver failsafe_drv = {
+ .probe = rte_pmd_failsafe_probe,
+ .remove = rte_pmd_failsafe_remove,
+};
+
+RTE_PMD_REGISTER_VDEV(net_failsafe, failsafe_drv);
+RTE_PMD_REGISTER_ALIAS(net_failsafe, eth_failsafe);
+RTE_PMD_REGISTER_PARAM_STRING(net_failsafe, PMD_FAILSAFE_PARAM_STRING);
diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
new file mode 100644
index 0000000..f07d26e
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -0,0 +1,331 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <errno.h>
+
+#include <rte_devargs.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+
+#include "failsafe_private.h"
+
+#define DEVARGS_MAXLEN 4096
+
+/* Callback used when a new device is found in devargs */
+typedef int (parse_cb)(struct rte_eth_dev *dev, const char *params,
+ uint8_t head);
+
+int mac_from_arg;
+
+const char *pmd_failsafe_init_parameters[] = {
+ PMD_FAILSAFE_MAC_KVARG,
+ NULL,
+};
+
+/*
+ * input: text.
+ * output: 0: if text[0] != '(',
+ * 0: if there are no corresponding ')'
+ * n: distance to corresponding ')' otherwise
+ */
+static size_t
+closing_paren(const char *text)
+{
+ int nb_open = 0;
+ size_t i = 0;
+
+ while (text[i] != '\0') {
+ if (text[i] == '(')
+ nb_open++;
+ if (text[i] == ')')
+ nb_open--;
+ if (nb_open == 0)
+ return i;
+ i++;
+ }
+ return 0;
+}
+
+static int
+fs_parse_device(struct sub_device *sdev, char *args)
+{
+ struct rte_devargs *d;
+ int ret;
+
+ d = &sdev->devargs;
+ DEBUG("%s", args);
+ ret = rte_eal_devargs_parse(args, d);
+ if (ret) {
+ DEBUG("devargs parsing failed with code %d", ret);
+ return ret;
+ }
+ sdev->bus = d->bus;
+ sdev->state = DEV_PARSED;
+ return 0;
+}
+
+static int
+fs_parse_device_param(struct rte_eth_dev *dev, const char *param,
+ uint8_t head)
+{
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ char *args = NULL;
+ size_t a, b;
+ int ret;
+
+ priv = PRIV(dev);
+ a = 0;
+ b = 0;
+ ret = 0;
+ while (param[b] != '(' &&
+ param[b] != '\0')
+ b++;
+ a = b;
+ b += closing_paren(&param[b]);
+ if (a == b) {
+ ERROR("Dangling parenthesis");
+ return -EINVAL;
+ }
+ a += 1;
+ args = strndup(&param[a], b - a);
+ if (args == NULL) {
+ ERROR("Not enough memory for parameter parsing");
+ return -ENOMEM;
+ }
+ sdev = &priv->subs[head];
+ if (strncmp(param, "dev", 3) == 0) {
+ ret = fs_parse_device(sdev, args);
+ if (ret)
+ goto free_args;
+ } else {
+ ERROR("Unrecognized device type: %.*s", (int)b, param);
+ return -EINVAL;
+ }
+free_args:
+ free(args);
+ return ret;
+}
+
+static int
+fs_parse_sub_devices(parse_cb *cb,
+ struct rte_eth_dev *dev, const char *params)
+{
+ size_t a, b;
+ uint8_t head;
+ int ret;
+
+ a = 0;
+ head = 0;
+ ret = 0;
+ while (params[a] != '\0') {
+ b = a;
+ while (params[b] != '(' &&
+ params[b] != ',' &&
+ params[b] != '\0')
+ b++;
+ if (b == a) {
+ ERROR("Invalid parameter");
+ return -EINVAL;
+ }
+ if (params[b] == ',') {
+ a = b + 1;
+ continue;
+ }
+ if (params[b] == '(') {
+ size_t start = b;
+
+ b += closing_paren(&params[b]);
+ if (b == start) {
+ ERROR("Dangling parenthesis");
+ return -EINVAL;
+ }
+ ret = (*cb)(dev, &params[a], head);
+ if (ret)
+ return ret;
+ head += 1;
+ b += 1;
+ if (params[b] == '\0')
+ return 0;
+ }
+ a = b + 1;
+ }
+ return 0;
+}
+
+static int
+fs_remove_sub_devices_definition(char params[DEVARGS_MAXLEN])
+{
+ char buffer[DEVARGS_MAXLEN] = {0};
+ size_t a, b;
+ int i;
+
+ a = 0;
+ i = 0;
+ while (params[a] != '\0') {
+ b = a;
+ while (params[b] != '(' &&
+ params[b] != ',' &&
+ params[b] != '\0')
+ b++;
+ if (b == a) {
+ ERROR("Invalid parameter");
+ return -EINVAL;
+ }
+ if (params[b] == ',' || params[b] == '\0')
+ i += snprintf(&buffer[i], b - a + 1, "%s", &params[a]);
+ if (params[b] == '(') {
+ size_t start = b;
+ b += closing_paren(&params[b]);
+ if (b == start)
+ return -EINVAL;
+ b += 1;
+ if (params[b] == '\0')
+ goto out;
+ }
+ a = b + 1;
+ }
+out:
+ snprintf(params, DEVARGS_MAXLEN, "%s", buffer);
+ return 0;
+}
+
+static int
+fs_get_mac_addr_arg(const char *key __rte_unused,
+ const char *value, void *out)
+{
+ struct ether_addr *ea = out;
+ int ret;
+
+ if ((value == NULL) || (out == NULL))
+ return -EINVAL;
+ ret = sscanf(value, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+ &ea->addr_bytes[0], &ea->addr_bytes[1],
+ &ea->addr_bytes[2], &ea->addr_bytes[3],
+ &ea->addr_bytes[4], &ea->addr_bytes[5]);
+ return ret != ETHER_ADDR_LEN;
+}
+
+int
+failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
+{
+ struct fs_priv *priv;
+ char mut_params[DEVARGS_MAXLEN] = "";
+ struct rte_kvargs *kvlist = NULL;
+ unsigned int arg_count;
+ size_t n;
+ int ret;
+
+ if (dev == NULL || params == NULL)
+ return -EINVAL;
+ priv = PRIV(dev);
+ ret = 0;
+ priv->subs_tx = FAILSAFE_MAX_ETHPORTS;
+ /* default parameters */
+ mac_from_arg = 0;
+ n = snprintf(mut_params, sizeof(mut_params), "%s", params);
+ if (n >= sizeof(mut_params)) {
+ ERROR("Parameter string too long (>=%zu)",
+ sizeof(mut_params));
+ return -ENOMEM;
+ }
+ ret = fs_parse_sub_devices(fs_parse_device_param,
+ dev, params);
+ if (ret < 0)
+ return ret;
+ ret = fs_remove_sub_devices_definition(mut_params);
+ if (ret < 0)
+ return ret;
+ if (strnlen(mut_params, sizeof(mut_params)) > 0) {
+ kvlist = rte_kvargs_parse(mut_params,
+ pmd_failsafe_init_parameters);
+ if (kvlist == NULL) {
+ ERROR("Error parsing parameters, usage:\n"
+ PMD_FAILSAFE_PARAM_STRING);
+ return -1;
+ }
+ /* MAC addr */
+ arg_count = rte_kvargs_count(kvlist,
+ PMD_FAILSAFE_MAC_KVARG);
+ if (arg_count == 1) {
+ ret = rte_kvargs_process(kvlist,
+ PMD_FAILSAFE_MAC_KVARG,
+ &fs_get_mac_addr_arg,
+ &dev->data->mac_addrs[0]);
+ if (ret < 0)
+ goto free_kvlist;
+ mac_from_arg = 1;
+ }
+ }
+free_kvlist:
+ rte_kvargs_free(kvlist);
+ return ret;
+}
+
+void
+failsafe_args_free(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ free(sdev->devargs.args);
+ sdev->devargs.args = NULL;
+ }
+}
+
+static int
+fs_count_device(struct rte_eth_dev *dev, const char *param,
+ uint8_t head __rte_unused)
+{
+ size_t b = 0;
+
+ while (param[b] != '(' &&
+ param[b] != '\0')
+ b++;
+ if (strncmp(param, "dev", b) &&
+ strncmp(param, "exec", b)) {
+ ERROR("Unrecognized device type: %.*s", (int)b, param);
+ return -EINVAL;
+ }
+ PRIV(dev)->subs_tail += 1;
+ return 0;
+}
+
+int
+failsafe_args_count_subdevice(struct rte_eth_dev *dev,
+ const char *params)
+{
+ return fs_parse_sub_devices(fs_count_device,
+ dev, params);
+}
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
new file mode 100644
index 0000000..6c3a811
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -0,0 +1,154 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_malloc.h>
+
+#include "failsafe_private.h"
+
+static struct rte_eth_dev *
+fs_find_ethdev(const struct rte_device *dev)
+{
+ struct rte_eth_dev *eth_dev;
+ uint8_t i;
+
+ RTE_ETH_FOREACH_DEV(i) {
+ eth_dev = &rte_eth_devices[i];
+ if (eth_dev->device == dev)
+ return eth_dev;
+ }
+ return NULL;
+}
+
+static int
+fs_bus_init(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ struct rte_device *rdev;
+ struct rte_devargs *da;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_PARSED)
+ continue;
+ da = &sdev->devargs;
+ rdev = rte_eal_hotplug_add(da->bus->name,
+ da->name,
+ da->args);
+ ret = rdev ? 0 : -rte_errno;
+ if (ret) {
+ ERROR("sub_device %d probe failed %s%s%s", i,
+ errno ? "(" : "",
+ errno ? strerror(rte_errno) : "",
+ errno ? ")" : "");
+ continue;
+ }
+ ETH(sdev) = fs_find_ethdev(rdev);
+ if (ETH(sdev) == NULL) {
+ ERROR("sub_device %d init went wrong", i);
+ return -ENODEV;
+ }
+ sdev->dev = ETH(sdev)->device;
+ ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
+ sdev->state = DEV_PROBED;
+ }
+ return 0;
+}
+
+int
+failsafe_eal_init(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ ret = fs_bus_init(dev);
+ if (ret)
+ return ret;
+ /*
+ * We only update TX_SUBDEV if we are not started.
+ * If a sub_device is emitting, we will switch the TX_SUBDEV to the
+ * preferred port only upon starting it, so that the switch is smoother.
+ */
+ if (PREFERRED_SUBDEV(dev)->state >= DEV_PROBED) {
+ if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
+ (TX_SUBDEV(dev) == NULL ||
+ (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
+ DEBUG("Switching tx_dev to preferred sub_device");
+ PRIV(dev)->subs_tx = 0;
+ }
+ } else {
+ if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_PROBED) ||
+ TX_SUBDEV(dev) == NULL) {
+ /* Using first probed device */
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ DEBUG("Switching tx_dev to sub_device %d",
+ i);
+ PRIV(dev)->subs_tx = i;
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+static int
+fs_bus_uninit(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev = NULL;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ ret = rte_eal_hotplug_remove(sdev->bus->name,
+ sdev->dev->name);
+ if (ret) {
+ ERROR("Failed to remove requested device %s",
+ sdev->dev->name);
+ continue;
+ }
+ sdev->state = DEV_PROBED - 1;
+ }
+ return 0;
+}
+
+int
+failsafe_eal_uninit(struct rte_eth_dev *dev)
+{
+ int ret;
+
+ ret = fs_bus_uninit(dev);
+ if (ret)
+ return ret;
+ return 0;
+}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
new file mode 100644
index 0000000..693162e
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -0,0 +1,663 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+
+#include "failsafe_private.h"
+
+static struct rte_eth_dev_info default_infos = {
+ .driver_name = pmd_failsafe_driver_name,
+ /* Max possible number of elements */
+ .max_rx_pktlen = UINT32_MAX,
+ .max_rx_queues = RTE_MAX_QUEUES_PER_PORT,
+ .max_tx_queues = RTE_MAX_QUEUES_PER_PORT,
+ .max_mac_addrs = FAILSAFE_MAX_ETHADDR,
+ .max_hash_mac_addrs = UINT32_MAX,
+ .max_vfs = UINT16_MAX,
+ .max_vmdq_pools = UINT16_MAX,
+ .rx_desc_lim = {
+ .nb_max = UINT16_MAX,
+ .nb_min = 0,
+ .nb_align = 1,
+ .nb_seg_max = UINT16_MAX,
+ .nb_mtu_seg_max = UINT16_MAX,
+ },
+ .tx_desc_lim = {
+ .nb_max = UINT16_MAX,
+ .nb_min = 0,
+ .nb_align = 1,
+ .nb_seg_max = UINT16_MAX,
+ .nb_mtu_seg_max = UINT16_MAX,
+ },
+ /* Set of understood capabilities */
+ .rx_offload_capa = 0x0,
+ .tx_offload_capa = 0x0,
+ .flow_type_rss_offloads = 0x0,
+};
+
+static int
+fs_dev_configure(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_PROBED)
+ continue;
+ DEBUG("Configuring sub-device %d", i);
+ ret = rte_eth_dev_configure(PORT_ID(sdev),
+ dev->data->nb_rx_queues,
+ dev->data->nb_tx_queues,
+ &dev->data->dev_conf);
+ if (ret) {
+ ERROR("Could not configure sub_device %d", i);
+ return ret;
+ }
+ sdev->state = DEV_ACTIVE;
+ }
+ return 0;
+}
+
+static int
+fs_dev_start(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_ACTIVE)
+ continue;
+ DEBUG("Starting sub_device %d", i);
+ ret = rte_eth_dev_start(PORT_ID(sdev));
+ if (ret)
+ return ret;
+ sdev->state = DEV_STARTED;
+ }
+ if (PREFERRED_SUBDEV(dev)->state == DEV_STARTED) {
+ if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev)) {
+ DEBUG("Switching tx_dev to preferred sub_device");
+ PRIV(dev)->subs_tx = 0;
+ }
+ } else {
+ if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED) ||
+ TX_SUBDEV(dev) == NULL) {
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
+ DEBUG("Switching tx_dev to sub_device %d", i);
+ PRIV(dev)->subs_tx = i;
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+static void
+fs_dev_stop(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
+ rte_eth_dev_stop(PORT_ID(sdev));
+ sdev->state = DEV_STARTED - 1;
+ }
+}
+
+static int
+fs_dev_set_link_up(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
+ ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
+ if (ret) {
+ ERROR("Operation rte_eth_dev_set_link_up failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
+fs_dev_set_link_down(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
+ ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
+ if (ret) {
+ ERROR("Operation rte_eth_dev_set_link_down failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static void fs_dev_free_queues(struct rte_eth_dev *dev);
+static void
+fs_dev_close(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Closing sub_device %d", i);
+ rte_eth_dev_close(PORT_ID(sdev));
+ sdev->state = DEV_ACTIVE - 1;
+ }
+ fs_dev_free_queues(dev);
+}
+
+static void
+fs_rx_queue_release(void *queue)
+{
+ struct rte_eth_dev *dev;
+ struct sub_device *sdev;
+ uint8_t i;
+ struct rxq *rxq;
+
+ if (queue == NULL)
+ return;
+ rxq = queue;
+ dev = rxq->priv->dev;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ SUBOPS(sdev, rx_queue_release)
+ (ETH(sdev)->data->rx_queues[rxq->qid]);
+ dev->data->rx_queues[rxq->qid] = NULL;
+ rte_free(rxq);
+}
+
+static int
+fs_rx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t rx_queue_id,
+ uint16_t nb_rx_desc,
+ unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *mb_pool)
+{
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ uint8_t i;
+ int ret;
+
+ rxq = dev->data->rx_queues[rx_queue_id];
+ if (rxq != NULL) {
+ fs_rx_queue_release(rxq);
+ dev->data->rx_queues[rx_queue_id] = NULL;
+ }
+ rxq = rte_zmalloc(NULL, sizeof(*rxq),
+ RTE_CACHE_LINE_SIZE);
+ if (rxq == NULL)
+ return -ENOMEM;
+ rxq->qid = rx_queue_id;
+ rxq->socket_id = socket_id;
+ rxq->info.mp = mb_pool;
+ rxq->info.conf = *rx_conf;
+ rxq->info.nb_desc = nb_rx_desc;
+ rxq->priv = PRIV(dev);
+ dev->data->rx_queues[rx_queue_id] = rxq;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_rx_queue_setup(PORT_ID(sdev),
+ rx_queue_id,
+ nb_rx_desc, socket_id,
+ rx_conf, mb_pool);
+ if (ret) {
+ ERROR("RX queue setup failed for sub_device %d", i);
+ goto free_rxq;
+ }
+ }
+ return 0;
+free_rxq:
+ fs_rx_queue_release(rxq);
+ return ret;
+}
+
+static void
+fs_tx_queue_release(void *queue)
+{
+ struct rte_eth_dev *dev;
+ struct sub_device *sdev;
+ uint8_t i;
+ struct txq *txq;
+
+ if (queue == NULL)
+ return;
+ txq = queue;
+ dev = txq->priv->dev;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ SUBOPS(sdev, tx_queue_release)
+ (ETH(sdev)->data->tx_queues[txq->qid]);
+ dev->data->tx_queues[txq->qid] = NULL;
+ rte_free(txq);
+}
+
+static int
+fs_tx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t tx_queue_id,
+ uint16_t nb_tx_desc,
+ unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ uint8_t i;
+ int ret;
+
+ txq = dev->data->tx_queues[tx_queue_id];
+ if (txq != NULL) {
+ fs_tx_queue_release(txq);
+ dev->data->tx_queues[tx_queue_id] = NULL;
+ }
+ txq = rte_zmalloc("ethdev TX queue", sizeof(*txq),
+ RTE_CACHE_LINE_SIZE);
+ if (txq == NULL)
+ return -ENOMEM;
+ txq->qid = tx_queue_id;
+ txq->socket_id = socket_id;
+ txq->info.conf = *tx_conf;
+ txq->info.nb_desc = nb_tx_desc;
+ txq->priv = PRIV(dev);
+ dev->data->tx_queues[tx_queue_id] = txq;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_tx_queue_setup(PORT_ID(sdev),
+ tx_queue_id,
+ nb_tx_desc, socket_id,
+ tx_conf);
+ if (ret) {
+ ERROR("TX queue setup failed for sub_device %d", i);
+ goto free_txq;
+ }
+ }
+ return 0;
+free_txq:
+ fs_tx_queue_release(txq);
+ return ret;
+}
+
+static void
+fs_dev_free_queues(struct rte_eth_dev *dev)
+{
+ uint16_t i;
+
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ fs_rx_queue_release(dev->data->rx_queues[i]);
+ dev->data->rx_queues[i] = NULL;
+ }
+ dev->data->nb_rx_queues = 0;
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ fs_tx_queue_release(dev->data->tx_queues[i]);
+ dev->data->tx_queues[i] = NULL;
+ }
+ dev->data->nb_tx_queues = 0;
+}
+
+static void
+fs_promiscuous_enable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_promiscuous_enable(PORT_ID(sdev));
+}
+
+static void
+fs_promiscuous_disable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_promiscuous_disable(PORT_ID(sdev));
+}
+
+static void
+fs_allmulticast_enable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_allmulticast_enable(PORT_ID(sdev));
+}
+
+static void
+fs_allmulticast_disable(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_allmulticast_disable(PORT_ID(sdev));
+}
+
+static int
+fs_link_update(struct rte_eth_dev *dev,
+ int wait_to_complete)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling link_update on sub_device %d", i);
+ ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
+ if (ret && ret != -1) {
+ ERROR("Link update failed for sub_device %d with error %d",
+ i, ret);
+ return ret;
+ }
+ }
+ if (TX_SUBDEV(dev)) {
+ struct rte_eth_link *l1;
+ struct rte_eth_link *l2;
+
+ l1 = &dev->data->dev_link;
+ l2 = &ETH(TX_SUBDEV(dev))->data->dev_link;
+ if (memcmp(l1, l2, sizeof(*l1))) {
+ *l1 = *l2;
+ return 0;
+ }
+ }
+ return -1;
+}
+
+static void
+fs_stats_get(struct rte_eth_dev *dev,
+ struct rte_eth_stats *stats)
+{
+ memset(stats, 0, sizeof(*stats));
+ if (TX_SUBDEV(dev) == NULL)
+ return;
+ rte_eth_stats_get(PORT_ID(TX_SUBDEV(dev)), stats);
+}
+
+static void
+fs_stats_reset(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_stats_reset(PORT_ID(sdev));
+}
+
+/**
+ * Fail-safe dev_infos_get rules:
+ *
+ * No sub_device:
+ * Numerables:
+ * Use the maximum possible values for any field, so as not
+ * to impede any further configuration effort.
+ * Capabilities:
+ * Limits capabilities to those that are understood by the
+ * fail-safe PMD. This understanding stems from the fail-safe
+ * being capable of verifying that the related capability is
+ * expressed within the device configuration (struct rte_eth_conf).
+ *
+ * At least one probed sub_device:
+ * Numerables:
+ * Uses values from the active probed sub_device
+ * The rationale here is that if any sub_device is less capable
+ * (for example concerning the number of queues) than the active
+ * sub_device, then its subsequent configuration will fail.
+ * It is impossible to foresee this failure when the failing sub_device
+ * is supposed to be plugged-in later on, so the configuration process
+ * is the single point of failure and error reporting.
+ * Capabilities:
+ * Uses a logical AND of RX capabilities among
+ * all sub_devices and the default capabilities.
+ * Uses a logical AND of TX capabilities among
+ * the active probed sub_device and the default capabilities.
+ *
+ */
+static void
+fs_dev_infos_get(struct rte_eth_dev *dev,
+ struct rte_eth_dev_info *infos)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL) {
+ DEBUG("No probed device, using default infos");
+ rte_memcpy(&PRIV(dev)->infos, &default_infos,
+ sizeof(default_infos));
+ } else {
+ uint32_t rx_offload_capa;
+
+ rx_offload_capa = default_infos.rx_offload_capa;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
+ rte_eth_dev_info_get(PORT_ID(sdev),
+ &PRIV(dev)->infos);
+ rx_offload_capa &= PRIV(dev)->infos.rx_offload_capa;
+ }
+ sdev = TX_SUBDEV(dev);
+ rte_eth_dev_info_get(PORT_ID(sdev), &PRIV(dev)->infos);
+ PRIV(dev)->infos.rx_offload_capa = rx_offload_capa;
+ PRIV(dev)->infos.tx_offload_capa &=
+ default_infos.tx_offload_capa;
+ PRIV(dev)->infos.flow_type_rss_offloads &=
+ default_infos.flow_type_rss_offloads;
+ }
+ rte_memcpy(infos, &PRIV(dev)->infos, sizeof(*infos));
+}
+
+static const uint32_t *
+fs_dev_supported_ptypes_get(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ struct rte_eth_dev *edev;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL)
+ return NULL;
+ edev = ETH(sdev);
+ /* ENOTSUP: counts as no supported ptypes */
+ if (SUBOPS(sdev, dev_supported_ptypes_get) == NULL)
+ return NULL;
+ /*
+ * The API does not permit to do a clean AND of all ptypes,
+ * It is also incomplete by design and we do not really care
+ * to have a best possible value in this context.
+ * We just return the ptypes of the device of highest
+ * priority, usually the PREFERRED device.
+ */
+ return SUBOPS(sdev, dev_supported_ptypes_get)(edev);
+}
+
+static int
+fs_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
+ ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_set_mtu failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
+fs_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
+ ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_vlan_filter failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int
+fs_flow_ctrl_get(struct rte_eth_dev *dev,
+ struct rte_eth_fc_conf *fc_conf)
+{
+ struct sub_device *sdev;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev == NULL)
+ return 0;
+ if (SUBOPS(sdev, flow_ctrl_get) == NULL)
+ return -ENOTSUP;
+ return SUBOPS(sdev, flow_ctrl_get)(ETH(sdev), fc_conf);
+}
+
+static int
+fs_flow_ctrl_set(struct rte_eth_dev *dev,
+ struct rte_eth_fc_conf *fc_conf)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
+ ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_flow_ctrl_set failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static void
+fs_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ /* No check: already done within the rte_eth_dev_mac_addr_remove
+ * call for the fail-safe device.
+ */
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_dev_mac_addr_remove(PORT_ID(sdev),
+ &dev->data->mac_addrs[index]);
+ PRIV(dev)->mac_addr_pool[index] = 0;
+}
+
+static int
+fs_mac_addr_add(struct rte_eth_dev *dev,
+ struct ether_addr *mac_addr,
+ uint32_t index,
+ uint32_t vmdq)
+{
+ struct sub_device *sdev;
+ int ret;
+ uint8_t i;
+
+ assert(index < FAILSAFE_MAX_ETHADDR);
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
+ PRIu8 " with error %d", i, ret);
+ return ret;
+ }
+ }
+ if (index >= PRIV(dev)->nb_mac_addr) {
+ DEBUG("Growing mac_addrs array");
+ PRIV(dev)->nb_mac_addr = index;
+ }
+ PRIV(dev)->mac_addr_pool[index] = vmdq;
+ return 0;
+}
+
+static void
+fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
+}
+
+const struct eth_dev_ops failsafe_ops = {
+ .dev_configure = fs_dev_configure,
+ .dev_start = fs_dev_start,
+ .dev_stop = fs_dev_stop,
+ .dev_set_link_down = fs_dev_set_link_down,
+ .dev_set_link_up = fs_dev_set_link_up,
+ .dev_close = fs_dev_close,
+ .promiscuous_enable = fs_promiscuous_enable,
+ .promiscuous_disable = fs_promiscuous_disable,
+ .allmulticast_enable = fs_allmulticast_enable,
+ .allmulticast_disable = fs_allmulticast_disable,
+ .link_update = fs_link_update,
+ .stats_get = fs_stats_get,
+ .stats_reset = fs_stats_reset,
+ .dev_infos_get = fs_dev_infos_get,
+ .dev_supported_ptypes_get = fs_dev_supported_ptypes_get,
+ .mtu_set = fs_mtu_set,
+ .vlan_filter_set = fs_vlan_filter_set,
+ .rx_queue_setup = fs_rx_queue_setup,
+ .tx_queue_setup = fs_tx_queue_setup,
+ .rx_queue_release = fs_rx_queue_release,
+ .tx_queue_release = fs_tx_queue_release,
+ .flow_ctrl_get = fs_flow_ctrl_get,
+ .flow_ctrl_set = fs_flow_ctrl_set,
+ .mac_addr_remove = fs_mac_addr_remove,
+ .mac_addr_add = fs_mac_addr_add,
+ .mac_addr_set = fs_mac_addr_set,
+};
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
new file mode 100644
index 0000000..e7a7592
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -0,0 +1,227 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_
+#define _RTE_ETH_FAILSAFE_PRIVATE_H_
+
+#include <rte_dev.h>
+#include <rte_ethdev.h>
+#include <rte_devargs.h>
+
+#define FAILSAFE_DRIVER_NAME "Fail-safe PMD"
+
+#define PMD_FAILSAFE_MAC_KVARG "mac"
+#define PMD_FAILSAFE_PARAM_STRING \
+ "dev(<ifc>)," \
+ "mac=mac_addr" \
+ ""
+
+#define FAILSAFE_PLUGIN_DEFAULT_TIMEOUT_MS 2000
+
+#define FAILSAFE_MAX_ETHPORTS 2
+#define FAILSAFE_MAX_ETHADDR 128
+
+/* TYPES */
+
+struct rxq {
+ struct fs_priv *priv;
+ uint16_t qid;
+ /* id of last sub_device polled */
+ uint8_t last_polled;
+ unsigned int socket_id;
+ struct rte_eth_rxq_info info;
+};
+
+struct txq {
+ struct fs_priv *priv;
+ uint16_t qid;
+ unsigned int socket_id;
+ struct rte_eth_txq_info info;
+};
+
+enum dev_state {
+ DEV_UNDEFINED = 0,
+ DEV_PARSED,
+ DEV_PROBED,
+ DEV_ACTIVE,
+ DEV_STARTED,
+};
+
+struct sub_device {
+ /* Exhaustive DPDK device description */
+ struct rte_devargs devargs;
+ struct rte_bus *bus;
+ struct rte_device *dev;
+ struct rte_eth_dev *edev;
+ /* Device state machine */
+ enum dev_state state;
+};
+
+struct fs_priv {
+ struct rte_eth_dev *dev;
+ /*
+ * Set of sub_devices.
+ * subs[0] is the preferred device
+ * any other is just another slave
+ */
+ struct sub_device *subs;
+ uint8_t subs_head; /* if head == tail, no subs */
+ uint8_t subs_tail; /* first invalid */
+ uint8_t subs_tx; /* current emitting device */
+ uint8_t current_probed;
+ /* current number of mac_addr slots allocated. */
+ uint32_t nb_mac_addr;
+ struct ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR];
+ uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR];
+ /* current capabilities */
+ struct rte_eth_dev_info infos;
+};
+
+/* RX / TX */
+
+uint16_t failsafe_rx_burst(void *rxq,
+ struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t failsafe_tx_burst(void *txq,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
+/* ARGS */
+
+int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
+void failsafe_args_free(struct rte_eth_dev *dev);
+int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
+
+/* EAL */
+
+int failsafe_eal_init(struct rte_eth_dev *dev);
+int failsafe_eal_uninit(struct rte_eth_dev *dev);
+
+/* GLOBALS */
+
+extern const char pmd_failsafe_driver_name[];
+extern const struct eth_dev_ops failsafe_ops;
+extern int mac_from_arg;
+
+/* HELPERS */
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define PRIV(dev) \
+ ((struct fs_priv *)(dev)->data->dev_private)
+
+/* sdev: (struct sub_device *) */
+#define ETH(sdev) \
+ ((sdev)->edev)
+
+/* sdev: (struct sub_device *) */
+#define PORT_ID(sdev) \
+ (ETH(sdev)->data->port_id)
+
+/**
+ * Stateful iterator construct over fail-safe sub-devices:
+ * s: (struct sub_device *), iterator
+ * i: (uint8_t), increment
+ * dev: (struct rte_eth_dev *), fail-safe ethdev
+ * state: (enum dev_state), minimum acceptable device state
+ */
+#define FOREACH_SUBDEV_ST(s, i, dev, state) \
+ for (i = fs_find_next((dev), 0, state); \
+ i < PRIV(dev)->subs_tail && (s = &PRIV(dev)->subs[i]); \
+ i = fs_find_next((dev), i + 1, state))
+
+/**
+ * Iterator construct over fail-safe sub-devices:
+ * s: (struct sub_device *), iterator
+ * i: (uint8_t), increment
+ * dev: (struct rte_eth_dev *), fail-safe ethdev
+ */
+#define FOREACH_SUBDEV(s, i, dev) \
+ FOREACH_SUBDEV_ST(s, i, dev, DEV_UNDEFINED)
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define PREFERRED_SUBDEV(dev) \
+ (&PRIV(dev)->subs[0])
+
+/* dev: (struct rte_eth_dev *) fail-safe device */
+#define TX_SUBDEV(dev) \
+ (PRIV(dev)->subs_tx >= PRIV(dev)->subs_tail ? NULL \
+ : (PRIV(dev)->subs[PRIV(dev)->subs_tx].state < DEV_PROBED ? NULL \
+ : &PRIV(dev)->subs[PRIV(dev)->subs_tx]))
+
+/**
+ * s: (struct sub_device *)
+ * ops: (struct eth_dev_ops) member
+ */
+#define SUBOPS(s, ops) \
+ (ETH(s)->dev_ops->ops)
+
+#ifndef NDEBUG
+#include <stdio.h>
+#define DEBUG__(m, ...) \
+ (fprintf(stderr, "%s:%d: %s(): " m "%c", \
+ __FILE__, __LINE__, __func__, __VA_ARGS__), \
+ (void)0)
+#define DEBUG_(...) \
+ (errno = ((int []){ \
+ *(volatile int *)&errno, \
+ (DEBUG__(__VA_ARGS__), 0) \
+ })[0])
+#define DEBUG(...) DEBUG_(__VA_ARGS__, '\n')
+#define INFO(...) DEBUG(__VA_ARGS__)
+#define WARN(...) DEBUG(__VA_ARGS__)
+#define ERROR(...) DEBUG(__VA_ARGS__)
+#else
+#define DEBUG(...) ((void)0)
+#define LOG__(level, m, ...) \
+ RTE_LOG(level, PMD, "net_failsafe: " m "%c", __VA_ARGS__)
+#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
+#define INFO(...) LOG_(INFO, __VA_ARGS__)
+#define WARN(...) LOG_(WARNING, "WARNING: " __VA_ARGS__)
+#define ERROR(...) LOG_(ERR, "ERROR: " __VA_ARGS__)
+#endif
+
+/* inlined functions */
+
+static inline uint8_t
+fs_find_next(struct rte_eth_dev *dev, uint8_t sid,
+ enum dev_state min_state)
+{
+ while (sid < PRIV(dev)->subs_tail) {
+ if (PRIV(dev)->subs[sid].state >= min_state)
+ break;
+ sid++;
+ }
+ if (sid >= PRIV(dev)->subs_tail)
+ return PRIV(dev)->subs_tail;
+ return sid;
+}
+
+#endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */
diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c
new file mode 100644
index 0000000..a45b4e5
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_rxtx.c
@@ -0,0 +1,107 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+
+#include "failsafe_private.h"
+
+/*
+ * TODO: write fast version,
+ * without additional checks, to be activated once
+ * everything has been verified to comply.
+ */
+uint16_t
+failsafe_rx_burst(void *queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ void *sub_rxq;
+ uint16_t nb_rx;
+ uint8_t nb_polled, nb_subs;
+ uint8_t i;
+
+ rxq = queue;
+ priv = rxq->priv;
+ nb_subs = priv->subs_tail - priv->subs_head;
+ nb_polled = 0;
+ for (i = rxq->last_polled; nb_polled < nb_subs; nb_polled++) {
+ i++;
+ if (i == priv->subs_tail)
+ i = priv->subs_head;
+ sdev = &priv->subs[i];
+ if (unlikely(ETH(sdev) == NULL))
+ continue;
+ if (unlikely(ETH(sdev)->rx_pkt_burst == NULL))
+ continue;
+ if (unlikely(sdev->state != DEV_STARTED))
+ continue;
+ sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ nb_rx = ETH(sdev)->
+ rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ if (nb_rx) {
+ rxq->last_polled = i;
+ return nb_rx;
+ }
+ }
+ return 0;
+}
+
+/*
+ * TODO: write fast version,
+ * without additional checks, to be activated once
+ * everything has been verified to comply.
+ */
+uint16_t
+failsafe_tx_burst(void *queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ void *sub_txq;
+
+ txq = queue;
+ sdev = TX_SUBDEV(txq->priv->dev);
+ if (unlikely(sdev == NULL))
+ return 0;
+ if (unlikely(ETH(sdev) == NULL))
+ return 0;
+ if (unlikely(ETH(sdev)->tx_pkt_burst == NULL))
+ return 0;
+ sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
+ return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+}
diff --git a/drivers/net/failsafe/rte_pmd_failsafe_version.map b/drivers/net/failsafe/rte_pmd_failsafe_version.map
new file mode 100644
index 0000000..b6d2840
--- /dev/null
+++ b/drivers/net/failsafe/rte_pmd_failsafe_version.map
@@ -0,0 +1,4 @@
+DPDK_17.08 {
+
+ local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 7d71a49..ffc089c 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -120,6 +120,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += -lrte_pmd_e1000
_LDLIBS-$(CONFIG_RTE_LIBRTE_ENA_PMD) += -lrte_pmd_ena
_LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += -lrte_pmd_enic
_LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += -lrte_pmd_fm10k
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += -lrte_pmd_failsafe
_LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += -lrte_pmd_i40e
_LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += -lrte_pmd_ixgbe
ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:27 UTC
Permalink
Periodically check for the existence of a device.
If a device has not been initialized and exists on the system, then it
is probed and configured.

The configuration process strives to synchronize the states between the
plugged-in sub-device and the fail-safe device.

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/fail_safe.rst | 19 +++
drivers/net/failsafe/Makefile | 1 +
drivers/net/failsafe/failsafe.c | 71 ++++++++++
drivers/net/failsafe/failsafe_args.c | 32 +++++
drivers/net/failsafe/failsafe_eal.c | 30 +----
drivers/net/failsafe/failsafe_ether.c | 228 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ops.c | 25 ++--
drivers/net/failsafe/failsafe_private.h | 60 ++++++++-
8 files changed, 423 insertions(+), 43 deletions(-)
create mode 100644 drivers/net/failsafe/failsafe_ether.c

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index 056f85f..c04891a 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -102,6 +102,11 @@ Fail-safe command line parameters
address is generated, that will be subsequently applied to all sub-device once
they are probed.

+- **hotplug_poll** parameter [UINT64] (default **2000**)
+
+ This parameter allows the user to configure the amount of time in milliseconds
+ between two slave upkeep round.
+
Usage example
~~~~~~~~~~~~~

@@ -131,3 +136,17 @@ Care must be taken, however, to respect the **ether** API concerning device
access, and in particular, using the ``RTE_ETH_FOREACH_DEV`` macro to iterate
over ethernet devices, instead of directly accessing them or by writing one's
own device iterator.
+
+Plug-in feature
+---------------
+
+A sub-device can be defined without existing on the system when the fail-safe
+PMD is initialized. Upon probing this device, the fail-safe PMD will detect its
+absence and postpone its use. It will then register for a periodic check on any
+missing sub-device.
+
+During this time, the fail-safe PMD can be used normally, configured and told to
+emit and receive packets. It will store any applied configuration, and try to
+apply it upon the probing of its missing sub-device. After this configuration
+pass, the new sub-device will be synchronized with other sub-devices, i.e. be
+started if the fail-safe PMD has been started by the user before.
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
index c759035..e27bfc0 100644
--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -44,6 +44,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_args.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ether.c

# No exported include files

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 7cf33e8..888f07b 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -80,6 +80,72 @@ fs_sub_device_free(struct rte_eth_dev *dev)
rte_free(PRIV(dev)->subs);
}

+static void fs_hotplug_alarm(void *arg);
+
+int
+failsafe_hotplug_alarm_install(struct rte_eth_dev *dev)
+{
+ int ret;
+
+ if (dev == NULL)
+ return -EINVAL;
+ if (PRIV(dev)->pending_alarm)
+ return 0;
+ ret = rte_eal_alarm_set(hotplug_poll * 1000,
+ fs_hotplug_alarm,
+ dev);
+ if (ret) {
+ ERROR("Could not set up plug-in event detection");
+ return ret;
+ }
+ PRIV(dev)->pending_alarm = 1;
+ return 0;
+}
+
+int
+failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev)
+{
+ int ret = 0;
+
+ if (PRIV(dev)->pending_alarm) {
+ rte_errno = 0;
+ rte_eal_alarm_cancel(fs_hotplug_alarm, dev);
+ if (rte_errno) {
+ ERROR("rte_eal_alarm_cancel failed (errno: %s)",
+ strerror(rte_errno));
+ ret = -rte_errno;
+ } else {
+ PRIV(dev)->pending_alarm = 0;
+ }
+ }
+ return ret;
+}
+
+static void
+fs_hotplug_alarm(void *arg)
+{
+ struct rte_eth_dev *dev = arg;
+ struct sub_device *sdev;
+ int ret;
+ uint8_t i;
+
+ if (!PRIV(dev)->pending_alarm)
+ return;
+ PRIV(dev)->pending_alarm = 0;
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state != PRIV(dev)->state)
+ break;
+ /* if we have non-probed device */
+ if (i != PRIV(dev)->subs_tail) {
+ ret = failsafe_eth_dev_state_sync(dev);
+ if (ret)
+ ERROR("Unable to synchronize sub_device state");
+ }
+ ret = failsafe_hotplug_alarm_install(dev);
+ if (ret)
+ ERROR("Unable to set up next alarm");
+}
+
static int
fs_eth_dev_create(struct rte_vdev_device *vdev)
{
@@ -128,6 +194,11 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
ret = failsafe_eal_init(dev);
if (ret)
goto free_args;
+ ret = failsafe_hotplug_alarm_install(dev);
+ if (ret) {
+ ERROR("Could not set up plug-in event detection");
+ goto free_args;
+ }
mac = &dev->data->mac_addrs[0];
if (mac_from_arg) {
/*
diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
index f07d26e..8f334aa 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -45,9 +45,11 @@
typedef int (parse_cb)(struct rte_eth_dev *dev, const char *params,
uint8_t head);

+uint64_t hotplug_poll;
int mac_from_arg;

const char *pmd_failsafe_init_parameters[] = {
+ PMD_FAILSAFE_PLUG_IN_POLL_KVARG,
PMD_FAILSAFE_MAC_KVARG,
NULL,
};
@@ -221,6 +223,24 @@ fs_remove_sub_devices_definition(char params[DEVARGS_MAXLEN])
}

static int
+fs_get_u64_arg(const char *key __rte_unused,
+ const char *value, void *out)
+{
+ uint64_t *u64 = out;
+ char *endptr = NULL;
+
+ if ((value == NULL) || (out == NULL))
+ return -EINVAL;
+ errno = 0;
+ *u64 = strtoull(value, &endptr, 0);
+ if (errno != 0)
+ return -errno;
+ if (endptr == value)
+ return -1;
+ return 0;
+}
+
+static int
fs_get_mac_addr_arg(const char *key __rte_unused,
const char *value, void *out)
{
@@ -252,6 +272,7 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
ret = 0;
priv->subs_tx = FAILSAFE_MAX_ETHPORTS;
/* default parameters */
+ hotplug_poll = FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS;
mac_from_arg = 0;
n = snprintf(mut_params, sizeof(mut_params), "%s", params);
if (n >= sizeof(mut_params)) {
@@ -274,6 +295,16 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
PMD_FAILSAFE_PARAM_STRING);
return -1;
}
+ /* PLUG_IN event poll timer */
+ arg_count = rte_kvargs_count(kvlist,
+ PMD_FAILSAFE_PLUG_IN_POLL_KVARG);
+ if (arg_count == 1) {
+ ret = rte_kvargs_process(kvlist,
+ PMD_FAILSAFE_PLUG_IN_POLL_KVARG,
+ &fs_get_u64_arg, &hotplug_poll);
+ if (ret < 0)
+ goto free_kvlist;
+ }
/* MAC addr */
arg_count = rte_kvargs_count(kvlist,
PMD_FAILSAFE_MAC_KVARG);
@@ -287,6 +318,7 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
mac_from_arg = 1;
}
}
+ PRIV(dev)->state = DEV_PARSED;
free_kvlist:
rte_kvargs_free(kvlist);
return ret;
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index 6c3a811..16871df 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -88,37 +88,14 @@ fs_bus_init(struct rte_eth_dev *dev)
int
failsafe_eal_init(struct rte_eth_dev *dev)
{
- struct sub_device *sdev;
- uint8_t i;
int ret;

ret = fs_bus_init(dev);
if (ret)
return ret;
- /*
- * We only update TX_SUBDEV if we are not started.
- * If a sub_device is emitting, we will switch the TX_SUBDEV to the
- * preferred port only upon starting it, so that the switch is smoother.
- */
- if (PREFERRED_SUBDEV(dev)->state >= DEV_PROBED) {
- if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
- (TX_SUBDEV(dev) == NULL ||
- (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
- DEBUG("Switching tx_dev to preferred sub_device");
- PRIV(dev)->subs_tx = 0;
- }
- } else {
- if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_PROBED) ||
- TX_SUBDEV(dev) == NULL) {
- /* Using first probed device */
- FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
- DEBUG("Switching tx_dev to sub_device %d",
- i);
- PRIV(dev)->subs_tx = i;
- break;
- }
- }
- }
+ if (PRIV(dev)->state < DEV_PROBED)
+ PRIV(dev)->state = DEV_PROBED;
+ fs_switch_dev(dev);
return 0;
}

@@ -150,5 +127,6 @@ failsafe_eal_uninit(struct rte_eth_dev *dev)
ret = fs_bus_uninit(dev);
if (ret)
return ret;
+ PRIV(dev)->state = DEV_PROBED - 1;
return 0;
}
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
new file mode 100644
index 0000000..7910952
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -0,0 +1,228 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+
+#include "failsafe_private.h"
+
+static int
+fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
+ struct sub_device *sdev)
+{
+ struct rte_eth_dev *edev;
+ struct rte_vlan_filter_conf *vfc1;
+ struct rte_vlan_filter_conf *vfc2;
+ uint32_t i;
+ int ret;
+
+ edev = ETH(sdev);
+ /* RX queue setup */
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ struct rxq *rxq;
+
+ rxq = dev->data->rx_queues[i];
+ ret = rte_eth_rx_queue_setup(PORT_ID(sdev), i,
+ rxq->info.nb_desc, rxq->socket_id,
+ &rxq->info.conf, rxq->info.mp);
+ if (ret) {
+ ERROR("rx_queue_setup failed");
+ return ret;
+ }
+ }
+ /* TX queue setup */
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ struct txq *txq;
+
+ txq = dev->data->tx_queues[i];
+ ret = rte_eth_tx_queue_setup(PORT_ID(sdev), i,
+ txq->info.nb_desc, txq->socket_id,
+ &txq->info.conf);
+ if (ret) {
+ ERROR("tx_queue_setup failed");
+ return ret;
+ }
+ }
+ /* dev_link.link_status */
+ if (dev->data->dev_link.link_status !=
+ edev->data->dev_link.link_status) {
+ DEBUG("Configuring link_status");
+ if (dev->data->dev_link.link_status)
+ ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
+ else
+ ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
+ if (ret) {
+ ERROR("Failed to apply link_status");
+ return ret;
+ }
+ } else {
+ DEBUG("link_status already set");
+ }
+ /* promiscuous */
+ if (dev->data->promiscuous != edev->data->promiscuous) {
+ DEBUG("Configuring promiscuous");
+ if (dev->data->promiscuous)
+ rte_eth_promiscuous_enable(PORT_ID(sdev));
+ else
+ rte_eth_promiscuous_disable(PORT_ID(sdev));
+ } else {
+ DEBUG("promiscuous already set");
+ }
+ /* all_multicast */
+ if (dev->data->all_multicast != edev->data->all_multicast) {
+ DEBUG("Configuring all_multicast");
+ if (dev->data->all_multicast)
+ rte_eth_allmulticast_enable(PORT_ID(sdev));
+ else
+ rte_eth_allmulticast_disable(PORT_ID(sdev));
+ } else {
+ DEBUG("all_multicast already set");
+ }
+ /* MTU */
+ if (dev->data->mtu != edev->data->mtu) {
+ DEBUG("Configuring MTU");
+ ret = rte_eth_dev_set_mtu(PORT_ID(sdev), dev->data->mtu);
+ if (ret) {
+ ERROR("Failed to apply MTU");
+ return ret;
+ }
+ } else {
+ DEBUG("MTU already set");
+ }
+ /* default MAC */
+ DEBUG("Configuring default MAC address");
+ ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
+ &dev->data->mac_addrs[0]);
+ if (ret) {
+ ERROR("Setting default MAC address failed");
+ return ret;
+ }
+ /* additional MAC */
+ if (PRIV(dev)->nb_mac_addr > 1)
+ DEBUG("Configure additional MAC address%s",
+ (PRIV(dev)->nb_mac_addr > 2 ? "es" : ""));
+ for (i = 1; i < PRIV(dev)->nb_mac_addr; i++) {
+ struct ether_addr *ea;
+
+ ea = &dev->data->mac_addrs[i];
+ ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), ea,
+ PRIV(dev)->mac_addr_pool[i]);
+ if (ret) {
+ char ea_fmt[ETHER_ADDR_FMT_SIZE];
+
+ ether_format_addr(ea_fmt, ETHER_ADDR_FMT_SIZE, ea);
+ ERROR("Adding MAC address %s failed", ea_fmt);
+ }
+ }
+ /* VLAN filter */
+ vfc1 = &dev->data->vlan_filter_conf;
+ vfc2 = &edev->data->vlan_filter_conf;
+ if (memcmp(vfc1, vfc2, sizeof(struct rte_vlan_filter_conf))) {
+ uint64_t vbit;
+ uint64_t ids;
+ size_t i;
+ uint16_t vlan_id;
+
+ DEBUG("Configuring VLAN filter");
+ for (i = 0; i < RTE_DIM(vfc1->ids); i++) {
+ if (vfc1->ids[i] == 0)
+ continue;
+ ids = vfc1->ids[i];
+ while (ids) {
+ vlan_id = 64 * i;
+ /* count trailing zeroes */
+ vbit = ~ids & (ids - 1);
+ /* clear least significant bit set */
+ ids ^= (ids ^ (ids - 1)) ^ vbit;
+ for (; vbit; vlan_id++)
+ vbit >>= 1;
+ ret = rte_eth_dev_vlan_filter(
+ PORT_ID(sdev), vlan_id, 1);
+ if (ret) {
+ ERROR("Failed to apply VLAN filter %hu",
+ vlan_id);
+ return ret;
+ }
+ }
+ }
+ } else {
+ DEBUG("VLAN filter already set");
+ }
+ return 0;
+}
+
+int
+failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint32_t inactive;
+ int ret;
+ uint8_t i;
+
+ if (PRIV(dev)->state < DEV_PROBED)
+ return 0;
+ ret = failsafe_eal_init(dev);
+ if (ret)
+ return ret;
+ if (PRIV(dev)->state < DEV_ACTIVE)
+ return 0;
+ inactive = 0;
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state == DEV_PROBED)
+ inactive |= UINT32_C(1) << i;
+ ret = dev->dev_ops->dev_configure(dev);
+ if (ret)
+ return ret;
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (inactive & (UINT32_C(1) << i)) {
+ ret = fs_eth_dev_conf_apply(dev, sdev);
+ if (ret) {
+ ERROR("Could not apply configuration to sub_device %d",
+ i);
+ /* TODO: disable device */
+ return ret;
+ }
+ }
+ }
+ /*
+ * If new devices have been configured, check if
+ * the link state has changed.
+ */
+ if (inactive)
+ dev->dev_ops->link_update(dev, 1);
+ if (PRIV(dev)->state < DEV_STARTED)
+ return 0;
+ ret = dev->dev_ops->dev_start(dev);
+ if (ret)
+ return ret;
+ return 0;
+}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 693162e..4044473 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -89,6 +89,8 @@ fs_dev_configure(struct rte_eth_dev *dev)
}
sdev->state = DEV_ACTIVE;
}
+ if (PRIV(dev)->state < DEV_ACTIVE)
+ PRIV(dev)->state = DEV_ACTIVE;
return 0;
}

@@ -108,21 +110,9 @@ fs_dev_start(struct rte_eth_dev *dev)
return ret;
sdev->state = DEV_STARTED;
}
- if (PREFERRED_SUBDEV(dev)->state == DEV_STARTED) {
- if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev)) {
- DEBUG("Switching tx_dev to preferred sub_device");
- PRIV(dev)->subs_tx = 0;
- }
- } else {
- if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED) ||
- TX_SUBDEV(dev) == NULL) {
- FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
- DEBUG("Switching tx_dev to sub_device %d", i);
- PRIV(dev)->subs_tx = i;
- break;
- }
- }
- }
+ if (PRIV(dev)->state < DEV_STARTED)
+ PRIV(dev)->state = DEV_STARTED;
+ fs_switch_dev(dev);
return 0;
}

@@ -132,6 +122,7 @@ fs_dev_stop(struct rte_eth_dev *dev)
struct sub_device *sdev;
uint8_t i;

+ PRIV(dev)->state = DEV_STARTED - 1;
FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
rte_eth_dev_stop(PORT_ID(sdev));
sdev->state = DEV_STARTED - 1;
@@ -183,6 +174,10 @@ fs_dev_close(struct rte_eth_dev *dev)
struct sub_device *sdev;
uint8_t i;

+ failsafe_hotplug_alarm_cancel(dev);
+ if (PRIV(dev)->state == DEV_STARTED)
+ dev->dev_ops->dev_stop(dev);
+ PRIV(dev)->state = DEV_ACTIVE - 1;
FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
DEBUG("Closing sub_device %d", i);
rte_eth_dev_close(PORT_ID(sdev));
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index e7a7592..8fb72fe 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -41,12 +41,14 @@
#define FAILSAFE_DRIVER_NAME "Fail-safe PMD"

#define PMD_FAILSAFE_MAC_KVARG "mac"
+#define PMD_FAILSAFE_PLUG_IN_POLL_KVARG "hotplug_poll"
#define PMD_FAILSAFE_PARAM_STRING \
"dev(<ifc>)," \
- "mac=mac_addr" \
+ "mac=mac_addr," \
+ "hotplug_poll=u64" \
""

-#define FAILSAFE_PLUGIN_DEFAULT_TIMEOUT_MS 2000
+#define FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS 2000

#define FAILSAFE_MAX_ETHPORTS 2
#define FAILSAFE_MAX_ETHADDR 128
@@ -105,8 +107,22 @@ struct fs_priv {
uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR];
/* current capabilities */
struct rte_eth_dev_info infos;
+ /*
+ * Fail-safe state machine.
+ * This level will be tracking state of the EAL and eth
+ * layer at large as defined by the user application.
+ * It will then steer the sub_devices toward the same
+ * synchronized state.
+ */
+ enum dev_state state;
+ unsigned int pending_alarm:1; /* An alarm is pending */
};

+/* MISC */
+
+int failsafe_hotplug_alarm_install(struct rte_eth_dev *dev);
+int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev);
+
/* RX / TX */

uint16_t failsafe_rx_burst(void *rxq,
@@ -125,10 +141,15 @@ int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
int failsafe_eal_init(struct rte_eth_dev *dev);
int failsafe_eal_uninit(struct rte_eth_dev *dev);

+/* ETH_DEV */
+
+int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
+
/* GLOBALS */

extern const char pmd_failsafe_driver_name[];
extern const struct eth_dev_ops failsafe_ops;
+extern uint64_t hotplug_poll;
extern int mac_from_arg;

/* HELPERS */
@@ -224,4 +245,39 @@ fs_find_next(struct rte_eth_dev *dev, uint8_t sid,
return sid;
}

+static inline void
+fs_switch_dev(struct rte_eth_dev *dev)
+{
+ enum dev_state req_state;
+
+ req_state = PRIV(dev)->state;
+ if (PREFERRED_SUBDEV(dev)->state >= req_state) {
+ if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
+ (TX_SUBDEV(dev) == NULL ||
+ (req_state == DEV_STARTED) ||
+ (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
+ DEBUG("Switching tx_dev to preferred sub_device");
+ PRIV(dev)->subs_tx = 0;
+ }
+ } else if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) ||
+ TX_SUBDEV(dev) == NULL) {
+ struct sub_device *sdev;
+ uint8_t i;
+
+ /* Using acceptable device */
+ FOREACH_SUBDEV_ST(sdev, i, dev, req_state) {
+ DEBUG("Switching tx_dev to sub_device %d",
+ i);
+ PRIV(dev)->subs_tx = i;
+ break;
+ }
+ } else if (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) {
+ DEBUG("No device ready, deactivating tx_dev");
+ PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
+ } else {
+ return;
+ }
+ rte_wmb();
+}
+
#endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:28 UTC
Permalink
Add the "exec" device type.
The parameters given to this type of device will be executed in a shell.
The output of this command is then used as a definition for a device.

That command can be re-interpreted if the related device is not
plugged-in. It allows for a device definition to react to system
changes (e.g. changing PCI bus for a given device).

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/fail_safe.rst | 20 +++++++
drivers/net/failsafe/failsafe_args.c | 99 +++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ether.c | 7 +++
drivers/net/failsafe/failsafe_private.h | 4 ++
4 files changed, 130 insertions(+)

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index c04891a..1b6e110 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -91,6 +91,19 @@ Fail-safe command line parameters
additional sub-device parameters if need be. They will be passed on to the
sub-device.

+- **exec(<shell command>)** parameter
+
+ This parameter allows the user to provide a command to the fail-safe PMD to
+ execute and define a sub-device.
+ It is done within a regular shell context.
+ The first line of its output is read by the fail-safe PMD and otherwise
+ interpreted as if passed by the regular **dev** parameter.
+ Any other line is discarded.
+ If the command fail or output an incorrect string, the sub-device is not
+ initialized.
+ All commas within the ``shell command`` are replaced by spaces before
+ executing the command. This helps using scripts to specify devices.
+
- **mac** parameter [MAC address]

This parameter allows the user to set a default MAC address to the fail-safe
@@ -126,6 +139,13 @@ This section shows some example of using **testpmd** with a fail-safe PMD.
--vdev='net_failsafe0,mac=de:ad:be:ef:01:02,dev(84:00.0),dev(net_ring0,nodeaction=r1:0:CREATE)' -- \
-i

+#. Start testpmd using a flexible device definition
+
+ .. code-block:: console
+
+ $RTE_TARGET/build/app/testpmd -c 0xff -n 4 --no-pci \
+ --vdev='net_failsafe0,exec(echo 84:00.0)' -- -i
+
Using the Fail-safe PMD from an application
-------------------------------------------

diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
index 8f334aa..c723ca3 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -30,6 +30,8 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
+#include <assert.h>
#include <string.h>
#include <errno.h>

@@ -96,6 +98,75 @@ fs_parse_device(struct sub_device *sdev, char *args)
return 0;
}

+static void
+fs_sanitize_cmdline(char *args)
+{
+ size_t len;
+
+ len = strnlen(args, DEVARGS_MAXLEN);
+ args[len - 1] = '\0';
+}
+
+static int
+fs_execute_cmd(struct sub_device *sdev, char *cmdline)
+{
+ FILE *fp;
+ /* store possible newline as well */
+ char output[DEVARGS_MAXLEN + 1];
+ size_t len;
+ int old_err;
+ int ret;
+
+ assert(cmdline != NULL || sdev->cmdline != NULL);
+ if (sdev->cmdline == NULL) {
+ char *new_str;
+ size_t i;
+
+ len = strlen(cmdline) + 1;
+ new_str = rte_realloc(sdev->cmdline, len,
+ RTE_CACHE_LINE_SIZE);
+ if (new_str == NULL) {
+ ERROR("Command line allocation failed");
+ return -ENOMEM;
+ }
+ sdev->cmdline = new_str;
+ snprintf(sdev->cmdline, len, "%s", cmdline);
+ /* Replace all commas in the command line by spaces */
+ for (i = 0; i < len; i++)
+ if (sdev->cmdline[i] == ',')
+ sdev->cmdline[i] = ' ';
+ }
+ DEBUG("'%s'", sdev->cmdline);
+ old_err = errno;
+ fp = popen(sdev->cmdline, "r");
+ if (fp == NULL) {
+ ret = errno;
+ ERROR("popen: %s", strerror(errno));
+ errno = old_err;
+ return ret;
+ }
+ /* We only read one line */
+ if (fgets(output, sizeof(output) - 1, fp) == NULL) {
+ DEBUG("Could not read command output");
+ return -ENODEV;
+ }
+ fs_sanitize_cmdline(output);
+ ret = fs_parse_device(sdev, output);
+ if (ret) {
+ ERROR("Parsing device '%s' failed", output);
+ goto ret_pclose;
+ }
+ret_pclose:
+ ret = pclose(fp);
+ if (ret) {
+ ret = errno;
+ ERROR("pclose: %s", strerror(errno));
+ errno = old_err;
+ return ret;
+ }
+ return ret;
+}
+
static int
fs_parse_device_param(struct rte_eth_dev *dev, const char *param,
uint8_t head)
@@ -130,6 +201,14 @@ fs_parse_device_param(struct rte_eth_dev *dev, const char *param,
ret = fs_parse_device(sdev, args);
if (ret)
goto free_args;
+ } else if (strncmp(param, "exec", 4) == 0) {
+ ret = fs_execute_cmd(sdev, args);
+ if (ret == -ENODEV) {
+ DEBUG("Reading device info from command line failed");
+ ret = 0;
+ }
+ if (ret)
+ goto free_args;
} else {
ERROR("Unrecognized device type: %.*s", (int)b, param);
return -EINVAL;
@@ -331,6 +410,8 @@ failsafe_args_free(struct rte_eth_dev *dev)
uint8_t i;

FOREACH_SUBDEV(sdev, i, dev) {
+ rte_free(sdev->cmdline);
+ sdev->cmdline = NULL;
free(sdev->devargs.args);
sdev->devargs.args = NULL;
}
@@ -361,3 +442,21 @@ failsafe_args_count_subdevice(struct rte_eth_dev *dev,
return fs_parse_sub_devices(fs_count_device,
dev, params);
}
+
+int
+failsafe_args_parse_subs(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret = 0;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state >= DEV_PARSED)
+ continue;
+ if (sdev->cmdline)
+ ret = fs_execute_cmd(sdev, sdev->cmdline);
+ if (ret == 0)
+ sdev->state = DEV_PARSED;
+ }
+ return 0;
+}
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 7910952..2a1535e 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -188,6 +188,13 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
int ret;
uint8_t i;

+ if (PRIV(dev)->state < DEV_PARSED)
+ return 0;
+
+ ret = failsafe_args_parse_subs(dev);
+ if (ret)
+ return ret;
+
if (PRIV(dev)->state < DEV_PROBED)
return 0;
ret = failsafe_eal_init(dev);
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 8fb72fe..554d7a3 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -44,6 +44,7 @@
#define PMD_FAILSAFE_PLUG_IN_POLL_KVARG "hotplug_poll"
#define PMD_FAILSAFE_PARAM_STRING \
"dev(<ifc>)," \
+ "exec(<shell command>)," \
"mac=mac_addr," \
"hotplug_poll=u64" \
""
@@ -87,6 +88,8 @@ struct sub_device {
struct rte_eth_dev *edev;
/* Device state machine */
enum dev_state state;
+ /* Some device are defined as a command line */
+ char *cmdline;
};

struct fs_priv {
@@ -135,6 +138,7 @@ uint16_t failsafe_tx_burst(void *txq,
int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
void failsafe_args_free(struct rte_eth_dev *dev);
int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
+int failsafe_args_parse_subs(struct rte_eth_dev *dev);

/* EAL */
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:29 UTC
Permalink
Periodically check for the existence of a device.
If a device has not been initialized and exists on the system, then it
is probed and configured.

The configuration process strives to synchronize the states between the
plugged-in sub-device and the fail-safe device.

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/fail_safe.rst | 19 +++
drivers/net/failsafe/Makefile | 1 +
drivers/net/failsafe/failsafe.c | 71 ++++++++++
drivers/net/failsafe/failsafe_args.c | 32 +++++
drivers/net/failsafe/failsafe_eal.c | 30 +----
drivers/net/failsafe/failsafe_ether.c | 228 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ops.c | 25 ++--
drivers/net/failsafe/failsafe_private.h | 60 ++++++++-
8 files changed, 423 insertions(+), 43 deletions(-)
create mode 100644 drivers/net/failsafe/failsafe_ether.c

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index 056f85f..c04891a 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -102,6 +102,11 @@ Fail-safe command line parameters
address is generated, that will be subsequently applied to all sub-device once
they are probed.

+- **hotplug_poll** parameter [UINT64] (default **2000**)
+
+ This parameter allows the user to configure the amount of time in milliseconds
+ between two slave upkeep round.
+
Usage example
~~~~~~~~~~~~~

@@ -131,3 +136,17 @@ Care must be taken, however, to respect the **ether** API concerning device
access, and in particular, using the ``RTE_ETH_FOREACH_DEV`` macro to iterate
over ethernet devices, instead of directly accessing them or by writing one's
own device iterator.
+
+Plug-in feature
+---------------
+
+A sub-device can be defined without existing on the system when the fail-safe
+PMD is initialized. Upon probing this device, the fail-safe PMD will detect its
+absence and postpone its use. It will then register for a periodic check on any
+missing sub-device.
+
+During this time, the fail-safe PMD can be used normally, configured and told to
+emit and receive packets. It will store any applied configuration, and try to
+apply it upon the probing of its missing sub-device. After this configuration
+pass, the new sub-device will be synchronized with other sub-devices, i.e. be
+started if the fail-safe PMD has been started by the user before.
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
index c759035..e27bfc0 100644
--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -44,6 +44,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_args.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ether.c

# No exported include files

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 7cf33e8..888f07b 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -80,6 +80,72 @@ fs_sub_device_free(struct rte_eth_dev *dev)
rte_free(PRIV(dev)->subs);
}

+static void fs_hotplug_alarm(void *arg);
+
+int
+failsafe_hotplug_alarm_install(struct rte_eth_dev *dev)
+{
+ int ret;
+
+ if (dev == NULL)
+ return -EINVAL;
+ if (PRIV(dev)->pending_alarm)
+ return 0;
+ ret = rte_eal_alarm_set(hotplug_poll * 1000,
+ fs_hotplug_alarm,
+ dev);
+ if (ret) {
+ ERROR("Could not set up plug-in event detection");
+ return ret;
+ }
+ PRIV(dev)->pending_alarm = 1;
+ return 0;
+}
+
+int
+failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev)
+{
+ int ret = 0;
+
+ if (PRIV(dev)->pending_alarm) {
+ rte_errno = 0;
+ rte_eal_alarm_cancel(fs_hotplug_alarm, dev);
+ if (rte_errno) {
+ ERROR("rte_eal_alarm_cancel failed (errno: %s)",
+ strerror(rte_errno));
+ ret = -rte_errno;
+ } else {
+ PRIV(dev)->pending_alarm = 0;
+ }
+ }
+ return ret;
+}
+
+static void
+fs_hotplug_alarm(void *arg)
+{
+ struct rte_eth_dev *dev = arg;
+ struct sub_device *sdev;
+ int ret;
+ uint8_t i;
+
+ if (!PRIV(dev)->pending_alarm)
+ return;
+ PRIV(dev)->pending_alarm = 0;
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state != PRIV(dev)->state)
+ break;
+ /* if we have non-probed device */
+ if (i != PRIV(dev)->subs_tail) {
+ ret = failsafe_eth_dev_state_sync(dev);
+ if (ret)
+ ERROR("Unable to synchronize sub_device state");
+ }
+ ret = failsafe_hotplug_alarm_install(dev);
+ if (ret)
+ ERROR("Unable to set up next alarm");
+}
+
static int
fs_eth_dev_create(struct rte_vdev_device *vdev)
{
@@ -128,6 +194,11 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
ret = failsafe_eal_init(dev);
if (ret)
goto free_args;
+ ret = failsafe_hotplug_alarm_install(dev);
+ if (ret) {
+ ERROR("Could not set up plug-in event detection");
+ goto free_args;
+ }
mac = &dev->data->mac_addrs[0];
if (mac_from_arg) {
/*
diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
index f07d26e..8f334aa 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -45,9 +45,11 @@
typedef int (parse_cb)(struct rte_eth_dev *dev, const char *params,
uint8_t head);

+uint64_t hotplug_poll;
int mac_from_arg;

const char *pmd_failsafe_init_parameters[] = {
+ PMD_FAILSAFE_PLUG_IN_POLL_KVARG,
PMD_FAILSAFE_MAC_KVARG,
NULL,
};
@@ -221,6 +223,24 @@ fs_remove_sub_devices_definition(char params[DEVARGS_MAXLEN])
}

static int
+fs_get_u64_arg(const char *key __rte_unused,
+ const char *value, void *out)
+{
+ uint64_t *u64 = out;
+ char *endptr = NULL;
+
+ if ((value == NULL) || (out == NULL))
+ return -EINVAL;
+ errno = 0;
+ *u64 = strtoull(value, &endptr, 0);
+ if (errno != 0)
+ return -errno;
+ if (endptr == value)
+ return -1;
+ return 0;
+}
+
+static int
fs_get_mac_addr_arg(const char *key __rte_unused,
const char *value, void *out)
{
@@ -252,6 +272,7 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
ret = 0;
priv->subs_tx = FAILSAFE_MAX_ETHPORTS;
/* default parameters */
+ hotplug_poll = FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS;
mac_from_arg = 0;
n = snprintf(mut_params, sizeof(mut_params), "%s", params);
if (n >= sizeof(mut_params)) {
@@ -274,6 +295,16 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
PMD_FAILSAFE_PARAM_STRING);
return -1;
}
+ /* PLUG_IN event poll timer */
+ arg_count = rte_kvargs_count(kvlist,
+ PMD_FAILSAFE_PLUG_IN_POLL_KVARG);
+ if (arg_count == 1) {
+ ret = rte_kvargs_process(kvlist,
+ PMD_FAILSAFE_PLUG_IN_POLL_KVARG,
+ &fs_get_u64_arg, &hotplug_poll);
+ if (ret < 0)
+ goto free_kvlist;
+ }
/* MAC addr */
arg_count = rte_kvargs_count(kvlist,
PMD_FAILSAFE_MAC_KVARG);
@@ -287,6 +318,7 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
mac_from_arg = 1;
}
}
+ PRIV(dev)->state = DEV_PARSED;
free_kvlist:
rte_kvargs_free(kvlist);
return ret;
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index 6c3a811..16871df 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -88,37 +88,14 @@ fs_bus_init(struct rte_eth_dev *dev)
int
failsafe_eal_init(struct rte_eth_dev *dev)
{
- struct sub_device *sdev;
- uint8_t i;
int ret;

ret = fs_bus_init(dev);
if (ret)
return ret;
- /*
- * We only update TX_SUBDEV if we are not started.
- * If a sub_device is emitting, we will switch the TX_SUBDEV to the
- * preferred port only upon starting it, so that the switch is smoother.
- */
- if (PREFERRED_SUBDEV(dev)->state >= DEV_PROBED) {
- if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
- (TX_SUBDEV(dev) == NULL ||
- (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
- DEBUG("Switching tx_dev to preferred sub_device");
- PRIV(dev)->subs_tx = 0;
- }
- } else {
- if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_PROBED) ||
- TX_SUBDEV(dev) == NULL) {
- /* Using first probed device */
- FOREACH_SUBDEV_ST(sdev, i, dev, DEV_PROBED) {
- DEBUG("Switching tx_dev to sub_device %d",
- i);
- PRIV(dev)->subs_tx = i;
- break;
- }
- }
- }
+ if (PRIV(dev)->state < DEV_PROBED)
+ PRIV(dev)->state = DEV_PROBED;
+ fs_switch_dev(dev);
return 0;
}

@@ -150,5 +127,6 @@ failsafe_eal_uninit(struct rte_eth_dev *dev)
ret = fs_bus_uninit(dev);
if (ret)
return ret;
+ PRIV(dev)->state = DEV_PROBED - 1;
return 0;
}
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
new file mode 100644
index 0000000..7910952
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -0,0 +1,228 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+
+#include "failsafe_private.h"
+
+static int
+fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
+ struct sub_device *sdev)
+{
+ struct rte_eth_dev *edev;
+ struct rte_vlan_filter_conf *vfc1;
+ struct rte_vlan_filter_conf *vfc2;
+ uint32_t i;
+ int ret;
+
+ edev = ETH(sdev);
+ /* RX queue setup */
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ struct rxq *rxq;
+
+ rxq = dev->data->rx_queues[i];
+ ret = rte_eth_rx_queue_setup(PORT_ID(sdev), i,
+ rxq->info.nb_desc, rxq->socket_id,
+ &rxq->info.conf, rxq->info.mp);
+ if (ret) {
+ ERROR("rx_queue_setup failed");
+ return ret;
+ }
+ }
+ /* TX queue setup */
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ struct txq *txq;
+
+ txq = dev->data->tx_queues[i];
+ ret = rte_eth_tx_queue_setup(PORT_ID(sdev), i,
+ txq->info.nb_desc, txq->socket_id,
+ &txq->info.conf);
+ if (ret) {
+ ERROR("tx_queue_setup failed");
+ return ret;
+ }
+ }
+ /* dev_link.link_status */
+ if (dev->data->dev_link.link_status !=
+ edev->data->dev_link.link_status) {
+ DEBUG("Configuring link_status");
+ if (dev->data->dev_link.link_status)
+ ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
+ else
+ ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
+ if (ret) {
+ ERROR("Failed to apply link_status");
+ return ret;
+ }
+ } else {
+ DEBUG("link_status already set");
+ }
+ /* promiscuous */
+ if (dev->data->promiscuous != edev->data->promiscuous) {
+ DEBUG("Configuring promiscuous");
+ if (dev->data->promiscuous)
+ rte_eth_promiscuous_enable(PORT_ID(sdev));
+ else
+ rte_eth_promiscuous_disable(PORT_ID(sdev));
+ } else {
+ DEBUG("promiscuous already set");
+ }
+ /* all_multicast */
+ if (dev->data->all_multicast != edev->data->all_multicast) {
+ DEBUG("Configuring all_multicast");
+ if (dev->data->all_multicast)
+ rte_eth_allmulticast_enable(PORT_ID(sdev));
+ else
+ rte_eth_allmulticast_disable(PORT_ID(sdev));
+ } else {
+ DEBUG("all_multicast already set");
+ }
+ /* MTU */
+ if (dev->data->mtu != edev->data->mtu) {
+ DEBUG("Configuring MTU");
+ ret = rte_eth_dev_set_mtu(PORT_ID(sdev), dev->data->mtu);
+ if (ret) {
+ ERROR("Failed to apply MTU");
+ return ret;
+ }
+ } else {
+ DEBUG("MTU already set");
+ }
+ /* default MAC */
+ DEBUG("Configuring default MAC address");
+ ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
+ &dev->data->mac_addrs[0]);
+ if (ret) {
+ ERROR("Setting default MAC address failed");
+ return ret;
+ }
+ /* additional MAC */
+ if (PRIV(dev)->nb_mac_addr > 1)
+ DEBUG("Configure additional MAC address%s",
+ (PRIV(dev)->nb_mac_addr > 2 ? "es" : ""));
+ for (i = 1; i < PRIV(dev)->nb_mac_addr; i++) {
+ struct ether_addr *ea;
+
+ ea = &dev->data->mac_addrs[i];
+ ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), ea,
+ PRIV(dev)->mac_addr_pool[i]);
+ if (ret) {
+ char ea_fmt[ETHER_ADDR_FMT_SIZE];
+
+ ether_format_addr(ea_fmt, ETHER_ADDR_FMT_SIZE, ea);
+ ERROR("Adding MAC address %s failed", ea_fmt);
+ }
+ }
+ /* VLAN filter */
+ vfc1 = &dev->data->vlan_filter_conf;
+ vfc2 = &edev->data->vlan_filter_conf;
+ if (memcmp(vfc1, vfc2, sizeof(struct rte_vlan_filter_conf))) {
+ uint64_t vbit;
+ uint64_t ids;
+ size_t i;
+ uint16_t vlan_id;
+
+ DEBUG("Configuring VLAN filter");
+ for (i = 0; i < RTE_DIM(vfc1->ids); i++) {
+ if (vfc1->ids[i] == 0)
+ continue;
+ ids = vfc1->ids[i];
+ while (ids) {
+ vlan_id = 64 * i;
+ /* count trailing zeroes */
+ vbit = ~ids & (ids - 1);
+ /* clear least significant bit set */
+ ids ^= (ids ^ (ids - 1)) ^ vbit;
+ for (; vbit; vlan_id++)
+ vbit >>= 1;
+ ret = rte_eth_dev_vlan_filter(
+ PORT_ID(sdev), vlan_id, 1);
+ if (ret) {
+ ERROR("Failed to apply VLAN filter %hu",
+ vlan_id);
+ return ret;
+ }
+ }
+ }
+ } else {
+ DEBUG("VLAN filter already set");
+ }
+ return 0;
+}
+
+int
+failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint32_t inactive;
+ int ret;
+ uint8_t i;
+
+ if (PRIV(dev)->state < DEV_PROBED)
+ return 0;
+ ret = failsafe_eal_init(dev);
+ if (ret)
+ return ret;
+ if (PRIV(dev)->state < DEV_ACTIVE)
+ return 0;
+ inactive = 0;
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state == DEV_PROBED)
+ inactive |= UINT32_C(1) << i;
+ ret = dev->dev_ops->dev_configure(dev);
+ if (ret)
+ return ret;
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (inactive & (UINT32_C(1) << i)) {
+ ret = fs_eth_dev_conf_apply(dev, sdev);
+ if (ret) {
+ ERROR("Could not apply configuration to sub_device %d",
+ i);
+ /* TODO: disable device */
+ return ret;
+ }
+ }
+ }
+ /*
+ * If new devices have been configured, check if
+ * the link state has changed.
+ */
+ if (inactive)
+ dev->dev_ops->link_update(dev, 1);
+ if (PRIV(dev)->state < DEV_STARTED)
+ return 0;
+ ret = dev->dev_ops->dev_start(dev);
+ if (ret)
+ return ret;
+ return 0;
+}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 693162e..4044473 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -89,6 +89,8 @@ fs_dev_configure(struct rte_eth_dev *dev)
}
sdev->state = DEV_ACTIVE;
}
+ if (PRIV(dev)->state < DEV_ACTIVE)
+ PRIV(dev)->state = DEV_ACTIVE;
return 0;
}

@@ -108,21 +110,9 @@ fs_dev_start(struct rte_eth_dev *dev)
return ret;
sdev->state = DEV_STARTED;
}
- if (PREFERRED_SUBDEV(dev)->state == DEV_STARTED) {
- if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev)) {
- DEBUG("Switching tx_dev to preferred sub_device");
- PRIV(dev)->subs_tx = 0;
- }
- } else {
- if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED) ||
- TX_SUBDEV(dev) == NULL) {
- FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
- DEBUG("Switching tx_dev to sub_device %d", i);
- PRIV(dev)->subs_tx = i;
- break;
- }
- }
- }
+ if (PRIV(dev)->state < DEV_STARTED)
+ PRIV(dev)->state = DEV_STARTED;
+ fs_switch_dev(dev);
return 0;
}

@@ -132,6 +122,7 @@ fs_dev_stop(struct rte_eth_dev *dev)
struct sub_device *sdev;
uint8_t i;

+ PRIV(dev)->state = DEV_STARTED - 1;
FOREACH_SUBDEV_ST(sdev, i, dev, DEV_STARTED) {
rte_eth_dev_stop(PORT_ID(sdev));
sdev->state = DEV_STARTED - 1;
@@ -183,6 +174,10 @@ fs_dev_close(struct rte_eth_dev *dev)
struct sub_device *sdev;
uint8_t i;

+ failsafe_hotplug_alarm_cancel(dev);
+ if (PRIV(dev)->state == DEV_STARTED)
+ dev->dev_ops->dev_stop(dev);
+ PRIV(dev)->state = DEV_ACTIVE - 1;
FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
DEBUG("Closing sub_device %d", i);
rte_eth_dev_close(PORT_ID(sdev));
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index e7a7592..8fb72fe 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -41,12 +41,14 @@
#define FAILSAFE_DRIVER_NAME "Fail-safe PMD"

#define PMD_FAILSAFE_MAC_KVARG "mac"
+#define PMD_FAILSAFE_PLUG_IN_POLL_KVARG "hotplug_poll"
#define PMD_FAILSAFE_PARAM_STRING \
"dev(<ifc>)," \
- "mac=mac_addr" \
+ "mac=mac_addr," \
+ "hotplug_poll=u64" \
""

-#define FAILSAFE_PLUGIN_DEFAULT_TIMEOUT_MS 2000
+#define FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS 2000

#define FAILSAFE_MAX_ETHPORTS 2
#define FAILSAFE_MAX_ETHADDR 128
@@ -105,8 +107,22 @@ struct fs_priv {
uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR];
/* current capabilities */
struct rte_eth_dev_info infos;
+ /*
+ * Fail-safe state machine.
+ * This level will be tracking state of the EAL and eth
+ * layer at large as defined by the user application.
+ * It will then steer the sub_devices toward the same
+ * synchronized state.
+ */
+ enum dev_state state;
+ unsigned int pending_alarm:1; /* An alarm is pending */
};

+/* MISC */
+
+int failsafe_hotplug_alarm_install(struct rte_eth_dev *dev);
+int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev);
+
/* RX / TX */

uint16_t failsafe_rx_burst(void *rxq,
@@ -125,10 +141,15 @@ int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
int failsafe_eal_init(struct rte_eth_dev *dev);
int failsafe_eal_uninit(struct rte_eth_dev *dev);

+/* ETH_DEV */
+
+int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
+
/* GLOBALS */

extern const char pmd_failsafe_driver_name[];
extern const struct eth_dev_ops failsafe_ops;
+extern uint64_t hotplug_poll;
extern int mac_from_arg;

/* HELPERS */
@@ -224,4 +245,39 @@ fs_find_next(struct rte_eth_dev *dev, uint8_t sid,
return sid;
}

+static inline void
+fs_switch_dev(struct rte_eth_dev *dev)
+{
+ enum dev_state req_state;
+
+ req_state = PRIV(dev)->state;
+ if (PREFERRED_SUBDEV(dev)->state >= req_state) {
+ if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
+ (TX_SUBDEV(dev) == NULL ||
+ (req_state == DEV_STARTED) ||
+ (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
+ DEBUG("Switching tx_dev to preferred sub_device");
+ PRIV(dev)->subs_tx = 0;
+ }
+ } else if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) ||
+ TX_SUBDEV(dev) == NULL) {
+ struct sub_device *sdev;
+ uint8_t i;
+
+ /* Using acceptable device */
+ FOREACH_SUBDEV_ST(sdev, i, dev, req_state) {
+ DEBUG("Switching tx_dev to sub_device %d",
+ i);
+ PRIV(dev)->subs_tx = i;
+ break;
+ }
+ } else if (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) {
+ DEBUG("No device ready, deactivating tx_dev");
+ PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
+ } else {
+ return;
+ }
+ rte_wmb();
+}
+
#endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:30 UTC
Permalink
Add the "exec" device type.
The parameters given to this type of device will be executed in a shell.
The output of this command is then used as a definition for a device.

That command can be re-interpreted if the related device is not
plugged-in. It allows for a device definition to react to system
changes (e.g. changing PCI bus for a given device).

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/fail_safe.rst | 20 +++++++
drivers/net/failsafe/failsafe_args.c | 99 +++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ether.c | 7 +++
drivers/net/failsafe/failsafe_private.h | 4 ++
4 files changed, 130 insertions(+)

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index c04891a..1b6e110 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -91,6 +91,19 @@ Fail-safe command line parameters
additional sub-device parameters if need be. They will be passed on to the
sub-device.

+- **exec(<shell command>)** parameter
+
+ This parameter allows the user to provide a command to the fail-safe PMD to
+ execute and define a sub-device.
+ It is done within a regular shell context.
+ The first line of its output is read by the fail-safe PMD and otherwise
+ interpreted as if passed by the regular **dev** parameter.
+ Any other line is discarded.
+ If the command fail or output an incorrect string, the sub-device is not
+ initialized.
+ All commas within the ``shell command`` are replaced by spaces before
+ executing the command. This helps using scripts to specify devices.
+
- **mac** parameter [MAC address]

This parameter allows the user to set a default MAC address to the fail-safe
@@ -126,6 +139,13 @@ This section shows some example of using **testpmd** with a fail-safe PMD.
--vdev='net_failsafe0,mac=de:ad:be:ef:01:02,dev(84:00.0),dev(net_ring0,nodeaction=r1:0:CREATE)' -- \
-i

+#. Start testpmd using a flexible device definition
+
+ .. code-block:: console
+
+ $RTE_TARGET/build/app/testpmd -c 0xff -n 4 --no-pci \
+ --vdev='net_failsafe0,exec(echo 84:00.0)' -- -i
+
Using the Fail-safe PMD from an application
-------------------------------------------

diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
index 8f334aa..c723ca3 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -30,6 +30,8 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
+#include <assert.h>
#include <string.h>
#include <errno.h>

@@ -96,6 +98,75 @@ fs_parse_device(struct sub_device *sdev, char *args)
return 0;
}

+static void
+fs_sanitize_cmdline(char *args)
+{
+ size_t len;
+
+ len = strnlen(args, DEVARGS_MAXLEN);
+ args[len - 1] = '\0';
+}
+
+static int
+fs_execute_cmd(struct sub_device *sdev, char *cmdline)
+{
+ FILE *fp;
+ /* store possible newline as well */
+ char output[DEVARGS_MAXLEN + 1];
+ size_t len;
+ int old_err;
+ int ret;
+
+ assert(cmdline != NULL || sdev->cmdline != NULL);
+ if (sdev->cmdline == NULL) {
+ char *new_str;
+ size_t i;
+
+ len = strlen(cmdline) + 1;
+ new_str = rte_realloc(sdev->cmdline, len,
+ RTE_CACHE_LINE_SIZE);
+ if (new_str == NULL) {
+ ERROR("Command line allocation failed");
+ return -ENOMEM;
+ }
+ sdev->cmdline = new_str;
+ snprintf(sdev->cmdline, len, "%s", cmdline);
+ /* Replace all commas in the command line by spaces */
+ for (i = 0; i < len; i++)
+ if (sdev->cmdline[i] == ',')
+ sdev->cmdline[i] = ' ';
+ }
+ DEBUG("'%s'", sdev->cmdline);
+ old_err = errno;
+ fp = popen(sdev->cmdline, "r");
+ if (fp == NULL) {
+ ret = errno;
+ ERROR("popen: %s", strerror(errno));
+ errno = old_err;
+ return ret;
+ }
+ /* We only read one line */
+ if (fgets(output, sizeof(output) - 1, fp) == NULL) {
+ DEBUG("Could not read command output");
+ return -ENODEV;
+ }
+ fs_sanitize_cmdline(output);
+ ret = fs_parse_device(sdev, output);
+ if (ret) {
+ ERROR("Parsing device '%s' failed", output);
+ goto ret_pclose;
+ }
+ret_pclose:
+ ret = pclose(fp);
+ if (ret) {
+ ret = errno;
+ ERROR("pclose: %s", strerror(errno));
+ errno = old_err;
+ return ret;
+ }
+ return ret;
+}
+
static int
fs_parse_device_param(struct rte_eth_dev *dev, const char *param,
uint8_t head)
@@ -130,6 +201,14 @@ fs_parse_device_param(struct rte_eth_dev *dev, const char *param,
ret = fs_parse_device(sdev, args);
if (ret)
goto free_args;
+ } else if (strncmp(param, "exec", 4) == 0) {
+ ret = fs_execute_cmd(sdev, args);
+ if (ret == -ENODEV) {
+ DEBUG("Reading device info from command line failed");
+ ret = 0;
+ }
+ if (ret)
+ goto free_args;
} else {
ERROR("Unrecognized device type: %.*s", (int)b, param);
return -EINVAL;
@@ -331,6 +410,8 @@ failsafe_args_free(struct rte_eth_dev *dev)
uint8_t i;

FOREACH_SUBDEV(sdev, i, dev) {
+ rte_free(sdev->cmdline);
+ sdev->cmdline = NULL;
free(sdev->devargs.args);
sdev->devargs.args = NULL;
}
@@ -361,3 +442,21 @@ failsafe_args_count_subdevice(struct rte_eth_dev *dev,
return fs_parse_sub_devices(fs_count_device,
dev, params);
}
+
+int
+failsafe_args_parse_subs(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret = 0;
+
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state >= DEV_PARSED)
+ continue;
+ if (sdev->cmdline)
+ ret = fs_execute_cmd(sdev, sdev->cmdline);
+ if (ret == 0)
+ sdev->state = DEV_PARSED;
+ }
+ return 0;
+}
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 7910952..2a1535e 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -188,6 +188,13 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
int ret;
uint8_t i;

+ if (PRIV(dev)->state < DEV_PARSED)
+ return 0;
+
+ ret = failsafe_args_parse_subs(dev);
+ if (ret)
+ return ret;
+
if (PRIV(dev)->state < DEV_PROBED)
return 0;
ret = failsafe_eal_init(dev);
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 8fb72fe..554d7a3 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -44,6 +44,7 @@
#define PMD_FAILSAFE_PLUG_IN_POLL_KVARG "hotplug_poll"
#define PMD_FAILSAFE_PARAM_STRING \
"dev(<ifc>)," \
+ "exec(<shell command>)," \
"mac=mac_addr," \
"hotplug_poll=u64" \
""
@@ -87,6 +88,8 @@ struct sub_device {
struct rte_eth_dev *edev;
/* Device state machine */
enum dev_state state;
+ /* Some device are defined as a command line */
+ char *cmdline;
};

struct fs_priv {
@@ -135,6 +138,7 @@ uint16_t failsafe_tx_burst(void *txq,
int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
void failsafe_args_free(struct rte_eth_dev *dev);
int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
+int failsafe_args_parse_subs(struct rte_eth_dev *dev);

/* EAL */
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:31 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/features/failsafe.ini | 1 +
drivers/net/failsafe/Makefile | 1 +
drivers/net/failsafe/failsafe.c | 1 +
drivers/net/failsafe/failsafe_eal.c | 1 +
drivers/net/failsafe/failsafe_ether.c | 70 +++++++++++
drivers/net/failsafe/failsafe_flow.c | 216 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ops.c | 29 +++++
drivers/net/failsafe/failsafe_private.h | 18 +++
8 files changed, 337 insertions(+)
create mode 100644 drivers/net/failsafe/failsafe_flow.c

diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
index 3c52823..9167b59 100644
--- a/doc/guides/nics/features/failsafe.ini
+++ b/doc/guides/nics/features/failsafe.ini
@@ -13,6 +13,7 @@ Allmulticast mode = Y
Unicast MAC filter = Y
Multicast MAC filter = Y
VLAN filter = Y
+Flow API = Y
Packet type parsing = Y
Basic stats = Y
Stats per queue = Y
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
index e27bfc0..3cccfe0 100644
--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -45,6 +45,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ether.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_flow.c

# No exported include files

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 888f07b..6557255 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -177,6 +177,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0];
dev->data->dev_link = eth_link;
PRIV(dev)->nb_mac_addr = 1;
+ TAILQ_INIT(&PRIV(dev)->flow_list);
dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst;
dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst;
if (params == NULL) {
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index 16871df..86e16a6 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -78,6 +78,7 @@ fs_bus_init(struct rte_eth_dev *dev)
ERROR("sub_device %d init went wrong", i);
return -ENODEV;
}
+ SUB_ID(sdev) = i;
sdev->dev = ETH(sdev)->device;
ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
sdev->state = DEV_PROBED;
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 2a1535e..2958207 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -33,8 +33,46 @@

#include <unistd.h>

+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+
#include "failsafe_private.h"

+/** Print a message out of a flow error. */
+static int
+fs_flow_complain(struct rte_flow_error *error)
+{
+ static const char *const errstrlist[] = {
+ [RTE_FLOW_ERROR_TYPE_NONE] = "no error",
+ [RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified",
+ [RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)",
+ [RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
+ [RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
+ [RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
+ [RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
+ [RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
+ [RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
+ };
+ const char *errstr;
+ char buf[32];
+ int err = rte_errno;
+
+ if ((unsigned int)error->type >= RTE_DIM(errstrlist) ||
+ !errstrlist[error->type])
+ errstr = "unknown type";
+ else
+ errstr = errstrlist[error->type];
+ ERROR("Caught error type %d (%s): %s%s\n",
+ error->type, errstr,
+ error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ",
+ error->cause), buf) : "",
+ error->message ? error->message : "(no stated reason)");
+ return -err;
+}
+
static int
fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
struct sub_device *sdev)
@@ -42,6 +80,8 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
struct rte_eth_dev *edev;
struct rte_vlan_filter_conf *vfc1;
struct rte_vlan_filter_conf *vfc2;
+ struct rte_flow *flow;
+ struct rte_flow_error ferror;
uint32_t i;
int ret;

@@ -177,6 +217,36 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
} else {
DEBUG("VLAN filter already set");
}
+ /* rte_flow */
+ if (TAILQ_EMPTY(&PRIV(dev)->flow_list)) {
+ DEBUG("rte_flow already set");
+ } else {
+ DEBUG("Resetting rte_flow configuration");
+ ret = rte_flow_flush(PORT_ID(sdev), &ferror);
+ if (ret) {
+ fs_flow_complain(&ferror);
+ return ret;
+ }
+ i = 0;
+ rte_errno = 0;
+ DEBUG("Configuring rte_flow");
+ TAILQ_FOREACH(flow, &PRIV(dev)->flow_list, next) {
+ DEBUG("Creating flow #%" PRIu32, i++);
+ flow->flows[SUB_ID(sdev)] =
+ rte_flow_create(PORT_ID(sdev),
+ &flow->fd->attr,
+ flow->fd->items,
+ flow->fd->actions,
+ &ferror);
+ ret = rte_errno;
+ if (ret)
+ break;
+ }
+ if (ret) {
+ fs_flow_complain(&ferror);
+ return ret;
+ }
+ }
return 0;
}

diff --git a/drivers/net/failsafe/failsafe_flow.c b/drivers/net/failsafe/failsafe_flow.c
new file mode 100644
index 0000000..d8f59a1
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_flow.c
@@ -0,0 +1,216 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/queue.h>
+
+#include <rte_malloc.h>
+#include <rte_tailq.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+
+#include "failsafe_private.h"
+
+static struct rte_flow *
+fs_flow_allocate(const struct rte_flow_attr *attr,
+ const struct rte_flow_item *items,
+ const struct rte_flow_action *actions)
+{
+ struct rte_flow *flow;
+ size_t fdsz;
+
+ fdsz = rte_flow_copy(NULL, 0, attr, items, actions);
+ flow = rte_zmalloc(NULL,
+ sizeof(struct rte_flow) + fdsz,
+ RTE_CACHE_LINE_SIZE);
+ if (flow == NULL) {
+ ERROR("Could not allocate new flow");
+ return NULL;
+ }
+ flow->fd = (void *)((uintptr_t)flow + sizeof(*flow));
+ if (rte_flow_copy(flow->fd, fdsz, attr, items, actions) != fdsz) {
+ ERROR("Failed to copy flow description");
+ rte_free(flow);
+ return NULL;
+ }
+ return flow;
+}
+
+static void
+fs_flow_release(struct rte_flow **flow)
+{
+ rte_free((*flow)->fd);
+ rte_free(*flow);
+ *flow = NULL;
+}
+
+static int
+fs_flow_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item patterns[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_flow_validate on sub_device %d", i);
+ ret = rte_flow_validate(PORT_ID(sdev),
+ attr, patterns, actions, error);
+ if (ret) {
+ ERROR("Operation rte_flow_validate failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static struct rte_flow *
+fs_flow_create(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item patterns[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ struct rte_flow *flow;
+ uint8_t i;
+
+ flow = fs_flow_allocate(attr, patterns, actions);
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ flow->flows[i] = rte_flow_create(PORT_ID(sdev),
+ attr, patterns, actions, error);
+ if (flow->flows[i] == NULL) {
+ ERROR("Failed to create flow on sub_device %d",
+ i);
+ goto err;
+ }
+ }
+ TAILQ_INSERT_TAIL(&PRIV(dev)->flow_list, flow, next);
+ return flow;
+err:
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (flow->flows[i] != NULL)
+ rte_flow_destroy(PORT_ID(sdev),
+ flow->flows[i], error);
+ }
+ fs_flow_release(&flow);
+ return NULL;
+}
+
+static int
+fs_flow_destroy(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ if (flow == NULL) {
+ ERROR("Invalid flow");
+ return -EINVAL;
+ }
+ ret = 0;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ int local_ret;
+
+ if (flow->flows[i] == NULL)
+ continue;
+ local_ret = rte_flow_destroy(PORT_ID(sdev),
+ flow->flows[i], error);
+ if (local_ret) {
+ ERROR("Failed to destroy flow on sub_device %d: %d",
+ i, local_ret);
+ if (ret == 0)
+ ret = local_ret;
+ }
+ }
+ TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
+ rte_free(flow);
+ return ret;
+}
+
+static int
+fs_flow_flush(struct rte_eth_dev *dev,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ struct rte_flow *flow;
+ void *tmp;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_flow_flush on sub_device %d", i);
+ ret = rte_flow_flush(PORT_ID(sdev), error);
+ if (ret) {
+ ERROR("Operation rte_flow_flush failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ TAILQ_FOREACH_SAFE(flow, &PRIV(dev)->flow_list, next, tmp) {
+ TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
+ fs_flow_release(&flow);
+ }
+ return 0;
+}
+
+static int
+fs_flow_query(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ enum rte_flow_action_type type,
+ void *arg,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev != NULL) {
+ return rte_flow_query(PORT_ID(sdev),
+ flow->flows[SUB_ID(sdev)], type, arg, error);
+ }
+ WARN("No active sub_device to query about its flow");
+ return -1;
+}
+
+const struct rte_flow_ops fs_flow_ops = {
+ .validate = fs_flow_validate,
+ .create = fs_flow_create,
+ .destroy = fs_flow_destroy,
+ .flush = fs_flow_flush,
+ .query = fs_flow_query,
+};
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 4044473..4cb2e90 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -35,6 +35,7 @@
#include <stdint.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>
+#include <rte_flow.h>

#include "failsafe_private.h"

@@ -628,6 +629,33 @@ fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
}

+static int
+fs_filter_ctrl(struct rte_eth_dev *dev,
+ enum rte_filter_type type,
+ enum rte_filter_op op,
+ void *arg)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ if (type == RTE_ETH_FILTER_GENERIC &&
+ op == RTE_ETH_FILTER_GET) {
+ *(const void **)arg = &fs_flow_ops;
+ return 0;
+ }
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d", i);
+ ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_filter_ctrl failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
const struct eth_dev_ops failsafe_ops = {
.dev_configure = fs_dev_configure,
.dev_start = fs_dev_start,
@@ -655,4 +683,5 @@ const struct eth_dev_ops failsafe_ops = {
.mac_addr_remove = fs_mac_addr_remove,
.mac_addr_add = fs_mac_addr_add,
.mac_addr_set = fs_mac_addr_set,
+ .filter_ctrl = fs_filter_ctrl,
};
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 554d7a3..f40ea2f 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -34,6 +34,8 @@
#ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_
#define _RTE_ETH_FAILSAFE_PRIVATE_H_

+#include <sys/queue.h>
+
#include <rte_dev.h>
#include <rte_ethdev.h>
#include <rte_devargs.h>
@@ -72,6 +74,14 @@ struct txq {
struct rte_eth_txq_info info;
};

+struct rte_flow {
+ TAILQ_ENTRY(rte_flow) next;
+ /* sub_flows */
+ struct rte_flow *flows[FAILSAFE_MAX_ETHPORTS];
+ /* flow description for synchronization */
+ struct rte_flow_desc *fd;
+};
+
enum dev_state {
DEV_UNDEFINED = 0,
DEV_PARSED,
@@ -86,6 +96,7 @@ struct sub_device {
struct rte_bus *bus;
struct rte_device *dev;
struct rte_eth_dev *edev;
+ uint8_t sid;
/* Device state machine */
enum dev_state state;
/* Some device are defined as a command line */
@@ -104,6 +115,8 @@ struct fs_priv {
uint8_t subs_tail; /* first invalid */
uint8_t subs_tx; /* current emitting device */
uint8_t current_probed;
+ /* flow mapping */
+ TAILQ_HEAD(sub_flows, rte_flow) flow_list;
/* current number of mac_addr slots allocated. */
uint32_t nb_mac_addr;
struct ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR];
@@ -153,6 +166,7 @@ int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);

extern const char pmd_failsafe_driver_name[];
extern const struct eth_dev_ops failsafe_ops;
+extern const struct rte_flow_ops fs_flow_ops;
extern uint64_t hotplug_poll;
extern int mac_from_arg;

@@ -170,6 +184,10 @@ extern int mac_from_arg;
#define PORT_ID(sdev) \
(ETH(sdev)->data->port_id)

+/* sdev: (struct sub_device *) */
+#define SUB_ID(sdev) \
+ ((sdev)->sid)
+
/**
* Stateful iterator construct over fail-safe sub-devices:
* s: (struct sub_device *), iterator
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:32 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/features/failsafe.ini | 1 +
drivers/net/failsafe/Makefile | 1 +
drivers/net/failsafe/failsafe.c | 1 +
drivers/net/failsafe/failsafe_eal.c | 1 +
drivers/net/failsafe/failsafe_ether.c | 70 +++++++++++
drivers/net/failsafe/failsafe_flow.c | 216 ++++++++++++++++++++++++++++++++
drivers/net/failsafe/failsafe_ops.c | 29 +++++
drivers/net/failsafe/failsafe_private.h | 18 +++
8 files changed, 337 insertions(+)
create mode 100644 drivers/net/failsafe/failsafe_flow.c

diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
index 3c52823..9167b59 100644
--- a/doc/guides/nics/features/failsafe.ini
+++ b/doc/guides/nics/features/failsafe.ini
@@ -13,6 +13,7 @@ Allmulticast mode = Y
Unicast MAC filter = Y
Multicast MAC filter = Y
VLAN filter = Y
+Flow API = Y
Packet type parsing = Y
Basic stats = Y
Stats per queue = Y
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
index e27bfc0..3cccfe0 100644
--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -45,6 +45,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ether.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_flow.c

# No exported include files

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 888f07b..6557255 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -177,6 +177,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0];
dev->data->dev_link = eth_link;
PRIV(dev)->nb_mac_addr = 1;
+ TAILQ_INIT(&PRIV(dev)->flow_list);
dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst;
dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst;
if (params == NULL) {
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index 16871df..86e16a6 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -78,6 +78,7 @@ fs_bus_init(struct rte_eth_dev *dev)
ERROR("sub_device %d init went wrong", i);
return -ENODEV;
}
+ SUB_ID(sdev) = i;
sdev->dev = ETH(sdev)->device;
ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
sdev->state = DEV_PROBED;
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 2a1535e..2958207 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -33,8 +33,46 @@

#include <unistd.h>

+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+
#include "failsafe_private.h"

+/** Print a message out of a flow error. */
+static int
+fs_flow_complain(struct rte_flow_error *error)
+{
+ static const char *const errstrlist[] = {
+ [RTE_FLOW_ERROR_TYPE_NONE] = "no error",
+ [RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified",
+ [RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)",
+ [RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
+ [RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
+ [RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
+ [RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
+ [RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
+ [RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
+ [RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
+ };
+ const char *errstr;
+ char buf[32];
+ int err = rte_errno;
+
+ if ((unsigned int)error->type >= RTE_DIM(errstrlist) ||
+ !errstrlist[error->type])
+ errstr = "unknown type";
+ else
+ errstr = errstrlist[error->type];
+ ERROR("Caught error type %d (%s): %s%s\n",
+ error->type, errstr,
+ error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ",
+ error->cause), buf) : "",
+ error->message ? error->message : "(no stated reason)");
+ return -err;
+}
+
static int
fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
struct sub_device *sdev)
@@ -42,6 +80,8 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
struct rte_eth_dev *edev;
struct rte_vlan_filter_conf *vfc1;
struct rte_vlan_filter_conf *vfc2;
+ struct rte_flow *flow;
+ struct rte_flow_error ferror;
uint32_t i;
int ret;

@@ -177,6 +217,36 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
} else {
DEBUG("VLAN filter already set");
}
+ /* rte_flow */
+ if (TAILQ_EMPTY(&PRIV(dev)->flow_list)) {
+ DEBUG("rte_flow already set");
+ } else {
+ DEBUG("Resetting rte_flow configuration");
+ ret = rte_flow_flush(PORT_ID(sdev), &ferror);
+ if (ret) {
+ fs_flow_complain(&ferror);
+ return ret;
+ }
+ i = 0;
+ rte_errno = 0;
+ DEBUG("Configuring rte_flow");
+ TAILQ_FOREACH(flow, &PRIV(dev)->flow_list, next) {
+ DEBUG("Creating flow #%" PRIu32, i++);
+ flow->flows[SUB_ID(sdev)] =
+ rte_flow_create(PORT_ID(sdev),
+ &flow->fd->attr,
+ flow->fd->items,
+ flow->fd->actions,
+ &ferror);
+ ret = rte_errno;
+ if (ret)
+ break;
+ }
+ if (ret) {
+ fs_flow_complain(&ferror);
+ return ret;
+ }
+ }
return 0;
}

diff --git a/drivers/net/failsafe/failsafe_flow.c b/drivers/net/failsafe/failsafe_flow.c
new file mode 100644
index 0000000..d8f59a1
--- /dev/null
+++ b/drivers/net/failsafe/failsafe_flow.c
@@ -0,0 +1,216 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/queue.h>
+
+#include <rte_malloc.h>
+#include <rte_tailq.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+
+#include "failsafe_private.h"
+
+static struct rte_flow *
+fs_flow_allocate(const struct rte_flow_attr *attr,
+ const struct rte_flow_item *items,
+ const struct rte_flow_action *actions)
+{
+ struct rte_flow *flow;
+ size_t fdsz;
+
+ fdsz = rte_flow_copy(NULL, 0, attr, items, actions);
+ flow = rte_zmalloc(NULL,
+ sizeof(struct rte_flow) + fdsz,
+ RTE_CACHE_LINE_SIZE);
+ if (flow == NULL) {
+ ERROR("Could not allocate new flow");
+ return NULL;
+ }
+ flow->fd = (void *)((uintptr_t)flow + sizeof(*flow));
+ if (rte_flow_copy(flow->fd, fdsz, attr, items, actions) != fdsz) {
+ ERROR("Failed to copy flow description");
+ rte_free(flow);
+ return NULL;
+ }
+ return flow;
+}
+
+static void
+fs_flow_release(struct rte_flow **flow)
+{
+ rte_free((*flow)->fd);
+ rte_free(*flow);
+ *flow = NULL;
+}
+
+static int
+fs_flow_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item patterns[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_flow_validate on sub_device %d", i);
+ ret = rte_flow_validate(PORT_ID(sdev),
+ attr, patterns, actions, error);
+ if (ret) {
+ ERROR("Operation rte_flow_validate failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static struct rte_flow *
+fs_flow_create(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item patterns[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ struct rte_flow *flow;
+ uint8_t i;
+
+ flow = fs_flow_allocate(attr, patterns, actions);
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ flow->flows[i] = rte_flow_create(PORT_ID(sdev),
+ attr, patterns, actions, error);
+ if (flow->flows[i] == NULL) {
+ ERROR("Failed to create flow on sub_device %d",
+ i);
+ goto err;
+ }
+ }
+ TAILQ_INSERT_TAIL(&PRIV(dev)->flow_list, flow, next);
+ return flow;
+err:
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (flow->flows[i] != NULL)
+ rte_flow_destroy(PORT_ID(sdev),
+ flow->flows[i], error);
+ }
+ fs_flow_release(&flow);
+ return NULL;
+}
+
+static int
+fs_flow_destroy(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ if (flow == NULL) {
+ ERROR("Invalid flow");
+ return -EINVAL;
+ }
+ ret = 0;
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ int local_ret;
+
+ if (flow->flows[i] == NULL)
+ continue;
+ local_ret = rte_flow_destroy(PORT_ID(sdev),
+ flow->flows[i], error);
+ if (local_ret) {
+ ERROR("Failed to destroy flow on sub_device %d: %d",
+ i, local_ret);
+ if (ret == 0)
+ ret = local_ret;
+ }
+ }
+ TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
+ rte_free(flow);
+ return ret;
+}
+
+static int
+fs_flow_flush(struct rte_eth_dev *dev,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+ struct rte_flow *flow;
+ void *tmp;
+ uint8_t i;
+ int ret;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_flow_flush on sub_device %d", i);
+ ret = rte_flow_flush(PORT_ID(sdev), error);
+ if (ret) {
+ ERROR("Operation rte_flow_flush failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ TAILQ_FOREACH_SAFE(flow, &PRIV(dev)->flow_list, next, tmp) {
+ TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
+ fs_flow_release(&flow);
+ }
+ return 0;
+}
+
+static int
+fs_flow_query(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ enum rte_flow_action_type type,
+ void *arg,
+ struct rte_flow_error *error)
+{
+ struct sub_device *sdev;
+
+ sdev = TX_SUBDEV(dev);
+ if (sdev != NULL) {
+ return rte_flow_query(PORT_ID(sdev),
+ flow->flows[SUB_ID(sdev)], type, arg, error);
+ }
+ WARN("No active sub_device to query about its flow");
+ return -1;
+}
+
+const struct rte_flow_ops fs_flow_ops = {
+ .validate = fs_flow_validate,
+ .create = fs_flow_create,
+ .destroy = fs_flow_destroy,
+ .flush = fs_flow_flush,
+ .query = fs_flow_query,
+};
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 4044473..4cb2e90 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -35,6 +35,7 @@
#include <stdint.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>
+#include <rte_flow.h>

#include "failsafe_private.h"

@@ -628,6 +629,33 @@ fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
}

+static int
+fs_filter_ctrl(struct rte_eth_dev *dev,
+ enum rte_filter_type type,
+ enum rte_filter_op op,
+ void *arg)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ if (type == RTE_ETH_FILTER_GENERIC &&
+ op == RTE_ETH_FILTER_GET) {
+ *(const void **)arg = &fs_flow_ops;
+ return 0;
+ }
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) {
+ DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d", i);
+ ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
+ if (ret) {
+ ERROR("Operation rte_eth_dev_filter_ctrl failed for sub_device %d"
+ " with error %d", i, ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
const struct eth_dev_ops failsafe_ops = {
.dev_configure = fs_dev_configure,
.dev_start = fs_dev_start,
@@ -655,4 +683,5 @@ const struct eth_dev_ops failsafe_ops = {
.mac_addr_remove = fs_mac_addr_remove,
.mac_addr_add = fs_mac_addr_add,
.mac_addr_set = fs_mac_addr_set,
+ .filter_ctrl = fs_filter_ctrl,
};
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 554d7a3..f40ea2f 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -34,6 +34,8 @@
#ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_
#define _RTE_ETH_FAILSAFE_PRIVATE_H_

+#include <sys/queue.h>
+
#include <rte_dev.h>
#include <rte_ethdev.h>
#include <rte_devargs.h>
@@ -72,6 +74,14 @@ struct txq {
struct rte_eth_txq_info info;
};

+struct rte_flow {
+ TAILQ_ENTRY(rte_flow) next;
+ /* sub_flows */
+ struct rte_flow *flows[FAILSAFE_MAX_ETHPORTS];
+ /* flow description for synchronization */
+ struct rte_flow_desc *fd;
+};
+
enum dev_state {
DEV_UNDEFINED = 0,
DEV_PARSED,
@@ -86,6 +96,7 @@ struct sub_device {
struct rte_bus *bus;
struct rte_device *dev;
struct rte_eth_dev *edev;
+ uint8_t sid;
/* Device state machine */
enum dev_state state;
/* Some device are defined as a command line */
@@ -104,6 +115,8 @@ struct fs_priv {
uint8_t subs_tail; /* first invalid */
uint8_t subs_tx; /* current emitting device */
uint8_t current_probed;
+ /* flow mapping */
+ TAILQ_HEAD(sub_flows, rte_flow) flow_list;
/* current number of mac_addr slots allocated. */
uint32_t nb_mac_addr;
struct ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR];
@@ -153,6 +166,7 @@ int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);

extern const char pmd_failsafe_driver_name[];
extern const struct eth_dev_ops failsafe_ops;
+extern const struct rte_flow_ops fs_flow_ops;
extern uint64_t hotplug_poll;
extern int mac_from_arg;

@@ -170,6 +184,10 @@ extern int mac_from_arg;
#define PORT_ID(sdev) \
(ETH(sdev)->data->port_id)

+/* sdev: (struct sub_device *) */
+#define SUB_ID(sdev) \
+ ((sdev)->sid)
+
/**
* Stateful iterator construct over fail-safe sub-devices:
* s: (struct sub_device *), iterator
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:33 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/features/failsafe.ini | 6 ++
drivers/net/failsafe/failsafe_ops.c | 131 +++++++++++++++++++++++++++++++++-
2 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
index 9167b59..257f579 100644
--- a/doc/guides/nics/features/failsafe.ini
+++ b/doc/guides/nics/features/failsafe.ini
@@ -14,6 +14,12 @@ Unicast MAC filter = Y
Multicast MAC filter = Y
VLAN filter = Y
Flow API = Y
+VLAN offload = Y
+QinQ offload = Y
+L3 checksum offload = Y
+L4 checksum offload = Y
+Inner L3 checksum = Y
+Inner L4 checksum = Y
Packet type parsing = Y
Basic stats = Y
Stats per queue = Y
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 4cb2e90..5fb0135 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -63,22 +63,149 @@ static struct rte_eth_dev_info default_infos = {
.nb_seg_max = UINT16_MAX,
.nb_mtu_seg_max = UINT16_MAX,
},
- /* Set of understood capabilities */
- .rx_offload_capa = 0x0,
+ /*
+ * Set of capabilities that can be verified upon
+ * configuring a sub-device.
+ */
+ .rx_offload_capa =
+ DEV_RX_OFFLOAD_VLAN_STRIP |
+ DEV_RX_OFFLOAD_QINQ_STRIP |
+ DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_LRO,
.tx_offload_capa = 0x0,
.flow_type_rss_offloads = 0x0,
};

+/**
+ * Check whether a specific offloading capability
+ * is supported by a sub_device.
+ *
+ * @return
+ * 0: all requested capabilities are supported by the sub_device
+ * positive value: This flag at least is not supported by the sub_device
+ */
+static int
+fs_port_offload_validate(struct rte_eth_dev *dev,
+ struct sub_device *sdev)
+{
+ struct rte_eth_dev_info infos = {0};
+ struct rte_eth_conf *cf;
+ uint32_t cap;
+
+ cf = &dev->data->dev_conf;
+ SUBOPS(sdev, dev_infos_get)(ETH(sdev), &infos);
+ /* RX capabilities */
+ cap = infos.rx_offload_capa;
+ if (cf->rxmode.hw_vlan_strip &&
+ ((cap & DEV_RX_OFFLOAD_VLAN_STRIP) == 0)) {
+ WARN("VLAN stripping offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_VLAN_STRIP;
+ }
+ if (cf->rxmode.hw_ip_checksum &&
+ ((cap & (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM)) !=
+ (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM))) {
+ WARN("IP checksum offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM;
+ }
+ if (cf->rxmode.enable_lro &&
+ ((cap & DEV_RX_OFFLOAD_TCP_LRO) == 0)) {
+ WARN("TCP LRO offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_TCP_LRO;
+ }
+ if (cf->rxmode.hw_vlan_extend &&
+ ((cap & DEV_RX_OFFLOAD_QINQ_STRIP) == 0)) {
+ WARN("Stacked VLAN stripping offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_QINQ_STRIP;
+ }
+ /* TX capabilities */
+ /* Nothing to do, no tx capa supported */
+ return 0;
+}
+
+/*
+ * Disable the dev_conf flag related to an offload capability flag
+ * within an ethdev configuration.
+ */
+static int
+fs_port_disable_offload(struct rte_eth_conf *cf,
+ uint32_t ol_cap)
+{
+ switch (ol_cap) {
+ case DEV_RX_OFFLOAD_VLAN_STRIP:
+ INFO("Disabling VLAN stripping offload");
+ cf->rxmode.hw_vlan_strip = 0;
+ break;
+ case DEV_RX_OFFLOAD_IPV4_CKSUM:
+ case DEV_RX_OFFLOAD_UDP_CKSUM:
+ case DEV_RX_OFFLOAD_TCP_CKSUM:
+ case (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM):
+ INFO("Disabling IP checksum offload");
+ cf->rxmode.hw_ip_checksum = 0;
+ break;
+ case DEV_RX_OFFLOAD_TCP_LRO:
+ INFO("Disabling TCP LRO offload");
+ cf->rxmode.enable_lro = 0;
+ break;
+ case DEV_RX_OFFLOAD_QINQ_STRIP:
+ INFO("Disabling stacked VLAN stripping offload");
+ cf->rxmode.hw_vlan_extend = 0;
+ break;
+ default:
+ DEBUG("Unable to disable offload capability: %" PRIx32,
+ ol_cap);
+ return -1;
+ }
+ return 0;
+}
+
static int
fs_dev_configure(struct rte_eth_dev *dev)
{
struct sub_device *sdev;
uint8_t i;
+ int capa_flag;
int ret;

FOREACH_SUBDEV(sdev, i, dev) {
if (sdev->state != DEV_PROBED)
continue;
+ DEBUG("Checking capabilities for sub_device %d", i);
+ while ((capa_flag = fs_port_offload_validate(dev, sdev))) {
+ /*
+ * Refuse to change configuration if multiple devices
+ * are present and we already have configured at least
+ * some of them.
+ */
+ if (PRIV(dev)->state >= DEV_ACTIVE &&
+ PRIV(dev)->subs_tail > 1) {
+ ERROR("device already configured, cannot fix live configuration");
+ return -1;
+ }
+ ret = fs_port_disable_offload(&dev->data->dev_conf,
+ capa_flag);
+ if (ret) {
+ ERROR("Unable to disable offload capability");
+ return ret;
+ }
+ }
+ }
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_PROBED)
+ continue;
DEBUG("Configuring sub-device %d", i);
ret = rte_eth_dev_configure(PORT_ID(sdev),
dev->data->nb_rx_queues,
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:34 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
drivers/net/failsafe/failsafe_private.h | 8 +++
drivers/net/failsafe/failsafe_rxtx.c | 124 ++++++++++++++++++++++++++------
2 files changed, 112 insertions(+), 20 deletions(-)

diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index f40ea2f..25a4dac 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -141,11 +141,18 @@ int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev);

/* RX / TX */

+void set_burst_fn(struct rte_eth_dev *dev, int force_safe);
+
uint16_t failsafe_rx_burst(void *rxq,
struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
uint16_t failsafe_tx_burst(void *txq,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts);

+uint16_t failsafe_rx_burst_fast(void *rxq,
+ struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t failsafe_tx_burst_fast(void *txq,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
/* ARGS */

int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
@@ -299,6 +306,7 @@ fs_switch_dev(struct rte_eth_dev *dev)
} else {
return;
}
+ set_burst_fn(dev, 0);
rte_wmb();
}

diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c
index a45b4e5..c15025f 100644
--- a/drivers/net/failsafe/failsafe_rxtx.c
+++ b/drivers/net/failsafe/failsafe_rxtx.c
@@ -31,16 +31,63 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

+#include <assert.h>
+
#include <rte_mbuf.h>
#include <rte_ethdev.h>

#include "failsafe_private.h"

-/*
- * TODO: write fast version,
- * without additional checks, to be activated once
- * everything has been verified to comply.
- */
+static inline int
+fs_rx_unsafe(struct sub_device *sdev)
+{
+ return (ETH(sdev) == NULL) ||
+ (ETH(sdev)->rx_pkt_burst == NULL) ||
+ (sdev->state != DEV_STARTED);
+}
+
+static inline int
+fs_tx_unsafe(struct sub_device *sdev)
+{
+ return (sdev == NULL) ||
+ (ETH(sdev) == NULL) ||
+ (ETH(sdev)->tx_pkt_burst == NULL) ||
+ (sdev->state != DEV_STARTED);
+}
+
+void
+set_burst_fn(struct rte_eth_dev *dev, int force_safe)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int need_safe;
+ int safe_set;
+
+ need_safe = force_safe;
+ FOREACH_SUBDEV(sdev, i, dev)
+ need_safe |= fs_rx_unsafe(sdev);
+ safe_set = (dev->rx_pkt_burst == &failsafe_rx_burst);
+ if (need_safe && !safe_set) {
+ DEBUG("Using safe RX bursts%s",
+ (force_safe ? " (forced)" : ""));
+ dev->rx_pkt_burst = &failsafe_rx_burst;
+ } else if (!need_safe && safe_set) {
+ DEBUG("Using fast RX bursts");
+ dev->rx_pkt_burst = &failsafe_rx_burst_fast;
+ }
+ need_safe = force_safe || fs_tx_unsafe(TX_SUBDEV(dev));
+ safe_set = (dev->tx_pkt_burst == &failsafe_tx_burst);
+ if (need_safe && !safe_set) {
+ DEBUG("Using safe TX bursts%s",
+ (force_safe ? " (forced)" : ""));
+ dev->tx_pkt_burst = &failsafe_tx_burst;
+ } else if (!need_safe && safe_set) {
+ DEBUG("Using fast TX bursts");
+ dev->tx_pkt_burst = &failsafe_tx_burst_fast;
+ }
+ rte_wmb();
+}
+
uint16_t
failsafe_rx_burst(void *queue,
struct rte_mbuf **rx_pkts,
@@ -63,11 +110,7 @@ failsafe_rx_burst(void *queue,
if (i == priv->subs_tail)
i = priv->subs_head;
sdev = &priv->subs[i];
- if (unlikely(ETH(sdev) == NULL))
- continue;
- if (unlikely(ETH(sdev)->rx_pkt_burst == NULL))
- continue;
- if (unlikely(sdev->state != DEV_STARTED))
+ if (unlikely(fs_rx_unsafe(sdev)))
continue;
sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
nb_rx = ETH(sdev)->
@@ -80,11 +123,40 @@ failsafe_rx_burst(void *queue,
return 0;
}

-/*
- * TODO: write fast version,
- * without additional checks, to be activated once
- * everything has been verified to comply.
- */
+uint16_t
+failsafe_rx_burst_fast(void *queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ void *sub_rxq;
+ uint16_t nb_rx;
+ uint8_t nb_polled, nb_subs;
+ uint8_t i;
+
+ rxq = queue;
+ priv = rxq->priv;
+ nb_subs = priv->subs_tail - priv->subs_head;
+ nb_polled = 0;
+ for (i = rxq->last_polled; nb_polled < nb_subs; nb_polled++) {
+ i++;
+ if (i == priv->subs_tail)
+ i = priv->subs_head;
+ sdev = &priv->subs[i];
+ assert(!fs_rx_unsafe(sdev));
+ sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ nb_rx = ETH(sdev)->
+ rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ if (nb_rx) {
+ rxq->last_polled = i;
+ return nb_rx;
+ }
+ }
+ return 0;
+}
+
uint16_t
failsafe_tx_burst(void *queue,
struct rte_mbuf **tx_pkts,
@@ -96,12 +168,24 @@ failsafe_tx_burst(void *queue,

txq = queue;
sdev = TX_SUBDEV(txq->priv->dev);
- if (unlikely(sdev == NULL))
- return 0;
- if (unlikely(ETH(sdev) == NULL))
- return 0;
- if (unlikely(ETH(sdev)->tx_pkt_burst == NULL))
+ if (unlikely(fs_tx_unsafe(sdev)))
return 0;
sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
}
+
+uint16_t
+failsafe_tx_burst_fast(void *queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ void *sub_txq;
+
+ txq = queue;
+ sdev = TX_SUBDEV(txq->priv->dev);
+ assert(!fs_tx_unsafe(sdev));
+ sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
+ return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+}
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:35 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/features/failsafe.ini | 6 ++
drivers/net/failsafe/failsafe_ops.c | 131 +++++++++++++++++++++++++++++++++-
2 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini
index 9167b59..257f579 100644
--- a/doc/guides/nics/features/failsafe.ini
+++ b/doc/guides/nics/features/failsafe.ini
@@ -14,6 +14,12 @@ Unicast MAC filter = Y
Multicast MAC filter = Y
VLAN filter = Y
Flow API = Y
+VLAN offload = Y
+QinQ offload = Y
+L3 checksum offload = Y
+L4 checksum offload = Y
+Inner L3 checksum = Y
+Inner L4 checksum = Y
Packet type parsing = Y
Basic stats = Y
Stats per queue = Y
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 4cb2e90..5fb0135 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -63,22 +63,149 @@ static struct rte_eth_dev_info default_infos = {
.nb_seg_max = UINT16_MAX,
.nb_mtu_seg_max = UINT16_MAX,
},
- /* Set of understood capabilities */
- .rx_offload_capa = 0x0,
+ /*
+ * Set of capabilities that can be verified upon
+ * configuring a sub-device.
+ */
+ .rx_offload_capa =
+ DEV_RX_OFFLOAD_VLAN_STRIP |
+ DEV_RX_OFFLOAD_QINQ_STRIP |
+ DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_LRO,
.tx_offload_capa = 0x0,
.flow_type_rss_offloads = 0x0,
};

+/**
+ * Check whether a specific offloading capability
+ * is supported by a sub_device.
+ *
+ * @return
+ * 0: all requested capabilities are supported by the sub_device
+ * positive value: This flag at least is not supported by the sub_device
+ */
+static int
+fs_port_offload_validate(struct rte_eth_dev *dev,
+ struct sub_device *sdev)
+{
+ struct rte_eth_dev_info infos = {0};
+ struct rte_eth_conf *cf;
+ uint32_t cap;
+
+ cf = &dev->data->dev_conf;
+ SUBOPS(sdev, dev_infos_get)(ETH(sdev), &infos);
+ /* RX capabilities */
+ cap = infos.rx_offload_capa;
+ if (cf->rxmode.hw_vlan_strip &&
+ ((cap & DEV_RX_OFFLOAD_VLAN_STRIP) == 0)) {
+ WARN("VLAN stripping offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_VLAN_STRIP;
+ }
+ if (cf->rxmode.hw_ip_checksum &&
+ ((cap & (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM)) !=
+ (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM))) {
+ WARN("IP checksum offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM;
+ }
+ if (cf->rxmode.enable_lro &&
+ ((cap & DEV_RX_OFFLOAD_TCP_LRO) == 0)) {
+ WARN("TCP LRO offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_TCP_LRO;
+ }
+ if (cf->rxmode.hw_vlan_extend &&
+ ((cap & DEV_RX_OFFLOAD_QINQ_STRIP) == 0)) {
+ WARN("Stacked VLAN stripping offload requested but not supported by sub_device %d",
+ SUB_ID(sdev));
+ return DEV_RX_OFFLOAD_QINQ_STRIP;
+ }
+ /* TX capabilities */
+ /* Nothing to do, no tx capa supported */
+ return 0;
+}
+
+/*
+ * Disable the dev_conf flag related to an offload capability flag
+ * within an ethdev configuration.
+ */
+static int
+fs_port_disable_offload(struct rte_eth_conf *cf,
+ uint32_t ol_cap)
+{
+ switch (ol_cap) {
+ case DEV_RX_OFFLOAD_VLAN_STRIP:
+ INFO("Disabling VLAN stripping offload");
+ cf->rxmode.hw_vlan_strip = 0;
+ break;
+ case DEV_RX_OFFLOAD_IPV4_CKSUM:
+ case DEV_RX_OFFLOAD_UDP_CKSUM:
+ case DEV_RX_OFFLOAD_TCP_CKSUM:
+ case (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM):
+ INFO("Disabling IP checksum offload");
+ cf->rxmode.hw_ip_checksum = 0;
+ break;
+ case DEV_RX_OFFLOAD_TCP_LRO:
+ INFO("Disabling TCP LRO offload");
+ cf->rxmode.enable_lro = 0;
+ break;
+ case DEV_RX_OFFLOAD_QINQ_STRIP:
+ INFO("Disabling stacked VLAN stripping offload");
+ cf->rxmode.hw_vlan_extend = 0;
+ break;
+ default:
+ DEBUG("Unable to disable offload capability: %" PRIx32,
+ ol_cap);
+ return -1;
+ }
+ return 0;
+}
+
static int
fs_dev_configure(struct rte_eth_dev *dev)
{
struct sub_device *sdev;
uint8_t i;
+ int capa_flag;
int ret;

FOREACH_SUBDEV(sdev, i, dev) {
if (sdev->state != DEV_PROBED)
continue;
+ DEBUG("Checking capabilities for sub_device %d", i);
+ while ((capa_flag = fs_port_offload_validate(dev, sdev))) {
+ /*
+ * Refuse to change configuration if multiple devices
+ * are present and we already have configured at least
+ * some of them.
+ */
+ if (PRIV(dev)->state >= DEV_ACTIVE &&
+ PRIV(dev)->subs_tail > 1) {
+ ERROR("device already configured, cannot fix live configuration");
+ return -1;
+ }
+ ret = fs_port_disable_offload(&dev->data->dev_conf,
+ capa_flag);
+ if (ret) {
+ ERROR("Unable to disable offload capability");
+ return ret;
+ }
+ }
+ }
+ FOREACH_SUBDEV(sdev, i, dev) {
+ if (sdev->state != DEV_PROBED)
+ continue;
DEBUG("Configuring sub-device %d", i);
ret = rte_eth_dev_configure(PORT_ID(sdev),
dev->data->nb_rx_queues,
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:36 UTC
Permalink
Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
drivers/net/failsafe/failsafe_private.h | 8 +++
drivers/net/failsafe/failsafe_rxtx.c | 124 ++++++++++++++++++++++++++------
2 files changed, 112 insertions(+), 20 deletions(-)

diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index f40ea2f..25a4dac 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -141,11 +141,18 @@ int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev);

/* RX / TX */

+void set_burst_fn(struct rte_eth_dev *dev, int force_safe);
+
uint16_t failsafe_rx_burst(void *rxq,
struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
uint16_t failsafe_tx_burst(void *txq,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts);

+uint16_t failsafe_rx_burst_fast(void *rxq,
+ struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t failsafe_tx_burst_fast(void *txq,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
/* ARGS */

int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
@@ -299,6 +306,7 @@ fs_switch_dev(struct rte_eth_dev *dev)
} else {
return;
}
+ set_burst_fn(dev, 0);
rte_wmb();
}

diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c
index a45b4e5..c15025f 100644
--- a/drivers/net/failsafe/failsafe_rxtx.c
+++ b/drivers/net/failsafe/failsafe_rxtx.c
@@ -31,16 +31,63 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

+#include <assert.h>
+
#include <rte_mbuf.h>
#include <rte_ethdev.h>

#include "failsafe_private.h"

-/*
- * TODO: write fast version,
- * without additional checks, to be activated once
- * everything has been verified to comply.
- */
+static inline int
+fs_rx_unsafe(struct sub_device *sdev)
+{
+ return (ETH(sdev) == NULL) ||
+ (ETH(sdev)->rx_pkt_burst == NULL) ||
+ (sdev->state != DEV_STARTED);
+}
+
+static inline int
+fs_tx_unsafe(struct sub_device *sdev)
+{
+ return (sdev == NULL) ||
+ (ETH(sdev) == NULL) ||
+ (ETH(sdev)->tx_pkt_burst == NULL) ||
+ (sdev->state != DEV_STARTED);
+}
+
+void
+set_burst_fn(struct rte_eth_dev *dev, int force_safe)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+ int need_safe;
+ int safe_set;
+
+ need_safe = force_safe;
+ FOREACH_SUBDEV(sdev, i, dev)
+ need_safe |= fs_rx_unsafe(sdev);
+ safe_set = (dev->rx_pkt_burst == &failsafe_rx_burst);
+ if (need_safe && !safe_set) {
+ DEBUG("Using safe RX bursts%s",
+ (force_safe ? " (forced)" : ""));
+ dev->rx_pkt_burst = &failsafe_rx_burst;
+ } else if (!need_safe && safe_set) {
+ DEBUG("Using fast RX bursts");
+ dev->rx_pkt_burst = &failsafe_rx_burst_fast;
+ }
+ need_safe = force_safe || fs_tx_unsafe(TX_SUBDEV(dev));
+ safe_set = (dev->tx_pkt_burst == &failsafe_tx_burst);
+ if (need_safe && !safe_set) {
+ DEBUG("Using safe TX bursts%s",
+ (force_safe ? " (forced)" : ""));
+ dev->tx_pkt_burst = &failsafe_tx_burst;
+ } else if (!need_safe && safe_set) {
+ DEBUG("Using fast TX bursts");
+ dev->tx_pkt_burst = &failsafe_tx_burst_fast;
+ }
+ rte_wmb();
+}
+
uint16_t
failsafe_rx_burst(void *queue,
struct rte_mbuf **rx_pkts,
@@ -63,11 +110,7 @@ failsafe_rx_burst(void *queue,
if (i == priv->subs_tail)
i = priv->subs_head;
sdev = &priv->subs[i];
- if (unlikely(ETH(sdev) == NULL))
- continue;
- if (unlikely(ETH(sdev)->rx_pkt_burst == NULL))
- continue;
- if (unlikely(sdev->state != DEV_STARTED))
+ if (unlikely(fs_rx_unsafe(sdev)))
continue;
sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
nb_rx = ETH(sdev)->
@@ -80,11 +123,40 @@ failsafe_rx_burst(void *queue,
return 0;
}

-/*
- * TODO: write fast version,
- * without additional checks, to be activated once
- * everything has been verified to comply.
- */
+uint16_t
+failsafe_rx_burst_fast(void *queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct fs_priv *priv;
+ struct sub_device *sdev;
+ struct rxq *rxq;
+ void *sub_rxq;
+ uint16_t nb_rx;
+ uint8_t nb_polled, nb_subs;
+ uint8_t i;
+
+ rxq = queue;
+ priv = rxq->priv;
+ nb_subs = priv->subs_tail - priv->subs_head;
+ nb_polled = 0;
+ for (i = rxq->last_polled; nb_polled < nb_subs; nb_polled++) {
+ i++;
+ if (i == priv->subs_tail)
+ i = priv->subs_head;
+ sdev = &priv->subs[i];
+ assert(!fs_rx_unsafe(sdev));
+ sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ nb_rx = ETH(sdev)->
+ rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ if (nb_rx) {
+ rxq->last_polled = i;
+ return nb_rx;
+ }
+ }
+ return 0;
+}
+
uint16_t
failsafe_tx_burst(void *queue,
struct rte_mbuf **tx_pkts,
@@ -96,12 +168,24 @@ failsafe_tx_burst(void *queue,

txq = queue;
sdev = TX_SUBDEV(txq->priv->dev);
- if (unlikely(sdev == NULL))
- return 0;
- if (unlikely(ETH(sdev) == NULL))
- return 0;
- if (unlikely(ETH(sdev)->tx_pkt_burst == NULL))
+ if (unlikely(fs_tx_unsafe(sdev)))
return 0;
sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
}
+
+uint16_t
+failsafe_tx_burst_fast(void *queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct sub_device *sdev;
+ struct txq *txq;
+ void *sub_txq;
+
+ txq = queue;
+ sdev = TX_SUBDEV(txq->priv->dev);
+ assert(!fs_tx_unsafe(sdev));
+ sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
+ return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+}
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:37 UTC
Permalink
Listen to INTR_RMV events issued by slaves.
Add atomic flags on slave queues to detect use of slave bursts function.
If a removal is detected, set the recollection flag on this slave.

During a slave upkeep round, if its recollection flag is set and its
burst functions are not in use by any thread, remove that slave.

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/fail_safe.rst | 14 +++++
drivers/net/failsafe/failsafe.c | 1 +
drivers/net/failsafe/failsafe_args.c | 13 +++++
drivers/net/failsafe/failsafe_eal.c | 3 +-
drivers/net/failsafe/failsafe_ether.c | 96 ++++++++++++++++++++++++++++++---
drivers/net/failsafe/failsafe_ops.c | 38 +++++++++++--
drivers/net/failsafe/failsafe_private.h | 72 ++++++++++++++++++++++---
drivers/net/failsafe/failsafe_rxtx.c | 17 +++++-
8 files changed, 234 insertions(+), 20 deletions(-)

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index 1b6e110..4154f0a 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -51,6 +51,12 @@ The Fail-safe PMD only supports a limited set of features. If you plan to use a
device underneath the Fail-safe PMD with a specific feature, this feature must
be supported by the Fail-safe PMD to avoid throwing any error.

+A notable exception is the device removal feature. The fail-safe PMD being a
+virtual device, it cannot currently be removed in the sense of a specific bus
+hotplug, like for PCI for example. It will however enable this feature for its
+sub-device automatically, detecting those that are capable and register the
+relevant callback for such event.
+
Check the feature matrix for the complete set of supported features.

Compilation options
@@ -170,3 +176,11 @@ emit and receive packets. It will store any applied configuration, and try to
apply it upon the probing of its missing sub-device. After this configuration
pass, the new sub-device will be synchronized with other sub-devices, i.e. be
started if the fail-safe PMD has been started by the user before.
+
+Plug-out feature
+----------------
+
+A sub-device supporting the device removal event can be removed from its bus at
+any time. The fail-safe PMD will register a callback for such event and react
+accordingly. It will try to safely stop, close and uninit the sub-device having
+emitted this event, allowing it to free its eventual resources.
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 6557255..4d35860 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -132,6 +132,7 @@ fs_hotplug_alarm(void *arg)
if (!PRIV(dev)->pending_alarm)
return;
PRIV(dev)->pending_alarm = 0;
+ failsafe_dev_remove(dev);
FOREACH_SUBDEV(sdev, i, dev)
if (sdev->state != PRIV(dev)->state)
break;
diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
index c723ca3..dd55aaf 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -443,6 +443,17 @@ failsafe_args_count_subdevice(struct rte_eth_dev *dev,
dev, params);
}

+static int
+fs_parse_sub_device(struct sub_device *sdev)
+{
+ struct rte_devargs *da;
+ char devstr[DEVARGS_MAXLEN] = "";
+
+ da = &sdev->devargs;
+ snprintf(devstr, sizeof(devstr), "%s,%s", da->name, da->args);
+ return fs_parse_device(sdev, devstr);
+}
+
int
failsafe_args_parse_subs(struct rte_eth_dev *dev)
{
@@ -455,6 +466,8 @@ failsafe_args_parse_subs(struct rte_eth_dev *dev)
continue;
if (sdev->cmdline)
ret = fs_execute_cmd(sdev, sdev->cmdline);
+ else
+ ret = fs_parse_sub_device(sdev);
if (ret == 0)
sdev->state = DEV_PARSED;
}
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index 86e16a6..3321dda 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -79,6 +79,7 @@ fs_bus_init(struct rte_eth_dev *dev)
return -ENODEV;
}
SUB_ID(sdev) = i;
+ sdev->fs_dev = dev;
sdev->dev = ETH(sdev)->device;
ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
sdev->state = DEV_PROBED;
@@ -96,7 +97,7 @@ failsafe_eal_init(struct rte_eth_dev *dev)
return ret;
if (PRIV(dev)->state < DEV_PROBED)
PRIV(dev)->state = DEV_PROBED;
- fs_switch_dev(dev);
+ fs_switch_dev(dev, NULL);
return 0;
}

diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 2958207..ea3105c 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -250,6 +250,67 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
return 0;
}

+static void
+fs_dev_remove(struct sub_device *sdev)
+{
+ int ret;
+
+ if (sdev == NULL)
+ return;
+ switch (sdev->state) {
+ case DEV_STARTED:
+ rte_eth_dev_stop(PORT_ID(sdev));
+ sdev->state = DEV_ACTIVE;
+ /* fallthrough */
+ case DEV_ACTIVE:
+ rte_eth_dev_close(PORT_ID(sdev));
+ sdev->state = DEV_PROBED;
+ /* fallthrough */
+ case DEV_PROBED:
+ ret = rte_eal_hotplug_remove(sdev->bus->name,
+ sdev->dev->name);
+ if (ret) {
+ ERROR("Bus detach failed for sub_device %u",
+ SUB_ID(sdev));
+ } else {
+ ETH(sdev)->state = RTE_ETH_DEV_UNUSED;
+ }
+ sdev->state = DEV_PARSED;
+ /* fallthrough */
+ case DEV_PARSED:
+ case DEV_UNDEFINED:
+ sdev->state = DEV_UNDEFINED;
+ /* the end */
+ break;
+ }
+ failsafe_hotplug_alarm_install(sdev->fs_dev);
+}
+
+static inline int
+fs_rxtx_clean(struct sub_device *sdev)
+{
+ uint16_t i;
+
+ for (i = 0; i < ETH(sdev)->data->nb_rx_queues; i++)
+ if (FS_ATOMIC_RX(sdev, i))
+ return 0;
+ for (i = 0; i < ETH(sdev)->data->nb_tx_queues; i++)
+ if (FS_ATOMIC_TX(sdev, i))
+ return 0;
+ return 1;
+}
+
+void
+failsafe_dev_remove(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ if (sdev->remove && fs_rxtx_clean(sdev))
+ fs_dev_remove(sdev);
+}
+
int
failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
{
@@ -263,13 +324,13 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)

ret = failsafe_args_parse_subs(dev);
if (ret)
- return ret;
+ goto err_remove;

if (PRIV(dev)->state < DEV_PROBED)
return 0;
ret = failsafe_eal_init(dev);
if (ret)
- return ret;
+ goto err_remove;
if (PRIV(dev)->state < DEV_ACTIVE)
return 0;
inactive = 0;
@@ -278,15 +339,14 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
inactive |= UINT32_C(1) << i;
ret = dev->dev_ops->dev_configure(dev);
if (ret)
- return ret;
+ goto err_remove;
FOREACH_SUBDEV(sdev, i, dev) {
if (inactive & (UINT32_C(1) << i)) {
ret = fs_eth_dev_conf_apply(dev, sdev);
if (ret) {
ERROR("Could not apply configuration to sub_device %d",
i);
- /* TODO: disable device */
- return ret;
+ goto err_remove;
}
}
}
@@ -300,6 +360,30 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
return 0;
ret = dev->dev_ops->dev_start(dev);
if (ret)
- return ret;
+ goto err_remove;
+ return 0;
+err_remove:
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state != PRIV(dev)->state)
+ sdev->remove = 1;
+ return ret;
+}
+
+int
+failsafe_eth_rmv_event_callback(uint8_t port_id __rte_unused,
+ enum rte_eth_event_type event __rte_unused,
+ void *cb_arg, void *out __rte_unused)
+{
+ struct sub_device *sdev = cb_arg;
+
+ /* Switch as soon as possible tx_dev. */
+ fs_switch_dev(sdev->fs_dev, sdev);
+ /* Use safe bursts in any case. */
+ set_burst_fn(sdev->fs_dev, 1);
+ /*
+ * Async removal, the sub-PMD will try to unregister
+ * the callback at the source of the current thread context.
+ */
+ sdev->remove = 1;
return 0;
}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 5fb0135..2e1c798 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -33,6 +33,8 @@

#include <assert.h>
#include <stdint.h>
+
+#include <rte_atomic.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>
#include <rte_flow.h>
@@ -204,9 +206,21 @@ fs_dev_configure(struct rte_eth_dev *dev)
}
}
FOREACH_SUBDEV(sdev, i, dev) {
+ int rmv_interrupt = 0;
+
if (sdev->state != DEV_PROBED)
continue;
+
+ rmv_interrupt = ETH(sdev)->data->dev_flags &
+ RTE_ETH_DEV_INTR_RMV;
+ if (rmv_interrupt) {
+ DEBUG("Enabling RMV interrupts for sub_device %d", i);
+ dev->data->dev_conf.intr_conf.rmv = 1;
+ } else {
+ DEBUG("sub_device %d does not support RMV event", i);
+ }
DEBUG("Configuring sub-device %d", i);
+ sdev->remove = 0;
ret = rte_eth_dev_configure(PORT_ID(sdev),
dev->data->nb_rx_queues,
dev->data->nb_tx_queues,
@@ -215,6 +229,16 @@ fs_dev_configure(struct rte_eth_dev *dev)
ERROR("Could not configure sub_device %d", i);
return ret;
}
+ if (rmv_interrupt) {
+ ret = rte_eth_dev_callback_register(PORT_ID(sdev),
+ RTE_ETH_EVENT_INTR_RMV,
+ failsafe_eth_rmv_event_callback,
+ sdev);
+ if (ret)
+ WARN("Failed to register RMV callback for sub_device %d",
+ SUB_ID(sdev));
+ }
+ dev->data->dev_conf.intr_conf.rmv = 0;
sdev->state = DEV_ACTIVE;
}
if (PRIV(dev)->state < DEV_ACTIVE)
@@ -240,7 +264,7 @@ fs_dev_start(struct rte_eth_dev *dev)
}
if (PRIV(dev)->state < DEV_STARTED)
PRIV(dev)->state = DEV_STARTED;
- fs_switch_dev(dev);
+ fs_switch_dev(dev, NULL);
return 0;
}

@@ -351,10 +375,14 @@ fs_rx_queue_setup(struct rte_eth_dev *dev,
fs_rx_queue_release(rxq);
dev->data->rx_queues[rx_queue_id] = NULL;
}
- rxq = rte_zmalloc(NULL, sizeof(*rxq),
+ rxq = rte_zmalloc(NULL,
+ sizeof(*rxq) +
+ sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
RTE_CACHE_LINE_SIZE);
if (rxq == NULL)
return -ENOMEM;
+ FOREACH_SUBDEV(sdev, i, dev)
+ rte_atomic64_init(&rxq->refcnt[i]);
rxq->qid = rx_queue_id;
rxq->socket_id = socket_id;
rxq->info.mp = mb_pool;
@@ -414,10 +442,14 @@ fs_tx_queue_setup(struct rte_eth_dev *dev,
fs_tx_queue_release(txq);
dev->data->tx_queues[tx_queue_id] = NULL;
}
- txq = rte_zmalloc("ethdev TX queue", sizeof(*txq),
+ txq = rte_zmalloc("ethdev TX queue",
+ sizeof(*txq) +
+ sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
RTE_CACHE_LINE_SIZE);
if (txq == NULL)
return -ENOMEM;
+ FOREACH_SUBDEV(sdev, i, dev)
+ rte_atomic64_init(&txq->refcnt[i]);
txq->qid = tx_queue_id;
txq->socket_id = socket_id;
txq->info.conf = *tx_conf;
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 25a4dac..6a8041d 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -36,6 +36,7 @@

#include <sys/queue.h>

+#include <rte_atomic.h>
#include <rte_dev.h>
#include <rte_ethdev.h>
#include <rte_devargs.h>
@@ -65,6 +66,7 @@ struct rxq {
uint8_t last_polled;
unsigned int socket_id;
struct rte_eth_rxq_info info;
+ rte_atomic64_t refcnt[];
};

struct txq {
@@ -72,6 +74,7 @@ struct txq {
uint16_t qid;
unsigned int socket_id;
struct rte_eth_txq_info info;
+ rte_atomic64_t refcnt[];
};

struct rte_flow {
@@ -101,6 +104,10 @@ struct sub_device {
enum dev_state state;
/* Some device are defined as a command line */
char *cmdline;
+ /* fail-safe device backreference */
+ struct rte_eth_dev *fs_dev;
+ /* flag calling for recollection */
+ volatile unsigned int remove:1;
};

struct fs_priv {
@@ -168,6 +175,10 @@ int failsafe_eal_uninit(struct rte_eth_dev *dev);
/* ETH_DEV */

int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
+void failsafe_dev_remove(struct rte_eth_dev *dev);
+int failsafe_eth_rmv_event_callback(uint8_t port_id,
+ enum rte_eth_event_type type,
+ void *arg, void *out);

/* GLOBALS */

@@ -233,6 +244,39 @@ extern int mac_from_arg;
#define SUBOPS(s, ops) \
(ETH(s)->dev_ops->ops)

+/**
+ * Atomic guard
+ */
+
+/**
+ * a: (rte_atomic64_t *)
+ */
+#define FS_ATOMIC_P(a) \
+ rte_atomic64_add(&(a), 1)
+
+/**
+ * a: (rte_atomic64_t *)
+ */
+#define FS_ATOMIC_V(a) \
+ rte_atomic64_sub(&(a), 1)
+
+/**
+ * s: (struct sub_device *)
+ * i: uint16_t qid
+ */
+#define FS_ATOMIC_RX(s, i) \
+ rte_atomic64_read( \
+ &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] \
+ )
+/**
+ * s: (struct sub_device *)
+ * i: uint16_t qid
+ */
+#define FS_ATOMIC_TX(s, i) \
+ rte_atomic64_read( \
+ &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] \
+ )
+
#ifndef NDEBUG
#include <stdio.h>
#define DEBUG__(m, ...) \
@@ -274,33 +318,45 @@ fs_find_next(struct rte_eth_dev *dev, uint8_t sid,
return sid;
}

+/*
+ * Switch emitting device.
+ * If banned is set, banned must not be considered for
+ * the role of emitting device.
+ */
static inline void
-fs_switch_dev(struct rte_eth_dev *dev)
+fs_switch_dev(struct rte_eth_dev *dev,
+ struct sub_device *banned)
{
+ struct sub_device *txd;
enum dev_state req_state;

req_state = PRIV(dev)->state;
- if (PREFERRED_SUBDEV(dev)->state >= req_state) {
- if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) &&
- (TX_SUBDEV(dev) == NULL ||
+ txd = TX_SUBDEV(dev);
+ if (PREFERRED_SUBDEV(dev)->state >= req_state &&
+ PREFERRED_SUBDEV(dev) != banned) {
+ if (txd != PREFERRED_SUBDEV(dev) &&
+ (txd == NULL ||
(req_state == DEV_STARTED) ||
- (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) {
+ (txd && txd->state < DEV_STARTED))) {
DEBUG("Switching tx_dev to preferred sub_device");
PRIV(dev)->subs_tx = 0;
}
- } else if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) ||
- TX_SUBDEV(dev) == NULL) {
+ } else if ((txd && txd->state < req_state) ||
+ txd == NULL ||
+ txd == banned) {
struct sub_device *sdev;
uint8_t i;

/* Using acceptable device */
FOREACH_SUBDEV_ST(sdev, i, dev, req_state) {
+ if (sdev == banned)
+ continue;
DEBUG("Switching tx_dev to sub_device %d",
i);
PRIV(dev)->subs_tx = i;
break;
}
- } else if (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) {
+ } else if (txd && txd->state < req_state) {
DEBUG("No device ready, deactivating tx_dev");
PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
} else {
diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c
index c15025f..82a8c4e 100644
--- a/drivers/net/failsafe/failsafe_rxtx.c
+++ b/drivers/net/failsafe/failsafe_rxtx.c
@@ -33,6 +33,7 @@

#include <assert.h>

+#include <rte_atomic.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>

@@ -113,8 +114,10 @@ failsafe_rx_burst(void *queue,
if (unlikely(fs_rx_unsafe(sdev)))
continue;
sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ FS_ATOMIC_P(rxq->refcnt[sdev->sid]);
nb_rx = ETH(sdev)->
rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ FS_ATOMIC_V(rxq->refcnt[sdev->sid]);
if (nb_rx) {
rxq->last_polled = i;
return nb_rx;
@@ -147,8 +150,10 @@ failsafe_rx_burst_fast(void *queue,
sdev = &priv->subs[i];
assert(!fs_rx_unsafe(sdev));
sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+ FS_ATOMIC_P(rxq->refcnt[sdev->sid]);
nb_rx = ETH(sdev)->
rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+ FS_ATOMIC_V(rxq->refcnt[sdev->sid]);
if (nb_rx) {
rxq->last_polled = i;
return nb_rx;
@@ -165,13 +170,17 @@ failsafe_tx_burst(void *queue,
struct sub_device *sdev;
struct txq *txq;
void *sub_txq;
+ uint16_t nb_tx;

txq = queue;
sdev = TX_SUBDEV(txq->priv->dev);
if (unlikely(fs_tx_unsafe(sdev)))
return 0;
sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
- return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+ FS_ATOMIC_P(txq->refcnt[sdev->sid]);
+ nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+ FS_ATOMIC_V(txq->refcnt[sdev->sid]);
+ return nb_tx;
}

uint16_t
@@ -182,10 +191,14 @@ failsafe_tx_burst_fast(void *queue,
struct sub_device *sdev;
struct txq *txq;
void *sub_txq;
+ uint16_t nb_tx;

txq = queue;
sdev = TX_SUBDEV(txq->priv->dev);
assert(!fs_tx_unsafe(sdev));
sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
- return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+ FS_ATOMIC_P(txq->refcnt[sdev->sid]);
+ nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts);
+ FS_ATOMIC_V(txq->refcnt[sdev->sid]);
+ return nb_tx;
}
--
2.1.4
Gaetan Rivet
2017-07-07 00:09:38 UTC
Permalink
Listen to INTR_RMV events issued by slaves.
Add atomic flags on slave queues to detect use of slave bursts function.
If a removal is detected, set the recollection flag on this slave.

During a slave upkeep round, if its recollection flag is set and its
burst functions are not in use by any thread, remove that slave.

Signed-off-by: Gaetan Rivet <***@6wind.com>
Acked-by: Olga Shern <***@mellanox.com>
---
doc/guides/nics/fail_safe.rst | 14 +++++
drivers/net/failsafe/failsafe.c | 1 +
drivers/net/failsafe/failsafe_args.c | 13 +++++
drivers/net/failsafe/failsafe_eal.c | 3 +-
drivers/net/failsafe/failsafe_ether.c | 96 ++++++++++++++++++++++++++++++---
drivers/net/failsafe/failsafe_ops.c | 38 +++++++++++--
drivers/net/failsafe/failsafe_private.h | 72 ++++++++++++++++++++++---
drivers/net/failsafe/failsafe_rxtx.c | 17 +++++-
8 files changed, 234 insertions(+), 20 deletions(-)

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index 1b6e110..4154f0a 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -51,6 +51,12 @@ The Fail-safe PMD only supports a limited set of features. If you plan to use a
device underneath the Fail-safe PMD with a specific feature, this feature must
be supported by the Fail-safe PMD to avoid throwing any error.

+A notable exception is the device removal feature. The fail-safe PMD being a
+virtual device, it cannot currently be removed in the sense of a specific bus
+hotplug, like for PCI for example. It will however enable this feature for its
+sub-device automatically, detecting those that are capable and register the
+relevant callback for such event.
+
Check the feature matrix for the complete set of supported features.

Compilation options
@@ -170,3 +176,11 @@ emit and receive packets. It will store any applied configuration, and try to
apply it upon the probing of its missing sub-device. After this configuration
pass, the new sub-device will be synchronized with other sub-devices, i.e. be
started if the fail-safe PMD has been started by the user before.
+
+Plug-out feature
+----------------
+
+A sub-device supporting the device removal event can be removed from its bus at
+any time. The fail-safe PMD will register a callback for such event and react
+accordingly. It will try to safely stop, close and uninit the sub-device having
+emitted this event, allowing it to free its eventual resources.
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 6557255..4d35860 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -132,6 +132,7 @@ fs_hotplug_alarm(void *arg)
if (!PRIV(dev)->pending_alarm)
return;
PRIV(dev)->pending_alarm = 0;
+ failsafe_dev_remove(dev);
FOREACH_SUBDEV(sdev, i, dev)
if (sdev->state != PRIV(dev)->state)
break;
diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
index c723ca3..dd55aaf 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -443,6 +443,17 @@ failsafe_args_count_subdevice(struct rte_eth_dev *dev,
dev, params);
}

+static int
+fs_parse_sub_device(struct sub_device *sdev)
+{
+ struct rte_devargs *da;
+ char devstr[DEVARGS_MAXLEN] = "";
+
+ da = &sdev->devargs;
+ snprintf(devstr, sizeof(devstr), "%s,%s", da->name, da->args);
+ return fs_parse_device(sdev, devstr);
+}
+
int
failsafe_args_parse_subs(struct rte_eth_dev *dev)
{
@@ -455,6 +466,8 @@ failsafe_args_parse_subs(struct rte_eth_dev *dev)
continue;
if (sdev->cmdline)
ret = fs_execute_cmd(sdev, sdev->cmdline);
+ else
+ ret = fs_parse_sub_device(sdev);
if (ret == 0)
sdev->state = DEV_PARSED;
}
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index 86e16a6..3321dda 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -79,6 +79,7 @@ fs_bus_init(struct rte_eth_dev *dev)
return -ENODEV;
}
SUB_ID(sdev) = i;
+ sdev->fs_dev = dev;
sdev->dev = ETH(sdev)->device;
ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
sdev->state = DEV_PROBED;
@@ -96,7 +97,7 @@ failsafe_eal_init(struct rte_eth_dev *dev)
return ret;
if (PRIV(dev)->state < DEV_PROBED)
PRIV(dev)->state = DEV_PROBED;
- fs_switch_dev(dev);
+ fs_switch_dev(dev, NULL);
return 0;
}

diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 2958207..ea3105c 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -250,6 +250,67 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
return 0;
}

+static void
+fs_dev_remove(struct sub_device *sdev)
+{
+ int ret;
+
+ if (sdev == NULL)
+ return;
+ switch (sdev->state) {
+ case DEV_STARTED:
+ rte_eth_dev_stop(PORT_ID(sdev));
+ sdev->state = DEV_ACTIVE;
+ /* fallthrough */
+ case DEV_ACTIVE:
+ rte_eth_dev_close(PORT_ID(sdev));
+ sdev->state = DEV_PROBED;
+ /* fallthrough */
+ case DEV_PROBED:
+ ret = rte_eal_hotplug_remove(sdev->bus->name,
+ sdev->dev->name);
+ if (ret) {
+ ERROR("Bus detach failed for sub_device %u",
+ SUB_ID(sdev));
+ } else {
+ ETH(sdev)->state = RTE_ETH_DEV_UNUSED;
+ }
+ sdev->state = DEV_PARSED;
+ /* fallthrough */
+ case DEV_PARSED:
+ case DEV_UNDEFINED:
+ sdev->state = DEV_UNDEFINED;
+ /* the end */
+ break;
+ }
+ failsafe_hotplug_alarm_install(sdev->fs_dev);
+}
+
+static inline int
+fs_rxtx_clean(struct sub_device *sdev)
+{
+ uint16_t i;
+
+ for (i = 0; i < ETH(sdev)->data->nb_rx_queues; i++)
+ if (FS_ATOMIC_RX(sdev, i))
+ return 0;
+ for (i = 0; i < ETH(sdev)->data->nb_tx_queues; i++)
+ if (FS_ATOMIC_TX(sdev, i))
+ return 0;
+ return 1;
+}
+
+void
+failsafe_dev_remove(struct rte_eth_dev *dev)
+{
+ struct sub_device *sdev;
+ uint8_t i;
+
+ FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE)
+ if (sdev->remove && fs_rxtx_clean(sdev))
+ fs_dev_remove(sdev);
+}
+
int
failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
{
@@ -263,13 +324,13 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)

ret = failsafe_args_parse_subs(dev);
if (ret)
- return ret;
+ goto err_remove;

if (PRIV(dev)->state < DEV_PROBED)
return 0;
ret = failsafe_eal_init(dev);
if (ret)
- return ret;
+ goto err_remove;
if (PRIV(dev)->state < DEV_ACTIVE)
return 0;
inactive = 0;
@@ -278,15 +339,14 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
inactive |= UINT32_C(1) << i;
ret = dev->dev_ops->dev_configure(dev);
if (ret)
- return ret;
+ goto err_remove;
FOREACH_SUBDEV(sdev, i, dev) {
if (inactive & (UINT32_C(1) << i)) {
ret = fs_eth_dev_conf_apply(dev, sdev);
if (ret) {
ERROR("Could not apply configuration to sub_device %d",
i);
- /* TODO: disable device */
- return ret;
+ goto err_remove;
}
}
}
@@ -300,6 +360,30 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
return 0;
ret = dev->dev_ops->dev_start(dev);
if (ret)
- return ret;
+ goto err_remove;
+ return 0;
+err_remove:
+ FOREACH_SUBDEV(sdev, i, dev)
+ if (sdev->state != PRIV(dev)->state)
+ sdev->remove = 1;
+ return ret;
+}
+
+int
+failsafe_eth_rmv_event_callback(uint8_t port_id __rte_unused,
+ enum rte_eth_event_type event __rte_unused,
+ void *cb_arg, void *out __rte_unused)
+{
+ struct sub_device *sdev = cb_arg;
+
+ /* Switch as soon as possible tx_dev. */
+ fs_switch_dev(sdev->fs_dev, sdev);
+ /* Use safe bursts in any case. */
+ set_burst_fn(sdev->fs_dev, 1);
+ /*
+ * Async removal, the sub-PMD will try to unregister
+ * the callback at the source of the current thread context.
+ */
+ sdev->remove = 1;
return 0;
}
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 5fb0135..2e1c798 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -33,6 +33,8 @@

#include <assert.h>
#include <stdint.h>
+
+#include <rte_atomic.h>
#