Discussion:
[dpdk-dev] [PATCH RFC 3/3] examples: example showing use of callbacks.
(too old to reply)
Bruce Richardson
2014-12-22 16:47:24 UTC
Permalink
Rough example showing how callbacks can be used to insert a timestamp
into each packet on RX. Then on TX the timestamp is used to calculate
the packet latency in cycles through the app.

Signed-off-by: Bruce Richardson <***@intel.com>
---
examples/rxtx_callbacks/Makefile | 57 ++++++++++
examples/rxtx_callbacks/basicfwd.c | 222 +++++++++++++++++++++++++++++++++++++
examples/rxtx_callbacks/basicfwd.h | 46 ++++++++
3 files changed, 325 insertions(+)
create mode 100644 examples/rxtx_callbacks/Makefile
create mode 100644 examples/rxtx_callbacks/basicfwd.c
create mode 100644 examples/rxtx_callbacks/basicfwd.h

diff --git a/examples/rxtx_callbacks/Makefile b/examples/rxtx_callbacks/Makefile
new file mode 100644
index 0000000..4a5d99f
--- /dev/null
+++ b/examples/rxtx_callbacks/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = basicfwd
+
+# all sources are stored in SRCS-y
+SRCS-y := basicfwd.c
+
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+# The per-object variable has to name the object built from SRCS-y,
+# i.e. basicfwd.o; there is no main.o in this example.
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_basicfwd.o += -Wno-return-type
+endif
+
+EXTRA_CFLAGS += -O3 -g -Wfatal-errors
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/rxtx_callbacks/basicfwd.c b/examples/rxtx_callbacks/basicfwd.c
new file mode 100644
index 0000000..0209bf4
--- /dev/null
+++ b/examples/rxtx_callbacks/basicfwd.c
@@ -0,0 +1,222 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <inttypes.h>
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_mbuf.h>
+#include "basicfwd.h"
+
+#define RX_RING_SIZE 128 /* ring size passed to rte_eth_rx_queue_setup() */
+#define TX_RING_SIZE 512 /* ring size passed to rte_eth_tx_queue_setup() */
+
+#define NUM_MBUFS 8191 /* mbufs per port; multiplied by nb_ports for the pool */
+#define MBUF_SIZE (1600 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+#define MBUF_CACHE_SIZE 250 /* cache size given to rte_mempool_create() */
+#define BURST_SIZE 32 /* packets requested per rte_eth_rx_burst() call */
+
+static const struct rte_eth_conf port_conf_default = { /* copied by port_init() */
+ .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN, },
+};
+
+static unsigned nb_ports; /* set in MAIN() from rte_eth_dev_count() */
+
+static struct {
+ uint64_t total_cycles; /* cycles summed by calc_latency() since last report */
+ uint64_t total_pkts; /* packets counted by calc_latency() since last report */
+} latency_numbers;
+
+
+/*
+ * RX callback: store the current TSC value in the udata64 field of every
+ * packet of the burst. The burst is passed on unchanged, so the packet
+ * count is returned as received.
+ */
+static uint16_t
+add_timestamps(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+		struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
+{
+	const uint64_t tsc = rte_rdtsc();
+	struct rte_mbuf **cur = pkts;
+	struct rte_mbuf **const end = pkts + nb_pkts;
+
+	/* one TSC read is shared by the whole burst */
+	while (cur != end) {
+		(*cur)->udata64 = tsc;
+		cur++;
+	}
+	return nb_pkts;
+}
+
+/*
+ * TX callback: add up the TSC cycles elapsed since the timestamp held in
+ * each packet's udata64 field and keep running totals in latency_numbers.
+ * Once more than 100 million packets have been counted, the average number
+ * of cycles per packet is printed and both totals restart from zero.
+ * The burst itself is not modified; nb_pkts is returned unchanged.
+ */
+static uint16_t
+calc_latency(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+		struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
+{
+	const uint64_t tsc = rte_rdtsc();
+	uint64_t burst_cycles = 0;
+	uint16_t idx;
+
+	for (idx = 0; idx < nb_pkts; idx++)
+		burst_cycles += tsc - pkts[idx]->udata64;
+
+	latency_numbers.total_cycles += burst_cycles;
+	latency_numbers.total_pkts += nb_pkts;
+
+	/* nothing to report until the sample is large enough */
+	if (latency_numbers.total_pkts <= (100 * 1000 * 1000ULL))
+		return nb_pkts;
+
+	printf("Latency = %"PRIu64" cycles\n",
+		latency_numbers.total_cycles / latency_numbers.total_pkts);
+	latency_numbers.total_pkts = 0;
+	latency_numbers.total_cycles = 0;
+	return nb_pkts;
+}
+
+/*
+ * Initialises a given port using global settings and with the rx buffers
+ * coming from the mbuf_pool passed as parameter.
+ *
+ * One RX and one TX queue are configured, the port is started, its MAC
+ * address is printed, promiscuous mode is enabled and the timestamping
+ * callbacks are attached to queue 0 in each direction.
+ *
+ * Returns 0 on success. On failure returns the non-zero value reported by
+ * the failing ethdev setup call, or -1 when the port id is out of range or
+ * a callback could not be registered.
+ */
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+	struct rte_eth_conf port_conf = port_conf_default;
+	const uint16_t rx_rings = 1, tx_rings = 1;
+	struct ether_addr addr;
+	int retval;
+	uint16_t q;
+
+	if (port >= rte_eth_dev_count())
+		return -1;
+
+	retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+	if (retval != 0)
+		return retval;
+
+	for (q = 0; q < rx_rings; q++) {
+		retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
+				rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+		if (retval < 0)
+			return retval;
+	}
+
+	for (q = 0; q < tx_rings; q++) {
+		retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
+				rte_eth_dev_socket_id(port), NULL);
+		if (retval < 0)
+			return retval;
+	}
+
+	retval = rte_eth_dev_start(port);
+	if (retval < 0)
+		return retval;
+
+	rte_eth_macaddr_get(port, &addr);
+	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+			(unsigned)port,
+			addr.addr_bytes[0], addr.addr_bytes[1],
+			addr.addr_bytes[2], addr.addr_bytes[3],
+			addr.addr_bytes[4], addr.addr_bytes[5]);
+
+	rte_eth_promiscuous_enable(port);
+
+	/*
+	 * Both registration calls return NULL on error. The example is
+	 * pointless without its callbacks, so report the failure instead of
+	 * silently forwarding without latency measurement.
+	 */
+	if (rte_eth_add_rx_callback(port, 0, add_timestamps, NULL) == NULL ||
+			rte_eth_add_tx_callback(port, 0, calc_latency, NULL) == NULL)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Main thread that does the work: polls queue 0 of every port in turn and
+ * sends each received burst out of queue 0 of the paired port (port ^ 1),
+ * i.e. 0 <-> 1, 2 <-> 3 and so on. Packets the TX call did not accept are
+ * freed. This function never returns.
+ */
+static __attribute__((noreturn)) void
+lcore_main(void)
+{
+	uint8_t port;
+
+	/*
+	 * Warn about ports attached to a different NUMA node from the polling
+	 * thread. Socket 0 is a valid node id, so the test must be >= 0;
+	 * negative ids (presumably "unknown/invalid" - confirm against the
+	 * ethdev API) are skipped.
+	 */
+	for (port = 0; port < nb_ports; port++)
+		if (rte_eth_dev_socket_id(port) >= 0 &&
+				rte_eth_dev_socket_id(port) !=
+						(int)rte_socket_id())
+			printf("WARNING, port %u is on remote NUMA node to "
+					"polling thread.\n\tPerformance will "
+					"not be optimal.\n", (unsigned)port);
+
+	printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
+			rte_lcore_id());
+	for (;;) {
+		for (port = 0; port < nb_ports; port++) {
+			struct rte_mbuf *bufs[BURST_SIZE];
+			const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
+					bufs, BURST_SIZE);
+			if (unlikely(nb_rx == 0))
+				continue;
+			const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
+					bufs, nb_rx);
+			/* free whatever the TX call did not take */
+			if (unlikely(nb_tx < nb_rx)) {
+				uint16_t buf;
+				for (buf = nb_tx; buf < nb_rx; buf++)
+					rte_pktmbuf_free(bufs[buf]);
+			}
+		}
+	}
+}
+
+/*
+ * Main function, does initialisation and calls the per-lcore functions.
+ *
+ * Initialises the EAL, requires an even number of ports (at least two,
+ * because lcore_main() forwards between port pairs), creates one mbuf pool
+ * sized for all ports, initialises every port and then runs lcore_main()
+ * on the calling core. Any failure terminates the application through
+ * rte_exit().
+ */
+int
+MAIN(int argc, char *argv[])
+{
+	struct rte_mempool *mbuf_pool;
+	uint8_t portid;
+
+	/* init EAL */
+	int ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+	argc -= ret;
+	argv += ret;
+
+	/* ports are used in pairs, so zero or an odd count cannot work */
+	nb_ports = rte_eth_dev_count();
+	if (nb_ports < 2 || (nb_ports & 1))
+		rte_exit(EXIT_FAILURE,
+				"Error: number of ports must be even and at least 2\n");
+
+	mbuf_pool = rte_mempool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
+			MBUF_SIZE, MBUF_CACHE_SIZE,
+			sizeof(struct rte_pktmbuf_pool_private),
+			rte_pktmbuf_pool_init, NULL,
+			rte_pktmbuf_init, NULL,
+			rte_socket_id(), 0);
+	if (mbuf_pool == NULL)
+		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+	/* initialize all ports */
+	for (portid = 0; portid < nb_ports; portid++)
+		if (port_init(portid, mbuf_pool) != 0)
+			rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n",
+					portid);
+
+	if (rte_lcore_count() > 1)
+		printf("\nWARNING: Too much enabled lcores - App uses only 1 lcore\n");
+
+	/* call lcore_main on master core only */
+	lcore_main();
+	return 0;
+}
diff --git a/examples/rxtx_callbacks/basicfwd.h b/examples/rxtx_callbacks/basicfwd.h
new file mode 100644
index 0000000..3797b5d
--- /dev/null
+++ b/examples/rxtx_callbacks/basicfwd.h
@@ -0,0 +1,46 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef BASICFWD_H
+#define BASICFWD_H
+
+/*
+ * Name of the application entry point: _main when RTE_EXEC_ENV_BAREMETAL
+ * is defined, the regular main otherwise.
+ */
+#ifndef RTE_EXEC_ENV_BAREMETAL
+#define MAIN main
+#else
+#define MAIN _main
+#endif
+
+/* Entry point implemented in basicfwd.c. */
+int MAIN(int argc, char *argv[]);
+
+#endif /* BASICFWD_H */
--
1.9.3
Bruce Richardson
2014-12-22 16:47:22 UTC
Permalink
The callbacks member of the rte_eth_dev structure has been renamed
to intr_cbs to make it clear that it refers to callbacks from NIC
interrupts. This then allows us to add other types of callbacks to
the structure without ambiguity.

Signed-off-by: Bruce Richardson <***@intel.com>
---
app/test/virtual_pmd.c | 2 +-
lib/librte_ether/rte_ethdev.c | 12 ++++++------
lib/librte_ether/rte_ethdev.h | 2 +-
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/app/test/virtual_pmd.c b/app/test/virtual_pmd.c
index ade6cb0..8845ba8 100644
--- a/app/test/virtual_pmd.c
+++ b/app/test/virtual_pmd.c
@@ -576,7 +576,7 @@ virtual_ethdev_create(const char *name, struct ether_addr *mac_addr,
eth_dev->data->nb_rx_queues = (uint16_t)1;
eth_dev->data->nb_tx_queues = (uint16_t)1;

- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->intr_cbs));

eth_dev->data->dev_link.link_status = 0;
eth_dev->data->dev_link.link_speed = ETH_LINK_SPEED_10000;
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 95f2ceb..87a5323 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -265,7 +265,7 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
eth_dev->data->rx_mbuf_alloc_failed = 0;

/* init user callbacks */
- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->intr_cbs));

/*
* Set the default MTU.
@@ -2697,7 +2697,7 @@ rte_eth_dev_callback_register(uint8_t port_id,
dev = &rte_eth_devices[port_id];
rte_spinlock_lock(&rte_eth_dev_cb_lock);

- TAILQ_FOREACH(user_cb, &(dev->callbacks), next) {
+ TAILQ_FOREACH(user_cb, &(dev->intr_cbs), next) {
if (user_cb->cb_fn == cb_fn &&
user_cb->cb_arg == cb_arg &&
user_cb->event == event) {
@@ -2711,7 +2711,7 @@ rte_eth_dev_callback_register(uint8_t port_id,
user_cb->cb_fn = cb_fn;
user_cb->cb_arg = cb_arg;
user_cb->event = event;
- TAILQ_INSERT_TAIL(&(dev->callbacks), user_cb, next);
+ TAILQ_INSERT_TAIL(&(dev->intr_cbs), user_cb, next);
}

rte_spinlock_unlock(&rte_eth_dev_cb_lock);
@@ -2738,7 +2738,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
rte_spinlock_lock(&rte_eth_dev_cb_lock);

ret = 0;
- for (cb = TAILQ_FIRST(&dev->callbacks); cb != NULL; cb = next) {
+ for (cb = TAILQ_FIRST(&dev->intr_cbs); cb != NULL; cb = next) {

next = TAILQ_NEXT(cb, next);

@@ -2752,7 +2752,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
* then remove it.
*/
if (cb->active == 0) {
- TAILQ_REMOVE(&(dev->callbacks), cb, next);
+ TAILQ_REMOVE(&(dev->intr_cbs), cb, next);
rte_free(cb);
} else {
ret = -EAGAIN;
@@ -2771,7 +2771,7 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
struct rte_eth_dev_callback dev_cb;

rte_spinlock_lock(&rte_eth_dev_cb_lock);
- TAILQ_FOREACH(cb_lst, &(dev->callbacks), next) {
+ TAILQ_FOREACH(cb_lst, &(dev->intr_cbs), next) {
if (cb_lst->cb_fn == NULL || cb_lst->event != event)
continue;
dev_cb = *cb_lst;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index f66805d..c76a5d0 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1564,7 +1564,7 @@ struct rte_eth_dev {
const struct eth_driver *driver;/**< Driver for this device */
struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
- struct rte_eth_dev_cb_list callbacks; /**< User application callbacks */
+ struct rte_eth_dev_cb_list intr_cbs; /**< User application callbacks on interrupt*/
};

struct rte_eth_dev_sriov {
diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c
index ef5ddf4..01e8f21 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_api.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_api.c
@@ -247,7 +247,7 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
eth_dev->data->nb_rx_queues = (uint16_t)1;
eth_dev->data->nb_tx_queues = (uint16_t)1;

- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->intr_cbs));

eth_dev->data->dev_link.link_status = 0;
--
1.9.3
Bruce Richardson
2014-12-22 16:47:23 UTC
Permalink
Add in support for inline processing of packets inside the RX or
TX call. For an RX callback, what happens is that we get a set of
packets from the NIC and then pass them to a callback function, if
configured, to allow additional processing to be done on them, e.g.
filling in more mbuf fields, before passing back to the application.
On TX, the packets are similarly post-processed before being handed
to the NIC for transmission.

Signed-off-by: Bruce Richardson <***@intel.com>
---
lib/librte_ether/rte_ethdev.c | 91 ++++++++++++++++++++++++++++--
lib/librte_ether/rte_ethdev.h | 125 +++++++++++++++++++++++++++++++++++++++++-
2 files changed, 210 insertions(+), 6 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 87a5323..882507b 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -337,6 +337,15 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_rx_queues = 0;
return -(ENOMEM);
}
+ dev->rx_cbs = rte_zmalloc("ethdev->rx_cbs",
+ sizeof(*dev->rx_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->rx_cbs == NULL) {
+ rte_free(dev->data->rx_queues);
+ dev->data->rx_queues = NULL;
+ dev->data->nb_rx_queues = 0;
+ return -(ENOMEM);
+ }
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP);

@@ -348,10 +357,18 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
RTE_CACHE_LINE_SIZE);
if (rxq == NULL)
return -(ENOMEM);
+ dev->rx_cbs = rte_realloc(dev->rx_cbs, sizeof(*dev->rx_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->rx_cbs == NULL)
+ return -(ENOMEM);

- if (nb_queues > old_nb_queues)
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(rxq + old_nb_queues, 0,
- sizeof(rxq[0]) * (nb_queues - old_nb_queues));
+ sizeof(rxq[0]) * new_qs);
+ memset(dev->rx_cbs + old_nb_queues, 0,
+ sizeof(dev->rx_cbs[0]) * new_qs);
+ }

dev->data->rx_queues = rxq;

@@ -479,6 +496,15 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_tx_queues = 0;
return -(ENOMEM);
}
+ dev->tx_cbs = rte_zmalloc("ethdev->tx_cbs",
+ sizeof(*dev->tx_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->tx_cbs == NULL) {
+ rte_free(dev->data->tx_queues);
+ dev->data->tx_queues = NULL;
+ dev->data->nb_tx_queues = 0;
+ return -(ENOMEM);
+ }
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP);

@@ -490,10 +516,19 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
RTE_CACHE_LINE_SIZE);
if (txq == NULL)
return -(ENOMEM);
+ dev->tx_cbs = rte_realloc(dev->tx_cbs, sizeof(*dev->tx_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->tx_cbs == NULL)
+ return -(ENOMEM);
+

- if (nb_queues > old_nb_queues)
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(txq + old_nb_queues, 0,
- sizeof(txq[0]) * (nb_queues - old_nb_queues));
+ sizeof(txq[0]) * new_qs);
+ memset(dev->tx_cbs + old_nb_queues, 0,
+ sizeof(dev->tx_cbs[0]) * new_qs);
+ }

dev->data->tx_queues = txq;

@@ -3269,3 +3304,51 @@ rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP);
return (*dev->dev_ops->filter_ctrl)(dev, filter_type, filter_op, arg);
}
+
+/*
+ * Register fn as an RX callback on the given port and queue.
+ *
+ * The new entry is placed at the head of the queue's callback list.
+ * Returns the callback handle on success. On failure returns NULL with
+ * rte_errno set to EINVAL (bad port id, bad queue id or NULL fn) or
+ * ENOMEM (allocation failure).
+ */
+void *
+rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+		rte_rxtx_callback_fn fn, void *user_param)
+{
+	struct rte_eth_rxtx_callback **head;
+	struct rte_eth_rxtx_callback *cb;
+
+	/* the port id is validated before it is used as an array index */
+	if (fn == NULL || port_id >= nb_ports ||
+			queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+	if (cb == NULL) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	/* fill the entry completely, then link it in front of the list */
+	head = &rte_eth_devices[port_id].rx_cbs[queue_id];
+	cb->fn = fn;
+	cb->param = user_param;
+	cb->next = *head;
+	*head = cb;
+	return cb;
+}
+
+/*
+ * Register fn as a TX callback on the given port and queue.
+ *
+ * The new entry is placed at the head of the queue's callback list.
+ * Returns the callback handle on success. On failure returns NULL with
+ * rte_errno set to EINVAL (bad port id, bad queue id or NULL fn) or
+ * ENOMEM (allocation failure).
+ */
+void *
+rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+		rte_rxtx_callback_fn fn, void *user_param)
+{
+	struct rte_eth_rxtx_callback **head;
+	struct rte_eth_rxtx_callback *cb;
+
+	/* the port id is validated before it is used as an array index */
+	if (fn == NULL || port_id >= nb_ports ||
+			queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+	if (cb == NULL) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	/* fill the entry completely, then link it in front of the list */
+	head = &rte_eth_devices[port_id].tx_cbs[queue_id];
+	cb->fn = fn;
+	cb->param = user_param;
+	cb->next = *head;
+	*head = cb;
+	return cb;
+}
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index c76a5d0..a5fa2a7 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1548,6 +1548,47 @@ struct eth_dev_ops {
};

/**
+ * Function type used for callbacks for processing packets on RX and TX
+ *
+ * If configured for RX, it is called with a burst of packets that have just
+ * been received on the given port and queue. On TX, it is called with a burst
+ * of packets immediately before those packets are put onto the hardware queue
+ * for transmission.
+ *
+ * @param port
+ * The ethernet port on which rx or tx is being performed
+ * @param queue
+ * The queue on the ethernet port which is being used to receive or transmit
+ * the packets.
+ * @param pkts
+ * The burst of packets on which processing is to be done. On RX, these
+ * packets have just been received. On TX, they are about to be transmitted.
+ * @param nb_pkts
+ * The number of packets in the burst pointed to by "pkts"
+ * @param user_param
+ * The arbitrary user parameter passed in by the application when the callback
+ * was originally configured.
+ * @return
+ * The number of packets remaining in pkts after processing.
+ * * On RX, this will be returned to the user as the return value from
+ * rte_eth_rx_burst.
+ * * On TX, this will be the number of packets actually written to the NIC.
+ */
+typedef uint16_t (*rte_rxtx_callback_fn)(uint8_t port, uint16_t queue,
+ struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param);
+
+/**
+ * @internal
+ * Structure used to hold information about the callbacks to be called for a
+ * queue on RX and TX.
+ */
+struct rte_eth_rxtx_callback {
+ struct rte_eth_rxtx_callback *next; /**< Next callback in the queue's list, NULL at the end */
+ rte_rxtx_callback_fn fn; /**< Function called with each burst of packets */
+ void *param; /**< User parameter passed to every call of fn */
+};
+
+/**
* @internal
* The generic data structure associated with each ethernet device.
*
@@ -1564,7 +1605,21 @@ struct rte_eth_dev {
const struct eth_driver *driver;/**< Driver for this device */
struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
- struct rte_eth_dev_cb_list intr_cbs; /**< User application callbacks on interrupt*/
+
+ /** User application callbacks for NIC interrupts */
+ struct rte_eth_dev_cb_list intr_cbs;
+
+ /**
+ * User-supplied functions called from rx_burst to post-process
+ * received packets before passing them to the user
+ */
+ struct rte_eth_rxtx_callback **rx_cbs;
+
+ /**
+ * User-supplied functions called from tx_burst to pre-process
+ * packets before passing them to the driver for transmission.
+ */
+ struct rte_eth_rxtx_callback **tx_cbs;
};

struct rte_eth_dev_sriov {
@@ -2417,7 +2472,17 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;

dev = &rte_eth_devices[port_id];
- return (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts, nb_pkts);
+ nb_pkts = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts,
+ nb_pkts);
+ if (unlikely(dev->rx_cbs[queue_id] != NULL)) {
+ struct rte_eth_rxtx_callback *cb = dev->rx_cbs[queue_id];
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, rx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
+ return nb_pkts;
}
#endif

@@ -2544,6 +2609,14 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;

dev = &rte_eth_devices[port_id];
+ if (unlikely(dev->tx_cbs[queue_id] != NULL)) {
+ struct rte_eth_rxtx_callback *cb = dev->tx_cbs[queue_id];
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, tx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
}
#endif
@@ -3753,6 +3826,54 @@ int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_ty
int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
enum rte_filter_op filter_op, void *arg);

+/**
+ * Add a callback to be called on packet RX on a given port and queue
+ *
+ * This API configures a function to be called for each burst of
+ * packets received on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_rx_callback() API.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ * The callback function
+ * @param user_param
+ * A generic pointer parameter which will be passed to each invocation of the
+ * callback function on this port and queue.
+ * @return
+ * NULL on error.
+ * On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
+/**
+ * Add a callback to be called on packet TX on a given port and queue
+ *
+ * This API configures a function to be called for each burst of
+ * packets sent on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_tx_callback() API.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ * The callback function
+ * @param user_param
+ * A generic pointer parameter which will be passed to each invocation of the
+ * callback function on this port and queue.
+ * @return
+ * NULL on error.
+ * On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
#ifdef __cplusplus
}
#endif
--
1.9.3
Thomas Monjalon
2014-12-22 17:02:53 UTC
Permalink
Hi Bruce,

Callbacks, as hooks for applications, give more flexibility and are
generally a good idea.
In DPDK the main issue will be to avoid performance degradation.
I see you use "unlikely" for callback branching.
Could we reduce more the impact of this test by removing the queue array,
i.e. having port-wide callbacks instead of per-queue callbacks?
Future extensions: in future the ethdev library can be extended to provide
a standard set of callbacks for use by drivers.
Having callbacks for drivers seems strange to me.
If drivers need to accomplish some tasks, they do it by implementing an
ethdev service. New services are declared for new needs.
Callbacks are the reverse logic. Why should it be needed?
For now this patch set is RFC and still needs additional work for creating
a remove function for callbacks and to add in additional testing code.
Since this adds in new code into the critical data path, I have run some
performance tests using testpmd with the ixgbe vector drivers (i.e. the
fastest, fast-path we have :-) ). Performance drops due to this patch
seem minimal to non-existent; rough tests on my system indicate a drop
of perhaps 1%.
All feedback welcome.
It would be good to have more performance tests with different configurations.

Thanks
--
Thomas
Bruce Richardson
2014-12-22 17:33:07 UTC
Permalink
Post by Thomas Monjalon
Hi Bruce,
Callbacks, as hooks for applications, give more flexibility and are
generally a good idea.
In DPDK the main issue will be to avoid performance degradation.
I see you use "unlikely" for callback branching.
Could we reduce more the impact of this test by removing the queue array,
i.e. having port-wide callbacks instead of per-queue callbacks?
I can give that a try, but I don't see it making much difference if any. The
main thing to avoid with branching is branch mis-prediction, which should not
be a problem here, as the user is not going to be adding or removing callbacks
between each RX and TX call, making the branches highly predictable - i.e. always
go the same way.
The reason for using per-queue callbacks is that I think we can do more with
it that way. For instance, if we want to do some additional processing or
calculations on only IP traffic, then we can use hardware offloads on most
NICs to steer the IP traffic to a separate queue and only apply the callbacks
to that queue. If the performance is the same, I think we should therefore keep
the per-queue version.
Post by Thomas Monjalon
Future extensions: in future the ethdev library can be extended to provide
a standard set of callbacks for use by drivers.
Having callbacks for drivers seems strange to me.
If drivers need to accomplish some tasks, they do it by implementing an
ethdev service. New services are declared for new needs.
Callbacks are the reverse logic. Why should it be needed?
Typo, I meant for applications! Drivers don't need them indeed.
Post by Thomas Monjalon
For now this patch set is RFC and still needs additional work for creating
a remove function for callbacks and to add in additional testing code.
Since this adds in new code into the critical data path, I have run some
performance tests using testpmd with the ixgbe vector drivers (i.e. the
fastest, fast-path we have :-) ). Performance drops due to this patch
seem minimal to non-existent; rough tests on my system indicate a drop
of perhaps 1%.
All feedback welcome.
It would be good to have more performance tests with different configurations.
Sure, if you have ideas for specific tests you'd like to see I'll try and
get some numbers. What I did look at was the performance impact for this patch
without actually putting in place any callbacks, and the worst-case here is
hardly noticeable. For an empty callback, i.e. the pure callback overhead, the
performance should still be in low single-digit percentages, but I'll test to
confirm that. For other slower RX and TX paths, e.g. those using scattered
packets, or with TX offloads, the performance impact will be even less.

Regards,
/Bruce
Post by Thomas Monjalon
Thanks
--
Thomas
Neil Horman
2014-12-22 17:47:09 UTC
Permalink
Post by Bruce Richardson
Post by Thomas Monjalon
Hi Bruce,
Callbacks, as hooks for applications, give more flexibility and are
generally a good idea.
In DPDK the main issue will be to avoid performance degradation.
I see you use "unlikely" for callback branching.
Could we reduce more the impact of this test by removing the queue array,
i.e. having port-wide callbacks instead of per-queue callbacks?
I can give that a try, but I don't see it making much difference if any. The
main thing to avoid with branching is branch mis-prediction, which should not
be a problem here, as the user is not going to be adding or removing callbacks
between each RX and TX call, making the branches highly predictable - i.e. always
go the same way.
I was going to ask about exactly that. You say no one will be adding/removing
callbacks between RX/TX calls, but you don't know that, people will try to do so
at some point. You should add a check so that callbacks can only be
registered/unregistered on stopped queues, otherwise this is extremely racy.
It won't impact performance to do so, and will save a good deal of debugging
down the road at some point.

Neil
Bruce Richardson
2014-12-23 09:28:08 UTC
Permalink
Post by Neil Horman
Post by Bruce Richardson
Post by Thomas Monjalon
Hi Bruce,
Callbacks, as hooks for applications, give more flexibility and are
generally a good idea.
In DPDK the main issue will be to avoid performance degradation.
I see you use "unlikely" for callback branching.
Could we reduce more the impact of this test by removing the queue array,
i.e. having port-wide callbacks instead of per-queue callbacks?
I can give that a try, but I don't see it making much difference if any. The
main thing to avoid with branching is branch mis-prediction, which should not
be a problem here, as the user is not going to be adding or removing callbacks
between each RX and TX call, making the branches highly predictable - i.e. always
go the same way.
I was going to ask about exactly that. You say no one will be adding/removing
callbacks between RX/TX calls, but you don't know that, people will try to do so
at some point. You should add a check so that callbacks can only be
registered/unregistered on stopped queues, otherwise this is extremely racy.
It won't impact performance to do so, and will save a good deal of debugging
down the road at some point.
Neil
Actually, I think it's worthwhile being able to do exactly that - add/remove
callbacks on the fly, if possible. Doing the add in a race-free manner is probably
easy enough, but doing the delete may well be more tricky. For now, though,
it might indeed be as well to limit it to stopped queues.
If we can do the dynamic add/remove of callbacks, then that can lead to all
sorts of interesting runtime instrumentation possibilities using multiprocess
support.

/Bruce
Neil Horman
2014-12-23 13:09:37 UTC
Permalink
Post by Bruce Richardson
Post by Neil Horman
Post by Bruce Richardson
Post by Thomas Monjalon
Hi Bruce,
Callbacks, as hooks for applications, give more flexibility and are
generally a good idea.
In DPDK the main issue will be to avoid performance degradation.
I see you use "unlikely" for callback branching.
Could we reduce more the impact of this test by removing the queue array,
i.e. having port-wide callbacks instead of per-queue callbacks?
I can give that a try, but I don't see it making much difference if any. The
main thing to avoid with branching is branch mis-prediction, which should not
be a problem here, as the user is not going to be adding or removing callbacks
between each RX and TX call, making the branches highly predictable - i.e. always
go the same way.
I was going to ask about exactly that. You say no one will be adding/removing
callbacks between RX/TX calls, but you don't know that, people will try to do so
at some point. You should add a check so that callbacks can only be
registered/unregistered on stopped queues, otherwise this is extremely racy.
It won't impact performance to do so, and will save a good deal of debugging
down the road at some point.
Neil
Actually, I think it's worthwhile being able to do exactly that - add/remove
callbacks on the fly, if possible. Doing the add in a race-free manner is probably
easy enough, but doing the delete may well be more tricky. For now, though,
it might indeed be as well to limit it to stopped queues.
If we can do the dynamic add/remove of callbacks, then that can lead to all
sorts of interesting runtime instrumentation possibilities using multiprocess
support.
/Bruce
Well, ok, but if you want to be able to do dynamic addition/removal of
callbacks, then you either need to stop the queue, or provide locking around the
list traversal and modification points. All you have right now is multiple
contexts hitting the list at the same time, and that's going to break.

Neil
Bruce Richardson
2014-12-23 14:09:41 UTC
Permalink
Post by Neil Horman
Post by Bruce Richardson
Post by Neil Horman
Post by Bruce Richardson
Post by Thomas Monjalon
Hi Bruce,
Callbacks, as hooks for applications, give more flexibility and are
generally a good idea.
In DPDK the main issue will be to avoid performance degradation.
I see you use "unlikely" for callback branching.
Could we reduce more the impact of this test by removing the queue array,
i.e. having port-wide callbacks instead of per-queue callbacks?
I can give that a try, but I don't see it making much difference if any. The
main thing to avoid with branching is branch mis-prediction, which should not
be a problem here, as the user is not going to be adding or removing callbacks
between each RX and TX call, making the branches highly predictable - i.e. always
go the same way.
I was going to ask about exactly that. You say no one will be adding/removing
callbacks between RX/TX calls, but you don't know that, people will try to do so
at some point. You should add a check so that callbacks can only be
registered/unregistered on stopped queues, otherwise this is extremely racy.
It won't impact performance to do so, and will save a good deal of debugging
down the road at some point.
Neil
Actually, I think it's worthwhile being able to do exactly that - add/remove
callbacks on the fly, if possible. Doing the add in a race-free manner is probably
easy enough, but doing the delete may well be more tricky. For now, though,
it might indeed be as well to limit it to stopped queues.
If we can do the dynamic add/remove of callbacks, then that can lead to all
sorts of interesting runtime instrumentation possibilities using multiprocess
support.
/Bruce
Well, ok, but if you want to be able to do dynamic addition/removal of
callbacks, then you either need to stop the queue, or provide locking around the
list traversal and modification points. All you have right now is multiple
contexts hitting the list at the same time, and that's going to break.
Neil
Yep, well aware of that. It's still a work in progress, hence the RFC part. :-)
I'll finish off the basics first, and then see what can be done in terms of
either thread safety or just restricting usage to when queues are stopped.
More polished patches hopefully to follow in the new year.

/Bruce
Bruce Richardson
2015-01-05 16:17:56 UTC
Permalink
Post by Bruce Richardson
Post by Thomas Monjalon
Hi Bruce,
Callbacks, as hooks for applications, give more flexibility and are
generally a good idea.
In DPDK the main issue will be to avoid performance degradation.
I see you use "unlikely" for callback branching.
Could we reduce more the impact of this test by removing the queue array,
i.e. having port-wide callbacks instead of per-queue callbacks?
I can give that a try, but I don't see it making much difference if any. The
main thing to avoid with branching is branch mis-prediction, which should not
be a problem here, as the user is not going to be adding or removing callbacks
between each RX and TX call, making the branches highly predictable - i.e. always
go the same way.
The reason for using per-queue callbacks is that I think we can do more with
it that way. For instance, if we want to do some additional processing or
calculations on only IP traffic, then we can use hardware offloads on most
NICs to steer the IP traffic to a separate queue and only apply the callbacks
to that queue. If the performance is the same, I think we should therefore keep
the per-queue version.
I tried just using a single callback pointer per port, instead of per-queue.
Performance looked the same for the no-callback case, the case of an empty
callback function, and the test application with callbacks for latency
computation.
Post by Bruce Richardson
Post by Thomas Monjalon
Future extensions: in future the ethdev library can be extended to provide
a standard set of callbacks for use by drivers.
Having callbacks for drivers seems strange to me.
If drivers need to accomplish some tasks, they do it by implementing an
ethdev service. New services are declared for new needs.
Callbacks are the reverse logic. Why should it be needed?
Typo, I meant for applications! Drivers don't need them indeed.
Post by Thomas Monjalon
For now this patch set is RFC and still needs additional work for creating
a remove function for callbacks and to add in additional testing code.
Since this adds in new code into the critical data path, I have run some
performance tests using testpmd with the ixgbe vector drivers (i.e. the
fastest, fast-path we have :-) ). Performance drops due to this patch
seem minimal to non-existent; rough tests on my system indicate a drop
of perhaps 1%.
All feedback welcome.
It would be good to have more performance tests with different configurations.
Sure, if you have ideas for specific tests you'd like to see I'll try and
get some numbers. What I did look at was the performance impact for this patch
without actually putting in place any callbacks, and the worst-case here is
hardly noticeable. For an empty callback, i.e. the pure callback overhead, the
performance should still be in low single-digit percentages, but I'll test to
confirm that. For other slower RX and TX paths, e.g. those using scattered
packets, or with TX offloads, the performance impact will be even less.
I modified the sample app in patch 3 of this set to have empty callbacks on
both RX and TX, and the performance impact vs the non-callback case was again
about 1% only. [Again using the vector RX/TX fastpath functions, with 4 ports
handled by a single lcore].

Regards,
/Bruce
Stephen Hemminger
2014-12-22 18:31:57 UTC
Permalink
On Mon, 22 Dec 2014 16:47:21 +0000
This RFC is for a small addition to the ethdev library, to add in support for
callbacks at the RX and TX stages. This allows packet processing to be done on
packets before they get returned to applications using rte_eth_rx_burst call.
Use case: the first use case for this is to enable a consistent set of
packets mbufs to be received by applications irrespective of the NIC used
to receive those. For example, for a port type that does not support RSS,
a callback on RX can be configured to calculate a hash in software.
Similarly, this mechanism can be used to add other information to mbufs
as they are received, such as timestamps or sequence numbers, without cluttering
up the main packet processing path with checks for whether packets have these
fields filled in or not.
A second use case is ease of instrumenting existing code. The example application
shows how combining a timestamp insertion callback on RX can be paired with a
latency calculation callback on TX to easily instrument any application for
packet latency.
A third use case is to potentially extend existing NIC capabilities beyond
what is currently supported. For example, where flow director capabilities
can match up to a certain limit of flows - in the thousands, in the case of
NICs using the ixgbe driver - a callback can extend this to potentially
millions of flows by using a software hash table lookup inline for packets
that miss the hardware lookup filters. It would all appear transparent
to the packet handling code in the main application.
Future extensions: in future the ethdev library can be extended to provide
a standard set of callbacks for use by drivers.
For now this patch set is RFC and still needs additional work for creating
a remove function for callbacks and to add in additional testing code.
Since this adds in new code into the critical data path, I have run some
performance tests using testpmd with the ixgbe vector drivers (i.e. the
fastest, fast-path we have :-) ). Performance drops due to this patch
seem minimal to non-existent; rough tests on my system indicate a drop
of perhaps 1%.
All feedback welcome.
ethdev: rename callbacks field to intr_cbs
ethdev: Add in data rxtx callback support
examples: example showing use of callbacks.
app/test/virtual_pmd.c | 2 +-
examples/rxtx_callbacks/Makefile | 57 +++++++++
examples/rxtx_callbacks/basicfwd.c | 222 +++++++++++++++++++++++++++++++++
examples/rxtx_callbacks/basicfwd.h | 46 +++++++
lib/librte_ether/rte_ethdev.c | 103 +++++++++++++--
lib/librte_ether/rte_ethdev.h | 125 ++++++++++++++++++-
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
7 files changed, 543 insertions(+), 14 deletions(-)
create mode 100644 examples/rxtx_callbacks/Makefile
create mode 100644 examples/rxtx_callbacks/basicfwd.c
create mode 100644 examples/rxtx_callbacks/basicfwd.h
What about SMP safety? The callback list is not thread safe.
Do you plan to start integrating with an RCU framework like userspace RCU?
Bruce Richardson
2014-12-23 09:29:09 UTC
Permalink
Post by Stephen Hemminger
On Mon, 22 Dec 2014 16:47:21 +0000
This RFC is for a small addition to the ethdev library, to add in support for
callbacks at the RX and TX stages. This allows packet processing to be done on
packets before they get returned to applications using rte_eth_rx_burst call.
Use case: the first use case for this is to enable a consistent set of
packets mbufs to be received by applications irrespective of the NIC used
to receive those. For example, for a port type that does not support RSS,
a callback on RX can be configured to calculate a hash in software.
Similarly, this mechanism can be used to add other information to mbufs
as they are received, such as timestamps or sequence numbers, without cluttering
up the main packet processing path with checks for whether packets have these
fields filled in or not.
A second use case is ease of instrumenting existing code. The example application
shows how combining a timestamp insertion callback on RX can be paired with a
latency calculation callback on TX to easily instrument any application for
packet latency.
A third use case is to potentially extend existing NIC capabilities beyond
what is currently supported. For example, where flow director capabilities
can match up to a certain limit of flows - in the thousands, in the case of
NICs using the ixgbe driver - a callback can extend this to potentially
millions of flows by using a software hash table lookup inline for packets
that miss the hardware lookup filters. It would all appear transparent
to the packet handling code in the main application.
Future extensions: in future the ethdev library can be extended to provide
a standard set of callbacks for use by drivers.
For now this patch set is RFC and still needs additional work for creating
a remove function for callbacks and to add in additional testing code.
Since this adds in new code into the critical data path, I have run some
performance tests using testpmd with the ixgbe vector drivers (i.e. the
fastest, fast-path we have :-) ). Performance drops due to this patch
seem minimal to non-existent; rough tests on my system indicate a drop
of perhaps 1%.
All feedback welcome.
ethdev: rename callbacks field to intr_cbs
ethdev: Add in data rxtx callback support
examples: example showing use of callbacks.
app/test/virtual_pmd.c | 2 +-
examples/rxtx_callbacks/Makefile | 57 +++++++++
examples/rxtx_callbacks/basicfwd.c | 222 +++++++++++++++++++++++++++++++++
examples/rxtx_callbacks/basicfwd.h | 46 +++++++
lib/librte_ether/rte_ethdev.c | 103 +++++++++++++--
lib/librte_ether/rte_ethdev.h | 125 ++++++++++++++++++-
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
7 files changed, 543 insertions(+), 14 deletions(-)
create mode 100644 examples/rxtx_callbacks/Makefile
create mode 100644 examples/rxtx_callbacks/basicfwd.c
create mode 100644 examples/rxtx_callbacks/basicfwd.h
What about SMP safety? The callback list is not thread safe.
Do you plan to start integrating with an RCU framework like userspace RCU?
I hadn't considered multiple CPUs adding/removing callbacks simultaneously. I'll
look into it.

/Bruce
Vithal S Mohare
2014-12-23 04:23:21 UTC
Permalink
Hi Bruce,

<snip>
For example, for a port type that does not support RSS, a callback on RX can be configured to calculate a hash in software.
</snip>

Wondering if this callback will also be useful to bridge the gap of no RSS support for L2 packets. i.e. in the rx call-back handler, can applications calculate hash and feed it back so that spraying happens based on this? Now, all pure L2 packets (e.g. arp pkts) comes to rx-q 0 of the 'port'. Adding callback to [port][rx-q:0] would help?

Thanks,
-Vithal

-----Original Message-----
From: dev [mailto:dev-***@dpdk.org] On Behalf Of Bruce Richardson
Sent: Monday, December 22, 2014 10:17 PM
To: ***@dpdk.org
Subject: [dpdk-dev] [PATCH RFC 0/3] DPDK ethdev callback support

This RFC is for a small addition to the ethdev library, to add in support for callbacks at the RX and TX stages. This allows packet processing to be done on packets before they get returned to applications using rte_eth_rx_burst call.

Use case: the first use case for this is to enable a consistent set of packets mbufs to be received by applications irrespective of the NIC used to receive those. For example, for a port type that does not support RSS, a callback on RX can be configured to calculate a hash in software.
Similarly, this mechanism can be used to add other information to mbufs as they are received, such as timestamps or sequence numbers, without cluttering up the main packet processing path with checks for whether packets have these fields filled in or not.
A second use case is ease of instrumenting existing code. The example application shows how combining a timestamp insertion callback on RX can be paired with a latency calculation callback on TX to easily instrument any application for packet latency.
A third use case is to potentially extend existing NIC capabilities beyond what is currently supported. For example, where flow director capabilities can match up to a certain limit of flows - in the thousands, in the case of NICs using the ixgbe driver - a callback can extend this to potentially millions of flows by using a software hash table lookup inline for packets that miss the hardware lookup filters. It would all appear transparent to the packet handling code in the main application.

Future extensions: in future the ethdev library can be extended to provide a standard set of callbacks for use by drivers.

For now this patch set is RFC and still needs additional work for creating a remove function for callbacks and to add in additional testing code.
Since this adds in new code into the critical data path, I have run some performance tests using testpmd with the ixgbe vector drivers (i.e. the fastest, fast-path we have :-) ). Performance drops due to this patch seem minimal to non-existent; rough tests on my system indicate a drop of perhaps 1%.

All feedback welcome.

Bruce Richardson (3):
ethdev: rename callbacks field to intr_cbs
ethdev: Add in data rxtx callback support
examples: example showing use of callbacks.

app/test/virtual_pmd.c | 2 +-
examples/rxtx_callbacks/Makefile | 57 +++++++++
examples/rxtx_callbacks/basicfwd.c | 222 +++++++++++++++++++++++++++++++++
examples/rxtx_callbacks/basicfwd.h | 46 +++++++
lib/librte_ether/rte_ethdev.c | 103 +++++++++++++--
lib/librte_ether/rte_ethdev.h | 125 ++++++++++++++++++-
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
7 files changed, 543 insertions(+), 14 deletions(-) create mode 100644 examples/rxtx_callbacks/Makefile create mode 100644 examples/rxtx_callbacks/basicfwd.c
create mode 100644 examples/rxtx_callbacks/basicfwd.h

--
1.9.3
Bruce Richardson
2014-12-23 09:30:15 UTC
Permalink
Post by Thomas Monjalon
Hi Bruce,
<snip>
For example, for a port type that does not support RSS, a callback on RX can be configured to calculate a hash in software.
</snip>
Wondering if this callback will also be useful to bridge the gap of no RSS support for L2 packets. i.e. in the rx call-back handler, can applications calculate hash and feed it back so that spraying happens based on this? Now, all pure L2 packets (e.g. arp pkts) comes to rx-q 0 of the 'port'. Adding callback to [port][rx-q:0] would help?
Thanks,
-Vithal
Yes, that could work. The downside is that it is no faster than having an
app do the calculation itself, it's just perhaps a little easier to work with
in the app.

/Bruce
Post by Thomas Monjalon
-----Original Message-----
Sent: Monday, December 22, 2014 10:17 PM
Subject: [dpdk-dev] [PATCH RFC 0/3] DPDK ethdev callback support
This RFC is for a small addition to the ethdev library, to add in support for callbacks at the RX and TX stages. This allows packet processing to be done on packets before they get returned to applications using rte_eth_rx_burst call.
Use case: the first use case for this is to enable a consistent set of packets mbufs to be received by applications irrespective of the NIC used to receive those. For example, for a port type that does not support RSS, a callback on RX can be configured to calculate a hash in software.
Similarly, this mechanism can be used to add other information to mbufs as they are received, such as timestamps or sequence numbers, without cluttering up the main packet processing path with checks for whether packets have these fields filled in or not.
A second use case is ease of intrumenting existing code. The example application shows how combining a timestamp insertion callback on RX can be paired with a latency calculation callback on TX to easily instrument any application for packet latency.
A third use case is to potentially extend existing NIC capabilities beyond what is currently supported. For example, where flow director capabilities can match up to a certain limit of flows - in the thousands, in the case of NICs using the ixgbe driver - a callback can extend this to potentially millions of flows by using a software hash table lookup inline for packets that missing the hardware lookup filters. It would all appear transparent to the packet handling code in the main application.
Future extensions: in future the ethdev library can be extended to provide a standard set of callbacks for use by drivers.
For now this patch set is RFC and still needs additional work for creating a remove function for callbacks and to add in additional testing code.
Since this adds in new code into the critical data path, I have run some performance tests using testpmd with the ixgbe vector drivers (i.e. the fastest, fast-path we have :-) ). Performance drops due to this patch seems minimal to non-existant, rough tests on my system indicate a drop of perhaps 1%.
All feedback welcome.
ethdev: rename callbacks field to intr_cbs
ethdev: Add in data rxtx callback support
examples: example showing use of callbacks.
app/test/virtual_pmd.c | 2 +-
examples/rxtx_callbacks/Makefile | 57 +++++++++
examples/rxtx_callbacks/basicfwd.c | 222 +++++++++++++++++++++++++++++++++
examples/rxtx_callbacks/basicfwd.h | 46 +++++++
lib/librte_ether/rte_ethdev.c | 103 +++++++++++++--
lib/librte_ether/rte_ethdev.h | 125 ++++++++++++++++++-
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
7 files changed, 543 insertions(+), 14 deletions(-) create mode 100644 examples/rxtx_callbacks/Makefile create mode 100644 examples/rxtx_callbacks/basicfwd.c
create mode 100644 examples/rxtx_callbacks/basicfwd.h
--
1.9.3
Vithal S Mohare
2014-12-23 09:37:32 UTC
Permalink
Agreed. As the mbuf is already received in the rx-q, it may not yield a great advantage.
On a side note, any plans to support RSS for L2 packets?

-----Original Message-----
From: Bruce Richardson [mailto:***@intel.com]
Sent: Tuesday, December 23, 2014 3:00 PM
To: Vithal S Mohare
Cc: ***@dpdk.org
Subject: Re: [dpdk-dev] [PATCH RFC 0/3] DPDK ethdev callback support
Post by Thomas Monjalon
Hi Bruce,
<snip>
For example, for a port type that does not support RSS, a callback on RX can be configured to calculate a hash in software.
</snip>
Wondering if this callback will also be useful to bridge the gap of no RSS support for L2 packets. i.e. in the rx call-back handler, can applications calculate hash and feed it back so that spraying happens based on this? Now, all pure L2 packets (e.g. arp pkts) comes to rx-q 0 of the 'port'. Adding callback to [port][rx-q:0] would help?
Thanks,
-Vithal
Yes, that could work. The downside is that it is no faster than having an app do the calculation itself, it's just perhaps a little easier to work with in the app.

/Bruce
Post by Thomas Monjalon
-----Original Message-----
Sent: Monday, December 22, 2014 10:17 PM
Subject: [dpdk-dev] [PATCH RFC 0/3] DPDK ethdev callback support
This RFC is for a small addition to the ethdev library, to add in support for callbacks at the RX and TX stages. This allows packet processing to be done on packets before they get returned to applications using rte_eth_rx_burst call.
Use case: the first use case for this is to enable a consistent set of packets mbufs to be received by applications irrespective of the NIC used to receive those. For example, for a port type that does not support RSS, a callback on RX can be configured to calculate a hash in software.
Similarly, this mechanism can be used to add other information to mbufs as they are received, such as timestamps or sequence numbers, without cluttering up the main packet processing path with checks for whether packets have these fields filled in or not.
A second use case is ease of intrumenting existing code. The example application shows how combining a timestamp insertion callback on RX can be paired with a latency calculation callback on TX to easily instrument any application for packet latency.
A third use case is to potentially extend existing NIC capabilities beyond what is currently supported. For example, where flow director capabilities can match up to a certain limit of flows - in the thousands, in the case of NICs using the ixgbe driver - a callback can extend this to potentially millions of flows by using a software hash table lookup inline for packets that missing the hardware lookup filters. It would all appear transparent to the packet handling code in the main application.
Future extensions: in future the ethdev library can be extended to provide a standard set of callbacks for use by drivers.
For now this patch set is RFC and still needs additional work for creating a remove function for callbacks and to add in additional testing code.
Since this adds in new code into the critical data path, I have run some performance tests using testpmd with the ixgbe vector drivers (i.e. the fastest, fast-path we have :-) ). Performance drops due to this patch seems minimal to non-existant, rough tests on my system indicate a drop of perhaps 1%.
All feedback welcome.
ethdev: rename callbacks field to intr_cbs
ethdev: Add in data rxtx callback support
examples: example showing use of callbacks.
app/test/virtual_pmd.c | 2 +-
examples/rxtx_callbacks/Makefile | 57 +++++++++
examples/rxtx_callbacks/basicfwd.c | 222 +++++++++++++++++++++++++++++++++
examples/rxtx_callbacks/basicfwd.h | 46 +++++++
lib/librte_ether/rte_ethdev.c | 103 +++++++++++++--
lib/librte_ether/rte_ethdev.h | 125 ++++++++++++++++++-
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
7 files changed, 543 insertions(+), 14 deletions(-) create mode
100644 examples/rxtx_callbacks/Makefile create mode 100644
examples/rxtx_callbacks/basicfwd.c
create mode 100644 examples/rxtx_callbacks/basicfwd.h
--
1.9.3
Zhang, Helin
2014-12-24 01:43:46 UTC
Permalink
For L2 RSS, I think i40e hardware supports it, and it will be enabled soon.

Regards,
Helin
Post by Vithal S Mohare
-----Original Message-----
Sent: Tuesday, December 23, 2014 5:38 PM
To: Richardson, Bruce
Subject: Re: [dpdk-dev] [PATCH RFC 0/3] DPDK ethdev callback support
Agreed. As the mbuf is already received in the rx-q, it may not yield a great advantage.
On a side note, are there any plans to support RSS for L2 packets?
-----Original Message-----
Sent: Tuesday, December 23, 2014 3:00 PM
To: Vithal S Mohare
Subject: Re: [dpdk-dev] [PATCH RFC 0/3] DPDK ethdev callback support
Post by Thomas Monjalon
Hi Bruce,
<snip>
For example, for a port type that does not support RSS, a callback on RX can
be configured to calculate a hash in software.
Post by Thomas Monjalon
</snip>
Wondering if this callback will also be useful to bridge the gap of no RSS
support for L2 packets. i.e. in the rx call-back handler, can applications
calculate hash and feed it back so that spraying happens based on this? Now,
all pure L2 packets (e.g. arp pkts) come to rx-q 0 of the 'port'. Adding
callback to [port][rx-q:0] would help?
Post by Thomas Monjalon
Thanks,
-Vithal
Yes, that could work. The downside is that it is no faster than having an app do
the calculation itself, it's just perhaps a little easier to work with in the app.
/Bruce
Post by Thomas Monjalon
-----Original Message-----
Sent: Monday, December 22, 2014 10:17 PM
Subject: [dpdk-dev] [PATCH RFC 0/3] DPDK ethdev callback support
This RFC is for a small addition to the ethdev library, to add in support for
callbacks at the RX and TX stages. This allows packet processing to be done on
packets before they get returned to applications using rte_eth_rx_burst call.
Post by Thomas Monjalon
Use case: the first use case for this is to enable a consistent set of packet
mbufs to be received by applications irrespective of the NIC used to receive
those. For example, for a port type that does not support RSS, a callback on RX
can be configured to calculate a hash in software.
Post by Thomas Monjalon
Similarly, this mechanism can be used to add other information to mbufs as
they are received, such as timestamps or sequence numbers, without
cluttering up the main packet processing path with checks for whether packets
have these fields filled in or not.
Post by Thomas Monjalon
A second use case is ease of instrumenting existing code. The example
application shows how a timestamp insertion callback on RX can be
paired with a latency calculation callback on TX to easily instrument any
application for packet latency.
Post by Thomas Monjalon
A third use case is to potentially extend existing NIC capabilities beyond what
is currently supported. For example, where flow director capabilities can match
up to a certain limit of flows - in the thousands, in the case of NICs using the
ixgbe driver - a callback can extend this to potentially millions of flows by using
a software hash table lookup inline for packets that miss the hardware
lookup filters. It would all appear transparent to the packet handling code in
the main application.
Post by Thomas Monjalon
Future extensions: in future the ethdev library can be extended to provide a
standard set of callbacks for use by drivers.
Post by Thomas Monjalon
For now this patch set is RFC and still needs additional work for creating a
remove function for callbacks and to add in additional testing code.
Post by Thomas Monjalon
Since this adds in new code into the critical data path, I have run some
performance tests using testpmd with the ixgbe vector drivers (i.e. the fastest,
fast-path we have :-) ). Performance drops due to this patch seem minimal to
non-existent; rough tests on my system indicate a drop of perhaps 1%.
Post by Thomas Monjalon
All feedback welcome.
ethdev: rename callbacks field to intr_cbs
ethdev: Add in data rxtx callback support
examples: example showing use of callbacks.
app/test/virtual_pmd.c | 2 +-
examples/rxtx_callbacks/Makefile | 57 +++++++++
examples/rxtx_callbacks/basicfwd.c | 222
+++++++++++++++++++++++++++++++++
Post by Thomas Monjalon
examples/rxtx_callbacks/basicfwd.h | 46 +++++++
lib/librte_ether/rte_ethdev.c | 103 +++++++++++++--
lib/librte_ether/rte_ethdev.h | 125 ++++++++++++++++++-
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
7 files changed, 543 insertions(+), 14 deletions(-) create mode
100644 examples/rxtx_callbacks/Makefile create mode 100644
examples/rxtx_callbacks/basicfwd.c
create mode 100644 examples/rxtx_callbacks/basicfwd.h
--
1.9.3
Qiu, Michael
2014-12-24 05:06:40 UTC
Permalink
Hi Bruce,

I haven't seen the third patch of your patch set.

Just want to know whether I am the only one who missed it.

Thanks,
Michael
This RFC is for a small addition to the ethdev library, to add in support for
callbacks at the RX and TX stages. This allows packet processing to be done on
packets before they get returned to applications using rte_eth_rx_burst call.
Use case: the first use case for this is to enable a consistent set of
packet mbufs to be received by applications irrespective of the NIC used
to receive those. For example, for a port type that does not support RSS,
a callback on RX can be configured to calculate a hash in software.
Similarly, this mechanism can be used to add other information to mbufs
as they are received, such as timestamps or sequence numbers, without cluttering
up the main packet processing path with checks for whether packets have these
fields filled in or not.
A second use case is ease of instrumenting existing code. The example application
shows how a timestamp insertion callback on RX can be paired with a
latency calculation callback on TX to easily instrument any application for
packet latency.
A third use case is to potentially extend existing NIC capabilities beyond
what is currently supported. For example, where flow director capabilities
can match up to a certain limit of flows - in the thousands, in the case of
NICs using the ixgbe driver - a callback can extend this to potentially
millions of flows by using a software hash table lookup inline for packets
that miss the hardware lookup filters. It would all appear transparent
to the packet handling code in the main application.
Future extensions: in future the ethdev library can be extended to provide
a standard set of callbacks for use by drivers.
For now this patch set is RFC and still needs additional work for creating
a remove function for callbacks and to add in additional testing code.
Since this adds in new code into the critical data path, I have run some
performance tests using testpmd with the ixgbe vector drivers (i.e. the
fastest, fast-path we have :-) ). Performance drops due to this patch
seem minimal to non-existent; rough tests on my system indicate a drop
of perhaps 1%.
All feedback welcome.
ethdev: rename callbacks field to intr_cbs
ethdev: Add in data rxtx callback support
examples: example showing use of callbacks.
app/test/virtual_pmd.c | 2 +-
examples/rxtx_callbacks/Makefile | 57 +++++++++
examples/rxtx_callbacks/basicfwd.c | 222 +++++++++++++++++++++++++++++++++
examples/rxtx_callbacks/basicfwd.h | 46 +++++++
lib/librte_ether/rte_ethdev.c | 103 +++++++++++++--
lib/librte_ether/rte_ethdev.h | 125 ++++++++++++++++++-
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
7 files changed, 543 insertions(+), 14 deletions(-)
create mode 100644 examples/rxtx_callbacks/Makefile
create mode 100644 examples/rxtx_callbacks/basicfwd.c
create mode 100644 examples/rxtx_callbacks/basicfwd.h
John McNamara
2015-02-12 19:57:55 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

The callbacks member of the rte_eth_dev structure has been renamed
to intr_cbs to make it clear that it refers to callbacks from NIC
interrupts. This then allows us to add other types of callbacks to
the structure without ambiguity.

Signed-off-by: Bruce Richardson <***@intel.com>
---
app/test/virtual_pmd.c | 2 +-
lib/librte_ether/rte_ethdev.c | 12 ++++++------
lib/librte_ether/rte_ethdev.h | 2 +-
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/app/test/virtual_pmd.c b/app/test/virtual_pmd.c
index 9fac95d..ec2474f 100644
--- a/app/test/virtual_pmd.c
+++ b/app/test/virtual_pmd.c
@@ -576,7 +576,7 @@ virtual_ethdev_create(const char *name, struct ether_addr *mac_addr,
eth_dev->data->nb_rx_queues = (uint16_t)1;
eth_dev->data->nb_tx_queues = (uint16_t)1;

- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->intr_cbs));

eth_dev->data->dev_link.link_status = 0;
eth_dev->data->dev_link.link_speed = ETH_LINK_SPEED_10000;
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index ea3a1fb..e4b3315 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -265,7 +265,7 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
eth_dev->data->rx_mbuf_alloc_failed = 0;

/* init user callbacks */
- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->intr_cbs));

/*
* Set the default MTU.
@@ -2738,7 +2738,7 @@ rte_eth_dev_callback_register(uint8_t port_id,
dev = &rte_eth_devices[port_id];
rte_spinlock_lock(&rte_eth_dev_cb_lock);

- TAILQ_FOREACH(user_cb, &(dev->callbacks), next) {
+ TAILQ_FOREACH(user_cb, &(dev->intr_cbs), next) {
if (user_cb->cb_fn == cb_fn &&
user_cb->cb_arg == cb_arg &&
user_cb->event == event) {
@@ -2752,7 +2752,7 @@ rte_eth_dev_callback_register(uint8_t port_id,
user_cb->cb_fn = cb_fn;
user_cb->cb_arg = cb_arg;
user_cb->event = event;
- TAILQ_INSERT_TAIL(&(dev->callbacks), user_cb, next);
+ TAILQ_INSERT_TAIL(&(dev->intr_cbs), user_cb, next);
}

rte_spinlock_unlock(&rte_eth_dev_cb_lock);
@@ -2779,7 +2779,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
rte_spinlock_lock(&rte_eth_dev_cb_lock);

ret = 0;
- for (cb = TAILQ_FIRST(&dev->callbacks); cb != NULL; cb = next) {
+ for (cb = TAILQ_FIRST(&dev->intr_cbs); cb != NULL; cb = next) {

next = TAILQ_NEXT(cb, next);

@@ -2793,7 +2793,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
* then remove it.
*/
if (cb->active == 0) {
- TAILQ_REMOVE(&(dev->callbacks), cb, next);
+ TAILQ_REMOVE(&(dev->intr_cbs), cb, next);
rte_free(cb);
} else {
ret = -EAGAIN;
@@ -2812,7 +2812,7 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
struct rte_eth_dev_callback dev_cb;

rte_spinlock_lock(&rte_eth_dev_cb_lock);
- TAILQ_FOREACH(cb_lst, &(dev->callbacks), next) {
+ TAILQ_FOREACH(cb_lst, &(dev->intr_cbs), next) {
if (cb_lst->cb_fn == NULL || cb_lst->event != event)
continue;
dev_cb = *cb_lst;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 1200c1c..9c67488 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1538,7 +1538,7 @@ struct rte_eth_dev {
const struct eth_driver *driver;/**< Driver for this device */
struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
- struct rte_eth_dev_cb_list callbacks; /**< User application callbacks */
+ struct rte_eth_dev_cb_list intr_cbs; /**< User application callbacks on interrupt*/
};

struct rte_eth_dev_sriov {
diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c
index 4ab3267..4a66609 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_api.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_api.c
@@ -251,7 +251,7 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
eth_dev->data->nb_rx_queues = (uint16_t)1;
eth_dev->data->nb_tx_queues = (uint16_t)1;

- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->intr_cbs));

eth_dev->data->dev_link.link_status = 0;
--
1.7.4.1
John McNamara
2015-02-12 19:57:57 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

Rough example showing how callbacks can be used to insert a timestamp
into each packet on RX. Then on TX the timestamp is used to calculate
the packet latency in cycles through the app.

Signed-off-by: Bruce Richardson <***@intel.com>
---
examples/rxtx_callbacks/Makefile | 57 +++++++++
examples/rxtx_callbacks/basicfwd.c | 222 ++++++++++++++++++++++++++++++++++++
examples/rxtx_callbacks/basicfwd.h | 46 ++++++++
3 files changed, 325 insertions(+), 0 deletions(-)
create mode 100644 examples/rxtx_callbacks/Makefile
create mode 100644 examples/rxtx_callbacks/basicfwd.c
create mode 100644 examples/rxtx_callbacks/basicfwd.h

diff --git a/examples/rxtx_callbacks/Makefile b/examples/rxtx_callbacks/Makefile
new file mode 100644
index 0000000..4a5d99f
--- /dev/null
+++ b/examples/rxtx_callbacks/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = basicfwd
+
+# all source are stored in SRCS-y
+SRCS-y := basicfwd.c
+
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+EXTRA_CFLAGS += -O3 -g -Wfatal-errors
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/rxtx_callbacks/basicfwd.c b/examples/rxtx_callbacks/basicfwd.c
new file mode 100644
index 0000000..0209bf4
--- /dev/null
+++ b/examples/rxtx_callbacks/basicfwd.c
@@ -0,0 +1,222 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <inttypes.h>
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_mbuf.h>
+#include "basicfwd.h"
+
+#define RX_RING_SIZE 128
+#define TX_RING_SIZE 512
+
+#define NUM_MBUFS 8191
+#define MBUF_SIZE (1600 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+#define MBUF_CACHE_SIZE 250
+#define BURST_SIZE 32
+
+static const struct rte_eth_conf port_conf_default = {
+ .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN, },
+};
+
+static unsigned nb_ports;
+
+static struct {
+ uint64_t total_cycles;
+ uint64_t total_pkts;
+} latency_numbers;
+
+
+static uint16_t
+add_timestamps(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+ struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
+{
+ unsigned i;
+ uint64_t now = rte_rdtsc();
+ for (i = 0; i < nb_pkts; i++)
+ pkts[i]->udata64 = now;
+ return nb_pkts;
+}
+
+static uint16_t
+calc_latency(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+ struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
+{
+ uint64_t cycles = 0;
+ uint64_t now = rte_rdtsc();
+ unsigned i;
+ for (i = 0; i < nb_pkts; i++)
+ cycles += now - pkts[i]->udata64;
+ latency_numbers.total_cycles += cycles;
+ latency_numbers.total_pkts += nb_pkts;
+
+ if (latency_numbers.total_pkts > (100 * 1000 * 1000ULL)) {
+ printf("Latency = %"PRIu64" cycles\n",
+ latency_numbers.total_cycles / latency_numbers.total_pkts);
+ latency_numbers.total_cycles = latency_numbers.total_pkts = 0;
+ }
+ return nb_pkts;
+}
+
+/*
+ * Initialises a given port using global settings and with the rx buffers
+ * coming from the mbuf_pool passed as parameter
+ */
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+ struct rte_eth_conf port_conf = port_conf_default;
+ const uint16_t rx_rings = 1, tx_rings = 1;
+ int retval;
+ uint16_t q;
+
+ if (port >= rte_eth_dev_count())
+ return -1;
+
+ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+ if (retval != 0)
+ return retval;
+
+ for (q = 0; q < rx_rings; q++) {
+ retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
+ rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+ if (retval < 0)
+ return retval;
+ }
+
+ for (q = 0; q < tx_rings; q++) {
+ retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
+ rte_eth_dev_socket_id(port), NULL);
+ if (retval < 0)
+ return retval;
+ }
+
+ retval = rte_eth_dev_start(port);
+ if (retval < 0)
+ return retval;
+
+ struct ether_addr addr;
+ rte_eth_macaddr_get(port, &addr);
+ printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+ " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+ (unsigned)port,
+ addr.addr_bytes[0], addr.addr_bytes[1],
+ addr.addr_bytes[2], addr.addr_bytes[3],
+ addr.addr_bytes[4], addr.addr_bytes[5]);
+
+ rte_eth_promiscuous_enable(port);
+ rte_eth_add_rx_callback(port, 0, add_timestamps, NULL);
+ rte_eth_add_tx_callback(port, 0, calc_latency, NULL);
+
+ return 0;
+}
+
+/*
+ * Main thread that does the work, reading from INPUT_PORT
+ * and writing to OUTPUT_PORT
+ */
+static __attribute__((noreturn)) void
+lcore_main(void)
+{
+ uint8_t port;
+ for (port = 0; port < nb_ports; port++)
+ if (rte_eth_dev_socket_id(port) > 0 &&
+ rte_eth_dev_socket_id(port) !=
+ (int)rte_socket_id())
+ printf("WARNING, port %u is on remote NUMA node to "
+ "polling thread.\n\tPerformance will "
+ "not be optimal.\n", port);
+
+ printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
+ rte_lcore_id());
+ for (;;) {
+ for (port = 0; port < nb_ports; port++) {
+ struct rte_mbuf *bufs[BURST_SIZE];
+ const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
+ bufs, BURST_SIZE);
+ if (unlikely(nb_rx == 0))
+ continue;
+ const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
+ bufs, nb_rx);
+ if (unlikely(nb_tx < nb_rx)) {
+ uint16_t buf;
+ for (buf = nb_tx; buf < nb_rx; buf++)
+ rte_pktmbuf_free(bufs[buf]);
+ }
+ }
+ }
+}
+
+/* Main function, does initialisation and calls the per-lcore functions */
+int
+MAIN(int argc, char *argv[])
+{
+ struct rte_mempool *mbuf_pool;
+ uint8_t portid;
+
+ /* init EAL */
+ int ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+ argc -= ret;
+ argv += ret;
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports < 2 || (nb_ports & 1))
+ rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n");
+
+ mbuf_pool = rte_mempool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
+ MBUF_SIZE, MBUF_CACHE_SIZE,
+ sizeof(struct rte_pktmbuf_pool_private),
+ rte_pktmbuf_pool_init, NULL,
+ rte_pktmbuf_init, NULL,
+ rte_socket_id(), 0);
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++)
+ if (port_init(portid, mbuf_pool) != 0)
+ rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n",
+ portid);
+
+ if (rte_lcore_count() > 1)
+ printf("\nWARNING: Too much enabled lcores - App uses only 1 lcore\n");
+
+ /* call lcore_main on master core only */
+ lcore_main();
+ return 0;
+}
diff --git a/examples/rxtx_callbacks/basicfwd.h b/examples/rxtx_callbacks/basicfwd.h
new file mode 100644
index 0000000..3797b5d
--- /dev/null
+++ b/examples/rxtx_callbacks/basicfwd.h
@@ -0,0 +1,46 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef BASICFWD_H
+#define BASICFWD_H
+
+
+#ifdef RTE_EXEC_ENV_BAREMETAL
+#define MAIN _main
+#else
+#define MAIN main
+#endif
+
+int MAIN(int argc, char *argv[]);
+
+#endif /* BASICFWD_H */
--
1.7.4.1
John McNamara
2015-02-12 19:57:56 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

Add in support for inline processing of packets inside the RX or
TX call. For an RX callback, what happens is that we get a set of
packets from the NIC and then pass them to a callback function, if
configured, to allow additional processing to be done on them, e.g.
filling in more mbuf fields, before passing back to the application.
On TX, the packets are similarly post-processed before being handed
to the NIC for transmission.

Signed-off-by: Bruce Richardson <***@intel.com>
---
lib/librte_ether/rte_ethdev.c | 165 +++++++++++++++++++++++++++++++++++++-
lib/librte_ether/rte_ethdev.h | 175 ++++++++++++++++++++++++++++++++++++++++-
2 files changed, 334 insertions(+), 6 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index e4b3315..944737e 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -337,6 +337,15 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_rx_queues = 0;
return -(ENOMEM);
}
+ dev->rx_cbs = rte_zmalloc("ethdev->rx_cbs",
+ sizeof(*dev->rx_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->rx_cbs == NULL) {
+ rte_free(dev->data->rx_queues);
+ dev->data->rx_queues = NULL;
+ dev->data->nb_rx_queues = 0;
+ return -(ENOMEM);
+ }
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP);

@@ -348,10 +357,18 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
RTE_CACHE_LINE_SIZE);
if (rxq == NULL)
return -(ENOMEM);
+ dev->rx_cbs = rte_realloc(dev->rx_cbs, sizeof(*dev->rx_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->rx_cbs == NULL)
+ return -(ENOMEM);

- if (nb_queues > old_nb_queues)
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(rxq + old_nb_queues, 0,
- sizeof(rxq[0]) * (nb_queues - old_nb_queues));
+ sizeof(rxq[0]) * new_qs);
+ memset(dev->rx_cbs + old_nb_queues, 0,
+ sizeof(dev->rx_cbs[0]) * new_qs);
+ }

dev->data->rx_queues = rxq;

@@ -479,6 +496,15 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_tx_queues = 0;
return -(ENOMEM);
}
+ dev->tx_cbs = rte_zmalloc("ethdev->tx_cbs",
+ sizeof(*dev->tx_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->tx_cbs == NULL) {
+ rte_free(dev->data->tx_queues);
+ dev->data->tx_queues = NULL;
+ dev->data->nb_tx_queues = 0;
+ return -(ENOMEM);
+ }
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP);

@@ -490,10 +516,19 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
RTE_CACHE_LINE_SIZE);
if (txq == NULL)
return -(ENOMEM);
+ dev->tx_cbs = rte_realloc(dev->tx_cbs, sizeof(*dev->tx_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->tx_cbs == NULL)
+ return -(ENOMEM);
+

- if (nb_queues > old_nb_queues)
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(txq + old_nb_queues, 0,
- sizeof(txq[0]) * (nb_queues - old_nb_queues));
+ sizeof(txq[0]) * new_qs);
+ memset(dev->tx_cbs + old_nb_queues, 0,
+ sizeof(dev->tx_cbs[0]) * new_qs);
+ }

dev->data->tx_queues = txq;

@@ -3253,3 +3288,125 @@ rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP);
return (*dev->dev_ops->filter_ctrl)(dev, filter_type, filter_op, arg);
}
+
+void *
+rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param)
+{
+ /* check input parameters */
+ if (port_id >= nb_ports || fn == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+ if (cb == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ cb->fn = fn;
+ cb->param = user_param;
+ cb->next = rte_eth_devices[port_id].rx_cbs[queue_id];
+ rte_eth_devices[port_id].rx_cbs[queue_id] = cb;
+ return cb;
+}
+
+void *
+rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param)
+{
+ /* check input parameters */
+ if (port_id >= nb_ports || fn == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+ if (cb == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ cb->fn = fn;
+ cb->param = user_param;
+ cb->next = rte_eth_devices[port_id].tx_cbs[queue_id];
+ rte_eth_devices[port_id].tx_cbs[queue_id] = cb;
+ return cb;
+}
+
+int
+rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb)
+{
+ /* Check input parameters. */
+ if (port_id >= nb_ports || user_cb == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+ return (-EINVAL);
+ }
+
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ struct rte_eth_rxtx_callback *cb = dev->rx_cbs[queue_id];
+ struct rte_eth_rxtx_callback *prev_cb;
+
+ if (cb == user_cb) {
+ /* Reset head pointer and remove user cb if first in the list. */
+ dev->rx_cbs[queue_id] = user_cb->next;
+ return 0;
+ }
+ else {
+ /* Remove the user cb from the callback list. */
+ do {
+ prev_cb = cb;
+ cb = cb->next;
+
+ if (cb == user_cb) {
+ prev_cb->next = user_cb->next;
+ return 0;
+ }
+
+ } while (cb != NULL);
+ }
+
+ /* Callback wasn't found. */
+ return (-EINVAL);
+}
+
+int
+rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb)
+{
+ /* Check input parameters. */
+ if (port_id >= nb_ports || user_cb == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+ return (-EINVAL);
+ }
+
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ struct rte_eth_rxtx_callback *cb = dev->tx_cbs[queue_id];
+ struct rte_eth_rxtx_callback *prev_cb;
+
+ if (cb == user_cb) {
+ /* Reset head pointer and remove user cb if first in the list. */
+ dev->tx_cbs[queue_id] = user_cb->next;
+ return 0;
+ }
+ else {
+ /* Remove the user cb from the callback list. */
+ do {
+ prev_cb = cb;
+ cb = cb->next;
+
+ if (cb == user_cb) {
+ prev_cb->next = user_cb->next;
+ return 0;
+ }
+
+ } while (cb != NULL);
+ }
+
+ /* Callback wasn't found. */
+ return (-EINVAL);
+}
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 9c67488..def7105 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1522,6 +1522,47 @@ struct eth_dev_ops {
};

/**
+ * Function type used for callbacks for processing packets on RX and TX
+ *
+ * If configured for RX, it is called with a burst of packets that have just
+ * been received on the given port and queue. On TX, it is called with a burst
+ * of packets immediately before those packets are put onto the hardware queue
+ * for transmission.
+ *
+ * @param port
+ * The ethernet port on which rx or tx is being performed
+ * @param queue
+ * The queue on the ethernet port which is being used to receive or transmit
+ * the packets.
+ * @param pkts
+ * The burst of packets on which processing is to be done. On RX, these
+ * packets have just been received. On TX, they are about to be transmitted.
+ * @param nb_pkts
+ * The number of packets in the burst pointed to by "pkts"
+ * @param user_param
+ * The arbitrary user parameter passed in by the application when the callback
+ * was originally configured.
+ * @return
+ * The number of packets remaining in pkts after processing.
+ * * On RX, this will be returned to the user as the return value from
+ * rte_eth_rx_burst.
+ * * On TX, this will be the number of packets actually written to the NIC.
+ */
+typedef uint16_t (*rte_rxtx_callback_fn)(uint8_t port, uint16_t queue,
+ struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param);
+
+/**
+ * @internal
+ * Structure used to hold information about the callbacks to be called for a
+ * queue on RX and TX.
+ */
+struct rte_eth_rxtx_callback {
+ struct rte_eth_rxtx_callback *next;
+ rte_rxtx_callback_fn fn;
+ void *param;
+};
+
+/**
* @internal
* The generic data structure associated with each ethernet device.
*
@@ -1538,7 +1579,21 @@ struct rte_eth_dev {
const struct eth_driver *driver;/**< Driver for this device */
struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
- struct rte_eth_dev_cb_list intr_cbs; /**< User application callbacks on interrupt*/
+
+ /** User application callbacks for NIC interrupts */
+ struct rte_eth_dev_cb_list intr_cbs;
+
+ /**
+ * User-supplied functions called from rx_burst to post-process
+ * received packets before passing them to the user
+ */
+ struct rte_eth_rxtx_callback **rx_cbs;
+
+ /**
+ * User-supplied functions called from tx_burst to pre-process
+ * outgoing packets before passing them to the driver for transmission.
+ */
+ struct rte_eth_rxtx_callback **tx_cbs;
};

struct rte_eth_dev_sriov {
@@ -2390,7 +2445,17 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;

dev = &rte_eth_devices[port_id];
- return (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts, nb_pkts);
+ nb_pkts = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts,
+ nb_pkts);
+ struct rte_eth_rxtx_callback *cb = dev->rx_cbs[queue_id];
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, rx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
+ return nb_pkts;
}
#endif

@@ -2517,6 +2582,14 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;

dev = &rte_eth_devices[port_id];
+ struct rte_eth_rxtx_callback *cb = dev->tx_cbs[queue_id];
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, tx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
}
#endif
@@ -3664,6 +3737,104 @@ int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_ty
int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
enum rte_filter_op filter_op, void *arg);

+/**
+ * Add a callback to be called on packet RX on a given port and queue
+ *
+ * This API configures a function to be called for each burst of
+ * packets received on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_rx_callback() API.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ * The callback function
+ * @param user_param
+ * A generic pointer parameter which will be passed to each invocation of the
+ * callback function on this port and queue.
+ * @return
+ * NULL on error.
+ * On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
+/**
+ * Add a callback to be called on packet TX on a given port and queue
+ *
+ * This API configures a function to be called for each burst of
+ * packets sent on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_tx_callback() API.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ * The callback function
+ * @param user_param
+ * A generic pointer parameter which will be passed to each invocation of the
+ * callback function on this port and queue.
+ * @return
+ * NULL on error.
+ * On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
+/**
+ * Remove an RX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_rx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't
+ * freed. That should be done in a separate step using rte_free() when
+ * the port is stopped or when the callback is no longer in use.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ * User supplied callback created via rte_eth_add_rx_callback().
+ *
+ * @return
+ * - 0: Success. Callback was removed.
+ * - -EINVAL: The port_id or the queue_id is out of range, or the callback is
+ * NULL or not found for the port/queue.
+ */
+int rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb);
+
+/**
+ * Remove a TX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_tx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't
+ * freed. That should be done in a separate step using rte_free() when
+ * the port is stopped or when the callback is no longer in use.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ * User supplied callback created via rte_eth_add_tx_callback().
+ *
+ * @return
+ * - 0: Success. Callback was removed.
+ * - -EINVAL: The port_id or the queue_id is out of range, or the callback is
+ * NULL or not found for the port/queue.
+ */
+int rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb);
+
#ifdef __cplusplus
}
#endif
--
1.7.4.1
Neil Horman
2015-02-12 21:12:15 UTC
Permalink
Post by Bruce Richardson
Add in support for inline processing of packets inside the RX or
TX call. For an RX callback, what happens is that we get a set of
packets from the NIC and then pass them to a callback function, if
configured, to allow additional processing to be done on them, e.g.
filling in more mbuf fields, before passing back to the application.
On TX, the packets are similarly post-processed before being handed
to the NIC for transmission.
---
lib/librte_ether/rte_ethdev.c | 165 +++++++++++++++++++++++++++++++++++++-
lib/librte_ether/rte_ethdev.h | 175 ++++++++++++++++++++++++++++++++++++++++-
2 files changed, 334 insertions(+), 6 deletions(-)
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index e4b3315..944737e 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -337,6 +337,15 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_rx_queues = 0;
return -(ENOMEM);
}
+ dev->rx_cbs = rte_zmalloc("ethdev->rx_cbs",
+ sizeof(*dev->rx_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->rx_cbs == NULL) {
+ rte_free(dev->data->rx_queues);
+ dev->data->rx_queues = NULL;
+ dev->data->nb_rx_queues = 0;
+ return -(ENOMEM);
+ }
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP);
@@ -348,10 +357,18 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
RTE_CACHE_LINE_SIZE);
if (rxq == NULL)
return -(ENOMEM);
+ dev->rx_cbs = rte_realloc(dev->rx_cbs, sizeof(*dev->rx_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->rx_cbs == NULL)
+ return -(ENOMEM);
- if (nb_queues > old_nb_queues)
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(rxq + old_nb_queues, 0,
- sizeof(rxq[0]) * (nb_queues - old_nb_queues));
+ sizeof(rxq[0]) * new_qs);
+ memset(dev->rx_cbs + old_nb_queues, 0,
+ sizeof(dev->rx_cbs[0]) * new_qs);
+ }
dev->data->rx_queues = rxq;
@@ -479,6 +496,15 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_tx_queues = 0;
return -(ENOMEM);
}
+ dev->tx_cbs = rte_zmalloc("ethdev->tx_cbs",
+ sizeof(*dev->tx_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->tx_cbs == NULL) {
+ rte_free(dev->data->tx_queues);
+ dev->data->tx_queues = NULL;
+ dev->data->nb_tx_queues = 0;
+ return -(ENOMEM);
+ }
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP);
@@ -490,10 +516,19 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
RTE_CACHE_LINE_SIZE);
if (txq == NULL)
return -(ENOMEM);
+ dev->tx_cbs = rte_realloc(dev->tx_cbs, sizeof(*dev->tx_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->tx_cbs == NULL)
+ return -(ENOMEM);
+
- if (nb_queues > old_nb_queues)
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(txq + old_nb_queues, 0,
- sizeof(txq[0]) * (nb_queues - old_nb_queues));
+ sizeof(txq[0]) * new_qs);
+ memset(dev->tx_cbs + old_nb_queues, 0,
+ sizeof(dev->tx_cbs[0]) * new_qs);
+ }
dev->data->tx_queues = txq;
@@ -3253,3 +3288,125 @@ rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP);
return (*dev->dev_ops->filter_ctrl)(dev, filter_type, filter_op, arg);
}
+
+void *
+rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param)
+{
These, and its companion manipulator functions need to be added to the ABI
versioning scripts. As it currently stands this won't be exposed in a shared
build, and so the examples won't build.

Neil
Declan Doherty
2015-02-13 14:54:39 UTC
Permalink
This patchset is for a small addition to the ethdev library, to
add in support for callbacks at the RX and TX stages. This allows
packet processing to be done on packets before they get returned
to applications using rte_eth_rx_burst call.
http://dpdk.org/ml/archives/dev/2014-December/010491.html
For this version we spent some time investigating Stephen Hemminger's
suggestion of using the userspace RCU (read-copy-update) library for
http://urcu.so/
The default liburcu (which defaulted to liburcu-mb) requires the least
interaction from the end user but showed a 25% drop in packet throughput
in the callback sample app.
The liburcu-qsbr (quiescent state) variant showed a 1% drop in packet
throughput in the callback sample app. However it requires registered
RCU threads in the program to periodically announce quiescent states.
This makes it more difficult to implement for end user applications.
For this release we will document that callbacks should be added/removed
on stopped ports.
* Added callback removal functions.
* Minor fixes.
ethdev: rename callbacks field to intr_cbs
ethdev: Add in data rxtx callback support
examples: example showing use of callbacks.
app/test/virtual_pmd.c | 2 +-
examples/rxtx_callbacks/Makefile | 57 ++++++++
examples/rxtx_callbacks/basicfwd.c | 222 ++++++++++++++++++++++++++++++++
examples/rxtx_callbacks/basicfwd.h | 46 +++++++
lib/librte_ether/rte_ethdev.c | 177 ++++++++++++++++++++++++--
lib/librte_ether/rte_ethdev.h | 175 +++++++++++++++++++++++++-
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
7 files changed, 667 insertions(+), 14 deletions(-)
create mode 100644 examples/rxtx_callbacks/Makefile
create mode 100644 examples/rxtx_callbacks/basicfwd.c
create mode 100644 examples/rxtx_callbacks/basicfwd.h
Looks good to me, I'll ack the next version which has addressed the ABI
issues Neil raised. Also, it should probably be noted in the doxygen
comments for the add/remove rxtx callbacks that as currently implemented
the addition/removal of callbacks isn't thread-safe.

Declan
John McNamara
2015-02-13 15:39:46 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

The callbacks member of the rte_eth_dev structure has been renamed
to intr_cbs to make it clear that it refers to callbacks from NIC
interrupts. This then allows us to add other types of callbacks to
the structure without ambiguity.

Signed-off-by: Bruce Richardson <***@intel.com>
---
app/test/virtual_pmd.c | 2 +-
lib/librte_ether/rte_ethdev.c | 12 ++++++------
lib/librte_ether/rte_ethdev.h | 2 +-
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/app/test/virtual_pmd.c b/app/test/virtual_pmd.c
index 9fac95d..ec2474f 100644
--- a/app/test/virtual_pmd.c
+++ b/app/test/virtual_pmd.c
@@ -576,7 +576,7 @@ virtual_ethdev_create(const char *name, struct ether_addr *mac_addr,
eth_dev->data->nb_rx_queues = (uint16_t)1;
eth_dev->data->nb_tx_queues = (uint16_t)1;

- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->intr_cbs));

eth_dev->data->dev_link.link_status = 0;
eth_dev->data->dev_link.link_speed = ETH_LINK_SPEED_10000;
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index ea3a1fb..e4b3315 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -265,7 +265,7 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
eth_dev->data->rx_mbuf_alloc_failed = 0;

/* init user callbacks */
- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->intr_cbs));

/*
* Set the default MTU.
@@ -2738,7 +2738,7 @@ rte_eth_dev_callback_register(uint8_t port_id,
dev = &rte_eth_devices[port_id];
rte_spinlock_lock(&rte_eth_dev_cb_lock);

- TAILQ_FOREACH(user_cb, &(dev->callbacks), next) {
+ TAILQ_FOREACH(user_cb, &(dev->intr_cbs), next) {
if (user_cb->cb_fn == cb_fn &&
user_cb->cb_arg == cb_arg &&
user_cb->event == event) {
@@ -2752,7 +2752,7 @@ rte_eth_dev_callback_register(uint8_t port_id,
user_cb->cb_fn = cb_fn;
user_cb->cb_arg = cb_arg;
user_cb->event = event;
- TAILQ_INSERT_TAIL(&(dev->callbacks), user_cb, next);
+ TAILQ_INSERT_TAIL(&(dev->intr_cbs), user_cb, next);
}

rte_spinlock_unlock(&rte_eth_dev_cb_lock);
@@ -2779,7 +2779,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
rte_spinlock_lock(&rte_eth_dev_cb_lock);

ret = 0;
- for (cb = TAILQ_FIRST(&dev->callbacks); cb != NULL; cb = next) {
+ for (cb = TAILQ_FIRST(&dev->intr_cbs); cb != NULL; cb = next) {

next = TAILQ_NEXT(cb, next);

@@ -2793,7 +2793,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
* then remove it.
*/
if (cb->active == 0) {
- TAILQ_REMOVE(&(dev->callbacks), cb, next);
+ TAILQ_REMOVE(&(dev->intr_cbs), cb, next);
rte_free(cb);
} else {
ret = -EAGAIN;
@@ -2812,7 +2812,7 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
struct rte_eth_dev_callback dev_cb;

rte_spinlock_lock(&rte_eth_dev_cb_lock);
- TAILQ_FOREACH(cb_lst, &(dev->callbacks), next) {
+ TAILQ_FOREACH(cb_lst, &(dev->intr_cbs), next) {
if (cb_lst->cb_fn == NULL || cb_lst->event != event)
continue;
dev_cb = *cb_lst;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 1200c1c..9c67488 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1538,7 +1538,7 @@ struct rte_eth_dev {
const struct eth_driver *driver;/**< Driver for this device */
struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
- struct rte_eth_dev_cb_list callbacks; /**< User application callbacks */
+ struct rte_eth_dev_cb_list intr_cbs; /**< User application callbacks on interrupt*/
};

struct rte_eth_dev_sriov {
diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c
index 4ab3267..4a66609 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_api.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_api.c
@@ -251,7 +251,7 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
eth_dev->data->nb_rx_queues = (uint16_t)1;
eth_dev->data->nb_tx_queues = (uint16_t)1;

- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->intr_cbs));

eth_dev->data->dev_link.link_status = 0;
--
1.7.4.1
Thomas Monjalon
2015-02-13 16:06:57 UTC
Permalink
Post by Bruce Richardson
The callbacks member of the rte_eth_dev structure has been renamed
to intr_cbs to make it clear that it refers to callbacks from NIC
interrupts. This then allows us to add other types of callbacks to
the structure without ambiguity.
Yes, good.
Actually this callback is used only for link status interrupt, right?
In the patchset for interrupt mode, new callbacks are used, right?
Should we name this callback more precisely?
Thomas Monjalon
2015-02-13 16:52:01 UTC
Permalink
Post by Thomas Monjalon
Post by Bruce Richardson
The callbacks member of the rte_eth_dev structure has been renamed
to intr_cbs to make it clear that it refers to callbacks from NIC
interrupts. This then allows us to add other types of callbacks to
the structure without ambiguity.
Yes, good.
Actually this callback is used only for link status interrupt, right?
In the patchset for interrupt mode, new callbacks are used, right?
Should we name this callback more precisely?
More naming considerations:
- The link status interrupt callback could be named link_intr_cb.
- The Rx callback could be named post_rx_burst_cb
- The Tx callback could be named pre_tx_burst_cb
John McNamara
2015-02-13 15:39:47 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

Add in support for inline processing of packets inside the RX or
TX call. For an RX callback, what happens is that we get a set of
packets from the NIC and then pass them to a callback function, if
configured, to allow additional processing to be done on them, e.g.
filling in more mbuf fields, before passing back to the application.
On TX, the packets are similarly post-processed before being handed
to the NIC for transmission.

Signed-off-by: Bruce Richardson <***@intel.com>
---
lib/librte_ether/rte_ethdev.c | 163 ++++++++++++++++++++++++++++++++++-
lib/librte_ether/rte_ethdev.h | 191 ++++++++++++++++++++++++++++++++++++++++-
2 files changed, 348 insertions(+), 6 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index e4b3315..fcb35a1 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -337,6 +337,15 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_rx_queues = 0;
return -(ENOMEM);
}
+ dev->rx_cbs = rte_zmalloc("ethdev->rx_cbs",
+ sizeof(*dev->rx_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->rx_cbs == NULL) {
+ rte_free(dev->data->rx_queues);
+ dev->data->rx_queues = NULL;
+ dev->data->nb_rx_queues = 0;
+ return -(ENOMEM);
+ }
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP);

@@ -348,10 +357,18 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
RTE_CACHE_LINE_SIZE);
if (rxq == NULL)
return -(ENOMEM);
+ dev->rx_cbs = rte_realloc(dev->rx_cbs, sizeof(*dev->rx_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->rx_cbs == NULL)
+ return -(ENOMEM);

- if (nb_queues > old_nb_queues)
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(rxq + old_nb_queues, 0,
- sizeof(rxq[0]) * (nb_queues - old_nb_queues));
+ sizeof(rxq[0]) * new_qs);
+ memset(dev->rx_cbs + old_nb_queues, 0,
+ sizeof(dev->rx_cbs[0]) * new_qs);
+ }

dev->data->rx_queues = rxq;

@@ -479,6 +496,15 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_tx_queues = 0;
return -(ENOMEM);
}
+ dev->tx_cbs = rte_zmalloc("ethdev->tx_cbs",
+ sizeof(*dev->tx_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->tx_cbs == NULL) {
+ rte_free(dev->data->tx_queues);
+ dev->data->tx_queues = NULL;
+ dev->data->nb_tx_queues = 0;
+ return -(ENOMEM);
+ }
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP);

@@ -490,10 +516,19 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
RTE_CACHE_LINE_SIZE);
if (txq == NULL)
return -(ENOMEM);
+ dev->tx_cbs = rte_realloc(dev->tx_cbs, sizeof(*dev->tx_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->tx_cbs == NULL)
+ return -(ENOMEM);

- if (nb_queues > old_nb_queues)
+
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(txq + old_nb_queues, 0,
- sizeof(txq[0]) * (nb_queues - old_nb_queues));
+ sizeof(txq[0]) * new_qs);
+ memset(dev->tx_cbs + old_nb_queues, 0,
+ sizeof(dev->tx_cbs[0]) * new_qs);
+ }

dev->data->tx_queues = txq;

@@ -3253,3 +3288,123 @@ rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP);
return (*dev->dev_ops->filter_ctrl)(dev, filter_type, filter_op, arg);
}
+
+void *
+rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param)
+{
+ /* check input parameters */
+ if (port_id >= nb_ports || fn == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+ if (cb == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ cb->fn = fn;
+ cb->param = user_param;
+ cb->next = rte_eth_devices[port_id].rx_cbs[queue_id];
+ rte_eth_devices[port_id].rx_cbs[queue_id] = cb;
+ return cb;
+}
+
+void *
+rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param)
+{
+ /* check input parameters */
+ if (port_id >= nb_ports || fn == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+ if (cb == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ cb->fn = fn;
+ cb->param = user_param;
+ cb->next = rte_eth_devices[port_id].tx_cbs[queue_id];
+ rte_eth_devices[port_id].tx_cbs[queue_id] = cb;
+ return cb;
+}
+
+int
+rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb)
+{
+ /* Check input parameters. */
+ if (port_id >= nb_ports || user_cb == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+ return (-EINVAL);
+ }
+
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ struct rte_eth_rxtx_callback *cb = dev->rx_cbs[queue_id];
+ struct rte_eth_rxtx_callback *prev_cb;
+
+ /* Reset head pointer and remove user cb if first in the list. */
+ if (cb == user_cb) {
+ dev->rx_cbs[queue_id] = user_cb->next;
+ return 0;
+ }
+
+ /* Remove the user cb from the callback list. */
+ do {
+ prev_cb = cb;
+ cb = cb->next;
+
+ if (cb == user_cb) {
+ prev_cb->next = user_cb->next;
+ return 0;
+ }
+
+ } while (cb != NULL);
+
+ /* Callback wasn't found. */
+ return (-EINVAL);
+}
+
+int
+rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb)
+{
+ /* Check input parameters. */
+ if (port_id >= nb_ports || user_cb == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+ return (-EINVAL);
+ }
+
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ struct rte_eth_rxtx_callback *cb = dev->tx_cbs[queue_id];
+ struct rte_eth_rxtx_callback *prev_cb;
+
+ /* Reset head pointer and remove user cb if first in the list. */
+ if (cb == user_cb) {
+ dev->tx_cbs[queue_id] = user_cb->next;
+ return 0;
+ }
+
+ /* Remove the user cb from the callback list. */
+ do {
+ prev_cb = cb;
+ cb = cb->next;
+
+ if (cb == user_cb) {
+ prev_cb->next = user_cb->next;
+ return 0;
+ }
+
+ } while (cb != NULL);
+
+ /* Callback wasn't found. */
+ return (-EINVAL);
+}
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 9c67488..cfda9af 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1522,6 +1522,47 @@ struct eth_dev_ops {
};

/**
+ * Function type used for callbacks for processing packets on RX and TX
+ *
+ * If configured for RX, it is called with a burst of packets that have just
+ * been received on the given port and queue. On TX, it is called with a burst
+ * of packets immediately before those packets are put onto the hardware queue
+ * for transmission.
+ *
+ * @param port
+ * The ethernet port on which rx or tx is being performed
+ * @param queue
+ * The queue on the ethernet port which is being used to receive or transmit
+ * the packets.
+ * @param pkts
+ * The burst of packets on which processing is to be done. On RX, these
+ * packets have just been received. On TX, they are about to be transmitted.
+ * @param nb_pkts
+ * The number of packets in the burst pointed to by "pkts"
+ * @param user_param
+ * The arbitrary user parameter passed in by the application when the callback
+ * was originally configured.
+ * @return
+ * The number of packets remaining in pkts after processing.
+ * * On RX, this will be returned to the user as the return value from
+ * rte_eth_rx_burst.
+ * * On TX, this will be the number of packets actually written to the NIC.
+ */
+typedef uint16_t (*rte_rxtx_callback_fn)(uint8_t port, uint16_t queue,
+ struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param);
+
+/**
+ * @internal
+ * Structure used to hold information about the callbacks to be called for a
+ * queue on RX and TX.
+ */
+struct rte_eth_rxtx_callback {
+ struct rte_eth_rxtx_callback *next;
+ rte_rxtx_callback_fn fn;
+ void *param;
+};
+
+/**
* @internal
* The generic data structure associated with each ethernet device.
*
@@ -1538,7 +1579,21 @@ struct rte_eth_dev {
const struct eth_driver *driver;/**< Driver for this device */
struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
- struct rte_eth_dev_cb_list intr_cbs; /**< User application callbacks on interrupt*/
+
+ /** User application callbacks for NIC interrupts */
+ struct rte_eth_dev_cb_list intr_cbs;
+
+ /**
+ * User-supplied functions called from rx_burst to post-process
+ * received packets before passing them to the user
+ */
+ struct rte_eth_rxtx_callback **rx_cbs;
+
+ /**
+ * User-supplied functions called from tx_burst to pre-process
+ * outgoing packets before passing them to the driver for transmission.
+ */
+ struct rte_eth_rxtx_callback **tx_cbs;
};

struct rte_eth_dev_sriov {
@@ -2390,7 +2445,17 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;

dev = &rte_eth_devices[port_id];
- return (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts, nb_pkts);
+ nb_pkts = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts,
+ nb_pkts);
+ struct rte_eth_rxtx_callback *cb = dev->rx_cbs[queue_id];
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, rx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
+ return nb_pkts;
}
#endif

@@ -2517,6 +2582,14 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;

dev = &rte_eth_devices[port_id];
+ struct rte_eth_rxtx_callback *cb = dev->tx_cbs[queue_id];
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, tx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
}
#endif
@@ -3664,6 +3737,120 @@ int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_ty
int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
enum rte_filter_op filter_op, void *arg);

+/**
+ * Add a callback to be called on packet RX on a given port and queue.
+ *
+ * This API configures a function to be called for each burst of
+ * packets received on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_rx_callback().
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ * The callback function
+ * @param user_param
+ * A generic pointer parameter which will be passed to each invocation of the
+ * callback function on this port and queue.
+ *
+ * @return
+ * NULL on error.
+ * On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
+/**
+ * Add a callback to be called on packet TX on a given port and queue.
+ *
+ * This API configures a function to be called for each burst of
+ * packets sent on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_tx_callback().
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ * The callback function
+ * @param user_param
+ * A generic pointer parameter which will be passed to each invocation of the
+ * callback function on this port and queue.
+ *
+ * @return
+ * NULL on error.
+ * On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
+/**
+ * Remove an RX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_rx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since it may still be in use. The memory for the callback can be
+ * subsequently freed back by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ * callbacks are in flight e.g. if called from the thread doing RX/TX
+ * on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ * in-flight callbacks to complete.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ * User supplied callback created via rte_eth_add_rx_callback().
+ *
+ * @return
+ * - 0: Success. Callback was removed.
+ * - -EINVAL: The port_id or the queue_id is out of range, or the callback is
+ * NULL or not found for the port/queue.
+ */
+int rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb);
+
+/**
+ * Remove a TX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_tx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since it may still be in use. The memory for the callback can be
+ * subsequently freed back by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ * callbacks are in flight e.g. if called from the thread doing RX/TX
+ * on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ * in-flight callbacks to complete.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ * User supplied callback created via rte_eth_add_tx_callback().
+ *
+ * @return
+ * - 0: Success. Callback was removed.
+ * - -EINVAL: The port_id or the queue_id is out of range, or the callback is
+ * NULL or not found for the port/queue.
+ */
+int rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb);
+
#ifdef __cplusplus
}
#endif
--
1.7.4.1
Thomas Monjalon
2015-02-13 16:33:12 UTC
Permalink
Post by Bruce Richardson
Add in support for inline processing of packets inside the RX or
TX call. For an RX callback, what happens is that we get a set of
packets from the NIC and then pass them to a callback function, if
configured, to allow additional processing to be done on them, e.g.
filling in more mbuf fields, before passing back to the application.
On TX, the packets are similarly post-processed before being handed
to the NIC for transmission.
[...]
Post by Bruce Richardson
@@ -2390,7 +2445,17 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;
dev = &rte_eth_devices[port_id];
- return (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts, nb_pkts);
+ nb_pkts = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts,
+ nb_pkts);
+ struct rte_eth_rxtx_callback *cb = dev->rx_cbs[queue_id];
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, rx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
+ return nb_pkts;
}
#endif
@@ -2517,6 +2582,14 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;
dev = &rte_eth_devices[port_id];
+ struct rte_eth_rxtx_callback *cb = dev->tx_cbs[queue_id];
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, tx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
}
#endif
We all know how important the performance of these functions is.
So I wonder if we could reduce the impact of this change.
I don't like build options, but maybe one should be discussed here.
Bruce Richardson
2015-02-13 17:49:12 UTC
Permalink
Post by Thomas Monjalon
Post by Bruce Richardson
Add in support for inline processing of packets inside the RX or
TX call. For an RX callback, what happens is that we get a set of
packets from the NIC and then pass them to a callback function, if
configured, to allow additional processing to be done on them, e.g.
filling in more mbuf fields, before passing back to the application.
On TX, the packets are similarly post-processed before being handed
to the NIC for transmission.
[...]
Post by Bruce Richardson
@@ -2390,7 +2445,17 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;
dev = &rte_eth_devices[port_id];
- return (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts, nb_pkts);
+ nb_pkts = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts,
+ nb_pkts);
+ struct rte_eth_rxtx_callback *cb = dev->rx_cbs[queue_id];
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, rx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
+ return nb_pkts;
}
#endif
@@ -2517,6 +2582,14 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;
dev = &rte_eth_devices[port_id];
+ struct rte_eth_rxtx_callback *cb = dev->tx_cbs[queue_id];
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, tx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
}
#endif
We all know how much the performance of these functions are important.
So I wonder if we could reduce the impact of this change.
I don't like the build options but maybe it should be discussed.
Performance impact is minimal, there was some discussion of it previously when
I published the earlier RFC draft. In my quick tests, with vector PMD in the
fast path, the impact is <=1% for this change as is (i.e. no callbacks set up),
and a further 1% perf hit to actually call an empty callback.

http://article.gmane.org/gmane.comp.networking.dpdk.devel/10489
http://article.gmane.org/gmane.comp.networking.dpdk.devel/10735

Unless people start seeing a higher perf hit on some platforms, I don't think
a build-time option is worth having.

Regards,
/Bruce
John McNamara
2015-02-13 15:39:48 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

Example showing how callbacks can be used to insert a timestamp
into each packet on RX. On TX the timestamp is used to calculate
the packet latency through the app, in cycles.

Signed-off-by: Bruce Richardson <***@intel.com>
---
examples/rxtx_callbacks/Makefile | 57 +++++++++
examples/rxtx_callbacks/basicfwd.c | 222 ++++++++++++++++++++++++++++++++++++
examples/rxtx_callbacks/basicfwd.h | 46 ++++++++
3 files changed, 325 insertions(+), 0 deletions(-)
create mode 100644 examples/rxtx_callbacks/Makefile
create mode 100644 examples/rxtx_callbacks/basicfwd.c
create mode 100644 examples/rxtx_callbacks/basicfwd.h

diff --git a/examples/rxtx_callbacks/Makefile b/examples/rxtx_callbacks/Makefile
new file mode 100644
index 0000000..4a5d99f
--- /dev/null
+++ b/examples/rxtx_callbacks/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = basicfwd
+
+# all sources are stored in SRCS-y
+SRCS-y := basicfwd.c
+
+CFLAGS += $(WERROR_FLAGS)
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+# NOTE(review): set for main.o, but SRCS-y only lists basicfwd.c - confirm.
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+EXTRA_CFLAGS += -O3 -g -Wfatal-errors
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/rxtx_callbacks/basicfwd.c b/examples/rxtx_callbacks/basicfwd.c
new file mode 100644
index 0000000..0209bf4
--- /dev/null
+++ b/examples/rxtx_callbacks/basicfwd.c
@@ -0,0 +1,222 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <inttypes.h>
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_mbuf.h>
+#include "basicfwd.h"
+
+#define RX_RING_SIZE 128
+#define TX_RING_SIZE 512
+
+#define NUM_MBUFS 8191
+#define MBUF_SIZE (1600 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+#define MBUF_CACHE_SIZE 250
+#define BURST_SIZE 32
+
+static const struct rte_eth_conf port_conf_default = {
+ .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN, },
+};
+
+static unsigned nb_ports;
+
+static struct {
+ uint64_t total_cycles;
+ uint64_t total_pkts;
+} latency_numbers;
+
+/*
+ * RX callback: stamp every packet of a received burst with a timestamp.
+ *
+ * One rte_rdtsc() reading is taken per call and stored in the udata64
+ * field of each of the nb_pkts mbufs in pkts[]. The port, queue index and
+ * user parameter are not used. Returns nb_pkts unchanged.
+ */
+static uint16_t
+add_timestamps(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+ struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
+{
+ unsigned i;
+ uint64_t now = rte_rdtsc();
+ for (i = 0; i < nb_pkts; i++)
+ pkts[i]->udata64 = now;
+ return nb_pkts;
+}
+/*
+ * TX callback: accumulate packet latency and periodically print the mean.
+ *
+ * For each packet, the difference between one rte_rdtsc() reading taken on
+ * entry and the value found in the mbuf's udata64 field is added to the
+ * running totals in latency_numbers. Once more than 100 million packets
+ * have been counted, total_cycles / total_pkts is printed and both totals
+ * are reset to zero. Returns nb_pkts unchanged.
+ *
+ * rte_eth_tx_burst() runs TX callbacks before handing the burst to the
+ * driver, so packets the driver then refuses are included in the totals.
+ *
+ * NOTE(review): latency_numbers is a single file-scope struct updated here
+ * without locking - presumably fine only because this example polls all
+ * ports from one lcore; confirm before using it from several threads.
+ */
+static uint16_t
+calc_latency(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+ struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
+{
+ uint64_t cycles = 0;
+ uint64_t now = rte_rdtsc();
+ unsigned i;
+ for (i = 0; i < nb_pkts; i++)
+ cycles += now - pkts[i]->udata64;
+ latency_numbers.total_cycles += cycles;
+ latency_numbers.total_pkts += nb_pkts;
+
+ if (latency_numbers.total_pkts > (100 * 1000 * 1000ULL)) {
+ printf("Latency = %"PRIu64" cycles\n",
+ latency_numbers.total_cycles / latency_numbers.total_pkts);
+ latency_numbers.total_cycles = latency_numbers.total_pkts = 0;
+ }
+ return nb_pkts;
+}
+
+/*
+ * Initialises a given port using global settings and with the rx buffers
+ * coming from the mbuf_pool passed as parameter.
+ *
+ * The port is configured with one RX and one TX queue, both queues are set
+ * up on the socket reported by rte_eth_dev_socket_id(), the device is
+ * started, its MAC address is printed and promiscuous mode is enabled.
+ * Finally add_timestamps() is registered as RX callback and calc_latency()
+ * as TX callback, both on queue 0.
+ *
+ * Returns 0 on success, -1 if port is not below rte_eth_dev_count(), or the
+ * value returned by the first configure / queue setup / start call that
+ * fails.
+ *
+ * NOTE(review): the return values of rte_eth_add_rx_callback() and
+ * rte_eth_add_tx_callback() are ignored, so a failed registration goes
+ * unnoticed here.
+ */
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+ struct rte_eth_conf port_conf = port_conf_default;
+ const uint16_t rx_rings = 1, tx_rings = 1;
+ int retval;
+ uint16_t q;
+
+ if (port >= rte_eth_dev_count())
+ return -1;
+
+ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+ if (retval != 0)
+ return retval;
+
+ for (q = 0; q < rx_rings; q++) {
+ retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
+ rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+ if (retval < 0)
+ return retval;
+ }
+
+ for (q = 0; q < tx_rings; q++) {
+ retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
+ rte_eth_dev_socket_id(port), NULL);
+ if (retval < 0)
+ return retval;
+ }
+
+ retval = rte_eth_dev_start(port);
+ if (retval < 0)
+ return retval;
+
+ struct ether_addr addr;
+ rte_eth_macaddr_get(port, &addr);
+ printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+ " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+ (unsigned)port,
+ addr.addr_bytes[0], addr.addr_bytes[1],
+ addr.addr_bytes[2], addr.addr_bytes[3],
+ addr.addr_bytes[4], addr.addr_bytes[5]);
+
+ rte_eth_promiscuous_enable(port);
+ rte_eth_add_rx_callback(port, 0, add_timestamps, NULL);
+ rte_eth_add_tx_callback(port, 0, calc_latency, NULL);
+
+ return 0;
+}
+
+/*
+ * Main thread that does the work; it never returns. In an endless loop it
+ * receives a burst of up to BURST_SIZE packets from queue 0 of every port
+ * and transmits it on queue 0 of the paired port (port ^ 1, so 0 pairs
+ * with 1, 2 with 3, ...). Packets the TX call did not accept are freed.
+ *
+ * Before the loop starts, a warning is printed for each port whose NUMA
+ * socket differs from the socket of the polling thread.
+ *
+ * NOTE(review): the check uses "socket id > 0", so a port reporting
+ * socket 0 never triggers the warning even if the polling lcore sits on
+ * another socket; ">= 0" looks like the intended test - confirm.
+ */
+static __attribute__((noreturn)) void
+lcore_main(void)
+{
+ uint8_t port;
+ for (port = 0; port < nb_ports; port++)
+ if (rte_eth_dev_socket_id(port) > 0 &&
+ rte_eth_dev_socket_id(port) !=
+ (int)rte_socket_id())
+ printf("WARNING, port %u is on remote NUMA node to "
+ "polling thread.\n\tPerformance will "
+ "not be optimal.\n", port);
+
+ printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
+ rte_lcore_id());
+ for (;;) {
+ for (port = 0; port < nb_ports; port++) {
+ struct rte_mbuf *bufs[BURST_SIZE];
+ const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
+ bufs, BURST_SIZE);
+ if (unlikely(nb_rx == 0))
+ continue;
+ const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
+ bufs, nb_rx);
+ if (unlikely(nb_tx < nb_rx)) {
+ uint16_t buf;
+ for (buf = nb_tx; buf < nb_rx; buf++)
+ rte_pktmbuf_free(bufs[buf]);
+ }
+ }
+ }
+}
+
+/*
+ * Main function, does initialisation and calls the per-lcore functions.
+ *
+ * Initialises the EAL and skips the arguments it consumed, requires an
+ * even number of ports (at least two), creates one mbuf pool holding
+ * NUM_MBUFS * nb_ports buffers that is handed to every port, initialises
+ * each port with port_init() and then runs lcore_main() on the calling
+ * (master) core. Any failure ends the program through rte_exit(). A
+ * warning is printed when more than one lcore is enabled, because only
+ * one is used.
+ */
+int
+MAIN(int argc, char *argv[])
+{
+ struct rte_mempool *mbuf_pool;
+ uint8_t portid;
+
+ /* init EAL */
+ int ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+ argc -= ret;
+ argv += ret;
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports < 2 || (nb_ports & 1))
+ rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n");
+
+ mbuf_pool = rte_mempool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
+ MBUF_SIZE, MBUF_CACHE_SIZE,
+ sizeof(struct rte_pktmbuf_pool_private),
+ rte_pktmbuf_pool_init, NULL,
+ rte_pktmbuf_init, NULL,
+ rte_socket_id(), 0);
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++)
+ if (port_init(portid, mbuf_pool) != 0)
+ rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n",
+ portid);
+
+ if (rte_lcore_count() > 1)
+ printf("\nWARNING: Too much enabled lcores - App uses only 1 lcore\n");
+
+ /* call lcore_main on master core only */
+ lcore_main();
+ return 0;
+}
diff --git a/examples/rxtx_callbacks/basicfwd.h b/examples/rxtx_callbacks/basicfwd.h
new file mode 100644
index 0000000..3797b5d
--- /dev/null
+++ b/examples/rxtx_callbacks/basicfwd.h
@@ -0,0 +1,46 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef BASICFWD_H
+#define BASICFWD_H
+
+/*
+ * MAIN expands to the name of the program entry point: _main when
+ * RTE_EXEC_ENV_BAREMETAL is defined, main otherwise.
+ */
+#ifdef RTE_EXEC_ENV_BAREMETAL
+#define MAIN _main
+#else
+#define MAIN main
+#endif
+
+int MAIN(int argc, char *argv[]);
+
+#endif /* BASICFWD_H */
--
1.7.4.1
Thomas Monjalon
2015-02-13 16:02:29 UTC
Permalink
It appears you made some copy paste of an old example.
Please try to send something up to date.
Post by Bruce Richardson
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
Old
Post by Bruce Richardson
+#ifdef RTE_EXEC_ENV_BAREMETAL
+#define MAIN _main
+#else
+#define MAIN main
+#endif
There is no bare metal anymore.
Olivier MATZ
2015-02-16 14:33:40 UTC
Permalink
Hi John,
Post by John McNamara
Example showing how callbacks can be used to insert a timestamp
into each packet on RX. On TX the timestamp is used to calculate
the packet latency through the app, in cycles.
I'm looking at the example and I don't understand what is the advantage
of having callbacks in ethdev layer, knowing that the application can
do the same job by a standard function call.

What is the advantage of having callbacks compared to:


for (port = 0; port < nb_ports; port++) {
struct rte_mbuf *bufs[BURST_SIZE];
const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
bufs, BURST_SIZE);
if (unlikely(nb_rx == 0))
continue;
add_timestamp(bufs, nb_rx);

const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
bufs, nb_rx);
calc_latency(bufs, nb_tx);

if (unlikely(nb_tx < nb_rx)) {
uint16_t buf;
for (buf = nb_tx; buf < nb_rx; buf++)
rte_pktmbuf_free(bufs[buf]);
}
}


To me, doing like the code above has several advantages:

- code is more readable: the callback is explicitly invoked, so there is
no risk of forgetting it
- code is faster: the function calls can be inlined by the compiler
- easier to handle error cases in the callback function as the error
code is accessible to the application
- there is no need to add code in ethdev api to do this
- if the application does not want to use callbacks (I suppose most
applications), it won't have any performance impact

Regards,
Olivier
Bruce Richardson
2015-02-16 15:16:23 UTC
Permalink
Post by Olivier MATZ
Hi John,
Post by John McNamara
Example showing how callbacks can be used to insert a timestamp
into each packet on RX. On TX the timestamp is used to calculate
the packet latency through the app, in cycles.
I'm looking at the example and I don't understand what is the advantage
of having callbacks in ethdev layer, knowing that the application can
do the same job by a standard function call.
for (port = 0; port < nb_ports; port++) {
struct rte_mbuf *bufs[BURST_SIZE];
const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
bufs, BURST_SIZE);
if (unlikely(nb_rx == 0))
continue;
add_timestamp(bufs, nb_rx);
const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
bufs, nb_rx);
calc_latency(bufs, nb_tx);
if (unlikely(nb_tx < nb_rx)) {
uint16_t buf;
for (buf = nb_tx; buf < nb_rx; buf++)
rte_pktmbuf_free(bufs[buf]);
}
}
- code is more readable: the callback is explicitly invoked, so there is
no risk to forget it
- code is faster: the functions calls can be inlined by the compiler
- easier to handle error cases in the callback function as the error
code is accessible to the application
- there is no need to add code in ethdev api to do this
- if the application does not want to use callbacks (I suppose most
applications), it won't have any performance impact
Regards,
Olivier
In this specific instance, given that the application does little else, there
is no real advantage to using the callbacks - it's just to have a simple example
of how they can be used.

Where callbacks are really designed to be useful, is for extending or augmenting
hardware capabilities. Taking the example of sequence numbers - to use the most
trivial example - an application could be written to take advantage of sequence
numbers written to packets by the hardware which received them. However, if such
an application was to be used with a NIC which does not provide sequence numbering
capability, for example, anything using ixgbe driver, the application writer has
two choices - either modify his application code to check each packet for
a sequence number in the data path, and add it there post-rx, or alternatively,
to check the NIC capabilities at initialization time, and add a callback there
at initialization, if the hardware does not support it. In the latter case,
the main packet processing body of the application can be written as though
hardware always has sequence numbering capability, safe in the knowledge that
any hardware not supporting it will be back-filled by a software fallback at
initialization-time.

By the same token, we could also look to extend hardware capabilities. For
different filtering or hashing capabilities, there can be limits in hardware
which are far less than what we need to use in software. Again, callbacks will
allow the data path to be written in a way that is oblivious to the underlying
hardware limits, because software will transparently fill in the gaps.

Hope this makes the use case clear.

Regards,
/Bruce
Thomas Monjalon
2015-02-16 17:34:37 UTC
Permalink
Post by Bruce Richardson
Post by Olivier MATZ
Hi John,
Post by John McNamara
Example showing how callbacks can be used to insert a timestamp
into each packet on RX. On TX the timestamp is used to calculate
the packet latency through the app, in cycles.
I'm looking at the example and I don't understand what is the advantage
of having callbacks in ethdev layer, knowing that the application can
do the same job by a standard function call.
for (port = 0; port < nb_ports; port++) {
struct rte_mbuf *bufs[BURST_SIZE];
const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
bufs, BURST_SIZE);
if (unlikely(nb_rx == 0))
continue;
add_timestamp(bufs, nb_rx);
const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
bufs, nb_rx);
calc_latency(bufs, nb_tx);
if (unlikely(nb_tx < nb_rx)) {
uint16_t buf;
for (buf = nb_tx; buf < nb_rx; buf++)
rte_pktmbuf_free(bufs[buf]);
}
}
- code is more readable: the callback is explicitly invoked, so there is
no risk to forget it
- code is faster: the functions calls can be inlined by the compiler
- easier to handle error cases in the callback function as the error
code is accessible to the application
- there is no need to add code in ethdev api to do this
- if the application does not want to use callbacks (I suppose most
applications), it won't have any performance impact
Regards,
Olivier
In this specific instance, given that the application does little else, there
is no real advantage to using the callbacks - it's just to have a simple example
of how they can be used.
Where callbacks are really designed to be useful, is for extending or augmenting
hardware capabilities. Taking the example of sequence numbers - to use the most
trivial example - an application could be written to take advantage of sequence
numbers written to packets by the hardware which received them. However, if such
an application was to be used with a NIC which does not provide sequence numbering
capability, for example, anything using ixgbe driver, the application writer has
two choices - either modify his application code to check each packet for
a sequence number in the data path, and add it there post-rx, or alternatively,
to check the NIC capabilities at initialization time, and add a callback there
at initialization, if the hardware does not support it. In the latter case,
the main packet processing body of the application can be written as though
hardware always has sequence numbering capability, safe in the knowledge that
any hardware not supporting it will be back-filled by a software fallback at
initialization-time.
By the same token, we could also look to extend hardware capabilities. For
different filtering or hashing capabilities, there can be limits in hardware
which are far less than what we need to use in software. Again, callbacks will
allow the data path to be written in a way that is oblivious to the underlying
hardware limits, because software will transparently fill in the gaps.
Hope this makes the use case clear.
After thinking more about these callbacks, I realize these callbacks won't
help, as Olivier said.

With callback,
1/ application checks device capability
2/ application provides hardware emulation as DPDK callback
3/ application forgets previous steps
4/ application calls DPDK Rx
5/ DPDK calls callback (without calling optimization)

Without callback,
1/ application checks device capability
2/ application provides hardware emulation as internal function
3/ application sets an internal device flag to enable this function
4/ application calls DPDK Rx
5/ application calls the hardware emulation if flag is set

So the only difference is that the device information is kept in
the application instead of being stored as a function pointer in the
DPDK struct.
You can also be faster with this approach: at initialization time,
you can check whether your NIC supports the feature and select a specific
main loop that either adds the sequence number or not, without any runtime
test.

A callback could be justified for asynchronous events, or when
doing specific processing in the middle of the driver, for instance
when freeing an mbuf. But in this case it's exactly the same as doing
the processing in the application after Rx (or before Tx).
Declan Doherty
2015-02-17 12:17:30 UTC
Permalink
Post by Thomas Monjalon
Post by Bruce Richardson
Post by Olivier MATZ
Hi John,
Post by John McNamara
Example showing how callbacks can be used to insert a timestamp
into each packet on RX. On TX the timestamp is used to calculate
the packet latency through the app, in cycles.
I'm looking at the example and I don't understand what is the advantage
of having callbacks in ethdev layer, knowing that the application can
do the same job by a standard function call.
for (port = 0; port < nb_ports; port++) {
struct rte_mbuf *bufs[BURST_SIZE];
const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
bufs, BURST_SIZE);
if (unlikely(nb_rx == 0))
continue;
add_timestamp(bufs, nb_rx);
const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
bufs, nb_rx);
calc_latency(bufs, nb_tx);
if (unlikely(nb_tx < nb_rx)) {
uint16_t buf;
for (buf = nb_tx; buf < nb_rx; buf++)
rte_pktmbuf_free(bufs[buf]);
}
}
- code is more readable: the callback is explicitly invoked, so there is
no risk to forget it
- code is faster: the functions calls can be inlined by the compiler
- easier to handle error cases in the callback function as the error
code is accessible to the application
- there is no need to add code in ethdev api to do this
- if the application does not want to use callbacks (I suppose most
applications), it won't have any performance impact
Regards,
Olivier
In this specific instance, given that the application does little else, there
is no real advantage to using the callbacks - it's just to have a simple example
of how they can be used.
Where callbacks are really designed to be useful, is for extending or augmenting
hardware capabilities. Taking the example of sequence numbers - to use the most
trivial example - an application could be written to take advantage of sequence
numbers written to packets by the hardware which received them. However, if such
an application was to be used with a NIC which does not provide sequence numbering
capability, for example, anything using ixgbe driver, the application writer has
two choices - either modify his application code to check each packet for
a sequence number in the data path, and add it there post-rx, or alternatively,
to check the NIC capabilities at initialization time, and add a callback there
at initialization, if the hardware does not support it. In the latter case,
the main packet processing body of the application can be written as though
hardware always has sequence numbering capability, safe in the knowledge that
any hardware not supporting it will be back-filled by a software fallback at
initialization-time.
By the same token, we could also look to extend hardware capabilities. For
different filtering or hashing capabilities, there can be limits in hardware
which are far less than what we need to use in software. Again, callbacks will
allow the data path to be written in a way that is oblivious to the underlying
hardware limits, because software will transparently fill in the gaps.
Hope this makes the use case clear.
After thinking more about these callbacks, I realize these callbacks won't
help, as Olivier said.
With callback,
1/ application checks device capability
2/ application provides hardware emulation as DPDK callback
3/ application forgets previous steps
4/ application calls DPDK Rx
5/ DPDK calls callback (without calling optimization)
Without callback,
1/ application checks device capability
2/ application provides hardware emulation as internal function
3/ application set an internal device-flag to enable this function
4/ application calls DPDK Rx
5/ application calls the hardware emulation if flag is set
So the only difference is to keep persistent the device information in
the application instead of storing it as a function pointer in the
DPDK struct.
You can also be faster with this approach: at initialization time,
you can check that your NIC supports the feature and use a specific
mainloop that adds or not the sequence number without any runtime
test.
A callback could be justified for asynchronous events, or when
doing specific processing in the middle of the driver, for instance
when freeing a mbuf. But in this case it's exactly similar to do
the processing in the application after Rx (or before Tx).
I believe that the introduction of callbacks to the ethdev layer will be
required for live migration.

For example, consider the scenario where we have two ports bonded together
in active backup mode, the primary slave being a hw port and the other
slave a virtio port. In normal operation it would be desirable to
leverage the available hw offload capabilities for maximum performance,
but for these two devices to be bonded together it is required that
both slave devices support the same set of offload features. In the
event of a planned or unplanned failover, the backup slave must
provide the same offloads as the primary device. Currently the offloads
supported are the lowest common denominator of the offload features of all
slave devices, but obviously this isn't desirable.


I think that we could extend the bonding library API to take a set of
desired offloads as input parameters. Then, during the addition of slaves,
we would interrogate the supported hw offloads available, enable the
desired ones and register callbacks to implement the offloads which
the slave device does not support in hw. This would remove the need for the
user application to have any knowledge of the underlying slave
offload configuration, it would guarantee that the requested offloads
are applied irrespective of which slave is in use, and it would allow
a VM to be migrated transparently with respect to what is happening in the
ethdev layer.

Declan
Bruce Richardson
2015-02-17 12:25:35 UTC
Permalink
Post by Thomas Monjalon
Post by Bruce Richardson
Post by Olivier MATZ
Hi John,
Post by John McNamara
Example showing how callbacks can be used to insert a timestamp
into each packet on RX. On TX the timestamp is used to calculate
the packet latency through the app, in cycles.
I'm looking at the example and I don't understand what is the advantage
of having callbacks in ethdev layer, knowing that the application can
do the same job by a standard function call.
for (port = 0; port < nb_ports; port++) {
struct rte_mbuf *bufs[BURST_SIZE];
const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
bufs, BURST_SIZE);
if (unlikely(nb_rx == 0))
continue;
add_timestamp(bufs, nb_rx);
const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
bufs, nb_rx);
calc_latency(bufs, nb_tx);
if (unlikely(nb_tx < nb_rx)) {
uint16_t buf;
for (buf = nb_tx; buf < nb_rx; buf++)
rte_pktmbuf_free(bufs[buf]);
}
}
- code is more readable: the callback is explicitly invoked, so there is
no risk to forget it
- code is faster: the functions calls can be inlined by the compiler
- easier to handle error cases in the callback function as the error
code is accessible to the application
- there is no need to add code in ethdev api to do this
- if the application does not want to use callbacks (I suppose most
applications), it won't have any performance impact
Regards,
Olivier
In this specific instance, given that the application does little else, there
is no real advantage to using the callbacks - it's just to have a simple example
of how they can be used.
Where callbacks are really designed to be useful, is for extending or augmenting
hardware capabilities. Taking the example of sequence numbers - to use the most
trivial example - an application could be written to take advantage of sequence
numbers written to packets by the hardware which received them. However, if such
an application was to be used with a NIC which does not provide sequence numbering
capability, for example, anything using ixgbe driver, the application writer has
two choices - either modify his application code to check each packet for
a sequence number in the data path, and add it there post-rx, or alternatively,
to check the NIC capabilities at initialization time, and add a callback there
at initialization, if the hardware does not support it. In the latter case,
the main packet processing body of the application can be written as though
hardware always has sequence numbering capability, safe in the knowledge that
any hardware not supporting it will be back-filled by a software fallback at
initialization-time.
By the same token, we could also look to extend hardware capabilities. For
different filtering or hashing capabilities, there can be limits in hardware
which are far less than what we need to use in software. Again, callbacks will
allow the data path to be written in a way that is oblivious to the underlying
hardware limits, because software will transparently fill in the gaps.
Hope this makes the use case clear.
After thinking more about these callbacks, I realize these callbacks won't
help, as Olivier said.
With callback,
1/ application checks device capability
2/ application provides hardware emulation as DPDK callback
3/ application forgets previous steps
4/ application calls DPDK Rx
5/ DPDK calls callback (without calling optimization)
Without callback,
1/ application checks device capability
2/ application provides hardware emulation as internal function
3/ application set an internal device-flag to enable this function
4/ application calls DPDK Rx
5/ application calls the hardware emulation if flag is set
So the only difference is to keep persistent the device information in
the application instead of storing it as a function pointer in the
DPDK struct.
You can also be faster with this approach: at initialization time,
you can check that your NIC supports the feature and use a specific
mainloop that adds or not the sequence number without any runtime
test.
That is assuming that all NICs are equal on your system. It's also assuming
that you only have a single point in your application where you call RX or
TX burst. In the case where you have a couple of different NICs on the system,
or where you want to write an application to take advantage of capabilities of
different NICs, the ability to resolve all these differences at initialization
time is useful. The main packet handling code can be written with just the
processing of packets in mind, rather than having to have a set of branches
after each RX burst call, or before each TX burst call, to "smooth out" the
different NIC capabilities.

As for the option of maintaining different main loops for different NICs with
different capabilities - that sounds like a maintenance nightmare to
me, due to duplicated code! Callbacks are a far cleaner solution than that IMHO.

/Bruce
Post by Thomas Monjalon
A callback could be justified for asynchronous events, or when
doing specific processing in the middle of the driver, for instance
when freeing a mbuf. But in this case it's exactly similar to do
the processing in the application after Rx (or before Tx).
Olivier MATZ
2015-02-17 13:28:02 UTC
Permalink
Hi Bruce,
Post by Bruce Richardson
Post by Thomas Monjalon
Post by Bruce Richardson
In this specific instance, given that the application does little else, there
is no real advantage to using the callbacks - it's just to have a simple example
of how they can be used.
Where callbacks are really designed to be useful, is for extending or augmenting
hardware capabilities. Taking the example of sequence numbers - to use the most
trivial example - an application could be written to take advantage of sequence
numbers written to packets by the hardware which received them. However, if such
an application was to be used with a NIC which does not provide sequence numbering
capability, for example, anything using ixgbe driver, the application writer has
two choices - either modify his application code to check each packet for
a sequence number in the data path, and add it there post-rx, or alternatively,
to check the NIC capabilities at initialization time, and add a callback there
at initialization, if the hardware does not support it. In the latter case,
the main packet processing body of the application can be written as though
hardware always has sequence numbering capability, safe in the knowledge that
any hardware not supporting it will be back-filled by a software fallback at
initialization-time.
By the same token, we could also look to extend hardware capabilities. For
different filtering or hashing capabilities, there can be limits in hardware
which are far less than what we need to use in software. Again, callbacks will
allow the data path to be written in a way that is oblivious to the underlying
hardware limits, because software will transparently fill in the gaps.
Hope this makes the use case clear.
After thinking more about these callbacks, I realize these callbacks won't
help, as Olivier said.
With callback,
1/ application checks device capability
2/ application provides hardware emulation as DPDK callback
3/ application forgets previous steps
4/ application calls DPDK Rx
5/ DPDK calls callback (without calling optimization)
Without callback,
1/ application checks device capability
2/ application provides hardware emulation as internal function
3/ application set an internal device-flag to enable this function
4/ application calls DPDK Rx
5/ application calls the hardware emulation if flag is set
So the only difference is to keep persistent the device information in
the application instead of storing it as a function pointer in the
DPDK struct.
You can also be faster with this approach: at initialization time,
you can check that your NIC supports the feature and use a specific
mainloop that adds or not the sequence number without any runtime
test.
That is assuming that all NICs are equal on your system. It's also assuming
that you only have a single point in your application where you call RX or
TX burst. In the case where you have a couple of different NICs on the system,
or where you want to write an application to take advantage of capabilities of
different NICs, the ability to resolve all these difference at initialization
time is useful. The main packet handling code can be written with just the
processing of packets in mind, rather than having to have a set of branches
after each RX burst call, or before each TX burst call, to "smooth out" the
different NIC capabilities.
As for the option of maintaining different main loops for different NICs with
different capabilities - that sounds like a maintenance nightmare to
me, due to duplicated code! Callbacks is a far cleaner solution than that IMHO.
Why not just provide a function like this:

rte_do_unsupported_stuff_by_software(m[], m_count, wanted_features,
dev_feature_flags)

This function can be called (or not) from the application mainloop.
You don't need to maintain several mainloops (for each device) as
the specific work will be done depending on the given flags. And the
applications that do not require these features (most applications?)
are not penalized at all.

If you have several places where you call rx in your application
and you want to factorize it, you can have your own function that
calls rx plus the function that does the additional sw work.

Regards,
Olivier
Bruce Richardson
2015-02-17 13:50:58 UTC
Permalink
Post by Thomas Monjalon
Hi Bruce,
Post by Bruce Richardson
Post by Thomas Monjalon
Post by Bruce Richardson
In this specific instance, given that the application does little else, there
is no real advantage to using the callbacks - it's just to have a simple example
of how they can be used.
Where callbacks are really designed to be useful, is for extending or augmenting
hardware capabilities. Taking the example of sequence numbers - to use the most
trivial example - an application could be written to take advantage of sequence
numbers written to packets by the hardware which received them. However, if such
an application was to be used with a NIC which does not provide sequence numbering
capability, for example, anything using ixgbe driver, the application writer has
two choices - either modify his application code to check each packet for
a sequence number in the data path, and add it there post-rx, or alternatively,
to check the NIC capabilities at initialization time, and add a callback there
at initialization, if the hardware does not support it. In the latter case,
the main packet processing body of the application can be written as though
hardware always has sequence numbering capability, safe in the knowledge that
any hardware not supporting it will be back-filled by a software fallback at
initialization-time.
By the same token, we could also look to extend hardware capabilities. For
different filtering or hashing capabilities, there can be limits in hardware
which are far less than what we need to use in software. Again, callbacks will
allow the data path to be written in a way that is oblivious to the underlying
hardware limits, because software will transparently fill in the gaps.
Hope this makes the use case clear.
After thinking more about these callbacks, I realize these callbacks won't
help, as Olivier said.
With callback,
1/ application checks device capability
2/ application provides hardware emulation as DPDK callback
3/ application forgets previous steps
4/ application calls DPDK Rx
5/ DPDK calls callback (without calling optimization)
Without callback,
1/ application checks device capability
2/ application provides hardware emulation as internal function
3/ application set an internal device-flag to enable this function
4/ application calls DPDK Rx
5/ application calls the hardware emulation if flag is set
So the only difference is to keep persistent the device information in
the application instead of storing it as a function pointer in the
DPDK struct.
You can also be faster with this approach: at initialization time,
you can check that your NIC supports the feature and use a specific
mainloop that adds or not the sequence number without any runtime
test.
That is assuming that all NICs are equal on your system. It's also assuming
that you only have a single point in your application where you call RX or
TX burst. In the case where you have a couple of different NICs on the system,
or where you want to write an application to take advantage of capabilities of
different NICs, the ability to resolve all these difference at initialization
time is useful. The main packet handling code can be written with just the
processing of packets in mind, rather than having to have a set of branches
after each RX burst call, or before each TX burst call, to "smooth out" the
different NIC capabilities.
As for the option of maintaining different main loops for different NICs with
different capabilities - that sounds like a maintenance nightmare to
me, due to duplicated code! Callbacks is a far cleaner solution than that IMHO.
rte_do_unsupported_stuff_by_software(m[], m_count, wanted_features,
dev_feature_flags)
This function can be called (or not) from the application mainloop.
You don't need to maintain several mainloops (for each device) as
the specific work will be done depending on the given flags. And the
applications that do not require these features (most applications?)
are not penalized at all.
Have you measured the performance hit due to this proposed change? In my tests
it's very, very small, even for the fastest vectorized path. If performance is
a real concern, I'm happy enough to have this as a compile-time option so that
those who can't take the small performance hit can avoid it.

/Bruce
Post by Thomas Monjalon
If you have several places where you call rx in your application
and you want to factorize it, you can have your own function that
calls rx plus the function that does the additional sw work.
Regards,
Olivier
Neil Horman
2015-02-17 15:49:24 UTC
Permalink
Post by Bruce Richardson
Post by Thomas Monjalon
Hi Bruce,
Post by Bruce Richardson
Post by Thomas Monjalon
Post by Bruce Richardson
In this specific instance, given that the application does little else, there
is no real advantage to using the callbacks - it's just to have a simple example
of how they can be used.
Where callbacks are really designed to be useful, is for extending or augmenting
hardware capabilities. Taking the example of sequence numbers - to use the most
trivial example - an application could be written to take advantage of sequence
numbers written to packets by the hardware which received them. However, if such
an application was to be used with a NIC which does not provide sequence numbering
capability, for example, anything using ixgbe driver, the application writer has
two choices - either modify his application code to check each packet for
a sequence number in the data path, and add it there post-rx, or alternatively,
to check the NIC capabilities at initialization time, and add a callback there
at initialization, if the hardware does not support it. In the latter case,
the main packet processing body of the application can be written as though
hardware always has sequence numbering capability, safe in the knowledge that
any hardware not supporting it will be back-filled by a software fallback at
initialization-time.
By the same token, we could also look to extend hardware capabilities. For
different filtering or hashing capabilities, there can be limits in hardware
which are far less than what we need to use in software. Again, callbacks will
allow the data path to be written in a way that is oblivious to the underlying
hardware limits, because software will transparently fill in the gaps.
Hope this makes the use case clear.
After thinking more about these callbacks, I realize these callbacks won't
help, as Olivier said.
With callback,
1/ application checks device capability
2/ application provides hardware emulation as DPDK callback
3/ application forgets previous steps
4/ application calls DPDK Rx
5/ DPDK calls callback (without calling optimization)
Without callback,
1/ application checks device capability
2/ application provides hardware emulation as internal function
3/ application set an internal device-flag to enable this function
4/ application calls DPDK Rx
5/ application calls the hardware emulation if flag is set
So the only difference is to keep persistent the device information in
the application instead of storing it as a function pointer in the
DPDK struct.
You can also be faster with this approach: at initialization time,
you can check that your NIC supports the feature and use a specific
mainloop that adds or not the sequence number without any runtime
test.
That is assuming that all NICs are equal on your system. It's also assuming
that you only have a single point in your application where you call RX or
TX burst. In the case where you have a couple of different NICs on the system,
or where you want to write an application to take advantage of capabilities of
different NICs, the ability to resolve all these difference at initialization
time is useful. The main packet handling code can be written with just the
processing of packets in mind, rather than having to have a set of branches
after each RX burst call, or before each TX burst call, to "smooth out" the
different NIC capabilities.
As for the option of maintaining different main loops for different NICs with
different capabilities - that sounds like a maintenance nightmare to
me, due to duplicated code! Callbacks is a far cleaner solution than that IMHO.
rte_do_unsupported_stuff_by_software(m[], m_count, wanted_features,
dev_feature_flags)
This function can be called (or not) from the application mainloop.
You don't need to maintain several mainloops (for each device) as
the specific work will be done depending on the given flags. And the
applications that do not require these features (most applications?)
are not penalized at all.
Have you measured the performance hit due to this proposed change? In my tests
it's very, very small, even for the fastest vectorized path. If performance is
a real concern, I'm happy enough to have this as a compile-time option so that
those who can't take the small performance hit can avoid it.
How can you assert performance metrics on a patch like this? The point of the
change is to allow a callback to an application defined function, the contents
of which are effectively arbitrary. Not saying that it's the wrong thing to do,
but you can't really claim performance is not impacted, because the details of
what's executed are outside your purview.
Neil
Post by Bruce Richardson
/Bruce
Post by Thomas Monjalon
If you have several places where you call rx in your application
and you want to factorize it, you can have your own function that
calls rx plus the function that does the additional sw work.
Regards,
Olivier
Bruce Richardson
2015-02-17 16:00:56 UTC
Permalink
Post by Neil Horman
Post by Bruce Richardson
Post by Thomas Monjalon
Hi Bruce,
Post by Bruce Richardson
Post by Thomas Monjalon
Post by Bruce Richardson
In this specific instance, given that the application does little else, there
is no real advantage to using the callbacks - it's just to have a simple example
of how they can be used.
Where callbacks are really designed to be useful, is for extending or augmenting
hardware capabilities. Taking the example of sequence numbers - to use the most
trivial example - an application could be written to take advantage of sequence
numbers written to packets by the hardware which received them. However, if such
an application was to be used with a NIC which does not provide sequence numbering
capability, for example, anything using ixgbe driver, the application writer has
two choices - either modify his application code to check each packet for
a sequence number in the data path, and add it there post-rx, or alternatively,
to check the NIC capabilities at initialization time, and add a callback there
at initialization, if the hardware does not support it. In the latter case,
the main packet processing body of the application can be written as though
hardware always has sequence numbering capability, safe in the knowledge that
any hardware not supporting it will be back-filled by a software fallback at
initialization-time.
By the same token, we could also look to extend hardware capabilities. For
different filtering or hashing capabilities, there can be limits in hardware
which are far less than what we need to use in software. Again, callbacks will
allow the data path to be written in a way that is oblivious to the underlying
hardware limits, because software will transparently fill in the gaps.
Hope this makes the use case clear.
After thinking more about these callbacks, I realize these callbacks won't
help, as Olivier said.
With callback,
1/ application checks device capability
2/ application provides hardware emulation as DPDK callback
3/ application forgets previous steps
4/ application calls DPDK Rx
5/ DPDK calls callback (without calling optimization)
Without callback,
1/ application checks device capability
2/ application provides hardware emulation as internal function
3/ application set an internal device-flag to enable this function
4/ application calls DPDK Rx
5/ application calls the hardware emulation if flag is set
So the only difference is to keep persistent the device information in
the application instead of storing it as a function pointer in the
DPDK struct.
You can also be faster with this approach: at initialization time,
you can check that your NIC supports the feature and use a specific
mainloop that adds or not the sequence number without any runtime
test.
That is assuming that all NICs are equal on your system. It's also assuming
that you only have a single point in your application where you call RX or
TX burst. In the case where you have a couple of different NICs on the system,
or where you want to write an application to take advantage of capabilities of
different NICs, the ability to resolve all these difference at initialization
time is useful. The main packet handling code can be written with just the
processing of packets in mind, rather than having to have a set of branches
after each RX burst call, or before each TX burst call, to "smooth out" the
different NIC capabilities.
As for the option of maintaining different main loops for different NICs with
different capabilities - that sounds like a maintenance nightmare to
me, due to duplicated code! Callbacks is a far cleaner solution than that IMHO.
rte_do_unsupported_stuff_by_software(m[], m_count, wanted_features,
dev_feature_flags)
This function can be called (or not) from the application mainloop.
You don't need to maintain several mainloops (for each device) as
the specific work will be done depending on the given flags. And the
applications that do not require these features (most applications?)
are not penalized at all.
Have you measured the performance hit due to this proposed change? In my tests
it's very, very small, even for the fastest vectorized path. If performance is
a real concern, I'm happy enough to have this as a compile-time option so that
those who can't take the small performance hit can avoid it.
How can you assert performance metrics on a patch like this? The point of the
change is to allow a callback to an application defined function, the contents
of which are effectively arbitrary. Not saying that its the wrong thing to do,
but you can't really claim performance is not impacted, because the details of
whats executed is outside your purview.
Neil
I think the performance hit being referenced is a hit due to the patch itself
without any callbacks being in use. (That was certainly my assumption in replying)

/Bruce
Neil Horman
2015-02-17 16:08:10 UTC
Permalink
Post by Bruce Richardson
Post by Neil Horman
Post by Bruce Richardson
Post by Thomas Monjalon
Hi Bruce,
Post by Bruce Richardson
Post by Thomas Monjalon
Post by Bruce Richardson
In this specific instance, given that the application does little else, there
is no real advantage to using the callbacks - it's just to have a simple example
of how they can be used.
Where callbacks are really designed to be useful, is for extending or augmenting
hardware capabilities. Taking the example of sequence numbers - to use the most
trivial example - an application could be written to take advantage of sequence
numbers written to packets by the hardware which received them. However, if such
an application was to be used with a NIC which does not provide sequence numbering
capability, for example, anything using ixgbe driver, the application writer has
two choices - either modify his application code to check each packet for
a sequence number in the data path, and add it there post-rx, or alternatively,
to check the NIC capabilities at initialization time, and add a callback there
at initialization, if the hardware does not support it. In the latter case,
the main packet processing body of the application can be written as though
hardware always has sequence numbering capability, safe in the knowledge that
any hardware not supporting it will be back-filled by a software fallback at
initialization-time.
By the same token, we could also look to extend hardware capabilities. For
different filtering or hashing capabilities, there can be limits in hardware
which are far less than what we need to use in software. Again, callbacks will
allow the data path to be written in a way that is oblivious to the underlying
hardware limits, because software will transparently fill in the gaps.
Hope this makes the use case clear.
After thinking more about these callbacks, I realize these callbacks won't
help, as Olivier said.
With callback,
1/ application checks device capability
2/ application provides hardware emulation as DPDK callback
3/ application forgets previous steps
4/ application calls DPDK Rx
5/ DPDK calls callback (without calling optimization)
Without callback,
1/ application checks device capability
2/ application provides hardware emulation as internal function
3/ application set an internal device-flag to enable this function
4/ application calls DPDK Rx
5/ application calls the hardware emulation if flag is set
So the only difference is to keep persistent the device information in
the application instead of storing it as a function pointer in the
DPDK struct.
You can also be faster with this approach: at initialization time,
you can check that your NIC supports the feature and use a specific
mainloop that adds or not the sequence number without any runtime
test.
That is assuming that all NICs are equal on your system. It's also assuming
that you only have a single point in your application where you call RX or
TX burst. In the case where you have a couple of different NICs on the system,
or where you want to write an application to take advantage of capabilities of
different NICs, the ability to resolve all these difference at initialization
time is useful. The main packet handling code can be written with just the
processing of packets in mind, rather than having to have a set of branches
after each RX burst call, or before each TX burst call, to "smooth out" the
different NIC capabilities.
As for the option of maintaining different main loops for different NICs with
different capabilities - that sounds like a maintenance nightmare to
me, due to duplicated code! Callbacks is a far cleaner solution than that IMHO.
rte_do_unsupported_stuff_by_software(m[], m_count, wanted_features,
dev_feature_flags)
This function can be called (or not) from the application mainloop.
You don't need to maintain several mainloops (for each device) as
the specific work will be done depending on the given flags. And the
applications that do not require these features (most applications?)
are not penalized at all.
Have you measured the performance hit due to this proposed change? In my tests
it's very, very small, even for the fastest vectorized path. If performance is
a real concern, I'm happy enough to have this as a compile-time option so that
those who can't take the small performance hit can avoid it.
How can you assert performance metrics on a patch like this? The point of the
change is to allow a callback to an application defined function, the contents
of which are effectively arbitrary. Not saying that its the wrong thing to do,
but you can't really claim performance is not impacted, because the details of
whats executed is outside your purview.
Neil
I think the performance hit being referenced is a hit due to the patch itself
without any callbacks being in use. (That was certainly my assumption in replying)
I figured it was, but that's still something of a misnomer. Of course this
change on its own is negligible in its performance impact. By itself, the
impact is that of a branch that is unlikely to be taken, which is to say almost
zero. But that's not an actionable number because the only time that performance
is attainable is if the user doesn't use it. Since you're proposing a patch that
makes application-registered callbacks in a very fast path, I think it's
important to state very clearly that these callbacks will have a significant
performance impact that individual applications will have to measure and be
cognizant of.
Neil
Post by Bruce Richardson
/Bruce
Bruce Richardson
2015-02-17 16:15:09 UTC
Permalink
Post by Neil Horman
Post by Bruce Richardson
Post by Neil Horman
Post by Bruce Richardson
Post by Thomas Monjalon
Hi Bruce,
Post by Bruce Richardson
Post by Thomas Monjalon
Post by Bruce Richardson
In this specific instance, given that the application does little else, there
is no real advantage to using the callbacks - it's just to have a simple example
of how they can be used.
Where callbacks are really designed to be useful, is for extending or augmenting
hardware capabilities. Taking the example of sequence numbers - to use the most
trivial example - an application could be written to take advantage of sequence
numbers written to packets by the hardware which received them. However, if such
an application was to be used with a NIC which does not provide sequence numbering
capability, for example, anything using ixgbe driver, the application writer has
two choices - either modify his application code to check each packet for
a sequence number in the data path, and add it there post-rx, or alternatively,
to check the NIC capabilities at initialization time, and add a callback there
at initialization, if the hardware does not support it. In the latter case,
the main packet processing body of the application can be written as though
hardware always has sequence numbering capability, safe in the knowledge that
any hardware not supporting it will be back-filled by a software fallback at
initialization-time.
By the same token, we could also look to extend hardware capabilities. For
different filtering or hashing capabilities, there can be limits in hardware
which are far less than what we need to use in software. Again, callbacks will
allow the data path to be written in a way that is oblivious to the underlying
hardware limits, because software will transparently fill in the gaps.
Hope this makes the use case clear.
After thinking more about these callbacks, I realize these callbacks won't
help, as Olivier said.
With callback,
1/ application checks device capability
2/ application provides hardware emulation as DPDK callback
3/ application forgets previous steps
4/ application calls DPDK Rx
5/ DPDK calls callback (without calling optimization)
Without callback,
1/ application checks device capability
2/ application provides hardware emulation as internal function
3/ application set an internal device-flag to enable this function
4/ application calls DPDK Rx
5/ application calls the hardware emulation if flag is set
So the only difference is to keep persistent the device information in
the application instead of storing it as a function pointer in the
DPDK struct.
You can also be faster with this approach: at initialization time,
you can check that your NIC supports the feature and use a specific
mainloop that adds or not the sequence number without any runtime
test.
That is assuming that all NICs are equal on your system. It's also assuming
that you only have a single point in your application where you call RX or
TX burst. In the case where you have a couple of different NICs on the system,
or where you want to write an application to take advantage of capabilities of
different NICs, the ability to resolve all these difference at initialization
time is useful. The main packet handling code can be written with just the
processing of packets in mind, rather than having to have a set of branches
after each RX burst call, or before each TX burst call, to "smooth out" the
different NIC capabilities.
As for the option of maintaining different main loops for different NICs with
different capabilities - that sounds like a maintenance nightmare to
me, due to duplicated code! Callbacks is a far cleaner solution than that IMHO.
rte_do_unsupported_stuff_by_software(m[], m_count, wanted_features,
dev_feature_flags)
This function can be called (or not) from the application mainloop.
You don't need to maintain several mainloops (for each device) as
the specific work will be done depending on the given flags. And the
applications that do not require these features (most applications?)
are not penalized at all.
Have you measured the performance hit due to this proposed change? In my tests
it's very, very small, even for the fastest vectorized path. If performance is
a real concern, I'm happy enough to have this as a compile-time option so that
those who can't take the small performance hit can avoid it.
How can you assert performance metrics on a patch like this? The point of the
change is to allow a callback to an application defined function, the contents
of which are effectively arbitrary. Not saying that it's the wrong thing to do,
but you can't really claim performance is not impacted, because the details of
what's executed are outside your purview.
Neil
I think the performance hit being referenced is a hit due to the patch itself
without any callbacks being in use. (That was certainly my assumption in replying)
I figured it was, but that's still something of a misnomer. Of course this
change on its own is negligible in its performance impact. By itself, the
impact is that of a branch that is unlikely to be taken, which is to say almost
zero. But that's not an actionable number because the only time that performance
is attainable is if the user doesn't use it. Since you're proposing a patch that
makes application-registered callbacks in a very fast path, I think it's
important to state very clearly that these callbacks will have a significant
performance impact that individual applications will have to measure and be
cognizant of.
Neil
Yes, agreed.
But if the app were to implement the same functionality directly rather
than via callbacks, the performance would be about the same (sometimes better,
sometimes worse, I suspect, depending on how it's done).
Neil Horman
2015-02-17 19:27:59 UTC
Permalink
Post by Bruce Richardson
Post by Neil Horman
Post by Bruce Richardson
Post by Neil Horman
Post by Bruce Richardson
Post by Thomas Monjalon
Hi Bruce,
Post by Bruce Richardson
Post by Thomas Monjalon
Post by Bruce Richardson
In this specific instance, given that the application does little else, there
is no real advantage to using the callbacks - it's just to have a simple example
of how they can be used.
Where callbacks are really designed to be useful, is for extending or augmenting
hardware capabilities. Taking the example of sequence numbers - to use the most
trivial example - an application could be written to take advantage of sequence
numbers written to packets by the hardware which received them. However, if such
an application was to be used with a NIC which does not provide sequence numbering
capability, for example, anything using ixgbe driver, the application writer has
two choices - either modify his application code to check each packet for
a sequence number in the data path, and add it there post-rx, or alternatively,
to check the NIC capabilities at initialization time, and add a callback there
at initialization, if the hardware does not support it. In the latter case,
the main packet processing body of the application can be written as though
hardware always has sequence numbering capability, safe in the knowledge that
any hardware not supporting it will be back-filled by a software fallback at
initialization-time.
By the same token, we could also look to extend hardware capabilities. For
different filtering or hashing capabilities, there can be limits in hardware
which are far less than what we need to use in software. Again, callbacks will
allow the data path to be written in a way that is oblivious to the underlying
hardware limits, because software will transparently fill in the gaps.
Hope this makes the use case clear.
After thinking more about these callbacks, I realize these callbacks won't
help, as Olivier said.
With callback,
1/ application checks device capability
2/ application provides hardware emulation as DPDK callback
3/ application forgets previous steps
4/ application calls DPDK Rx
5/ DPDK calls callback (without calling optimization)
Without callback,
1/ application checks device capability
2/ application provides hardware emulation as internal function
3/ application set an internal device-flag to enable this function
4/ application calls DPDK Rx
5/ application calls the hardware emulation if flag is set
So the only difference is to keep persistent the device information in
the application instead of storing it as a function pointer in the
DPDK struct.
You can also be faster with this approach: at initialization time,
you can check that your NIC supports the feature and use a specific
mainloop that adds or not the sequence number without any runtime
test.
That is assuming that all NICs are equal on your system. It's also assuming
that you only have a single point in your application where you call RX or
TX burst. In the case where you have a couple of different NICs on the system,
or where you want to write an application to take advantage of capabilities of
different NICs, the ability to resolve all these differences at initialization
time is useful. The main packet handling code can be written with just the
processing of packets in mind, rather than having to have a set of branches
after each RX burst call, or before each TX burst call, to "smooth out" the
different NIC capabilities.
As for the option of maintaining different main loops for different NICs with
different capabilities - that sounds like a maintenance nightmare to
me, due to duplicated code! Callbacks is a far cleaner solution than that IMHO.
rte_do_unsupported_stuff_by_software(m[], m_count, wanted_features,
dev_feature_flags)
This function can be called (or not) from the application mainloop.
You don't need to maintain several mainloops (for each device) as
the specific work will be done depending on the given flags. And the
applications that do not require these features (most applications?)
are not penalized at all.
Have you measured the performance hit due to this proposed change? In my tests
it's very, very small, even for the fastest vectorized path. If performance is
a real concern, I'm happy enough to have this as a compile-time option so that
those who can't take the small performance hit can avoid it.
How can you assert performance metrics on a patch like this? The point of the
change is to allow a callback to an application defined function, the contents
of which are effectively arbitrary. Not saying that it's the wrong thing to do,
but you can't really claim performance is not impacted, because the details of
what's executed are outside your purview.
Neil
I think the performance hit being referenced is a hit due to the patch itself
without any callbacks being in use. (That was certainly my assumption in replying)
I figured it was, but that's still something of a misnomer. Of course this
change on its own is negligible in its performance impact. By itself, the
impact is that of a branch that is unlikely to be taken, which is to say almost
zero. But that's not an actionable number because the only time that performance
is attainable is if the user doesn't use it. Since you're proposing a patch that
makes application-registered callbacks in a very fast path, I think it's
important to state very clearly that these callbacks will have a significant
performance impact that individual applications will have to measure and be
cognizant of.
Neil
Yes, agreed.
But if the app were to implement the same functionality directly rather
than via callbacks, the performance would be about the same (sometimes better,
sometimes worse, I suspect, depending on how it's done).
No argument, but doing so makes it clearly apparent to the application developer
that they are adding cycles to a hot path. That becomes much more obfuscated
when you register callbacks, and so it is imperative to not make ambiguous
claims like "the performance impact is zero".
Neil
Thomas Monjalon
2015-02-17 15:32:01 UTC
Permalink
Post by Bruce Richardson
Post by Thomas Monjalon
Post by Bruce Richardson
Post by Olivier MATZ
Hi John,
Post by John McNamara
Example showing how callbacks can be used to insert a timestamp
into each packet on RX. On TX the timestamp is used to calculate
the packet latency through the app, in cycles.
I'm looking at the example and I don't understand what is the advantage
of having callbacks in ethdev layer, knowing that the application can
do the same job by a standard function call.
for (port = 0; port < nb_ports; port++) {
struct rte_mbuf *bufs[BURST_SIZE];
const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
bufs, BURST_SIZE);
if (unlikely(nb_rx == 0))
continue;
add_timestamp(bufs, nb_rx);
const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
bufs, nb_rx);
calc_latency(bufs, nb_tx);
if (unlikely(nb_tx < nb_rx)) {
uint16_t buf;
for (buf = nb_tx; buf < nb_rx; buf++)
rte_pktmbuf_free(bufs[buf]);
}
}
- code is more readable: the callback is explicitly invoked, so there is
no risk to forget it
- code is faster: the functions calls can be inlined by the compiler
- easier to handle error cases in the callback function as the error
code is accessible to the application
- there is no need to add code in ethdev api to do this
- if the application does not want to use callbacks (I suppose most
applications), it won't have any performance impact
Regards,
Olivier
In this specific instance, given that the application does little else, there
is no real advantage to using the callbacks - it's just to have a simple example
of how they can be used.
Where callbacks are really designed to be useful, is for extending or augmenting
hardware capabilities. Taking the example of sequence numbers - to use the most
trivial example - an application could be written to take advantage of sequence
numbers written to packets by the hardware which received them. However, if such
an application was to be used with a NIC which does not provide sequence numbering
capability, for example, anything using ixgbe driver, the application writer has
two choices - either modify his application code to check each packet for
a sequence number in the data path, and add it there post-rx, or alternatively,
to check the NIC capabilities at initialization time, and add a callback there
at initialization, if the hardware does not support it. In the latter case,
the main packet processing body of the application can be written as though
hardware always has sequence numbering capability, safe in the knowledge that
any hardware not supporting it will be back-filled by a software fallback at
initialization-time.
By the same token, we could also look to extend hardware capabilities. For
different filtering or hashing capabilities, there can be limits in hardware
which are far less than what we need to use in software. Again, callbacks will
allow the data path to be written in a way that is oblivious to the underlying
hardware limits, because software will transparently fill in the gaps.
Hope this makes the use case clear.
After thinking more about these callbacks, I realize these callbacks won't
help, as Olivier said.
With callback,
1/ application checks device capability
2/ application provides hardware emulation as DPDK callback
3/ application forgets previous steps
4/ application calls DPDK Rx
5/ DPDK calls callback (without calling optimization)
Without callback,
1/ application checks device capability
2/ application provides hardware emulation as internal function
3/ application set an internal device-flag to enable this function
4/ application calls DPDK Rx
5/ application calls the hardware emulation if flag is set
So the only difference is to keep persistent the device information in
the application instead of storing it as a function pointer in the
DPDK struct.
You can also be faster with this approach: at initialization time,
you can check that your NIC supports the feature and use a specific
mainloop that adds or not the sequence number without any runtime
test.
That is assuming that all NICs are equal on your system. It's also assuming
that you only have a single point in your application where you call RX or
TX burst. In the case where you have a couple of different NICs on the system,
or where you want to write an application to take advantage of capabilities of
different NICs, the ability to resolve all these differences at initialization
time is useful. The main packet handling code can be written with just the
processing of packets in mind, rather than having to have a set of branches
after each RX burst call, or before each TX burst call, to "smooth out" the
different NIC capabilities.
As for the option of maintaining different main loops for different NICs with
different capabilities - that sounds like a maintenance nightmare to
me, due to duplicated code! Callbacks is a far cleaner solution than that IMHO.
If you really prefer using callbacks instead of direct calls, why not implement
the callback hooks in your application by wrapping the Rx and Tx burst functions?
Post by Bruce Richardson
Post by Thomas Monjalon
A callback could be justified for asynchronous events, or when
doing specific processing in the middle of the driver, for instance
when freeing a mbuf. But in this case it's exactly similar to do
the processing in the application after Rx (or before Tx).
Bruce Richardson
2015-02-17 15:58:16 UTC
Permalink
Post by Thomas Monjalon
Post by Bruce Richardson
Post by Thomas Monjalon
Post by Bruce Richardson
Post by Olivier MATZ
Hi John,
Post by John McNamara
Example showing how callbacks can be used to insert a timestamp
into each packet on RX. On TX the timestamp is used to calculate
the packet latency through the app, in cycles.
I'm looking at the example and I don't understand what is the advantage
of having callbacks in ethdev layer, knowing that the application can
do the same job by a standard function call.
for (port = 0; port < nb_ports; port++) {
struct rte_mbuf *bufs[BURST_SIZE];
const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
bufs, BURST_SIZE);
if (unlikely(nb_rx == 0))
continue;
add_timestamp(bufs, nb_rx);
const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
bufs, nb_rx);
calc_latency(bufs, nb_tx);
if (unlikely(nb_tx < nb_rx)) {
uint16_t buf;
for (buf = nb_tx; buf < nb_rx; buf++)
rte_pktmbuf_free(bufs[buf]);
}
}
- code is more readable: the callback is explicitly invoked, so there is
no risk to forget it
- code is faster: the functions calls can be inlined by the compiler
- easier to handle error cases in the callback function as the error
code is accessible to the application
- there is no need to add code in ethdev api to do this
- if the application does not want to use callbacks (I suppose most
applications), it won't have any performance impact
Regards,
Olivier
In this specific instance, given that the application does little else, there
is no real advantage to using the callbacks - it's just to have a simple example
of how they can be used.
Where callbacks are really designed to be useful, is for extending or augmenting
hardware capabilities. Taking the example of sequence numbers - to use the most
trivial example - an application could be written to take advantage of sequence
numbers written to packets by the hardware which received them. However, if such
an application was to be used with a NIC which does not provide sequence numbering
capability, for example, anything using ixgbe driver, the application writer has
two choices - either modify his application code to check each packet for
a sequence number in the data path, and add it there post-rx, or alternatively,
to check the NIC capabilities at initialization time, and add a callback there
at initialization, if the hardware does not support it. In the latter case,
the main packet processing body of the application can be written as though
hardware always has sequence numbering capability, safe in the knowledge that
any hardware not supporting it will be back-filled by a software fallback at
initialization-time.
By the same token, we could also look to extend hardware capabilities. For
different filtering or hashing capabilities, there can be limits in hardware
which are far less than what we need to use in software. Again, callbacks will
allow the data path to be written in a way that is oblivious to the underlying
hardware limits, because software will transparently fill in the gaps.
Hope this makes the use case clear.
After thinking more about these callbacks, I realize these callbacks won't
help, as Olivier said.
With callback,
1/ application checks device capability
2/ application provides hardware emulation as DPDK callback
3/ application forgets previous steps
4/ application calls DPDK Rx
5/ DPDK calls callback (without calling optimization)
Without callback,
1/ application checks device capability
2/ application provides hardware emulation as internal function
3/ application set an internal device-flag to enable this function
4/ application calls DPDK Rx
5/ application calls the hardware emulation if flag is set
So the only difference is to keep persistent the device information in
the application instead of storing it as a function pointer in the
DPDK struct.
You can also be faster with this approach: at initialization time,
you can check that your NIC supports the feature and use a specific
mainloop that adds or not the sequence number without any runtime
test.
That is assuming that all NICs are equal on your system. It's also assuming
that you only have a single point in your application where you call RX or
TX burst. In the case where you have a couple of different NICs on the system,
or where you want to write an application to take advantage of capabilities of
different NICs, the ability to resolve all these differences at initialization
time is useful. The main packet handling code can be written with just the
processing of packets in mind, rather than having to have a set of branches
after each RX burst call, or before each TX burst call, to "smooth out" the
different NIC capabilities.
As for the option of maintaining different main loops for different NICs with
different capabilities - that sounds like a maintenance nightmare to
me, due to duplicated code! Callbacks is a far cleaner solution than that IMHO.
If you really prefer using callbacks instead of direct calls, why not implement
the callback hooks in your application by wrapping the Rx and Tx burst functions?
Because sometimes things are generally useful and are better supplied in a
standard library than forcing multiple applications to constantly re-invent the
wheel.

Furthermore, if we enable the hooks in DPDK, it gives us a standard API
prototype to code against, allowing us to provide reference implementation
callbacks to create smarter ethdevs that can be used as higher-level abstractions
inside applications.

We don't require applications to know what the underlying NIC driver is that
is being used to receive pkts - we take care of all that at initialization time
by using a function pointer to allow NIC specific calls to be referenced using the
rx_burst API. An application could be written to call directly into the driver
receive or transmit functions - and such an API could be made faster than the
existing indirect calls - but instead we set things up so that all NICs look
the same to the data-path, irrespective of type or speed. In the same way, this
feature allows us to set things up at initialization time so that all NICs look
the same to the datapath in terms of capabilities offered. We won't always do
so, but it is a worthwhile use case that brings the same benefits as the generic
RX and TX function pointers do - a datapath that is agnostic to underlying
hardware.

Going further, once NICs can be made to provide similar capabilities in terms
of offloads - at the ethdev layer - then additional libraries which use ethdevs,
such as link bonding, as Declan has highlighted, can make use of that very easily.
Having support for that in the application won't allow such use cases in libraries,
and having it in the ethdev layer allows it to be conveniently used by any other
libraries other than link bonding that may want this in future.

Can I actually also flip the discussion on its head a bit? We have presented
a number of use cases where we see this functionality being useful, and we
have plans to build upon this in future to enable smarter ethdevs. Given that
this is not a large amount of code by any means, what is the compelling reason
why it should not be merged in, if it would be useful to at least some users of
DPDK?

/Bruce
John McNamara
2015-02-13 15:39:49 UTC
Permalink
---
lib/librte_ether/rte_ether_version.map | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index 7316530..3227cda 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -2,6 +2,8 @@ DPDK_2.0 {
global:

_rte_eth_dev_callback_process;
+ rte_eth_add_rx_callback;
+ rte_eth_add_tx_callback;
rte_eth_allmulticast_disable;
rte_eth_allmulticast_enable;
rte_eth_allmulticast_get;
@@ -96,6 +98,8 @@ DPDK_2.0 {
rte_eth_promiscuous_disable;
rte_eth_promiscuous_enable;
rte_eth_promiscuous_get;
+ rte_eth_remove_rx_callback;
+ rte_eth_remove_tx_callback;
rte_eth_rx_burst;
rte_eth_rx_descriptor_done;
rte_eth_rx_queue_count;
--
1.7.4.1
Thomas Monjalon
2015-02-13 15:59:13 UTC
Permalink
Post by John McNamara
---
There is no signed-off.
And there is no need of a separate patch for that.
Post by John McNamara
lib/librte_ether/rte_ether_version.map | 4 ++++
Declan Doherty
2015-02-13 15:48:53 UTC
Permalink
This patchset is for a small addition to the ethdev library, to
add in support for callbacks at the RX and TX stages. This allows
packet processing to be done on packets before they get returned
to applications using rte_eth_rx_burst call.
http://dpdk.org/ml/archives/dev/2014-December/010491.html
For the post-RFC version we spent some time investigating Stephen Hemminger's
suggestion of using the userspace RCU (read-copy-update) library for
SMP safety:
http://urcu.so/
The default liburcu (which defaulted to liburcu-mb) requires the least
interaction from the end user but showed a 25% drop in packet throughput
in the callback sample app.
The liburcu-qsbr (quiescent state) variant showed a 1% drop in packet
throughput in the callback sample app. However it requires registered
RCU threads in the program to periodically announce quiescent states.
This makes it more difficult to implement for end user applications.
For this release we will document that adding and removing callbacks
is not thread safe.
Note: Sample application documentation to follow in a patch update.
* Added ABI versioning.
* Doxygen clarifications.
* Added callback removal functions.
* Minor fixes.
abi: Added rxtx callback functions to ABI versioning
ethdev: rename callbacks field to intr_cbs
ethdev: Add in data rxtx callback support
examples: example showing use of callbacks.
app/test/virtual_pmd.c | 2 +-
examples/rxtx_callbacks/Makefile | 57 ++++++++
examples/rxtx_callbacks/basicfwd.c | 222 ++++++++++++++++++++++++++++++++
examples/rxtx_callbacks/basicfwd.h | 46 +++++++
lib/librte_ether/rte_ethdev.c | 175 ++++++++++++++++++++++++--
lib/librte_ether/rte_ethdev.h | 191 +++++++++++++++++++++++++++-
lib/librte_ether/rte_ether_version.map | 4 +
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
8 files changed, 685 insertions(+), 14 deletions(-)
create mode 100644 examples/rxtx_callbacks/Makefile
create mode 100644 examples/rxtx_callbacks/basicfwd.c
create mode 100644 examples/rxtx_callbacks/basicfwd.h
Series Acked-by: Declan Doherty <***@intel.com>
John McNamara
2015-02-18 17:42:20 UTC
Permalink
This patchset is for a small addition to the ethdev library, to
add in support for callbacks at the RX and TX stages. This allows
packet processing to be done on packets before they get returned
to applications using rte_eth_rx_burst call.

See the RFC cover letter for the use cases:

http://dpdk.org/ml/archives/dev/2014-December/010491.html

For this version we spent some time investigating Stephen Hemminger's
suggestion of using the userspace RCU (read-copy-update) library for
SMP safety:

http://urcu.so/

The default liburcu (which defaulted to liburcu-mb) requires the least
interaction from the end user but showed a 25% drop in packet throughput
in the callback sample app.

The liburcu-qsbr (quiescent state) variant showed a 1% drop in packet
throughput in the callback sample app. However it requires registered
RCU threads in the program to periodically announce quiescent states.
This makes it more difficult to implement for end user applications.

For this release we will document that adding and removing callbacks
is not thread safe.

Note: Sample application documentation to follow in a patch update.

Version 3 changes:
* Removed unnecessary header file from example folder
(which included baremetal reference).
* Renamed the interrupt, RX and TX callbacks to make their function
clearer (using the names suggested in the mailing list comments).
* Squashed ABI version update into the commit it relates to.
* Fixed various checkpatch warnings.

Version 2 changes:
* Added ABI versioning.
* Doxygen clarifications.

Version 1 changes:
* Added callback removal functions.
* Minor fixes.


Richardson, Bruce (3):
ethdev: Rename callbacks field to link_intr_cbs
ethdev: Add rxtx callback support
examples: example showing use of callbacks.

app/test/virtual_pmd.c | 2 +-
examples/rxtx_callbacks/Makefile | 57 ++++++++
examples/rxtx_callbacks/main.c | 228 ++++++++++++++++++++++++++++++++
lib/librte_ether/rte_ethdev.c | 183 ++++++++++++++++++++++++--
lib/librte_ether/rte_ethdev.h | 192 ++++++++++++++++++++++++++-
lib/librte_ether/rte_ether_version.map | 4 +
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
7 files changed, 654 insertions(+), 14 deletions(-)
create mode 100644 examples/rxtx_callbacks/Makefile
create mode 100644 examples/rxtx_callbacks/main.c
--
1.7.4.1
John McNamara
2015-02-18 17:42:21 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

The 'callbacks' member of the rte_eth_dev structure has been renamed
to 'link_intr_cbs' to make it clear that it refers to callbacks from
NIC interrupts. This allows us to add other types of callbacks to
the structure without ambiguity.

Signed-off-by: Bruce Richardson <***@intel.com>
Signed-off-by: John McNamara <***@intel.com>
---
app/test/virtual_pmd.c | 2 +-
lib/librte_ether/rte_ethdev.c | 12 ++++++------
lib/librte_ether/rte_ethdev.h | 2 +-
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/app/test/virtual_pmd.c b/app/test/virtual_pmd.c
index 9fac95d..eb75846 100644
--- a/app/test/virtual_pmd.c
+++ b/app/test/virtual_pmd.c
@@ -576,7 +576,7 @@ virtual_ethdev_create(const char *name, struct ether_addr *mac_addr,
eth_dev->data->nb_rx_queues = (uint16_t)1;
eth_dev->data->nb_tx_queues = (uint16_t)1;

- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->link_intr_cbs));

eth_dev->data->dev_link.link_status = 0;
eth_dev->data->dev_link.link_speed = ETH_LINK_SPEED_10000;
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 17be2f3..7c4e772 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -265,7 +265,7 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
eth_dev->data->rx_mbuf_alloc_failed = 0;

/* init user callbacks */
- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->link_intr_cbs));

/*
* Set the default MTU.
@@ -2743,7 +2743,7 @@ rte_eth_dev_callback_register(uint8_t port_id,
dev = &rte_eth_devices[port_id];
rte_spinlock_lock(&rte_eth_dev_cb_lock);

- TAILQ_FOREACH(user_cb, &(dev->callbacks), next) {
+ TAILQ_FOREACH(user_cb, &(dev->link_intr_cbs), next) {
if (user_cb->cb_fn == cb_fn &&
user_cb->cb_arg == cb_arg &&
user_cb->event == event) {
@@ -2757,7 +2757,7 @@ rte_eth_dev_callback_register(uint8_t port_id,
user_cb->cb_fn = cb_fn;
user_cb->cb_arg = cb_arg;
user_cb->event = event;
- TAILQ_INSERT_TAIL(&(dev->callbacks), user_cb, next);
+ TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), user_cb, next);
}

rte_spinlock_unlock(&rte_eth_dev_cb_lock);
@@ -2784,7 +2784,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
rte_spinlock_lock(&rte_eth_dev_cb_lock);

ret = 0;
- for (cb = TAILQ_FIRST(&dev->callbacks); cb != NULL; cb = next) {
+ for (cb = TAILQ_FIRST(&dev->link_intr_cbs); cb != NULL; cb = next) {

next = TAILQ_NEXT(cb, next);

@@ -2798,7 +2798,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
* then remove it.
*/
if (cb->active == 0) {
- TAILQ_REMOVE(&(dev->callbacks), cb, next);
+ TAILQ_REMOVE(&(dev->link_intr_cbs), cb, next);
rte_free(cb);
} else {
ret = -EAGAIN;
@@ -2817,7 +2817,7 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
struct rte_eth_dev_callback dev_cb;

rte_spinlock_lock(&rte_eth_dev_cb_lock);
- TAILQ_FOREACH(cb_lst, &(dev->callbacks), next) {
+ TAILQ_FOREACH(cb_lst, &(dev->link_intr_cbs), next) {
if (cb_lst->cb_fn == NULL || cb_lst->event != event)
continue;
dev_cb = *cb_lst;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 6e454e8..48e4ac9 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1539,7 +1539,7 @@ struct rte_eth_dev {
const struct eth_driver *driver;/**< Driver for this device */
struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
- struct rte_eth_dev_cb_list callbacks; /**< User application callbacks */
+ struct rte_eth_dev_cb_list link_intr_cbs; /**< User application callbacks on interrupt*/
};

struct rte_eth_dev_sriov {
diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c
index 4ab3267..077cb73 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_api.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_api.c
@@ -251,7 +251,7 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
eth_dev->data->nb_rx_queues = (uint16_t)1;
eth_dev->data->nb_tx_queues = (uint16_t)1;

- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->link_intr_cbs));

eth_dev->data->dev_link.link_status = 0;
--
1.7.4.1
John McNamara
2015-02-18 17:42:23 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

Example showing how callbacks can be used to insert a timestamp
into each packet on RX. On TX the timestamp is used to calculate
the packet latency through the app, in cycles.

Signed-off-by: Bruce Richardson <***@intel.com>
Signed-off-by: John McNamara <***@intel.com>
---
examples/rxtx_callbacks/Makefile | 57 ++++++++++
examples/rxtx_callbacks/main.c | 228 ++++++++++++++++++++++++++++++++++++++
2 files changed, 285 insertions(+), 0 deletions(-)
create mode 100644 examples/rxtx_callbacks/Makefile
create mode 100644 examples/rxtx_callbacks/main.c

diff --git a/examples/rxtx_callbacks/Makefile b/examples/rxtx_callbacks/Makefile
new file mode 100644
index 0000000..0fafbb7
--- /dev/null
+++ b/examples/rxtx_callbacks/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = rxtx_callbacks
+
+# all sources are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+EXTRA_CFLAGS += -O3 -g -Wfatal-errors
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/rxtx_callbacks/main.c b/examples/rxtx_callbacks/main.c
new file mode 100644
index 0000000..9e5e68e
--- /dev/null
+++ b/examples/rxtx_callbacks/main.c
@@ -0,0 +1,228 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <inttypes.h>
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_mbuf.h>
+
+#define RX_RING_SIZE 128
+#define TX_RING_SIZE 512
+
+#define NUM_MBUFS 8191
+#define MBUF_SIZE (1600 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+#define MBUF_CACHE_SIZE 250
+#define BURST_SIZE 32
+
+static const struct rte_eth_conf port_conf_default = {
+ .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN, },
+};
+
+static unsigned nb_ports;
+
+static struct {
+ uint64_t total_cycles;
+ uint64_t total_pkts;
+} latency_numbers;
+
+
+static uint16_t
+add_timestamps(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+ struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
+{
+ unsigned i;
+ uint64_t now = rte_rdtsc();
+
+ for (i = 0; i < nb_pkts; i++)
+ pkts[i]->udata64 = now;
+ return nb_pkts;
+}
+
+static uint16_t
+calc_latency(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+ struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
+{
+ uint64_t cycles = 0;
+ uint64_t now = rte_rdtsc();
+ unsigned i;
+
+ for (i = 0; i < nb_pkts; i++)
+ cycles += now - pkts[i]->udata64;
+ latency_numbers.total_cycles += cycles;
+ latency_numbers.total_pkts += nb_pkts;
+
+ if (latency_numbers.total_pkts > (100 * 1000 * 1000ULL)) {
+ printf("Latency = %"PRIu64" cycles\n",
+ latency_numbers.total_cycles / latency_numbers.total_pkts);
+ latency_numbers.total_cycles = latency_numbers.total_pkts = 0;
+ }
+ return nb_pkts;
+}
+
+/*
+ * Initialises a given port using global settings and with the rx buffers
+ * coming from the mbuf_pool passed as parameter
+ */
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+ struct rte_eth_conf port_conf = port_conf_default;
+ const uint16_t rx_rings = 1, tx_rings = 1;
+ int retval;
+ uint16_t q;
+
+ if (port >= rte_eth_dev_count())
+ return -1;
+
+ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+ if (retval != 0)
+ return retval;
+
+ for (q = 0; q < rx_rings; q++) {
+ retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
+ rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+ if (retval < 0)
+ return retval;
+ }
+
+ for (q = 0; q < tx_rings; q++) {
+ retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
+ rte_eth_dev_socket_id(port), NULL);
+ if (retval < 0)
+ return retval;
+ }
+
+ retval = rte_eth_dev_start(port);
+ if (retval < 0)
+ return retval;
+
+ struct ether_addr addr;
+
+ rte_eth_macaddr_get(port, &addr);
+ printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+ " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+ (unsigned)port,
+ addr.addr_bytes[0], addr.addr_bytes[1],
+ addr.addr_bytes[2], addr.addr_bytes[3],
+ addr.addr_bytes[4], addr.addr_bytes[5]);
+
+ rte_eth_promiscuous_enable(port);
+ rte_eth_add_rx_callback(port, 0, add_timestamps, NULL);
+ rte_eth_add_tx_callback(port, 0, calc_latency, NULL);
+
+ return 0;
+}
+
+/*
+ * Main thread that does the work, reading from each port
+ * and forwarding to its paired port (port ^ 1)
+ */
+static __attribute__((noreturn)) void
+lcore_main(void)
+{
+ uint8_t port;
+
+ for (port = 0; port < nb_ports; port++)
+ if (rte_eth_dev_socket_id(port) > 0 &&
+ rte_eth_dev_socket_id(port) !=
+ (int)rte_socket_id())
+ printf("WARNING, port %u is on remote NUMA node to "
+ "polling thread.\n\tPerformance will "
+ "not be optimal.\n", port);
+
+ printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
+ rte_lcore_id());
+ for (;;) {
+ for (port = 0; port < nb_ports; port++) {
+ struct rte_mbuf *bufs[BURST_SIZE];
+ const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
+ bufs, BURST_SIZE);
+ if (unlikely(nb_rx == 0))
+ continue;
+ const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
+ bufs, nb_rx);
+ if (unlikely(nb_tx < nb_rx)) {
+ uint16_t buf;
+
+ for (buf = nb_tx; buf < nb_rx; buf++)
+ rte_pktmbuf_free(bufs[buf]);
+ }
+ }
+ }
+}
+
+/* Main function, does initialisation and calls the per-lcore functions */
+int
+main(int argc, char *argv[])
+{
+ struct rte_mempool *mbuf_pool;
+ uint8_t portid;
+
+ /* init EAL */
+ int ret = rte_eal_init(argc, argv);
+
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+ argc -= ret;
+ argv += ret;
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports < 2 || (nb_ports & 1))
+ rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n");
+
+ mbuf_pool = rte_mempool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
+ MBUF_SIZE, MBUF_CACHE_SIZE,
+ sizeof(struct rte_pktmbuf_pool_private),
+ rte_pktmbuf_pool_init, NULL,
+ rte_pktmbuf_init, NULL,
+ rte_socket_id(), 0);
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++)
+ if (port_init(portid, mbuf_pool) != 0)
+ rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n",
+ portid);
+
+ if (rte_lcore_count() > 1)
+ printf("\nWARNING: Too much enabled lcores - "
+ "App uses only 1 lcore\n");
+
+ /* call lcore_main on master core only */
+ lcore_main();
+ return 0;
+}
--
1.7.4.1
John McNamara
2015-02-18 17:42:22 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

Add in support for inline processing of packets inside the RX or
TX call. For an RX callback, what happens is that we get a set of
packets from the NIC and then pass them to a callback function, if
configured, to allow additional processing to be done on them, e.g.
filling in more mbuf fields, before passing back to the application.
On TX, the packets are similarly post-processed before being handed
to the NIC for transmission.

Signed-off-by: Bruce Richardson <***@intel.com>
Signed-off-by: John McNamara <***@intel.com>
---
lib/librte_ether/rte_ethdev.c | 171 +++++++++++++++++++++++++++-
lib/librte_ether/rte_ethdev.h | 192 +++++++++++++++++++++++++++++++-
lib/librte_ether/rte_ether_version.map | 4 +
3 files changed, 361 insertions(+), 6 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 7c4e772..dde1b49 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -337,6 +337,16 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_rx_queues = 0;
return -(ENOMEM);
}
+ dev->post_rx_burst_cbs = rte_zmalloc(
+ "ethdev->post_rx_burst_cbs",
+ sizeof(*dev->post_rx_burst_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->post_rx_burst_cbs == NULL) {
+ rte_free(dev->data->rx_queues);
+ dev->data->rx_queues = NULL;
+ dev->data->nb_rx_queues = 0;
+ return -(ENOMEM);
+ }
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP);

@@ -348,10 +358,20 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
RTE_CACHE_LINE_SIZE);
if (rxq == NULL)
return -(ENOMEM);
+ dev->post_rx_burst_cbs = rte_realloc(
+ dev->post_rx_burst_cbs,
+ sizeof(*dev->post_rx_burst_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->post_rx_burst_cbs == NULL)
+ return -(ENOMEM);

- if (nb_queues > old_nb_queues)
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(rxq + old_nb_queues, 0,
- sizeof(rxq[0]) * (nb_queues - old_nb_queues));
+ sizeof(rxq[0]) * new_qs);
+ memset(dev->post_rx_burst_cbs + old_nb_queues, 0,
+ sizeof(dev->post_rx_burst_cbs[0]) * new_qs);
+ }

dev->data->rx_queues = rxq;

@@ -479,6 +499,16 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_tx_queues = 0;
return -(ENOMEM);
}
+ dev->pre_tx_burst_cbs = rte_zmalloc(
+ "ethdev->pre_tx_burst_cbs",
+ sizeof(*dev->pre_tx_burst_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->pre_tx_burst_cbs == NULL) {
+ rte_free(dev->data->tx_queues);
+ dev->data->tx_queues = NULL;
+ dev->data->nb_tx_queues = 0;
+ return -(ENOMEM);
+ }
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP);

@@ -490,10 +520,21 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
RTE_CACHE_LINE_SIZE);
if (txq == NULL)
return -(ENOMEM);
+ dev->pre_tx_burst_cbs = rte_realloc(
+ dev->pre_tx_burst_cbs,
+ sizeof(*dev->pre_tx_burst_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->pre_tx_burst_cbs == NULL)
+ return -(ENOMEM);

- if (nb_queues > old_nb_queues)
+
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(txq + old_nb_queues, 0,
- sizeof(txq[0]) * (nb_queues - old_nb_queues));
+ sizeof(txq[0]) * new_qs);
+ memset(dev->pre_tx_burst_cbs + old_nb_queues, 0,
+ sizeof(dev->pre_tx_burst_cbs[0]) * new_qs);
+ }

dev->data->tx_queues = txq;

@@ -3258,3 +3299,125 @@ rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP);
return (*dev->dev_ops->filter_ctrl)(dev, filter_type, filter_op, arg);
}
+
+void *
+rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param)
+{
+ /* check input parameters */
+ if (port_id >= nb_ports || fn == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+
+ if (cb == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ cb->fn = fn;
+ cb->param = user_param;
+ cb->next = rte_eth_devices[port_id].post_rx_burst_cbs[queue_id];
+ rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb;
+ return cb;
+}
+
+void *
+rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param)
+{
+ /* check input parameters */
+ if (port_id >= nb_ports || fn == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+
+ if (cb == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ cb->fn = fn;
+ cb->param = user_param;
+ cb->next = rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id];
+ rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id] = cb;
+ return cb;
+}
+
+int
+rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb)
+{
+ /* Check input parameters. */
+ if (port_id >= nb_ports || user_cb == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+ return (-EINVAL);
+ }
+
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
+ struct rte_eth_rxtx_callback *prev_cb;
+
+ /* Reset head pointer and remove user cb if first in the list. */
+ if (cb == user_cb) {
+ dev->post_rx_burst_cbs[queue_id] = user_cb->next;
+ return 0;
+ }
+
+ /* Remove the user cb from the callback list. */
+ do {
+ prev_cb = cb;
+ cb = cb->next;
+
+ if (cb == user_cb) {
+ prev_cb->next = user_cb->next;
+ return 0;
+ }
+
+ } while (cb != NULL);
+
+ /* Callback wasn't found. */
+ return (-EINVAL);
+}
+
+int
+rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb)
+{
+ /* Check input parameters. */
+ if (port_id >= nb_ports || user_cb == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+ return (-EINVAL);
+ }
+
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id];
+ struct rte_eth_rxtx_callback *prev_cb;
+
+ /* Reset head pointer and remove user cb if first in the list. */
+ if (cb == user_cb) {
+ dev->pre_tx_burst_cbs[queue_id] = user_cb->next;
+ return 0;
+ }
+
+ /* Remove the user cb from the callback list. */
+ do {
+ prev_cb = cb;
+ cb = cb->next;
+
+ if (cb == user_cb) {
+ prev_cb->next = user_cb->next;
+ return 0;
+ }
+
+ } while (cb != NULL);
+
+ /* Callback wasn't found. */
+ return (-EINVAL);
+}
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 48e4ac9..80c4923 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1523,6 +1523,47 @@ struct eth_dev_ops {
};

/**
+ * Function type used for callbacks for processing packets on RX and TX
+ *
+ * If configured for RX, it is called with a burst of packets that have just
+ * been received on the given port and queue. On TX, it is called with a burst
+ * of packets immediately before those packets are put onto the hardware queue
+ * for transmission.
+ *
+ * @param port
+ * The ethernet port on which rx or tx is being performed
+ * @param queue
+ * The queue on the ethernet port which is being used to receive or transmit
+ * the packets.
+ * @param pkts
+ * The burst of packets on which processing is to be done. On RX, these
+ * packets have just been received. On TX, they are about to be transmitted.
+ * @param nb_pkts
+ * The number of packets in the burst pointed to by "pkts"
+ * @param user_param
+ * The arbitrary user parameter passed in by the application when the callback
+ * was originally configured.
+ * @return
+ * The number of packets remaining in pkts after processing.
+ * * On RX, this will be returned to the user as the return value from
+ * rte_eth_rx_burst.
+ * * On TX, this will be the number of packets actually written to the NIC.
+ */
+typedef uint16_t (*rte_rxtx_callback_fn)(uint8_t port, uint16_t queue,
+ struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param);
+
+/**
+ * @internal
+ * Structure used to hold information about the callbacks to be called for a
+ * queue on RX and TX.
+ */
+struct rte_eth_rxtx_callback {
+ struct rte_eth_rxtx_callback *next;
+ rte_rxtx_callback_fn fn;
+ void *param;
+};
+
+/**
* @internal
* The generic data structure associated with each ethernet device.
*
@@ -1539,7 +1580,20 @@ struct rte_eth_dev {
const struct eth_driver *driver;/**< Driver for this device */
struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
- struct rte_eth_dev_cb_list link_intr_cbs; /**< User application callbacks on interrupt*/
+ /** User application callbacks for NIC interrupts */
+ struct rte_eth_dev_cb_list link_intr_cbs;
+
+ /**
+ * User-supplied functions called from rx_burst to post-process
+ * received packets before passing them to the user
+ */
+ struct rte_eth_rxtx_callback **post_rx_burst_cbs;
+
+ /**
+ * User-supplied functions called from tx_burst to pre-process
+ * outgoing packets before passing them to the driver for transmission.
+ */
+ struct rte_eth_rxtx_callback **pre_tx_burst_cbs;
};

struct rte_eth_dev_sriov {
@@ -2393,7 +2447,18 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;

dev = &rte_eth_devices[port_id];
- return (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts, nb_pkts);
+ nb_pkts = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts,
+ nb_pkts);
+ struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
+
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, rx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
+ return nb_pkts;
}
#endif

@@ -2520,6 +2585,15 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;

dev = &rte_eth_devices[port_id];
+ struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id];
+
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, tx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
}
#endif
@@ -3667,6 +3741,120 @@ int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_ty
int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
enum rte_filter_op filter_op, void *arg);

+/**
+ * Add a callback to be called on packet RX on a given port and queue.
+ *
+ * This API configures a function to be called for each burst of
+ * packets received on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_rx_callback().
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ * The callback function
+ * @param user_param
+ * A generic pointer parameter which will be passed to each invocation of the
+ * callback function on this port and queue.
+ *
+ * @return
+ * NULL on error.
+ * On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
+/**
+ * Add a callback to be called on packet TX on a given port and queue.
+ *
+ * This API configures a function to be called for each burst of
+ * packets sent on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_tx_callback().
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ * The callback function
+ * @param user_param
+ * A generic pointer parameter which will be passed to each invocation of the
+ * callback function on this port and queue.
+ *
+ * @return
+ * NULL on error.
+ * On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
+/**
+ * Remove an RX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_rx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since it may still be in use. The memory for the callback can be
+ * subsequently freed by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ * callbacks are in flight e.g. if called from the thread doing RX/TX
+ * on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ * in-flight callbacks to complete.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ * User supplied callback created via rte_eth_add_rx_callback().
+ *
+ * @return
+ * - 0: Success. Callback was removed.
+ * - -EINVAL: The port_id or the queue_id is out of range, or the callback is
+ * NULL or not found for the port/queue.
+ */
+int rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb);
+
+/**
+ * Remove a TX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_tx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since it may still be in use. The memory for the callback can be
+ * subsequently freed by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ * callbacks are in flight e.g. if called from the thread doing RX/TX
+ * on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ * in-flight callbacks to complete.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ * User supplied callback created via rte_eth_add_tx_callback().
+ *
+ * @return
+ * - 0: Success. Callback was removed.
+ * - -EINVAL: The port_id or the queue_id is out of range, or the callback is
+ * NULL or not found for the port/queue.
+ */
+int rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index 7316530..3227cda 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -2,6 +2,8 @@ DPDK_2.0 {
global:

_rte_eth_dev_callback_process;
+ rte_eth_add_rx_callback;
+ rte_eth_add_tx_callback;
rte_eth_allmulticast_disable;
rte_eth_allmulticast_enable;
rte_eth_allmulticast_get;
@@ -96,6 +98,8 @@ DPDK_2.0 {
rte_eth_promiscuous_disable;
rte_eth_promiscuous_enable;
rte_eth_promiscuous_get;
+ rte_eth_remove_rx_callback;
+ rte_eth_remove_tx_callback;
rte_eth_rx_burst;
rte_eth_rx_descriptor_done;
rte_eth_rx_queue_count;
--
1.7.4.1
Thomas Monjalon
2015-02-18 18:19:56 UTC
Permalink
Post by Bruce Richardson
Add in support for inline processing of packets inside the RX or
TX call. For an RX callback, what happens is that we get a set of
packets from the NIC and then pass them to a callback function, if
configured, to allow additional processing to be done on them, e.g.
filling in more mbuf fields, before passing back to the application.
On TX, the packets are similarly post-processed before being handed
to the NIC for transmission.
[...]
Post by Bruce Richardson
@@ -2393,7 +2447,18 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;
dev = &rte_eth_devices[port_id];
- return (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts, nb_pkts);
+ nb_pkts = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts,
+ nb_pkts);
+ struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
+
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, rx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
Excuse me, it wasn't very clear to me but I thought from the following email
that the consensus was to use a compile-time option:
http://dpdk.org/ml/archives/dev/2015-February/013450.html
Mcnamara, John
2015-02-19 09:33:45 UTC
Permalink
Post by Vithal S Mohare
-----Original Message-----
Sent: Wednesday, February 18, 2015 6:20 PM
To: Mcnamara, John; Richardson, Bruce
Doherty, Declan
Subject: Re: [PATCH v3 2/3] ethdev: Add rxtx callback support
Excuse me, it wasn't very clear for me but I thought from the following
http://dpdk.org/ml/archives/dev/2015-February/013450.html
Hi Thomas,

I think that got a little lost from our side in the follow-on discussions.

I'll revert with a revision that makes this feature a compile time option.

John.
--
John McNamara
2015-02-19 17:56:40 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

The 'callbacks' member of the rte_eth_dev structure has been renamed
to 'link_intr_cbs' to make it clear that it refers to callbacks from
NIC interrupts. This allows us to add other types of callbacks to
the structure without ambiguity.

Signed-off-by: Bruce Richardson <***@intel.com>
Signed-off-by: John McNamara <***@intel.com>
---
app/test/virtual_pmd.c | 2 +-
lib/librte_ether/rte_ethdev.c | 12 ++++++------
lib/librte_ether/rte_ethdev.h | 2 +-
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
lib/librte_pmd_ring/rte_eth_ring.c | 2 +-
5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/app/test/virtual_pmd.c b/app/test/virtual_pmd.c
index 9fac95d..eb75846 100644
--- a/app/test/virtual_pmd.c
+++ b/app/test/virtual_pmd.c
@@ -576,7 +576,7 @@ virtual_ethdev_create(const char *name, struct ether_addr *mac_addr,
eth_dev->data->nb_rx_queues = (uint16_t)1;
eth_dev->data->nb_tx_queues = (uint16_t)1;

- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->link_intr_cbs));

eth_dev->data->dev_link.link_status = 0;
eth_dev->data->dev_link.link_speed = ETH_LINK_SPEED_10000;
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 17be2f3..7c4e772 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -265,7 +265,7 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
eth_dev->data->rx_mbuf_alloc_failed = 0;

/* init user callbacks */
- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->link_intr_cbs));

/*
* Set the default MTU.
@@ -2743,7 +2743,7 @@ rte_eth_dev_callback_register(uint8_t port_id,
dev = &rte_eth_devices[port_id];
rte_spinlock_lock(&rte_eth_dev_cb_lock);

- TAILQ_FOREACH(user_cb, &(dev->callbacks), next) {
+ TAILQ_FOREACH(user_cb, &(dev->link_intr_cbs), next) {
if (user_cb->cb_fn == cb_fn &&
user_cb->cb_arg == cb_arg &&
user_cb->event == event) {
@@ -2757,7 +2757,7 @@ rte_eth_dev_callback_register(uint8_t port_id,
user_cb->cb_fn = cb_fn;
user_cb->cb_arg = cb_arg;
user_cb->event = event;
- TAILQ_INSERT_TAIL(&(dev->callbacks), user_cb, next);
+ TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), user_cb, next);
}

rte_spinlock_unlock(&rte_eth_dev_cb_lock);
@@ -2784,7 +2784,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
rte_spinlock_lock(&rte_eth_dev_cb_lock);

ret = 0;
- for (cb = TAILQ_FIRST(&dev->callbacks); cb != NULL; cb = next) {
+ for (cb = TAILQ_FIRST(&dev->link_intr_cbs); cb != NULL; cb = next) {

next = TAILQ_NEXT(cb, next);

@@ -2798,7 +2798,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
* then remove it.
*/
if (cb->active == 0) {
- TAILQ_REMOVE(&(dev->callbacks), cb, next);
+ TAILQ_REMOVE(&(dev->link_intr_cbs), cb, next);
rte_free(cb);
} else {
ret = -EAGAIN;
@@ -2817,7 +2817,7 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
struct rte_eth_dev_callback dev_cb;

rte_spinlock_lock(&rte_eth_dev_cb_lock);
- TAILQ_FOREACH(cb_lst, &(dev->callbacks), next) {
+ TAILQ_FOREACH(cb_lst, &(dev->link_intr_cbs), next) {
if (cb_lst->cb_fn == NULL || cb_lst->event != event)
continue;
dev_cb = *cb_lst;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 6e454e8..48e4ac9 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1539,7 +1539,7 @@ struct rte_eth_dev {
const struct eth_driver *driver;/**< Driver for this device */
struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
- struct rte_eth_dev_cb_list callbacks; /**< User application callbacks */
+ struct rte_eth_dev_cb_list link_intr_cbs; /**< User application callbacks on interrupt*/
};

struct rte_eth_dev_sriov {
diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c
index 4ab3267..077cb73 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_api.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_api.c
@@ -251,7 +251,7 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
eth_dev->data->nb_rx_queues = (uint16_t)1;
eth_dev->data->nb_tx_queues = (uint16_t)1;

- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->link_intr_cbs));

eth_dev->data->dev_link.link_status = 0;

diff --git a/lib/librte_pmd_ring/rte_eth_ring.c b/lib/librte_pmd_ring/rte_eth_ring.c
index a23e933..a5dc71e 100644
--- a/lib/librte_pmd_ring/rte_eth_ring.c
+++ b/lib/librte_pmd_ring/rte_eth_ring.c
@@ -340,7 +340,7 @@ rte_eth_from_rings(const char *name, struct rte_ring *const rx_queues[],
eth_dev->driver = eth_drv;
eth_dev->dev_ops = &ops;
eth_dev->pci_dev = pci_dev;
- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->link_intr_cbs));

/* finally assign rx and tx ops */
eth_dev->rx_pkt_burst = eth_ring_rx;
--
1.7.4.1
John McNamara
2015-02-19 17:56:42 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

Example showing how callbacks can be used to insert a timestamp
into each packet on RX. On TX the timestamp is used to calculate
the packet latency through the app, in cycles.

Signed-off-by: Bruce Richardson <***@intel.com>
Signed-off-by: John McNamara <***@intel.com>
---
MAINTAINERS | 4 +
examples/Makefile | 1 +
examples/rxtx_callbacks/Makefile | 57 ++++++++++
examples/rxtx_callbacks/main.c | 228 ++++++++++++++++++++++++++++++++++++++
4 files changed, 290 insertions(+), 0 deletions(-)
create mode 100644 examples/rxtx_callbacks/Makefile
create mode 100644 examples/rxtx_callbacks/main.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 7ac6d59..dcca441 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -432,6 +432,10 @@ F: doc/guides/sample_app_ug/netmap_compatibility.rst
F: examples/quota_watermark/
F: doc/guides/sample_app_ug/quota_watermark.rst

+M: Bruce Richardson <***@intel.com>
+M: John McNamara <***@intel.com>
+F: examples/rxtx_callbacks/
+
F: examples/skeleton/

F: examples/vmdq/
diff --git a/examples/Makefile b/examples/Makefile
index 095bad2..4a872f2 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -63,6 +63,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += packet_ordering
DIRS-$(CONFIG_RTE_LIBRTE_METER) += qos_meter
DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += qos_sched
DIRS-y += quota_watermark
+DIRS-$(CONFIG_RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS) += rxtx_callbacks
DIRS-y += skeleton
DIRS-y += timer
DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost
diff --git a/examples/rxtx_callbacks/Makefile b/examples/rxtx_callbacks/Makefile
new file mode 100644
index 0000000..0fafbb7
--- /dev/null
+++ b/examples/rxtx_callbacks/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = rxtx_callbacks
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+EXTRA_CFLAGS += -O3 -g -Wfatal-errors
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/rxtx_callbacks/main.c b/examples/rxtx_callbacks/main.c
new file mode 100644
index 0000000..9e5e68e
--- /dev/null
+++ b/examples/rxtx_callbacks/main.c
@@ -0,0 +1,228 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <inttypes.h>
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_mbuf.h>
+
+#define RX_RING_SIZE 128
+#define TX_RING_SIZE 512
+
+#define NUM_MBUFS 8191
+#define MBUF_SIZE (1600 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+#define MBUF_CACHE_SIZE 250
+#define BURST_SIZE 32
+
+static const struct rte_eth_conf port_conf_default = {
+ .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN, },
+};
+
+static unsigned nb_ports;
+
+static struct {
+ uint64_t total_cycles;
+ uint64_t total_pkts;
+} latency_numbers;
+
+
+static uint16_t
+add_timestamps(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+ struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
+{
+ unsigned i;
+ uint64_t now = rte_rdtsc();
+
+ for (i = 0; i < nb_pkts; i++)
+ pkts[i]->udata64 = now;
+ return nb_pkts;
+}
+
+static uint16_t
+calc_latency(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+ struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
+{
+ uint64_t cycles = 0;
+ uint64_t now = rte_rdtsc();
+ unsigned i;
+
+ for (i = 0; i < nb_pkts; i++)
+ cycles += now - pkts[i]->udata64;
+ latency_numbers.total_cycles += cycles;
+ latency_numbers.total_pkts += nb_pkts;
+
+ if (latency_numbers.total_pkts > (100 * 1000 * 1000ULL)) {
+ printf("Latency = %"PRIu64" cycles\n",
+ latency_numbers.total_cycles / latency_numbers.total_pkts);
+ latency_numbers.total_cycles = latency_numbers.total_pkts = 0;
+ }
+ return nb_pkts;
+}
+
+/*
+ * Initialises a given port using global settings and with the rx buffers
+ * coming from the mbuf_pool passed as parameter
+ */
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+ struct rte_eth_conf port_conf = port_conf_default;
+ const uint16_t rx_rings = 1, tx_rings = 1;
+ int retval;
+ uint16_t q;
+
+ if (port >= rte_eth_dev_count())
+ return -1;
+
+ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+ if (retval != 0)
+ return retval;
+
+ for (q = 0; q < rx_rings; q++) {
+ retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
+ rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+ if (retval < 0)
+ return retval;
+ }
+
+ for (q = 0; q < tx_rings; q++) {
+ retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
+ rte_eth_dev_socket_id(port), NULL);
+ if (retval < 0)
+ return retval;
+ }
+
+ retval = rte_eth_dev_start(port);
+ if (retval < 0)
+ return retval;
+
+ struct ether_addr addr;
+
+ rte_eth_macaddr_get(port, &addr);
+ printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+ " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+ (unsigned)port,
+ addr.addr_bytes[0], addr.addr_bytes[1],
+ addr.addr_bytes[2], addr.addr_bytes[3],
+ addr.addr_bytes[4], addr.addr_bytes[5]);
+
+ rte_eth_promiscuous_enable(port);
+ rte_eth_add_rx_callback(port, 0, add_timestamps, NULL);
+ rte_eth_add_tx_callback(port, 0, calc_latency, NULL);
+
+ return 0;
+}
+
+/*
+ * Main thread that does the work, reading from INPUT_PORT
+ * and writing to OUTPUT_PORT
+ */
+static __attribute__((noreturn)) void
+lcore_main(void)
+{
+ uint8_t port;
+
+ for (port = 0; port < nb_ports; port++)
+ if (rte_eth_dev_socket_id(port) > 0 &&
+ rte_eth_dev_socket_id(port) !=
+ (int)rte_socket_id())
+ printf("WARNING, port %u is on remote NUMA node to "
+ "polling thread.\n\tPerformance will "
+ "not be optimal.\n", port);
+
+ printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
+ rte_lcore_id());
+ for (;;) {
+ for (port = 0; port < nb_ports; port++) {
+ struct rte_mbuf *bufs[BURST_SIZE];
+ const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
+ bufs, BURST_SIZE);
+ if (unlikely(nb_rx == 0))
+ continue;
+ const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
+ bufs, nb_rx);
+ if (unlikely(nb_tx < nb_rx)) {
+ uint16_t buf;
+
+ for (buf = nb_tx; buf < nb_rx; buf++)
+ rte_pktmbuf_free(bufs[buf]);
+ }
+ }
+ }
+}
+
+/* Main function, does initialisation and calls the per-lcore functions */
+int
+main(int argc, char *argv[])
+{
+ struct rte_mempool *mbuf_pool;
+ uint8_t portid;
+
+ /* init EAL */
+ int ret = rte_eal_init(argc, argv);
+
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+ argc -= ret;
+ argv += ret;
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports < 2 || (nb_ports & 1))
+ rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n");
+
+ mbuf_pool = rte_mempool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
+ MBUF_SIZE, MBUF_CACHE_SIZE,
+ sizeof(struct rte_pktmbuf_pool_private),
+ rte_pktmbuf_pool_init, NULL,
+ rte_pktmbuf_init, NULL,
+ rte_socket_id(), 0);
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++)
+ if (port_init(portid, mbuf_pool) != 0)
+ rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n",
+ portid);
+
+ if (rte_lcore_count() > 1)
+ printf("\nWARNING: Too much enabled lcores - "
+ "App uses only 1 lcore\n");
+
+ /* call lcore_main on master core only */
+ lcore_main();
+ return 0;
+}
--
1.7.4.1
John McNamara
2015-02-19 17:56:41 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

Add optional support for inline processing of packets inside the RX
or TX call. For an RX callback, what happens is that we get a set of
packets from the NIC and then pass them to a callback function, if
configured, to allow additional processing to be done on them, e.g.
filling in more mbuf fields, before passing back to the application.
On TX, the packets are similarly post-processed before being handed
to the NIC for transmission.

Signed-off-by: Bruce Richardson <***@intel.com>
Signed-off-by: John McNamara <***@intel.com>
---
config/common_bsdapp | 1 +
config/common_linuxapp | 1 +
lib/librte_ether/rte_ethdev.c | 192 +++++++++++++++++++++++++++++-
lib/librte_ether/rte_ethdev.h | 204 +++++++++++++++++++++++++++++++-
lib/librte_ether/rte_ether_version.map | 4 +
5 files changed, 397 insertions(+), 5 deletions(-)

diff --git a/config/common_bsdapp b/config/common_bsdapp
index f11ff39..e9c445e 100644
--- a/config/common_bsdapp
+++ b/config/common_bsdapp
@@ -133,6 +133,7 @@ CONFIG_RTE_LIBRTE_ETHDEV_DEBUG=n
CONFIG_RTE_MAX_ETHPORTS=32
CONFIG_RTE_LIBRTE_IEEE1588=n
CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
+CONFIG_RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS=n

#
# Support NIC bypass logic
diff --git a/config/common_linuxapp b/config/common_linuxapp
index f921d8c..0cb850e 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -131,6 +131,7 @@ CONFIG_RTE_LIBRTE_ETHDEV_DEBUG=n
CONFIG_RTE_MAX_ETHPORTS=32
CONFIG_RTE_LIBRTE_IEEE1588=n
CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
+CONFIG_RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS=n

#
# Support NIC bypass logic
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 7c4e772..8a4e0e7 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -337,6 +337,20 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_rx_queues = 0;
return -(ENOMEM);
}
+
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+ dev->post_rx_burst_cbs = rte_zmalloc(
+ "ethdev->post_rx_burst_cbs",
+ sizeof(*dev->post_rx_burst_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->post_rx_burst_cbs == NULL) {
+ rte_free(dev->data->rx_queues);
+ dev->data->rx_queues = NULL;
+ dev->data->nb_rx_queues = 0;
+ return -(ENOMEM);
+ }
+#endif
+
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP);

@@ -349,9 +363,25 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
if (rxq == NULL)
return -(ENOMEM);

- if (nb_queues > old_nb_queues)
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+ dev->post_rx_burst_cbs = rte_realloc(
+ dev->post_rx_burst_cbs,
+ sizeof(*dev->post_rx_burst_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->post_rx_burst_cbs == NULL)
+ return -(ENOMEM);
+#endif
+
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(rxq + old_nb_queues, 0,
- sizeof(rxq[0]) * (nb_queues - old_nb_queues));
+ sizeof(rxq[0]) * new_qs);
+
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+ memset(dev->post_rx_burst_cbs + old_nb_queues, 0,
+ sizeof(dev->post_rx_burst_cbs[0]) * new_qs);
+#endif
+ }

dev->data->rx_queues = rxq;

@@ -479,6 +509,20 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_tx_queues = 0;
return -(ENOMEM);
}
+
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+ dev->pre_tx_burst_cbs = rte_zmalloc(
+ "ethdev->pre_tx_burst_cbs",
+ sizeof(*dev->pre_tx_burst_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->pre_tx_burst_cbs == NULL) {
+ rte_free(dev->data->tx_queues);
+ dev->data->tx_queues = NULL;
+ dev->data->nb_tx_queues = 0;
+ return -(ENOMEM);
+ }
+#endif
+
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP);

@@ -491,9 +535,25 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
if (txq == NULL)
return -(ENOMEM);

- if (nb_queues > old_nb_queues)
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+ dev->pre_tx_burst_cbs = rte_realloc(
+ dev->pre_tx_burst_cbs,
+ sizeof(*dev->pre_tx_burst_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->pre_tx_burst_cbs == NULL)
+ return -(ENOMEM);
+#endif
+
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(txq + old_nb_queues, 0,
- sizeof(txq[0]) * (nb_queues - old_nb_queues));
+ sizeof(txq[0]) * new_qs);
+
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+ memset(dev->pre_tx_burst_cbs + old_nb_queues, 0,
+ sizeof(dev->pre_tx_burst_cbs[0]) * new_qs);
+#endif
+ }

dev->data->tx_queues = txq;

@@ -3258,3 +3318,127 @@ rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP);
return (*dev->dev_ops->filter_ctrl)(dev, filter_type, filter_op, arg);
}
+
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+void *
+rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param)
+{
+ /* check input parameters */
+ if (port_id >= nb_ports || fn == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+
+ if (cb == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ cb->fn = fn;
+ cb->param = user_param;
+ cb->next = rte_eth_devices[port_id].post_rx_burst_cbs[queue_id];
+ rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb;
+ return cb;
+}
+
+void *
+rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param)
+{
+ /* check input parameters */
+ if (port_id >= nb_ports || fn == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+
+ if (cb == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ cb->fn = fn;
+ cb->param = user_param;
+ cb->next = rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id];
+ rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id] = cb;
+ return cb;
+}
+
+int
+rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb)
+{
+ /* Check input parameters. */
+ if (port_id >= nb_ports || user_cb == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+ return (-EINVAL);
+ }
+
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
+ struct rte_eth_rxtx_callback *prev_cb;
+
+ /* Reset head pointer and remove user cb if first in the list. */
+ if (cb == user_cb) {
+ dev->post_rx_burst_cbs[queue_id] = user_cb->next;
+ return 0;
+ }
+
+ /* Remove the user cb from the callback list. */
+ do {
+ prev_cb = cb;
+ cb = cb->next;
+
+ if (cb == user_cb) {
+ prev_cb->next = user_cb->next;
+ return 0;
+ }
+
+ } while (cb != NULL);
+
+ /* Callback wasn't found. */
+ return (-EINVAL);
+}
+
+int
+rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb)
+{
+ /* Check input parameters. */
+ if (port_id >= nb_ports || user_cb == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+ return (-EINVAL);
+ }
+
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id];
+ struct rte_eth_rxtx_callback *prev_cb;
+
+ /* Reset head pointer and remove user cb if first in the list. */
+ if (cb == user_cb) {
+ dev->pre_tx_burst_cbs[queue_id] = user_cb->next;
+ return 0;
+ }
+
+ /* Remove the user cb from the callback list. */
+ do {
+ prev_cb = cb;
+ cb = cb->next;
+
+ if (cb == user_cb) {
+ prev_cb->next = user_cb->next;
+ return 0;
+ }
+
+ } while (cb != NULL);
+
+ /* Callback wasn't found. */
+ return (-EINVAL);
+}
+#endif /* RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS */
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 48e4ac9..f55eeea 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1522,6 +1522,49 @@ struct eth_dev_ops {
eth_filter_ctrl_t filter_ctrl; /**< common filter control*/
};

+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+/**
+ * Function type used for callbacks for processing packets on RX and TX
+ *
+ * If configured for RX, it is called with a burst of packets that have just
+ * been received on the given port and queue. On TX, it is called with a burst
+ * of packets immediately before those packets are put onto the hardware queue
+ * for transmission.
+ *
+ * @param port
+ * The ethernet port on which rx or tx is being performed
+ * @param queue
+ * The queue on the ethernet port which is being used to receive or transmit
+ * the packets.
+ * @param pkts
+ * The burst of packets on which processing is to be done. On RX, these
+ * packets have just been received. On TX, they are about to be transmitted.
+ * @param nb_pkts
+ * The number of packets in the burst pointed to by "pkts"
+ * @param user_param
+ * The arbitrary user parameter passed in by the application when the callback
+ * was originally configured.
+ * @return
+ * The number of packets remaining in pkts after processing.
+ * * On RX, this will be returned to the user as the return value from
+ * rte_eth_rx_burst.
+ * * On TX, this will be the number of packets actually written to the NIC.
+ */
+typedef uint16_t (*rte_rxtx_callback_fn)(uint8_t port, uint16_t queue,
+ struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param);
+
+/**
+ * @internal
+ * Structure used to hold information about the callbacks to be called for a
+ * queue on RX and TX.
+ */
+struct rte_eth_rxtx_callback {
+ struct rte_eth_rxtx_callback *next;
+ rte_rxtx_callback_fn fn;
+ void *param;
+};
+#endif
+
/**
* @internal
* The generic data structure associated with each ethernet device.
@@ -1539,7 +1582,22 @@ struct rte_eth_dev {
const struct eth_driver *driver;/**< Driver for this device */
struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
- struct rte_eth_dev_cb_list link_intr_cbs; /**< User application callbacks on interrupt*/
+ /** User application callbacks for NIC interrupts */
+ struct rte_eth_dev_cb_list link_intr_cbs;
+
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+ /**
+ * User-supplied functions called from rx_burst to post-process
+ * received packets before passing them to the user
+ */
+ struct rte_eth_rxtx_callback **post_rx_burst_cbs;
+
+ /**
+ * User-supplied functions called from tx_burst to pre-process
+ * outgoing packets before passing them to the driver for transmission.
+ */
+ struct rte_eth_rxtx_callback **pre_tx_burst_cbs;
+#endif
};

struct rte_eth_dev_sriov {
@@ -2393,7 +2451,23 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;

dev = &rte_eth_devices[port_id];
+
+#ifndef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
return (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts, nb_pkts);
+#else
+ nb_pkts = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts,
+ nb_pkts);
+ struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
+
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, rx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
+ return nb_pkts;
+#endif
}
#endif

@@ -2520,6 +2594,17 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;

dev = &rte_eth_devices[port_id];
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+ struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id];
+
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, tx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
+#endif
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
}
#endif
@@ -3667,6 +3752,123 @@ int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_ty
int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
enum rte_filter_op filter_op, void *arg);

+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+/**
+ * Add a callback to be called on packet RX on a given port and queue.
+ *
+ * This API configures a function to be called for each burst of
+ * packets received on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_rx_callback().
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ * The callback function
+ * @param user_param
+ * A generic pointer parameter which will be passed to each invocation of the
+ * callback function on this port and queue.
+ *
+ * @return
+ * NULL on error.
+ * On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
+/**
+ * Add a callback to be called on packet TX on a given port and queue.
+ *
+ * This API configures a function to be called for each burst of
+ * packets sent on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_tx_callback().
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ * The callback function
+ * @param user_param
+ * A generic pointer parameter which will be passed to each invocation of the
+ * callback function on this port and queue.
+ *
+ * @return
+ * NULL on error.
+ * On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
+/**
+ * Remove an RX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_rx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since it may still be in use. The memory for the callback can be
+ * subsequently freed back by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ * callbacks are in flight e.g. if called from the thread doing RX/TX
+ * on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ * in-flight callbacks to complete.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ * User supplied callback created via rte_eth_add_rx_callback().
+ *
+ * @return
+ * - 0: Success. Callback was removed.
+ * - -EINVAL: The port_id or the queue_id is out of range, or the callback is
+ * NULL or not found for the port/queue.
+ */
+int rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb);
+
+/**
+ * Remove a TX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_tx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since it may still be in use. The memory for the callback can be
+ * subsequently freed back by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ * callbacks are in flight e.g. if called from the thread doing RX/TX
+ * on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ * in-flight callbacks to complete.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ * User supplied callback created via rte_eth_add_tx_callback().
+ *
+ * @return
+ * - 0: Success. Callback was removed.
+ * - -EINVAL: The port_id or the queue_id is out of range, or the callback is
+ * NULL or not found for the port/queue.
+ */
+int rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb);
+
+#endif /* RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS */
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index 7316530..3227cda 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -2,6 +2,8 @@ DPDK_2.0 {
global:

_rte_eth_dev_callback_process;
+ rte_eth_add_rx_callback;
+ rte_eth_add_tx_callback;
rte_eth_allmulticast_disable;
rte_eth_allmulticast_enable;
rte_eth_allmulticast_get;
@@ -96,6 +98,8 @@ DPDK_2.0 {
rte_eth_promiscuous_disable;
rte_eth_promiscuous_enable;
rte_eth_promiscuous_get;
+ rte_eth_remove_rx_callback;
+ rte_eth_remove_tx_callback;
rte_eth_rx_burst;
rte_eth_rx_descriptor_done;
rte_eth_rx_queue_count;
--
1.7.4.1
Bruce Richardson
2015-02-20 10:06:00 UTC
Permalink
Post by John McNamara
Add optional support for inline processing of packets inside the RX
or TX call. For an RX callback, what happens is that we get a set of
packets from the NIC and then pass them to a callback function, if
configured, to allow additional processing to be done on them, e.g.
filling in more mbuf fields, before passing back to the application.
On TX, the packets are similarly post-processed before being handed
to the NIC for transmission.
---
config/common_bsdapp | 1 +
config/common_linuxapp | 1 +
lib/librte_ether/rte_ethdev.c | 192 +++++++++++++++++++++++++++++-
lib/librte_ether/rte_ethdev.h | 204 +++++++++++++++++++++++++++++++-
lib/librte_ether/rte_ether_version.map | 4 +
5 files changed, 397 insertions(+), 5 deletions(-)
diff --git a/config/common_bsdapp b/config/common_bsdapp
index f11ff39..e9c445e 100644
--- a/config/common_bsdapp
+++ b/config/common_bsdapp
@@ -133,6 +133,7 @@ CONFIG_RTE_LIBRTE_ETHDEV_DEBUG=n
CONFIG_RTE_MAX_ETHPORTS=32
CONFIG_RTE_LIBRTE_IEEE1588=n
CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
+CONFIG_RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS=n
I see no reason why this should not be "y" by default. Those who are not using it
and don't want the tiny performance impact of it can turn it off.

/Bruce
Post by John McNamara
#
# Support NIC bypass logic
diff --git a/config/common_linuxapp b/config/common_linuxapp
index f921d8c..0cb850e 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -131,6 +131,7 @@ CONFIG_RTE_LIBRTE_ETHDEV_DEBUG=n
CONFIG_RTE_MAX_ETHPORTS=32
CONFIG_RTE_LIBRTE_IEEE1588=n
CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
+CONFIG_RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS=n
#
# Support NIC bypass logic
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 7c4e772..8a4e0e7 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -337,6 +337,20 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_rx_queues = 0;
return -(ENOMEM);
}
+
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+ dev->post_rx_burst_cbs = rte_zmalloc(
+ "ethdev->post_rx_burst_cbs",
+ sizeof(*dev->post_rx_burst_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->post_rx_burst_cbs == NULL) {
+ rte_free(dev->data->rx_queues);
+ dev->data->rx_queues = NULL;
+ dev->data->nb_rx_queues = 0;
+ return -(ENOMEM);
+ }
+#endif
+
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP);
@@ -349,9 +363,25 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
if (rxq == NULL)
return -(ENOMEM);
- if (nb_queues > old_nb_queues)
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+ dev->post_rx_burst_cbs = rte_realloc(
+ dev->post_rx_burst_cbs,
+ sizeof(*dev->post_rx_burst_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->post_rx_burst_cbs == NULL)
+ return -(ENOMEM);
+#endif
+
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(rxq + old_nb_queues, 0,
- sizeof(rxq[0]) * (nb_queues - old_nb_queues));
+ sizeof(rxq[0]) * new_qs);
+
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+ memset(dev->post_rx_burst_cbs + old_nb_queues, 0,
+ sizeof(dev->post_rx_burst_cbs[0]) * new_qs);
+#endif
+ }
dev->data->rx_queues = rxq;
@@ -479,6 +509,20 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_tx_queues = 0;
return -(ENOMEM);
}
+
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+ dev->pre_tx_burst_cbs = rte_zmalloc(
+ "ethdev->pre_tx_burst_cbs",
+ sizeof(*dev->pre_tx_burst_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->pre_tx_burst_cbs == NULL) {
+ rte_free(dev->data->tx_queues);
+ dev->data->tx_queues = NULL;
+ dev->data->nb_tx_queues = 0;
+ return -(ENOMEM);
+ }
+#endif
+
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP);
@@ -491,9 +535,25 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
if (txq == NULL)
return -(ENOMEM);
- if (nb_queues > old_nb_queues)
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+ dev->pre_tx_burst_cbs = rte_realloc(
+ dev->pre_tx_burst_cbs,
+ sizeof(*dev->pre_tx_burst_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->pre_tx_burst_cbs == NULL)
+ return -(ENOMEM);
+#endif
+
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(txq + old_nb_queues, 0,
- sizeof(txq[0]) * (nb_queues - old_nb_queues));
+ sizeof(txq[0]) * new_qs);
+
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+ memset(dev->pre_tx_burst_cbs + old_nb_queues, 0,
+ sizeof(dev->pre_tx_burst_cbs[0]) * new_qs);
+#endif
+ }
dev->data->tx_queues = txq;
@@ -3258,3 +3318,127 @@ rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP);
return (*dev->dev_ops->filter_ctrl)(dev, filter_type, filter_op, arg);
}
+
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+void *
+rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param)
+{
+ /* check input parameters */
+ if (port_id >= nb_ports || fn == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+
+ if (cb == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ cb->fn = fn;
+ cb->param = user_param;
+ cb->next = rte_eth_devices[port_id].post_rx_burst_cbs[queue_id];
+ rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb;
+ return cb;
+}
+
+void *
+rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param)
+{
+ /* check input parameters */
+ if (port_id >= nb_ports || fn == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+
+ if (cb == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ cb->fn = fn;
+ cb->param = user_param;
+ cb->next = rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id];
+ rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id] = cb;
+ return cb;
+}
+
+/*
+ * Unlink a previously added RX callback from the post-RX-burst list of the
+ * given port/queue.
+ *
+ * The node is only unlinked here; it is not freed by this function.
+ *
+ * Returns 0 when the callback was found and unlinked, -EINVAL when the
+ * port or queue is out of range, user_cb is NULL, or user_cb is not on the
+ * list for this port/queue (including the case where the list is empty).
+ */
+int
+rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+		struct rte_eth_rxtx_callback *user_cb)
+{
+	/* Check input parameters. */
+	if (port_id >= nb_ports || user_cb == NULL ||
+		queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+		return (-EINVAL);
+	}
+
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	struct rte_eth_rxtx_callback **link;
+
+	/*
+	 * Walk the list through the address of each "next" link (starting
+	 * with the head slot). This handles the head and interior cases the
+	 * same way and never dereferences a NULL node, so an empty list is
+	 * simply reported as "not found".
+	 */
+	for (link = &dev->post_rx_burst_cbs[queue_id]; *link != NULL;
+			link = &(*link)->next) {
+		if (*link == user_cb) {
+			*link = user_cb->next;
+			return 0;
+		}
+	}
+
+	/* Callback wasn't found. */
+	return (-EINVAL);
+}
+
+/*
+ * Unlink a previously added TX callback from the pre-TX-burst list of the
+ * given port/queue.
+ *
+ * The node is only unlinked here; it is not freed by this function.
+ *
+ * Returns 0 when the callback was found and unlinked, -EINVAL when the
+ * port or queue is out of range, user_cb is NULL, or user_cb is not on the
+ * list for this port/queue (including the case where the list is empty).
+ */
+int
+rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+		struct rte_eth_rxtx_callback *user_cb)
+{
+	/* Check input parameters. */
+	if (port_id >= nb_ports || user_cb == NULL ||
+		queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+		return (-EINVAL);
+	}
+
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	struct rte_eth_rxtx_callback **link;
+
+	/*
+	 * Walk the list through the address of each "next" link (starting
+	 * with the head slot). This handles the head and interior cases the
+	 * same way and never dereferences a NULL node, so an empty list is
+	 * simply reported as "not found".
+	 */
+	for (link = &dev->pre_tx_burst_cbs[queue_id]; *link != NULL;
+			link = &(*link)->next) {
+		if (*link == user_cb) {
+			*link = user_cb->next;
+			return 0;
+		}
+	}
+
+	/* Callback wasn't found. */
+	return (-EINVAL);
+}
+#endif /* RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS */
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 48e4ac9..f55eeea 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1522,6 +1522,49 @@ struct eth_dev_ops {
eth_filter_ctrl_t filter_ctrl; /**< common filter control*/
};
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+/**
+ * Function type used for callbacks for processing packets on RX and TX
+ *
+ * If configured for RX, it is called with a burst of packets that have just
+ * been received on the given port and queue. On TX, it is called with a burst
+ * of packets immediately before those packets are put onto the hardware queue
+ * for transmission.
+ *
+ * @param port
+ *   The ethernet port on which rx or tx is being performed
+ * @param queue
+ *   The queue on the ethernet port which is being used to receive or transmit
+ *   the packets.
+ * @param pkts
+ *   The burst of packets on which processing is to be done. On RX, these
+ *   packets have just been received. On TX, they are about to be transmitted.
+ * @param nb_pkts
+ *   The number of packets in the burst pointed to by "pkts"
+ * @param user_param
+ *   The arbitrary user parameter passed in by the application when the callback
+ *   was originally configured.
+ * @return
+ *   The number of packets remaining in pkts after processing.
+ *   * On RX, this will be returned to the user as the return value from
+ *     rte_eth_rx_burst.
+ *   * On TX, this will be the number of packets handed on to the driver for
+ *     transmission.
+ */
+typedef uint16_t (*rte_rxtx_callback_fn)(uint8_t port, uint16_t queue,
+ struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param);
+
+/**
+ * Structure used to hold information about the callbacks to be called for a
+ * queue on RX and TX.
+ */
+struct rte_eth_rxtx_callback {
+ struct rte_eth_rxtx_callback *next;
+ rte_rxtx_callback_fn fn;
+ void *param;
+};
+#endif
+
/**
* The generic data structure associated with each ethernet device.
@@ -1539,7 +1582,22 @@ struct rte_eth_dev {
const struct eth_driver *driver;/**< Driver for this device */
struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
- struct rte_eth_dev_cb_list link_intr_cbs; /**< User application callbacks on interrupt*/
+ /** User application callbacks for NIC interrupts */
+ struct rte_eth_dev_cb_list link_intr_cbs;
+
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+ /**
+ * User-supplied functions called from rx_burst to post-process
+ * received packets before passing them to the user
+ */
+ struct rte_eth_rxtx_callback **post_rx_burst_cbs;
+
+ /**
+ * User-supplied functions called from tx_burst to pre-process
+ * received packets before passing them to the driver for transmission.
+ */
+ struct rte_eth_rxtx_callback **pre_tx_burst_cbs;
+#endif
};
struct rte_eth_dev_sriov {
@@ -2393,7 +2451,23 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;
dev = &rte_eth_devices[port_id];
+
+#ifndef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
return (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts, nb_pkts);
+#else
+ nb_pkts = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts,
+ nb_pkts);
+ struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
+
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, rx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
+ return nb_pkts;
+#endif
}
#endif
@@ -2520,6 +2594,17 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;
dev = &rte_eth_devices[port_id];
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+ struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id];
+
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, tx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
+#endif
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
}
#endif
@@ -3667,6 +3752,123 @@ int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_ty
int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
enum rte_filter_op filter_op, void *arg);
+#ifdef RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS
+/**
+ * Add a callback to be called on packet RX on a given port and queue.
+ *
+ * This API configures a function to be called for each burst of
+ * packets received on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_rx_callback().
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ *   The callback function
+ * @param user_param
+ *   A generic pointer parameter which will be passed to each invocation of the
+ *   callback function on this port and queue.
+ *
+ * @return
+ *   NULL on error.
+ *   On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
+/**
+ * Add a callback to be called on packet TX on a given port and queue.
+ *
+ * This API configures a function to be called for each burst of
+ * packets sent on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_tx_callback().
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ *   The callback function
+ * @param user_param
+ *   A generic pointer parameter which will be passed to each invocation of the
+ *   callback function on this port and queue.
+ *
+ * @return
+ *   NULL on error.
+ *   On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
+/**
+ * Remove an RX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_rx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since it may still be in use. The memory for the callback can be
+ * subsequently freed by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ * callbacks are in flight e.g. if called from the thread doing RX/TX
+ * on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ * in-flight callbacks to complete.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ *   User supplied callback created via rte_eth_add_rx_callback().
+ *
+ * @return
+ *   - 0: Success. Callback was removed.
+ *   - -EINVAL: The port_id or the queue_id is out of range, or the callback is
+ *     NULL or not found for the port/queue.
+ */
+int rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb);
+
+/**
+ * Remove a TX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_tx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since it may still be in use. The memory for the callback can be
+ * subsequently freed by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ * callbacks are in flight e.g. if called from the thread doing RX/TX
+ * on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ * in-flight callbacks to complete.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ *   User supplied callback created via rte_eth_add_tx_callback().
+ *
+ * @return
+ *   - 0: Success. Callback was removed.
+ *   - -EINVAL: The port_id or the queue_id is out of range, or the callback is
+ *     NULL or not found for the port/queue.
+ */
+int rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb);
+
+#endif /* RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS */
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index 7316530..3227cda 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -2,6 +2,8 @@ DPDK_2.0 {
_rte_eth_dev_callback_process;
+ rte_eth_add_rx_callback;
+ rte_eth_add_tx_callback;
rte_eth_allmulticast_disable;
rte_eth_allmulticast_enable;
rte_eth_allmulticast_get;
@@ -96,6 +98,8 @@ DPDK_2.0 {
rte_eth_promiscuous_disable;
rte_eth_promiscuous_enable;
rte_eth_promiscuous_get;
+ rte_eth_remove_rx_callback;
+ rte_eth_remove_tx_callback;
rte_eth_rx_burst;
rte_eth_rx_descriptor_done;
rte_eth_rx_queue_count;
--
1.7.4.1
Thomas Monjalon
2015-02-20 10:31:03 UTC
Permalink
Post by Bruce Richardson
Post by John McNamara
Add optional support for inline processing of packets inside the RX
or TX call. For an RX callback, what happens is that we get a set of
packets from the NIC and then pass them to a callback function, if
configured, to allow additional processing to be done on them, e.g.
filling in more mbuf fields, before passing back to the application.
On TX, the packets are similarly pre-processed before being handed
to the NIC for transmission.
---
config/common_bsdapp | 1 +
config/common_linuxapp | 1 +
lib/librte_ether/rte_ethdev.c | 192 +++++++++++++++++++++++++++++-
lib/librte_ether/rte_ethdev.h | 204 +++++++++++++++++++++++++++++++-
lib/librte_ether/rte_ether_version.map | 4 +
5 files changed, 397 insertions(+), 5 deletions(-)
diff --git a/config/common_bsdapp b/config/common_bsdapp
index f11ff39..e9c445e 100644
--- a/config/common_bsdapp
+++ b/config/common_bsdapp
@@ -133,6 +133,7 @@ CONFIG_RTE_LIBRTE_ETHDEV_DEBUG=n
CONFIG_RTE_MAX_ETHPORTS=32
CONFIG_RTE_LIBRTE_IEEE1588=n
CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
+CONFIG_RTE_LIBRTE_ETHDEV_RXTX_CALLBACKS=n
I see no reason why this should not be "y" by default. Those who are not using it
and don't want the tiny performance impact of it, can turn it off.
If you are going to change it, I think the shorter name CONFIG_RTE_ETHDEV_RXTX_CALLBACKS
would be sufficient.
John McNamara
2015-02-20 17:03:44 UTC
Permalink
This patchset is for a small optional addition to the ethdev library,
to add support for callbacks at the RX and TX stages. This allows
packet processing to be done on packets before they get returned
to applications using rte_eth_rx_burst call.

See the RFC cover letter for the use cases:

http://dpdk.org/ml/archives/dev/2014-December/010491.html

For this version we spent some time investigating Stephen Hemminger's
suggestion of using the userspace RCU (read-copy-update) library for
SMP safety:

http://urcu.so/

The default liburcu (which defaulted to liburcu-mb) requires the least
interaction from the end user but showed a 25% drop in packet throughput
in the callback sample app.

The liburcu-qsbr (quiescent state) variant showed a 1% drop in packet
throughput in the callback sample app. However it requires registered
RCU threads in the program to periodically announce quiescent states.
This makes it more difficult to implement for end user applications.

For this release we will document that adding and removing callbacks
is not thread safe.

Note: Sample application documentation to follow in a patch update.

Version 5 changes:
* Turn the callback feature on by default.
* Simplify #define name.

Version 4 changes:
* Make the callback feature a compile time option.

Version 3 changes:
* Removed unnecessary header file from example folder
(which included baremetal reference).
* Renamed the interrupt, RX and TX callbacks to make their function
clearer (using the names suggested in the mailing list comments).
* Squashed ABI version update into the commit it relates to.
* Fixed various checkpatch warnings.

Version 2 changes:
* Added ABI versioning.
* Doxygen clarifications.

Version 1 changes:
* Added callback removal functions.
* Minor fixes.


Richardson, Bruce (3):
ethdev: rename callbacks field to link_intr_cbs
ethdev: add optional rxtx callback support
examples: example showing use of callbacks.

MAINTAINERS | 4 +
app/test/virtual_pmd.c | 2 +-
config/common_bsdapp | 1 +
config/common_linuxapp | 1 +
examples/Makefile | 1 +
examples/rxtx_callbacks/Makefile | 57 ++++++++
examples/rxtx_callbacks/main.c | 228 ++++++++++++++++++++++++++++++++
lib/librte_ether/rte_ethdev.c | 204 +++++++++++++++++++++++++++--
lib/librte_ether/rte_ethdev.h | 204 ++++++++++++++++++++++++++++-
lib/librte_ether/rte_ether_version.map | 4 +
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
lib/librte_pmd_ring/rte_eth_ring.c | 2 +-
12 files changed, 696 insertions(+), 14 deletions(-)
create mode 100644 examples/rxtx_callbacks/Makefile
create mode 100644 examples/rxtx_callbacks/main.c
--
1.7.4.1
John McNamara
2015-02-20 17:03:47 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

Example showing how callbacks can be used to insert a timestamp
into each packet on RX. On TX the timestamp is used to calculate
the packet latency through the app, in cycles.

Signed-off-by: Bruce Richardson <***@intel.com>
Signed-off-by: John McNamara <***@intel.com>
---
MAINTAINERS | 4 +
examples/Makefile | 1 +
examples/rxtx_callbacks/Makefile | 57 ++++++++++
examples/rxtx_callbacks/main.c | 228 ++++++++++++++++++++++++++++++++++++++
4 files changed, 290 insertions(+), 0 deletions(-)
create mode 100644 examples/rxtx_callbacks/Makefile
create mode 100644 examples/rxtx_callbacks/main.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 7ac6d59..dcca441 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -432,6 +432,10 @@ F: doc/guides/sample_app_ug/netmap_compatibility.rst
F: examples/quota_watermark/
F: doc/guides/sample_app_ug/quota_watermark.rst

+M: Bruce Richardson <***@intel.com>
+M: John McNamara <***@intel.com>
+F: examples/rxtx_callbacks/
+
F: examples/skeleton/

F: examples/vmdq/
diff --git a/examples/Makefile b/examples/Makefile
index 095bad2..3d191a6 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -63,6 +63,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += packet_ordering
DIRS-$(CONFIG_RTE_LIBRTE_METER) += qos_meter
DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += qos_sched
DIRS-y += quota_watermark
+DIRS-$(CONFIG_RTE_ETHDEV_RXTX_CALLBACKS) += rxtx_callbacks
DIRS-y += skeleton
DIRS-y += timer
DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost
diff --git a/examples/rxtx_callbacks/Makefile b/examples/rxtx_callbacks/Makefile
new file mode 100644
index 0000000..0fafbb7
--- /dev/null
+++ b/examples/rxtx_callbacks/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = rxtx_callbacks
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+EXTRA_CFLAGS += -O3 -g -Wfatal-errors
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/rxtx_callbacks/main.c b/examples/rxtx_callbacks/main.c
new file mode 100644
index 0000000..9e5e68e
--- /dev/null
+++ b/examples/rxtx_callbacks/main.c
@@ -0,0 +1,228 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <inttypes.h>
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_mbuf.h>
+
+#define RX_RING_SIZE 128
+#define TX_RING_SIZE 512
+
+#define NUM_MBUFS 8191
+#define MBUF_SIZE (1600 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+#define MBUF_CACHE_SIZE 250
+#define BURST_SIZE 32
+
+static const struct rte_eth_conf port_conf_default = {
+ .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN, },
+};
+
+static unsigned nb_ports;
+
+static struct {
+ uint64_t total_cycles;
+ uint64_t total_pkts;
+} latency_numbers;
+
+
+static uint16_t
+add_timestamps(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+ struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
+{
+ unsigned i;
+ uint64_t now = rte_rdtsc();
+
+ for (i = 0; i < nb_pkts; i++)
+ pkts[i]->udata64 = now;
+ return nb_pkts;
+}
+
+static uint16_t
+calc_latency(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+ struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
+{
+ uint64_t cycles = 0;
+ uint64_t now = rte_rdtsc();
+ unsigned i;
+
+ for (i = 0; i < nb_pkts; i++)
+ cycles += now - pkts[i]->udata64;
+ latency_numbers.total_cycles += cycles;
+ latency_numbers.total_pkts += nb_pkts;
+
+ if (latency_numbers.total_pkts > (100 * 1000 * 1000ULL)) {
+ printf("Latency = %"PRIu64" cycles\n",
+ latency_numbers.total_cycles / latency_numbers.total_pkts);
+ latency_numbers.total_cycles = latency_numbers.total_pkts = 0;
+ }
+ return nb_pkts;
+}
+
+/*
+ * Initialises a given port using global settings and with the rx buffers
+ * coming from the mbuf_pool passed as parameter
+ */
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+ struct rte_eth_conf port_conf = port_conf_default;
+ const uint16_t rx_rings = 1, tx_rings = 1;
+ int retval;
+ uint16_t q;
+
+ if (port >= rte_eth_dev_count())
+ return -1;
+
+ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+ if (retval != 0)
+ return retval;
+
+ for (q = 0; q < rx_rings; q++) {
+ retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
+ rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+ if (retval < 0)
+ return retval;
+ }
+
+ for (q = 0; q < tx_rings; q++) {
+ retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
+ rte_eth_dev_socket_id(port), NULL);
+ if (retval < 0)
+ return retval;
+ }
+
+ retval = rte_eth_dev_start(port);
+ if (retval < 0)
+ return retval;
+
+ struct ether_addr addr;
+
+ rte_eth_macaddr_get(port, &addr);
+ printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+ " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+ (unsigned)port,
+ addr.addr_bytes[0], addr.addr_bytes[1],
+ addr.addr_bytes[2], addr.addr_bytes[3],
+ addr.addr_bytes[4], addr.addr_bytes[5]);
+
+ rte_eth_promiscuous_enable(port);
+ rte_eth_add_rx_callback(port, 0, add_timestamps, NULL);
+ rte_eth_add_tx_callback(port, 0, calc_latency, NULL);
+
+ return 0;
+}
+
+/*
+ * Main forwarding loop, run on the calling lcore.
+ *
+ * For every port, a burst of up to BURST_SIZE packets is read from RX
+ * queue 0 and sent out on TX queue 0 of the paired port (port ^ 1).
+ * Packets the TX call did not accept are freed. This function never returns.
+ */
+static __attribute__((noreturn)) void
+lcore_main(void)
+{
+	uint8_t port;
+
+	/*
+	 * Warn about ports that sit on a different NUMA node than the polling
+	 * thread. Socket 0 is a valid socket id, so the test must be ">= 0";
+	 * a negative value is treated as "socket unknown" and skipped.
+	 */
+	for (port = 0; port < nb_ports; port++)
+		if (rte_eth_dev_socket_id(port) >= 0 &&
+				rte_eth_dev_socket_id(port) !=
+						(int)rte_socket_id())
+			printf("WARNING, port %u is on remote NUMA node to "
+					"polling thread.\n\tPerformance will "
+					"not be optimal.\n", port);
+
+	printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
+			rte_lcore_id());
+	for (;;) {
+		for (port = 0; port < nb_ports; port++) {
+			struct rte_mbuf *bufs[BURST_SIZE];
+			const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
+					bufs, BURST_SIZE);
+			if (unlikely(nb_rx == 0))
+				continue;
+			const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
+					bufs, nb_rx);
+			if (unlikely(nb_tx < nb_rx)) {
+				uint16_t buf;
+
+				/* Free whatever the TX burst did not take. */
+				for (buf = nb_tx; buf < nb_rx; buf++)
+					rte_pktmbuf_free(bufs[buf]);
+			}
+		}
+	}
+}
+
+/* Main function, does initialisation and calls the per-lcore functions */
+int
+main(int argc, char *argv[])
+{
+ struct rte_mempool *mbuf_pool;
+ uint8_t portid;
+
+ /* init EAL */
+ int ret = rte_eal_init(argc, argv);
+
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+ argc -= ret;
+ argv += ret;
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports < 2 || (nb_ports & 1))
+ rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n");
+
+ mbuf_pool = rte_mempool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
+ MBUF_SIZE, MBUF_CACHE_SIZE,
+ sizeof(struct rte_pktmbuf_pool_private),
+ rte_pktmbuf_pool_init, NULL,
+ rte_pktmbuf_init, NULL,
+ rte_socket_id(), 0);
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++)
+ if (port_init(portid, mbuf_pool) != 0)
+ rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n",
+ portid);
+
+ if (rte_lcore_count() > 1)
+ printf("\nWARNING: Too much enabled lcores - "
+ "App uses only 1 lcore\n");
+
+ /* call lcore_main on master core only */
+ lcore_main();
+ return 0;
+}
--
1.7.4.1
John McNamara
2015-02-20 17:03:46 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

Add optional support for inline processing of packets inside the RX
or TX call. For an RX callback, what happens is that we get a set of
packets from the NIC and then pass them to a callback function, if
configured, to allow additional processing to be done on them, e.g.
filling in more mbuf fields, before passing back to the application.
On TX, the packets are similarly pre-processed before being handed
to the NIC for transmission.

Signed-off-by: Bruce Richardson <***@intel.com>
Signed-off-by: John McNamara <***@intel.com>
---
config/common_bsdapp | 1 +
config/common_linuxapp | 1 +
lib/librte_ether/rte_ethdev.c | 192 +++++++++++++++++++++++++++++-
lib/librte_ether/rte_ethdev.h | 204 +++++++++++++++++++++++++++++++-
lib/librte_ether/rte_ether_version.map | 4 +
5 files changed, 397 insertions(+), 5 deletions(-)

diff --git a/config/common_bsdapp b/config/common_bsdapp
index f11ff39..6fc7074 100644
--- a/config/common_bsdapp
+++ b/config/common_bsdapp
@@ -133,6 +133,7 @@ CONFIG_RTE_LIBRTE_ETHDEV_DEBUG=n
CONFIG_RTE_MAX_ETHPORTS=32
CONFIG_RTE_LIBRTE_IEEE1588=n
CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
+CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y

#
# Support NIC bypass logic
diff --git a/config/common_linuxapp b/config/common_linuxapp
index f921d8c..c0ef9d6 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -131,6 +131,7 @@ CONFIG_RTE_LIBRTE_ETHDEV_DEBUG=n
CONFIG_RTE_MAX_ETHPORTS=32
CONFIG_RTE_LIBRTE_IEEE1588=n
CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
+CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y

#
# Support NIC bypass logic
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 7c4e772..a8a8243 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -337,6 +337,20 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_rx_queues = 0;
return -(ENOMEM);
}
+
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ dev->post_rx_burst_cbs = rte_zmalloc(
+ "ethdev->post_rx_burst_cbs",
+ sizeof(*dev->post_rx_burst_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->post_rx_burst_cbs == NULL) {
+ rte_free(dev->data->rx_queues);
+ dev->data->rx_queues = NULL;
+ dev->data->nb_rx_queues = 0;
+ return -(ENOMEM);
+ }
+#endif
+
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP);

@@ -349,9 +363,25 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
if (rxq == NULL)
return -(ENOMEM);

- if (nb_queues > old_nb_queues)
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ dev->post_rx_burst_cbs = rte_realloc(
+ dev->post_rx_burst_cbs,
+ sizeof(*dev->post_rx_burst_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->post_rx_burst_cbs == NULL)
+ return -(ENOMEM);
+#endif
+
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(rxq + old_nb_queues, 0,
- sizeof(rxq[0]) * (nb_queues - old_nb_queues));
+ sizeof(rxq[0]) * new_qs);
+
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ memset(dev->post_rx_burst_cbs + old_nb_queues, 0,
+ sizeof(dev->post_rx_burst_cbs[0]) * new_qs);
+#endif
+ }

dev->data->rx_queues = rxq;

@@ -479,6 +509,20 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_tx_queues = 0;
return -(ENOMEM);
}
+
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ dev->pre_tx_burst_cbs = rte_zmalloc(
+ "ethdev->pre_tx_burst_cbs",
+ sizeof(*dev->pre_tx_burst_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->pre_tx_burst_cbs == NULL) {
+ rte_free(dev->data->tx_queues);
+ dev->data->tx_queues = NULL;
+ dev->data->nb_tx_queues = 0;
+ return -(ENOMEM);
+ }
+#endif
+
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP);

@@ -491,9 +535,25 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
if (txq == NULL)
return -(ENOMEM);

- if (nb_queues > old_nb_queues)
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ dev->pre_tx_burst_cbs = rte_realloc(
+ dev->pre_tx_burst_cbs,
+ sizeof(*dev->pre_tx_burst_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->pre_tx_burst_cbs == NULL)
+ return -(ENOMEM);
+#endif
+
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(txq + old_nb_queues, 0,
- sizeof(txq[0]) * (nb_queues - old_nb_queues));
+ sizeof(txq[0]) * new_qs);
+
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ memset(dev->pre_tx_burst_cbs + old_nb_queues, 0,
+ sizeof(dev->pre_tx_burst_cbs[0]) * new_qs);
+#endif
+ }

dev->data->tx_queues = txq;

@@ -3258,3 +3318,127 @@ rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP);
return (*dev->dev_ops->filter_ctrl)(dev, filter_type, filter_op, arg);
}
+
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+void *
+rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param)
+{
+ /* check input parameters */
+ if (port_id >= nb_ports || fn == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+
+ if (cb == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ cb->fn = fn;
+ cb->param = user_param;
+ cb->next = rte_eth_devices[port_id].post_rx_burst_cbs[queue_id];
+ rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb;
+ return cb;
+}
+
+void *
+rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param)
+{
+ /* check input parameters */
+ if (port_id >= nb_ports || fn == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+
+ if (cb == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ cb->fn = fn;
+ cb->param = user_param;
+ cb->next = rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id];
+ rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id] = cb;
+ return cb;
+}
+
+/*
+ * Unlink user_cb from the RX callback list of the given port and queue.
+ * The node is only unlinked here; this function does not free it.
+ * Returns 0 on success, -EINVAL on bad arguments or if user_cb is not found.
+ */
+int
+rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+		struct rte_eth_rxtx_callback *user_cb)
+{
+	/* Check input parameters. */
+	if (port_id >= nb_ports || user_cb == NULL ||
+		queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+		return (-EINVAL);
+	}
+
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	struct rte_eth_rxtx_callback **prev_next;
+
+	/*
+	 * Walk the links rather than the nodes: prev_next points first at the
+	 * list head and then at each node's next field. An empty list (NULL
+	 * head) ends the loop at once instead of dereferencing NULL.
+	 */
+	for (prev_next = &dev->post_rx_burst_cbs[queue_id];
+			*prev_next != NULL; prev_next = &(*prev_next)->next) {
+		if (*prev_next == user_cb) {
+			/* One pointer store unlinks a head or an interior node. */
+			*prev_next = user_cb->next;
+			return 0;
+		}
+	}
+
+	/* Callback wasn't found. */
+	return (-EINVAL);
+}
+
+/*
+ * Unlink user_cb from the TX callback list of the given port and queue.
+ * The node is only unlinked here; this function does not free it.
+ * Returns 0 on success, -EINVAL on bad arguments or if user_cb is not found.
+ */
+int
+rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+		struct rte_eth_rxtx_callback *user_cb)
+{
+	/* Check input parameters. */
+	if (port_id >= nb_ports || user_cb == NULL ||
+		queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+		return (-EINVAL);
+	}
+
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	struct rte_eth_rxtx_callback **prev_next;
+
+	/*
+	 * Walk the links rather than the nodes: prev_next points first at the
+	 * list head and then at each node's next field. An empty list (NULL
+	 * head) ends the loop at once instead of dereferencing NULL.
+	 */
+	for (prev_next = &dev->pre_tx_burst_cbs[queue_id];
+			*prev_next != NULL; prev_next = &(*prev_next)->next) {
+		if (*prev_next == user_cb) {
+			/* One pointer store unlinks a head or an interior node. */
+			*prev_next = user_cb->next;
+			return 0;
+		}
+	}
+
+	/* Callback wasn't found. */
+	return (-EINVAL);
+}
+#endif /* RTE_ETHDEV_RXTX_CALLBACKS */
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 48e4ac9..16eea8e 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1522,6 +1522,49 @@ struct eth_dev_ops {
eth_filter_ctrl_t filter_ctrl; /**< common filter control*/
};

+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+/**
+ * Function type used for callbacks for processing packets on RX and TX
+ *
+ * If configured for RX, it is called with a burst of packets that have just
+ * been received on the given port and queue. On TX, it is called with a burst
+ * of packets immediately before those packets are put onto the hardware queue
+ * for transmission.
+ *
+ * @param port
+ * The ethernet port on which rx or tx is being performed
+ * @param queue
+ * The queue on the ethernet port which is being used to receive or transmit
+ * the packets.
+ * @param pkts
+ * The burst of packets on which processing is to be done. On RX, these
+ * packets have just been received. On TX, they are about to be transmitted.
+ * @param nb_pkts
+ * The number of packets in the burst pointed to by "pkts"
+ * @param user_param
+ * The arbitrary user parameter passed in by the application when the callback
+ * was originally configured.
+ * @return
+ * The number of packets remaining in pkts after processing.
+ * * On RX, this will be returned to the user as the return value from
+ * rte_eth_rx_burst.
+ * * On TX, this will be the number of packets actually written to the NIC.
+ */
+typedef uint16_t (*rte_rxtx_callback_fn)(uint8_t port, uint16_t queue,
+ struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param);
+
+/**
+ * @internal
+ * Structure used to hold information about the callbacks to be called for a
+ * queue on RX and TX.
+ */
+struct rte_eth_rxtx_callback {
+ struct rte_eth_rxtx_callback *next; /**< Next callback in the list; NULL ends it */
+ rte_rxtx_callback_fn fn; /**< Function called with each burst of packets */
+ void *param; /**< User parameter handed to fn on every call */
+};
+#endif
+
/**
* @internal
* The generic data structure associated with each ethernet device.
@@ -1539,7 +1582,22 @@ struct rte_eth_dev {
const struct eth_driver *driver;/**< Driver for this device */
struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
- struct rte_eth_dev_cb_list link_intr_cbs; /**< User application callbacks on interrupt*/
+ /** User application callbacks for NIC interrupts */
+ struct rte_eth_dev_cb_list link_intr_cbs;
+
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ /**
+ * User-supplied functions called from rx_burst to post-process
+ * received packets before passing them to the user
+ */
+ struct rte_eth_rxtx_callback **post_rx_burst_cbs;
+
+ /**
+ * User-supplied functions called from tx_burst to pre-process
+ * outgoing packets before passing them to the driver for transmission.
+ */
+ struct rte_eth_rxtx_callback **pre_tx_burst_cbs;
+#endif
};

struct rte_eth_dev_sriov {
@@ -2393,7 +2451,23 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;

dev = &rte_eth_devices[port_id];
+
+#ifndef RTE_ETHDEV_RXTX_CALLBACKS
return (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts, nb_pkts);
+#else
+ nb_pkts = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts,
+ nb_pkts);
+ struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
+
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, rx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
+ return nb_pkts;
+#endif
}
#endif

@@ -2520,6 +2594,17 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;

dev = &rte_eth_devices[port_id];
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id];
+
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, tx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
+#endif
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
}
#endif
@@ -3667,6 +3752,123 @@ int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_ty
int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
enum rte_filter_op filter_op, void *arg);

+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+/**
+ * Add a callback to be called on packet RX on a given port and queue.
+ *
+ * This API configures a function to be called for each burst of
+ * packets received on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_rx_callback().
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ * The callback function
+ * @param user_param
+ * A generic pointer parameter which will be passed to each invocation of the
+ * callback function on this port and queue.
+ *
+ * @return
+ * NULL on error.
+ * On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
+/**
+ * Add a callback to be called on packet TX on a given port and queue.
+ *
+ * This API configures a function to be called for each burst of
+ * packets sent on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_tx_callback().
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ * The callback function
+ * @param user_param
+ * A generic pointer parameter which will be passed to each invocation of the
+ * callback function on this port and queue.
+ *
+ * @return
+ * NULL on error.
+ * On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
+/**
+ * Remove an RX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_rx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since it may still be in use. The memory for the callback can be
+ * subsequently freed back by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ * callbacks are in flight e.g. if called from the thread doing RX/TX
+ * on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ * in-flight callbacks to complete.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ * User supplied callback created via rte_eth_add_rx_callback().
+ *
+ * @return
+ * - 0: Success. Callback was removed.
+ * - -EINVAL: The port_id or the queue_id is out of range, or the callback is
+ * NULL or not found for the port/queue.
+ */
+int rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb);
+
+/**
+ * Remove a TX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_tx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since it may still be in use. The memory for the callback can be
+ * subsequently freed back by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ * callbacks are in flight e.g. if called from the thread doing RX/TX
+ * on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ * in-flight callbacks to complete.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ * User supplied callback created via rte_eth_add_tx_callback().
+ *
+ * @return
+ * - 0: Success. Callback was removed.
+ * - -EINVAL: The port_id or the queue_id is out of range, or the callback is
+ * NULL or not found for the port/queue.
+ */
+int rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb);
+
+#endif /* RTE_ETHDEV_RXTX_CALLBACKS */
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index 7316530..3227cda 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -2,6 +2,8 @@ DPDK_2.0 {
global:

_rte_eth_dev_callback_process;
+ rte_eth_add_rx_callback;
+ rte_eth_add_tx_callback;
rte_eth_allmulticast_disable;
rte_eth_allmulticast_enable;
rte_eth_allmulticast_get;
@@ -96,6 +98,8 @@ DPDK_2.0 {
rte_eth_promiscuous_disable;
rte_eth_promiscuous_enable;
rte_eth_promiscuous_get;
+ rte_eth_remove_rx_callback;
+ rte_eth_remove_tx_callback;
rte_eth_rx_burst;
rte_eth_rx_descriptor_done;
rte_eth_rx_queue_count;
--
1.7.4.1
Thomas Monjalon
2015-02-23 15:11:45 UTC
Permalink
Hi John,
Post by John McNamara
Add optional support for inline processing of packets inside the RX
or TX call. For an RX callback, what happens is that we get a set of
packets from the NIC and then pass them to a callback function, if
configured, to allow additional processing to be done on them, e.g.
filling in more mbuf fields, before passing back to the application.
On TX, the packets are similarly pre-processed before being handed
to the NIC for transmission.
[...]
Post by John McNamara
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+void *
+rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param)
+{
+ /* check input parameters */
+ if (port_id >= nb_ports || fn == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
Why not put the #ifdef only here and return an ENOTSUP error?
Post by John McNamara
+ struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+
+ if (cb == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ cb->fn = fn;
+ cb->param = user_param;
+ cb->next = rte_eth_devices[port_id].post_rx_burst_cbs[queue_id];
+ rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb;
+ return cb;
+}
[...]
Post by John McNamara
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1522,6 +1522,49 @@ struct eth_dev_ops {
eth_filter_ctrl_t filter_ctrl; /**< common filter control*/
};
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+/**
+ * Function type used for callbacks for processing packets on RX and TX
+ *
+ * If configured for RX, it is called with a burst of packets that have just
+ * been received on the given port and queue. On TX, it is called with a burst
+ * of packets immediately before those packets are put onto the hardware queue
+ * for transmission.
+ *
+ * The ethernet port on which rx or tx is being performed
+ * The queue on the ethernet port which is being used to receive or transmit
+ * the packets.
+ * The burst of packets on which processing is to be done. On RX, these
+ * packets have just been received. On TX, they are about to be transmitted.
+ * The number of packets in the burst pointed to by "pkts"
+ * The arbitrary user parameter passed in by the application when the callback
+ * was originally configured.
+ * The number of packets remaining in pkts after processing.
+ * * On RX, this will be returned to the user as the return value from
+ * rte_eth_rx_burst.
+ * * On TX, this will be the number of packets actually written to the NIC.
+ */
+typedef uint16_t (*rte_rxtx_callback_fn)(uint8_t port, uint16_t queue,
+ struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param);
+
+/**
+ * Structure used to hold information about the callbacks to be called for a
+ * queue on RX and TX.
+ */
+struct rte_eth_rxtx_callback {
+ struct rte_eth_rxtx_callback *next;
+ rte_rxtx_callback_fn fn;
+ void *param;
+};
+#endif
+
/**
* The generic data structure associated with each ethernet device.
@@ -1539,7 +1582,22 @@ struct rte_eth_dev {
const struct eth_driver *driver;/**< Driver for this device */
struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
- struct rte_eth_dev_cb_list link_intr_cbs; /**< User application callbacks on interrupt*/
+ /** User application callbacks for NIC interrupts */
+ struct rte_eth_dev_cb_list link_intr_cbs;
+
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ /**
+ * User-supplied functions called from rx_burst to post-process
+ * received packets before passing them to the user
+ */
+ struct rte_eth_rxtx_callback **post_rx_burst_cbs;
+
+ /**
+ * User-supplied functions called from tx_burst to pre-process
+ * outgoing packets before passing them to the driver for transmission.
+ */
+ struct rte_eth_rxtx_callback **pre_tx_burst_cbs;
+#endif
};
Generally, I think it's a bad idea to put #ifdef in API (structs or functions).
Post by John McNamara
struct rte_eth_dev_sriov {
@@ -2393,7 +2451,23 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;
dev = &rte_eth_devices[port_id];
+
+#ifndef RTE_ETHDEV_RXTX_CALLBACKS
return (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts, nb_pkts);
+#else
+ nb_pkts = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts,
+ nb_pkts);
Why not #ifdef only from here...
Post by John McNamara
+ struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
+
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, rx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
... to here?
Post by John McNamara
+ return nb_pkts;
+#endif
}
#endif
@@ -2520,6 +2594,17 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;
dev = &rte_eth_devices[port_id];
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id];
+
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, tx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
+#endif
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
}
#endif
@@ -3667,6 +3752,123 @@ int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_ty
int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
enum rte_filter_op filter_op, void *arg);
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+/**
+ * Add a callback to be called on packet RX on a given port and queue.
+ *
+ * This API configures a function to be called for each burst of
+ * packets received on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_rx_callback().
+ *
+ * The port identifier of the Ethernet device.
+ * The queue on the Ethernet device on which the callback is to be added.
+ * The callback function
+ * A generic pointer parameter which will be passed to each invocation of the
+ * callback function on this port and queue.
+ *
+ * NULL on error.
+ * On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
+/**
+ * Add a callback to be called on packet TX on a given port and queue.
+ *
+ * This API configures a function to be called for each burst of
+ * packets sent on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_tx_callback().
+ *
+ * The port identifier of the Ethernet device.
+ * The queue on the Ethernet device on which the callback is to be added.
+ * The callback function
+ * A generic pointer parameter which will be passed to each invocation of the
+ * callback function on this port and queue.
+ *
+ * NULL on error.
+ * On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
+/**
+ * Remove an RX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_rx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since it may still be in use. The memory for the callback can be
+ * subsequently freed back by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ * callbacks are in flight e.g. if called from the thread doing RX/TX
+ * on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ * in-flight callbacks to complete.
+ *
+ * The port identifier of the Ethernet device.
+ * The queue on the Ethernet device from which the callback is to be removed.
+ * User supplied callback created via rte_eth_add_rx_callback().
+ *
+ * - 0: Success. Callback was removed.
+ * - -EINVAL: The port_id or the queue_id is out of range, or the callback is
+ * NULL or not found for the port/queue.
+ */
+int rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb);
+
+/**
+ * Remove a TX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_tx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since it may still be in use. The memory for the callback can be
+ * subsequently freed back by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ * callbacks are in flight e.g. if called from the thread doing RX/TX
+ * on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ * in-flight callbacks to complete.
+ *
+ * The port identifier of the Ethernet device.
+ * The queue on the Ethernet device from which the callback is to be removed.
+ * User supplied callback created via rte_eth_add_tx_callback().
+ *
+ * - 0: Success. Callback was removed.
+ * - -EINVAL: The port_id or the queue_id is out of range, or the callback is
+ * NULL or not found for the port/queue.
+ */
+int rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb);
+
+#endif /* RTE_ETHDEV_RXTX_CALLBACKS */
Please avoid #ifdef around function declarations.

Thanks
Mcnamara, John
2015-02-23 17:27:30 UTC
Permalink
Post by Vithal S Mohare
-----Original Message-----
Sent: Monday, February 23, 2015 3:12 PM
To: Mcnamara, John
Subject: Re: [PATCH v5 2/3] ethdev: add optional rxtx callback support
Post by John McNamara
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+void *
+rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param) {
+ /* check input parameters */
+ if (port_id >= nb_ports || fn == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
Why not put the #ifdef only here and return an ENOTSUP error?
Hi Thomas,

That would probably be cleaner/clearer. I'll rework this patch with your suggestions.

John
--
John McNamara
2015-02-20 17:03:45 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

The 'callbacks' member of the rte_eth_dev structure has been renamed
to 'link_intr_cbs' to make it clear that it refers to callbacks from
NIC interrupts. This allows us to add other types of callbacks to
the structure without ambiguity.

Signed-off-by: Bruce Richardson <***@intel.com>
Signed-off-by: John McNamara <***@intel.com>
---
app/test/virtual_pmd.c | 2 +-
lib/librte_ether/rte_ethdev.c | 12 ++++++------
lib/librte_ether/rte_ethdev.h | 2 +-
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
lib/librte_pmd_ring/rte_eth_ring.c | 2 +-
5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/app/test/virtual_pmd.c b/app/test/virtual_pmd.c
index 9fac95d..eb75846 100644
--- a/app/test/virtual_pmd.c
+++ b/app/test/virtual_pmd.c
@@ -576,7 +576,7 @@ virtual_ethdev_create(const char *name, struct ether_addr *mac_addr,
eth_dev->data->nb_rx_queues = (uint16_t)1;
eth_dev->data->nb_tx_queues = (uint16_t)1;

- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->link_intr_cbs));

eth_dev->data->dev_link.link_status = 0;
eth_dev->data->dev_link.link_speed = ETH_LINK_SPEED_10000;
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 17be2f3..7c4e772 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -265,7 +265,7 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
eth_dev->data->rx_mbuf_alloc_failed = 0;

/* init user callbacks */
- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->link_intr_cbs));

/*
* Set the default MTU.
@@ -2743,7 +2743,7 @@ rte_eth_dev_callback_register(uint8_t port_id,
dev = &rte_eth_devices[port_id];
rte_spinlock_lock(&rte_eth_dev_cb_lock);

- TAILQ_FOREACH(user_cb, &(dev->callbacks), next) {
+ TAILQ_FOREACH(user_cb, &(dev->link_intr_cbs), next) {
if (user_cb->cb_fn == cb_fn &&
user_cb->cb_arg == cb_arg &&
user_cb->event == event) {
@@ -2757,7 +2757,7 @@ rte_eth_dev_callback_register(uint8_t port_id,
user_cb->cb_fn = cb_fn;
user_cb->cb_arg = cb_arg;
user_cb->event = event;
- TAILQ_INSERT_TAIL(&(dev->callbacks), user_cb, next);
+ TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), user_cb, next);
}

rte_spinlock_unlock(&rte_eth_dev_cb_lock);
@@ -2784,7 +2784,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
rte_spinlock_lock(&rte_eth_dev_cb_lock);

ret = 0;
- for (cb = TAILQ_FIRST(&dev->callbacks); cb != NULL; cb = next) {
+ for (cb = TAILQ_FIRST(&dev->link_intr_cbs); cb != NULL; cb = next) {

next = TAILQ_NEXT(cb, next);

@@ -2798,7 +2798,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
* then remove it.
*/
if (cb->active == 0) {
- TAILQ_REMOVE(&(dev->callbacks), cb, next);
+ TAILQ_REMOVE(&(dev->link_intr_cbs), cb, next);
rte_free(cb);
} else {
ret = -EAGAIN;
@@ -2817,7 +2817,7 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
struct rte_eth_dev_callback dev_cb;

rte_spinlock_lock(&rte_eth_dev_cb_lock);
- TAILQ_FOREACH(cb_lst, &(dev->callbacks), next) {
+ TAILQ_FOREACH(cb_lst, &(dev->link_intr_cbs), next) {
if (cb_lst->cb_fn == NULL || cb_lst->event != event)
continue;
dev_cb = *cb_lst;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 6e454e8..48e4ac9 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1539,7 +1539,7 @@ struct rte_eth_dev {
const struct eth_driver *driver;/**< Driver for this device */
struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
- struct rte_eth_dev_cb_list callbacks; /**< User application callbacks */
+ struct rte_eth_dev_cb_list link_intr_cbs; /**< User application callbacks on interrupt*/
};

struct rte_eth_dev_sriov {
diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c
index 4ab3267..077cb73 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_api.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_api.c
@@ -251,7 +251,7 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
eth_dev->data->nb_rx_queues = (uint16_t)1;
eth_dev->data->nb_tx_queues = (uint16_t)1;

- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->link_intr_cbs));

eth_dev->data->dev_link.link_status = 0;

diff --git a/lib/librte_pmd_ring/rte_eth_ring.c b/lib/librte_pmd_ring/rte_eth_ring.c
index a23e933..a5dc71e 100644
--- a/lib/librte_pmd_ring/rte_eth_ring.c
+++ b/lib/librte_pmd_ring/rte_eth_ring.c
@@ -340,7 +340,7 @@ rte_eth_from_rings(const char *name, struct rte_ring *const rx_queues[],
eth_dev->driver = eth_drv;
eth_dev->dev_ops = &ops;
eth_dev->pci_dev = pci_dev;
- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->link_intr_cbs));

/* finally assign rx and tx ops */
eth_dev->rx_pkt_burst = eth_ring_rx;
--
1.7.4.1
John McNamara
2015-02-23 18:30:08 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

The 'callbacks' member of the rte_eth_dev structure has been renamed
to 'link_intr_cbs' to make it clear that it refers to callbacks from
NIC interrupts. This allows us to add other types of callbacks to
the structure without ambiguity.

Signed-off-by: Bruce Richardson <***@intel.com>
Signed-off-by: John McNamara <***@intel.com>
---
app/test/virtual_pmd.c | 2 +-
lib/librte_ether/rte_ethdev.c | 12 ++++++------
lib/librte_ether/rte_ethdev.h | 2 +-
lib/librte_pmd_bond/rte_eth_bond_api.c | 2 +-
lib/librte_pmd_ring/rte_eth_ring.c | 2 +-
5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/app/test/virtual_pmd.c b/app/test/virtual_pmd.c
index 9fac95d..eb75846 100644
--- a/app/test/virtual_pmd.c
+++ b/app/test/virtual_pmd.c
@@ -576,7 +576,7 @@ virtual_ethdev_create(const char *name, struct ether_addr *mac_addr,
eth_dev->data->nb_rx_queues = (uint16_t)1;
eth_dev->data->nb_tx_queues = (uint16_t)1;

- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->link_intr_cbs));

eth_dev->data->dev_link.link_status = 0;
eth_dev->data->dev_link.link_speed = ETH_LINK_SPEED_10000;
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 17be2f3..7c4e772 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -265,7 +265,7 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
eth_dev->data->rx_mbuf_alloc_failed = 0;

/* init user callbacks */
- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->link_intr_cbs));

/*
* Set the default MTU.
@@ -2743,7 +2743,7 @@ rte_eth_dev_callback_register(uint8_t port_id,
dev = &rte_eth_devices[port_id];
rte_spinlock_lock(&rte_eth_dev_cb_lock);

- TAILQ_FOREACH(user_cb, &(dev->callbacks), next) {
+ TAILQ_FOREACH(user_cb, &(dev->link_intr_cbs), next) {
if (user_cb->cb_fn == cb_fn &&
user_cb->cb_arg == cb_arg &&
user_cb->event == event) {
@@ -2757,7 +2757,7 @@ rte_eth_dev_callback_register(uint8_t port_id,
user_cb->cb_fn = cb_fn;
user_cb->cb_arg = cb_arg;
user_cb->event = event;
- TAILQ_INSERT_TAIL(&(dev->callbacks), user_cb, next);
+ TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), user_cb, next);
}

rte_spinlock_unlock(&rte_eth_dev_cb_lock);
@@ -2784,7 +2784,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
rte_spinlock_lock(&rte_eth_dev_cb_lock);

ret = 0;
- for (cb = TAILQ_FIRST(&dev->callbacks); cb != NULL; cb = next) {
+ for (cb = TAILQ_FIRST(&dev->link_intr_cbs); cb != NULL; cb = next) {

next = TAILQ_NEXT(cb, next);

@@ -2798,7 +2798,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
* then remove it.
*/
if (cb->active == 0) {
- TAILQ_REMOVE(&(dev->callbacks), cb, next);
+ TAILQ_REMOVE(&(dev->link_intr_cbs), cb, next);
rte_free(cb);
} else {
ret = -EAGAIN;
@@ -2817,7 +2817,7 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
struct rte_eth_dev_callback dev_cb;

rte_spinlock_lock(&rte_eth_dev_cb_lock);
- TAILQ_FOREACH(cb_lst, &(dev->callbacks), next) {
+ TAILQ_FOREACH(cb_lst, &(dev->link_intr_cbs), next) {
if (cb_lst->cb_fn == NULL || cb_lst->event != event)
continue;
dev_cb = *cb_lst;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 6e454e8..48e4ac9 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1539,7 +1539,7 @@ struct rte_eth_dev {
const struct eth_driver *driver;/**< Driver for this device */
struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
- struct rte_eth_dev_cb_list callbacks; /**< User application callbacks */
+ struct rte_eth_dev_cb_list link_intr_cbs; /**< User application callbacks on interrupt*/
};

struct rte_eth_dev_sriov {
diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c
index 4ab3267..077cb73 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_api.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_api.c
@@ -251,7 +251,7 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
eth_dev->data->nb_rx_queues = (uint16_t)1;
eth_dev->data->nb_tx_queues = (uint16_t)1;

- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->link_intr_cbs));

eth_dev->data->dev_link.link_status = 0;

diff --git a/lib/librte_pmd_ring/rte_eth_ring.c b/lib/librte_pmd_ring/rte_eth_ring.c
index a23e933..a5dc71e 100644
--- a/lib/librte_pmd_ring/rte_eth_ring.c
+++ b/lib/librte_pmd_ring/rte_eth_ring.c
@@ -340,7 +340,7 @@ rte_eth_from_rings(const char *name, struct rte_ring *const rx_queues[],
eth_dev->driver = eth_drv;
eth_dev->dev_ops = &ops;
eth_dev->pci_dev = pci_dev;
- TAILQ_INIT(&(eth_dev->callbacks));
+ TAILQ_INIT(&(eth_dev->link_intr_cbs));

/* finally assign rx and tx ops */
eth_dev->rx_pkt_burst = eth_ring_rx;
--
1.7.4.1
John McNamara
2015-02-23 18:30:09 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

Add optional support for inline processing of packets inside the RX
or TX call. For an RX callback, what happens is that we get a set of
packets from the NIC and then pass them to a callback function, if
configured, to allow additional processing to be done on them, e.g.
filling in more mbuf fields, before passing back to the application.
On TX, the packets are similarly post-processed before being handed
to the NIC for transmission.

Signed-off-by: Bruce Richardson <***@intel.com>
Signed-off-by: John McNamara <***@intel.com>
---
config/common_bsdapp | 1 +
config/common_linuxapp | 1 +
lib/librte_ether/rte_ethdev.c | 204 +++++++++++++++++++++++++++++++-
lib/librte_ether/rte_ethdev.h | 203 +++++++++++++++++++++++++++++++-
lib/librte_ether/rte_ether_version.map | 4 +
5 files changed, 407 insertions(+), 6 deletions(-)

diff --git a/config/common_bsdapp b/config/common_bsdapp
index f11ff39..6fc7074 100644
--- a/config/common_bsdapp
+++ b/config/common_bsdapp
@@ -133,6 +133,7 @@ CONFIG_RTE_LIBRTE_ETHDEV_DEBUG=n
CONFIG_RTE_MAX_ETHPORTS=32
CONFIG_RTE_LIBRTE_IEEE1588=n
CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
+CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y

#
# Support NIC bypass logic
diff --git a/config/common_linuxapp b/config/common_linuxapp
index f921d8c..c0ef9d6 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -131,6 +131,7 @@ CONFIG_RTE_LIBRTE_ETHDEV_DEBUG=n
CONFIG_RTE_MAX_ETHPORTS=32
CONFIG_RTE_LIBRTE_IEEE1588=n
CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
+CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y

#
# Support NIC bypass logic
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 7c4e772..9bba89d 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -337,6 +337,20 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_rx_queues = 0;
return -(ENOMEM);
}
+
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ dev->post_rx_burst_cbs = rte_zmalloc(
+ "ethdev->post_rx_burst_cbs",
+ sizeof(*dev->post_rx_burst_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->post_rx_burst_cbs == NULL) {
+ rte_free(dev->data->rx_queues);
+ dev->data->rx_queues = NULL;
+ dev->data->nb_rx_queues = 0;
+ return -(ENOMEM);
+ }
+#endif
+
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP);

@@ -349,9 +363,25 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
if (rxq == NULL)
return -(ENOMEM);

- if (nb_queues > old_nb_queues)
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ dev->post_rx_burst_cbs = rte_realloc(
+ dev->post_rx_burst_cbs,
+ sizeof(*dev->post_rx_burst_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->post_rx_burst_cbs == NULL)
+ return -(ENOMEM);
+#endif
+
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(rxq + old_nb_queues, 0,
- sizeof(rxq[0]) * (nb_queues - old_nb_queues));
+ sizeof(rxq[0]) * new_qs);
+
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ memset(dev->post_rx_burst_cbs + old_nb_queues, 0,
+ sizeof(dev->post_rx_burst_cbs[0]) * new_qs);
+#endif
+ }

dev->data->rx_queues = rxq;

@@ -479,6 +509,20 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_tx_queues = 0;
return -(ENOMEM);
}
+
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ dev->pre_tx_burst_cbs = rte_zmalloc(
+ "ethdev->pre_tx_burst_cbs",
+ sizeof(*dev->pre_tx_burst_cbs) * nb_queues,
+ RTE_CACHE_LINE_SIZE);
+ if (dev->pre_tx_burst_cbs == NULL) {
+ rte_free(dev->data->tx_queues);
+ dev->data->tx_queues = NULL;
+ dev->data->nb_tx_queues = 0;
+ return -(ENOMEM);
+ }
+#endif
+
} else { /* re-configure */
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP);

@@ -491,9 +535,25 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
if (txq == NULL)
return -(ENOMEM);

- if (nb_queues > old_nb_queues)
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ dev->pre_tx_burst_cbs = rte_realloc(
+ dev->pre_tx_burst_cbs,
+ sizeof(*dev->pre_tx_burst_cbs) *
+ nb_queues, RTE_CACHE_LINE_SIZE);
+ if (dev->pre_tx_burst_cbs == NULL)
+ return -(ENOMEM);
+#endif
+
+ if (nb_queues > old_nb_queues) {
+ uint16_t new_qs = nb_queues - old_nb_queues;
memset(txq + old_nb_queues, 0,
- sizeof(txq[0]) * (nb_queues - old_nb_queues));
+ sizeof(txq[0]) * new_qs);
+
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ memset(dev->pre_tx_burst_cbs + old_nb_queues, 0,
+ sizeof(dev->pre_tx_burst_cbs[0]) * new_qs);
+#endif
+ }

dev->data->tx_queues = txq;

@@ -3258,3 +3318,139 @@ rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP);
return (*dev->dev_ops->filter_ctrl)(dev, filter_type, filter_op, arg);
}
+
+void *
+rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param)
+{
+#ifndef RTE_ETHDEV_RXTX_CALLBACKS
+ rte_errno = ENOTSUP;
+ return NULL;
+#endif
+ /* check input parameters */
+ if (port_id >= nb_ports || fn == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+
+ if (cb == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ cb->fn = fn;
+ cb->param = user_param;
+ cb->next = rte_eth_devices[port_id].post_rx_burst_cbs[queue_id];
+ rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb;
+ return cb;
+}
+
+void *
+rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param)
+{
+#ifndef RTE_ETHDEV_RXTX_CALLBACKS
+ rte_errno = ENOTSUP;
+ return NULL;
+#endif
+ /* check input parameters */
+ if (port_id >= nb_ports || fn == NULL ||
+ queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+
+ if (cb == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ cb->fn = fn;
+ cb->param = user_param;
+ cb->next = rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id];
+ rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id] = cb;
+ return cb;
+}
+
+/*
+ * Unlink a previously added RX callback from the callback list of the given
+ * port/queue. The callback structure itself is not freed here.
+ *
+ * Returns 0 when the callback was found and unlinked, -ENOTSUP when callback
+ * support is compiled out, and -EINVAL when a parameter is out of range,
+ * user_cb is NULL, or user_cb is not on the list (including an empty list).
+ */
+int
+rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+		struct rte_eth_rxtx_callback *user_cb)
+{
+#ifndef RTE_ETHDEV_RXTX_CALLBACKS
+	return (-ENOTSUP);
+#endif
+	/* Check input parameters. */
+	if (port_id >= nb_ports || user_cb == NULL ||
+			queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+		return (-EINVAL);
+	}
+
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	struct rte_eth_rxtx_callback **link;
+
+	/*
+	 * Walk the links rather than the nodes: "link" points either at the
+	 * queue's head pointer or at the "next" field of the previous entry,
+	 * so the head and middle-of-list cases are handled identically and an
+	 * empty list (head == NULL) simply falls through to "not found"
+	 * instead of dereferencing NULL.
+	 */
+	for (link = &dev->post_rx_burst_cbs[queue_id]; *link != NULL;
+			link = &(*link)->next) {
+		if (*link == user_cb) {
+			*link = user_cb->next;
+			return 0;
+		}
+	}
+
+	/* Callback wasn't found. */
+	return (-EINVAL);
+}
+
+/*
+ * Unlink a previously added TX callback from the callback list of the given
+ * port/queue. The callback structure itself is not freed here.
+ *
+ * Returns 0 when the callback was found and unlinked, -ENOTSUP when callback
+ * support is compiled out, and -EINVAL when a parameter is out of range,
+ * user_cb is NULL, or user_cb is not on the list (including an empty list).
+ */
+int
+rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+		struct rte_eth_rxtx_callback *user_cb)
+{
+#ifndef RTE_ETHDEV_RXTX_CALLBACKS
+	return (-ENOTSUP);
+#endif
+	/* Check input parameters. */
+	if (port_id >= nb_ports || user_cb == NULL ||
+			queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+		return (-EINVAL);
+	}
+
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	struct rte_eth_rxtx_callback **link;
+
+	/*
+	 * Walk the links rather than the nodes: "link" points either at the
+	 * queue's head pointer or at the "next" field of the previous entry,
+	 * so the head and middle-of-list cases are handled identically and an
+	 * empty list (head == NULL) simply falls through to "not found"
+	 * instead of dereferencing NULL.
+	 */
+	for (link = &dev->pre_tx_burst_cbs[queue_id]; *link != NULL;
+			link = &(*link)->next) {
+		if (*link == user_cb) {
+			*link = user_cb->next;
+			return 0;
+		}
+	}
+
+	/* Callback wasn't found. */
+	return (-EINVAL);
+}
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 48e4ac9..d6d3a05 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1523,6 +1523,47 @@ struct eth_dev_ops {
};

/**
+ * Function type used for callbacks for processing packets on RX and TX
+ *
+ * If configured for RX, it is called with a burst of packets that have just
+ * been received on the given port and queue. On TX, it is called with a burst
+ * of packets immediately before those packets are put onto the hardware queue
+ * for transmission.
+ *
+ * @param port
+ * The ethernet port on which rx or tx is being performed
+ * @param queue
+ * The queue on the ethernet port which is being used to receive or transmit
+ * the packets.
+ * @param pkts
+ * The burst of packets on which processing is to be done. On RX, these
+ * packets have just been received. On TX, they are about to be transmitted.
+ * @param nb_pkts
+ * The number of packets in the burst pointed to by "pkts"
+ * @param user_param
+ * The arbitrary user parameter passed in by the application when the callback
+ * was originally configured.
+ * @return
+ * The number of packets remaining in pkts after processing.
+ * * On RX, this will be returned to the user as the return value from
+ * rte_eth_rx_burst.
+ * * On TX, this will be the number of packets actually written to the NIC.
+ */
+typedef uint16_t (*rte_rxtx_callback_fn)(uint8_t port, uint16_t queue,
+ struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param);
+
+/**
+ * @internal
+ * Structure used to hold information about the callbacks to be called for a
+ * queue on RX and TX.
+ */
+struct rte_eth_rxtx_callback {
+ struct rte_eth_rxtx_callback *next;
+ rte_rxtx_callback_fn fn;
+ void *param;
+};
+
+/**
* @internal
* The generic data structure associated with each ethernet device.
*
@@ -1539,7 +1580,20 @@ struct rte_eth_dev {
const struct eth_driver *driver;/**< Driver for this device */
struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
- struct rte_eth_dev_cb_list link_intr_cbs; /**< User application callbacks on interrupt*/
+ /** User application callbacks for NIC interrupts */
+ struct rte_eth_dev_cb_list link_intr_cbs;
+
+ /**
+ * User-supplied functions called from rx_burst to post-process
+ * received packets before passing them to the user
+ */
+ struct rte_eth_rxtx_callback **post_rx_burst_cbs;
+
+ /**
+ * User-supplied functions called from tx_burst to pre-process
+ * outgoing packets before passing them to the driver for transmission.
+ */
+ struct rte_eth_rxtx_callback **pre_tx_burst_cbs;
};

struct rte_eth_dev_sriov {
@@ -2393,7 +2447,23 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;

dev = &rte_eth_devices[port_id];
- return (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts, nb_pkts);
+
+ nb_pkts = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts,
+ nb_pkts);
+
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
+
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, rx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
+#endif
+
+ return nb_pkts;
}
#endif

@@ -2520,6 +2590,19 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev *dev;

dev = &rte_eth_devices[port_id];
+
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+ struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id];
+
+ if (unlikely(cb != NULL)) {
+ do {
+ nb_pkts = cb->fn(port_id, queue_id, tx_pkts, nb_pkts,
+ cb->param);
+ cb = cb->next;
+ } while (cb != NULL);
+ }
+#endif
+
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
}
#endif
@@ -3667,6 +3750,122 @@ int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_ty
int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
enum rte_filter_op filter_op, void *arg);

+/**
+ * Add a callback to be called on packet RX on a given port and queue.
+ *
+ * This API configures a function to be called for each burst of
+ * packets received on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_rx_callback().
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ * The callback function
+ * @param user_param
+ * A generic pointer parameter which will be passed to each invocation of the
+ * callback function on this port and queue.
+ *
+ * @return
+ * NULL on error.
+ * On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
+/**
+ * Add a callback to be called on packet TX on a given port and queue.
+ *
+ * This API configures a function to be called for each burst of
+ * packets sent on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_tx_callback().
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ * The callback function
+ * @param user_param
+ * A generic pointer parameter which will be passed to each invocation of the
+ * callback function on this port and queue.
+ *
+ * @return
+ * NULL on error.
+ * On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+ rte_rxtx_callback_fn fn, void *user_param);
+
+/**
+ * Remove an RX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_rx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since it may still be in use. The memory for the callback can be
+ * subsequently freed by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ * callbacks are in flight e.g. if called from the thread doing RX/TX
+ * on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ * in-flight callbacks to complete.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ * User supplied callback created via rte_eth_add_rx_callback().
+ *
+ * @return
+ * - 0: Success. Callback was removed.
+ * - -ENOTSUP: Callback support is not available.
+ * - -EINVAL: The port_id or the queue_id is out of range, or the callback
+ * is NULL or not found for the port/queue.
+ */
+int rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb);
+
+/**
+ * Remove a TX packet callback from a given port and queue.
+ *
+ * This function is used to remove callbacks that were added to a NIC port
+ * queue using rte_eth_add_tx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since it may still be in use. The memory for the callback can be
+ * subsequently freed by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ * callbacks are in flight e.g. if called from the thread doing RX/TX
+ * on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ * in-flight callbacks to complete.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ * User supplied callback created via rte_eth_add_tx_callback().
+ *
+ * @return
+ * - 0: Success. Callback was removed.
+ * - -ENOTSUP: Callback support is not available.
+ * - -EINVAL: The port_id or the queue_id is out of range, or the callback
+ * is NULL or not found for the port/queue.
+ */
+int rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+ struct rte_eth_rxtx_callback *user_cb);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index 7316530..3227cda 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -2,6 +2,8 @@ DPDK_2.0 {
global:

_rte_eth_dev_callback_process;
+ rte_eth_add_rx_callback;
+ rte_eth_add_tx_callback;
rte_eth_allmulticast_disable;
rte_eth_allmulticast_enable;
rte_eth_allmulticast_get;
@@ -96,6 +98,8 @@ DPDK_2.0 {
rte_eth_promiscuous_disable;
rte_eth_promiscuous_enable;
rte_eth_promiscuous_get;
+ rte_eth_remove_rx_callback;
+ rte_eth_remove_tx_callback;
rte_eth_rx_burst;
rte_eth_rx_descriptor_done;
rte_eth_rx_queue_count;
--
1.7.4.1
John McNamara
2015-02-23 18:30:10 UTC
Permalink
From: Richardson, Bruce <***@intel.com>

Example showing how callbacks can be used to insert a timestamp
into each packet on RX. On TX the timestamp is used to calculate
the packet latency through the app, in cycles.

Signed-off-by: Bruce Richardson <***@intel.com>
Signed-off-by: John McNamara <***@intel.com>
---
MAINTAINERS | 4 +
examples/Makefile | 1 +
examples/rxtx_callbacks/Makefile | 57 ++++++++++
examples/rxtx_callbacks/main.c | 228 ++++++++++++++++++++++++++++++++++++++
4 files changed, 290 insertions(+), 0 deletions(-)
create mode 100644 examples/rxtx_callbacks/Makefile
create mode 100644 examples/rxtx_callbacks/main.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 7ac6d59..dcca441 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -432,6 +432,10 @@ F: doc/guides/sample_app_ug/netmap_compatibility.rst
F: examples/quota_watermark/
F: doc/guides/sample_app_ug/quota_watermark.rst

+M: Bruce Richardson <***@intel.com>
+M: John McNamara <***@intel.com>
+F: examples/rxtx_callbacks/
+
F: examples/skeleton/

F: examples/vmdq/
diff --git a/examples/Makefile b/examples/Makefile
index 095bad2..3d191a6 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -63,6 +63,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += packet_ordering
DIRS-$(CONFIG_RTE_LIBRTE_METER) += qos_meter
DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += qos_sched
DIRS-y += quota_watermark
+DIRS-$(CONFIG_RTE_ETHDEV_RXTX_CALLBACKS) += rxtx_callbacks
DIRS-y += skeleton
DIRS-y += timer
DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost
diff --git a/examples/rxtx_callbacks/Makefile b/examples/rxtx_callbacks/Makefile
new file mode 100644
index 0000000..0fafbb7
--- /dev/null
+++ b/examples/rxtx_callbacks/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = rxtx_callbacks
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+EXTRA_CFLAGS += -O3 -g -Wfatal-errors
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/rxtx_callbacks/main.c b/examples/rxtx_callbacks/main.c
new file mode 100644
index 0000000..9e5e68e
--- /dev/null
+++ b/examples/rxtx_callbacks/main.c
@@ -0,0 +1,228 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <inttypes.h>
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_mbuf.h>
+
+#define RX_RING_SIZE 128
+#define TX_RING_SIZE 512
+
+#define NUM_MBUFS 8191
+#define MBUF_SIZE (1600 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+#define MBUF_CACHE_SIZE 250
+#define BURST_SIZE 32
+
+static const struct rte_eth_conf port_conf_default = {
+ .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN, },
+};
+
+static unsigned nb_ports;
+
+static struct {
+ uint64_t total_cycles;
+ uint64_t total_pkts;
+} latency_numbers;
+
+
+static uint16_t
+add_timestamps(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+ struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
+{
+ unsigned i;
+ uint64_t now = rte_rdtsc();
+
+ for (i = 0; i < nb_pkts; i++)
+ pkts[i]->udata64 = now;
+ return nb_pkts;
+}
+
+static uint16_t
+calc_latency(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+ struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
+{
+ uint64_t cycles = 0;
+ uint64_t now = rte_rdtsc();
+ unsigned i;
+
+ for (i = 0; i < nb_pkts; i++)
+ cycles += now - pkts[i]->udata64;
+ latency_numbers.total_cycles += cycles;
+ latency_numbers.total_pkts += nb_pkts;
+
+ if (latency_numbers.total_pkts > (100 * 1000 * 1000ULL)) {
+ printf("Latency = %"PRIu64" cycles\n",
+ latency_numbers.total_cycles / latency_numbers.total_pkts);
+ latency_numbers.total_cycles = latency_numbers.total_pkts = 0;
+ }
+ return nb_pkts;
+}
+
+/*
+ * Initialises a given port using global settings and with the rx buffers
+ * coming from the mbuf_pool passed as parameter. One RX and one TX queue
+ * are set up, the port is started and put into promiscuous mode, and the
+ * timestamp (RX) and latency (TX) callbacks are attached to queue 0.
+ *
+ * Returns 0 on success, -1 if the port id is out of range or a callback
+ * could not be registered, or the error code returned by the failing
+ * ethdev configure/setup/start call.
+ */
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+	struct rte_eth_conf port_conf = port_conf_default;
+	const uint16_t rx_rings = 1, tx_rings = 1;
+	int retval;
+	uint16_t q;
+
+	if (port >= rte_eth_dev_count())
+		return -1;
+
+	retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+	if (retval != 0)
+		return retval;
+
+	for (q = 0; q < rx_rings; q++) {
+		retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
+				rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+		if (retval < 0)
+			return retval;
+	}
+
+	for (q = 0; q < tx_rings; q++) {
+		retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
+				rte_eth_dev_socket_id(port), NULL);
+		if (retval < 0)
+			return retval;
+	}
+
+	retval = rte_eth_dev_start(port);
+	if (retval < 0)
+		return retval;
+
+	struct ether_addr addr;
+
+	rte_eth_macaddr_get(port, &addr);
+	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+			(unsigned)port,
+			addr.addr_bytes[0], addr.addr_bytes[1],
+			addr.addr_bytes[2], addr.addr_bytes[3],
+			addr.addr_bytes[4], addr.addr_bytes[5]);
+
+	rte_eth_promiscuous_enable(port);
+
+	/*
+	 * The callbacks are the whole point of this example: if either one
+	 * cannot be registered (NULL return), fail the port init instead of
+	 * silently running without timestamps or latency reporting.
+	 */
+	if (rte_eth_add_rx_callback(port, 0, add_timestamps, NULL) == NULL)
+		return -1;
+	if (rte_eth_add_tx_callback(port, 0, calc_latency, NULL) == NULL)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Main thread that does the work: polls every port in turn, reading a burst
+ * of packets from queue 0 and transmitting it on queue 0 of the paired port
+ * (port ^ 1). Packets the TX call does not accept are freed. Never returns.
+ */
+static __attribute__((noreturn)) void
+lcore_main(void)
+{
+	uint8_t port;
+
+	/*
+	 * Warn about ports that sit on a different NUMA node from the polling
+	 * thread. Socket 0 has to take part in the comparison (>= 0, not > 0):
+	 * a port on socket 0 polled from another socket is remote as well.
+	 */
+	for (port = 0; port < nb_ports; port++)
+		if (rte_eth_dev_socket_id(port) >= 0 &&
+				rte_eth_dev_socket_id(port) !=
+						(int)rte_socket_id())
+			printf("WARNING, port %u is on remote NUMA node to "
+					"polling thread.\n\tPerformance will "
+					"not be optimal.\n", (unsigned)port);
+
+	printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
+			rte_lcore_id());
+	for (;;) {
+		for (port = 0; port < nb_ports; port++) {
+			struct rte_mbuf *bufs[BURST_SIZE];
+			const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
+					bufs, BURST_SIZE);
+			if (unlikely(nb_rx == 0))
+				continue;
+			const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
+					bufs, nb_rx);
+			/* Free whatever the TX call did not accept. */
+			if (unlikely(nb_tx < nb_rx)) {
+				uint16_t buf;
+
+				for (buf = nb_tx; buf < nb_rx; buf++)
+					rte_pktmbuf_free(bufs[buf]);
+			}
+		}
+	}
+}
+
+/* Main function, does initialisation and calls the per-lcore functions */
+int
+main(int argc, char *argv[])
+{
+ struct rte_mempool *mbuf_pool;
+ uint8_t portid;
+
+ /* init EAL */
+ int ret = rte_eal_init(argc, argv);
+
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+ argc -= ret;
+ argv += ret;
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports < 2 || (nb_ports & 1))
+ rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n");
+
+ mbuf_pool = rte_mempool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
+ MBUF_SIZE, MBUF_CACHE_SIZE,
+ sizeof(struct rte_pktmbuf_pool_private),
+ rte_pktmbuf_pool_init, NULL,
+ rte_pktmbuf_init, NULL,
+ rte_socket_id(), 0);
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++)
+ if (port_init(portid, mbuf_pool) != 0)
+ rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n",
+ portid);
+
+ if (rte_lcore_count() > 1)
+ printf("\nWARNING: Too much enabled lcores - "
+ "App uses only 1 lcore\n");
+
+ /* call lcore_main on master core only */
+ lcore_main();
+ return 0;
+}
--
1.7.4.1
Thomas Monjalon
2015-02-23 23:39:58 UTC
Permalink
Post by John McNamara
This patchset is for a small optional addition to the ethdev library,
to add support for callbacks at the RX and TX stages. This allows
packet processing to be done on packets before they get returned
to applications using rte_eth_rx_burst call.
http://dpdk.org/ml/archives/dev/2014-December/010491.html
For this version we spent some time investigating Stephen Hemminger's
suggestion of using the userspace RCU (read-copy-update) library for
http://urcu.so/
The default liburcu (which defaulted to liburcu-mb) requires the least
interaction from the end user but showed a 25% drop in packet throughput
in the callback sample app.
The liburcu-qsbr (quiescent state) variant showed a 1% drop in packet
throughput in the callback sample app. However it requires registered
RCU threads in the program to periodically announce quiescent states.
This makes it more difficult to implement for end user applications.
For this release we will document that adding and removing callbacks
is not thread safe.
Note: Sample application documentation to follow in a patch update.
* RX/TX callback functions are no longer #ifdefed out if callback
option is off. Instead they return ENOTSUP.
* Simplified callbacks #ifdefs in rte_ethdev.
* Turned the callback feature on by default.
* Simplified #define name.
* Made the callback feature a compile time option.
* Removed unnecessary header file from example folder
(which included baremetal reference).
* Renamed the interrupt, RX and TX callbacks to make their function
clearer (using the names suggested in the mailing list comments).
* Squashed ABI version update into the commit it relates to.
* Fixed various checkpatch warnings.
* Added ABI versioning.
* Doxygen clarifications.
* Added callback removal functions.
* Minor fixes.
ethdev: rename callbacks field to link_intr_cbs
ethdev: add optional rxtx callback support
examples: example showing use of callbacks.
Applied, thanks

Continue reading on narkive:
Loading...