Discussion:
[dpdk-dev] [PATCH 0/7] Hyper-v driver and infrastructure
(too old to reply)
Stephen Hemminger
2015-02-05 01:13:22 UTC
Permalink
From: Stephen Hemminger <***@brocade.com>

This patch series contains the necessary pieces to implement
a poll mode driver for the Hyper-V virtual network interface.

This device driver was developed from the FreeBSD code
and is released by Brocade under the same license as the rest
of the DPDK, i.e. the PMD is BSD and the kernel parts are GPL.

The code was developed on a previous version of DPDK (1.6)
and has been ported to the current version, but it has not been
tested on the current version yet.

Stephen Hemminger (7):
ether: add function to query for link state interrupt
pmd: change drivers initialization for pci
hv: add basic vmbus support
hv: uio driver
hv: poll mode driver
hv: enable driver in common config
hv: add kernel patch

config/common_linuxapp | 9 +
lib/Makefile | 1 +
lib/librte_eal/common/Makefile | 2 +-
lib/librte_eal/common/eal_common_options.c | 5 +
lib/librte_eal/common/eal_internal_cfg.h | 1 +
lib/librte_eal/common/eal_options.h | 2 +
lib/librte_eal/common/eal_private.h | 10 +
lib/librte_eal/common/include/rte_pci.h | 2 +
lib/librte_eal/common/include/rte_vmbus.h | 153 ++
lib/librte_eal/linuxapp/Makefile | 3 +
lib/librte_eal/linuxapp/eal/Makefile | 3 +
lib/librte_eal/linuxapp/eal/eal.c | 5 +
lib/librte_eal/linuxapp/eal/eal_vmbus.c | 658 ++++++++
lib/librte_eal/linuxapp/hv_uio/Makefile | 57 +
lib/librte_eal/linuxapp/hv_uio/hv_uio.c | 551 +++++++
lib/librte_eal/linuxapp/hv_uio/hyperv_net.h | 907 +++++++++++
.../linuxapp/hv_uio/vmbus-get-pages.patch | 55 +
lib/librte_ether/rte_ethdev.c | 98 +-
lib/librte_ether/rte_ethdev.h | 27 +-
lib/librte_pmd_e1000/em_ethdev.c | 2 +-
lib/librte_pmd_e1000/igb_ethdev.c | 4 +-
lib/librte_pmd_enic/enic_ethdev.c | 2 +-
lib/librte_pmd_hyperv/Makefile | 28 +
lib/librte_pmd_hyperv/hyperv.h | 169 ++
lib/librte_pmd_hyperv/hyperv_drv.c | 1660 ++++++++++++++++++++
lib/librte_pmd_hyperv/hyperv_drv.h | 558 +++++++
lib/librte_pmd_hyperv/hyperv_ethdev.c | 334 ++++
lib/librte_pmd_hyperv/hyperv_logs.h | 68 +
lib/librte_pmd_hyperv/hyperv_rxtx.c | 402 +++++
lib/librte_pmd_hyperv/hyperv_rxtx.h | 35 +
lib/librte_pmd_i40e/i40e_ethdev.c | 2 +-
lib/librte_pmd_i40e/i40e_ethdev_vf.c | 2 +-
lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 4 +-
lib/librte_pmd_virtio/virtio_ethdev.c | 2 +-
lib/librte_pmd_vmxnet3/vmxnet3_ethdev.c | 2 +-
mk/rte.app.mk | 4 +
36 files changed, 5809 insertions(+), 18 deletions(-)
create mode 100644 lib/librte_eal/common/include/rte_vmbus.h
create mode 100644 lib/librte_eal/linuxapp/eal/eal_vmbus.c
create mode 100644 lib/librte_eal/linuxapp/hv_uio/Makefile
create mode 100644 lib/librte_eal/linuxapp/hv_uio/hv_uio.c
create mode 100644 lib/librte_eal/linuxapp/hv_uio/hyperv_net.h
create mode 100644 lib/librte_eal/linuxapp/hv_uio/vmbus-get-pages.patch
create mode 100644 lib/librte_pmd_hyperv/Makefile
create mode 100644 lib/librte_pmd_hyperv/hyperv.h
create mode 100644 lib/librte_pmd_hyperv/hyperv_drv.c
create mode 100644 lib/librte_pmd_hyperv/hyperv_drv.h
create mode 100644 lib/librte_pmd_hyperv/hyperv_ethdev.c
create mode 100644 lib/librte_pmd_hyperv/hyperv_logs.h
create mode 100644 lib/librte_pmd_hyperv/hyperv_rxtx.c
create mode 100644 lib/librte_pmd_hyperv/hyperv_rxtx.h
--
2.1.4
Stephen Hemminger
2015-02-05 01:13:23 UTC
Permalink
From: Stephen Hemminger <***@brocade.com>

Allow application to query whether link state will work.
This is also part of abstracting dependency on PCI.

Signed-off-by: Stephen Hemminger <***@networkplumber.org>
---
lib/librte_ether/rte_ethdev.c | 14 ++++++++++++++
lib/librte_ether/rte_ethdev.h | 12 ++++++++++++
2 files changed, 26 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index ea3a1fb..4d803d0 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1264,6 +1264,20 @@ rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *eth_link)
}
}

+int
+rte_eth_has_link_state(uint8_t port_id)
+{
+ struct rte_eth_dev *dev;
+
+ if (port_id >= nb_ports) {
+ PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+ return 0;
+ }
+ dev = &rte_eth_devices[port_id];
+
+ return (dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) != 0;
+}
+
void
rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats)
{
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 1200c1c..9d43ca3 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -2038,6 +2038,18 @@ extern void rte_eth_link_get_nowait(uint8_t port_id,
struct rte_eth_link *link);

/**
+ * Test whether device supports link state interrupt mode.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @return
+ * - (1) if link state interrupt is supported
+ * - (0) if link state interrupt is not supported
+ */
+extern int
+rte_eth_has_link_state(uint8_t port_id);
+
+/**
* Retrieve the general I/O statistics of an Ethernet device.
*
* @param port_id
--
2.1.4
Neil Horman
2015-02-05 01:47:37 UTC
Permalink
Post by Stephen Hemminger
Allow application to query whether link state will work.
This is also part of abstracting dependency on PCI.
---
lib/librte_ether/rte_ethdev.c | 14 ++++++++++++++
lib/librte_ether/rte_ethdev.h | 12 ++++++++++++
2 files changed, 26 insertions(+)
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index ea3a1fb..4d803d0 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1264,6 +1264,20 @@ rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *eth_link)
}
}
+int
+rte_eth_has_link_state(uint8_t port_id)
+{
+ struct rte_eth_dev *dev;
+
+ if (port_id >= nb_ports) {
+ PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+ return 0;
+ }
+ dev = &rte_eth_devices[port_id];
+
+ return (dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) != 0;
+}
+
void
rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats)
{
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 1200c1c..9d43ca3 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -2038,6 +2038,18 @@ extern void rte_eth_link_get_nowait(uint8_t port_id,
struct rte_eth_link *link);
/**
+ * Test whether device supports link state interrupt mode.
+ *
+ * The port identifier of the Ethernet device.
+ * - (1) if link state interrupt is supported
+ * - (0) if link state interrupt is not supported
+ */
+extern int
+rte_eth_has_link_state(uint8_t port_id);
+
+/**
* Retrieve the general I/O statistics of an Ethernet device.
*
--
2.1.4
I think if your intention is to expose this as an application (or other library
visible api), you'll need to add it to the appropriate version map file for the
link now. Should just be a single line addition

Regards
Neil
Stephen Hemminger
2015-02-05 05:49:07 UTC
Permalink
On Thu, 5 Feb 2015 01:47:37 +0000
Post by Neil Horman
Post by Stephen Hemminger
Allow application to query whether link state will work.
This is also part of abstracting dependency on PCI.
---
lib/librte_ether/rte_ethdev.c | 14 ++++++++++++++
lib/librte_ether/rte_ethdev.h | 12 ++++++++++++
2 files changed, 26 insertions(+)
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index ea3a1fb..4d803d0 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1264,6 +1264,20 @@ rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *eth_link)
}
}
+int
+rte_eth_has_link_state(uint8_t port_id)
+{
+ struct rte_eth_dev *dev;
+
+ if (port_id >= nb_ports) {
+ PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+ return 0;
+ }
+ dev = &rte_eth_devices[port_id];
+
+ return (dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) != 0;
+}
+
void
rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats)
{
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 1200c1c..9d43ca3 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -2038,6 +2038,18 @@ extern void rte_eth_link_get_nowait(uint8_t port_id,
struct rte_eth_link *link);
/**
+ * Test whether device supports link state interrupt mode.
+ *
+ * The port identifier of the Ethernet device.
+ * - (1) if link state interrupt is supported
+ * - (0) if link state interrupt is not supported
+ */
+extern int
+rte_eth_has_link_state(uint8_t port_id);
+
+/**
* Retrieve the general I/O statistics of an Ethernet device.
*
--
2.1.4
I think if your intention is to expose this as an application (or other library
visible api), you'll need to add it to the appropriate version map file for the
link now. Should just be a single line addition
Regards
Neil
Will add that after other changes are approved.
Also looking at the map files they seem to have been auto generated
because some symbols are exposed which are unnecessary (example pci_driver_list)
Neil Horman
2015-02-05 12:39:50 UTC
Permalink
Post by Stephen Hemminger
On Thu, 5 Feb 2015 01:47:37 +0000
Post by Neil Horman
Post by Stephen Hemminger
Allow application to query whether link state will work.
This is also part of abstracting dependency on PCI.
---
lib/librte_ether/rte_ethdev.c | 14 ++++++++++++++
lib/librte_ether/rte_ethdev.h | 12 ++++++++++++
2 files changed, 26 insertions(+)
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index ea3a1fb..4d803d0 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1264,6 +1264,20 @@ rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *eth_link)
}
}
+int
+rte_eth_has_link_state(uint8_t port_id)
+{
+ struct rte_eth_dev *dev;
+
+ if (port_id >= nb_ports) {
+ PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+ return 0;
+ }
+ dev = &rte_eth_devices[port_id];
+
+ return (dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) != 0;
+}
+
void
rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats)
{
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 1200c1c..9d43ca3 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -2038,6 +2038,18 @@ extern void rte_eth_link_get_nowait(uint8_t port_id,
struct rte_eth_link *link);
/**
+ * Test whether device supports link state interrupt mode.
+ *
+ * The port identifier of the Ethernet device.
+ * - (1) if link state interrupt is supported
+ * - (0) if link state interrupt is not supported
+ */
+extern int
+rte_eth_has_link_state(uint8_t port_id);
+
+/**
* Retrieve the general I/O statistics of an Ethernet device.
*
--
2.1.4
I think if your intention is to expose this as an application (or other library
visible api), you'll need to add it to the appropriate version map file for the
link now. Should just be a single line addition
Regards
Neil
Will add that after other changes are approved.
Also looking at the map files they seem to have been auto generated
because some symbols are exposed which are unnecessary (example pci_driver_list)
It wasn't quite auto generated. It was generated based on what the requisite
header files for a library exposed to the various applications that used them.
In your example above pci_driver_list is a global public variable, that is
referenced by applications that the DPDK ships (in this case the test_pci
application). I completely agree that it shouldn't need to be versioned, as it
shouldn't be exposed at all, relying instead on accessor functions to reach it,
but as such, it is what it is. By versioning it, we at least allow ourselves
the option to maintain backwards compatibility if the list type changes or we
wish to start hiding the symbol in future versions.

Neil
Stephen Hemminger
2015-02-05 01:13:24 UTC
Permalink
From: Stephen Hemminger <***@brocade.com>

The change to the generic ether device structure to support multiple
bus types requires a change to all existing PMDs, but only in the
initialization (and the change is backwards compatible).

Signed-off-by: Stephen Hemminger <***@networkplumber.org>
---
lib/librte_pmd_e1000/em_ethdev.c | 2 +-
lib/librte_pmd_e1000/igb_ethdev.c | 4 ++--
lib/librte_pmd_enic/enic_ethdev.c | 2 +-
lib/librte_pmd_i40e/i40e_ethdev.c | 2 +-
lib/librte_pmd_i40e/i40e_ethdev_vf.c | 2 +-
lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 4 ++--
lib/librte_pmd_virtio/virtio_ethdev.c | 2 +-
lib/librte_pmd_vmxnet3/vmxnet3_ethdev.c | 2 +-
8 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/lib/librte_pmd_e1000/em_ethdev.c b/lib/librte_pmd_e1000/em_ethdev.c
index 3f2897e..15f4647 100644
--- a/lib/librte_pmd_e1000/em_ethdev.c
+++ b/lib/librte_pmd_e1000/em_ethdev.c
@@ -283,7 +283,7 @@ eth_em_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
}

static struct eth_driver rte_em_pmd = {
- {
+ .pci_drv = {
.name = "rte_em_pmd",
.id_table = pci_id_em_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
diff --git a/lib/librte_pmd_e1000/igb_ethdev.c b/lib/librte_pmd_e1000/igb_ethdev.c
index 2a268b8..0901ffa 100644
--- a/lib/librte_pmd_e1000/igb_ethdev.c
+++ b/lib/librte_pmd_e1000/igb_ethdev.c
@@ -682,7 +682,7 @@ eth_igbvf_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
}

static struct eth_driver rte_igb_pmd = {
- {
+ .pci_drv = {
.name = "rte_igb_pmd",
.id_table = pci_id_igb_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
@@ -695,7 +695,7 @@ static struct eth_driver rte_igb_pmd = {
* virtual function driver struct
*/
static struct eth_driver rte_igbvf_pmd = {
- {
+ .pci_drv = {
.name = "rte_igbvf_pmd",
.id_table = pci_id_igbvf_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
diff --git a/lib/librte_pmd_enic/enic_ethdev.c b/lib/librte_pmd_enic/enic_ethdev.c
index 9cb6666..1e751b4 100644
--- a/lib/librte_pmd_enic/enic_ethdev.c
+++ b/lib/librte_pmd_enic/enic_ethdev.c
@@ -581,7 +581,7 @@ static int eth_enicpmd_dev_init(
}

static struct eth_driver rte_enic_pmd = {
- {
+ .pci_drv = {
.name = "rte_enic_pmd",
.id_table = pci_id_enic_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c b/lib/librte_pmd_i40e/i40e_ethdev.c
index 9fa6bec..4716413 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev.c
@@ -266,7 +266,7 @@ static struct eth_dev_ops i40e_eth_dev_ops = {
};

static struct eth_driver rte_i40e_pmd = {
- {
+ .pci_drv = {
.name = "rte_i40e_pmd",
.id_table = pci_id_i40e_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
diff --git a/lib/librte_pmd_i40e/i40e_ethdev_vf.c b/lib/librte_pmd_i40e/i40e_ethdev_vf.c
index 7d8d8ef..505cd43 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev_vf.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev_vf.c
@@ -1203,7 +1203,7 @@ i40evf_dev_init(__rte_unused struct eth_driver *eth_drv,
* virtual function driver struct
*/
static struct eth_driver rte_i40evf_pmd = {
- {
+ .pci_drv = {
.name = "rte_i40evf_pmd",
.id_table = pci_id_i40evf_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
index b341dd0..f21aeb3 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
@@ -1092,7 +1092,7 @@ eth_ixgbevf_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
}

static struct eth_driver rte_ixgbe_pmd = {
- {
+ .pci_drv = {
.name = "rte_ixgbe_pmd",
.id_table = pci_id_ixgbe_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
@@ -1105,7 +1105,7 @@ static struct eth_driver rte_ixgbe_pmd = {
* virtual function driver struct
*/
static struct eth_driver rte_ixgbevf_pmd = {
- {
+ .pci_drv = {
.name = "rte_ixgbevf_pmd",
.id_table = pci_id_ixgbevf_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c b/lib/librte_pmd_virtio/virtio_ethdev.c
index b3b5bb6..a6c3edc 100644
--- a/lib/librte_pmd_virtio/virtio_ethdev.c
+++ b/lib/librte_pmd_virtio/virtio_ethdev.c
@@ -962,7 +962,7 @@ eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv,
}

static struct eth_driver rte_virtio_pmd = {
- {
+ .pci_drv = {
.name = "rte_virtio_pmd",
.id_table = pci_id_virtio_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
diff --git a/lib/librte_pmd_vmxnet3/vmxnet3_ethdev.c b/lib/librte_pmd_vmxnet3/vmxnet3_ethdev.c
index ef0af16..7ba41d8 100644
--- a/lib/librte_pmd_vmxnet3/vmxnet3_ethdev.c
+++ b/lib/librte_pmd_vmxnet3/vmxnet3_ethdev.c
@@ -264,7 +264,7 @@ eth_vmxnet3_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
}

static struct eth_driver rte_vmxnet3_pmd = {
- {
+ .pci_drv = {
.name = "rte_vmxnet3_pmd",
.id_table = pci_id_vmxnet3_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
--
2.1.4
Stephen Hemminger
2015-02-05 01:13:25 UTC
Permalink
From: Stephen Hemminger <***@brocade.com>

The hyper-v device driver forces the base EAL code to change
to support multiple bus types. This is done by changing the pci_device
in the ether driver to a generic union.

As much as possible this is done in a backwards source compatible
way. It will break ABI for device drivers.

Signed-off-by: Stephen Hemminger <***@networkplumber.org>
---
lib/librte_eal/common/Makefile | 2 +-
lib/librte_eal/common/eal_common_options.c | 5 +
lib/librte_eal/common/eal_internal_cfg.h | 1 +
lib/librte_eal/common/eal_options.h | 2 +
lib/librte_eal/common/eal_private.h | 10 +
lib/librte_eal/common/include/rte_pci.h | 2 +
lib/librte_eal/common/include/rte_vmbus.h | 153 +++++++
lib/librte_eal/linuxapp/eal/Makefile | 3 +
lib/librte_eal/linuxapp/eal/eal.c | 5 +
lib/librte_eal/linuxapp/eal/eal_vmbus.c | 658 +++++++++++++++++++++++++++++
lib/librte_ether/rte_ethdev.c | 84 +++-
lib/librte_ether/rte_ethdev.h | 15 +-
12 files changed, 932 insertions(+), 8 deletions(-)
create mode 100644 lib/librte_eal/common/include/rte_vmbus.h
create mode 100644 lib/librte_eal/linuxapp/eal/eal_vmbus.c

diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index 52c1a5f..f4326e9 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -33,7 +33,7 @@ include $(RTE_SDK)/mk/rte.vars.mk

INC := rte_branch_prediction.h rte_common.h
INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
-INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
+INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h rte_vmbus.h
INC += rte_pci_dev_ids.h rte_per_lcore.h rte_random.h
INC += rte_rwlock.h rte_tailq.h rte_interrupts.h rte_alarm.h
INC += rte_string_fns.h rte_version.h rte_tailq_elem.h
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 67e02dc..b254b83 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -73,6 +73,7 @@ eal_long_options[] = {
{OPT_NO_HPET, 0, 0, OPT_NO_HPET_NUM},
{OPT_VMWARE_TSC_MAP, 0, 0, OPT_VMWARE_TSC_MAP_NUM},
{OPT_NO_PCI, 0, 0, OPT_NO_PCI_NUM},
+ {OPT_NO_VMBUS, 0, 0, OPT_NO_VMBUS_NUM},
{OPT_NO_HUGE, 0, 0, OPT_NO_HUGE_NUM},
{OPT_FILE_PREFIX, 1, 0, OPT_FILE_PREFIX_NUM},
{OPT_SOCKET_MEM, 1, 0, OPT_SOCKET_MEM_NUM},
@@ -441,6 +442,10 @@ eal_parse_common_option(int opt, const char *optarg,
conf->no_pci = 1;
break;

+ case OPT_NO_VMBUS_NUM:
+ conf->no_vmbus = 1;
+ break;
+
case OPT_NO_HPET_NUM:
conf->no_hpet = 1;
break;
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index e2ecb0d..0e7de34 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -66,6 +66,7 @@ struct internal_config {
volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */
volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
volatile unsigned no_pci; /**< true to disable PCI */
+ volatile unsigned no_vmbus; /**< true to disable VMBUS */
volatile unsigned no_hpet; /**< true to disable HPET */
volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
* instead of native TSC */
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index e476f8d..b6075b9 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -57,6 +57,8 @@ enum {
OPT_VMWARE_TSC_MAP_NUM,
#define OPT_NO_PCI "no-pci"
OPT_NO_PCI_NUM,
+#define OPT_NO_VMBUS "no-vmbus"
+ OPT_NO_VMBUS_NUM,
#define OPT_NO_HUGE "no-huge"
OPT_NO_HUGE_NUM,
#define OPT_FILE_PREFIX "file-prefix"
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 159cd66..af559a4 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -165,6 +165,16 @@ int rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr,
struct rte_pci_device *dev);

/**
+ * VMBUS related functions and structures
+ */
+int rte_eal_vmbus_init(void);
+
+struct rte_vmbus_driver;
+struct rte_vmbus_device;
+
+int rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
+ struct rte_vmbus_device *dev);
+/**
* Init tail queues for non-EAL library structures. This is to allow
* the rings, mempools, etc. lists to be shared among multiple processes
*
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index 66ed793..0ede642 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -199,6 +199,8 @@ struct rte_pci_driver {
#define RTE_PCI_DRV_FORCE_UNBIND 0x0004
/** Device driver supports link state interrupt */
#define RTE_PCI_DRV_INTR_LSC 0x0008
+/** Device driver needs VMBUS */
+#define RTE_PCI_DRV_NEED_HV_UIO 0x0010

/**< Internal use only - Macro used by pci addr parsing functions **/
#define GET_PCIADDR_FIELD(in, fd, lim, dlm) \
diff --git a/lib/librte_eal/common/include/rte_vmbus.h b/lib/librte_eal/common/include/rte_vmbus.h
new file mode 100644
index 0000000..2742cb1
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_vmbus.h
@@ -0,0 +1,153 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _RTE_VMBUS_H_
+#define _RTE_VMBUS_H_
+
+/**
+ * @file
+ *
+ * RTE VMBUS Interface
+ */
+
+#include <sys/queue.h>
+
+/** Pathname of VMBUS devices directory. */
+#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices"
+
+/** Formatting string for VMBUS device identifier: Ex: vmbus_0_9 */
+#define VMBUS_PRI_FMT "vmbus_0_%u"
+
+#define VMBUS_ID_ANY 0xFFFF
+
+#define VMBUS_NETWORK_DEVICE "{f8615163-df3e-46c5-913f-f2d2f965ed0e}"
+
+/** Maximum number of VMBUS resources. */
+#define VMBUS_MAX_RESOURCE 7
+
+/**
+ * A structure describing an ID for a VMBUS driver. Each driver provides a
+ * table of these IDs for each device that it supports.
+ */
+struct rte_vmbus_id {
+ uint16_t device_id; /**< VMBUS Device ID */
+ uint16_t sysfs_num; /**< vmbus_0_X */
+};
+
+/**
+ * A structure describing a VMBUS memory resource.
+ */
+struct rte_vmbus_resource {
+ uint64_t phys_addr; /**< Physical address, 0 if no resource. */
+ uint64_t len; /**< Length of the resource. */
+ void *addr; /**< Virtual address, NULL when not mapped. */
+};
+
+/**
+ * A structure describing a VMBUS device.
+ */
+struct rte_vmbus_device {
+ TAILQ_ENTRY(rte_vmbus_device) next; /**< Next probed VMBUS device. */
+ struct rte_vmbus_id id; /**< VMBUS ID. */
+ const struct rte_vmbus_driver *driver; /**< Associated driver */
+ int numa_node; /**< NUMA node connection */
+ unsigned int blacklisted:1; /**< Device is blacklisted */
+ struct rte_vmbus_resource mem_resource[VMBUS_MAX_RESOURCE]; /**< VMBUS Memory Resource */
+ uint32_t vmbus_monitor_id; /**< VMBus monitor ID for device */
+ int uio_fd; /**< UIO device file descriptor */
+};
+
+/** Macro used to help building up tables of device IDs */
+#define RTE_VMBUS_DEVICE(dev) \
+ .device_id = (dev)
+
+struct rte_vmbus_driver;
+
+/**
+ * Initialisation function for the driver called during VMBUS probing.
+ */
+typedef int (vmbus_devinit_t)(struct rte_vmbus_driver *, struct rte_vmbus_device *);
+
+/**
+ * A structure describing a VMBUS driver.
+ */
+struct rte_vmbus_driver {
+ TAILQ_ENTRY(rte_vmbus_driver) next; /**< Next in list. */
+ const char *name; /**< Driver name. */
+ vmbus_devinit_t *devinit; /**< Device init. function. */
+ struct rte_vmbus_id *id_table; /**< ID table, NULL terminated. */
+ uint32_t drv_flags; /**< Flags controlling handling of device. */
+ const char *module_name; /**< Associated kernel module */
+};
+
+/**
+ * Probe the VMBUS device for registered drivers.
+ *
+ * Scan the content of the vmbus, and call the probe() function for
+ * all registered drivers that have a matching entry in its id_table
+ * for discovered devices.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int rte_eal_vmbus_probe(void);
+
+/**
+ * Dump the content of the vmbus.
+ */
+void rte_eal_vmbus_dump(void);
+
+/**
+ * Register a VMBUS driver.
+ *
+ * @param driver
+ * A pointer to a rte_vmbus_driver structure describing the driver
+ * to be registered.
+ */
+void rte_eal_vmbus_register(struct rte_vmbus_driver *driver);
+
+/**
+ * Unregister a VMBUS driver.
+ *
+ * @param driver
+ * A pointer to a rte_vmbus_driver structure describing the driver
+ * to be unregistered.
+ */
+void rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver);
+
+int vmbus_uio_map_resource(struct rte_vmbus_device *dev);
+
+#endif /* _RTE_VMBUS_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index e117cec..e4727fb 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -74,6 +74,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_alarm.c
ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_ivshmem.c
endif
+ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_vmbus.c
+endif

# from common dir
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index f99e158..b750fd8 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -755,6 +755,11 @@ rte_eal_init(int argc, char **argv)
if (rte_eal_pci_init() < 0)
rte_panic("Cannot init PCI\n");

+#ifdef RTE_LIBRTE_HV_PMD
+ if (rte_eal_vmbus_init() < 0)
+ RTE_LOG(ERR, EAL, "Cannot init VMBUS\n");
+#endif
+
#ifdef RTE_LIBRTE_IVSHMEM
if (rte_eal_ivshmem_init() < 0)
rte_panic("Cannot init IVSHMEM\n");
diff --git a/lib/librte_eal/linuxapp/eal/eal_vmbus.c b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
new file mode 100644
index 0000000..8f87679
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
@@ -0,0 +1,658 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <string.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_vmbus.h>
+#include <rte_common.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_malloc.h>
+
+#include "eal_filesystem.h"
+#include "eal_private.h"
+
+#define PROC_MODULES "/proc/modules"
+#define VMBUS_DRV_PATH "/sys/bus/vmbus/drivers/%s"
+
+TAILQ_HEAD(vmbus_device_list, rte_vmbus_device); /**< VMBUS devices in D-linked Q. */
+TAILQ_HEAD(vmbus_driver_list, rte_vmbus_driver); /**< VMBUS drivers in D-linked Q. */
+
+static struct vmbus_driver_list vmbus_driver_list =
+ TAILQ_HEAD_INITIALIZER(vmbus_driver_list);
+static struct vmbus_device_list vmbus_device_list =
+ TAILQ_HEAD_INITIALIZER(vmbus_device_list);
+
+struct uio_map { /* one UIO memory region, described by sysfs <uio dir>/maps/mapN */
+ void *addr; /* process virtual address, filled in by vmbus_uio_map_resource() */
+ uint64_t offset; /* read from mapN/offset; later overwritten with the mmap() file offset */
+ uint64_t size; /* read from mapN/size */
+ uint64_t phaddr; /* read from mapN/addr */
+};
+
+/*
+ * For multi-process we need to reproduce all vmbus mappings in secondary
+ * processes, so save them in a tailq.
+ *
+ * One record is allocated per device by vmbus_uio_map_resource(), which
+ * fills in the device id, the /dev/uioX path and the per-region map table.
+ *
+ * NOTE(review): this file declares no tailq of uio_resource and
+ * vmbus_uio_map_resource() never links the record through 'next', so the
+ * record is currently not reachable after it has been filled in. Confirm
+ * where it is meant to be saved.
+ */
+struct uio_resource {
+ TAILQ_ENTRY(uio_resource) next;
+
+ struct rte_vmbus_id vmbus_addr; /* copy of the owning device's id */
+ char path[PATH_MAX]; /* "/dev/uio<N>" of the device */
+ size_t nb_maps; /* number of entries of maps[] found in sysfs */
+ struct uio_map maps[VMBUS_MAX_RESOURCE];
+};
+
+/*
+ * Parse a sysfs file containing one integer value.
+ * Different to the eal version, as it needs to work with 64-bit values.
+ *
+ * The first line of the file must consist of a single number (any base
+ * understood by strtoull() with base 0) immediately followed by a newline.
+ *
+ * Returns 0 with the number stored in *val on success, -1 when the file
+ * cannot be opened, read or parsed (*val must not be relied upon then).
+ */
+static int
+vmbus_parse_sysfs_value(const char *filename, uint64_t *val)
+{
+	FILE *f;
+	char buf[BUFSIZ];
+	char *end = NULL;
+	int ret = -1;
+
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+			__func__, filename);
+		return -1;
+	}
+
+	if (fgets(buf, sizeof(buf), f) == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+			__func__, filename);
+		goto out;
+	}
+
+	/*
+	 * strtoull() never leaves 'end' NULL: "no digits at all" is reported
+	 * as end == buf and overflow as errno == ERANGE, so test for those
+	 * instead of for a NULL end pointer.
+	 */
+	errno = 0;
+	*val = strtoull(buf, &end, 0);
+	if (errno != 0 || end == buf || *end != '\n') {
+		RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
+			__func__, filename);
+		goto out;
+	}
+	ret = 0;
+
+out:
+	/* single exit so the stream is closed exactly once on every path */
+	fclose(f);
+	return ret;
+}
+
+/*
+ * Largest value representable in off_t (a signed type).
+ * Casting (off_t)-1 to uint64_t would yield all ones and make the range
+ * check below always pass.
+ */
+#define OFF_MAX ((((uint64_t)1) << (sizeof(off_t) * 8 - 1)) - 1)
+
+/*
+ * Read the description of the UIO mappings of a device from sysfs.
+ *
+ * For N = 0 .. nb_maps-1 the files <devname>/maps/mapN/{offset,size,addr}
+ * are parsed into maps[N]; scanning stops at the first mapN directory
+ * that does not exist.
+ *
+ * Returns the number of mappings found, or -1 if a file cannot be parsed
+ * or an offset/size does not fit in off_t/size_t.
+ */
+static ssize_t
+vmbus_uio_get_mappings(const char *devname, struct uio_map maps[], size_t nb_maps)
+{
+	size_t i;
+	char dirname[PATH_MAX];
+	char filename[PATH_MAX];
+	uint64_t offset, size, phaddr;
+
+	for (i = 0; i != nb_maps; i++) {
+
+		/* check if map directory exists */
+		snprintf(dirname, sizeof(dirname),
+			"%s/maps/map%zu", devname, i);
+
+		RTE_LOG(DEBUG, EAL, "Scanning maps in %s\n", dirname);
+
+		if (access(dirname, F_OK) != 0)
+			break;
+
+		/* get mapping offset */
+		snprintf(filename, sizeof(filename),
+			"%s/offset", dirname);
+		if (vmbus_parse_sysfs_value(filename, &offset) < 0) {
+			RTE_LOG(ERR, EAL,
+				"%s(): cannot parse offset of %s\n",
+				__func__, dirname);
+			return -1;
+		}
+
+		/* get mapping size */
+		snprintf(filename, sizeof(filename),
+			"%s/size", dirname);
+		if (vmbus_parse_sysfs_value(filename, &size) < 0) {
+			RTE_LOG(ERR, EAL,
+				"%s(): cannot parse size of %s\n",
+				__func__, dirname);
+			return -1;
+		}
+
+		/* get mapping physical address */
+		snprintf(filename, sizeof(filename),
+			"%s/addr", dirname);
+		if (vmbus_parse_sysfs_value(filename, &phaddr) < 0) {
+			RTE_LOG(ERR, EAL,
+				"%s(): cannot parse addr of %s\n",
+				__func__, dirname);
+			return -1;
+		}
+
+		/* the caller later casts these to off_t / size_t */
+		if ((offset > OFF_MAX) || (size > SIZE_MAX)) {
+			RTE_LOG(ERR, EAL,
+				"%s(): offset/size exceed system max value\n",
+				__func__);
+			return -1;
+		}
+
+		/* only publish the entry once all of it has been validated */
+		maps[i].offset = offset;
+		maps[i].size = size;
+		maps[i].phaddr = phaddr;
+	}
+	return i;
+}
+
+/* maximum time to wait for /dev/uioX to appear */
+#define UIO_DEV_WAIT_TIMEOUT 3 /* seconds */
+
+/*
+ * Map a particular resource from a file.
+ *
+ * Uses the descriptor cached in dev->uio_fd when it is positive, otherwise
+ * opens 'devname' and caches the new descriptor. 'size' bytes at file
+ * offset 'offset' are then mapped shared, read/write; when requested_addr
+ * is not NULL the mapping must land exactly there.
+ *
+ * Returns the mapped address, or NULL on failure.
+ *
+ * Whenever the descriptor is closed here (mapping failure, or any process
+ * that is not the primary one) dev->uio_fd is reset to 0 so that the next
+ * call reopens the device instead of reusing a closed descriptor.
+ */
+static void *
+vmbus_map_resource(struct rte_vmbus_device *dev, void *requested_addr,
+		const char *devname, off_t offset, size_t size)
+{
+	int fd;
+	void *mapaddr;
+
+	if (dev->uio_fd <= 0) {
+#ifdef RTE_EAL_UNBIND_PORTS
+		/*
+		 * open devname, and mmap it: it can take some time to
+		 * appear, so we wait some time before returning an error
+		 */
+		unsigned n;
+		fd = -1;
+		for (n = 0; n < UIO_DEV_WAIT_TIMEOUT*10 && fd < 0; n++) {
+			errno = 0;
+			fd = open(devname, O_RDWR);
+			if (fd < 0 && errno != ENOENT)
+				break;
+			usleep(100000);
+		}
+#else
+		/*
+		 * open devname, to mmap it
+		 */
+		fd = open(devname, O_RDWR);
+#endif
+	} else
+		fd = dev->uio_fd;
+
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+			devname, strerror(errno));
+		return NULL;
+	}
+
+	dev->uio_fd = fd;
+	/* Map the memory resource of device */
+	mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
+			MAP_SHARED, fd, offset);
+	if (mapaddr == MAP_FAILED ||
+			(requested_addr != NULL && mapaddr != requested_addr)) {
+		/* log first: the cleanup calls below may overwrite errno */
+		RTE_LOG(ERR, EAL, "%s(): cannot mmap(%s(%d), %p, 0x%lx, 0x%lx):"
+			" %s (%p)\n", __func__, devname, fd, requested_addr,
+			(unsigned long)size, (unsigned long)offset,
+			strerror(errno), mapaddr);
+		/* mapped, but not where it was requested: do not leak it */
+		if (mapaddr != MAP_FAILED)
+			munmap(mapaddr, size);
+		close(fd);
+		dev->uio_fd = 0;
+		return NULL;
+	}
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		close(fd);
+		dev->uio_fd = 0;
+	}
+
+	RTE_LOG(DEBUG, EAL, " VMBUS memory mapped at %p\n", mapaddr);
+
+	return mapaddr;
+}
+
+/*
+ * Map the resources of a vmbus device in virtual memory.
+ *
+ * Locates the uioN node of the device under
+ * /sys/bus/vmbus/devices/<device>/, reads the region table from its
+ * maps/ directory and maps every region that has a non-zero physical
+ * address from /dev/uioN, recording the result in dev->mem_resource[].
+ *
+ * Returns 0 on success and -1 on failure (device not bound to a UIO
+ * driver, allocation failure, unreadable sysfs data or failed mmap).
+ * Regions mapped before a failure are left mapped.
+ */
+int
+vmbus_uio_map_resource(struct rte_vmbus_device *dev)
+{
+	/* a uio node is named either uio%d ... */
+	static const char shortprefix[] = "uio";
+	/* ... or uio:uio%d */
+	static const char longprefix[] = "uio:uio";
+	int i;
+	int found = 0;
+	struct dirent *e;
+	DIR *dir;
+	char dirname[PATH_MAX];
+	char dirname2[PATH_MAX];
+	char devname[PATH_MAX]; /* contains the /dev/uioX */
+	void *mapaddr;
+	unsigned uio_num = 0;
+	uint64_t phaddr;
+	uint64_t offset;
+	uint64_t pagesz;
+	ssize_t nb_maps;
+	struct rte_vmbus_id *loc = &dev->id;
+	struct uio_resource *uio_res;
+	struct uio_map *maps;
+
+	/* depending on kernel version, uio can be located in uio/uioX
+	 * or uio:uioX */
+	snprintf(dirname, sizeof(dirname),
+		"/sys/bus/vmbus/devices/" VMBUS_PRI_FMT "/uio", loc->sysfs_num);
+
+	dir = opendir(dirname);
+	if (dir == NULL) {
+		/* retry with the parent directory */
+		snprintf(dirname, sizeof(dirname),
+			"/sys/bus/vmbus/devices/" VMBUS_PRI_FMT, loc->sysfs_num);
+		dir = opendir(dirname);
+
+		if (dir == NULL) {
+			RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname);
+			return -1;
+		}
+	}
+
+	/* take the first file starting with "uio" */
+	while ((e = readdir(dir)) != NULL) {
+		const char *num;
+		char *endptr;
+
+		if (strncmp(e->d_name, shortprefix, sizeof(shortprefix) - 1) != 0)
+			continue;
+
+		/*
+		 * first try uio%d: the conversion only succeeded if it
+		 * consumed characters, i.e. endptr moved past the start of
+		 * the number (not merely past the start of the name)
+		 */
+		num = e->d_name + sizeof(shortprefix) - 1;
+		errno = 0;
+		uio_num = strtoull(num, &endptr, 10);
+		if (errno == 0 && endptr != num) {
+			snprintf(dirname2, sizeof(dirname2),
+				"%s/uio%u", dirname, uio_num);
+			found = 1;
+			break;
+		}
+
+		/* then try uio:uio%d, but only if the name is that long */
+		if (strncmp(e->d_name, longprefix, sizeof(longprefix) - 1) != 0)
+			continue;
+		num = e->d_name + sizeof(longprefix) - 1;
+		errno = 0;
+		uio_num = strtoull(num, &endptr, 10);
+		if (errno == 0 && endptr != num) {
+			snprintf(dirname2, sizeof(dirname2),
+				"%s/uio:uio%u", dirname, uio_num);
+			found = 1;
+			break;
+		}
+	}
+	closedir(dir);
+
+	/* No uio resource found */
+	if (!found) {
+		RTE_LOG(WARNING, EAL, " "VMBUS_PRI_FMT" not managed by UIO driver, "
+			"skipping\n", loc->sysfs_num);
+		return -1;
+	}
+
+	/*
+	 * allocate the mapping details for secondary processes
+	 * NOTE(review): nothing in this file links uio_res into a list once
+	 * it has been filled in; confirm where it is supposed to be saved.
+	 */
+	uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
+	if (uio_res == NULL) {
+		RTE_LOG(ERR, EAL,
+			"%s(): cannot store uio mmap details\n", __func__);
+		return -1;
+	}
+
+	snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
+	snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
+	memcpy(&uio_res->vmbus_addr, &dev->id, sizeof(uio_res->vmbus_addr));
+
+	/* collect info about device mappings */
+	nb_maps = vmbus_uio_get_mappings(dirname2, uio_res->maps,
+			sizeof(uio_res->maps) / sizeof(uio_res->maps[0]));
+	if (nb_maps < 0) {
+		rte_free(uio_res);
+		return -1;
+	}
+
+	RTE_LOG(DEBUG, EAL, "Found %d memory maps for device "VMBUS_PRI_FMT"\n",
+		(int)nb_maps, loc->sysfs_num);
+
+	uio_res->nb_maps = nb_maps;
+
+	pagesz = sysconf(_SC_PAGESIZE);
+
+	/* entries past nb_maps were never filled in, so stop there */
+	maps = uio_res->maps;
+	for (i = 0; i < (int)nb_maps; i++) {
+		phaddr = maps[i].phaddr;
+		if (phaddr == 0)
+			continue;
+
+		RTE_LOG(DEBUG, EAL, " mem_map%d: addr=0x%llx len = %llu\n",
+			i,
+			(unsigned long long)maps[i].phaddr,
+			(unsigned long long)maps[i].size);
+
+		/* region i is selected by passing i pages as the mmap offset */
+		offset = i * pagesz;
+		mapaddr = vmbus_map_resource(dev, NULL, devname, (off_t)offset,
+				(size_t)maps[i].size);
+		if (mapaddr == NULL) {
+			rte_free(uio_res);
+			return -1;
+		}
+
+		/* Important: offset for mapping can be non-zero, pad the addr */
+		mapaddr = ((char *)mapaddr + maps[i].offset);
+		maps[i].addr = mapaddr;
+		maps[i].offset = offset;
+		dev->mem_resource[i].addr = mapaddr;
+		dev->mem_resource[i].phys_addr = phaddr;
+		dev->mem_resource[i].len = maps[i].size;
+	}
+
+	return 0;
+}
+
+/*
+ * Ordering predicate for the sorted device list: yields 1 when 'id' has a
+ * larger device_id than 'id2' (i.e. sorts after it), 0 otherwise.
+ */
+static int
+vmbus_compare(struct rte_vmbus_id *id, struct rte_vmbus_id *id2)
+{
+	if (id->device_id > id2->device_id)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Scan one vmbus sysfs entry and, if it is a network device, insert it
+ * into vmbus_device_list, which is kept sorted by device_id.
+ *
+ * Returns 0 when the entry was added or skipped (its class_id is not
+ * VMBUS_NETWORK_DEVICE), and -1 when memory cannot be allocated or the
+ * sysfs data cannot be read or parsed.
+ */
+static int
+vmbus_scan_one(const char *name)
+{
+	char devdir[PATH_MAX];
+	char path[PATH_MAX];
+	char class_id[BUFSIZ];
+	unsigned long value;
+	unsigned int num;
+	struct rte_vmbus_device *dev;
+	struct rte_vmbus_device *pos;
+	FILE *fp;
+	int rc = -1;
+
+	dev = rte_zmalloc("vmbus_device", sizeof(*dev), 0);
+	if (dev == NULL)
+		return -1;
+
+	snprintf(devdir, sizeof(devdir), "%s/%s",
+		SYSFS_VMBUS_DEVICES, name);
+
+	/* the number in the sysfs directory name does not always match
+	 * the device id read below, so both are recorded
+	 */
+	if (sscanf(name, VMBUS_PRI_FMT, &num) != 1) {
+		RTE_LOG(ERR, EAL, "Unable to parse vmbus sysfs name\n");
+		goto discard;
+	}
+	dev->id.sysfs_num = num;
+
+	/* device id */
+	snprintf(path, sizeof(path), "%s/id", devdir);
+	if (eal_parse_sysfs_value(path, &value) < 0)
+		goto discard;
+	dev->id.device_id = (uint16_t)value;
+
+	/* monitor id */
+	snprintf(path, sizeof(path), "%s/monitor_id", devdir);
+	if (eal_parse_sysfs_value(path, &value) < 0)
+		goto discard;
+	dev->vmbus_monitor_id = value;
+
+	/* the class_id of the device must start with VMBUS_NETWORK_DEVICE */
+	snprintf(path, sizeof(path), "%s/class_id", devdir);
+	fp = fopen(path, "r");
+	if (fp == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+			__func__, path);
+		goto discard;
+	}
+	if (fgets(class_id, sizeof(class_id), fp) == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+			__func__, path);
+		fclose(fp);
+		goto discard;
+	}
+	fclose(fp);
+
+	if (strncmp(class_id, VMBUS_NETWORK_DEVICE,
+			strlen(VMBUS_NETWORK_DEVICE)) != 0) {
+		RTE_LOG(DEBUG, EAL, "%s(): skip vmbus_0_%u with class_id = %s",
+			__func__, dev->id.sysfs_num, class_id);
+		/* not an error: the device is simply of no interest */
+		rc = 0;
+		goto discard;
+	}
+
+	/* valid device: insert before the first entry that does not sort
+	 * ahead of it, or at the tail when there is no such entry
+	 */
+	RTE_LOG(DEBUG, EAL, "Adding vmbus device %d\n", dev->id.device_id);
+	TAILQ_FOREACH(pos, &vmbus_device_list, next) {
+		if (!vmbus_compare(&dev->id, &pos->id)) {
+			TAILQ_INSERT_BEFORE(pos, dev, next);
+			return 0;
+		}
+	}
+	TAILQ_INSERT_TAIL(&vmbus_device_list, dev, next);
+
+	return 0;
+
+discard:
+	rte_free(dev);
+	return rc;
+}
+
+/*
+ * Check that a sysfs entry name ends in a decimal device number, as in
+ * the 'vmbus_0_%d' format.
+ *
+ * buf is the NUL-terminated entry name; bufsize is the size of the buffer
+ * holding it and is not needed for the check.
+ *
+ * Returns 0 when the text after the last '_' parses as a number that fits
+ * in an int, -1 otherwise.
+ */
+static int
+check_vmbus_device(const char *buf, int bufsize)
+{
+	const char *digits;
+	char *end = NULL;
+	unsigned long devid;
+
+	/* the name is parsed in place, so no bounded copy is required */
+	(void)bufsize;
+
+	/* the format is 'vmbus_0_%d' */
+	digits = strrchr(buf, '_');
+	if (digits == NULL)
+		return -1;
+	digits++;
+
+	/*
+	 * errno must be cleared first: strtoul() only sets it on failure,
+	 * so a stale value left by an earlier call would otherwise make
+	 * every device look unparsable.
+	 */
+	errno = 0;
+	devid = strtoul(digits, &end, 10);
+	if (errno != 0 || (int)devid < 0) {
+		RTE_LOG(ERR, EAL, "%s(): can't parse devid: %s\n",
+			__func__, strerror(errno));
+		return -1;
+	}
+
+	/* no digit after the last '_': not in the expected name format */
+	if (end == digits)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Walk SYSFS_VMBUS_DEVICES and add every vmbus device found there to the
+ * device list.
+ *
+ * Hidden entries and names rejected by check_vmbus_device() are skipped.
+ * A missing sysfs directory is not an error (there is simply no vmbus).
+ * Returns 0 on success, -1 if the directory cannot be opened for another
+ * reason or if scanning one of the devices fails.
+ */
+static int
+vmbus_scan(void)
+{
+	DIR *sysdir;
+	struct dirent *entry;
+	int rc = 0;
+
+	sysdir = opendir(SYSFS_VMBUS_DEVICES);
+	if (sysdir == NULL) {
+		if (errno != ENOENT) {
+			RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
+				__func__, strerror(errno));
+			return -1;
+		}
+		return 0;
+	}
+
+	for (entry = readdir(sysdir); entry != NULL; entry = readdir(sysdir)) {
+		if (entry->d_name[0] == '.')
+			continue;
+
+		if (check_vmbus_device(entry->d_name, sizeof(entry->d_name)) != 0)
+			continue;
+
+		if (vmbus_scan_one(entry->d_name) < 0) {
+			rc = -1;
+			break;
+		}
+	}
+
+	closedir(sysdir);
+	return rc;
+}
+
+/*
+ * Init the VMBUS EAL subsystem: scan sysfs for vmbus devices unless VMBUS
+ * support has been disabled in the internal configuration.
+ * Returns 0 on success (or when disabled), -1 if the scan fails.
+ */
+int rte_eal_vmbus_init(void)
+{
+	if (!internal_config.no_vmbus && vmbus_scan() < 0) {
+		RTE_LOG(ERR, EAL, "%s(): Cannot scan vmbus\n", __func__);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Below is PROBE part of eal_vmbus library */
+
+/*
+ * Bind a driver to a device and call the devinit() function of the driver.
+ *
+ * The device id is not compared with the entries of the driver's id_table:
+ * any driver whose table holds at least one entry before the VMBUS_ID_ANY
+ * terminator is applied to the device.
+ *
+ * Returns 1 when the id_table is empty, 0 when the device is blacklisted,
+ * -1 when its resources cannot be mapped, and otherwise whatever the
+ * driver's devinit() returns.
+ */
+int
+rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
+		struct rte_vmbus_device *dev)
+{
+	struct rte_vmbus_id *first = dr->id_table;
+	struct rte_vmbus_id *devid = &dev->id;
+
+	/* positive value: this driver has nothing to offer */
+	if (first->device_id == VMBUS_ID_ANY)
+		return 1;
+
+	RTE_LOG(DEBUG, EAL, "VMBUS device "VMBUS_PRI_FMT"\n",
+		devid->sysfs_num);
+
+	RTE_LOG(DEBUG, EAL, " probe driver: %s\n", dr->name);
+
+	/* a blacklisted device is left alone, which is not an error */
+	if (dev->blacklisted) {
+		RTE_LOG(DEBUG, EAL, " Device is blacklisted, not initializing\n");
+		return 0;
+	}
+
+	/* the driver expects the device memory to be mapped already */
+	if (vmbus_uio_map_resource(dev) < 0)
+		return -1;
+
+	/* let the device point back at its driver, then hand it over */
+	dev->driver = dr;
+
+	return dr->devinit(dr, dev);
+}
+
+/*
+ * Call the devinit() function of all
+ * registered drivers for the vmbus device. Return -1 if no driver is
+ * found for this class of vmbus device.
+ * The present assumption is that we have drivers only for vmbus network
+ * devices. That's why we don't check driver's id_table now.
+ *
+ * Drivers are tried in registration order. The walk stops with 0 at the
+ * first driver that probes the device successfully and with -1 at the
+ * first driver that reports an error; drivers that decline the device
+ * (positive return) are skipped.
+ */
+static int
+vmbus_probe_all_drivers(struct rte_vmbus_device *dev)
+{
+	struct rte_vmbus_driver *dr = NULL;
+	int ret;
+
+	TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
+		ret = rte_eal_vmbus_probe_one_driver(dr, dev);
+		if (ret < 0) {
+			/* negative value is an error */
+			RTE_LOG(ERR, EAL, "Failed to probe driver %s\n", dr->name);
+			break;
+		}
+		if (ret > 0) {
+			/* positive value means driver not found */
+			RTE_LOG(DEBUG, EAL, "Driver %s not found\n", dr->name);
+			continue;
+		}
+
+		RTE_LOG(DEBUG, EAL, "OK. Driver was found and probed.\n");
+		return 0;
+	}
+	return -1;
+}
+
+
+/*
+ * Go through every device found by the vmbus scan and offer it to the
+ * registered drivers. The outcome of the individual probes is not
+ * reported to the caller: the function always returns 0.
+ */
+int
+rte_eal_vmbus_probe(void)
+{
+	struct rte_vmbus_device *vdev;
+
+	for (vdev = TAILQ_FIRST(&vmbus_device_list); vdev != NULL;
+			vdev = TAILQ_NEXT(vdev, next)) {
+		RTE_LOG(DEBUG, EAL, "Probing driver for device %d ...\n",
+			vdev->id.device_id);
+		vmbus_probe_all_drivers(vdev);
+	}
+
+	return 0;
+}
+
+/* register vmbus driver
+ *
+ * Appends the driver to the tail of the file-local vmbus_driver_list, so
+ * vmbus_probe_all_drivers() tries drivers in the order they were
+ * registered. The structure is linked in place, not copied.
+ */
+void
+rte_eal_vmbus_register(struct rte_vmbus_driver *driver)
+{
+ TAILQ_INSERT_TAIL(&vmbus_driver_list, driver, next);
+}
+
+/* unregister vmbus driver
+ *
+ * Unlinks the driver from vmbus_driver_list. No check is made here that
+ * the driver is actually on the list.
+ */
+void
+rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver)
+{
+ TAILQ_REMOVE(&vmbus_driver_list, driver, next);
+}
+
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 4d803d0..f5a1d07 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -287,6 +287,57 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
return diag;
}

+#ifdef RTE_LIBRTE_HV_PMD
+/*
+ * devinit() hook that rte_eth_driver_register() installs on VMBUS
+ * ethernet drivers: allocates an ethdev port named
+ * "<device_id>_<sysfs_num>" for the vmbus device, sets up its common
+ * fields and calls the PMD's eth_dev_init().
+ *
+ * Returns 0 on success, -ENOMEM when no port can be allocated, or the
+ * non-zero value returned by eth_dev_init(); in that last case the
+ * private data is released again and the port count is decremented.
+ */
+static int
+rte_vmbus_dev_init(struct rte_vmbus_driver *vmbus_drv,
+		struct rte_vmbus_device *vmbus_dev)
+{
+	char port_name[RTE_ETH_NAME_MAX_LEN];
+	struct rte_eth_dev *port;
+	struct eth_driver *drv;
+	int rc;
+
+	/* the vmbus driver sits in the union at the start of eth_driver */
+	drv = (struct eth_driver *)vmbus_drv;
+
+	snprintf(port_name, sizeof(port_name), "%u_%u",
+		vmbus_dev->id.device_id, vmbus_dev->id.sysfs_num);
+
+	port = rte_eth_dev_allocate(port_name);
+	if (port == NULL)
+		return -ENOMEM;
+
+	/* only the primary process allocates the private data */
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		port->data->dev_private = rte_zmalloc("ethdev private structure",
+				drv->dev_private_size,
+				RTE_CACHE_LINE_SIZE);
+		if (port->data->dev_private == NULL)
+			rte_panic("Cannot allocate memzone for private port data\n");
+	}
+
+	port->vmbus_dev = vmbus_dev;
+	port->driver = drv;
+	port->data->rx_mbuf_alloc_failed = 0;
+
+	/* start with an empty list of user callbacks */
+	TAILQ_INIT(&(port->callbacks));
+
+	/* default maximum frame size */
+	port->data->mtu = ETHER_MTU;
+
+	/* hand over to the PMD */
+	rc = (*drv->eth_dev_init)(drv, port);
+	if (rc != 0) {
+		PMD_DEBUG_TRACE("driver %s: eth_dev_init(device_id=0x%x)"
+			" failed\n", vmbus_drv->name,
+			(unsigned) vmbus_dev->id.device_id);
+		if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+			rte_free(port->data->dev_private);
+		nb_ports--;
+	}
+
+	return rc;
+}
+#endif
+
/**
* Register an Ethernet [Poll Mode] driver.
*
@@ -304,8 +355,20 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
void
rte_eth_driver_register(struct eth_driver *eth_drv)
{
- eth_drv->pci_drv.devinit = rte_eth_dev_init;
- rte_eal_pci_register(&eth_drv->pci_drv);
+ switch (eth_drv->bus_type) {
+ case RTE_BUS_PCI:
+ eth_drv->pci_drv.devinit = rte_eth_dev_init;
+ rte_eal_pci_register(&eth_drv->pci_drv);
+ break;
+#ifdef RTE_LIBRTE_HV_PMD
+ case RTE_BUS_VMBUS:
+ eth_drv->vmbus_drv.devinit = rte_vmbus_dev_init;
+ rte_eal_vmbus_register(&eth_drv->vmbus_drv);
+ break;
+#endif
+ default:
+ rte_panic("unknown bus type %u\n", eth_drv->bus_type);
+ }
}

int
@@ -1275,6 +1338,9 @@ rte_eth_has_link_state(uint8_t port_id)
}
dev = &rte_eth_devices[port_id];

+ if (dev->driver->bus_type != RTE_BUS_PCI)
+ return 0;
+
return (dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) != 0;
}

@@ -1457,9 +1523,17 @@ rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info)

FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get);
(*dev->dev_ops->dev_infos_get)(dev, dev_info);
- dev_info->pci_dev = dev->pci_dev;
- if (dev->driver)
- dev_info->driver_name = dev->driver->pci_drv.name;
+
+ if (dev->driver) {
+ switch (dev->driver->bus_type) {
+ case RTE_BUS_PCI:
+ dev_info->driver_name = dev->driver->pci_drv.name;
+ dev_info->pci_dev = dev->pci_dev;
+ break;
+ case RTE_BUS_VMBUS:
+ dev_info->driver_name = dev->driver->vmbus_drv.name;
+ }
+ }
}

void
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 9d43ca3..714f94d 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -175,6 +175,7 @@ extern "C" {
#include <rte_log.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
+#include <rte_vmbus.h>
#include <rte_mbuf.h>
#include "rte_ether.h"
#include "rte_eth_ctrl.h"
@@ -1537,7 +1538,10 @@ struct rte_eth_dev {
struct rte_eth_dev_data *data; /**< Pointer to device data */
const struct eth_driver *driver;/**< Driver for this device */
struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
- struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
+ union {
+ struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
+ struct rte_vmbus_device *vmbus_dev; /**< VMBUS info. supplied by probing */
+ };
struct rte_eth_dev_cb_list callbacks; /**< User application callbacks */
};

@@ -1671,7 +1675,14 @@ typedef int (*eth_dev_init_t)(struct eth_driver *eth_drv,
* - The size of the private data to allocate for each matching device.
*/
struct eth_driver {
- struct rte_pci_driver pci_drv; /**< The PMD is also a PCI driver. */
+ union {
+ struct rte_pci_driver pci_drv; /**< The PMD is also a PCI driver. */
+ struct rte_vmbus_driver vmbus_drv;/**< The PMD is also a VMBUS drv. */
+ };
+ enum {
+ RTE_BUS_PCI=0,
+ RTE_BUS_VMBUS
+ } bus_type; /**< Device bus type. */
eth_dev_init_t eth_dev_init; /**< Device init function. */
unsigned int dev_private_size; /**< Size of device private data. */
};
--
2.1.4
Neil Horman
2015-02-05 01:50:21 UTC
Permalink
Post by Stephen Hemminger
The Hyper-V device driver forces the base EAL code to change
to support multiple bus types. This is done by changing the pci_device
in the ether driver to a generic union.
As much as possible this is done in a backwards source-compatible
way. It will break the ABI for device drivers.
---
lib/librte_eal/common/Makefile | 2 +-
lib/librte_eal/common/eal_common_options.c | 5 +
lib/librte_eal/common/eal_internal_cfg.h | 1 +
lib/librte_eal/common/eal_options.h | 2 +
lib/librte_eal/common/eal_private.h | 10 +
lib/librte_eal/common/include/rte_pci.h | 2 +
lib/librte_eal/common/include/rte_vmbus.h | 153 +++++++
lib/librte_eal/linuxapp/eal/Makefile | 3 +
lib/librte_eal/linuxapp/eal/eal.c | 5 +
lib/librte_eal/linuxapp/eal/eal_vmbus.c | 658 +++++++++++++++++++++++++++++
lib/librte_ether/rte_ethdev.c | 84 +++-
lib/librte_ether/rte_ethdev.h | 15 +-
12 files changed, 932 insertions(+), 8 deletions(-)
create mode 100644 lib/librte_eal/common/include/rte_vmbus.h
create mode 100644 lib/librte_eal/linuxapp/eal/eal_vmbus.c
It seems like the vmbus functions need to be versioned here.
Stephen Hemminger
2015-02-05 01:13:26 UTC
Permalink
From: Stephen Hemminger <***@brocade.com>

Add new UIO driver in kernel to support DPDK Poll Mode Driver.

Signed-off-by: Stas Egorov <***@mirantis.com>
Signed-off-by: Stephen Hemminger <***@networkplumber.org>
---
lib/librte_eal/linuxapp/Makefile | 3 +
lib/librte_eal/linuxapp/hv_uio/Makefile | 57 ++
lib/librte_eal/linuxapp/hv_uio/hv_uio.c | 551 +++++++++++++++++
lib/librte_eal/linuxapp/hv_uio/hyperv_net.h | 907 ++++++++++++++++++++++++++++
4 files changed, 1518 insertions(+)
create mode 100644 lib/librte_eal/linuxapp/hv_uio/Makefile
create mode 100644 lib/librte_eal/linuxapp/hv_uio/hv_uio.c
create mode 100644 lib/librte_eal/linuxapp/hv_uio/hyperv_net.h

diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile
index 8fcfdf6..a28d289 100644
--- a/lib/librte_eal/linuxapp/Makefile
+++ b/lib/librte_eal/linuxapp/Makefile
@@ -41,5 +41,8 @@ endif
ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += xen_dom0
endif
+ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
+DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += hv_uio
+endif

include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/linuxapp/hv_uio/Makefile b/lib/librte_eal/linuxapp/hv_uio/Makefile
new file mode 100644
index 0000000..2ed7771
--- /dev/null
+++ b/lib/librte_eal/linuxapp/hv_uio/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+# Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = hv_uio
+MODULE_PATH = drivers/net/hv_uio
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=100
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -Winline -Wall -Werror
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+ifeq ($(CONFIG_RTE_LIBRTE_HV_DEBUG),y)
+MODULE_CFLAGS += -DDBG
+endif
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y := hv_uio.c
+
+include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/hv_uio/hv_uio.c b/lib/librte_eal/linuxapp/hv_uio/hv_uio.c
new file mode 100644
index 0000000..4cac075
--- /dev/null
+++ b/lib/librte_eal/linuxapp/hv_uio/hv_uio.c
@@ -0,0 +1,551 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/if_ether.h>
+#include <linux/uio_driver.h>
+#include <linux/slab.h>
+
+#include "hyperv_net.h"
+
+#define HV_DEVICE_ADD 0
+#define HV_DEVICE_REMOVE 1
+#define HV_RING_SIZE 512
+
+static uint mtu = ETH_DATA_LEN;
+/*
+ * List of resources to be mapped to uspace
+ * can be extended up to MAX_UIO_MAPS(5) items
+ */
+/* Indices into uio_info->mem[]; hyperv_probe() fills one region per entry */
+enum {
+ TXRX_RING_MAP, /* "txrx_rings": dev->channel->ringbuffer_pages */
+ INT_PAGE_MAP, /* "int_page": int_page from vmbus_get_monitor_pages() */
+ MON_PAGE_MAP, /* "monitor_pages": monitor_pages[1] */
+ RECV_BUF_MAP /* "recv_buf": net_device->recv_buf */
+};
+
+/* Per-device state attached to the hv_device with hv_set_drvdata() */
+struct hyperv_private_data {
+ struct netvsc_device *net_device; /* from alloc_net_device() */
+ struct uio_info *info; /* passed to uio_register_device() */
+};
+
+extern void vmbus_get_monitor_pages(unsigned long *int_page,
+ unsigned long monitor_pages[2]);
+
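+/*
+ * Addresses of pages in vmbus_connection from hv_vmbus. hyperv_uio_mmap()
+ * runs these through virt_to_phys(), i.e. it treats them as kernel virtual
+ * addresses, not physical ones.
+ */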
+static long unsigned int_page, monitor_pages[2];
+
+/*
+ * Translate the page offset of an mmap request into an index into
+ * info->mem[].
+ *
+ * Returns the index, or -1 when the offset is not below MAX_UIO_MAPS or the
+ * selected region has a size of zero.
+ */
+static inline int
+hyperv_uio_find_mem_index(struct uio_info *info, struct vm_area_struct *vma)
+{
+	if (vma->vm_pgoff >= MAX_UIO_MAPS)
+		return -1;
+
+	if (unlikely(info->mem[vma->vm_pgoff].size == 0))
+		return -1;
+
+	return (int)vma->vm_pgoff;
+}
+
+/*
+ * mmap handler installed as uio_info->mmap.
+ *
+ * The page offset of the request selects one of the info->mem[] regions (see
+ * hyperv_uio_find_mem_index()); that region is mapped at vma->vm_start.
+ * info->mem[].addr is passed through virt_to_phys() to obtain the first page
+ * frame.
+ *
+ * Returns 0 on success, -EINVAL when the region is unknown, empty, has no
+ * address recorded, or is shorter than the requested mapping; otherwise the
+ * result of remap_pfn_range().
+ */
+static int
+hyperv_uio_mmap(struct uio_info *info, struct vm_area_struct *vma)
+{
+	int mi = hyperv_uio_find_mem_index(info, vma);
+
+	if (mi < 0)
+		return -EINVAL;
+
+	/* Nothing to map if no buffer address has been recorded */
+	if (!info->mem[mi].addr)
+		return -EINVAL;
+
+	/*
+	 * Refuse requests longer than the region; otherwise user space could
+	 * map whatever kernel pages happen to follow the buffer.
+	 */
+	if (vma->vm_end - vma->vm_start > info->mem[mi].size)
+		return -EINVAL;
+
+	return remap_pfn_range(vma,
+			vma->vm_start,
+			virt_to_phys((void *)info->mem[mi].addr) >> PAGE_SHIFT,
+			vma->vm_end - vma->vm_start,
+			vma->vm_page_prot);
+}
+
+/*
+ * Allocate a zeroed netvsc_device and bind it to the given VMBus device.
+ *
+ * Returns the new object, or NULL (after logging) when the allocation fails.
+ * The result is plain kzalloc() memory; callers in this file release it with
+ * kfree().
+ */
+static struct netvsc_device *
+alloc_net_device(struct hv_device *dev)
+{
+	struct netvsc_device *nvdev = kzalloc(sizeof(*nvdev), GFP_KERNEL);
+
+	if (nvdev == NULL) {
+		pr_err("unable to allocate memory for netvsc_device\n");
+		return NULL;
+	}
+
+	nvdev->dev = dev;
+	/* Record whatever driver data is attached to dev at this moment */
+	nvdev->ndev = hv_get_drvdata(dev);
+	nvdev->start_remove = false;
+	nvdev->destroy = false;
+	nvdev->recv_section_cnt = 0;
+	init_waitqueue_head(&nvdev->wait_drain);
+
+	return nvdev;
+}
+
+/*
+ * Queue the NVSP init message for a single protocol version.
+ *
+ * min and max protocol version are both set to nvsp_ver.  When nvsp_ver is
+ * NVSP_PROTOCOL_VERSION_2 a second message carrying the NDIS configuration
+ * (module-level mtu, ieee8021q capability) is queued as well.
+ *
+ * Only the sends are performed here: the function neither waits for nor
+ * inspects a reply, so a return of 0 means the packets were handed to
+ * vmbus_sendpacket() successfully.  net_device is not used.
+ *
+ * Returns 0 on success or the error from vmbus_sendpacket().
+ */
+static int
+negotiate_nvsp_ver(struct hv_device *dev,
+		   struct netvsc_device *net_device,
+		   struct nvsp_message *init_packet,
+		   u32 nvsp_ver)
+{
+	int err;
+
+	/* Init request: offer exactly one version */
+	memset(init_packet, 0, sizeof(*init_packet));
+	init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
+	init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
+	init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
+
+	err = vmbus_sendpacket(dev->channel, init_packet,
+			       sizeof(struct nvsp_message),
+			       (unsigned long)init_packet,
+			       VM_PKT_DATA_INBAND,
+			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (err != 0) {
+		pr_err("unable to send nvsp negotiation packet\n");
+		return err;
+	}
+
+	/* The NDIS config message exists for NVSPv2 only */
+	if (nvsp_ver == NVSP_PROTOCOL_VERSION_2) {
+		memset(init_packet, 0, sizeof(*init_packet));
+		init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
+		init_packet->msg.v2_msg.send_ndis_config.mtu = mtu;
+		init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;
+
+		err = vmbus_sendpacket(dev->channel, init_packet,
+				       sizeof(struct nvsp_message),
+				       (unsigned long)init_packet,
+				       VM_PKT_DATA_INBAND, 0);
+	}
+
+	return err;
+}
+
+/*
+ * Tear down the receive buffer of a netvsc device.
+ *
+ * Steps, in order: send a revoke message when recv_section_cnt is non-zero,
+ * tear down the GPADL when a handle is set, free the buffer pages, and free
+ * the recv_section array.  The first step that fails returns its error
+ * immediately and leaves the remaining resources in place.
+ *
+ * Returns 0 on success, otherwise the error from vmbus_sendpacket() or
+ * vmbus_teardown_gpadl().
+ */
+static int
+netvsc_destroy_recv_buf(struct netvsc_device *net_device)
+{
+ struct nvsp_message *revoke_packet;
+ int ret = 0;
+
+ /*
+ * If we got a section count, it means we received a
+ * SendReceiveBufferComplete msg (ie sent
+ * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
+ * to send a revoke msg here
+ */
+ if (net_device->recv_section_cnt) {
+ /* Send the revoke receive buffer */
+ revoke_packet = &net_device->revoke_packet;
+ memset(revoke_packet, 0, sizeof(struct nvsp_message));
+
+ revoke_packet->hdr.msg_type =
+ NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
+ revoke_packet->msg.v1_msg.
+ revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
+
+ ret = vmbus_sendpacket(net_device->dev->channel,
+ revoke_packet,
+ sizeof(struct nvsp_message),
+ (unsigned long)revoke_packet,
+ VM_PKT_DATA_INBAND, 0);
+ /*
+ * If we failed here, we might as well return and
+ * have a leak rather than continue and a bugchk
+ */
+ if (ret != 0) {
+ pr_err("unable to send "
+ "revoke receive buffer to netvsp\n");
+ return ret;
+ }
+ }
+
+ /* Teardown the gpadl on the vsp end */
+ if (net_device->recv_buf_gpadl_handle) {
+ pr_devel("trying to teardown gpadl...\n");
+ ret = vmbus_teardown_gpadl(net_device->dev->channel,
+ net_device->recv_buf_gpadl_handle);
+
+ if (ret) {
+ pr_err("unable to teardown receive buffer's gpadl\n");
+ return ret;
+ }
+ /* Handle is cleared so a later call skips this step */
+ net_device->recv_buf_gpadl_handle = 0;
+ }
+
+ if (net_device->recv_buf) {
+ /* Free up the receive buffer */
+ /* Same get_order(recv_buf_size) as used when allocating */
+ free_pages((unsigned long)net_device->recv_buf,
+ get_order(net_device->recv_buf_size));
+ net_device->recv_buf = NULL;
+ }
+
+ if (net_device->recv_section) {
+ net_device->recv_section_cnt = 0;
+ kfree(net_device->recv_section);
+ net_device->recv_section = NULL;
+ }
+
+ return ret;
+}
+
+/*
+ * Allocate the receive buffer, create a GPADL for it and announce the GPADL
+ * handle to the host with an NVSP_MSG1_TYPE_SEND_RECV_BUF message.
+ *
+ * The function does not wait for the host's completion; once the message has
+ * been queued recv_section_cnt is set to 1.  On any failure after the NULL
+ * check, netvsc_destroy_recv_buf() is called to undo what was set up.
+ *
+ * Returns 0 on success, -ENODEV when net_dev is NULL, -ENOMEM when the pages
+ * cannot be allocated, or the error from vmbus_establish_gpadl() /
+ * vmbus_sendpacket().
+ */
+static int
+netvsc_init_recv_buf(struct hv_device *dev, struct netvsc_device *net_dev)
+{
+	struct nvsp_message *msg;
+	int ret;
+
+	if (net_dev == NULL)
+		return -ENODEV;
+
+	net_dev->recv_buf = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
+			get_order(net_dev->recv_buf_size));
+	if (net_dev->recv_buf == NULL) {
+		pr_err("unable to allocate receive "
+			"buffer of size %d\n", net_dev->recv_buf_size);
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	/*
+	 * Establish the gpadl handle for this buffer on this
+	 * channel. Note: This call uses the vmbus connection rather
+	 * than the channel to establish the gpadl handle.
+	 */
+	ret = vmbus_establish_gpadl(dev->channel, net_dev->recv_buf,
+				    net_dev->recv_buf_size,
+				    &net_dev->recv_buf_gpadl_handle);
+	if (ret != 0) {
+		pr_err("unable to establish receive buffer's gpadl\n");
+		goto cleanup;
+	}
+
+	/* Build the message that tells NetVSP about the gpadl handle */
+	msg = &net_dev->channel_init_pkt;
+	memset(msg, 0, sizeof(*msg));
+	msg->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
+	msg->msg.v1_msg.send_recv_buf.gpadl_handle =
+		net_dev->recv_buf_gpadl_handle;
+	msg->msg.v1_msg.send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
+
+	ret = vmbus_sendpacket(dev->channel, msg,
+			       sizeof(struct nvsp_message),
+			       (unsigned long)msg,
+			       VM_PKT_DATA_INBAND,
+			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret != 0) {
+		pr_err("unable to send receive buffer's gpadl to netvsp\n");
+		goto cleanup;
+	}
+
+	/* Non-zero count makes netvsc_destroy_recv_buf() send a revoke */
+	net_dev->recv_section_cnt = 1;
+	return 0;
+
+cleanup:
+	netvsc_destroy_recv_buf(net_dev);
+	return ret;
+}
+
+/*
+ * Run the NetVSP start-up sequence on an open channel: offer NVSP protocol
+ * version 2 and, if that send fails, version 1; send the NDIS version
+ * (major 5, minor 1); then post the receive buffer.
+ *
+ * Returns 0 on success, -ENODEV when net_dev is NULL, -EPROTO when neither
+ * version could be offered, or the error from vmbus_sendpacket() /
+ * netvsc_init_recv_buf().
+ */
+static int
+netvsc_connect_vsp(struct hv_device *dev, struct netvsc_device *net_dev)
+{
+	/* Candidate protocol versions, newest first */
+	static const u32 candidates[] = {
+		NVSP_PROTOCOL_VERSION_2,
+		NVSP_PROTOCOL_VERSION_1,
+	};
+	const u32 ndis_version = 0x00050001;
+	struct nvsp_message *init_packet;
+	unsigned int i;
+	int ret;
+
+	if (net_dev == NULL)
+		return -ENODEV;
+
+	init_packet = &net_dev->channel_init_pkt;
+
+	/* Negotiate the latest NVSP protocol supported */
+	for (i = 0; i < ARRAY_SIZE(candidates); i++) {
+		if (negotiate_nvsp_ver(dev, net_dev, init_packet,
+				       candidates[i]) == 0)
+			break;
+	}
+	if (i == ARRAY_SIZE(candidates))
+		return -EPROTO;
+	net_dev->nvsp_version = candidates[i];
+
+	pr_devel("Negotiated NVSP version:%x\n", net_dev->nvsp_version);
+
+	/* Send the ndis version: high 16 bits are major, low 16 bits minor */
+	memset(init_packet, 0, sizeof(*init_packet));
+	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
+	init_packet->msg.v1_msg.send_ndis_ver.ndis_major_ver =
+		ndis_version >> 16;
+	init_packet->msg.v1_msg.send_ndis_ver.ndis_minor_ver =
+		ndis_version & 0xFFFF;
+
+	ret = vmbus_sendpacket(dev->channel, init_packet,
+			       sizeof(struct nvsp_message),
+			       (unsigned long)init_packet,
+			       VM_PKT_DATA_INBAND, 0);
+	if (ret != 0) {
+		pr_err("unable to send init_packet via vmbus\n");
+		return ret;
+	}
+
+	/* Post the big receive buffer to NetVSP */
+	return netvsc_init_recv_buf(dev, net_dev);
+}
+
+/*
+ * Bring a device up: open the VMBus channel with two rings of
+ * HV_RING_SIZE pages each, set interrupt_mask in the inbound ring buffer,
+ * and run the NetVSP start-up sequence.  The channel is closed again when
+ * that sequence fails.
+ *
+ * Returns 0 on success or the error from vmbus_open() /
+ * netvsc_connect_vsp().
+ */
+static int
+hyperv_dev_add(struct hv_device *dev, struct netvsc_device *net_dev)
+{
+	int err;
+
+	net_dev->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
+
+	/* No channel callback is registered (NULL) */
+	err = vmbus_open(dev->channel, HV_RING_SIZE * PAGE_SIZE,
+			 HV_RING_SIZE * PAGE_SIZE, NULL, 0, NULL, dev);
+	if (err) {
+		pr_err("unable to open channel: %d\n", err);
+		return err;
+	}
+	/* NOTE(review): presumably suppresses host-to-guest signalling - confirm */
+	dev->channel->inbound.ring_buffer->interrupt_mask = 1;
+
+	err = netvsc_connect_vsp(dev, net_dev);
+	if (err) {
+		pr_err("unable to connect to NetVSP: %d\n", err);
+		vmbus_close(dev->channel);
+	}
+
+	return err;
+}
+
+/*
+ * Bring a device down: destroy the receive buffer and close the channel.
+ * Nothing is done when no receive buffer is currently allocated.
+ */
+static void
+hyperv_dev_remove(struct hv_device *dev, struct netvsc_device *net_dev)
+{
+	if (!net_dev->recv_buf)
+		return;
+
+	netvsc_destroy_recv_buf(net_dev);
+	vmbus_close(dev->channel);
+}
+
+#define MAX_HV_DEVICE_NUM 256
+static struct hv_device *hv_device_list[MAX_HV_DEVICE_NUM];
+
+/*
+ * irqcontrol callback of the UIO device, used as a command channel (e.g. to
+ * change the MTU) rather than for interrupt control.
+ *
+ * Layout of arg:
+ *   bit  0     command, HV_DEVICE_ADD or HV_DEVICE_REMOVE
+ *   bits 1-8   index into hv_device_list[]
+ *   bits 9-31  MTU, stored in the module-level mtu (HV_DEVICE_ADD only)
+ *
+ * HV_DEVICE_ADD re-runs hyperv_dev_add() and refreshes the ring and receive
+ * buffer addresses in info->mem[]; HV_DEVICE_REMOVE runs hyperv_dev_remove().
+ *
+ * Returns sizeof(s32) on success and a negative errno on failure (-ENODEV
+ * when no device is registered under the index, otherwise the error from
+ * hyperv_dev_add()).  Failures must not be reported as 0: the UIO core
+ * treats a zero return from irqcontrol as success.
+ *
+ * NOTE(review): the addresses are written to the uio_info the write arrived
+ * on, which is not necessarily the one belonging to the addressed device.
+ */
+static int
+hyperv_write_cb(struct uio_info *info, s32 arg)
+{
+	int cmd = arg & 1;
+	int dev_num = (arg >> 1) & 0xFF;
+	struct hyperv_private_data *pdata;
+	struct netvsc_device *net_device;
+	struct hv_device *dev;
+	int ret;
+
+	dev = hv_device_list[dev_num];
+	if (!dev)
+		return -ENODEV;
+
+	pdata = hv_get_drvdata(dev);
+	if (!pdata)
+		return -ENODEV;
+	net_device = pdata->net_device;
+
+	switch (cmd) {
+	case HV_DEVICE_ADD:
+		/* Shift as unsigned so a set sign bit cannot smear into the MTU */
+		mtu = (u32)arg >> 9;
+		pr_devel("New mtu = %u\n", mtu);
+		ret = hyperv_dev_add(dev, net_device);
+		if (ret)
+			return ret;
+
+		/* The buffers were re-created; publish their new addresses */
+		info->mem[TXRX_RING_MAP].addr =
+			(phys_addr_t)(dev->channel->ringbuffer_pages);
+		info->mem[RECV_BUF_MAP].addr =
+			(phys_addr_t)(net_device->recv_buf);
+		return sizeof(s32);
+	case HV_DEVICE_REMOVE:
+		hyperv_dev_remove(dev, net_device);
+		return sizeof(s32);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * VMBus probe callback: allocate the per-device state, bring the device up
+ * with hyperv_dev_add(), describe the four mappable regions in a uio_info
+ * and register it as a UIO device.  dev_id is not used.
+ *
+ * The device is entered into hv_device_list[] under its channel relid only
+ * after its driver data is set and the UIO registration succeeded, so
+ * hyperv_write_cb() never finds a half-initialised entry.
+ *
+ * Returns 0 on success.  On failure everything acquired so far is released
+ * and a negative errno is returned: -EINVAL when the relid does not fit
+ * hv_device_list[], -ENOMEM on allocation failure, or the error from
+ * hyperv_dev_add() / uio_register_device().
+ */
+static int
+hyperv_probe(struct hv_device *dev,
+	     const struct hv_vmbus_device_id *dev_id)
+{
+	u32 relid = dev->channel->offermsg.child_relid;
+	struct hyperv_private_data *pdata;
+	struct netvsc_device *net_device;
+	struct uio_info *info;
+	int ret;
+
+	/* relid is used as an array index below and must fit the table */
+	if (relid >= MAX_HV_DEVICE_NUM) {
+		pr_err("channel relid %u does not fit the device table\n",
+		       relid);
+		return -EINVAL;
+	}
+
+	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
+	if (!pdata) {
+		pr_err("Failed to allocate hyperv_private_data\n");
+		return -ENOMEM;
+	}
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		pr_err("Failed to allocate uio_info\n");
+		ret = -ENOMEM;
+		goto free_pdata;
+	}
+
+	net_device = alloc_net_device(dev);
+	if (!net_device) {
+		ret = -ENOMEM;
+		goto free_info;
+	}
+
+	ret = hyperv_dev_add(dev, net_device);
+	if (ret)
+		goto free_net_device;
+
+	/* Fill general uio info */
+	info->name = "hv_uio";
+	info->version = "1.0";
+	info->irqcontrol = hyperv_write_cb;
+	info->irq = UIO_IRQ_CUSTOM;
+
+	/* mem resources */
+	info->mem[TXRX_RING_MAP].name = "txrx_rings";
+	info->mem[TXRX_RING_MAP].addr =
+		(phys_addr_t)(dev->channel->ringbuffer_pages);
+	info->mem[TXRX_RING_MAP].size = HV_RING_SIZE * PAGE_SIZE * 2;
+	info->mem[TXRX_RING_MAP].memtype = UIO_MEM_LOGICAL;
+
+	info->mem[INT_PAGE_MAP].name = "int_page";
+	info->mem[INT_PAGE_MAP].addr =
+		(phys_addr_t)(int_page);
+	info->mem[INT_PAGE_MAP].size = PAGE_SIZE;
+	info->mem[INT_PAGE_MAP].memtype = UIO_MEM_LOGICAL;
+
+	info->mem[MON_PAGE_MAP].name = "monitor_pages";
+	info->mem[MON_PAGE_MAP].addr =
+		(phys_addr_t)(monitor_pages[1]);
+	info->mem[MON_PAGE_MAP].size = PAGE_SIZE;
+	info->mem[MON_PAGE_MAP].memtype = UIO_MEM_LOGICAL;
+
+	info->mem[RECV_BUF_MAP].name = "recv_buf";
+	info->mem[RECV_BUF_MAP].addr = (phys_addr_t)(net_device->recv_buf);
+	info->mem[RECV_BUF_MAP].size = net_device->recv_buf_size;
+	info->mem[RECV_BUF_MAP].memtype = UIO_MEM_LOGICAL;
+
+	info->mmap = hyperv_uio_mmap;
+
+	/* Driver data must be in place before the device becomes reachable */
+	pdata->info = info;
+	pdata->net_device = net_device;
+	hv_set_drvdata(dev, pdata);
+
+	pr_devel("register hyperv driver for hv_device {%pUl}\n", dev->dev_instance.b);
+	ret = uio_register_device(&dev->device, info);
+	if (ret) {
+		pr_err("Failed to register uio device for hyperv\n");
+		goto remove_dev;
+	}
+
+	hv_device_list[relid] = dev;
+
+	return 0;
+
+remove_dev:
+	hv_set_drvdata(dev, NULL);
+	/* Undo hyperv_dev_add(): releases the receive buffer, closes the channel */
+	hyperv_dev_remove(dev, net_device);
+free_net_device:
+	kfree(net_device);
+free_info:
+	kfree(info);
+free_pdata:
+	kfree(pdata);
+	return ret;
+}
+
+/*
+ * VMBus remove callback: take the device out of hv_device_list[],
+ * unregister the UIO device, bring the device down and free the per-device
+ * state.
+ *
+ * The list entry is cleared first so that hyperv_write_cb() can no longer
+ * look up a device whose state is about to be freed.
+ * NOTE(review): this does not serialise against a write already in
+ * progress; no locking exists in this file for that.
+ *
+ * Always returns 0.
+ */
+static int
+hyperv_remove(struct hv_device *dev)
+{
+	u32 relid = dev->channel->offermsg.child_relid;
+	struct hyperv_private_data *pdata;
+	struct uio_info *info;
+	struct netvsc_device *net_dev;
+
+	pr_devel("unregister hyperv driver for hv_device {%pUl}\n",
+		 dev->dev_instance.b);
+
+	/* Only clear the slot if it really refers to this device */
+	if (relid < MAX_HV_DEVICE_NUM && hv_device_list[relid] == dev)
+		hv_device_list[relid] = NULL;
+
+	pdata = hv_get_drvdata(dev);
+	info = pdata->info;
+	uio_unregister_device(info);
+	kfree(info);
+
+	net_dev = pdata->net_device;
+	hv_set_drvdata(dev, NULL);
+
+	hyperv_dev_remove(dev, net_dev);
+
+	kfree(net_dev);
+	kfree(pdata);
+
+	return 0;
+}
+
+/* VMBus device IDs handled by this driver: HV_NIC_GUID only */
+static const struct hv_vmbus_device_id hyperv_id_table[] = {
+ { HV_NIC_GUID, },
+ { }, /* empty entry ends the table */
+};
+
+MODULE_DEVICE_TABLE(vmbus, hyperv_id_table);
+
+/* Driver description registered with the VMBus core in hyperv_module_init() */
+static struct hv_driver hv_uio_drv = {
+ .name = KBUILD_MODNAME,
+ .id_table = hyperv_id_table,
+ .probe = hyperv_probe,
+ .remove = hyperv_remove,
+};
+
+/*
+ * Module entry point: fetch the interrupt and monitor page addresses from
+ * hv_vmbus into the file-level int_page / monitor_pages, then register the
+ * driver.  Returns the result of vmbus_driver_register().
+ */
+static int __init
+hyperv_module_init(void)
+{
+	int ret;
+
+	/* Must run before any probe, which copies these into uio_info */
+	vmbus_get_monitor_pages(&int_page, monitor_pages);
+
+	ret = vmbus_driver_register(&hv_uio_drv);
+
+	return ret;
+}
+
+/* Module exit point: unregister the driver registered by hyperv_module_init() */
+static void __exit
+hyperv_module_exit(void)
+{
+ vmbus_driver_unregister(&hv_uio_drv);
+}
+
+module_init(hyperv_module_init);
+module_exit(hyperv_module_exit);
+
+MODULE_DESCRIPTION("UIO driver for Hyper-V netVSC");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Brocade");
diff --git a/lib/librte_eal/linuxapp/hv_uio/hyperv_net.h b/lib/librte_eal/linuxapp/hv_uio/hyperv_net.h
new file mode 100644
index 0000000..8097779
--- /dev/null
+++ b/lib/librte_eal/linuxapp/hv_uio/hyperv_net.h
@@ -0,0 +1,907 @@
+/*
+ *
+ * Copyright (c) 2011, Microsoft Corporation.
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Authors:
+ * Haiyang Zhang <***@microsoft.com>
+ * Hank Janssen <***@microsoft.com>
+ * K. Y. Srinivasan <***@microsoft.com>
+ *
+ */
+
+#ifndef _HYPERV_NET_H
+#define _HYPERV_NET_H
+
+#include <linux/list.h>
+#include <linux/hyperv.h>
+#include <linux/rndis.h>
+
+/* Fwd declaration */
+struct hv_netvsc_packet;
+
+/* Represent the xfer page packet which contains 1 or more netvsc packet */
+struct xferpage_packet {
+ struct list_head list_ent;
+ u32 status;
+
+ /* # of netvsc packets this xfer packet contains */
+ u32 count;
+};
+
+/*
+ * Represent netvsc packet which contains 1 RNDIS and 1 ethernet frame
+ * within the RNDIS
+ */
+struct hv_netvsc_packet {
+ /* Bookkeeping stuff */
+ struct list_head list_ent;
+ u32 status;
+
+ struct hv_device *device;
+ bool is_data_pkt;
+ u16 vlan_tci;
+
+ /*
+ * Valid only for receives when we break a xfer page packet
+ * into multiple netvsc packets
+ */
+ struct xferpage_packet *xfer_page_pkt;
+
+ union {
+ struct {
+ u64 recv_completion_tid;
+ void *recv_completion_ctx;
+ void (*recv_completion)(void *context);
+ } recv;
+ struct {
+ u64 send_completion_tid;
+ void *send_completion_ctx;
+ void (*send_completion)(void *context);
+ } send;
+ } completion;
+
+ /* This points to the memory after page_buf */
+ void *extension;
+
+ u32 total_data_buflen;
+ /* Points to the send/receive buffer where the ethernet frame is */
+ void *data;
+ u32 page_buf_cnt;
+ struct hv_page_buffer page_buf[0];
+};
+
+struct netvsc_device_info {
+ unsigned char mac_adr[ETH_ALEN];
+ bool link_state; /* 0 - link up, 1 - link down */
+ int ring_size;
+};
+
+enum rndis_device_state {
+ RNDIS_DEV_UNINITIALIZED = 0,
+ RNDIS_DEV_INITIALIZING,
+ RNDIS_DEV_INITIALIZED,
+ RNDIS_DEV_DATAINITIALIZED,
+};
+
+struct rndis_device {
+ struct netvsc_device *net_dev;
+
+ enum rndis_device_state state;
+ bool link_state;
+ atomic_t new_req_id;
+
+ spinlock_t request_lock;
+ struct list_head req_list;
+
+ unsigned char hw_mac_adr[ETH_ALEN];
+};
+
+
+/* Interface */
+int netvsc_device_add(struct hv_device *device, void *additional_info);
+int netvsc_device_remove(struct hv_device *device);
+int netvsc_send(struct hv_device *device,
+ struct hv_netvsc_packet *packet);
+void netvsc_linkstatus_callback(struct hv_device *device_obj,
+ unsigned int status);
+int netvsc_recv_callback(struct hv_device *device_obj,
+ struct hv_netvsc_packet *packet);
+int rndis_filter_open(struct hv_device *dev);
+int rndis_filter_close(struct hv_device *dev);
+int rndis_filter_device_add(struct hv_device *dev,
+ void *additional_info);
+void rndis_filter_device_remove(struct hv_device *dev);
+int rndis_filter_receive(struct hv_device *dev,
+ struct hv_netvsc_packet *pkt);
+
+
+
+int rndis_filter_send(struct hv_device *dev,
+ struct hv_netvsc_packet *pkt);
+
+int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter);
+int rndis_filter_set_device_mac(struct hv_device *hdev, char *mac);
+
+
+#define NVSP_INVALID_PROTOCOL_VERSION ((u32)0xFFFFFFFF)
+
+#define NVSP_PROTOCOL_VERSION_1 2
+#define NVSP_PROTOCOL_VERSION_2 0x30002
+
+enum {
+ NVSP_MSG_TYPE_NONE = 0,
+
+ /* Init Messages */
+ NVSP_MSG_TYPE_INIT = 1,
+ NVSP_MSG_TYPE_INIT_COMPLETE = 2,
+
+ NVSP_VERSION_MSG_START = 100,
+
+ /* Version 1 Messages */
+ NVSP_MSG1_TYPE_SEND_NDIS_VER = NVSP_VERSION_MSG_START,
+
+ NVSP_MSG1_TYPE_SEND_RECV_BUF,
+ NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE,
+ NVSP_MSG1_TYPE_REVOKE_RECV_BUF,
+
+ NVSP_MSG1_TYPE_SEND_SEND_BUF,
+ NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE,
+ NVSP_MSG1_TYPE_REVOKE_SEND_BUF,
+
+ NVSP_MSG1_TYPE_SEND_RNDIS_PKT,
+ NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE,
+
+ /* Version 2 messages */
+ NVSP_MSG2_TYPE_SEND_CHIMNEY_DELEGATED_BUF,
+ NVSP_MSG2_TYPE_SEND_CHIMNEY_DELEGATED_BUF_COMP,
+ NVSP_MSG2_TYPE_REVOKE_CHIMNEY_DELEGATED_BUF,
+
+ NVSP_MSG2_TYPE_RESUME_CHIMNEY_RX_INDICATION,
+
+ NVSP_MSG2_TYPE_TERMINATE_CHIMNEY,
+ NVSP_MSG2_TYPE_TERMINATE_CHIMNEY_COMP,
+
+ NVSP_MSG2_TYPE_INDICATE_CHIMNEY_EVENT,
+
+ NVSP_MSG2_TYPE_SEND_CHIMNEY_PKT,
+ NVSP_MSG2_TYPE_SEND_CHIMNEY_PKT_COMP,
+
+ NVSP_MSG2_TYPE_POST_CHIMNEY_RECV_REQ,
+ NVSP_MSG2_TYPE_POST_CHIMNEY_RECV_REQ_COMP,
+
+ NVSP_MSG2_TYPE_ALLOC_RXBUF,
+ NVSP_MSG2_TYPE_ALLOC_RXBUF_COMP,
+
+ NVSP_MSG2_TYPE_FREE_RXBUF,
+
+ NVSP_MSG2_TYPE_SEND_VMQ_RNDIS_PKT,
+ NVSP_MSG2_TYPE_SEND_VMQ_RNDIS_PKT_COMP,
+
+ NVSP_MSG2_TYPE_SEND_NDIS_CONFIG,
+
+ NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE,
+ NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE_COMP,
+};
+
+enum {
+ NVSP_STAT_NONE = 0,
+ NVSP_STAT_SUCCESS,
+ NVSP_STAT_FAIL,
+ NVSP_STAT_PROTOCOL_TOO_NEW,
+ NVSP_STAT_PROTOCOL_TOO_OLD,
+ NVSP_STAT_INVALID_RNDIS_PKT,
+ NVSP_STAT_BUSY,
+ NVSP_STAT_PROTOCOL_UNSUPPORTED,
+ NVSP_STAT_MAX,
+};
+
+struct nvsp_message_header {
+ u32 msg_type;
+};
+
+/* Init Messages */
+
+/*
+ * This message is used by the VSC to initialize the channel after the channel
+ * has been opened. This message should never include anything other than
+ * versioning (i.e. this message will be the same forever).
+ */
+struct nvsp_message_init {
+ u32 min_protocol_ver;
+ u32 max_protocol_ver;
+} __packed;
+
+/*
+ * This message is used by the VSP to complete the initialization of the
+ * channel. This message should never include anything other than versioning
+ * (i.e. this message will be the same forever).
+ */
+struct nvsp_message_init_complete {
+ u32 negotiated_protocol_ver;
+ u32 max_mdl_chain_len;
+ u32 status;
+} __packed;
+
+union nvsp_message_init_uber {
+ struct nvsp_message_init init;
+ struct nvsp_message_init_complete init_complete;
+} __packed;
+
+/* Version 1 Messages */
+
+/*
+ * This message is used by the VSC to send the NDIS version to the VSP. The VSP
+ * can use this information when handling OIDs sent by the VSC.
+ */
+struct nvsp_1_message_send_ndis_version {
+ u32 ndis_major_ver;
+ u32 ndis_minor_ver;
+} __packed;
+
+/*
+ * This message is used by the VSC to send a receive buffer to the VSP. The VSP
+ * can then use the receive buffer to send data to the VSC.
+ */
+struct nvsp_1_message_send_receive_buffer {
+ u32 gpadl_handle;
+ u16 id;
+} __packed;
+
+struct nvsp_1_receive_buffer_section {
+ u32 offset;
+ u32 sub_alloc_size;
+ u32 num_sub_allocs;
+ u32 end_offset;
+} __packed;
+
+/*
+ * This message is used by the VSP to acknowledge a receive buffer sent by the
+ * VSC. This message must be sent by the VSP before the VSP uses the receive
+ * buffer.
+ */
+struct nvsp_1_message_send_receive_buffer_complete {
+ u32 status;
+ u32 num_sections;
+
+ /*
+ * The receive buffer is split into two parts, a large suballocation
+ * section and a small suballocation section. These sections are then
+ * suballocated by a certain size.
+ */
+
+ /*
+ * For example, the following break up of the receive buffer has 6
+ * large suballocations and 10 small suballocations.
+ */
+
+ /*
+ * | Large Section | | Small Section |
+ * ------------------------------------------------------------
+ * | | | | | | | | | | | | | | | | | |
+ * | |
+ * LargeOffset SmallOffset
+ */
+
+ struct nvsp_1_receive_buffer_section sections[1];
+} __packed;
+
+/*
+ * This message is sent by the VSC to revoke the receive buffer. After the VSP
+ * completes this transaction, the vsp should never use the receive buffer
+ * again.
+ */
+struct nvsp_1_message_revoke_receive_buffer {
+ u16 id;
+};
+
+/*
+ * This message is used by the VSC to send a send buffer to the VSP. The VSC
+ * can then use the send buffer to send data to the VSP.
+ */
+struct nvsp_1_message_send_send_buffer {
+ u32 gpadl_handle;
+ u16 id;
+} __packed;
+
+/*
+ * This message is used by the VSP to acknowledge a send buffer sent by the
+ * VSC. This message must be sent by the VSP before the VSP uses the sent
+ * buffer.
+ */
+struct nvsp_1_message_send_send_buffer_complete {
+ u32 status;
+
+ /*
+ * The VSC gets to choose the size of the send buffer and the VSP gets
+ * to choose the sections size of the buffer. This was done to enable
+ * dynamic reconfigurations when the cost of GPA-direct buffers
+ * decreases.
+ */
+ u32 section_size;
+} __packed;
+
+/*
+ * This message is sent by the VSC to revoke the send buffer. After the VSP
+ * completes this transaction, the vsp should never use the send buffer again.
+ */
+struct nvsp_1_message_revoke_send_buffer {
+ u16 id;
+};
+
+/*
+ * This message is used by both the VSP and the VSC to send a RNDIS message to
+ * the opposite channel endpoint.
+ */
+struct nvsp_1_message_send_rndis_packet {
+ /*
+ * This field is specified by RNDIS, which assumes there are two
+ * different channels of communication. However, the Network VSP only
+ * has one. Therefore, the channel travels with the RNDIS packet.
+ */
+ u32 channel_type;
+
+ /*
+ * This field is used to send part or all of the data through a send
+ * buffer. This value specifies an index into the send buffer. If the
+ * index is 0xFFFFFFFF, then the send buffer is not being used and all
+ * of the data was sent through other VMBus mechanisms.
+ */
+ u32 send_buf_section_index;
+ u32 send_buf_section_size;
+} __packed;
+
+/*
+ * This message is used by both the VSP and the VSC to complete a RNDIS message
+ * to the opposite channel endpoint. At this point, the initiator of this
+ * message cannot use any resources associated with the original RNDIS packet.
+ */
+struct nvsp_1_message_send_rndis_packet_complete {
+ u32 status;
+};
+
+union nvsp_1_message_uber {
+ struct nvsp_1_message_send_ndis_version send_ndis_ver;
+
+ struct nvsp_1_message_send_receive_buffer send_recv_buf;
+ struct nvsp_1_message_send_receive_buffer_complete
+ send_recv_buf_complete;
+ struct nvsp_1_message_revoke_receive_buffer revoke_recv_buf;
+
+ struct nvsp_1_message_send_send_buffer send_send_buf;
+ struct nvsp_1_message_send_send_buffer_complete send_send_buf_complete;
+ struct nvsp_1_message_revoke_send_buffer revoke_send_buf;
+
+ struct nvsp_1_message_send_rndis_packet send_rndis_pkt;
+ struct nvsp_1_message_send_rndis_packet_complete
+ send_rndis_pkt_complete;
+} __packed;
+
+
+/*
+ * Network VSP protocol version 2 messages:
+ */
+struct nvsp_2_vsc_capability {
+ union {
+ u64 data;
+ struct {
+ u64 vmq:1;
+ u64 chimney:1;
+ u64 sriov:1;
+ u64 ieee8021q:1;
+ u64 correlation_id:1;
+ };
+ };
+} __packed;
+
+struct nvsp_2_send_ndis_config {
+ u32 mtu;
+ u32 reserved;
+ struct nvsp_2_vsc_capability capability;
+} __packed;
+
+/* Allocate receive buffer */
+struct nvsp_2_alloc_rxbuf {
+ /* Allocation ID to match the allocation request and response */
+ u32 alloc_id;
+
+ /* Length of the VM shared memory receive buffer that needs to
+ * be allocated
+ */
+ u32 len;
+} __packed;
+
+/* Allocate receive buffer complete */
+struct nvsp_2_alloc_rxbuf_comp {
+ /* The NDIS_STATUS code for buffer allocation */
+ u32 status;
+
+ u32 alloc_id;
+
+ /* GPADL handle for the allocated receive buffer */
+ u32 gpadl_handle;
+
+ /* Receive buffer ID */
+ u64 recv_buf_id;
+} __packed;
+
+struct nvsp_2_free_rxbuf {
+ u64 recv_buf_id;
+} __packed;
+
+union nvsp_2_message_uber {
+ struct nvsp_2_send_ndis_config send_ndis_config;
+ struct nvsp_2_alloc_rxbuf alloc_rxbuf;
+ struct nvsp_2_alloc_rxbuf_comp alloc_rxbuf_comp;
+ struct nvsp_2_free_rxbuf free_rxbuf;
+} __packed;
+
+union nvsp_all_messages {
+ union nvsp_message_init_uber init_msg;
+ union nvsp_1_message_uber v1_msg;
+ union nvsp_2_message_uber v2_msg;
+} __packed;
+
+/* ALL Messages */
+struct nvsp_message {
+ struct nvsp_message_header hdr;
+ union nvsp_all_messages msg;
+} __packed;
+
+
+#define NETVSC_MTU 65536
+
+#define NETVSC_RECEIVE_BUFFER_SIZE (MAX_ORDER_NR_PAGES * PAGE_SIZE)
+
+#define NETVSC_RECEIVE_BUFFER_ID 0xcafe
+
+/* Per netvsc channel-specific */
+struct netvsc_device {
+ /* VMBus device this state belongs to */
+ struct hv_device *dev;
+
+ /* NVSP protocol version recorded by netvsc_connect_vsp() in hv_uio.c */
+ u32 nvsp_version;
+
+ atomic_t num_outstanding_sends;
+ wait_queue_head_t wait_drain;
+ bool start_remove;
+ bool destroy;
+ /*
+ * List of free preallocated hv_netvsc_packet to represent receive
+ * packet
+ */
+ struct list_head recv_pkt_list;
+ spinlock_t recv_pkt_list_lock;
+
+ /* Receive buffer allocated by us but managed by NetVSP */
+ void *recv_buf;
+ /* Size in bytes of recv_buf */
+ u32 recv_buf_size;
+ /* GPADL handle for recv_buf; 0 when no GPADL is established */
+ u32 recv_buf_gpadl_handle;
+ /* Non-zero once the receive buffer has been announced to the host */
+ u32 recv_section_cnt;
+ struct nvsp_1_receive_buffer_section *recv_section;
+
+ /* Used for NetVSP initialization protocol */
+ struct completion channel_init_wait;
+ struct nvsp_message channel_init_pkt;
+
+ /* Message buffer used when revoking the receive buffer */
+ struct nvsp_message revoke_packet;
+ /* unsigned char HwMacAddr[HW_MACADDR_LEN]; */
+
+ struct net_device *ndev;
+
+ /* Holds rndis device info */
+ void *extension;
+};
+
+/* NdisInitialize message */
+struct rndis_initialize_request {
+ u32 req_id;
+ u32 major_ver;
+ u32 minor_ver;
+ u32 max_xfer_size;
+};
+
+/* Response to NdisInitialize */
+struct rndis_initialize_complete {
+ u32 req_id;
+ u32 status;
+ u32 major_ver;
+ u32 minor_ver;
+ u32 dev_flags;
+ u32 medium;
+ u32 max_pkt_per_msg;
+ u32 max_xfer_size;
+ u32 pkt_alignment_factor;
+ u32 af_list_offset;
+ u32 af_list_size;
+};
+
+/* Call manager devices only: Information about an address family */
+/* supported by the device is appended to the response to NdisInitialize. */
+struct rndis_co_address_family {
+ u32 address_family;
+ u32 major_ver;
+ u32 minor_ver;
+};
+
+/* NdisHalt message */
+struct rndis_halt_request {
+ u32 req_id;
+};
+
+/* NdisQueryRequest message */
+struct rndis_query_request {
+ u32 req_id;
+ u32 oid;
+ u32 info_buflen;
+ u32 info_buf_offset;
+ u32 dev_vc_handle;
+};
+
+/* Response to NdisQueryRequest */
+struct rndis_query_complete {
+ u32 req_id;
+ u32 status;
+ u32 info_buflen;
+ u32 info_buf_offset;
+};
+
+/* NdisSetRequest message */
+struct rndis_set_request {
+ u32 req_id;
+ u32 oid;
+ u32 info_buflen;
+ u32 info_buf_offset;
+ u32 dev_vc_handle;
+};
+
+/* Response to NdisSetRequest */
+struct rndis_set_complete {
+ u32 req_id;
+ u32 status;
+};
+
+/* NdisReset message */
+struct rndis_reset_request {
+ u32 reserved;
+};
+
+/* Response to NdisReset */
+struct rndis_reset_complete {
+ u32 status;
+ u32 addressing_reset;
+};
+
+/* NdisMIndicateStatus message */
+struct rndis_indicate_status {
+ u32 status;
+ u32 status_buflen;
+ u32 status_buf_offset;
+};
+
+/* Diagnostic information passed as the status buffer in */
+/* struct rndis_indicate_status messages signifying error conditions. */
+struct rndis_diagnostic_info {
+ u32 diag_status;
+ u32 error_offset;
+};
+
+/* NdisKeepAlive message */
+struct rndis_keepalive_request {
+ u32 req_id;
+};
+
+/* Response to NdisKeepAlive */
+struct rndis_keepalive_complete {
+ u32 req_id;
+ u32 status;
+};
+
+/*
+ * Data message. All Offset fields contain byte offsets from the beginning of
+ * struct rndis_packet. All Length fields are in bytes. VcHandle is set
+ * to 0 for connectionless data, otherwise it contains the VC handle.
+ */
+struct rndis_packet {
+ u32 data_offset;
+ u32 data_len;
+ u32 oob_data_offset;
+ u32 oob_data_len;
+ u32 num_oob_data_elements;
+ u32 per_pkt_info_offset;
+ u32 per_pkt_info_len;
+ u32 vc_handle;
+ u32 reserved;
+};
+
+/* Optional Out of Band data associated with a Data message. */
+struct rndis_oobd {
+ u32 size;
+ u32 type;
+ u32 class_info_offset;
+};
+
+/* Packet extension field contents associated with a Data message. */
+struct rndis_per_packet_info {
+ u32 size;
+ u32 type;
+ u32 ppi_offset;
+};
+
+enum ndis_per_pkt_info_type {
+ TCPIP_CHKSUM_PKTINFO,
+ IPSEC_PKTINFO,
+ TCP_LARGESEND_PKTINFO,
+ CLASSIFICATION_HANDLE_PKTINFO,
+ NDIS_RESERVED,
+ SG_LIST_PKTINFO,
+ IEEE_8021Q_INFO,
+ ORIGINAL_PKTINFO,
+ PACKET_CANCEL_ID,
+ ORIGINAL_NET_BUFLIST,
+ CACHED_NET_BUFLIST,
+ SHORT_PKT_PADINFO,
+ MAX_PER_PKT_INFO
+};
+
+struct ndis_pkt_8021q_info {
+ union {
+ struct {
+ u32 pri:3; /* User Priority */
+ u32 cfi:1; /* Canonical Format ID */
+ u32 vlanid:12; /* VLAN ID */
+ u32 reserved:16;
+ };
+ u32 value;
+ };
+};
+
+#define NDIS_VLAN_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
+ sizeof(struct ndis_pkt_8021q_info))
+
+/* Format of Information buffer passed in a SetRequest for the OID */
+/* OID_GEN_RNDIS_CONFIG_PARAMETER. */
+struct rndis_config_parameter_info {
+ u32 parameter_name_offset;
+ u32 parameter_name_length;
+ u32 parameter_type;
+ u32 parameter_value_offset;
+ u32 parameter_value_length;
+};
+
+/* Values for ParameterType in struct rndis_config_parameter_info */
+#define RNDIS_CONFIG_PARAM_TYPE_INTEGER 0
+#define RNDIS_CONFIG_PARAM_TYPE_STRING 2
+
+/* CONDIS Miniport messages for connection oriented devices */
+/* that do not implement a call manager. */
+
+/* CoNdisMiniportCreateVc message */
+struct rcondis_mp_create_vc {
+ u32 req_id;
+ u32 ndis_vc_handle;
+};
+
+/* Response to CoNdisMiniportCreateVc */
+struct rcondis_mp_create_vc_complete {
+ u32 req_id;
+ u32 dev_vc_handle;
+ u32 status;
+};
+
+/* CoNdisMiniportDeleteVc message */
+struct rcondis_mp_delete_vc {
+ u32 req_id;
+ u32 dev_vc_handle;
+};
+
+/* Response to CoNdisMiniportDeleteVc */
+struct rcondis_mp_delete_vc_complete {
+ u32 req_id;
+ u32 status;
+};
+
+/* CoNdisMiniportQueryRequest message */
+struct rcondis_mp_query_request {
+ u32 req_id;
+ u32 request_type;
+ u32 oid;
+ u32 dev_vc_handle;
+ u32 info_buflen;
+ u32 info_buf_offset;
+};
+
+/* CoNdisMiniportSetRequest message */
+struct rcondis_mp_set_request {
+ u32 req_id;
+ u32 request_type;
+ u32 oid;
+ u32 dev_vc_handle;
+ u32 info_buflen;
+ u32 info_buf_offset;
+};
+
+/* CoNdisIndicateStatus message */
+struct rcondis_indicate_status {
+ u32 ndis_vc_handle;
+ u32 status;
+ u32 status_buflen;
+ u32 status_buf_offset;
+};
+
+/* CONDIS Call/VC parameters */
+struct rcondis_specific_parameters {
+ u32 parameter_type;
+ u32 parameter_length;
+ u32 parameter_lffset;
+};
+
+struct rcondis_media_parameters {
+ u32 flags;
+ u32 reserved1;
+ u32 reserved2;
+ struct rcondis_specific_parameters media_specific;
+};
+
+struct rndis_flowspec {
+ u32 token_rate;
+ u32 token_bucket_size;
+ u32 peak_bandwidth;
+ u32 latency;
+ u32 delay_variation;
+ u32 service_type;
+ u32 max_sdu_size;
+ u32 minimum_policed_size;
+};
+
+struct rcondis_call_manager_parameters {
+ struct rndis_flowspec transmit;
+ struct rndis_flowspec receive;
+ struct rcondis_specific_parameters call_mgr_specific;
+};
+
+/* CoNdisMiniportActivateVc message */
+struct rcondis_mp_activate_vc_request {
+ u32 req_id;
+ u32 flags;
+ u32 dev_vc_handle;
+ u32 media_params_offset;
+ u32 media_params_length;
+ u32 call_mgr_params_offset;
+ u32 call_mgr_params_length;
+};
+
+/* Response to CoNdisMiniportActivateVc */
+struct rcondis_mp_activate_vc_complete {
+ u32 req_id;
+ u32 status;
+};
+
+/* CoNdisMiniportDeactivateVc message */
+struct rcondis_mp_deactivate_vc_request {
+ u32 req_id;
+ u32 flags;
+ u32 dev_vc_handle;
+};
+
+/* Response to CoNdisMiniportDeactivateVc */
+struct rcondis_mp_deactivate_vc_complete {
+ u32 req_id;
+ u32 status;
+};
+
+
+/* union with all of the RNDIS messages */
+union rndis_message_container {
+ struct rndis_packet pkt;
+ struct rndis_initialize_request init_req;
+ struct rndis_halt_request halt_req;
+ struct rndis_query_request query_req;
+ struct rndis_set_request set_req;
+ struct rndis_reset_request reset_req;
+ struct rndis_keepalive_request keep_alive_req;
+ struct rndis_indicate_status indicate_status;
+ struct rndis_initialize_complete init_complete;
+ struct rndis_query_complete query_complete;
+ struct rndis_set_complete set_complete;
+ struct rndis_reset_complete reset_complete;
+ struct rndis_keepalive_complete keep_alive_complete;
+ struct rcondis_mp_create_vc co_miniport_create_vc;
+ struct rcondis_mp_delete_vc co_miniport_delete_vc;
+ struct rcondis_indicate_status co_indicate_status;
+ struct rcondis_mp_activate_vc_request co_miniport_activate_vc;
+ struct rcondis_mp_deactivate_vc_request co_miniport_deactivate_vc;
+ struct rcondis_mp_create_vc_complete co_miniport_create_vc_complete;
+ struct rcondis_mp_delete_vc_complete co_miniport_delete_vc_complete;
+ struct rcondis_mp_activate_vc_complete co_miniport_activate_vc_complete;
+ struct rcondis_mp_deactivate_vc_complete
+ co_miniport_deactivate_vc_complete;
+};
+
+/* Remote NDIS message format */
+struct rndis_message {
+ u32 ndis_msg_type;
+
+ /* Total length of this message, from the beginning */
+ /* of the struct rndis_message, in bytes. */
+ u32 msg_len;
+
+ /* Actual message */
+ union rndis_message_container msg;
+};
+
+
+struct rndis_filter_packet {
+ void *completion_ctx;
+ void (*completion)(void *context);
+ struct rndis_message msg;
+};
+
+/* Handy macros */
+
+/* get the size of an RNDIS message. Pass in the message type, */
+/* struct rndis_set_request, struct rndis_packet for example */
+#define RNDIS_MESSAGE_SIZE(msg) \
+ (sizeof(msg) + (sizeof(struct rndis_message) - \
+ sizeof(union rndis_message_container)))
+
+/*
+ * Accessor macros. Every macro argument is parenthesized so that any
+ * pointer-valued expression (not only a plain identifier) can be passed.
+ */
+
+/* get pointer to info buffer with message pointer */
+#define MESSAGE_TO_INFO_BUFFER(msg) \
+	(((unsigned char *)(msg)) + (msg)->info_buf_offset)
+
+/* get pointer to status buffer with message pointer */
+#define MESSAGE_TO_STATUS_BUFFER(msg) \
+	(((unsigned char *)(msg)) + (msg)->status_buf_offset)
+
+/* get pointer to OOBD buffer with message pointer */
+#define MESSAGE_TO_OOBD_BUFFER(msg) \
+	(((unsigned char *)(msg)) + (msg)->oob_data_offset)
+
+/*
+ * get pointer to data buffer with message pointer
+ * NOTE(review): this adds per_pkt_info_offset, not a data offset field;
+ * kept as-is, confirm against the RNDIS packet layout before relying on it.
+ */
+#define MESSAGE_TO_DATA_BUFFER(msg) \
+	(((unsigned char *)(msg)) + (msg)->per_pkt_info_offset)
+
+/* get pointer to contained message from NDIS_MESSAGE pointer */
+#define RNDIS_MESSAGE_PTR_TO_MESSAGE_PTR(rndis_msg) \
+	((void *) &(rndis_msg)->msg)
+
+/* reinterpret a raw message pointer as an untyped message pointer */
+#define RNDIS_MESSAGE_RAW_PTR_TO_MESSAGE_PTR(rndis_msg) \
+	((void *) (rndis_msg))
+
+
+#define __struct_bcount(x)
+
+
+
+#define RNDIS_HEADER_SIZE (sizeof(struct rndis_message) - \
+ sizeof(union rndis_message_container))
+
+#define NDIS_PACKET_TYPE_DIRECTED 0x00000001
+#define NDIS_PACKET_TYPE_MULTICAST 0x00000002
+#define NDIS_PACKET_TYPE_ALL_MULTICAST 0x00000004
+#define NDIS_PACKET_TYPE_BROADCAST 0x00000008
+#define NDIS_PACKET_TYPE_SOURCE_ROUTING 0x00000010
+#define NDIS_PACKET_TYPE_PROMISCUOUS 0x00000020
+#define NDIS_PACKET_TYPE_SMT 0x00000040
+#define NDIS_PACKET_TYPE_ALL_LOCAL 0x00000080
+#define NDIS_PACKET_TYPE_GROUP 0x00000100
+#define NDIS_PACKET_TYPE_ALL_FUNCTIONAL 0x00000200
+#define NDIS_PACKET_TYPE_FUNCTIONAL 0x00000400
+#define NDIS_PACKET_TYPE_MAC_FRAME 0x00000800
+
+
+
+#endif /* _HYPERV_NET_H */
--
2.1.4
Stephen Hemminger
2015-02-05 01:13:27 UTC
Permalink
From: Stephen Hemminger <***@brocade.com>

This is a new poll mode driver for the Hyper-V virtual network
interface.

Signed-off-by: Stas Egorov <***@mirantis.com>
Signed-off-by: Stephen Hemminger <***@networkplumber.org>
---
lib/Makefile | 1 +
lib/librte_pmd_hyperv/Makefile | 28 +
lib/librte_pmd_hyperv/hyperv.h | 169 ++++
lib/librte_pmd_hyperv/hyperv_drv.c | 1660 +++++++++++++++++++++++++++++++++
lib/librte_pmd_hyperv/hyperv_drv.h | 558 +++++++++++
lib/librte_pmd_hyperv/hyperv_ethdev.c | 334 +++++++
lib/librte_pmd_hyperv/hyperv_logs.h | 68 ++
lib/librte_pmd_hyperv/hyperv_rxtx.c | 402 ++++++++
lib/librte_pmd_hyperv/hyperv_rxtx.h | 35 +
mk/rte.app.mk | 4 +
10 files changed, 3259 insertions(+)
create mode 100644 lib/librte_pmd_hyperv/Makefile
create mode 100644 lib/librte_pmd_hyperv/hyperv.h
create mode 100644 lib/librte_pmd_hyperv/hyperv_drv.c
create mode 100644 lib/librte_pmd_hyperv/hyperv_drv.h
create mode 100644 lib/librte_pmd_hyperv/hyperv_ethdev.c
create mode 100644 lib/librte_pmd_hyperv/hyperv_logs.h
create mode 100644 lib/librte_pmd_hyperv/hyperv_rxtx.c
create mode 100644 lib/librte_pmd_hyperv/hyperv_rxtx.h

diff --git a/lib/Makefile b/lib/Makefile
index d617d81..2b48ba7 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -45,6 +45,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += librte_pmd_e1000
DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_ixgbe
DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += librte_pmd_i40e
DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += librte_pmd_enic
+DIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += librte_pmd_hyperv
DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += librte_pmd_bond
DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring
DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += librte_pmd_pcap
diff --git a/lib/librte_pmd_hyperv/Makefile b/lib/librte_pmd_hyperv/Makefile
new file mode 100644
index 0000000..4ba08c8
--- /dev/null
+++ b/lib/librte_pmd_hyperv/Makefile
@@ -0,0 +1,28 @@
+# BSD LICENSE
+#
+# Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+# All rights reserved.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_hyperv.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_HV_PMD) += hyperv_ethdev.c
+SRCS-$(CONFIG_RTE_LIBRTE_HV_PMD) += hyperv_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_HV_PMD) += hyperv_drv.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += lib/librte_eal lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += lib/librte_mempool lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += lib/librte_malloc
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_pmd_hyperv/hyperv.h b/lib/librte_pmd_hyperv/hyperv.h
new file mode 100644
index 0000000..b011b6d
--- /dev/null
+++ b/lib/librte_pmd_hyperv/hyperv.h
@@ -0,0 +1,169 @@
+/*-
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _HYPERV_H_
+#define _HYPERV_H_
+
+#include <sys/param.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <rte_cycles.h>
+#include <rte_dev.h>
+
+#include "hyperv_logs.h"
+
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1 << PAGE_SHIFT)
+
+/*
+ * Tunable ethdev params
+ */
+#define HV_MIN_RX_BUF_SIZE 1024
+#define HV_MAX_RX_PKT_LEN 4096
+#define HV_MAX_MAC_ADDRS 1
+#define HV_MAX_RX_QUEUES 1
+#define HV_MAX_TX_QUEUES 1
+#define HV_MAX_PKT_BURST 32
+#define HV_MAX_LINK_REQ 10
+
+/*
+ * List of resources mapped from kspace
+ * need to be the same as defined in hv_uio.c
+ */
+enum {
+ TXRX_RING_MAP,
+ INT_PAGE_MAP,
+ MON_PAGE_MAP,
+ RECV_BUF_MAP
+};
+
+/*
+ * Statistics
+ */
+struct hv_stats {
+ uint64_t opkts;
+ uint64_t obytes;
+ uint64_t oerrors;
+
+ uint64_t ipkts;
+ uint64_t ibytes;
+ uint64_t ierrors;
+ uint64_t rx_nombuf;
+};
+
+struct hv_data;
+struct netvsc_packet;
+struct rndis_msg;
+typedef void (*receive_callback_t)(struct hv_data *hv, struct rndis_msg *msg,
+ struct netvsc_packet *pkt);
+
+/*
+ * Main driver structure
+ */
+struct hv_data {
+ int vmbus_device;
+ uint8_t monitor_bit;
+ uint8_t monitor_group;
+ uint8_t kernel_initialized;
+ int uio_fd;
+ /* Flag indicates channel state. If closed, RX/TX shouldn't work further */
+ uint8_t closed;
+ /* Flag indicates whether HALT rndis request was received by host */
+ uint8_t hlt_req_sent;
+ /* Flag indicates pending state for HALT request */
+ uint8_t hlt_req_pending;
+ /* Counter for RNDIS requests */
+ uint32_t new_request_id;
+ /* State of RNDIS device */
+ uint8_t rndis_dev_state;
+ /* Number of transmitted packets but not completed yet by Hyper-V */
+ int num_outstanding_sends;
+ /* Max pkt len to fit in rx mbufs */
+ uint32_t max_rx_pkt_len;
+
+ uint8_t jumbo_frame_support;
+
+ struct hv_vmbus_ring_buffer *in;
+ struct hv_vmbus_ring_buffer *out;
+
+ /* Size of each ring_buffer(in/out) */
+ uint32_t rb_size;
+ /* Size of data in each ring_buffer(in/out) */
+ uint32_t rb_data_size;
+
+ void* int_page;
+ struct hv_vmbus_monitor_page *monitor_pages;
+ void* recv_interrupt_page;
+ void* send_interrupt_page;
+ void* ring_pages;
+ void* recv_buf;
+
+ uint8_t link_req_cnt;
+ uint32_t link_status;
+ uint8_t hw_mac_addr[ETHER_ADDR_LEN];
+ struct rndis_request *req;
+ struct netvsc_packet *netvsc_packet;
+ struct nvsp_msg *rx_comp_msg;
+ struct hv_rx_queue *rxq;
+ struct hv_tx_queue *txq;
+ struct hv_vm_packet_descriptor *desc;
+ receive_callback_t receive_callback;
+ int pkt_rxed;
+
+ uint32_t debug;
+ struct hv_stats stats;
+};
+
+/*
+ * Extern functions declarations
+ */
+int hyperv_dev_tx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t queue_idx,
+ uint16_t nb_desc,
+ unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf);
+
+void hyperv_dev_tx_queue_release(void *ptxq);
+
+int hyperv_dev_rx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t queue_idx,
+ uint16_t nb_desc,
+ unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *mp);
+
+void hyperv_dev_rx_queue_release(void *prxq);
+
+uint16_t
+hyperv_recv_pkts(void *prxq,
+ struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+
+uint16_t
+hyperv_xmit_pkts(void *ptxq,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
+int hv_rf_on_device_add(struct hv_data *hv);
+int hv_rf_on_device_remove(struct hv_data *hv);
+int hv_rf_on_send(struct hv_data *hv, struct netvsc_packet *pkt);
+int hv_rf_on_open(struct hv_data *hv);
+int hv_rf_on_close(struct hv_data *hv);
+int hv_rf_set_device_mac(struct hv_data *hv, uint8_t *mac);
+void hyperv_start_rx(struct hv_data *hv);
+void hyperv_stop_rx(struct hv_data *hv);
+int hyperv_get_buffer(struct hv_data *hv, void* buffer, uint32_t bufferlen);
+void hyperv_scan_comps(struct hv_data *hv, int allow_rx_drop);
+uint8_t hyperv_get_link_status(struct hv_data *hv);
+int hyperv_set_rx_mode(struct hv_data *hv, uint8_t promisc, uint8_t mcast);
+
+/*
+ * Link status accessors shared between the driver source files.
+ *
+ * These are plain external prototypes: a non-static `inline` declaration
+ * without a definition in the same translation unit triggers
+ * "inline function declared but never defined" under C99/C11 semantics
+ * and may leave the symbol without an external definition.
+ */
+int rte_hv_dev_atomic_write_link_status(struct rte_eth_dev *dev,
+		struct rte_eth_link *link);
+int rte_hv_dev_atomic_read_link_status(struct rte_eth_dev *dev,
+		struct rte_eth_link *link);
+
+#endif /* _HYPERV_H_ */
diff --git a/lib/librte_pmd_hyperv/hyperv_drv.c b/lib/librte_pmd_hyperv/hyperv_drv.c
new file mode 100644
index 0000000..4ff2efa
--- /dev/null
+++ b/lib/librte_pmd_hyperv/hyperv_drv.c
@@ -0,0 +1,1660 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <string.h>
+
+#include "hyperv.h"
+#include "hyperv_drv.h"
+#include "hyperv_rxtx.h"
+
+#define LOOP_CNT 10000
+#define MAC_STRLEN 14
+#define MAC_PARAM_STR "NetworkAddress"
+
+/*
+ * Helpers yielding the lowercase hex digit for the high / low nibble of a
+ * byte. The argument is parenthesized so expressions such as `a | b` are
+ * masked as a whole.
+ */
+#define hex "0123456789abcdef"
+#define high(x) hex[((x) & 0xf0) >> 4]
+#define low(x) hex[(x) & 0x0f]
+
+static int hv_rf_on_receive(struct hv_data *hv, struct netvsc_packet *pkt);
+
+/*
+ * Ring buffer
+ */
+
+/*
+ * Amount of space to write to.
+ *
+ * r is the read index, w the write index and z the size of the data area.
+ * When w == r the whole area (z bytes) is reported as writable.
+ * r and w are evaluated more than once, so pass side-effect-free
+ * expressions only.
+ */
+#define HV_BYTES_AVAIL_TO_WRITE(r, w, z) \
+ (((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w)))
+
+/*
+ * Get number of bytes available to read and to write to
+ * for the specified ring buffer
+ *
+ * hv - supplies rb_data_size, the size of the ring data area
+ * ring_buffer - ring whose read_index and write_index are sampled
+ * read - out: number of bytes available to read
+ * write - out: number of bytes available to write
+ *
+ * The two results always add up to hv->rb_data_size; when both indices
+ * are equal the ring is reported as fully writable with nothing to read.
+ */
+static inline void
+get_ring_buffer_avail_bytes(
+ struct hv_data *hv,
+ struct hv_vmbus_ring_buffer *ring_buffer,
+ uint32_t *read,
+ uint32_t *write)
+{
+ rte_compiler_barrier();
+
+ /*
+ * Take a single snapshot of both indices so that the two results
+ * computed below are consistent with each other
+ */
+ uint32_t read_loc = ring_buffer->read_index;
+ uint32_t write_loc = ring_buffer->write_index;
+
+ *write = HV_BYTES_AVAIL_TO_WRITE(
+ read_loc, write_loc, hv->rb_data_size);
+ *read = hv->rb_data_size - *write;
+}
+
+/*
+ * Copy src_len bytes from src into the ring data area, starting at
+ * start_write_offset and wrapping to the beginning of the area if the
+ * end is reached.
+ *
+ * The caller must have checked that enough free space exists.
+ * Returns the offset just past the copied data, modulo ring_buffer_size.
+ */
+static uint32_t
+copy_to_ring_buffer(
+	struct hv_vmbus_ring_buffer *ring_buffer,
+	uint32_t ring_buffer_size,
+	uint32_t start_write_offset,
+	char *src,
+	uint32_t src_len)
+{
+	char *base = (char *)ring_buffer->buffer;
+	uint32_t room_to_end = ring_buffer_size - start_write_offset;
+
+	if (src_len <= room_to_end) {
+		/* fits without wrapping */
+		rte_memcpy(base + start_write_offset, src, src_len);
+	} else {
+		/* fill up to the end, then continue from the start */
+		rte_memcpy(base + start_write_offset, src, room_to_end);
+		rte_memcpy(base, src + room_to_end, src_len - room_to_end);
+	}
+
+	return (start_write_offset + src_len) % ring_buffer_size;
+}
+
+/*
+ * Copy dest_len bytes out of the ring data area into dest, starting at
+ * start_read_offset and wrapping to the beginning of the area if the end
+ * is reached. The size of the area is hv->rb_data_size.
+ *
+ * The caller must have checked that enough data is available.
+ * Returns the offset just past the copied data, modulo the area size.
+ */
+static uint32_t
+copy_from_ring_buffer(
+	struct hv_data *hv,
+	struct hv_vmbus_ring_buffer *ring_buffer,
+	char *dest,
+	uint32_t dest_len,
+	uint32_t start_read_offset)
+{
+	char *base = (char *)ring_buffer->buffer;
+	uint32_t ring_size = hv->rb_data_size;
+	uint32_t until_end = ring_size - start_read_offset;
+
+	if (dest_len <= until_end) {
+		/* contiguous in the ring */
+		rte_memcpy(dest, base + start_read_offset, dest_len);
+	} else {
+		/* tail of the ring first, then the part that wrapped */
+		rte_memcpy(dest, base + start_read_offset, until_end);
+		rte_memcpy(dest + until_end, base, dest_len - until_end);
+	}
+
+	return (start_read_offset + dest_len) % ring_size;
+}
+
+/*
+ * Write to the ring buffer.
+ *
+ * Gathers the sg_buffer_count entries of sg_buffers into the outbound
+ * ring (hv->out), appends a 64-bit trailer carrying the write index at
+ * which this packet started (in the upper 32 bits), and then publishes
+ * the new write index.
+ *
+ * Returns 0 on success, or -EAGAIN when the free space is not strictly
+ * larger than the payload plus trailer. This function does not signal
+ * the host; callers do that themselves.
+ */
+static int
+hv_ring_buffer_write(
+ struct hv_data *hv,
+ struct hv_vmbus_sg_buffer_list sg_buffers[],
+ uint32_t sg_buffer_count)
+{
+ struct hv_vmbus_ring_buffer *ring_buffer = hv->out;
+ uint32_t i = 0;
+ uint32_t byte_avail_to_write;
+ uint32_t byte_avail_to_read;
+ uint32_t total_bytes_to_write = 0;
+ volatile uint32_t next_write_location;
+ uint64_t prev_indices = 0;
+
+ for (i = 0; i < sg_buffer_count; i++)
+ total_bytes_to_write += sg_buffers[i].length;
+
+ total_bytes_to_write += sizeof(uint64_t); /* room for the trailer */
+
+ get_ring_buffer_avail_bytes(hv, ring_buffer, &byte_avail_to_read,
+ &byte_avail_to_write);
+
+ /*
+ * If there is only room for the packet, assume it is full.
+ * Otherwise, the next time around, we think the ring buffer
+ * is empty since the read index == write index
+ */
+ if (byte_avail_to_write <= total_bytes_to_write) {
+ PMD_PERROR_LOG(hv, DBG_RB,
+ "byte_avail_to_write = %u, total_bytes_to_write = %u",
+ byte_avail_to_write, total_bytes_to_write);
+ return -EAGAIN;
+ }
+
+ /*
+ * Write to the ring buffer
+ */
+ next_write_location = ring_buffer->write_index;
+
+ for (i = 0; i < sg_buffer_count; i++) {
+ next_write_location = copy_to_ring_buffer(ring_buffer,
+ hv->rb_data_size, next_write_location,
+ (char *) sg_buffers[i].data, sg_buffers[i].length);
+ }
+
+ /*
+ * Set previous packet start
+ */
+ prev_indices = (uint64_t)ring_buffer->write_index << 32;
+
+ next_write_location = copy_to_ring_buffer(
+ ring_buffer, hv->rb_data_size, next_write_location,
+ (char *) &prev_indices, sizeof(uint64_t));
+
+ /*
+ * Make sure we flush all writes before updating the writeIndex
+ */
+ rte_compiler_barrier();
+
+ /*
+ * Now, update the write location
+ */
+ ring_buffer->write_index = next_write_location;
+
+ return 0;
+}
+
+/*
+ * Copy buffer_len bytes from the current read position of the inbound
+ * ring (hv->in) into buffer, leaving the read index where it is.
+ *
+ * Returns 0 on success, or -EAGAIN when fewer than buffer_len bytes are
+ * available.
+ */
+static int
+hv_ring_buffer_peek(
+	struct hv_data *hv,
+	void* buffer,
+	uint32_t buffer_len)
+{
+	struct hv_vmbus_ring_buffer *rx_ring = hv->in;
+	uint32_t readable;
+	uint32_t writable;
+
+	get_ring_buffer_avail_bytes(hv, rx_ring, &readable, &writable);
+
+	/* Not enough data queued yet */
+	if (buffer_len > readable)
+		return -EAGAIN;
+
+	/* The returned offset is dropped on purpose: nothing is consumed */
+	copy_from_ring_buffer(hv, rx_ring, (char *)buffer, buffer_len,
+			      rx_ring->read_index);
+
+	return 0;
+}
+
+/*
+ * Read and advance the read index.
+ *
+ * Copies buffer_len bytes from the inbound ring (hv->in), starting
+ * offset bytes past the current read index, into buffer. It then
+ * consumes the 8-byte trailer that follows and publishes the new read
+ * index.
+ *
+ * Returns 0 on success, -EINVAL when buffer_len is 0, or -EAGAIN when
+ * fewer than buffer_len bytes are available. The read index is left
+ * untouched on failure.
+ *
+ * NOTE(review): the availability check covers buffer_len only, not the
+ * offset or the 8-byte trailer that are also consumed - confirm that
+ * callers always read whole packets.
+ */
+static int
+hv_ring_buffer_read(
+ struct hv_data *hv,
+ void* buffer,
+ uint32_t buffer_len,
+ uint32_t offset)
+{
+ struct hv_vmbus_ring_buffer *ring_buffer = hv->in;
+ uint32_t bytes_avail_to_write;
+ uint32_t bytes_avail_to_read;
+ uint32_t next_read_location = 0;
+ uint64_t prev_indices = 0;
+
+ if (buffer_len <= 0) /* buffer_len is unsigned: this rejects 0 only */
+ return -EINVAL;
+
+ get_ring_buffer_avail_bytes(
+ hv,
+ ring_buffer,
+ &bytes_avail_to_read,
+ &bytes_avail_to_write);
+
+ /*
+ * Make sure there is something to read
+ */
+ if (bytes_avail_to_read < buffer_len) {
+ PMD_PERROR_LOG(hv, DBG_RB, "bytes_avail_to_read = %u, buffer_len = %u",
+ bytes_avail_to_read, buffer_len);
+ return -EAGAIN;
+ }
+
+ next_read_location = (ring_buffer->read_index + offset) % hv->rb_data_size;
+
+ next_read_location = copy_from_ring_buffer(
+ hv,
+ ring_buffer,
+ (char *) buffer,
+ buffer_len,
+ next_read_location);
+
+ /* Consume the trailer; its value is not used here */
+ next_read_location = copy_from_ring_buffer(
+ hv,
+ ring_buffer,
+ (char *) &prev_indices,
+ sizeof(uint64_t),
+ next_read_location);
+
+ /*
+ * Make sure all reads are done before we update the read index since
+ * the writer may start writing to the read area once the read index
+ * is updated.
+ */
+ rte_compiler_barrier();
+
+ /*
+ * Update the read index
+ */
+ ring_buffer->read_index = next_read_location;
+
+ return 0;
+}
+
+/*
+ * VMBus
+ */
+
+/*
+ * Retrieve the raw packet on the specified channel
+ *
+ * hv                - device whose inbound ring is read
+ * buffer            - destination for the whole packet (descriptor included)
+ * buffer_len        - capacity of buffer in bytes
+ * buffer_actual_len - out: length of the packet; 0 when nothing was
+ *                     delivered. On -ENOMEM it holds the required size.
+ * request_id        - out: transaction id of the delivered packet, else 0
+ * mode              - bit 0 accepts transfer-page data packets,
+ *                     bit 1 accepts completion packets
+ *
+ * Returns 0 when a packet was delivered or nothing could be read,
+ * -1 when the next packet is of a type excluded by mode (it stays queued),
+ * -ENOMEM when the packet does not fit into buffer (it stays queued).
+ */
+static int
+hv_vmbus_channel_recv_packet_raw(
+	struct hv_data *hv,
+	void* buffer,
+	uint32_t buffer_len,
+	uint32_t *buffer_actual_len,
+	uint64_t *request_id,
+	int mode)
+{
+	struct hv_vm_packet_descriptor desc;
+	uint32_t packet_len;
+
+	*buffer_actual_len = 0;
+	*request_id = 0;
+
+	/* No complete descriptor queued yet: report "nothing received" */
+	if (hv_ring_buffer_peek(hv, &desc,
+				sizeof(struct hv_vm_packet_descriptor)) != 0)
+		return 0;
+
+	if ((desc.type == HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES
+			&& !(mode & 1)) ||
+	    ((desc.type == HV_VMBUS_PACKET_TYPE_COMPLETION) && !(mode & 2))) {
+		return -1;
+	}
+
+	/* length8 counts 8-byte units */
+	packet_len = desc.length8 << 3;
+
+	if (unlikely(packet_len > buffer_len)) {
+		/* Tell the caller how much room is needed */
+		*buffer_actual_len = packet_len;
+		PMD_PERROR_LOG(hv, DBG_RX, "The buffer desc is too big, will drop it");
+		return -ENOMEM;
+	}
+
+	/*
+	 * Copy over the entire packet to the user buffer. A failed read
+	 * leaves the ring untouched and the buffer unfilled, so it must not
+	 * be reported as a received packet: keep the outputs at zero and let
+	 * the caller poll again, exactly as for a failed peek.
+	 */
+	if (unlikely(hv_ring_buffer_read(hv, buffer, packet_len, 0) != 0))
+		return 0;
+
+	*buffer_actual_len = packet_len;
+	*request_id = desc.transaction_id;
+
+	return 0;
+}
+
+/*
+ * Trigger an event notification on the specified channel
+ *
+ * Atomically sets the bit for hv->vmbus_device in the send interrupt
+ * page and the bit hv->monitor_bit in the pending word of monitor group
+ * hv->monitor_group.
+ */
+static void
+vmbus_channel_set_event(struct hv_data *hv)
+{
+	/* Here we assume that channel->offer_msg.monitor_allocated == 1,
+	 * in another case our driver will not work */
+	/* Each uint32_t represents 32 channels */
+	uint32_t *int_word = (uint32_t *)hv->send_interrupt_page
+		+ (hv->vmbus_device >> 5);
+
+	/*
+	 * Masks are built from an unsigned 1: shifting a signed 1 into
+	 * bit 31 is undefined behaviour.
+	 */
+	__sync_or_and_fetch(int_word, 1U << (hv->vmbus_device & 31));
+	__sync_or_and_fetch((uint32_t *)&hv->monitor_pages->
+		trigger_group[hv->monitor_group].u.pending,
+		1U << hv->monitor_bit);
+}
+
+/**
+ * @brief Send the specified buffer on the given channel
+ *
+ * Prepends a packet descriptor to buffer, pads the packet to a multiple
+ * of 8 bytes and writes it to the outbound ring.
+ *
+ * The host is signalled only when the write succeeded, the ring's
+ * interrupt_mask is clear, and the ring's read index equals the write
+ * index sampled before the write (i.e. the ring held no unread data
+ * when this packet was queued).
+ *
+ * @return the result of hv_ring_buffer_write(): 0 or -EAGAIN
+ */
+static int
+hv_vmbus_channel_send_packet(
+ struct hv_data *hv,
+ void* buffer,
+ uint32_t buffer_len,
+ uint64_t request_id,
+ enum hv_vmbus_packet_type type,
+ uint32_t flags)
+{
+ struct hv_vmbus_sg_buffer_list buffer_list[3];
+ struct hv_vm_packet_descriptor desc;
+ uint32_t packet_len_aligned;
+ uint64_t aligned_data;
+ uint32_t packet_len;
+ int ret = 0;
+ uint32_t old_write = hv->out->write_index; /* sampled before the write */
+
+ packet_len = sizeof(struct hv_vm_packet_descriptor) + buffer_len;
+ packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
+ aligned_data = 0; /* source of the zero padding bytes */
+
+ /* Setup the descriptor */
+ desc.type = type; /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND; */
+ desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */
+ /* in 8-bytes granularity */
+ desc.data_offset8 = sizeof(struct hv_vm_packet_descriptor) >> 3;
+ desc.length8 = (uint16_t) (packet_len_aligned >> 3);
+ desc.transaction_id = request_id;
+
+ buffer_list[0].data = &desc;
+ buffer_list[0].length = sizeof(struct hv_vm_packet_descriptor);
+
+ buffer_list[1].data = buffer;
+ buffer_list[1].length = buffer_len;
+
+ buffer_list[2].data = &aligned_data;
+ buffer_list[2].length = packet_len_aligned - packet_len; /* 0..7 bytes */
+
+ ret = hv_ring_buffer_write(hv, buffer_list, 3);
+
+ rte_mb();
+ if (!ret && !hv->out->interrupt_mask && hv->out->read_index == old_write)
+ vmbus_channel_set_event(hv);
+
+ return ret;
+}
+
+/*
+ * Send a range of single-page buffer packets using
+ * a GPADL Direct packet type
+ *
+ * Builds a GPA-direct descriptor listing page_count page ranges, appends
+ * buffer (buffer_len bytes), pads to a multiple of 8 bytes and writes
+ * the result to the outbound ring. On success num_outstanding_sends is
+ * incremented. The host is signalled under the same conditions as in
+ * hv_vmbus_channel_send_packet().
+ *
+ * Returns 0 on success, -EINVAL when page_count exceeds
+ * HV_MAX_PAGE_BUFFER_COUNT, or the error from hv_ring_buffer_write().
+ *
+ * NOTE(review): desc lives on the stack and only the members assigned
+ * below are initialised; if the descriptor type has further members
+ * within the first descSize bytes they are sent as-is - confirm.
+ */
+static int
+hv_vmbus_channel_send_packet_pagebuffer(
+ struct hv_data *hv,
+ struct hv_vmbus_page_buffer page_buffers[],
+ uint32_t page_count,
+ void* buffer,
+ uint32_t buffer_len,
+ uint64_t request_id)
+{
+
+ int ret = 0;
+ uint32_t packet_len, packetLen_aligned, descSize, i = 0;
+ struct hv_vmbus_sg_buffer_list buffer_list[3];
+ struct hv_vmbus_channel_packet_page_buffer desc;
+ uint64_t alignedData = 0;
+ uint32_t old_write = hv->out->write_index; /* sampled before the write */
+
+ if (page_count > HV_MAX_PAGE_BUFFER_COUNT) {
+ PMD_PERROR_LOG(hv, DBG_VMBUS, "page_count %u goes out of the limit",
+ page_count);
+ return -EINVAL;
+ }
+
+ /*
+ * Adjust the size down since hv_vmbus_channel_packet_page_buffer
+ * is the largest size we support
+ */
+ descSize = sizeof(struct hv_vmbus_channel_packet_page_buffer) -
+ ((HV_MAX_PAGE_BUFFER_COUNT - page_count) *
+ sizeof(struct hv_vmbus_page_buffer));
+ packet_len = descSize + buffer_len;
+ packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
+
+ /* Setup the descriptor */
+ desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
+ desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
+ desc.data_offset8 = descSize >> 3; /* in 8-bytes granularity */
+ desc.length8 = (uint16_t) (packetLen_aligned >> 3);
+ desc.transaction_id = request_id;
+ desc.range_count = page_count;
+
+ for (i = 0; i < page_count; i++) {
+ desc.range[i].length = page_buffers[i].length;
+ desc.range[i].offset = page_buffers[i].offset;
+ desc.range[i].pfn = page_buffers[i].pfn;
+ }
+
+ buffer_list[0].data = &desc;
+ buffer_list[0].length = descSize; /* only the used part of desc */
+
+ buffer_list[1].data = buffer;
+ buffer_list[1].length = buffer_len;
+
+ buffer_list[2].data = &alignedData;
+ buffer_list[2].length = packetLen_aligned - packet_len; /* 0..7 bytes */
+
+ ret = hv_ring_buffer_write(hv, buffer_list, 3);
+ if (likely(ret == 0))
+ ++hv->num_outstanding_sends;
+
+ rte_mb();
+ if (!ret && !hv->out->interrupt_mask &&
+ hv->out->read_index == old_write)
+ vmbus_channel_set_event(hv);
+
+ return ret;
+}
+
+/*
+ * NetVSC
+ */
+
+/*
+ * Net VSC on send
+ * Sends a packet on the specified Hyper-V device.
+ * Returns 0 on success, non-zero on failure.
+ *
+ * The packet must carry at least one page buffer; otherwise -1 is
+ * returned and nothing is sent. Data packets are tagged with request id
+ * tx_tail + 1 of the TX queue, control packets with request id 0.
+ */
+static int
+hv_nv_on_send(struct hv_data *hv, struct netvsc_packet *pkt)
+{
+	struct nvsp_msg send_msg;
+	uint64_t request_id;
+
+	if (unlikely(!pkt->page_buf_count)) {
+		PMD_PERROR_LOG(hv, DBG_TX, "pkt->page_buf_count value can't be zero");
+		return -1;
+	}
+
+	/*
+	 * The whole structure is copied into the ring, so clear it first:
+	 * the members not set below must not carry stale stack contents.
+	 */
+	memset(&send_msg, 0, sizeof(send_msg));
+
+	send_msg.msg_type = nvsp_msg_1_type_send_rndis_pkt;
+	/* 0 is RMC_DATA, 1 is RMC_CONTROL */
+	send_msg.msgs.send_rndis_pkt.chan_type = pkt->is_data_pkt ? 0 : 1;
+
+	/* Not using send buffer section */
+	send_msg.msgs.send_rndis_pkt.send_buf_section_idx = 0xFFFFFFFF;
+	send_msg.msgs.send_rndis_pkt.send_buf_section_size = 0;
+
+	/* Request id 0 marks a control packet (no TX descriptor to free) */
+	request_id = pkt->is_data_pkt ? (hv->txq->tx_tail + 1) : 0;
+
+	return hv_vmbus_channel_send_packet_pagebuffer(hv,
+			pkt->page_buffers, pkt->page_buf_count,
+			&send_msg, sizeof(struct nvsp_msg),
+			request_id);
+}
+
+/*
+ * Net VSC on receive
+ *
+ * This function deals exclusively with virtual addresses.
+ *
+ * pkt is a transfer-page packet as read from the channel. The packet is
+ * ignored unless its NVSP message type is send_rndis_pkt and its
+ * transfer page set id is NETVSC_RECEIVE_BUFFER_ID. For every range in
+ * the packet, hv->netvsc_packet is filled in to describe the range
+ * inside hv->recv_buf and handed to hv_rf_on_receive(). Finally a
+ * completion carrying the packet's transaction id is sent back.
+ *
+ * The return values of hv_rf_on_receive() and of the completion send
+ * are not checked.
+ */
+static void
+hv_nv_on_receive(struct hv_data *hv, struct hv_vm_packet_descriptor *pkt)
+{
+ struct hv_vm_transfer_page_packet_header *vm_xfer_page_pkt;
+ struct nvsp_msg *nvsp_msg_pkt;
+ struct netvsc_packet *net_vsc_pkt = NULL;
+ unsigned long start;
+ int count, i;
+
+ /* data_offset8 is expressed in 8-byte units */
+ nvsp_msg_pkt = (struct nvsp_msg *)((unsigned long)pkt
+ + (pkt->data_offset8 << 3));
+
+ /* Make sure this is a valid nvsp packet */
+ if (unlikely(nvsp_msg_pkt->msg_type != nvsp_msg_1_type_send_rndis_pkt)) {
+ PMD_PERROR_LOG(hv, DBG_RX, "NVSP packet is not valid");
+ return;
+ }
+
+ vm_xfer_page_pkt = (struct hv_vm_transfer_page_packet_header *)pkt;
+
+ if (unlikely(vm_xfer_page_pkt->transfer_page_set_id
+ != NETVSC_RECEIVE_BUFFER_ID)) {
+ PMD_PERROR_LOG(hv, DBG_RX, "transfer_page_set_id is not valid");
+ return;
+ }
+
+ count = vm_xfer_page_pkt->range_count;
+
+ /*
+ * Initialize the netvsc packet
+ */
+ for (i = 0; i < count; ++i) {
+ net_vsc_pkt = hv->netvsc_packet; /* one packet object reused per range */
+
+ net_vsc_pkt->tot_data_buf_len =
+ vm_xfer_page_pkt->ranges[i].byte_count;
+ net_vsc_pkt->page_buf_count = 1;
+
+ net_vsc_pkt->page_buffers[0].length =
+ vm_xfer_page_pkt->ranges[i].byte_count;
+
+ /* The virtual address of the packet in the receive buffer */
+ start = ((unsigned long)hv->recv_buf +
+ vm_xfer_page_pkt->ranges[i].byte_offset);
+
+ /* Page number of the virtual page containing packet start */
+ net_vsc_pkt->page_buffers[0].pfn = start >> PAGE_SHIFT;
+
+ /* Calculate the page relative offset */
+ net_vsc_pkt->page_buffers[0].offset =
+ vm_xfer_page_pkt->ranges[i].byte_offset & (PAGE_SIZE - 1);
+
+ /*
+ * In this implementation, we are dealing with virtual
+ * addresses exclusively. Since we aren't using physical
+ * addresses at all, we don't care if a packet crosses a
+ * page boundary. For this reason, the original code to
+ * check for and handle page crossings has been removed.
+ */
+
+ /*
+ * Pass it to the upper layer. The receive completion call
+ * has been moved into this function.
+ */
+ hv_rf_on_receive(hv, net_vsc_pkt);
+ }
+ /* Send a receive completion packet to RNDIS device (ie NetVsp) */
+ hv_vmbus_channel_send_packet(hv, hv->rx_comp_msg, sizeof(struct nvsp_msg),
+ vm_xfer_page_pkt->d.transaction_id,
+ HV_VMBUS_PACKET_TYPE_COMPLETION, 0);
+}
+
+/*
+ * Net VSC on send completion
+ *
+ * Handles a completion packet read from the channel. For an RNDIS
+ * packet completion: while a halt request is pending the halt is marked
+ * as sent; otherwise a non-zero transaction id returns one descriptor
+ * to the TX queue. In both cases the outstanding send counter is
+ * decremented. Any other completion type is only logged.
+ */
+static void
+hv_nv_on_send_completion(struct hv_data *hv, struct hv_vm_packet_descriptor *pkt)
+{
+	/* data_offset8 is expressed in 8-byte units */
+	unsigned long payload = (unsigned long)pkt + (pkt->data_offset8 << 3);
+	struct nvsp_msg *completion = (struct nvsp_msg *)payload;
+
+	if (unlikely(completion->msg_type !=
+			nvsp_msg_1_type_send_rndis_pkt_complete)) {
+		PMD_PINFO_LOG(hv, DBG_TX, "unhandled completion (for kernel req or so)");
+		return;
+	}
+
+	if (unlikely(hv->hlt_req_pending)) {
+		hv->hlt_req_sent = 1;
+	} else if (pkt->transaction_id) {
+		++hv->txq->tx_free;
+	}
+
+	--hv->num_outstanding_sends;
+}
+
+/*
+ * Analogue of bsd hv_nv_on_channel_callback
+ *
+ * Registers request as the pending request (hv->req) and polls the
+ * inbound channel, dispatching completions and transfer-page packets,
+ * until one of the following happens:
+ *  - the response message type of the pending request becomes non-zero,
+ *  - hv->req has been cleared,
+ *  - hv->hlt_req_sent has been set,
+ *  - LOOP_CNT consecutive polls yielded no packet or an error.
+ * The receive buffer is re-allocated when a packet does not fit.
+ */
+static void
+hv_nv_complete_request(struct hv_data *hv, struct rndis_request *request)
+{
+	uint32_t bytes_rxed, cnt = 0;
+	uint64_t request_id;
+	struct hv_vm_packet_descriptor *desc;
+	uint8_t *buffer;
+	int bufferlen = NETVSC_PACKET_SIZE;
+	int ret = 0;
+
+	PMD_INIT_FUNC_TRACE();
+
+	hv->req = request;
+
+	buffer = rte_malloc(NULL, bufferlen, RTE_CACHE_LINE_SIZE);
+	if (!buffer) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "failed to allocate packet");
+		return;
+	}
+
+	do {
+		rte_delay_us(1);
+		/* mode 3: accept both data and completion packets */
+		ret = hv_vmbus_channel_recv_packet_raw(hv,
+				buffer, bufferlen, &bytes_rxed, &request_id, 3);
+		if (ret == 0) {
+			if (bytes_rxed > 0) {
+				desc = (struct hv_vm_packet_descriptor *)buffer;
+
+				switch (desc->type) {
+				case HV_VMBUS_PACKET_TYPE_COMPLETION:
+					hv_nv_on_send_completion(hv, desc);
+					break;
+				case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES:
+					hv_nv_on_receive(hv, desc);
+					break;
+				default:
+					break;
+				}
+				PMD_PDEBUG_LOG(hv, DBG_LOAD, "Did %d attempts"
+						" until non-empty data was"
+						" receieved", cnt);
+				cnt = 0;
+			} else {
+				cnt++;
+			}
+		} else if (ret == -ENOMEM) {
+			/* Handle large packet */
+			PMD_PDEBUG_LOG(hv, DBG_LOAD, "recv_packet_raw returned -ENOMEM");
+			rte_free(buffer);
+			buffer = rte_malloc(NULL, bytes_rxed, RTE_CACHE_LINE_SIZE);
+			if (buffer == NULL) {
+				PMD_PERROR_LOG(hv, DBG_LOAD, "failed to allocate buffer");
+				break;
+			}
+			bufferlen = bytes_rxed;
+		} else {
+			PMD_PERROR_LOG(hv, DBG_LOAD, "Unexpected return code (%d)", ret);
+			/*
+			 * Failed polls count as unproductive attempts too, so
+			 * that a persistent error ends in the emergency break
+			 * below instead of spinning forever.
+			 */
+			cnt++;
+		}
+		if (!hv->req) {
+			PMD_PINFO_LOG(hv, DBG_LOAD, "Single request processed");
+			break;
+		}
+		if (cnt >= LOOP_CNT) {
+			PMD_PERROR_LOG(hv, DBG_LOAD, "Emergency break from the loop");
+			break;
+		}
+		if (hv->hlt_req_sent) {
+			PMD_PINFO_LOG(hv, DBG_LOAD, "Halt request processed");
+			break;
+		}
+		/* The field hv->req->response_msg.ndis_msg_type
+		 * should be set to non-zero value when response received
+		 */
+	} while (!hv->req->response_msg.ndis_msg_type);
+
+	/* buffer may be NULL here after a failed re-allocation */
+	rte_free(buffer);
+}
+
+/*
+ * RNDIS
+ */
+
+/*
+ * Create new RNDIS request
+ *
+ * Allocates a zeroed struct rndis_request and reserves a PAGE_SIZE
+ * aligned memzone for the outgoing message, sized to the larger of
+ * message_length and sizeof(struct rndis_msg). The message header is
+ * filled in with message_type and message_length, and the request is
+ * given the next request id of the device.
+ *
+ * Returns the new request, or NULL when either allocation fails.
+ */
+static inline struct rndis_request *
+hv_rndis_request(struct hv_data *hv, uint32_t message_type,
+		uint32_t message_length)
+{
+	struct rndis_request *request;
+	struct rndis_msg *rndis_mesg;
+	struct rndis_set_request *set;
+	char mz_name[RTE_MEMZONE_NAMESIZE];
+	uint32_t size;
+
+	PMD_INIT_FUNC_TRACE();
+
+	request = rte_zmalloc("rndis_req", sizeof(struct rndis_request),
+			RTE_CACHE_LINE_SIZE);
+
+	if (!request)
+		return NULL;
+
+	/*
+	 * Bounded formatting: three integers plus a pointer can exceed the
+	 * RTE_MEMZONE_NAMESIZE bytes of mz_name, so never write past it.
+	 * new_request_id is unsigned, hence %u.
+	 */
+	snprintf(mz_name, sizeof(mz_name), "hv_%d_%u_%u_%p", hv->vmbus_device,
+			message_type, hv->new_request_id, (void *)request);
+
+	size = MAX(message_length, sizeof(struct rndis_msg));
+
+	request->request_msg_memzone = rte_memzone_reserve_aligned(mz_name,
+			size, rte_lcore_to_socket_id(rte_lcore_id()), 0, PAGE_SIZE);
+	if (!request->request_msg_memzone) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "memzone_reserve failed");
+		rte_free(request);
+		return NULL;
+	}
+	request->request_msg = request->request_msg_memzone->addr;
+	rndis_mesg = request->request_msg;
+	rndis_mesg->ndis_msg_type = message_type;
+	rndis_mesg->msg_len = message_length;
+
+	/*
+	 * Set the request id. This field is always after the rndis header
+	 * for request/response packet types so we just use the set_request
+	 * as a template.
+	 */
+	set = &rndis_mesg->msg.set_request;
+	hv->new_request_id++;
+	set->request_id = hv->new_request_id;
+
+	return request;
+}
+
+/*
+ * RNDIS filter
+ */
+
+/*
+ * Store an RNDIS completion message in the pending request (hv->req).
+ *
+ * A response that fits into struct rndis_msg is copied into
+ * request->response_msg. A larger one is not copied; for
+ * REMOTE_NDIS_INITIALIZE_CMPLT the stored status is set to
+ * STATUS_BUFFER_OVERFLOW instead.
+ */
+static void
+hv_rf_receive_response(
+	struct hv_data *hv,
+	struct rndis_msg *response)
+{
+	struct rndis_request *request = hv->req;
+
+	PMD_INIT_FUNC_TRACE();
+
+	/*
+	 * hv->req is NULL while completions are drained without a pending
+	 * request (hv_nv_complete_request(hv, NULL)); there is nowhere to
+	 * store the response in that case.
+	 */
+	if (request == NULL) {
+		PMD_PERROR_LOG(hv, DBG_LOAD,
+				"RNDIS completion received with no request pending");
+		return;
+	}
+
+	if (response->msg_len <= sizeof(struct rndis_msg)) {
+		rte_memcpy(&request->response_msg, response,
+				response->msg_len);
+	} else {
+		if (response->ndis_msg_type == REMOTE_NDIS_INITIALIZE_CMPLT) {
+			request->response_msg.msg.init_complete.status =
+				STATUS_BUFFER_OVERFLOW;
+		}
+		PMD_PERROR_LOG(hv, DBG_LOAD, "response buffer overflow\n");
+	}
+}
+
+/*
+ * RNDIS filter receive indicate status
+ *
+ * Updates hv->link_status on media connect/disconnect indications and
+ * logs every other status value.
+ */
+static void
+hv_rf_receive_indicate_status(struct hv_data *hv, struct rndis_msg *response)
+{
+	struct rndis_indicate_status *ind = &response->msg.indicate_status;
+
+	PMD_INIT_FUNC_TRACE();
+
+	if (ind->status == RNDIS_STATUS_MEDIA_CONNECT) {
+		hv->link_status = 1;
+		return;
+	}
+
+	if (ind->status == RNDIS_STATUS_MEDIA_DISCONNECT) {
+		hv->link_status = 0;
+		return;
+	}
+
+	if (ind->status == RNDIS_STATUS_INVALID_DATA) {
+		PMD_PERROR_LOG(hv, DBG_RX, "Invalid data in RNDIS message");
+		return;
+	}
+
+	PMD_PERROR_LOG(hv, DBG_RX, "Unsupported status: %u", ind->status);
+}
+
+/*
+ * RNDIS filter receive data
+ *
+ * Copies the payload of one RNDIS data message into the mbuf found at
+ * rxq->sw_ring[rxq->rx_tail], advances rx_tail (wrapping at nb_rx_desc)
+ * and sets hv->pkt_rxed. Returns without doing anything when hv->closed
+ * is set. A message whose data_length exceeds hv->max_rx_pkt_len is
+ * counted in hv->stats.ierrors and dropped.
+ *
+ * Side effect: the offset/length of pkt->page_buffers[0] and
+ * pkt->tot_data_buf_len are rewritten to describe the payload only.
+ */
+static void
+hv_rf_receive_data(struct hv_data *hv, struct rndis_msg *msg,
+ struct netvsc_packet *pkt)
+{
+ struct rte_mbuf *m_new;
+ struct hv_rx_queue *rxq = hv->rxq;
+ struct rndis_packet *rndis_pkt;
+ uint32_t data_offset;
+
+ if (unlikely(hv->closed))
+ return;
+
+ rndis_pkt = &msg->msg.packet;
+
+ if (unlikely(hv->max_rx_pkt_len < rndis_pkt->data_length)) {
+ PMD_PWARN_LOG(hv, DBG_RX, "Packet is too large (%db), dropping.",
+ rndis_pkt->data_length);
+ ++hv->stats.ierrors;
+ return;
+ }
+
+ /* Remove rndis header, then pass data packet up the stack */
+ data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset;
+
+ /* L2 frame length, with L2 header, not including CRC */
+ pkt->tot_data_buf_len = rndis_pkt->data_length;
+ pkt->page_buffers[0].offset += data_offset;
+ /* Buffer length now L2 frame length plus trailing junk */
+ pkt->page_buffers[0].length -= data_offset;
+
+ pkt->vlan_tci = 0;
+
+ /*
+ * Just put data into appropriate mbuf, all further work will be done
+ * by the upper layer (mbuf replacement, index adjustment, etc)
+ */
+ m_new = rxq->sw_ring[rxq->rx_tail];
+ if (++rxq->rx_tail == rxq->nb_rx_desc)
+ rxq->rx_tail = 0;
+
+ /*
+ * Copy the received packet to mbuf.
+ * The copy is required since the memory pointed to by netvsc_packet
+ * cannot be reallocated
+ */
+ /* NOTE(review): pfn is turned directly into a pointer, so it is
+ * presumably a virtual page number at this point - confirm against
+ * the code that fills page_buffers. */
+ uint8_t *vaddr = (uint8_t *)
+ (pkt->page_buffers[0].pfn << PAGE_SHIFT)
+ + pkt->page_buffers[0].offset;
+
+ m_new->nb_segs = 1;
+ m_new->pkt_len = m_new->data_len = pkt->tot_data_buf_len;
+ rte_memcpy(rte_pktmbuf_mtod(m_new, void *), vaddr, m_new->data_len);
+
+ /* NOTE(review): pkt->vlan_tci was set to 0 above and is not modified
+ * in between, so this branch is not taken as the code stands. */
+ if (pkt->vlan_tci) {
+ m_new->vlan_tci = pkt->vlan_tci;
+ m_new->ol_flags |= PKT_RX_VLAN_PKT;
+ }
+
+ /* Tell the caller that a packet was placed into the ring */
+ hv->pkt_rxed = 1;
+}
+
+/*
+ * RNDIS filter receive data, jumbo frames support
+ *
+ * Same as hv_rf_receive_data() but without the size check: the payload
+ * is split into chunks of at most hv->max_rx_pkt_len bytes, each copied
+ * into the next mbuf taken from rxq->sw_ring and chained through ->next.
+ * pkt_len and nb_segs are set on the first mbuf only. Sets hv->pkt_rxed.
+ */
+static void
+hv_rf_receive_data_sg(struct hv_data *hv, struct rndis_msg *msg,
+ struct netvsc_packet *pkt)
+{
+ struct rte_mbuf *m_new;
+ struct hv_rx_queue *rxq = hv->rxq;
+ struct rndis_packet *rndis_pkt;
+ uint32_t data_offset;
+
+ if (unlikely(hv->closed))
+ return;
+
+ rndis_pkt = &msg->msg.packet;
+
+ /* Remove rndis header, then pass data packet up the stack */
+ data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset;
+
+ /* L2 frame length, with L2 header, not including CRC */
+ pkt->tot_data_buf_len = rndis_pkt->data_length;
+ pkt->page_buffers[0].offset += data_offset;
+ /* Buffer length now L2 frame length plus trailing junk */
+ pkt->page_buffers[0].length -= data_offset;
+
+ pkt->vlan_tci = 0;
+
+ /*
+ * Just put data into appropriate mbuf, all further work will be done
+ * by the upper layer (mbuf replacement, index adjustment, etc)
+ */
+ m_new = rxq->sw_ring[rxq->rx_tail];
+ if (++rxq->rx_tail == rxq->nb_rx_desc)
+ rxq->rx_tail = 0;
+
+ /*
+ * Copy the received packet to mbuf.
+ * The copy is required since the memory pointed to by netvsc_packet
+ * cannot be reallocated
+ */
+ uint8_t *vaddr = (uint8_t *)
+ (pkt->page_buffers[0].pfn << PAGE_SHIFT)
+ + pkt->page_buffers[0].offset;
+
+ /* Scatter-gather emulation */
+ uint32_t carry_len = pkt->tot_data_buf_len;
+ struct rte_mbuf *m_next;
+ m_new->pkt_len = carry_len;
+ /* Number of max_rx_pkt_len-sized chunks, rounded up.
+ * NOTE(review): carry_len is unsigned, so a zero data_length makes
+ * carry_len - 1 wrap around - confirm zero-length is impossible. */
+ m_new->nb_segs = (carry_len - 1) / hv->max_rx_pkt_len + 1;
+
+ while (1) {
+ m_new->data_len = MIN(carry_len, hv->max_rx_pkt_len);
+ rte_memcpy(rte_pktmbuf_mtod(m_new, void *),
+ vaddr, m_new->data_len);
+ vaddr += m_new->data_len;
+
+ /* Last chunk copied: stop before taking another mbuf */
+ if (carry_len <= hv->max_rx_pkt_len)
+ break;
+
+ carry_len -= hv->max_rx_pkt_len;
+ m_next = rxq->sw_ring[rxq->rx_tail];
+ if (++rxq->rx_tail == rxq->nb_rx_desc)
+ rxq->rx_tail = 0;
+ m_new->next = m_next;
+ m_new = m_next;
+ }
+
+ /* NOTE(review): pkt->vlan_tci was set to 0 above, so this branch is
+ * not taken as the code stands; m_new is the last segment here. */
+ if (pkt->vlan_tci) {
+ m_new->vlan_tci = pkt->vlan_tci;
+ m_new->ol_flags |= PKT_RX_VLAN_PKT;
+ }
+
+ hv->pkt_rxed = 1;
+}
+
+/*
+ * Describe the request message as a single-page control packet and pass
+ * it to hv_nv_on_send(). Returns whatever hv_nv_on_send() returns.
+ */
+static int
+hv_rf_send_request(struct hv_data *hv, struct rndis_request *request)
+{
+	struct netvsc_packet *pkt = &request->pkt;
+	struct hv_vmbus_page_buffer *page = &pkt->page_buffers[0];
+
+	PMD_INIT_FUNC_TRACE();
+
+	/* Control (non-data) packet carried in exactly one page buffer */
+	pkt->is_data_pkt = 0;
+	pkt->page_buf_count = 1;
+	pkt->tot_data_buf_len = request->request_msg->msg_len;
+
+	/* Page frame from the memzone physical address, offset from the
+	 * low bits of the message's virtual address */
+	page->pfn = (request->request_msg_memzone->phys_addr) >> PAGE_SHIFT;
+	page->offset = (unsigned long)request->request_msg & (PAGE_SIZE - 1);
+	page->length = request->request_msg->msg_len;
+
+	return hv_nv_on_send(hv, pkt);
+}
+
+/*
+ * Widen len bytes of src into 2-byte units in dst: each source byte is
+ * followed by a zero byte. dst must have room for 2 * len bytes.
+ */
+static void u8_to_u16(const char *src, int len, char *dst)
+{
+	int remaining;
+
+	for (remaining = len; remaining > 0; --remaining) {
+		*dst++ = *src++;
+		*dst++ = 0;
+	}
+}
+
+/*
+ * Send a REMOTE_NDIS_SET_MSG for RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER
+ * that carries the parameter name MAC_PARAM_STR and the address in
+ * macaddr as a string, both widened to two bytes per character.
+ *
+ * Returns 0 on success, -ENOMEM if the request cannot be created, the
+ * error of hv_rf_send_request() if sending fails, -1 if the status
+ * sentinel was not overwritten (no response) and -EINVAL if the host
+ * answered with a non-zero status.
+ */
+int
+hv_rf_set_device_mac(struct hv_data *hv, uint8_t *macaddr)
+{
+ struct rndis_request *request;
+ struct rndis_set_request *set_request;
+ struct rndis_config_parameter_info *info;
+ struct rndis_set_complete *set_complete;
+ char mac_str[2*ETHER_ADDR_LEN+1];
+ wchar_t *param_value, *param_name;
+ uint32_t status;
+ /* info header + widened parameter name + widened MAC string */
+ uint32_t message_len = sizeof(struct rndis_config_parameter_info) +
+ 2 * MAC_STRLEN + 4 * ETHER_ADDR_LEN;
+ int ret, i;
+
+ request = hv_rndis_request(hv, REMOTE_NDIS_SET_MSG,
+ RNDIS_MESSAGE_SIZE(struct rndis_set_request) + message_len);
+ if (!request)
+ return -ENOMEM;
+
+ set_request = &request->request_msg->msg.set_request;
+ set_request->oid = RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER;
+ set_request->device_vc_handle = 0;
+ set_request->info_buffer_offset = sizeof(struct rndis_set_request);
+ set_request->info_buffer_length = message_len;
+
+ /* The info structure starts right after the set request */
+ info = (struct rndis_config_parameter_info *)((ulong)set_request +
+ set_request->info_buffer_offset);
+ info->parameter_type = RNDIS_CONFIG_PARAM_TYPE_STRING;
+ info->parameter_name_offset =
+ sizeof(struct rndis_config_parameter_info);
+ info->parameter_name_length = 2 * MAC_STRLEN;
+ info->parameter_value_offset =
+ info->parameter_name_offset + info->parameter_name_length;
+ /* Multiply by 2 because of string representation and by 2
+ * because of utf16 representation
+ */
+ info->parameter_value_length = 4 * ETHER_ADDR_LEN;
+ /* Both pointers are only ever used through char * casts below */
+ param_name = (wchar_t *)((ulong)info + info->parameter_name_offset);
+ param_value = (wchar_t *)((ulong)info + info->parameter_value_offset);
+
+ u8_to_u16(MAC_PARAM_STR, MAC_STRLEN, (char *)param_name);
+ /* Two characters per address byte; high()/low() are presumably the
+ * hex digits of the upper and lower nibble - confirm at their
+ * definition. mac_str is not NUL-terminated and is not used as a
+ * C string. */
+ for (i = 0; i < ETHER_ADDR_LEN; ++i) {
+ mac_str[2*i] = high(macaddr[i]);
+ mac_str[2*i+1] = low(macaddr[i]);
+ }
+
+ u8_to_u16((const char *)mac_str, 2 * ETHER_ADDR_LEN, (char *)param_value);
+
+ ret = hv_rf_send_request(hv, request);
+ if (ret)
+ goto cleanup;
+
+ /* Sentinel: still 0xFFFF afterwards means no response was stored */
+ request->response_msg.msg.set_complete.status = 0xFFFF;
+ hv_nv_complete_request(hv, request);
+ set_complete = &request->response_msg.msg.set_complete;
+ if (set_complete->status == 0xFFFF) {
+ /* Host is not responding, we can't free request in this case */
+ ret = -1;
+ PMD_PERROR_LOG(hv, DBG_LOAD, "Host is not responding");
+ goto exit;
+ }
+ /* Response received, check status */
+ status = set_complete->status;
+ if (status) {
+ /* Bad response status, return error */
+ PMD_PERROR_LOG(hv, DBG_LOAD, "set_complete->status = %u\n", status);
+ ret = -EINVAL;
+ }
+
+cleanup:
+ rte_free(request);
+exit:
+ return ret;
+}
+
+/*
+ * RNDIS filter on receive
+ *
+ * Locates the RNDIS message described by pkt->page_buffers[0], checks
+ * that its msg_len equals pkt->tot_data_buf_len, copies at most
+ * sizeof(struct rndis_msg) bytes of it to the stack and dispatches on
+ * the message type. The handlers receive a pointer to that stack copy.
+ *
+ * Returns -1 (and counts an input error) on a length mismatch,
+ * 0 otherwise, including for unknown message types.
+ */
+static int
+hv_rf_on_receive(struct hv_data *hv, struct netvsc_packet *pkt)
+{
+ struct rndis_msg rndis_mesg;
+ struct rndis_msg *rndis_hdr;
+
+ /* Shift virtual page number to form virtual page address */
+ rndis_hdr = (struct rndis_msg *)(pkt->page_buffers[0].pfn << PAGE_SHIFT);
+
+ rndis_hdr = (void *)((unsigned long)rndis_hdr
+ + pkt->page_buffers[0].offset);
+
+ /*
+ * Make sure we got a valid rndis message
+ * Fixme: There seems to be a bug in set completion msg where
+ * its msg_len is 16 bytes but the byte_count field in the
+ * xfer page range shows 52 bytes
+ */
+ if (unlikely(pkt->tot_data_buf_len != rndis_hdr->msg_len)) {
+ ++hv->stats.ierrors;
+ PMD_PERROR_LOG(hv, DBG_RX, "invalid rndis message? (expected %u "
+ "bytes got %u)... dropping this message",
+ rndis_hdr->msg_len, pkt->tot_data_buf_len);
+ return -1;
+ }
+
+ /* Copy the message, clamped to the size of the local structure */
+ rte_memcpy(&rndis_mesg, rndis_hdr,
+ (rndis_hdr->msg_len > sizeof(struct rndis_msg)) ?
+ sizeof(struct rndis_msg) : rndis_hdr->msg_len);
+
+ switch (rndis_mesg.ndis_msg_type) {
+
+ /* data message */
+ case REMOTE_NDIS_PACKET_MSG:
+ /* receive_callback is set in hv_rf_on_device_add()/hv_rf_on_open() */
+ hv->receive_callback(hv, &rndis_mesg, pkt);
+ break;
+ /* completion messages */
+ case REMOTE_NDIS_INITIALIZE_CMPLT:
+ case REMOTE_NDIS_QUERY_CMPLT:
+ case REMOTE_NDIS_SET_CMPLT:
+ case REMOTE_NDIS_RESET_CMPLT:
+ case REMOTE_NDIS_KEEPALIVE_CMPLT:
+ hv_rf_receive_response(hv, &rndis_mesg);
+ break;
+ /* notification message */
+ case REMOTE_NDIS_INDICATE_STATUS_MSG:
+ hv_rf_receive_indicate_status(hv, &rndis_mesg);
+ break;
+ default:
+ PMD_PERROR_LOG(hv, DBG_RX, "hv_rf_on_receive(): Unknown msg_type 0x%x",
+ rndis_mesg.ndis_msg_type);
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * RNDIS filter on send
+ *
+ * Builds a REMOTE_NDIS_PACKET_MSG header in pkt->extension for a payload
+ * of pkt->tot_data_buf_len bytes, marks pkt as a data packet and passes
+ * it to hv_nv_on_send(), whose return value is returned unchanged.
+ */
+int
+hv_rf_on_send(struct hv_data *hv, struct netvsc_packet *pkt)
+{
+	const uint32_t hdr_len = RNDIS_MESSAGE_SIZE(struct rndis_packet);
+	struct rndis_msg *hdr = (struct rndis_msg *)pkt->extension;
+	struct rndis_packet *data = &hdr->msg.packet;
+
+	/* Start from an all-zero RNDIS header */
+	memset(hdr, 0, sizeof(struct rndis_msg));
+
+	hdr->ndis_msg_type = REMOTE_NDIS_PACKET_MSG;
+	hdr->msg_len = pkt->tot_data_buf_len + hdr_len;
+
+	data->data_offset = sizeof(struct rndis_packet);
+	data->data_length = pkt->tot_data_buf_len;
+
+	pkt->is_data_pkt = 1;
+
+	/*
+	 * Hand over to netvsc send. On a bad status the caller resets the
+	 * context pointers before retrying.
+	 */
+	return hv_nv_on_send(hv, pkt);
+}
+
+/*
+ * Send REMOTE_NDIS_INITIALIZE_MSG and record the outcome in
+ * hv->rndis_dev_state.
+ *
+ * Returns 0 when the host reports status 0, the error of
+ * hv_rf_send_request() when sending fails, and -1 otherwise (request
+ * allocation failed or the stored status is non-zero).
+ */
+static int
+hv_rf_init_device(struct hv_data *hv)
+{
+	struct rndis_request *req;
+	struct rndis_initialize_request *init_req;
+	int rc;
+
+	PMD_INIT_FUNC_TRACE();
+
+	req = hv_rndis_request(hv, REMOTE_NDIS_INITIALIZE_MSG,
+			RNDIS_MESSAGE_SIZE(struct rndis_initialize_request));
+	if (req == NULL)
+		return -1;
+
+	/* Fill in the initialize request */
+	init_req = &req->request_msg->msg.init_request;
+	init_req->major_version = RNDIS_MAJOR_VERSION;
+	init_req->minor_version = RNDIS_MINOR_VERSION;
+	/*
+	 * Per the RNDIS document, this should be set to the max MTU
+	 * plus the header size. However, 2048 works fine, so leaving
+	 * it as is.
+	 */
+	init_req->max_xfer_size = 2048;
+
+	hv->rndis_dev_state = RNDIS_DEV_INITIALIZING;
+
+	rc = hv_rf_send_request(hv, req);
+	if (rc != 0) {
+		hv->rndis_dev_state = RNDIS_DEV_UNINITIALIZED;
+		rte_free(req);
+		return rc;
+	}
+
+	/* Pre-load a non-zero status so that only a real answer can
+	 * make it read back as 0 */
+	req->response_msg.msg.init_complete.status = -1;
+	hv_nv_complete_request(hv, req);
+
+	if (req->response_msg.msg.init_complete.status == 0) {
+		PMD_PINFO_LOG(hv, DBG_LOAD, "Remote NDIS device is initialized");
+		hv->rndis_dev_state = RNDIS_DEV_INITIALIZED;
+		rc = 0;
+	} else {
+		PMD_PINFO_LOG(hv, DBG_LOAD, "Remote NDIS device left uninitialized");
+		hv->rndis_dev_state = RNDIS_DEV_UNINITIALIZED;
+		rc = -1;
+	}
+
+	rte_free(req);
+
+	return rc;
+}
+
+/*
+ * RNDIS filter query device
+ *
+ * Sends REMOTE_NDIS_QUERY_MSG for oid and copies the information buffer
+ * of the answer into result.
+ *
+ * oid:         object identifier to query
+ * result:      destination buffer
+ * result_size: size of the destination buffer in bytes
+ *
+ * Returns 0 on success; -1 if the request cannot be created or no query
+ * completion was stored; the error of hv_rf_send_request() if sending
+ * fails; -EINVAL if the information buffer does not fit into result or
+ * does not lie inside the stored response.
+ */
+static int
+hv_rf_query_device(struct hv_data *hv, uint32_t oid, void* result,
+		uint32_t result_size)
+{
+	struct rndis_request *request;
+	struct rndis_query_request *query;
+	struct rndis_query_complete *query_complete;
+	size_t avail;
+	int ret = 0;
+
+	PMD_INIT_FUNC_TRACE();
+
+	request = hv_rndis_request(hv, REMOTE_NDIS_QUERY_MSG,
+			RNDIS_MESSAGE_SIZE(struct rndis_query_request));
+	if (request == NULL) {
+		ret = -1;
+		goto cleanup;
+	}
+
+	/* Set up the rndis query */
+	query = &request->request_msg->msg.query_request;
+	query->oid = oid;
+	query->info_buffer_offset = sizeof(struct rndis_query_request);
+	query->info_buffer_length = 0;
+	query->device_vc_handle = 0;
+
+	ret = hv_rf_send_request(hv, request);
+	if (ret != 0) {
+		PMD_PERROR_LOG(hv, DBG_TX, "RNDISFILTER request failed to Send!");
+		goto cleanup;
+	}
+
+	hv_nv_complete_request(hv, request);
+
+	/*
+	 * response_msg starts out zeroed (rte_zmalloc), so anything other
+	 * than a query completion means no usable answer was stored. Do not
+	 * report success with an untouched result buffer in that case.
+	 */
+	if (request->response_msg.ndis_msg_type != REMOTE_NDIS_QUERY_CMPLT) {
+		PMD_PERROR_LOG(hv, DBG_TX, "No query completion received");
+		ret = -1;
+		goto cleanup;
+	}
+
+	/* Copy the response back */
+	query_complete = &request->response_msg.msg.query_complete;
+
+	if (query_complete->info_buffer_length > result_size) {
+		ret = -EINVAL;
+		goto cleanup;
+	}
+
+	/*
+	 * Offset and length come from the host: make sure the information
+	 * buffer lies entirely inside response_msg before reading it.
+	 */
+	avail = (size_t)(((uint8_t *)&request->response_msg +
+			sizeof(request->response_msg)) -
+			(uint8_t *)query_complete);
+	if (query_complete->info_buffer_offset > avail ||
+	    query_complete->info_buffer_length >
+			avail - query_complete->info_buffer_offset) {
+		ret = -EINVAL;
+		goto cleanup;
+	}
+
+	rte_memcpy(result, (void *)((unsigned long)query_complete +
+			query_complete->info_buffer_offset),
+			query_complete->info_buffer_length);
+
+cleanup:
+	rte_free(request);
+
+	return ret;
+}
+
+/*
+ * RNDIS filter query device MAC address
+ *
+ * Queries RNDIS_OID_802_3_PERMANENT_ADDRESS into hv->hw_mac_addr, logs
+ * the buffer contents and returns the result of hv_rf_query_device().
+ */
+static inline int
+hv_rf_query_device_mac(struct hv_data *hv)
+{
+	int ret;
+
+	ret = hv_rf_query_device(hv, RNDIS_OID_802_3_PERMANENT_ADDRESS,
+			&hv->hw_mac_addr, (uint32_t)HW_MACADDR_LEN);
+
+	PMD_PDEBUG_LOG(hv, DBG_TX, "MAC: %02x:%02x:%02x:%02x:%02x:%02x, ret = %d",
+			hv->hw_mac_addr[0], hv->hw_mac_addr[1],
+			hv->hw_mac_addr[2], hv->hw_mac_addr[3],
+			hv->hw_mac_addr[4], hv->hw_mac_addr[5],
+			ret);
+
+	return ret;
+}
+
+/*
+ * RNDIS filter query device link status
+ *
+ * Queries RNDIS_OID_GEN_MEDIA_CONNECT_STATUS and stores 1 in
+ * hv->link_status when the returned value is 0, otherwise 0. The field
+ * is updated whether or not the query succeeded; the query result is
+ * returned.
+ */
+static inline int
+hv_rf_query_device_link_status(struct hv_data *hv)
+{
+	/* All bits set up front, so an unanswered query reads as non-zero */
+	uint32_t media_state = -1;
+	int ret;
+
+	ret = hv_rf_query_device(hv, RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
+			&media_state, (uint32_t)sizeof(uint32_t));
+
+	if (media_state)
+		hv->link_status = 0;
+	else
+		hv->link_status = 1;
+
+	PMD_PDEBUG_LOG(hv, DBG_TX, "Link Status: %s",
+			hv->link_status ? "Up" : "Down");
+
+	return ret;
+}
+
+/*
+ * Prepare the device for use: mask host interrupts, allocate the receive
+ * packet descriptor and the RX completion message, reset the statistics,
+ * drain pending completions, then initialize RNDIS and read the MAC
+ * address.
+ *
+ * Returns 0 on success, -ENOMEM when an allocation fails, or the error
+ * of hv_rf_init_device() / hv_rf_query_device_mac(); in the latter two
+ * cases hv_rf_on_device_remove() has been called first.
+ */
+int
+hv_rf_on_device_add(struct hv_data *hv)
+{
+	int ret;
+
+	PMD_INIT_FUNC_TRACE();
+
+	hv->closed = 0;
+	hv->rb_data_size = hv->rb_size - sizeof(struct hv_vmbus_ring_buffer);
+	PMD_PDEBUG_LOG(hv, DBG_LOAD, "hv->rb_data_size = %u", hv->rb_data_size);
+
+	if (unlikely(hv->in->interrupt_mask == 0)) {
+		PMD_PINFO_LOG(hv, DBG_LOAD, "Disabling interrupts from host");
+		hv->in->interrupt_mask = 1;
+		rte_mb();
+	}
+
+	hv->netvsc_packet = rte_zmalloc("", sizeof(struct netvsc_packet),
+			RTE_CACHE_LINE_SIZE);
+	if (hv->netvsc_packet == NULL)
+		return -ENOMEM;
+	hv->netvsc_packet->is_data_pkt = 1;
+
+	hv->rx_comp_msg = rte_zmalloc("", sizeof(struct nvsp_msg),
+			RTE_CACHE_LINE_SIZE);
+	if (hv->rx_comp_msg == NULL) {
+		/* Do not leak the packet descriptor allocated just above */
+		rte_free(hv->netvsc_packet);
+		hv->netvsc_packet = NULL;
+		return -ENOMEM;
+	}
+
+	hv->rx_comp_msg->msg_type = nvsp_msg_1_type_send_rndis_pkt_complete;
+	hv->rx_comp_msg->msgs.send_rndis_pkt_complete.status =
+		nvsp_status_success;
+
+	memset(&hv->stats, 0, sizeof(struct hv_stats));
+
+	hv->receive_callback = hv_rf_receive_data;
+
+	/* It's for completion of requests which were sent from kernel-space part */
+	hv_nv_complete_request(hv, NULL);
+	hv_nv_complete_request(hv, NULL);
+
+	hv->rndis_dev_state = RNDIS_DEV_UNINITIALIZED;
+
+	/* Send the rndis initialization message */
+	ret = hv_rf_init_device(hv);
+	if (ret != 0) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "rndis init failed!");
+		hv_rf_on_device_remove(hv);
+		return ret;
+	}
+
+	/* Get the mac address */
+	ret = hv_rf_query_device_mac(hv);
+	if (ret != 0) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "rndis query mac failed!");
+		hv_rf_on_device_remove(hv);
+		return ret;
+	}
+
+	return ret;
+}
+
+/* Number of completion polls to wait for the halt request */
+#define HALT_COMPLETION_WAIT_COUNT 25
+
+/*
+ * RNDIS filter halt device
+ *
+ * Sends REMOTE_NDIS_HALT_MSG and polls for completions up to
+ * HALT_COMPLETION_WAIT_COUNT times until hv->hlt_req_sent becomes
+ * non-zero. On success hv->rndis_dev_state is set to
+ * RNDIS_DEV_UNINITIALIZED.
+ *
+ * Returns 0 on success, the error of hv_rf_send_request() when sending
+ * fails, and -1 when the request cannot be created or is not completed.
+ * The request is released on every path.
+ */
+static int
+hv_rf_halt_device(struct hv_data *hv)
+{
+	struct rndis_request *request;
+	struct rndis_halt_request *halt;
+	int i, ret;
+
+	PMD_INIT_FUNC_TRACE();
+
+	/* Attempt to do a rndis device halt */
+	request = hv_rndis_request(hv, REMOTE_NDIS_HALT_MSG,
+			RNDIS_MESSAGE_SIZE(struct rndis_halt_request));
+	if (!request) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "Unable to create RNDIS_HALT request");
+		return -1;
+	}
+
+	/* initialize "poor man's semaphore" */
+	hv->hlt_req_sent = 0;
+
+	/* Set up the rndis halt.
+	 * NOTE(review): hv_rndis_request() has already assigned a request
+	 * id, so this takes a second one - confirm that is intended. */
+	halt = &request->request_msg->msg.halt_request;
+	hv->new_request_id++;
+	halt->request_id = hv->new_request_id;
+
+	ret = hv_rf_send_request(hv, request);
+	if (ret) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "Failed to send RNDIS_HALT request: %d",
+				ret);
+		/* Nothing was sent, so the request can be released */
+		rte_free(request);
+		return ret;
+	}
+
+	/*
+	 * Wait for halt response from halt callback. We must wait for
+	 * the transaction response before freeing the request and other
+	 * resources.
+	 */
+	for (i = HALT_COMPLETION_WAIT_COUNT; i > 0; i--) {
+		hv_nv_complete_request(hv, request);
+		if (hv->hlt_req_sent != 0) {
+			PMD_PDEBUG_LOG(hv, DBG_LOAD, "Completed HALT request at %d try",
+					HALT_COMPLETION_WAIT_COUNT - i + 1);
+			break;
+		}
+	}
+	hv->hlt_req_sent = 0;
+	/* i only reaches 0 when every poll ended without the flag set */
+	if (i == 0) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "RNDIS_HALT request was not completed!");
+		rte_free(request);
+		return -1;
+	}
+
+	hv->rndis_dev_state = RNDIS_DEV_UNINITIALIZED;
+
+	rte_free(request);
+
+	return 0;
+}
+
+/* Upper bound for the drain loop counter in hyperv_tx_drain() */
+#define HV_TX_DRAIN_TRIES 50
+/*
+ * Poll for completions, 100 ms apart, until no sends are outstanding or
+ * the try counter runs out. Returns the remaining number of outstanding
+ * sends.
+ */
+static inline int
+hyperv_tx_drain(struct hv_data *hv)
+{
+	int tries_left = HV_TX_DRAIN_TRIES;
+
+	PMD_PDEBUG_LOG(hv, DBG_LOAD, "Waiting for TXs to be completed...");
+	for (;;) {
+		if (!(hv->num_outstanding_sends > 0))
+			break;
+		if (--tries_left == 0)
+			break;
+
+		hv_nv_complete_request(hv, NULL);
+		rte_delay_ms(100);
+	}
+
+	return hv->num_outstanding_sends;
+}
+
+/*
+ * RNDIS filter on device remove
+ *
+ * Marks the device closed, waits for outstanding sends to complete,
+ * halts the RNDIS device and releases hv->netvsc_packet.
+ *
+ * Returns -EAGAIN, without halting or freeing anything, when the sends
+ * cannot be drained; otherwise the result of hv_rf_halt_device().
+ */
+int
+hv_rf_on_device_remove(struct hv_data *hv)
+{
+	int ret;
+
+	PMD_INIT_FUNC_TRACE();
+	hv->closed = 1;
+	if (hyperv_tx_drain(hv) > 0) {
+		/* Hypervisor is not responding, exit with error here */
+		PMD_PWARN_LOG(hv, DBG_LOAD, "Can't drain TX queue: no response");
+		return -EAGAIN;
+	}
+	PMD_PDEBUG_LOG(hv, DBG_LOAD, "TX queue is empty, can halt the device");
+
+	/* Halt and release the rndis device */
+	hv->hlt_req_pending = 1;
+	ret = hv_rf_halt_device(hv);
+	hv->hlt_req_pending = 0;
+
+	rte_free(hv->netvsc_packet);
+	/* Clear the pointer so a repeated remove cannot free it twice */
+	hv->netvsc_packet = NULL;
+
+	return ret;
+}
+
+/*
+ * RNDIS filter set packet filter
+ *
+ * Sends a REMOTE_NDIS_SET_MSG for RNDIS_OID_GEN_CURRENT_PACKET_FILTER
+ * carrying new_filter and waits for the host to answer.
+ *
+ * Returns 0 on success, -1 if the request cannot be created or the host
+ * does not answer (the request is then deliberately not freed), the
+ * error of hv_rf_send_request() if sending fails, and -2 if the host
+ * reports a non-zero status.
+ */
+static int
+hv_rf_set_packet_filter(struct hv_data *hv, uint32_t new_filter)
+{
+	struct rndis_request *req;
+	struct rndis_set_request *set_req;
+	struct rndis_set_complete *reply;
+	int rc;
+
+	PMD_INIT_FUNC_TRACE();
+
+	req = hv_rndis_request(hv, REMOTE_NDIS_SET_MSG,
+			RNDIS_MESSAGE_SIZE(struct rndis_set_request) + sizeof(uint32_t));
+	if (req == NULL)
+		return -1;
+
+	/* Describe the information buffer that follows the set request */
+	set_req = &req->request_msg->msg.set_request;
+	set_req->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER;
+	set_req->info_buffer_length = sizeof(uint32_t);
+	set_req->info_buffer_offset = sizeof(struct rndis_set_request);
+
+	/* The filter value is placed right after the set request */
+	rte_memcpy((uint8_t *)set_req + sizeof(struct rndis_set_request),
+			&new_filter, sizeof(uint32_t));
+
+	rc = hv_rf_send_request(hv, req);
+	if (rc == 0) {
+		/* Sentinel: still 0xFFFF afterwards means nothing was stored */
+		req->response_msg.msg.set_complete.status = 0xFFFF;
+		hv_nv_complete_request(hv, req);
+
+		reply = &req->response_msg.msg.set_complete;
+		if (reply->status == 0xFFFF) {
+			/* Host is not responding, we can't free request in this case */
+			return -1;
+		}
+
+		/* Response received: a non-zero status is an error */
+		if (reply->status != 0)
+			rc = -2;
+	}
+
+	rte_free(req);
+	return rc;
+}
+
+/*
+ * RNDIS filter open device
+ *
+ * Does nothing (returns 0) when the device is marked closed. Otherwise
+ * selects the scatter-gather receive callback if jumbo frames are
+ * enabled, requests promiscuous mode and, on success, moves the device
+ * state to RNDIS_DEV_DATAINITIALIZED. Returns the result of
+ * hyperv_set_rx_mode().
+ */
+int
+hv_rf_on_open(struct hv_data *hv)
+{
+	int rc;
+
+	if (hv->closed)
+		return 0;
+
+	if (hv->jumbo_frame_support)
+		hv->receive_callback = hv_rf_receive_data_sg;
+
+	rc = hyperv_set_rx_mode(hv, 1, 0);
+	if (rc) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "RNDIS device is left unopened");
+		return rc;
+	}
+
+	PMD_PDEBUG_LOG(hv, DBG_LOAD, "RNDIS device opened");
+	hv->rndis_dev_state = RNDIS_DEV_DATAINITIALIZED;
+
+	return rc;
+}
+
+/*
+ * RNDIS filter on close
+ *
+ * Returns 0 without action when the device is marked closed or is not in
+ * state RNDIS_DEV_DATAINITIALIZED. Otherwise sets the packet filter to 0
+ * and, on success, moves the state back to RNDIS_DEV_INITIALIZED.
+ * Returns the result of hv_rf_set_packet_filter().
+ */
+int
+hv_rf_on_close(struct hv_data *hv)
+{
+	int rc;
+
+	PMD_INIT_FUNC_TRACE();
+
+	if (hv->closed)
+		return 0;
+
+	if (hv->rndis_dev_state != RNDIS_DEV_DATAINITIALIZED) {
+		PMD_PDEBUG_LOG(hv, DBG_LOAD, "RNDIS device state should be"
+				" RNDIS_DEV_DATAINITIALIZED, but now it is %u",
+				hv->rndis_dev_state);
+		return 0;
+	}
+
+	rc = hv_rf_set_packet_filter(hv, 0);
+	if (rc) {
+		PMD_PDEBUG_LOG(hv, DBG_LOAD, "RNDIS device is left unclosed");
+		return rc;
+	}
+
+	PMD_PDEBUG_LOG(hv, DBG_LOAD, "RNDIS device closed");
+	hv->rndis_dev_state = RNDIS_DEV_INITIALIZED;
+
+	return rc;
+}
+
+/*
+ * RX Flow
+ *
+ * Reads one raw packet from the channel into buffer. If it is a
+ * transfer-page data packet it is passed to hv_nv_on_receive() and the
+ * resulting hv->pkt_rxed is returned; in every other case 0 is returned.
+ */
+int
+hyperv_get_buffer(struct hv_data *hv, void* buffer, uint32_t bufferlen)
+{
+	struct hv_vm_packet_descriptor *desc;
+	uint64_t request_id;
+	uint32_t bytes_rxed;
+
+	if (unlikely(hv_vmbus_channel_recv_packet_raw(hv, buffer, bufferlen,
+			&bytes_rxed, &request_id, 1) != 0))
+		return 0;
+
+	/* Nothing was read */
+	if (!bytes_rxed)
+		return 0;
+
+	desc = (struct hv_vm_packet_descriptor *)buffer;
+	if (unlikely(desc->type !=
+			HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES))
+		return 0;
+
+	/* Cleared here; the receive path sets it when a packet is stored */
+	hv->pkt_rxed = 0;
+	hv_nv_on_receive(hv, desc);
+
+	return hv->pkt_rxed;
+}
+
+/*
+ * TX completions handler
+ *
+ * Reads raw packets into hv->desc until the read fails or returns no
+ * bytes, and hands every completion packet to hv_nv_on_send_completion().
+ * allow_rx_drop is OR-ed into the mode argument of the read call.
+ */
+void
+hyperv_scan_comps(struct hv_data *hv, int allow_rx_drop)
+{
+	uint64_t request_id;
+	uint32_t bytes_rxed;
+
+	for (;;) {
+		if (hv_vmbus_channel_recv_packet_raw(hv, hv->desc, PAGE_SIZE,
+				&bytes_rxed, &request_id, 2 | allow_rx_drop) != 0)
+			break;
+
+		if (bytes_rxed == 0)
+			break;
+
+		if (likely(hv->desc->type == HV_VMBUS_PACKET_TYPE_COMPLETION))
+			hv_nv_on_send_completion(hv, hv->desc);
+	}
+}
+
+/*
+ * Get link status
+ *
+ * Returns 2 when the link status query fails, otherwise the refreshed
+ * hv->link_status.
+ */
+uint8_t
+hyperv_get_link_status(struct hv_data *hv)
+{
+	return hv_rf_query_device_link_status(hv) ? 2 : hv->link_status;
+}
+
+/*
+ * Set/Reset RX mode
+ *
+ * promisc != 0 selects NDIS_PACKET_TYPE_PROMISCUOUS. Otherwise the
+ * filter is broadcast plus directed, with all-multicast added when
+ * mcast != 0. Returns the result of hv_rf_set_packet_filter().
+ */
+int
+hyperv_set_rx_mode(struct hv_data *hv, uint8_t promisc, uint8_t mcast)
+{
+	PMD_INIT_FUNC_TRACE();
+
+	if (promisc)
+		return hv_rf_set_packet_filter(hv, NDIS_PACKET_TYPE_PROMISCUOUS);
+
+	return hv_rf_set_packet_filter(hv,
+			NDIS_PACKET_TYPE_BROADCAST |
+			(mcast ? NDIS_PACKET_TYPE_ALL_MULTICAST : 0) |
+			NDIS_PACKET_TYPE_DIRECTED);
+}
diff --git a/lib/librte_pmd_hyperv/hyperv_drv.h b/lib/librte_pmd_hyperv/hyperv_drv.h
new file mode 100644
index 0000000..602b3d5
--- /dev/null
+++ b/lib/librte_pmd_hyperv/hyperv_drv.h
@@ -0,0 +1,558 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _HYPERV_DRV_H_
+#define _HYPERV_DRV_H_
+
+/*
+ * Definitions from hyperv.h
+ */
+#define HW_MACADDR_LEN 6
+#define HV_MAX_PAGE_BUFFER_COUNT 19
+
+/*
+ * Round value up to the next multiple of align, leaving already aligned
+ * values unchanged. The mask arithmetic is only correct when align is a
+ * power of two. Both arguments are evaluated more than once, so they
+ * must not have side effects.
+ */
+#define HV_ALIGN_UP(value, align) \
+	(((value) & ((align) - 1)) ? \
+	(((value) + ((align) - 1)) & ~((align) - 1)) : (value))
+
+/*
+ * Connection identifier type
+ *
+ * A 32-bit value that can be accessed as a whole (as_uint32_t) or as a
+ * 24-bit id followed by 8 reserved bits.
+ */
+union hv_vmbus_connection_id {
+ uint32_t as_uint32_t;
+ struct {
+ uint32_t id:24;
+ uint32_t reserved:8;
+ } u;
+
+} __attribute__((packed));
+
+/*
+ * First word of struct hv_vmbus_monitor_page: 4 group_enable bits and
+ * 28 reserved bits, also accessible as one 32-bit value.
+ */
+union hv_vmbus_monitor_trigger_state {
+ uint32_t as_uint32_t;
+ struct {
+ uint32_t group_enable:4;
+ uint32_t rsvd_z:28;
+ } u;
+};
+
+/*
+ * One trigger group of the monitor page: a 64-bit value made of the
+ * 32-bit words "pending" and "armed".
+ */
+union hv_vmbus_monitor_trigger_group {
+ uint64_t as_uint64_t;
+ struct {
+ uint32_t pending;
+ uint32_t armed;
+ } u;
+};
+
+/*
+ * One entry of the parameter[4][32] table in struct
+ * hv_vmbus_monitor_page: a connection id, a flag number and a reserved
+ * 16-bit field.
+ */
+struct hv_vmbus_monitor_parameter {
+ union hv_vmbus_connection_id connection_id;
+ uint16_t flag_number;
+ uint16_t rsvd_z;
+};
+
+/*
+ * hv_vmbus_monitor_page Layout (offsets in the left column are hexadecimal)
+ * ------------------------------------------------------
+ * | 0 | trigger_state (4 bytes) | Rsvd1 (4 bytes) |
+ * | 8 | trigger_group[0] |
+ * | 10 | trigger_group[1] |
+ * | 18 | trigger_group[2] |
+ * | 20 | trigger_group[3] |
+ * | 28 | Rsvd2[0] |
+ * | 30 | Rsvd2[1] |
+ * | 38 | Rsvd2[2] |
+ * | 40 | next_check_time[0][0] | next_check_time[0][1] |
+ * | ... |
+ * | 240 | latency[0][0..3] |
+ * | 340 | Rsvd3[0] |
+ * | 440 | parameter[0][0] |
+ * | 448 | parameter[0][1] |
+ * | ... |
+ * | 840 | Rsvd4[0] |
+ * ------------------------------------------------------
+ */
+
+struct hv_vmbus_monitor_page {
+ union hv_vmbus_monitor_trigger_state trigger_state;
+ uint32_t rsvd_z1;
+
+ union hv_vmbus_monitor_trigger_group trigger_group[4];
+ uint64_t rsvd_z2[3];
+
+ int32_t next_check_time[4][32];
+
+ uint16_t latency[4][32];
+ uint64_t rsvd_z3[32];
+
+ struct hv_vmbus_monitor_parameter parameter[4][32];
+
+ /* Pads the structure from offset 0x840 up to 0x1000 bytes */
+ uint8_t rsvd_z4[1984];
+};
+
+/*
+ * Values of the type field of struct hv_vm_packet_descriptor that this
+ * driver distinguishes.
+ */
+enum hv_vmbus_packet_type {
+ HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES = 0x7,
+ HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT = 0x9,
+ HV_VMBUS_PACKET_TYPE_COMPLETION = 0xb,
+};
+
+#define HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED 1
+
+/*
+ * Common 16-byte header at the start of every packet read from the
+ * channel (packed).
+ * NOTE(review): the "8" suffix of data_offset8/length8 presumably means
+ * the values are counted in 8-byte units - confirm.
+ */
+struct hv_vm_packet_descriptor {
+ uint16_t type;
+ uint16_t data_offset8;
+ uint16_t length8;
+ uint16_t flags;
+ uint64_t transaction_id;
+} __attribute__((packed));
+
+/*
+ * One range of a transfer-page packet: a byte count and a byte offset
+ * (packed, 8 bytes).
+ */
+struct hv_vm_transfer_page {
+ uint32_t byte_count;
+ uint32_t byte_offset;
+} __attribute__((packed));
+
+/*
+ * Header of a transfer-page packet: the common descriptor followed by
+ * the page set id and a list of ranges.
+ * NOTE(review): ranges[] is declared with one element; range_count
+ * presumably gives the real number of entries - confirm.
+ */
+struct hv_vm_transfer_page_packet_header {
+ struct hv_vm_packet_descriptor d;
+ uint16_t transfer_page_set_id;
+ uint8_t sender_owns_set;
+ uint8_t reserved;
+ uint32_t range_count;
+ struct hv_vm_transfer_page ranges[1];
+} __attribute__((packed));
+
+/*
+ * Ring buffer control block. The three 32-bit indices/flags plus the
+ * reserved area add up to 4096 bytes, after which the ring data begins.
+ */
+struct hv_vmbus_ring_buffer {
+ volatile uint32_t write_index;
+ volatile uint32_t read_index;
+ /*
+ * NOTE: The interrupt_mask field is used only for channels, but
+ * vmbus connection also uses this data structure
+ */
+ volatile uint32_t interrupt_mask;
+ /* pad it to PAGE_SIZE so that data starts on a page */
+ uint8_t reserved[4084];
+
+ /*
+ * WARNING: Ring data starts here + ring_data_start_offset
+ * !!! DO NOT place any fields below this !!!
+ */
+ uint8_t buffer[0]; /* doubles as interrupt mask */
+} __attribute__((packed));
+
+/*
+ * Describes a piece of data inside one page: length and offset within
+ * the page, and the page frame number (packed, 16 bytes).
+ */
+struct hv_vmbus_page_buffer {
+ uint32_t length;
+ uint32_t offset;
+ uint64_t pfn;
+} __attribute__((packed));
+
+/*
+ * Definitions from hv_vmbus_priv.h
+ */
+/* One element of a buffer list: a data pointer and its length */
+struct hv_vmbus_sg_buffer_list {
+ void* data;
+ uint32_t length;
+};
+
+/*
+ * Packet that references data through page buffers. The first five
+ * fields have the same names, types and order as struct
+ * hv_vm_packet_descriptor; range[] has room for
+ * HV_MAX_PAGE_BUFFER_COUNT entries.
+ */
+struct hv_vmbus_channel_packet_page_buffer {
+ uint16_t type;
+ uint16_t data_offset8;
+ uint16_t length8;
+ uint16_t flags;
+ uint64_t transaction_id;
+ uint32_t reserved;
+ uint32_t range_count;
+ struct hv_vmbus_page_buffer range[HV_MAX_PAGE_BUFFER_COUNT];
+} __attribute__((packed));
+
+/*
+ * Definitions from hv_net_vsc.h
+ */
+#define NETVSC_PACKET_MAXPAGE 16
+#define NETVSC_PACKET_SIZE 256
+
+/*
+ * This message is used by both the VSP and the VSC to complete
+ * a RNDIS message to the opposite channel endpoint. At this
+ * point, the initiator of this message cannot use any resources
+ * associated with the original RNDIS packet.
+ */
+/* Status codes; nvsp_status_success is stored in the RX completion
+ * message prepared by hv_rf_on_device_add() */
+enum nvsp_status_ {
+ nvsp_status_none = 0,
+ nvsp_status_success,
+ nvsp_status_failure,
+};
+
+/* Body of the "RNDIS packet complete" message: a single status word */
+struct nvsp_1_msg_send_rndis_pkt_complete {
+ uint32_t status;
+} __attribute__((packed));
+
+/*
+ * Values for the msg_type field of struct nvsp_msg. Numbering starts at
+ * 100 and continues sequentially.
+ */
+enum nvsp_msg_type {
+ /*
+ * Version 1 Messages
+ */
+ nvsp_msg_1_type_send_ndis_vers = 100,
+
+ nvsp_msg_1_type_send_rx_buf,
+ nvsp_msg_1_type_send_rx_buf_complete,
+ nvsp_msg_1_type_revoke_rx_buf,
+
+ nvsp_msg_1_type_send_send_buf,
+ nvsp_msg_1_type_send_send_buf_complete,
+ nvsp_msg_1_type_revoke_send_buf,
+
+ nvsp_msg_1_type_send_rndis_pkt,
+ nvsp_msg_1_type_send_rndis_pkt_complete,
+};
+
+/* Body of the "send RNDIS packet" message (packed, 12 bytes) */
+struct nvsp_1_msg_send_rndis_pkt {
+ /*
+ * This field is specified by RNDIS. They assume there's
+ * two different channels of communication. However,
+ * the Network VSP only has one. Therefore, the channel
+ * travels with the RNDIS packet.
+ */
+ uint32_t chan_type;
+
+ /*
+ * This field is used to send part or all of the data
+ * through a send buffer. This value specifies an
+ * index into the send buffer. If the index is
+ * 0xFFFFFFFF, then the send buffer is not being used
+ * and all of the data was sent through other VMBus
+ * mechanisms.
+ */
+ uint32_t send_buf_section_idx;
+ uint32_t send_buf_section_size;
+} __attribute__((packed));
+
+/*
+ * ALL Messages
+ *
+ * A message type followed by a union of the message bodies; the raw[]
+ * member fixes the size of the union at 24 bytes.
+ */
+struct nvsp_msg {
+ uint32_t msg_type;
+ union {
+ struct nvsp_1_msg_send_rndis_pkt send_rndis_pkt;
+ struct nvsp_1_msg_send_rndis_pkt_complete send_rndis_pkt_complete;
+ /* size is set like in linux kernel driver */
+ uint8_t raw[24];
+ } msgs;
+} __attribute__((packed));
+
+#define NETVSC_RECEIVE_BUFFER_ID 0xcafe
+
+/*
+ * Driver-side description of one packet: flags, the RNDIS header
+ * location (extension) and up to NETVSC_PACKET_MAXPAGE page buffers.
+ */
+struct netvsc_packet {
+ uint8_t is_data_pkt; /* One byte */
+ uint8_t ext_pages;
+ uint16_t vlan_tci;
+
+ /* hv_rf_on_send() builds the RNDIS header at this address */
+ void *extension;
+ uint64_t extension_phys_addr;
+ uint32_t tot_data_buf_len;
+ /* Number of entries of page_buffers[] in use */
+ uint32_t page_buf_count;
+ struct hv_vmbus_page_buffer page_buffers[NETVSC_PACKET_MAXPAGE];
+};
+
+/*
+ * Definitions from hv_rndis.h
+ */
+#define RNDIS_MAJOR_VERSION 0x00000001
+#define RNDIS_MINOR_VERSION 0x00000000
+
+#define STATUS_BUFFER_OVERFLOW (0x80000005L)
+
+/*
+ * Remote NDIS message types
+ */
+#define REMOTE_NDIS_PACKET_MSG 0x00000001
+#define REMOTE_NDIS_INITIALIZE_MSG 0x00000002
+#define REMOTE_NDIS_HALT_MSG 0x00000003
+#define REMOTE_NDIS_QUERY_MSG 0x00000004
+#define REMOTE_NDIS_SET_MSG 0x00000005
+#define REMOTE_NDIS_RESET_MSG 0x00000006
+#define REMOTE_NDIS_INDICATE_STATUS_MSG 0x00000007
+#define REMOTE_NDIS_KEEPALIVE_MSG 0x00000008
+/*
+ * Remote NDIS message completion types
+ */
+#define REMOTE_NDIS_INITIALIZE_CMPLT 0x80000002
+#define REMOTE_NDIS_QUERY_CMPLT 0x80000004
+#define REMOTE_NDIS_SET_CMPLT 0x80000005
+#define REMOTE_NDIS_RESET_CMPLT 0x80000006
+#define REMOTE_NDIS_KEEPALIVE_CMPLT 0x80000008
+
+#define RNDIS_OID_GEN_MEDIA_CONNECT_STATUS 0x00010114
+#define RNDIS_OID_GEN_CURRENT_PACKET_FILTER 0x0001010E
+#define RNDIS_OID_802_3_PERMANENT_ADDRESS 0x01010101
+#define RNDIS_OID_802_3_CURRENT_ADDRESS 0x01010102
+#define RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER 0x0001021B
+
+#define RNDIS_CONFIG_PARAM_TYPE_STRING 2
+/* extended info after the RNDIS request message */
+#define RNDIS_EXT_LEN 100
+/*
+ * Packet extension field contents associated with a Data message.
+ */
+struct rndis_per_packet_info {
+ uint32_t size;
+ uint32_t type;
+ uint32_t per_packet_info_offset;
+};
+
+#define ieee_8021q_info 6
+
+/*
+ * 802.1Q tag fields packed into one 32-bit word. u1.s1 exposes the
+ * individual bit-fields, u1.value the same word as a whole.
+ */
+struct ndis_8021q_info {
+ union {
+ struct {
+ uint32_t user_pri:3; /* User Priority */
+ uint32_t cfi:1; /* Canonical Format ID */
+ uint32_t vlan_id:12;
+ uint32_t reserved:16;
+ } s1;
+ uint32_t value;
+ } u1;
+};
+
+/* Format of Information buffer passed in a SetRequest for the OID */
+/* OID_GEN_RNDIS_CONFIG_PARAMETER. */
+struct rndis_config_parameter_info {
+ uint32_t parameter_name_offset;
+ uint32_t parameter_name_length;
+ uint32_t parameter_type;
+ uint32_t parameter_value_offset;
+ uint32_t parameter_value_length;
+};
+
+/*
+ * NdisInitialize message
+ */
+struct rndis_initialize_request {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ uint32_t major_version;
+ uint32_t minor_version;
+ uint32_t max_xfer_size;
+};
+
+/*
+ * Response to NdisInitialize
+ */
+struct rndis_initialize_complete {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS status */
+ uint32_t status;
+ uint32_t major_version;
+ uint32_t minor_version;
+ uint32_t device_flags;
+ /* RNDIS medium */
+ uint32_t medium;
+ uint32_t max_pkts_per_msg;
+ uint32_t max_xfer_size;
+ uint32_t pkt_align_factor;
+ uint32_t af_list_offset;
+ uint32_t af_list_size;
+};
+
+/*
+ * NdisSetRequest message
+ */
+struct rndis_set_request {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS OID */
+ uint32_t oid;
+ uint32_t info_buffer_length;
+ uint32_t info_buffer_offset;
+ /* RNDIS handle */
+ uint32_t device_vc_handle;
+};
+
+/*
+ * Response to NdisSetRequest
+ */
+struct rndis_set_complete {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS status */
+ uint32_t status;
+};
+
+/*
+ * NdisQueryRequest message
+ */
+struct rndis_query_request {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS OID */
+ uint32_t oid;
+ uint32_t info_buffer_length;
+ uint32_t info_buffer_offset;
+ /* RNDIS handle */
+ uint32_t device_vc_handle;
+};
+
+/*
+ * Response to NdisQueryRequest
+ */
+struct rndis_query_complete {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS status */
+ uint32_t status;
+ uint32_t info_buffer_length;
+ uint32_t info_buffer_offset;
+};
+
+/*
+ * Data message. All offset fields contain byte offsets from the beginning
+ * of the rndis_packet structure. All length fields are in bytes.
+ * VcHandle is set to 0 for connectionless data, otherwise it
+ * contains the VC handle.
+ */
+struct rndis_packet {
+ uint32_t data_offset;
+ uint32_t data_length;
+ uint32_t oob_data_offset;
+ uint32_t oob_data_length;
+ uint32_t num_oob_data_elements;
+ uint32_t per_pkt_info_offset;
+ uint32_t per_pkt_info_length;
+ /* RNDIS handle */
+ uint32_t vc_handle;
+ uint32_t reserved;
+};
+
+/*
+ * NdisHalt message
+ */
+struct rndis_halt_request {
+ /* RNDIS request ID */
+ uint32_t request_id;
+};
+
+/*
+ * NdisMIndicateStatus message
+ */
+struct rndis_indicate_status {
+ /* RNDIS status */
+ uint32_t status;
+ uint32_t status_buf_length;
+ uint32_t status_buf_offset;
+};
+
+#define RNDIS_STATUS_MEDIA_CONNECT (0x4001000BL)
+#define RNDIS_STATUS_MEDIA_DISCONNECT (0x4001000CL)
+#define RNDIS_STATUS_INVALID_DATA (0xC0010015L)
+
+/*
+ * union with all of the RNDIS messages
+ */
+union rndis_msg_container {
+ struct rndis_initialize_request init_request;
+ struct rndis_initialize_complete init_complete;
+ struct rndis_set_request set_request;
+ struct rndis_set_complete set_complete;
+ struct rndis_query_request query_request;
+ struct rndis_query_complete query_complete;
+ struct rndis_packet packet;
+ struct rndis_halt_request halt_request;
+ struct rndis_indicate_status indicate_status;
+#if 0
+ rndis_keepalive_request keepalive_request;
+ rndis_reset_request reset_request;
+ rndis_reset_complete reset_complete;
+ rndis_keepalive_complete keepalive_complete;
+ rcondis_mp_create_vc co_miniport_create_vc;
+ rcondis_mp_delete_vc co_miniport_delete_vc;
+ rcondis_indicate_status co_miniport_status;
+ rcondis_mp_activate_vc_request co_miniport_activate_vc;
+ rcondis_mp_deactivate_vc_request co_miniport_deactivate_vc;
+ rcondis_mp_create_vc_complete co_miniport_create_vc_complete;
+ rcondis_mp_delete_vc_complete co_miniport_delete_vc_complete;
+ rcondis_mp_activate_vc_complete co_miniport_activate_vc_complete;
+ rcondis_mp_deactivate_vc_complete co_miniport_deactivate_vc_complete;
+#endif
+ uint32_t packet_ex[16]; /* to pad the union size */
+};
+
+/*
+ * RNDIS message: a fixed header (message type and total length)
+ * followed by the type-specific body held in msg.
+ */
+struct rndis_msg {
+ uint32_t ndis_msg_type;
+
+ /*
+ * Total length of this message, from the beginning
+ * of the rndis_msg struct, in bytes.
+ */
+ uint32_t msg_len;
+
+ /* Actual message */
+ union rndis_msg_container msg;
+};
+
+#define RNDIS_HEADER_SIZE (sizeof(struct rndis_msg) - sizeof(union rndis_msg_container))
+
+#define NDIS_PACKET_TYPE_DIRECTED 0x00000001
+#define NDIS_PACKET_TYPE_MULTICAST 0x00000002
+#define NDIS_PACKET_TYPE_ALL_MULTICAST 0x00000004
+#define NDIS_PACKET_TYPE_BROADCAST 0x00000008
+#define NDIS_PACKET_TYPE_SOURCE_ROUTING 0x00000010
+#define NDIS_PACKET_TYPE_PROMISCUOUS 0x00000020
+
+/*
+ * get the size of an RNDIS message. Pass in the message type,
+ * rndis_set_request, rndis_packet for example
+ */
+#define RNDIS_MESSAGE_SIZE(message) \
+ (sizeof(message) + (sizeof(struct rndis_msg) - sizeof(union rndis_msg_container)))
+
+
+/*
+ * Definitions from hv_rndis_filter.h
+ */
+/*
+ * RNDIS device states, numbered from 0 in declaration order.
+ * NOTE(review): the code that moves a device between these states is not
+ * part of this header - confirm the intended transitions there.
+ */
+enum {
+ RNDIS_DEV_UNINITIALIZED = 0,
+ RNDIS_DEV_INITIALIZING,
+ RNDIS_DEV_INITIALIZED,
+ RNDIS_DEV_DATAINITIALIZED,
+};
+
+/*
+ * Bookkeeping for one RNDIS control request: an inline buffer for the
+ * response, the netvsc packet (plus one spare page buffer) used to send
+ * the request, and a pointer to the request message together with a
+ * memzone handle.
+ * NOTE(review): request_msg presumably points into request_msg_memzone -
+ * confirm in the code that allocates requests.
+ */
+struct rndis_request {
+ /* assumed a fixed size response here. */
+ struct rndis_msg response_msg;
+
+ /* Simplify allocation by having a netvsc packet inline */
+ struct netvsc_packet pkt;
+ /* set additional buffer since packet can cross page boundary */
+ struct hv_vmbus_page_buffer buffer;
+ /* assumed a fixed size request here. */
+ struct rndis_msg *request_msg;
+ const struct rte_memzone *request_msg_memzone;
+};
+
+/*
+ * Wrapper around a single RNDIS message. The TX path uses
+ * sizeof(struct rndis_filter_packet) when sizing its per-packet area
+ * (TX_PKT_SIZE in hyperv_rxtx.c).
+ */
+struct rndis_filter_packet {
+ struct rndis_msg message;
+};
+
+#endif /* _HYPERV_DRV_H_ */
diff --git a/lib/librte_pmd_hyperv/hyperv_ethdev.c b/lib/librte_pmd_hyperv/hyperv_ethdev.c
new file mode 100644
index 0000000..cf6d12f
--- /dev/null
+++ b/lib/librte_pmd_hyperv/hyperv_ethdev.c
@@ -0,0 +1,334 @@
+/*-
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include <assert.h>
+#include <unistd.h>
+#include "hyperv.h"
+
+/*
+ * vmbus device id table referenced by rte_hyperv_pmd.vmbus_drv.id_table.
+ * NOTE(review): the only entry has device_id 0 - presumably it doubles as
+ * the table terminator; confirm against the vmbus probe code.
+ */
+static struct rte_vmbus_id vmbus_id_hyperv_map[] = {
+ {
+ .device_id = 0x0,
+ },
+};
+
+/*
+ * Report the fixed limits of the Hyper-V PMD: number of RX/TX queues,
+ * RX buffer and packet size bounds, and number of MAC addresses.
+ */
+static void
+hyperv_dev_info_get(__rte_unused struct rte_eth_dev *dev,
+		struct rte_eth_dev_info *dev_info)
+{
+	PMD_INIT_FUNC_TRACE();
+
+	/* addressing */
+	dev_info->max_mac_addrs = HV_MAX_MAC_ADDRS;
+
+	/* receive size limits */
+	dev_info->max_rx_pktlen = HV_MAX_RX_PKT_LEN;
+	dev_info->min_rx_bufsize = HV_MIN_RX_BUF_SIZE;
+
+	/* queue counts */
+	dev_info->max_tx_queues = HV_MAX_TX_QUEUES;
+	dev_info->max_rx_queues = HV_MAX_RX_QUEUES;
+}
+
+/*
+ * Store *link into dev->data->dev_link with a single 64-bit
+ * compare-and-set. Returns 0 when the cmpset succeeded and -1 when it
+ * did not.
+ */
+inline int
+rte_hv_dev_atomic_write_link_status(struct rte_eth_dev *dev,
+		struct rte_eth_link *link)
+{
+	uint64_t *stored = (uint64_t *)&(dev->data->dev_link);
+	uint64_t *wanted = (uint64_t *)link;
+
+	return rte_atomic64_cmpset(stored, *stored, *wanted) ? 0 : -1;
+}
+
+/*
+ * Copy dev->data->dev_link into *link with a single 64-bit
+ * compare-and-set. Returns 0 when the cmpset succeeded and -1 when it
+ * did not.
+ */
+inline int
+rte_hv_dev_atomic_read_link_status(struct rte_eth_dev *dev,
+		struct rte_eth_link *link)
+{
+	uint64_t *out = (uint64_t *)link;
+	uint64_t *stored = (uint64_t *)&(dev->data->dev_link);
+
+	return rte_atomic64_cmpset(out, *out, *stored) ? 0 : -1;
+}
+
+/*
+ * Publish the driver's link state in dev->data->dev_link (always
+ * reported as 10G full duplex).
+ * return 0 means link status changed, -1 means not changed
+ */
+static int
+hyperv_dev_link_update(struct rte_eth_dev *dev,
+		__rte_unused int wait_to_complete)
+{
+	struct hv_data *hv = dev->data->dev_private;
+	struct rte_eth_link prev, cur;
+	uint8_t ret;
+
+	PMD_INIT_FUNC_TRACE();
+	memset(&prev, 0, sizeof(prev));
+	memset(&cur, 0, sizeof(cur));
+	rte_hv_dev_atomic_read_link_status(dev, &prev);
+
+	/*
+	 * Link is down and the request counter has reached
+	 * HV_MAX_LINK_REQ: call hyperv_get_link_status() again.
+	 */
+	if (!hv->link_status && (hv->link_req_cnt == HV_MAX_LINK_REQ)) {
+		ret = hyperv_get_link_status(hv);
+		if (ret > 1)
+			return -1;
+		hv->link_req_cnt = 0;
+	}
+	hv->link_req_cnt++;
+
+	cur.link_status = hv->link_status;
+	cur.link_speed = ETH_LINK_SPEED_10000;
+	cur.link_duplex = ETH_LINK_FULL_DUPLEX;
+	rte_hv_dev_atomic_write_link_status(dev, &cur);
+
+	if (prev.link_status != cur.link_status)
+		return 0;
+	return -1;
+}
+
+/*
+ * Apply the port configuration: remember whether jumbo frames were
+ * requested and copy the hardware MAC address into the first entry of
+ * dev->data->mac_addrs. Always returns 0.
+ */
+static int
+hyperv_dev_configure(struct rte_eth_dev *dev)
+{
+	struct hv_data *hv = dev->data->dev_private;
+
+	PMD_INIT_FUNC_TRACE();
+
+	hv->jumbo_frame_support = dev->data->dev_conf.rxmode.jumbo_frame;
+	rte_memcpy(dev->data->mac_addrs->addr_bytes, hv->hw_mac_addr,
+		   ETHER_ADDR_LEN);
+
+	return 0;
+}
+
+/*
+ * Bind the driver-private state to the vmbus device behind @dev: copy
+ * the uio fd and monitor ids, pick up the mapped ring / interrupt /
+ * monitor / receive-buffer regions, split the ring mapping into its
+ * outbound and inbound halves, install the burst functions and finally
+ * call hv_rf_on_device_add().
+ *
+ * Returns -1 when one of the memory resources is not mapped, otherwise
+ * the result of hv_rf_on_device_add().
+ */
+static int
+hyperv_init(struct rte_eth_dev *dev)
+{
+	struct hv_data *hv = dev->data->dev_private;
+	struct rte_vmbus_device *vmbus_dev = dev->vmbus_dev;
+
+	hv->uio_fd = vmbus_dev->uio_fd;
+	hv->vmbus_device = vmbus_dev->id.device_id;
+	hv->monitor_bit = (uint8_t)(vmbus_dev->vmbus_monitor_id % 32);
+	hv->monitor_group = (uint8_t)(vmbus_dev->vmbus_monitor_id / 32);
+	PMD_PDEBUG_LOG(hv, DBG_LOAD, "hyperv_init for vmbus device %d",
+		       vmbus_dev->id.device_id);
+
+	/* get the memory mappings */
+	hv->ring_pages = vmbus_dev->mem_resource[TXRX_RING_MAP].addr;
+	hv->int_page = vmbus_dev->mem_resource[INT_PAGE_MAP].addr;
+	hv->monitor_pages =
+		(struct hv_vmbus_monitor_page *)
+		vmbus_dev->mem_resource[MON_PAGE_MAP].addr;
+	hv->recv_buf = vmbus_dev->mem_resource[RECV_BUF_MAP].addr;
+
+	/*
+	 * Fail explicitly instead of relying on assert(): assertions are
+	 * compiled out with NDEBUG and the NULL pointers would then be
+	 * dereferenced later on.
+	 */
+	if (hv->ring_pages == NULL || hv->int_page == NULL ||
+	    hv->monitor_pages == NULL || hv->recv_buf == NULL) {
+		PMD_PERROR_LOG(hv, DBG_LOAD,
+			       "vmbus memory resources are not mapped");
+		return -1;
+	}
+	hv->kernel_initialized = 1;
+
+	/* separate send/recv int_pages */
+	hv->recv_interrupt_page = hv->int_page;
+	hv->send_interrupt_page =
+		((uint8_t *) hv->int_page + (PAGE_SIZE >> 1));
+
+	/*
+	 * retrieve in/out ring_buffers: the outbound ring is the first half
+	 * of the mapping, the inbound ring the second half. Byte-pointer
+	 * arithmetic avoids a pointer/integer cast of mismatched width.
+	 */
+	hv->out = hv->ring_pages;
+	hv->in = (void *)((uint8_t *)hv->out +
+			(vmbus_dev->mem_resource[TXRX_RING_MAP].len / 2));
+	hv->rb_size = (vmbus_dev->mem_resource[TXRX_RING_MAP].len / 2);
+
+	dev->rx_pkt_burst = hyperv_recv_pkts;
+	dev->tx_pkt_burst = hyperv_xmit_pkts;
+
+	return hv_rf_on_device_add(hv);
+}
+
+/*
+ * Pieces of the 32-bit command word written to the uio file descriptor
+ * by hyperv_dev_start() and hyperv_dev_stop(). The device id is shifted
+ * left by one, which leaves bit 0 free (hyperv_dev_stop() sets it).
+ * Both macros expand to expressions over locals named `hv` and `dev`,
+ * which therefore must be in scope wherever the macros are used.
+ */
+#define HV_DEV_ID (hv->vmbus_device << 1)
+#define HV_MTU (dev->data->dev_conf.rxmode.max_rx_pkt_len << 9)
+
+/*
+ * Start the port. If the kernel side is not initialized, write the
+ * command word (device id and MTU bits) to the uio fd, map the vmbus
+ * resources and run hyperv_init(); then open the RNDIS filter.
+ *
+ * Returns 0 on success, -1 when the write to the uio fd fails or is
+ * short, otherwise the error reported by the failing helper.
+ */
+static int
+hyperv_dev_start(struct rte_eth_dev *dev)
+{
+	int ret;
+	uint32_t cmd;
+	ssize_t bytes;	/* signed: write() reports errors as -1 */
+	struct hv_data *hv = dev->data->dev_private;
+
+	PMD_INIT_FUNC_TRACE();
+	if (!hv->kernel_initialized) {
+		cmd = HV_DEV_ID | HV_MTU;
+		bytes = write(hv->uio_fd, &cmd, sizeof(cmd));
+		if (bytes < (ssize_t)sizeof(cmd)) {
+			PMD_PERROR_LOG(hv, DBG_LOAD, "write on uio_fd %d failed",
+				       hv->uio_fd);
+			return -1;
+		}
+		ret = vmbus_uio_map_resource(dev->vmbus_dev);
+		if (ret < 0) {
+			PMD_PERROR_LOG(hv, DBG_LOAD, "Failed to map resources");
+			return ret;
+		}
+		ret = hyperv_init(dev);
+		if (ret)
+			return ret;
+	}
+	ret = hv_rf_on_open(hv);
+	if (ret) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "hv_rf_on_open failed");
+		return ret;
+	}
+	/* make the next hyperv_dev_link_update() query the link state */
+	hv->link_req_cnt = HV_MAX_LINK_REQ;
+
+	return ret;
+}
+
+/*
+ * Stop the port unless it is already marked closed: close and remove
+ * the RNDIS filter device, write the command word with bit 0 set to the
+ * uio fd when the kernel side is initialized, and report the link as
+ * down.
+ */
+static void
+hyperv_dev_stop(struct rte_eth_dev *dev)
+{
+	struct hv_data *hv = dev->data->dev_private;
+	uint32_t cmd;
+	ssize_t bytes;	/* signed: write() reports errors as -1 */
+
+	PMD_INIT_FUNC_TRACE();
+	if (hv->closed)
+		return;
+
+	hv_rf_on_close(hv);
+	hv_rf_on_device_remove(hv);
+	if (hv->kernel_initialized) {
+		cmd = 1 | HV_DEV_ID;
+		bytes = write(hv->uio_fd, &cmd, sizeof(cmd));
+		/* only a complete write counts as success */
+		if (bytes == (ssize_t)sizeof(cmd)) {
+			hv->kernel_initialized = 0;
+		} else {
+			PMD_PWARN_LOG(hv, DBG_LOAD,
+				      "write to uio_fd %d failed: (%zd)b",
+				      hv->uio_fd, bytes);
+		}
+	}
+	hv->link_status = 0;
+}
+
+/* Close the port; this driver implements close as a plain stop. */
+static void
+hyperv_dev_close(struct rte_eth_dev *dev)
+{
+	PMD_INIT_FUNC_TRACE();
+
+	hyperv_dev_stop(dev);
+}
+
+/* Turn promiscuous reception on; the all-multicast setting is kept. */
+static void
+hyperv_dev_promisc_enable(struct rte_eth_dev *dev)
+{
+	PMD_INIT_FUNC_TRACE();
+	hyperv_set_rx_mode(dev->data->dev_private, 1,
+			   dev->data->all_multicast);
+}
+
+/* Turn promiscuous reception off; the all-multicast setting is kept. */
+static void
+hyperv_dev_promisc_disable(struct rte_eth_dev *dev)
+{
+	PMD_INIT_FUNC_TRACE();
+	hyperv_set_rx_mode(dev->data->dev_private, 0,
+			   dev->data->all_multicast);
+}
+
+/* Turn all-multicast reception on; the promiscuous setting is kept. */
+static void
+hyperv_dev_allmulticast_enable(struct rte_eth_dev *dev)
+{
+	PMD_INIT_FUNC_TRACE();
+	hyperv_set_rx_mode(dev->data->dev_private,
+			   dev->data->promiscuous, 1);
+}
+
+/* Turn all-multicast reception off; the promiscuous setting is kept. */
+static void
+hyperv_dev_allmulticast_disable(struct rte_eth_dev *dev)
+{
+	PMD_INIT_FUNC_TRACE();
+	hyperv_set_rx_mode(dev->data->dev_private,
+			   dev->data->promiscuous, 0);
+}
+
+/*
+ * Fill *stats from the software counters kept in hv->stats. Fields that
+ * the driver does not track are reported as zero.
+ */
+static void
+hyperv_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+	struct hv_data *hv = dev->data->dev_private;
+
+	PMD_INIT_FUNC_TRACE();
+
+	memset(stats, 0, sizeof(*stats));
+
+	/* receive side */
+	stats->ipackets = hv->stats.ipkts;
+	stats->ibytes = hv->stats.ibytes;
+	stats->ierrors = hv->stats.ierrors;
+	stats->rx_nombuf = hv->stats.rx_nombuf;
+
+	/* transmit side */
+	stats->opackets = hv->stats.opkts;
+	stats->obytes = hv->stats.obytes;
+	stats->oerrors = hv->stats.oerrors;
+}
+
+/* ethdev callbacks implemented by the Hyper-V PMD */
+static struct eth_dev_ops hyperv_eth_dev_ops = {
+	/* device life cycle */
+	.dev_configure = hyperv_dev_configure,
+	.dev_start = hyperv_dev_start,
+	.dev_stop = hyperv_dev_stop,
+	.dev_close = hyperv_dev_close,
+
+	/* information and status */
+	.dev_infos_get = hyperv_dev_info_get,
+	.link_update = hyperv_dev_link_update,
+	.stats_get = hyperv_dev_stats_get,
+
+	/* receive filtering */
+	.promiscuous_enable = hyperv_dev_promisc_enable,
+	.promiscuous_disable = hyperv_dev_promisc_disable,
+	.allmulticast_enable = hyperv_dev_allmulticast_enable,
+	.allmulticast_disable = hyperv_dev_allmulticast_disable,
+
+	/* queue management */
+	.rx_queue_setup = hyperv_dev_rx_queue_setup,
+	.rx_queue_release = hyperv_dev_rx_queue_release,
+	.tx_queue_setup = hyperv_dev_tx_queue_setup,
+	.tx_queue_release = hyperv_dev_tx_queue_release,
+};
+
+/*
+ * Per-device init callback: install the ops table, allocate storage for
+ * one MAC address and run hyperv_init().
+ *
+ * Returns -1 when the MAC address storage cannot be allocated, otherwise
+ * the result of hyperv_init(). The MAC address storage is released again
+ * when hyperv_init() fails.
+ */
+static int
+eth_hyperv_dev_init(__rte_unused struct eth_driver *eth_drv,
+		struct rte_eth_dev *eth_dev)
+{
+	int ret;
+
+	PMD_INIT_FUNC_TRACE();
+
+	eth_dev->dev_ops = &hyperv_eth_dev_ops;
+	eth_dev->data->mac_addrs = rte_zmalloc("mac_addrs",
+					       sizeof(struct ether_addr),
+					       RTE_CACHE_LINE_SIZE);
+	if (!eth_dev->data->mac_addrs) {
+		/*
+		 * No `hv` variable exists in this function, so the per-device
+		 * log macros cannot be used here (they break the build when
+		 * RTE_LIBRTE_HV_DEBUG is enabled).
+		 */
+		PMD_INIT_LOG(ERR, "unable to allocate memory for mac addrs");
+		return -1;
+	}
+
+	ret = hyperv_init(eth_dev);
+	if (ret) {
+		/* do not leak the MAC address storage on failure */
+		rte_free(eth_dev->data->mac_addrs);
+		eth_dev->data->mac_addrs = NULL;
+	}
+
+	return ret;
+}
+
+/*
+ * Driver description passed to rte_eth_driver_register(): a vmbus driver
+ * using the "hv_uio" module and the id table above, with
+ * eth_hyperv_dev_init() as per-device init hook and struct hv_data as the
+ * per-device private area.
+ */
+static struct eth_driver rte_hyperv_pmd = {
+ .vmbus_drv = {
+ .name = "rte_hyperv_pmd",
+ .module_name = "hv_uio",
+ .id_table = vmbus_id_hyperv_map,
+ },
+ .bus_type = RTE_BUS_VMBUS,
+ .eth_dev_init = eth_hyperv_dev_init,
+ .dev_private_size = sizeof(struct hv_data),
+};
+
+/*
+ * Driver init hook: register the Hyper-V PMD with the ethdev layer.
+ * Both arguments are ignored; always returns 0.
+ */
+static int
+rte_hyperv_pmd_init(const char *name __rte_unused,
+		const char *param __rte_unused)
+{
+	rte_eth_driver_register(&rte_hyperv_pmd);
+
+	return 0;
+}
+
+/*
+ * Driver entry of type PMD_PDEV whose init hook is rte_hyperv_pmd_init();
+ * registered through PMD_REGISTER_DRIVER below.
+ */
+static struct rte_driver rte_hyperv_driver = {
+ .type = PMD_PDEV,
+ .init = rte_hyperv_pmd_init,
+};
+
+PMD_REGISTER_DRIVER(rte_hyperv_driver);
diff --git a/lib/librte_pmd_hyperv/hyperv_logs.h b/lib/librte_pmd_hyperv/hyperv_logs.h
new file mode 100644
index 0000000..e203aa3
--- /dev/null
+++ b/lib/librte_pmd_hyperv/hyperv_logs.h
@@ -0,0 +1,68 @@
+/*-
+ * Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _HYPERV_LOGS_H_
+#define _HYPERV_LOGS_H_
+
+/*
+ * Init-time logging, compiled in only with RTE_LIBRTE_HV_DEBUG_INIT.
+ * The enabled variant logs the calling function name and flushes stdout.
+ * It is wrapped in do { } while (0) so that it behaves as one statement
+ * (e.g. as the body of an unbraced if) exactly like the disabled variant.
+ */
+#ifdef RTE_LIBRTE_HV_DEBUG_INIT
+#define PMD_INIT_LOG(level, fmt, args...) \
+do { \
+	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args); \
+	fflush(stdout); \
+} while (0)
+#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
+#else
+#define PMD_INIT_LOG(level, fmt, args...) do { } while (0)
+#define PMD_INIT_FUNC_TRACE() do { } while (0)
+#endif
+
+#ifdef RTE_LIBRTE_HV_DEBUG
+
+/*
+ * Short tags printed in log lines. The PMD_P*_LOG macros paste RTE_ in
+ * front of the DBG_* code path name to select one of them.
+ */
+#define RTE_DBG_LOAD INIT
+#define RTE_DBG_STATS STATS
+#define RTE_DBG_TX TX
+#define RTE_DBG_RX RX
+#define RTE_DBG_MBUF MBUF
+#define RTE_DBG_ASSERT ASRT
+#define RTE_DBG_RB RB
+#define RTE_DBG_VMBUS VMBUS
+#define RTE_DBG_ALL ALL
+
+#define STR(x) #x
+
+/*
+ * Log one line prefixed with the vmbus device number, the code path tag
+ * and the calling function. Behaves as a single statement; the caller
+ * supplies the terminating semicolon.
+ */
+#define HV_RTE_LOG(hv, codepath, level, fmt, args...) \
+do { \
+	RTE_LOG(level, PMD, "[%d]: %-6s: %s: " fmt "\n", \
+		(hv)->vmbus_device, STR(codepath), __func__, ## args); \
+} while (0)
+
+/*
+ * Per-device log macros: the message is emitted only when the bit for
+ * `codepath` is set in hv->debug.
+ */
+#define PMD_PDEBUG_LOG(hv, codepath, fmt, args...) \
+do { \
+	if (unlikely((hv)->debug & (codepath))) \
+		HV_RTE_LOG(hv, RTE_##codepath, DEBUG, fmt, ## args); \
+} while (0)
+
+#define PMD_PINFO_LOG(hv, codepath, fmt, args...) \
+do { \
+	if (unlikely((hv)->debug & (codepath))) \
+		HV_RTE_LOG(hv, RTE_##codepath, INFO, fmt, ## args); \
+} while (0)
+
+#define PMD_PWARN_LOG(hv, codepath, fmt, args...) \
+do { \
+	if (unlikely((hv)->debug & (codepath))) \
+		HV_RTE_LOG(hv, RTE_##codepath, WARNING, fmt, ## args); \
+} while (0)
+
+#define PMD_PERROR_LOG(hv, codepath, fmt, args...) \
+do { \
+	if (unlikely((hv)->debug & (codepath))) \
+		HV_RTE_LOG(hv, RTE_##codepath, ERR, fmt, ## args); \
+} while (0)
+#else
+/*
+ * Logging disabled: every macro swallows its arguments (none of them is
+ * evaluated) and expands to an empty statement.
+ */
+#define HV_RTE_LOG(level, fmt, args...) do { } while (0)
+#define PMD_PDEBUG_LOG(fmt, args...) do { } while (0)
+#define PMD_PINFO_LOG(fmt, args...) do { } while (0)
+#define PMD_PWARN_LOG(fmt, args...) do { } while (0)
+#define PMD_PERROR_LOG(fmt, args...) do { } while (0)
+#undef RTE_LIBRTE_HV_DEBUG_TX
+#undef RTE_LIBRTE_HV_DEBUG_RX
+#endif
+
+#endif /* _HYPERV_LOGS_H_ */
diff --git a/lib/librte_pmd_hyperv/hyperv_rxtx.c b/lib/librte_pmd_hyperv/hyperv_rxtx.c
new file mode 100644
index 0000000..a9f80f9
--- /dev/null
+++ b/lib/librte_pmd_hyperv/hyperv_rxtx.c
@@ -0,0 +1,402 @@
+/*-
+ * Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include "hyperv.h"
+#include "hyperv_rxtx.h"
+#include "hyperv_drv.h"
+
+/*
+ * Physical address of the first data byte of an mbuf. The expansion is
+ * fully parenthesized so it can be used inside any larger expression.
+ */
+#define RTE_MBUF_DATA_DMA_ADDR(mb) \
+	((uint64_t) ((mb)->buf_physaddr + (mb)->data_off))
+
+/* Size of the per-packet-info header plus the 802.1Q info that follows it */
+#define RPPI_SIZE (sizeof(struct rndis_per_packet_info) \
+	+ sizeof(struct ndis_8021q_info))
+/* Offset of the RNDIS message area inside one TX packet slot */
+#define RNDIS_OFF (sizeof(struct netvsc_packet) + RPPI_SIZE)
+/* Size of one TX packet slot in the TX memzone */
+#define TX_PKT_SIZE (RNDIS_OFF + sizeof(struct rndis_filter_packet) * 2)
+
+/* Take one raw mbuf from @mp; the result of the raw allocator is passed through. */
+static inline struct rte_mbuf *
+hv_rxmbuf_alloc(struct rte_mempool *mp)
+{
+	struct rte_mbuf *fresh = __rte_mbuf_raw_alloc(mp);
+
+	return fresh;
+}
+
+/* Non-zero when the read and write indexes of the inbound ring differ. */
+static inline int
+hyperv_has_rx_work(struct hv_data *hv)
+{
+	return !(hv->in->read_index == hv->in->write_index);
+}
+
+#ifndef DEFAULT_TX_FREE_THRESHOLD
+#define DEFAULT_TX_FREE_THRESHOLD 32
+#endif
+
+/*
+ * Set up TX queue @queue_idx with @nb_desc descriptors.
+ *
+ * Allocates the queue structure, the array of netvsc packet pointers and
+ * the mbuf software ring, reserves one memzone holding nb_desc slots of
+ * TX_PKT_SIZE bytes and pre-formats every slot so that page_buffers[0]
+ * describes the slot's RNDIS message area. When that area would straddle
+ * a page boundary it is moved to the start of the next page; `delta`
+ * accumulates these shifts for all following slots.
+ *
+ * Returns 0 on success, -EINVAL for a tx_free_thresh that is not below
+ * nb_desc, -ENOMEM when an allocation fails. Everything obtained with
+ * rte_*malloc is released again on failure.
+ */
+int
+hyperv_dev_tx_queue_setup(struct rte_eth_dev *dev,
+		uint16_t queue_idx,
+		uint16_t nb_desc,
+		unsigned int socket_id,
+		const struct rte_eth_txconf *tx_conf)
+{
+	struct hv_data *hv = dev->data->dev_private;
+	const struct rte_memzone *tz;
+	struct hv_tx_queue *txq;
+	char tz_name[RTE_MEMZONE_NAMESIZE];
+	uint32_t i, delta = 0, new_delta;
+	struct netvsc_packet *pkt;
+	int ret = -ENOMEM;
+
+	PMD_INIT_FUNC_TRACE();
+
+	/* Reject a bad threshold before anything is allocated. */
+	if (tx_conf->tx_free_thresh >= nb_desc) {
+		PMD_PERROR_LOG(hv, DBG_LOAD,
+			       "tx_free_thresh should be less then nb_desc");
+		return -EINVAL;
+	}
+
+	txq = rte_zmalloc_socket("ethdev TX queue", sizeof(*txq),
+				 RTE_CACHE_LINE_SIZE, socket_id);
+	if (txq == NULL) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "rte_zmalloc for tx_queue failed");
+		return -ENOMEM;
+	}
+
+	/* A threshold of 0 selects the default; it is not overwritten later. */
+	txq->tx_free_thresh = tx_conf->tx_free_thresh ?
+		tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESHOLD;
+
+	txq->pkts = rte_calloc_socket("TX pkts", nb_desc, sizeof(*txq->pkts),
+				      RTE_CACHE_LINE_SIZE, socket_id);
+	if (txq->pkts == NULL) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "rte_zmalloc for pkts failed");
+		goto fail;
+	}
+
+	snprintf(tz_name, sizeof(tz_name), "hv_%d_%u_%u",
+		 hv->vmbus_device, queue_idx, socket_id);
+	/*
+	 * NOTE(review): the zone is reserved on the socket of the calling
+	 * lcore, not on socket_id like the other allocations - confirm that
+	 * this is intended.
+	 */
+	tz = rte_memzone_reserve_aligned(tz_name,
+			(uint32_t)nb_desc * TX_PKT_SIZE,
+			rte_lcore_to_socket_id(rte_lcore_id()),
+			0, PAGE_SIZE);
+	if (tz == NULL) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "netvsc packet ring alloc fail");
+		goto fail;
+	}
+
+	for (i = 0; i < nb_desc; i++) {
+		pkt = txq->pkts[i] = (struct netvsc_packet *)((uint8_t *)tz->addr +
+				i * TX_PKT_SIZE + delta);
+		pkt->extension = (uint8_t *)tz->addr + i * TX_PKT_SIZE +
+			RNDIS_OFF + delta;
+		if (!pkt->extension) {
+			PMD_PERROR_LOG(hv, DBG_TX,
+				       "pkt->extension is NULL for %d-th pkt", i);
+			ret = -EINVAL;
+			goto fail;
+		}
+		pkt->extension_phys_addr =
+			tz->phys_addr + i * TX_PKT_SIZE + RNDIS_OFF + delta;
+		pkt->ext_pages = 1;
+		pkt->page_buffers[0].pfn = pkt->extension_phys_addr >> PAGE_SHIFT;
+		pkt->page_buffers[0].offset =
+			(unsigned long)pkt->extension & (PAGE_SIZE - 1);
+		pkt->page_buffers[0].length = RNDIS_MESSAGE_SIZE(struct rndis_packet);
+		if (pkt->page_buffers[0].offset + pkt->page_buffers[0].length
+				> PAGE_SIZE) {
+			/* move the RNDIS area to the start of the next page */
+			new_delta = PAGE_SIZE - pkt->page_buffers[0].offset;
+			pkt->page_buffers[0].pfn++;
+			delta += new_delta;
+			pkt->page_buffers[0].offset = 0;
+			pkt->extension = (uint8_t *)pkt->extension + new_delta;
+			pkt->extension_phys_addr += new_delta;
+		}
+	}
+
+	txq->sw_ring = rte_calloc_socket("txq_sw_ring", nb_desc,
+					 sizeof(*txq->sw_ring),
+					 RTE_CACHE_LINE_SIZE, socket_id);
+	if (txq->sw_ring == NULL)
+		goto fail;
+
+	txq->port_id = dev->data->port_id;
+	txq->nb_tx_desc = txq->tx_avail = nb_desc;
+	txq->hv = hv;
+	dev->data->tx_queues[queue_idx] = txq;
+	hv->txq = txq;
+
+	return 0;
+
+fail:
+	/*
+	 * The release helper frees pkts, sw_ring and the queue itself and
+	 * copes with members that are still NULL. A memzone reserved above
+	 * is not returned here.
+	 */
+	hyperv_dev_tx_queue_release(txq);
+	return ret;
+}
+
+/*
+ * Free a TX queue: the packet pointer array, the mbuf software ring and
+ * the queue structure itself. A NULL queue is ignored.
+ */
+void
+hyperv_dev_tx_queue_release(void *ptxq)
+{
+	struct hv_tx_queue *txq = ptxq;
+
+	PMD_INIT_FUNC_TRACE();
+	if (txq != NULL) {
+		rte_free(txq->pkts);
+		rte_free(txq->sw_ring);
+		rte_free(txq);
+	}
+}
+
+/*
+ * Set up RX queue @queue_idx with @nb_desc descriptors.
+ *
+ * Allocates the queue structure, a PAGE_SIZE descriptor buffer and the
+ * mbuf software ring, and fills every ring slot with an mbuf from @mp.
+ * The driver-wide pointers (hv->rxq, hv->desc, hv->max_rx_pkt_len) are
+ * only updated once everything has succeeded.
+ *
+ * Returns 0 on success and -ENOMEM when an allocation fails; in that
+ * case all memory and mbufs obtained so far are released.
+ */
+int
+hyperv_dev_rx_queue_setup(struct rte_eth_dev *dev,
+		uint16_t queue_idx,
+		uint16_t nb_desc,
+		unsigned int socket_id,
+		const struct rte_eth_rxconf *rx_conf,
+		struct rte_mempool *mp)
+{
+	uint16_t i;
+	struct hv_rx_queue *rxq;
+	struct rte_mbuf *mbuf;
+	struct hv_data *hv = dev->data->dev_private;
+
+	PMD_INIT_FUNC_TRACE();
+
+	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(*rxq),
+				 RTE_CACHE_LINE_SIZE, socket_id);
+	if (rxq == NULL) {
+		PMD_PERROR_LOG(hv, DBG_LOAD,
+			       "rte_zmalloc for rx_queue failed!");
+		return -ENOMEM;
+	}
+	/* Must be known before the mbufs below are stamped with it. */
+	rxq->port_id = dev->data->port_id;
+
+	rxq->desc = rte_zmalloc_socket(NULL, PAGE_SIZE,
+				       RTE_CACHE_LINE_SIZE, socket_id);
+	if (rxq->desc == NULL) {
+		PMD_PERROR_LOG(hv, DBG_LOAD,
+			       "rte_zmalloc for vmbus_desc failed!");
+		goto fail;
+	}
+	rxq->sw_ring = rte_calloc_socket("rxq->sw_ring", nb_desc,
+					 sizeof(*rxq->sw_ring),
+					 RTE_CACHE_LINE_SIZE, socket_id);
+	if (rxq->sw_ring == NULL)
+		goto fail;
+
+	for (i = 0; i < nb_desc; i++) {
+		mbuf = hv_rxmbuf_alloc(mp);
+		if (mbuf == NULL) {
+			PMD_PERROR_LOG(hv, DBG_LOAD, "RX mbuf alloc failed");
+			goto fail_mbufs;
+		}
+
+		mbuf->nb_segs = 1;
+		mbuf->next = NULL;
+		mbuf->port = rxq->port_id;
+		rxq->sw_ring[i] = mbuf;
+	}
+
+	rxq->mb_pool = mp;
+	rxq->nb_rx_desc = nb_desc;
+	rxq->rx_head = 0;
+	rxq->rx_tail = 0;
+	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
+	rxq->hv = hv;
+	dev->data->rx_queues[queue_idx] = rxq;
+	hv->rxq = rxq;
+	hv->desc = rxq->desc;
+	hv->max_rx_pkt_len = mp->elt_size -
+		(sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
+
+	return 0;
+
+fail_mbufs:
+	/* give back the mbufs placed in slots [0, i) */
+	while (i-- > 0) {
+		rte_pktmbuf_free_seg(rxq->sw_ring[i]);
+		rxq->sw_ring[i] = NULL;
+	}
+fail:
+	/* frees sw_ring, desc and rxq; members still NULL are fine */
+	hyperv_dev_rx_queue_release(rxq);
+	return -ENOMEM;
+}
+
+/*
+ * Free an RX queue: the descriptor buffer, the mbuf software ring array
+ * and the queue structure itself. A NULL queue is ignored.
+ * NOTE(review): the mbufs referenced from sw_ring are not returned to
+ * the mempool here - confirm that they are released elsewhere.
+ */
+void
+hyperv_dev_rx_queue_release(void *prxq)
+{
+	struct hv_rx_queue *rxq = prxq;
+
+	PMD_INIT_FUNC_TRACE();
+	if (rxq != NULL) {
+		rte_free(rxq->sw_ring);
+		rte_free(rxq->desc);
+		rte_free(rxq);
+	}
+}
+
+/*
+ * RX burst function. Delivers up to nb_pkts packets (capped at
+ * HV_MAX_PKT_BURST) through rx_pkts[] and returns how many were
+ * delivered; returns 0 at once when the device is marked closed.
+ * Every mbuf taken out of sw_ring is replaced by a freshly allocated one
+ * from rxq->mb_pool.
+ */
+uint16_t
+hyperv_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct hv_rx_queue *rxq = prxq;
+ struct hv_data *hv = rxq->hv;
+ struct rte_mbuf *new_mb, *rx_mbuf, *first_mbuf;
+ uint16_t nb_rx = 0;
+ uint16_t segs, i;
+
+ if (unlikely(hv->closed))
+ return 0;
+
+ nb_pkts = MIN(nb_pkts, HV_MAX_PKT_BURST);
+ hyperv_scan_comps(hv, 0);
+
+ while (nb_rx < nb_pkts) {
+ /*
+ * if there are no mbufs in sw_ring,
+ * we need to trigger receive procedure
+ */
+ if (rxq->rx_head == rxq->rx_tail) {
+ if (!hyperv_has_rx_work(hv))
+ break;
+
+ if (unlikely(!hyperv_get_buffer(hv, rxq->desc, PAGE_SIZE))) {
+ hyperv_scan_comps(hv, 0);
+ continue;
+ }
+ }
+
+ /*
+ * Now the received data is in sw_ring of our rxq
+ * we need to extract it and replace in sw_ring with new mbuf
+ */
+ rx_mbuf = first_mbuf = rxq->sw_ring[rxq->rx_head];
+ segs = first_mbuf->nb_segs;
+ /* one ring slot per segment of the packet */
+ for (i = 0; i < segs; ++i) {
+ new_mb = hv_rxmbuf_alloc(rxq->mb_pool);
+ if (unlikely(!new_mb)) {
+ /*
+ * NOTE(review): when this happens for i > 0, the segments
+ * already taken out of sw_ring are neither delivered nor
+ * freed and rx_head is left in the middle of the packet -
+ * confirm whether that needs handling.
+ */
+ PMD_PERROR_LOG(hv, DBG_RX, "mbuf alloc fail");
+ ++hv->stats.rx_nombuf;
+ return nb_rx;
+ }
+
+ rx_mbuf = rxq->sw_ring[rxq->rx_head];
+ rxq->sw_ring[rxq->rx_head] = new_mb;
+
+ if (++rxq->rx_head == rxq->nb_rx_desc)
+ rxq->rx_head = 0;
+
+ rx_mbuf->ol_flags |= PKT_RX_IPV4_HDR;
+ rx_mbuf->port = rxq->port_id;
+ }
+ /* terminate the chain at the last segment of this packet */
+ rx_mbuf->next = NULL;
+
+ rx_pkts[nb_rx++] = first_mbuf;
+ ++hv->stats.ipkts;
+ hv->stats.ibytes += first_mbuf->pkt_len;
+ }
+
+ return nb_rx;
+}
+
+/*
+ * Reclaim completed TX slots: credit txq->tx_free slots back to
+ * tx_avail and free the mbuf chain stored in each of them, advancing
+ * tx_head with wrap-around.
+ *
+ * The chains are released with rte_pktmbuf_free() rather than by putting
+ * each segment straight back into its mempool, so reference counts and
+ * indirect (cloned) mbufs are handled correctly.
+ */
+static void hyperv_txeof(struct hv_tx_queue *txq)
+{
+	txq->tx_avail += txq->tx_free;
+	while (txq->tx_free) {
+		--txq->tx_free;
+		/* frees every segment of the chain; a NULL slot is a no-op */
+		rte_pktmbuf_free(txq->sw_ring[txq->tx_head]);
+		txq->sw_ring[txq->tx_head] = NULL;
+		if (++txq->tx_head == txq->nb_tx_desc)
+			txq->tx_head = 0;
+	}
+}
+
+/*
+ * TX burst function. For every packet the mbuf data is described page by
+ * page in the page_buffers[] of the next free netvsc packet (after the
+ * ext_pages entries that describe the RNDIS message area) and the packet
+ * is handed to hv_rf_on_send().
+ *
+ * Returns the number of packets actually handed over. Transmission stops
+ * at the first packet that cannot be queued or sent, so the caller keeps
+ * ownership of tx_pkts[return value .. nb_pkts - 1]. Returns 0 at once
+ * when the device is marked closed.
+ *
+ * NOTE(review): the number of page_buffers[] entries written is not
+ * checked against NETVSC_PACKET_MAXPAGE - confirm that the supported mbuf
+ * layouts cannot exceed it.
+ */
+uint16_t
+hyperv_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct hv_tx_queue *txq = ptxq;
+	struct hv_data *hv = txq->hv;
+	struct netvsc_packet *packet;
+	struct rte_mbuf *m;
+	uint32_t data_pages;
+	uint64_t first_data_page;
+	uint32_t total_len;
+	uint32_t len;
+	uint16_t i, nb_tx;
+	uint16_t index;
+	uint8_t rndis_pages;
+	int ret;
+
+	if (unlikely(hv->closed))
+		return 0;
+
+	for (nb_tx = 0; nb_tx < nb_pkts; ++nb_tx) {
+		hyperv_scan_comps(hv, 0);
+		/* Determine if the descriptor ring needs to be cleaned. */
+		if (txq->tx_free > txq->tx_free_thresh)
+			hyperv_txeof(txq);
+
+		if (!txq->tx_avail) {
+			hyperv_scan_comps(hv, 1);
+			hyperv_txeof(txq);
+			if (!txq->tx_avail) {
+				PMD_PWARN_LOG(hv, DBG_TX, "No TX mbuf available");
+				break;
+			}
+		}
+		m = tx_pkts[nb_tx];
+		len = m->data_len;
+		total_len = m->pkt_len;
+		first_data_page = RTE_MBUF_DATA_DMA_ADDR(m) >> PAGE_SHIFT;
+		data_pages = ((RTE_MBUF_DATA_DMA_ADDR(m) + len - 1) >> PAGE_SHIFT) -
+			first_data_page + 1;
+
+		packet = txq->pkts[txq->tx_tail];
+		rndis_pages = packet->ext_pages;
+
+		txq->sw_ring[txq->tx_tail] = m;
+		packet->tot_data_buf_len = total_len;
+
+		/* first segment: first page, possibly only partly used */
+		packet->page_buffers[rndis_pages].pfn =
+			RTE_MBUF_DATA_DMA_ADDR(m) >> PAGE_SHIFT;
+		packet->page_buffers[rndis_pages].offset =
+			RTE_MBUF_DATA_DMA_ADDR(m) & (PAGE_SIZE - 1);
+		if (data_pages == 1)
+			packet->page_buffers[rndis_pages].length = len;
+		else
+			packet->page_buffers[rndis_pages].length = PAGE_SIZE -
+				packet->page_buffers[rndis_pages].offset;
+
+		/* first segment: following pages, assumed full ... */
+		for (i = 1; i < data_pages; ++i) {
+			packet->page_buffers[rndis_pages + i].pfn = first_data_page + i;
+			packet->page_buffers[rndis_pages + i].offset = 0;
+			packet->page_buffers[rndis_pages + i].length = PAGE_SIZE;
+		}
+		/* ... except the last one, which ends with the data */
+		if (data_pages > 1)
+			packet->page_buffers[rndis_pages - 1 + data_pages].length =
+				((rte_pktmbuf_mtod(m, unsigned long) + len - 1)
+				 & (PAGE_SIZE - 1)) + 1;
+
+		/* remaining segments: one or two page buffers each */
+		index = data_pages + rndis_pages;
+		for (i = 1; i < m->nb_segs; ++i) {
+			m = m->next;
+			len = m->data_len;
+			first_data_page = RTE_MBUF_DATA_DMA_ADDR(m) >> PAGE_SHIFT;
+			data_pages = ((RTE_MBUF_DATA_DMA_ADDR(m) + len - 1) >> PAGE_SHIFT) -
+				first_data_page + 1;
+			packet->page_buffers[index].pfn =
+				RTE_MBUF_DATA_DMA_ADDR(m) >> PAGE_SHIFT;
+			packet->page_buffers[index].offset =
+				rte_pktmbuf_mtod(m, unsigned long)
+				& (PAGE_SIZE - 1);
+			packet->page_buffers[index].length = m->data_len;
+			if (data_pages > 1) {
+				/* It can be 2 in case of usual mbuf_size=2048 */
+				packet->page_buffers[index].length = PAGE_SIZE -
+					packet->page_buffers[index].offset;
+				packet->page_buffers[++index].offset = 0;
+				packet->page_buffers[index].pfn =
+					packet->page_buffers[index - 1].pfn + 1;
+				packet->page_buffers[index].length =
+					m->data_len
+					- packet->page_buffers[index - 1].length;
+			}
+			++index;
+		}
+		packet->page_buf_count = index;
+
+		ret = hv_rf_on_send(hv, packet);
+		if (unlikely(ret != 0)) {
+			++hv->stats.oerrors;
+			PMD_PERROR_LOG(hv, DBG_TX, "TX ring buffer is busy");
+			/*
+			 * The packet was not sent: stop here so that it is not
+			 * counted as transmitted and the caller keeps its mbuf.
+			 */
+			break;
+		}
+
+		++hv->stats.opkts;
+		hv->stats.obytes += total_len;
+		if (++txq->tx_tail == txq->nb_tx_desc)
+			txq->tx_tail = 0;
+		--txq->tx_avail;
+	}
+
+	return nb_tx;
+}
diff --git a/lib/librte_pmd_hyperv/hyperv_rxtx.h b/lib/librte_pmd_hyperv/hyperv_rxtx.h
new file mode 100644
index 0000000..c45a704
--- /dev/null
+++ b/lib/librte_pmd_hyperv/hyperv_rxtx.h
@@ -0,0 +1,35 @@
+/*-
+ * Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+/**
+ * Structure associated with each TX queue.
+ *
+ * pkts[] and sw_ring[] both have nb_tx_desc entries and are indexed by
+ * the same slot number.
+ */
+struct hv_tx_queue {
+ struct netvsc_packet **pkts; /* pre-formatted netvsc packet of each slot */
+ struct rte_mbuf **sw_ring; /* mbuf chain submitted in each slot */
+ uint16_t nb_tx_desc; /* number of slots */
+ uint16_t tx_avail; /* slots available for new packets */
+ uint16_t tx_head; /* next slot to reclaim */
+ uint16_t tx_tail; /* next slot to fill */
+ uint16_t tx_free_thresh; /* reclaim once tx_free exceeds this value */
+ /*
+ * Slots waiting to be reclaimed by hyperv_txeof().
+ * NOTE(review): incremented outside hyperv_rxtx.c - confirm where.
+ */
+ uint16_t tx_free;
+ uint8_t port_id;
+ struct hv_data *hv; /* owning device */
+} __rte_cache_aligned;
+
+/**
+ * Structure associated with each RX queue.
+ */
+struct hv_rx_queue {
+ struct rte_mempool *mb_pool; /* pool the replacement mbufs come from */
+ struct rte_mbuf **sw_ring; /* nb_rx_desc mbufs, one per slot */
+ uint16_t nb_rx_desc; /* number of slots */
+ uint16_t rx_head; /* next slot handed to the application */
+ /*
+ * The ring holds no received data when rx_head == rx_tail.
+ * NOTE(review): advanced outside hyperv_rxtx.c - confirm where.
+ */
+ uint16_t rx_tail;
+ uint16_t rx_free_thresh;
+ uint8_t port_id;
+ struct hv_data *hv; /* owning device */
+ struct hv_vm_packet_descriptor *desc; /* PAGE_SIZE buffer passed to hyperv_get_buffer() */
+} __rte_cache_aligned;
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 95dbb0b..8012922 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -199,6 +199,10 @@ ifeq ($(CONFIG_RTE_LIBRTE_VIRTIO_PMD),y)
LDLIBS += -lrte_pmd_virtio_uio
endif

+ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
+LDLIBS += -lrte_pmd_hyperv
+endif
+
ifeq ($(CONFIG_RTE_LIBRTE_VHOST), y)
LDLIBS += -lrte_vhost
endif
--
2.1.4
Stephen Hemminger
2015-02-05 01:13:28 UTC
Permalink
From: Stephen Hemminger <***@brocade.com>

Add hyperv driver config to enable it.

Signed-off-by: Stephen Hemminger <***@networkplumber.org>
---
config/common_linuxapp | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/config/common_linuxapp b/config/common_linuxapp
index d428f84..f356453 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -205,6 +205,15 @@ CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_TX_FREE=n
CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_DRIVER=n

#
+# Compile burst-mode Hyperv PMD driver
+#
+CONFIG_RTE_LIBRTE_HV_PMD=y
+CONFIG_RTE_LIBRTE_HV_DEBUG=n
+CONFIG_RTE_LIBRTE_HV_DEBUG_INIT=n
+CONFIG_RTE_LIBRTE_HV_DEBUG_RX=n
+CONFIG_RTE_LIBRTE_HV_DEBUG_TX=n
+
+#
# Compile example software rings based PMD
#
CONFIG_RTE_LIBRTE_PMD_RING=y
--
2.1.4
Stephen Hemminger
2015-02-05 01:13:29 UTC
Permalink
From: Stephen Hemminger <***@brocade.com>

For users whose kernels do not yet include this change, ship the
kernel patch so that they can apply it themselves.

Signed-off-by: Stephen Hemminger <***@networkplumber.org>
---
.../linuxapp/hv_uio/vmbus-get-pages.patch | 55 ++++++++++++++++++++++
1 file changed, 55 insertions(+)
create mode 100644 lib/librte_eal/linuxapp/hv_uio/vmbus-get-pages.patch

diff --git a/lib/librte_eal/linuxapp/hv_uio/vmbus-get-pages.patch b/lib/librte_eal/linuxapp/hv_uio/vmbus-get-pages.patch
new file mode 100644
index 0000000..43507a8
--- /dev/null
+++ b/lib/librte_eal/linuxapp/hv_uio/vmbus-get-pages.patch
@@ -0,0 +1,55 @@
+hyper-v: allow access to vmbus from userspace driver
+
+This patch allows a UIO driver to access the Hyper-V vmbus.
+
+Signed-off-by: Stas Egorov <***@mirantis.com>
+Signed-off-by: Stephen Hemminger <***@networkplumber.org>
+
+---
+v2 - simplify and rename to vmbus_get_monitor_pages
+
+ drivers/hv/connection.c | 20 +++++++++++++++++---
+ include/linux/hyperv.h | 3 +++
+ 2 files changed, 20 insertions(+), 3 deletions(-)
+
+--- a/drivers/hv/connection.c 2015-02-03 10:58:51.751752450 -0800
++++ b/drivers/hv/connection.c 2015-02-04 14:59:51.636194383 -0800
+@@ -64,6 +64,15 @@ static __u32 vmbus_get_next_version(__u3
+ }
+ }
+
++void vmbus_get_monitor_pages(unsigned long *int_page,
++ unsigned long monitor_pages[2])
++{
++ *int_page = (unsigned long)vmbus_connection.int_page;
++ monitor_pages[0] = (unsigned long)vmbus_connection.monitor_pages[0];
++ monitor_pages[1] = (unsigned long)vmbus_connection.monitor_pages[1];
++}
++EXPORT_SYMBOL_GPL(vmbus_get_monitor_pages);
++
+ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
+ __u32 version)
+ {
+@@ -347,10 +356,7 @@ static void process_chn_event(u32 relid)
+ else
+ bytes_to_read = 0;
+ } while (read_state && (bytes_to_read != 0));
+- } else {
+- pr_err("no channel callback for relid - %u\n", relid);
+ }
+-
+ }
+
+ /*
+--- a/include/linux/hyperv.h 2015-02-03 10:58:51.751752450 -0800
++++ b/include/linux/hyperv.h 2015-02-04 15:00:26.388355012 -0800
+@@ -868,6 +868,9 @@ extern int vmbus_recvpacket_raw(struct v
+
+ extern void vmbus_ontimer(unsigned long data);
+
++extern void vmbus_get_monitor_pages(unsigned long *int_page,
++ unsigned long monitor_pages[2]);
++
+ /* Base driver object */
+ struct hv_driver {
+ const char *name;
--
2.1.4
Continue reading on narkive:
Loading...