30 files changed, 6836 insertions, 9 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index d858c49..b3552b2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -760,6 +760,12 @@ S: Maintained
 F: hw/net/vmxnet*
 F: hw/scsi/vmw_pvscsi*
 
+Rocker
+M: Scott Feldman <sfeldma@gmail.com>
+M: Jiri Pirko <jiri@resnulli.us>
+S: Maintained
+F: hw/net/rocker/
+
 Subsystems
 ----------
 Audio
diff --git a/default-configs/pci.mak b/default-configs/pci.mak
index 58a2c0a..7e10903 100644
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak
@@ -36,3 +36,4 @@ CONFIG_EDU=y
 CONFIG_VGA=y
 CONFIG_VGA_PCI=y
 CONFIG_IVSHMEM=$(CONFIG_KVM)
+CONFIG_ROCKER=y
diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt
index c6732fe..e4a4490 100644
--- a/docs/specs/pci-ids.txt
+++ b/docs/specs/pci-ids.txt
@@ -45,6 +45,7 @@ PCI devices (other than virtio):
 1b36:0003 PCI Dual-port 16550A adapter (docs/specs/pci-serial.txt)
 1b36:0004 PCI Quad-port 16550A adapter (docs/specs/pci-serial.txt)
 1b36:0005 PCI test device (docs/specs/pci-testdev.txt)
+1b36:0006 PCI Rocker Ethernet switch device
 1b36:0007 PCI SD Card Host Controller Interface (SDHCI)
 
 All these devices are documented in docs/specs.
diff --git a/docs/specs/rocker.txt b/docs/specs/rocker.txt
new file mode 100644
index 0000000..1e7e1e1
--- /dev/null
+++ b/docs/specs/rocker.txt
@@ -0,0 +1,1009 @@
+Rocker Network Switch Register Programming Guide
+Copyright (c) Scott Feldman <sfeldma@gmail.com>
+Copyright (c) Neil Horman <nhorman@tuxdriver.com>
+Version 0.11, 12/29/2014
+
+LICENSE
+=======
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+SECTION 1: Introduction
+=======================
+
+Overview
+--------
+
+This document describes the hardware/software interface for the Rocker switch
+device.  The intended audience is authors of OS drivers and device emulation
+software.
+
+Notations and Conventions
+-------------------------
+
+o In register descriptions, [n:m] indicates a range from bit n to bit m,
+inclusive.
+o Use of leading 0x indicates a hexadecimal number.
+o Use of leading 0b indicates a binary number.
+o The use of RSVD or Reserved indicates that a bit or field is reserved for
+future use.
+o Field width is in bytes, unless otherwise noted.
+o Registers are (R) read-only, (R/W) read/write, (W) write-only, or (COR)
+clear on read.
+o TLV values in network-byte-order are designated with (N).
+
+
+SECTION 2: PCI Configuration Registers
+======================================
+
+PCI Configuration Space
+-----------------------
+
+Each switch instance registers as a PCI device with PCI configuration space:
+
+    offset  width   description             value
+    ---------------------------------------------
+    0x0     2       Vendor ID               0x1b36
+    0x2     2       Device ID               0x0006
+    0x4     4       Command/Status
+    0x8     1       Revision ID             0x01
+    0x9     3       Class code              0x2800
+    0xC     1       Cache line size
+    0xD     1       Latency timer
+    0xE     1       Header type
+    0xF     1       Built-in self test
+    0x10    4       Base address low
+    0x14    4       Base address high
+    0x18-28         Reserved
+    0x2C    2       Subsystem vendor ID     *
+    0x2E    2       Subsystem ID            *
+    0x30-38         Reserved
+    0x3C    1       Interrupt line
+    0x3D    1       Interrupt pin           0x00
+    0x3E    1       Min grant               0x00
+    0x3F    1       Max latency             0x00
+    0x40    1       TRDY timeout
+    0x41    1       Retry count
+    0x42    2       Reserved
+
+
+* Assigned by sub-system implementation
+
+SECTION 3: Memory-Mapped Register Space
+=======================================
+
+There are two memory-mapped BARs.  BAR0 maps device register space and is
+0x2000 in size.  BAR1 maps MSI-X vector and PBA tables and is also 0x2000 in
+size, allowing for 256 MSI-X vectors.
+
+All registers are 4 or 8 bytes long.  It is assumed host software will access
+4-byte registers with one 4-byte access, and 8-byte registers with either two
+4-byte accesses or a single 8-byte access.  In the case of two 4-byte
+accesses, the lower 4 bytes must be accessed first, then the upper 4 bytes.
+
+BAR0 device register space is organized as follows:
+
+    offset          description
+    ------------------------------------------------------
+    0x0000-0x000f   Bogus registers to catch misbehaving
+                    drivers.  Writes do nothing.  Reads
+                    back as 0xDEADBABE.
+    0x0010-0x00ff   Test registers
+    0x0300-0x03ff   General purpose registers
+    0x1000-0x1fff   Descriptor control
+
+Holes in register space are reserved.  Writes to reserved registers do
+nothing.  Reads from reserved registers read back as 0.
+
+No fancy stuff like write-combining is enabled on any of the registers.
+
+BAR1 MSI-X register space is organized as follows:
+
+    offset          description
+    ------------------------------------------------------
+    0x0000-0x0fff   MSI-X vector table (256 vectors total)
+    0x1000-0x1fff   MSI-X PBA table
+
+
+SECTION 4: Interrupts, DMA, and Endianness
+==========================================
+
+PCI Interrupts
+--------------
+
+The device supports only MSI-X interrupts.  The BAR1 memory-mapped region
+contains the MSI-X vector and PBA tables, with support for up to 256 MSI-X
+vectors.
+
+The vector assignment is:
+
+    vector          description
+    -----------------------------------------------------
+    0               Command descriptor ring completion
+    1               Event descriptor ring completion
+    2               Test operation completion
+    3               RSVD
+    4-255           Tx and Rx descriptor ring completion
+                    Tx vector is even
+                    Rx vector is odd
+
+An MSI-X vector table entry is 16 bytes:
+
+    field           offset  width   description
+    -------------------------------------------------------------
+    lower_addr      0x0     4       [31:2] message address[31:2]
+                                    [1:0] Rsvd (4 byte alignment
+                                    required)
+    upper_addr      0x4     4       [31:15] Rsvd
+                                    [14:0] message address[46:32]
+    data            0x8     4       message data[31:0]
+    control         0xc     4       [31:1] Rsvd
+                                    [0] mask (0 = enable,
+                                    1 = masked)
+
+Software should install the Interrupt Service Routine (ISR) before any ports
+are enabled or any commands are issued on the command ring.
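For quick reference, the fixed vector assignments above reduce to a little
per-port arithmetic.  The following C sketch is only illustrative; the macro
names are made up here (the rocker.c code later in this patch uses its own
equivalents, e.g. ROCKER_MSIX_VEC_RX(port)):

    /* Sketch of the vector layout above; names are illustrative. */
    #define VEC_CMD         0                 /* command ring completion */
    #define VEC_EVENT       1                 /* event ring completion */
    #define VEC_TEST        2                 /* test operation completion */
                                              /* vector 3 is reserved */
    #define VEC_TX(port)    (4 + (port) * 2)  /* even vector: Tx ring */
    #define VEC_RX(port)    (5 + (port) * 2)  /* odd vector: Rx ring */

Here port counts from 0 for the first front-panel port, so 62 ports consume
vectors 4 through 127.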
+
+DMA Operations
+--------------
+
+DMA operations are used for packet DMA to/from the CPU, command and event
+processing.  Command processing includes statistical counters and table dumps,
+table insertion/deletion, and more.  Event processing provides an async
+notification method for device-originating events.  Each DMA operation has a
+set of control registers to manage a descriptor ring.  The descriptor rings
+are allocated from contiguous host DMA-able memory and the registers specify
+the ring's base address, size, and current head and tail indices.  Software
+always writes the head, and hardware always writes the tail.
+
+The high-order bit of DMA_DESC_COMP_ERR is used to mark hardware completion
+of a descriptor.  Software will clear this bit when posting a descriptor to
+the ring, and hardware will set this bit when the descriptor is complete.
+
+Descriptor ring sizes must be a power of 2 and range from 2 to 64K entries.
+Descriptor rings' base address must be 8-byte aligned.  Descriptors must be
+packed within the ring.  Each descriptor in each ring must also be aligned on
+an 8-byte boundary.  Each descriptor ring will have these registers:
+
+    DMA_DESC_xxx_BASE_ADDR, offset 0x1000 + (x * 32), 64-bit, (R/W)
+    DMA_DESC_xxx_SIZE, offset 0x1008 + (x * 32), 32-bit, (R/W)
+    DMA_DESC_xxx_HEAD, offset 0x100c + (x * 32), 32-bit, (R/W)
+    DMA_DESC_xxx_TAIL, offset 0x1010 + (x * 32), 32-bit, (R)
+    DMA_DESC_xxx_CTRL, offset 0x1014 + (x * 32), 32-bit, (W)
+    DMA_DESC_xxx_CREDITS, offset 0x1018 + (x * 32), 32-bit, (R/W)
+    DMA_DESC_xxx_RSVD1, offset 0x101c + (x * 32), 32-bit, (R/W)
+
+where x is the descriptor ring index:
+
+    index   ring
+    --------------------
+    0       CMD
+    1       EVENT
+    2       TX (port 0)
+    3       RX (port 0)
+    4       TX (port 1)
+    5       RX (port 1)
+    .
+    .
+    .
+    124     TX (port 61)
+    125     RX (port 61)
+    126     RSVD
+    127     RSVD
+
+Writing BASE_ADDR or SIZE will reset HEAD and TAIL to zero.  HEAD cannot be
+written past TAIL.  To do so would wrap the ring.  An empty ring is when HEAD
+== TAIL.  A full ring is when HEAD is one position behind TAIL.  Both HEAD
+and TAIL increment and modulo wrap at the ring size.
+
+CTRL register bits:
+
+    bit     name            description
+    ------------------------------------------------------------------------
+    [0]     CTRL_RESET      Reset the descriptor ring
+    [31:1]                  Reserved
+
+All descriptor types share some common fields:
+
+    field                  width   description
+    -------------------------------------------------------------------
+    DMA_DESC_BUF_ADDR        8     Phys addr of desc payload, 8-byte
+                                   aligned
+    DMA_DESC_COOKIE          8     Desc cookie for completion matching,
+                                   upper-most bit is reserved
+    DMA_DESC_BUF_SIZE        2     Desc payload size in bytes
+    DMA_DESC_TLV_SIZE        2     Desc payload total size in bytes
+                                   used for TLVs.  Must be <=
+                                   DMA_DESC_BUF_SIZE.
+    DMA_DESC_COMP_ERR        2     Completion status of associated
+                                   desc payload.  High-order bit is
+                                   cleared by sw on new descs and set
+                                   by hw on completed descs.
+
+To support forward- and backward-compatibility, descriptor and completion
+payloads are specified in TLV format.  Fields are packed with Type=field
+name, Length=field length, and Value=field value.  Software will ignore
+unknown fields filled in by the switch.  Likewise, the switch will ignore
+unknown fields filled in by software.
+
+The descriptor payload buffer is 8-byte aligned and TLVs are 8-byte aligned.
+The value within a TLV is also 8-byte aligned.  The (packed, 8 byte) TLV
+header is:
+
+    field   width   description
+    -----------------------------
+    type      4     TLV type
+    len       2     TLV value length
+    pad       2     Reserved
+
+The alignment requirements for descriptors and TLVs are to avoid unaligned
+access exceptions in software.  Note that the payload for each TLV is also
+8-byte aligned.
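To make the header layout concrete, a driver could mirror it with a struct
and one alignment macro, as in this sketch (the names are illustrative, not
taken from the device or driver headers):

    #include <stdint.h>

    /* The 8-byte TLV header described above (device byte order is LE). */
    struct tlv_hdr {
        uint32_t type;   /* TLV type */
        uint16_t len;    /* length of value only; header and pad excluded */
        uint16_t pad;    /* reserved */
    };

    /* Values are padded so the next TLV header is 8-byte aligned. */
    #define TLV_ALIGN(len)      (((len) + 7) & ~7)
    #define TLV_TOTAL_SIZE(len) (sizeof(struct tlv_hdr) + TLV_ALIGN(len))

With this rule, TLV#1 in fig. 1 below (len=22) occupies 8 + 24 = 32 bytes of
the descriptor payload.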
+
+Figure 1 shows an example descriptor buffer with two TLVs.
+
+                        <------- 8 bytes ------->
+  8-byte  +----+  +-----------+-----+-----+                     +-+
+  align        |  |   type    | len | pad |    TLV#1 hdr          |
+               |  +-----------+-----+-----+      (len=22)         |
+               |  |                       |                       |
+               |  |  value                |    TLV#1 value        |
+               |  |                       |    (padded to 8-byte  |
+               |  |                 +-----+     alignment)        |
+               |  |                 |/////|                       |
+  8-byte  +----+  +-----------+-----------+                       |
+  align        |  |   type    | len | pad |    TLV#2 hdr    DESC_BUF_SIZE
+               |  +-----+-----+-----+-----+      (len=2)          |
+               |  |value|/////////////////|    TLV#2 value        |
+                  +-----+/////////////////|                       |
+                  |///////////////////////|                       |
+                  |///////////////////////|                       |
+                  |///////////////////////|                       |
+                  |////////unused/////////|                       |
+                  |////////space//////////|                       |
+                  |///////////////////////|                       |
+                  |///////////////////////|                       |
+                  |///////////////////////|                       |
+                  +-----------------------+                     +-+
+
+                                fig. 1
+
+TLVs can be nested within the NEST TLV type.
+
+Interrupt credits
+^^^^^^^^^^^^^^^^^
+
+MSI-X vectors used for descriptor ring completions use a credit mechanism for
+efficient device, PCIe bus, OS and driver operations.  Each descriptor ring
+has a credit count which represents the number of outstanding descriptors to
+be processed by the driver.  As the device marks descriptors complete, the
+credit count is incremented.  As the driver processes those outstanding
+descriptors, it returns credits back to the device.  This way, the device
+knows the driver's progress and can make decisions about when to fire the
+next interrupt or not.  When the credit count is zero, and the first
+descriptors are posted for the driver, a single interrupt is fired.  Once the
+interrupt is fired, the interrupt is disabled (auto-masked*).  In response to
+the interrupt, the driver will process descriptors and PIO write a returned
+credit value for that descriptor ring.  If the driver returns all credits
+(the driver caught up with the device and there is no outstanding work), then
+the interrupt is unmasked, but not fired.  If only partial credits are
+returned, the interrupt remains masked but the device generates an interrupt,
+signaling the driver that more outstanding work is available.
+
+(* this masking is unrelated to the MSI-X interrupt mask register)
+
+Endianness
+----------
+
+Device registers are hard-coded to little-endian (LE).  The driver should
+convert between host endianness and LE for device register accesses.
+
+Descriptors are LE.  Descriptor buffer TLVs will have LE type and length
+fields, but the value field can either be LE or network-byte-order, depending
+on context.  TLV values containing network packet data will be in
+network-byte order.  A TLV value containing a field or mask used to compare
+against network packet data is network-byte order.  For example, flow match
+fields (and masks) are network-byte-order since they're matched directly,
+byte-by-byte, against network packet data.  All non-network-packet TLV
+multi-byte values will be LE.
+
+TLV values in network-byte-order are designated with (N).
+
+
+SECTION 5: Test Registers
+=========================
+
+Rocker has several test registers to support troubleshooting register access,
+interrupt generation, and DMA operations:
+
+    TEST_REG, offset 0x0010, 32-bit (R/W)
+    TEST_REG64, offset 0x0018, 64-bit (R/W)
+    TEST_IRQ, offset 0x0020, 32-bit (R/W)
+    TEST_DMA_ADDR, offset 0x0028, 64-bit (R/W)
+    TEST_DMA_SIZE, offset 0x0030, 32-bit (R/W)
+    TEST_DMA_CTRL, offset 0x0034, 32-bit (R/W)
+
+Reads to TEST_REG and TEST_REG64 will read a value equal to twice the last
+value written to the register.  The 32-bit and 64-bit versions are for
+testing 32-bit and 64-bit host accesses.
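For example, a driver sanity check built on this doubled read-back could look
like the sketch below; readl()/writel() stand in for whatever MMIO accessors
the OS provides, and base is the mapped BAR0 address:

    /* Sketch: verify 32-bit register access via TEST_REG (offset 0x0010). */
    static int rocker_check_test_reg(uint8_t *base)
    {
        uint32_t val = 0x12345678;

        writel(val, base + 0x0010);                      /* TEST_REG */
        return readl(base + 0x0010) == val * 2 ? 0 : -1; /* expect 2x */
    }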
+
+A vector can be written to TEST_IRQ and the device will generate an interrupt
+for that vector.
+
+To test basic DMA operations, allocate a DMA-able host buffer and put the
+buffer address into TEST_DMA_ADDR and size into TEST_DMA_SIZE.  Then, write
+to TEST_DMA_CTRL to manipulate the buffer contents.  TEST_DMA_CTRL operations
+are:
+
+    operation               value   description
+    -----------------------------------------------------------
+    TEST_DMA_CTRL_CLEAR       1     clear buffer
+    TEST_DMA_CTRL_FILL        2     fill buffer bytes with 0x96
+    TEST_DMA_CTRL_INVERT      4     invert bytes in buffer
+
+Various buffer addresses and sizes should be tested to verify no address
+boundary issue exists.  In particular, buffers that start on an odd 8-byte
+boundary and/or span multiple pages should be tested.
+
+
+SECTION 6: Ports
+================
+
+Physical and Logical Ports
+--------------------------
+
+The switch supports up to 62 physical (front-panel) ports.  Register
+PORT_PHYS_COUNT returns the actual number of physical ports available:
+
+    PORT_PHYS_COUNT, offset 0x0304, 32-bit, (R)
+
+In addition to front-panel ports, the switch supports logical ports for
+tunnels.
+
+Front-panel ports and logical tunnel ports are mapped into a single 32-bit
+port space.  A special CPU port is assigned port 0.  The front-panel ports
+are mapped to ports 1-62.  A special loopback port is assigned port 63.
+Logical tunnel ports are assigned ports 0x00010000-0x0001ffff.
+To summarize the port assignments:
+
+    port                    mapping
+    -------------------------------------------------------
+    0                       CPU port (for packets to/from host CPU)
+    1-62                    front-panel physical ports
+    63                      loopback port
+    64-0x0000ffff           RSVD
+    0x00010000-0x0001ffff   logical tunnel ports
+    0x00020000-0xffffffff   RSVD
+
+Physical Port Mode
+------------------
+
+Switch front-panel ports operate in a mode.  Currently, the only mode is
+OF-DPA.  OF-DPA[1] mode is based on the OpenFlow Data Plane Abstraction
+(OF-DPA) Abstract Switch Specification, Version 1.0, from Broadcom
+Corporation.  To set/get the mode for front-panel ports, see port settings,
+below.
+
+Port Settings
+-------------
+
+Link status for all front-panel ports is available via PORT_PHYS_LINK_STATUS:
+
+    PORT_PHYS_LINK_STATUS, offset 0x0310, 64-bit, (R)
+
+    Value is a port bitmap.  Bits 0 and 63 always read 0.  Bits 1-62 read 1
+    for link UP and 0 for link DOWN for the respective front-panel ports.
+
+Other properties for front-panel ports are available via DMA CMD descriptors:
+
+    Get PORT_SETTINGS descriptor:
+
+        field           width   description
+        ----------------------------------------------
+        PORT_SETTINGS     2     CMD_GET
+        PPORT             4     Physical port #
+
+    Get PORT_SETTINGS completion:
+
+        field           width   description
+        ----------------------------------------------
+        PPORT             4     Physical port #
+        SPEED             4     Current port interface speed, in Mbps
+        DUPLEX            1     1 = Full, 0 = Half
+        AUTONEG           1     1 = enabled, 0 = disabled
+        MACADDR           6     Port MAC address
+        MODE              1     0 = OF-DPA
+        LEARNING          1     MAC address learning on port
+                                  1 = enabled
+                                  0 = disabled
+
+    Set PORT_SETTINGS descriptor:
+
+        field           width   description
+        ----------------------------------------------
+        PORT_SETTINGS     2     CMD_SET
+        PPORT             4     Physical port #
+        SPEED             4     Port interface speed, in Mbps
+        DUPLEX            1     1 = Full, 0 = Half
+        AUTONEG           1     1 = enabled, 0 = disabled
+        MACADDR           6     Port MAC address
+        MODE              1     0 = OF-DPA
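Tying the TLV rules from Section 4 to the tables above, a request for the
settings of physical port 1 could be encoded as in this sketch.  The tlv_put()
helper and the numeric TLV type values are illustrative; the real type
numbers live in the driver/device headers, not in this spec:

    #include <stdint.h>
    #include <string.h>

    enum { TLV_PORT_SETTINGS_CMD = 1, TLV_PPORT = 2 };  /* illustrative */

    /* Append one TLV (8-byte header plus value padded to 8 bytes), per
     * Section 4.  Assumes a little-endian host for brevity.
     */
    static size_t tlv_put(char *buf, size_t pos, uint32_t type,
                          const void *val, uint16_t len)
    {
        memcpy(buf + pos, &type, 4);        /* type */
        memcpy(buf + pos + 4, &len, 2);     /* len: value bytes only */
        memset(buf + pos + 6, 0, 2);        /* pad */
        memcpy(buf + pos + 8, val, len);    /* value */
        return pos + 8 + (((size_t)len + 7) & ~(size_t)7);
    }

    /* Build a CMD_GET PORT_SETTINGS payload for physical port 1. */
    static size_t build_get_port_settings(char *buf)
    {
        uint16_t cmd = 1;    /* CMD_GET; encoding is illustrative */
        uint32_t pport = 1;  /* physical port # */
        size_t pos = 0;

        pos = tlv_put(buf, pos, TLV_PORT_SETTINGS_CMD, &cmd, sizeof(cmd));
        pos = tlv_put(buf, pos, TLV_PPORT, &pport, sizeof(pport));
        return pos;          /* stored in the descriptor's DMA_DESC_TLV_SIZE */
    }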
+
+Port Enable
+-----------
+
+Front-panel ports are initially disabled, which means port ingress and egress
+packets will be dropped.  To enable or disable a port, use PORT_PHYS_ENABLE:
+
+    PORT_PHYS_ENABLE: offset 0x0318, 64-bit, (R/W)
+
+    Value is a bitmap of the first 64 ports.  Bits 0 and 63 are ignored and
+    always read as 0.  Write 1 to enable a port; write 0 to disable it.
+    Default is 0.
+
+
+SECTION 7: Switch Control
+=========================
+
+This section covers switch-wide register settings.
+
+Control
+-------
+
+This register is used for low level control of the switch.
+
+    CONTROL: offset 0x0300, 32-bit, (W)
+
+    bit     name            description
+    ------------------------------------------------------------------------
+    [0]     CONTROL_RESET   If set, device will perform reset
+    [31:1]                  Reserved
+
+Switch ID
+---------
+
+The switch has a SWITCH_ID to be used by software to uniquely identify the
+switch:
+
+    SWITCH_ID: offset 0x0320, 64-bit, (R)
+
+    Value is opaque to switch software and no special encoding is implied.
+
+
+SECTION 8: Events
+=================
+
+Non-I/O asynchronous events from the device are delivered to the host using
+the event ring.  The TLV structure for events is:
+
+    field   width   description
+    ---------------------------------------------------
+    TYPE      4     Event type, one of:
+                      1: LINK_CHANGED
+                      2: MAC_VLAN_SEEN
+    INFO    <nest>  Event info (details below)
+
+Link Changed Event
+------------------
+
+When link status changes on a physical port, this event is generated.
+
+    field     width   description
+    ---------------------------------------------------
+    INFO     <nest>
+      PPORT     4     Physical port
+      LINKUP    1     Link status:
+                        0: down
+                        1: up
+
+MAC VLAN Seen Event
+-------------------
+
+When a packet ingresses on a port and the source MAC/VLAN isn't known to the
+device, the device will generate this event.  In response to the event, the
+driver should install the MAC/VLAN for the port into the device's bridge
+table.  Once installed, the MAC/VLAN is known on the port and this event will
+no longer be generated.
+
+    field     width   description
+    ---------------------------------------------------
+    INFO     <nest>
+      PPORT     4     Physical port
+      MAC       6     MAC address
+      VLAN      2     VLAN ID
+
+
+SECTION 9: CPU Packet Processing
+================================
+
+Ingress packets directed to the host CPU for further processing are delivered
+in the DMA RX ring.  Likewise, host CPU originating packets destined to
+egress on switch ports are scheduled by software using the DMA TX ring.
+
+Tx Packet Processing
+--------------------
+
+Software schedules packets for egress on switch ports using the DMA TX ring.
+A TX descriptor buffer describes the packet location and size in host
+DMA-able memory, the destination port, and any hardware-offload functions
+(such as L3 payload checksum offload).  Software then bumps the descriptor
+head to signal hardware of new Tx work.  In response, hardware will DMA read
+Tx descriptors up to head, DMA read the descriptor buffer and packet data,
+perform the offloading functions, and finally frame the packet on the wire
+(network).  Once packet processing is complete, hardware will write back
+status to the descriptor(s) to signal to software that Tx is complete and the
+software resources (e.g. skb) backing the packet can be released.
+
+Figure 2 shows an example 3-fragment packet queued with one Tx descriptor.  A
+TLV is used for each packet fragment.
+
+                                             pkt frag 1
+                                             +-------+       +-+
+                                         +---+       |         |
+                        desc buf         |   |       |         |
+          +--------+    +--------+       |   |       |         |
+  Tx ring |        +----+  TLVs  +-------+   |       |         |
+ +--------+--+     |    +--------+           +-------+         |
+ |           +-----+    +--------+           pkt frag 2        |
+ |  desc 0   |     +----+  TLVs  +---+       +-------+         |
+ +-----------+     |    +--------+   +-------+       |         |
+ head+-+           |    +--------+           |       |         |
+ |  desc 1   +-----+    |  TLVs  +---+       +-------+         | pkt
+ +-----------+          +--------+   |       pkt frag 3        |
+ |           |                       |       +-------+         |
+ |           |                       +-------+       |         |
+ +-----------+                               |       |         |
+ |           |                               |       |         |
+ |           |                               |       |         |
+ +-----------+                               |       |         |
+ |           |                               |       |         |
+ |           |                               +-------+       +-+
+ +-----------+
+
+                                fig 2.
+
+The TLVs for the Tx descriptor buffer are:
+
+    field             width   description
+    ---------------------------------------------------------------------
+    PPORT               4     Destination physical port #
+    TX_OFFLOAD          1     Hardware offload modes:
+                                0: no offload
+                                1: insert IP csum (ipv4 only)
+                                2: insert TCP/UDP csum
+                                3: L3 csum calc and insert
+                                   into csum offset (TX_L3_CSUM_OFF)
+                                   16-bit 1's complement csum value.
+                                   IPv4 pseudo-header and IP
+                                   already calculated by OS
+                                   and inserted.
+                                4: TSO (TCP Segmentation Offload)
+    TX_L3_CSUM_OFF      2     For L3 csum offload mode, the offset,
+                              from the beginning of the packet,
+                              of the csum field in the L3 header
+    TX_TSO_MSS          2     For TSO offload mode, the
+                              Maximum Segment Size in bytes
+    TX_TSO_HDR_LEN      2     For TSO offload mode, the
+                              length of Ethernet, IP, and
+                              TCP/UDP headers, including IP
+                              and TCP options.
+    TX_FRAGS         <array>  Packet fragments
+      TX_FRAG        <nest>   Packet fragment
+        TX_FRAG_ADDR    8     DMA address of packet fragment
+        TX_FRAG_LEN     2     Packet fragment length
+
+Possible status return codes in descriptor on completion are:
+
+    DESC_COMP_ERR     reason
+    --------------------------------------------------------------------
+    0                 OK
+    -ROCKER_ENXIO     address or data read err on desc buf or packet
+                      fragment
+    -ROCKER_EINVAL    bad pport or TSO or csum offloading error
+    -ROCKER_ENOMEM    no memory for internal staging tx fragment
+
+Rx Packet Processing
+--------------------
+
+Packets ingressing on switch ports that are not forwarded by the switch, but
+rather directed to the host CPU for further processing, are delivered in the
+DMA RX ring.  Rx descriptor buffers are allocated by software and placed on
+the ring.  Hardware will fill Rx descriptor buffers with packet data, write
+the completion, and signal to software that a new packet is ready.  Since the
+Rx packet size is not known a priori, the Rx descriptor buffer must be
+allocated for the worst-case packet size.  A single Rx descriptor will
+contain the entire Rx packet data in one RX_FRAG.  Other Rx TLVs describe any
+hardware offloads performed on the packet, such as checksum validation.
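As an example, the driver-side preparation of one Rx descriptor payload could
look like this sketch, reusing the illustrative tlv_put() helper from the
port-settings sketch in Section 6 (type numbers again illustrative):

    enum { TLV_RX_FRAG_ADDR = 3, TLV_RX_FRAG_MAX_LEN = 4 }; /* illustrative */

    #define RX_BUF_MAX 9018  /* assumed worst-case frame for this driver */

    /* Tell the device where to DMA the next ingress packet. */
    static size_t post_rx_buf(char *desc_payload, uint64_t buf_dma_addr)
    {
        uint64_t frag_addr = buf_dma_addr;  /* DMA address of packet buffer */
        uint16_t frag_max = RX_BUF_MAX;     /* worst-case packet size */
        size_t pos = 0;

        pos = tlv_put(desc_payload, pos, TLV_RX_FRAG_ADDR,
                      &frag_addr, sizeof(frag_addr));
        pos = tlv_put(desc_payload, pos, TLV_RX_FRAG_MAX_LEN,
                      &frag_max, sizeof(frag_max));
        return pos;  /* written to DMA_DESC_TLV_SIZE; driver then bumps HEAD */
    }

On completion, the device rewrites the payload with RX_FRAG_LEN and the
parsing flags listed below.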
+
+The TLVs for the Rx descriptor buffer are:
+
+    field             width   description
+    ---------------------------------------------------
+    PPORT               4     Source physical port #
+    RX_FLAGS            2     Packet parsing flags:
+                                (1 << 0): IPv4 packet
+                                (1 << 1): IPv6 packet
+                                (1 << 2): csum calculated
+                                (1 << 3): IPv4 csum good
+                                (1 << 4): IP fragment
+                                (1 << 5): TCP packet
+                                (1 << 6): UDP packet
+                                (1 << 7): TCP/UDP csum good
+    RX_CSUM             2     IP calculated checksum:
+                                IPv4: IP payload csum
+                                IPv6: header and payload csum
+                              (Only valid if the RX_FLAGS csum
+                              calculated bit is set)
+    RX_FRAG_ADDR        8     DMA address of packet fragment
+    RX_FRAG_MAX_LEN     2     Packet maximum fragment length
+    RX_FRAG_LEN         2     Actual packet fragment length after receive
+
+Possible status return codes in descriptor on completion are:
+
+    DESC_COMP_ERR       reason
+    --------------------------------------------------------------------
+    0                   OK
+    -ROCKER_ENXIO       address or data read err on desc buf
+    -ROCKER_ENOMEM      no memory for internal staging desc buf
+    -ROCKER_EMSGSIZE    Rx descriptor buffer wasn't big enough to contain
+                        packet data TLV and other TLVs.
+
+
+SECTION 10: OF-DPA Mode
+=======================
+
+OF-DPA mode allows the switch to offload flow packet processing functions to
+hardware.  An OpenFlow controller would communicate with an OpenFlow agent
+installed on the switch.  The OpenFlow agent would (directly or indirectly)
+communicate with the Rocker switch driver, which in turn would program switch
+hardware with flow functionality, as defined in OF-DPA.  The block diagram
+is:
+
+                +---------------------+
+                |         OF          |
+                |  Remote Controller  |
+                +----------+----------+
+                           |
+                           |
+                  +--------+---------+
+                  |        OF        |
+                  |   Local Agent    |
+                  +------------------+
+                  |                  |
+                  |  Rocker Driver   |
+                  +------------------+
+                      <this spec>
+                  +------------------+
+                  |                  |
+                  |  Rocker Switch   |
+                  +------------------+
+
+To participate in flow functions, ports must be configured for OF-DPA mode
+during switch initialization.
+
+OF-DPA Flow Table Interface
+---------------------------
+
+There are commands to add, modify, delete, and get stats of flow table
+entries.  The commands are issued using the DMA CMD descriptor ring.
+The following commands are defined:
+
+    CMD_ADD:        add an entry to flow table
+    CMD_MOD:        modify an entry in flow table
+    CMD_DEL:        delete an entry from flow table
+    CMD_GET_STATS:  get stats for flow entry
+
+TLVs for add and modify commands are:
+
+    field              width   description
+    ----------------------------------------------------
+    OF_DPA_CMD           2     CMD_[ADD|MOD]
+    OF_DPA_TBL           2     Flow table ID
+                                 0: ingress port
+                                 10: vlan
+                                 20: termination mac
+                                 30: unicast routing
+                                 40: multicast routing
+                                 50: bridging
+                                 60: ACL policy
+    OF_DPA_PRIORITY      4     Flow priority
+    OF_DPA_HARDTIME      4     Hard timeout for flow
+    OF_DPA_IDLETIME      4     Idle timeout for flow
+    OF_DPA_COOKIE        8     Cookie
+
+Additional TLVs based on flow table ID:
+
+Table ID 0: ingress port
+
+    field              width   description
+    ----------------------------------------------------
+    OF_DPA_IN_PPORT      4     ingress physical port number
+    OF_DPA_GOTO_TBL      2     goto table ID; zero to drop
+
+Table ID 10: vlan
+
+    field                width   description
+    ----------------------------------------------------
+    OF_DPA_IN_PPORT        4     ingress physical port number
+    OF_DPA_VLAN_ID         2     (N) vlan ID
+    OF_DPA_VLAN_ID_MASK    2     (N) vlan ID mask
+    OF_DPA_GOTO_TBL        2     goto table ID; zero to drop
+    OF_DPA_NEW_VLAN_ID     2     (N) new vlan ID
+
+Table ID 20: termination mac
+
+    field                 width   description
+    ----------------------------------------------------
+    OF_DPA_IN_PPORT         4     ingress physical port number
+    OF_DPA_IN_PPORT_MASK    4     ingress physical port number mask
+    OF_DPA_ETHERTYPE        2     (N) must be either 0x0800 or 0x86dd
+    OF_DPA_DST_MAC          6     (N) destination MAC
+    OF_DPA_DST_MAC_MASK     6     (N) destination MAC mask
+    OF_DPA_VLAN_ID          2     (N) vlan ID
+    OF_DPA_VLAN_ID_MASK     2     (N) vlan ID mask
+    OF_DPA_GOTO_TBL         2     only acceptable values are
+                                  unicast or multicast routing
+                                  table IDs
+    OF_DPA_OUT_PPORT        2     if specified, must be
+                                  controller, set zero otherwise
+
+Table ID 30: unicast routing
+
+    field                 width   description
+    ----------------------------------------------------
+    OF_DPA_ETHERTYPE        2     (N) must be either 0x0800 or 0x86dd
+    OF_DPA_DST_IP           4     (N) destination IPv4 address.
+                                  Must be unicast address
+    OF_DPA_DST_IP_MASK      4     (N) IP mask.  Must be prefix mask
+    OF_DPA_DST_IPV6         16    (N) destination IPv6 address.
+                                  Must be unicast address
+    OF_DPA_DST_IPV6_MASK    16    (N) IPv6 mask.  Must be prefix mask
+    OF_DPA_GOTO_TBL         2     goto table ID; zero to drop
+    OF_DPA_GROUP_ID         4     data for GROUP action must
+                                  be an L3 Unicast group entry
+
+Table ID 40: multicast routing
+
+    field                 width   description
+    ----------------------------------------------------
+    OF_DPA_ETHERTYPE        2     (N) must be either 0x0800 or 0x86dd
+    OF_DPA_VLAN_ID          2     (N) vlan ID
+    OF_DPA_SRC_IP           4     (N) source IPv4.  Optional,
+                                  can contain IPv4 address,
+                                  must be completely masked
+                                  if not used
+    OF_DPA_SRC_IP_MASK      4     (N) IP Mask
+    OF_DPA_DST_IP           4     (N) destination IPv4 address.
+                                  Must be multicast address
+    OF_DPA_SRC_IPV6         16    (N) source IPv6 Address.  Optional.
+                                  Can contain IPv6 address,
+                                  must be completely masked
+                                  if not used
+    OF_DPA_SRC_IPV6_MASK    16    (N) IPv6 mask.
+    OF_DPA_DST_IPV6         16    (N) destination IPv6 Address.
+                                  Must be multicast address
+    OF_DPA_GOTO_TBL         2     goto table ID; zero to drop
+    OF_DPA_GROUP_ID         4     data for GROUP action must
+                                  be an L3 multicast group entry
+
+Table ID 50: bridging
+
+    field                 width   description
+    ----------------------------------------------------
+    OF_DPA_VLAN_ID          2     (N) vlan ID
+    OF_DPA_TUNNEL_ID        4     tunnel ID
+    OF_DPA_DST_MAC          6     (N) destination MAC
+    OF_DPA_DST_MAC_MASK     6     (N) destination MAC mask
+    OF_DPA_GOTO_TBL         2     goto table ID; zero to drop
+    OF_DPA_GROUP_ID         4     data for GROUP action must
+                                  be a L2 Interface, L2
+                                  Multicast, L2 Flood,
+                                  or L2 Overlay group entry
+                                  as appropriate
+    OF_DPA_TUNNEL_LPORT     4     unicast Tenant Bridging
+                                  flows specify a tunnel
+                                  logical port ID
+    OF_DPA_OUT_PPORT        2     data for OUTPUT action,
+                                  restricted to CONTROLLER,
+                                  set to 0 otherwise
+
+Table ID 60: acl policy
+
+    field                      width   description
+    ----------------------------------------------------
+    OF_DPA_IN_PPORT              4     ingress physical port number
+    OF_DPA_IN_PPORT_MASK         4     ingress physical port number mask
+    OF_DPA_ETHERTYPE             2     (N) ethertype
+    OF_DPA_VLAN_ID               2     (N) vlan ID
+    OF_DPA_VLAN_ID_MASK          2     (N) vlan ID mask
+    OF_DPA_VLAN_PCP              2     (N) vlan Priority Code Point
+    OF_DPA_VLAN_PCP_MASK         2     (N) vlan Priority Code Point mask
+    OF_DPA_SRC_MAC               6     (N) source MAC
+    OF_DPA_SRC_MAC_MASK          6     (N) source MAC mask
+    OF_DPA_DST_MAC               6     (N) destination MAC
+    OF_DPA_DST_MAC_MASK          6     (N) destination MAC mask
+    OF_DPA_TUNNEL_ID             4     tunnel ID
+    OF_DPA_SRC_IP                4     (N) source IPv4.  Optional,
+                                       can contain IPv4 address,
+                                       must be completely masked
+                                       if not used
+    OF_DPA_SRC_IP_MASK           4     (N) IP Mask
+    OF_DPA_DST_IP                4     (N) destination IPv4 address.
+                                       Must be multicast address
+    OF_DPA_DST_IP_MASK           4     (N) IP Mask
+    OF_DPA_SRC_IPV6              16    (N) source IPv6 Address.  Optional.
+                                       Can contain IPv6 address,
+                                       must be completely masked
+                                       if not used
+    OF_DPA_SRC_IPV6_MASK         16    (N) IPv6 mask
+    OF_DPA_DST_IPV6              16    (N) destination IPv6 Address.  Must
+                                       be multicast address.
+    OF_DPA_DST_IPV6_MASK         16    (N) IPv6 mask
+    OF_DPA_SRC_ARP_IP            4     (N) source IPv4 address in the ARP
+                                       payload.  Only used if ethertype
+                                       == 0x0806.
+    OF_DPA_SRC_ARP_IP_MASK       4     (N) IP Mask
+    OF_DPA_IP_PROTO              1     IP protocol
+    OF_DPA_IP_PROTO_MASK         1     IP protocol mask
+    OF_DPA_IP_DSCP               1     DSCP
+    OF_DPA_IP_DSCP_MASK          1     DSCP mask
+    OF_DPA_IP_ECN                1     ECN
+    OF_DPA_IP_ECN_MASK           1     ECN mask
+    OF_DPA_L4_SRC_PORT           2     (N) L4 source port, only for
+                                       TCP, UDP, or SCTP
+    OF_DPA_L4_SRC_PORT_MASK      2     (N) L4 source port mask
+    OF_DPA_L4_DST_PORT           2     (N) L4 destination port, only for
+                                       TCP, UDP, or SCTP
+    OF_DPA_L4_DST_PORT_MASK      2     (N) L4 destination port mask
+    OF_DPA_ICMP_TYPE             1     ICMP type, only if IP
+                                       protocol is 1
+    OF_DPA_ICMP_TYPE_MASK        1     ICMP type mask
+    OF_DPA_ICMP_CODE             1     ICMP code
+    OF_DPA_ICMP_CODE_MASK        1     ICMP code mask
+    OF_DPA_IPV6_LABEL            4     (N) IPv6 flow label
+    OF_DPA_IPV6_LABEL_MASK       4     (N) IPv6 flow label mask
+    OF_DPA_GROUP_ID              4     data for GROUP action
+    OF_DPA_QUEUE_ID_ACTION       1     write the queue ID
+    OF_DPA_NEW_QUEUE_ID          1     queue ID
+    OF_DPA_VLAN_PCP_ACTION       1     write the VLAN priority
+    OF_DPA_NEW_VLAN_PCP          1     VLAN priority
+    OF_DPA_IP_DSCP_ACTION        1     write the DSCP
+    OF_DPA_NEW_IP_DSCP           1     new DSCP
+    OF_DPA_TUNNEL_LPORT          4     restrict to valid tunnel
+                                       logical port, set to 0
+                                       otherwise.
+    OF_DPA_OUT_PPORT             2     data for OUTPUT action,
+                                       restricted to CONTROLLER,
+                                       set to 0 otherwise
+    OF_DPA_CLEAR_ACTIONS         4     if set to 1, packets matching
+                                       the flow are dropped (all other
+                                       instructions are ignored)
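Putting the flow TLVs together, an add command for a simple bridging
(table 50) entry might be encoded as in this sketch.  It reuses the
illustrative tlv_put() helper from Section 6; the TLV type numbers, the
CMD_ADD encoding, and the group ID value are all illustrative:

    #include <arpa/inet.h>  /* htons() for (N) fields */

    enum {
        TLV_OF_DPA_CMD = 10, TLV_OF_DPA_TBL = 11, TLV_OF_DPA_PRIORITY = 12,
        TLV_OF_DPA_COOKIE = 13, TLV_OF_DPA_VLAN_ID = 14,
        TLV_OF_DPA_DST_MAC = 15, TLV_OF_DPA_GOTO_TBL = 16,
        TLV_OF_DPA_GROUP_ID = 17,
    };  /* illustrative numbering */

    /* Bridge a known MAC on VLAN 10 to a group, then continue at ACL. */
    static size_t build_bridging_flow_add(char *buf)
    {
        uint16_t cmd = 1;               /* CMD_ADD, illustrative encoding */
        uint16_t tbl = 50;              /* bridging table */
        uint32_t prio = 1;
        uint64_t cookie = 0xc0ffee;     /* driver-chosen handle */
        uint16_t vlan_id = htons(10);   /* (N): network byte order */
        uint8_t dst_mac[6] = { 0x52, 0x54, 0x00, 0x12, 0x34, 0x56 };
        uint16_t goto_tbl = 60;         /* next: ACL policy */
        uint32_t group_id = 0x00010001; /* assumed L2 interface group */
        size_t pos = 0;

        pos = tlv_put(buf, pos, TLV_OF_DPA_CMD, &cmd, sizeof(cmd));
        pos = tlv_put(buf, pos, TLV_OF_DPA_TBL, &tbl, sizeof(tbl));
        pos = tlv_put(buf, pos, TLV_OF_DPA_PRIORITY, &prio, sizeof(prio));
        pos = tlv_put(buf, pos, TLV_OF_DPA_COOKIE, &cookie, sizeof(cookie));
        pos = tlv_put(buf, pos, TLV_OF_DPA_VLAN_ID, &vlan_id, sizeof(vlan_id));
        pos = tlv_put(buf, pos, TLV_OF_DPA_DST_MAC, dst_mac, sizeof(dst_mac));
        pos = tlv_put(buf, pos, TLV_OF_DPA_GOTO_TBL, &goto_tbl,
                      sizeof(goto_tbl));
        pos = tlv_put(buf, pos, TLV_OF_DPA_GROUP_ID, &group_id,
                      sizeof(group_id));
        return pos;
    }

The cookie is echoed back in delete/get-stats commands, so the driver should
pick a value it can later map back to its own flow state.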
+
+TLVs for the flow delete and get stats commands are:
+
+    field            width   description
+    ---------------------------------------------------
+    OF_DPA_CMD         2     CMD_[DEL|GET_STATS]
+    OF_DPA_COOKIE      8     Cookie
+
+On completion of the get stats command, the descriptor buffer is written back
+with the following TLVs:
+
+    field                  width   description
+    ---------------------------------------------------
+    OF_DPA_STAT_DURATION     4     Flow duration
+    OF_DPA_STAT_RX_PKTS      8     Received packets
+    OF_DPA_STAT_TX_PKTS      8     Transmit packets
+
+Possible status return codes in descriptor on completion are:
+
+    DESC_COMP_ERR       command            reason
+    --------------------------------------------------------------------
+    0                   all                OK
+    -ROCKER_EFAULT      all                head or tail index outside
+                                           of ring
+    -ROCKER_ENXIO       all                address or data read err on
+                                           desc buf
+    -ROCKER_EMSGSIZE    GET_STATS          cmd descriptor buffer wasn't
+                                           big enough to contain
+                                           write-back TLVs
+    -ROCKER_EINVAL      all                invalid parameters passed in
+    -ROCKER_EEXIST      ADD                entry already exists
+    -ROCKER_ENOSPC      ADD                no space left in flow table
+    -ROCKER_ENOENT      MOD|DEL|GET_STATS  cookie invalid
+
+Group Table Interface
+---------------------
+
+There are commands to add, modify, delete, and get stats of group table
+entries.  The commands are issued using the DMA CMD descriptor ring.  The
+following commands are defined:
+
+    CMD_ADD:        add an entry to group table
+    CMD_MOD:        modify an entry in group table
+    CMD_DEL:        delete an entry from group table
+    CMD_GET_STATS:  get stats for group entry
+
+TLVs for add and modify commands are:
+
+    field                width   description
+    -----------------------------------------------------------
+    FLOW_GROUP_CMD         2     CMD_[ADD|MOD]
+    FLOW_GROUP_ID          2     Flow group ID
+    FLOW_GROUP_TYPE        1     Group type:
+                                   0: L2 interface
+                                   1: L2 rewrite
+                                   2: L3 unicast
+                                   3: L2 multicast
+                                   4: L2 flood
+                                   5: L3 interface
+                                   6: L3 multicast
+                                   7: L3 ECMP
+                                   8: L2 overlay
+    FLOW_VLAN_ID           2     Vlan ID (types 0, 3, 4, 6)
+    FLOW_L2_PORT           2     Port (type 0)
+    FLOW_INDEX             4     Index (all types but 0)
+    FLOW_OVERLAY_TYPE      1     Overlay sub-type (type 8):
+                                   0: Flood unicast tunnel
+                                   1: Flood multicast tunnel
+                                   2: Multicast unicast tunnel
+                                   3: Multicast multicast tunnel
+    FLOW_GROUP_ACTION   <nest>
+      FLOW_GROUP_ID        2     next group ID in chain (all
+                                 types except 0)
+      FLOW_OUT_PORT        4     egress port (types 0, 8)
+      FLOW_POP_VLAN_TAG    1     strip outer VLAN tag (type 1
+                                 only)
+      FLOW_VLAN_ID         2     (types 1, 5)
+      FLOW_SRC_MAC         6     (types 1, 2, 5)
+      FLOW_DST_MAC         6     (types 1, 2)
+
+TLVs for the group delete and get stats commands are:
+
+    field             width   description
+    -----------------------------------------------------------
+    FLOW_GROUP_CMD      2     CMD_[DEL|GET_STATS]
+    FLOW_GROUP_ID       2     Flow group ID
+
+On completion of the get stats command, the descriptor buffer is written back
+with the following TLVs:
+
+    field                     width   description
+    ---------------------------------------------------
+    FLOW_GROUP_ID               2     Flow group ID
+    FLOW_STAT_DURATION          4     Flow duration
+    FLOW_STAT_REF_COUNT         4     Flow reference count
+    FLOW_STAT_BUCKET_COUNT      4     Flow bucket count
+
+Possible status return codes in descriptor on completion are:
+
+    DESC_COMP_ERR     command            reason
+    --------------------------------------------------------------------
+    0                 all                OK
+    -ROCKER_EFAULT    all                head or tail index outside
+                                         of ring
+    -ROCKER_ENXIO     all                address or data read err on
+                                         desc buf
+    -ROCKER_ENOSPC    GET_STATS          cmd descriptor buffer wasn't
+                                         big enough to contain
+                                         write-back TLVs
+    -ROCKER_EINVAL    ADD|MOD            invalid parameters passed in
+    -ROCKER_EEXIST    ADD                entry already exists
+    -ROCKER_ENOSPC    ADD                no space left in group table
+    -ROCKER_ENOENT    MOD|DEL|GET_STATS  group ID invalid
+    -ROCKER_EBUSY     DEL                group reference count non-zero
+    -ROCKER_ENODEV    ADD                next group ID doesn't exist
+
+
+
+References
+==========
+
+[1] OpenFlow Data Plane Abstraction (OF-DPA) Abstract Switch Specification,
+Version 1.0, from Broadcom Corporation, February 21, 2014.
diff --git a/hw/net/Makefile.objs b/hw/net/Makefile.objs
index ea93293..7b91c4e 100644
--- a/hw/net/Makefile.objs
+++ b/hw/net/Makefile.objs
@@ -35,3 +35,7 @@ obj-y += vhost_net.o
 
 obj-$(CONFIG_ETSEC) += fsl_etsec/etsec.o fsl_etsec/registers.o \
                        fsl_etsec/rings.o fsl_etsec/miim.o
+
+common-obj-$(CONFIG_ROCKER) += rocker/rocker.o rocker/rocker_fp.o \
+                               rocker/rocker_desc.o rocker/rocker_world.o \
+                               rocker/rocker_of_dpa.o
diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c
new file mode 100644
index 0000000..55b6c46
--- /dev/null
+++ b/hw/net/rocker/rocker.c
@@ -0,0 +1,1480 @@
+/*
+ * QEMU rocker switch emulation - PCI device
+ *
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "hw/hw.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/msix.h"
+#include "net/net.h"
+#include "net/eth.h"
+#include "qemu/iov.h"
+#include "qemu/bitops.h"
+#include "qmp-commands.h"
+
+#include "rocker.h"
+#include "rocker_hw.h"
+#include "rocker_fp.h"
+#include "rocker_desc.h"
+#include "rocker_tlv.h"
+#include "rocker_world.h"
+#include "rocker_of_dpa.h"
+
+struct rocker {
+    /* private */
+    PCIDevice parent_obj;
+    /* public */
+
+    MemoryRegion mmio;
+    MemoryRegion msix_bar;
+
+    /* switch configuration */
+    char *name;                  /* switch name */
+    uint32_t fp_ports;           /* front-panel port count */
+    NICPeers *fp_ports_peers;
+    MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
+    uint64_t switch_id;          /* switch id */
+
+    /* front-panel ports */
+    FpPort *fp_port[ROCKER_FP_PORTS_MAX];
+
+    /* register backings */
+    uint32_t test_reg;
+    uint64_t test_reg64;
+    dma_addr_t test_dma_addr;
+    uint32_t test_dma_size;
+    uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */
+
+    /* desc rings */
+    DescRing **rings;
+
+    /* switch worlds */
+    World *worlds[ROCKER_WORLD_TYPE_MAX];
+    World *world_dflt;
+
+    QLIST_ENTRY(rocker) next;
+};
+
+#define ROCKER "rocker"
+
+#define to_rocker(obj) \
+    OBJECT_CHECK(Rocker, (obj), ROCKER)
+
+static QLIST_HEAD(, rocker) rockers;
+
+Rocker *rocker_find(const char *name)
+{
+    Rocker *r;
+
+    QLIST_FOREACH(r, &rockers, next) {
+        if (strcmp(r->name, name) == 0) {
+            return r;
+        }
+    }
+
+    return NULL;
+}
+
+World *rocker_get_world(Rocker *r, enum rocker_world_type type)
+{
+    if (type < ROCKER_WORLD_TYPE_MAX) {
+        return r->worlds[type];
+    }
+    return NULL;
+}
+
+uint32_t rocker_fp_ports(Rocker *r)
+{
+    return r->fp_ports;
+}
+
+static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
+                                            DescRing *ring)
+{
+    return (desc_ring_index(ring) - 2) / 2 + 1;
+}
+
+static int tx_consume(Rocker *r, DescInfo *info)
+{
+    PCIDevice *dev = PCI_DEVICE(r);
+    char *buf = desc_get_buf(info, true);
+    RockerTlv *tlv_frag;
+    RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
+    struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
+    uint32_t pport;
+    uint32_t port;
+    uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
+    uint16_t tx_l3_csum_off = 0;
+    uint16_t tx_tso_mss = 0;
+    uint16_t tx_tso_hdr_len = 0;
+    int iovcnt = 0;
+    int err = ROCKER_OK;
+    int rem;
+    int i;
+
+    if (!buf) {
+        return -ROCKER_ENXIO;
+    }
+
+    rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
+
+    if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
+        return -ROCKER_EINVAL;
+    }
+
+    pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
+    if (!fp_port_from_pport(pport, &port)) {
+        return -ROCKER_EINVAL;
+    }
+
+    if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
+        tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
+    }
+
+    switch (tx_offload) {
+    case ROCKER_TX_OFFLOAD_L3_CSUM:
+        if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
+            return -ROCKER_EINVAL;
+        }
+        break;
+    case ROCKER_TX_OFFLOAD_TSO:
+        if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
+            !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
+            return -ROCKER_EINVAL;
+        }
+        break;
+    }
+
+    if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
+        tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
+    }
+
+    if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
+        tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
+    }
+
+    if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
+        tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
+    }
+
+    rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
+        hwaddr frag_addr;
+        uint16_t frag_len;
+
+        if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
+            err = -ROCKER_EINVAL;
+            goto err_bad_attr;
+        }
+
+        rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
+
+        if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
+            !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
+            err = -ROCKER_EINVAL;
+            goto err_bad_attr;
+        }
+
+        frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
+        frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
+
+        /* Check the bound before filling the slot so iov[] can't overflow */
+        if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
+            err = -ROCKER_EINVAL;
+            goto err_too_many_frags;
+        }
+
+        iov[iovcnt].iov_len = frag_len;
+        iov[iovcnt].iov_base = g_malloc(frag_len);
+        if (!iov[iovcnt].iov_base) {
+            err = -ROCKER_ENOMEM;
+            goto err_no_mem;
+        }
+
+        if (pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
+                         iov[iovcnt].iov_len)) {
+            err = -ROCKER_ENXIO;
+            goto err_bad_io;
+        }
+
+        iovcnt++;
+    }
+
+    if (iovcnt) {
+        /* XXX perform Tx offloads */
+        /* XXX silence compiler for now */
+        tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
+    }
+
+    err = fp_port_eg(r->fp_port[port], iov, iovcnt);
+
+err_too_many_frags:
+err_bad_io:
+err_no_mem:
+err_bad_attr:
+    for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
+        if (iov[i].iov_base) {
+            g_free(iov[i].iov_base);
+        }
+    }
+
+    return err;
+}
+
+static int cmd_get_port_settings(Rocker *r,
+                                 DescInfo *info, char *buf,
+                                 RockerTlv *cmd_info_tlv)
+{
+    RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
+    RockerTlv *nest;
+    FpPort *fp_port;
+    uint32_t pport;
+    uint32_t port;
+    uint32_t speed;
+    uint8_t duplex;
+    uint8_t autoneg;
+    uint8_t learning;
+    MACAddr macaddr;
+    enum rocker_world_type mode;
+    size_t tlv_size;
+    int pos;
+    int err;
+
+    rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
+                            cmd_info_tlv);
+
+    if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
+        return -ROCKER_EINVAL;
+    }
+
+    pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
+    if (!fp_port_from_pport(pport, &port)) {
+        return -ROCKER_EINVAL;
+    }
+    fp_port = r->fp_port[port];
+
+    err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
+    if (err) {
+        return err;
+    }
+
+    fp_port_get_macaddr(fp_port, &macaddr);
+    mode = world_type(fp_port_get_world(fp_port));
+    learning = fp_port_get_learning(fp_port);
+
+    tlv_size = rocker_tlv_total_size(0) +                 /* nest */
+               rocker_tlv_total_size(sizeof(uint32_t)) +  /* pport */
+               rocker_tlv_total_size(sizeof(uint32_t)) +  /* speed */
+               rocker_tlv_total_size(sizeof(uint8_t)) +   /* duplex */
+               rocker_tlv_total_size(sizeof(uint8_t)) +   /* autoneg */
+               rocker_tlv_total_size(sizeof(macaddr.a)) + /* macaddr */
+               rocker_tlv_total_size(sizeof(uint8_t)) +   /* mode */
+               rocker_tlv_total_size(sizeof(uint8_t));    /* learning */
+
+    if (tlv_size > desc_buf_size(info)) {
+        return -ROCKER_EMSGSIZE;
+    }
+
+    pos = 0;
+    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
+    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
+    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
+    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
+    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
+    rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
+                   sizeof(macaddr.a), macaddr.a);
+    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
+    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
+                      learning);
+    rocker_tlv_nest_end(buf, &pos, nest);
+
+    return desc_set_buf(info, tlv_size);
+}
+
+static int cmd_set_port_settings(Rocker *r,
+                                 RockerTlv *cmd_info_tlv)
+{
+    RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
+    FpPort *fp_port;
+    uint32_t pport;
+    uint32_t port;
+    uint32_t speed;
+    uint8_t duplex;
+    uint8_t autoneg;
+    uint8_t learning;
+    MACAddr macaddr;
+    enum rocker_world_type mode;
+    int err;
+
+    rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
+                            cmd_info_tlv);
+
+    if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
+        return -ROCKER_EINVAL;
+    }
+
+    pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
+    if (!fp_port_from_pport(pport, &port)) {
+        return -ROCKER_EINVAL;
+    }
+    fp_port = r->fp_port[port];
+
+    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
+        tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
+        tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
+
+        speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
+        duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
+        autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
+
+        err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
+        if (err) {
+            return err;
+        }
+    }
+
+    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
+        if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
+            sizeof(macaddr.a)) {
+            return -ROCKER_EINVAL;
+        }
+        memcpy(macaddr.a,
+               rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
+               sizeof(macaddr.a));
+        fp_port_set_macaddr(fp_port, &macaddr);
+    }
+
+    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
+        mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
+        fp_port_set_world(fp_port, r->worlds[mode]);
+    }
+
+    if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
+        learning =
+            rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
+        fp_port_set_learning(fp_port, learning);
+    }
+
+    return ROCKER_OK;
+}
+
+static int cmd_consume(Rocker *r, DescInfo *info)
+{
+    char *buf = desc_get_buf(info, false);
+    RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
+    RockerTlv *info_tlv;
+    World *world;
+    uint16_t cmd;
+    int err;
+
+    if (!buf) {
+        return -ROCKER_ENXIO;
+    }
+
+    rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
+
+    if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
+        return -ROCKER_EINVAL;
+    }
+
+    cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
+    info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
+
+    /* This might be reworked to something like this:
+     * Every world will have an array of command handlers from
+     * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX.  It is
+     * up to each world to implement whatever commands it wants.
+     * It can reference "generic" commands such as cmd_set_port_settings
+     * or cmd_get_port_settings.
+     */
+
+    switch (cmd) {
+    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
+    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
+    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
+    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
+    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
+    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
+    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
+    case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
+        world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
+        err = world_do_cmd(world, info, buf, cmd, info_tlv);
+        break;
+    case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
+        err = cmd_get_port_settings(r, info, buf, info_tlv);
+        break;
+    case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
+        err = cmd_set_port_settings(r, info_tlv);
+        break;
+    default:
+        err = -ROCKER_EINVAL;
+        break;
+    }
+
+    return err;
+}
+
+static void rocker_msix_irq(Rocker *r, unsigned vector)
+{
+    PCIDevice *dev = PCI_DEVICE(r);
+
+    DPRINTF("MSI-X notify request for vector %d\n", vector);
+    if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
+        DPRINTF("incorrect vector %d\n", vector);
+        return;
+    }
+    msix_notify(dev, vector);
+}
+
+int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
+{
+    DescRing *ring = r->rings[ROCKER_RING_EVENT];
+    DescInfo *info = desc_ring_fetch_desc(ring);
+    RockerTlv *nest;
+    char *buf;
+    size_t tlv_size;
+    int pos;
+    int err;
+
+    if (!info) {
+        return -ROCKER_ENOBUFS;
+    }
+
+    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* event type */
+               rocker_tlv_total_size(0) +                /* nest */
+               rocker_tlv_total_size(sizeof(uint32_t)) + /* pport */
+               rocker_tlv_total_size(sizeof(uint8_t));   /* link up */
+
+    if (tlv_size > desc_buf_size(info)) {
+        err = -ROCKER_EMSGSIZE;
+        goto err_too_big;
+    }
+
+    buf = desc_get_buf(info, false);
+    if (!buf) {
+        err = -ROCKER_ENOMEM;
+        goto err_no_mem;
+    }
+
+    pos = 0;
+    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
+                        ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
+    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
+    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
+    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
+                      link_up ? 1 : 0);
+    rocker_tlv_nest_end(buf, &pos, nest);
+
+    err = desc_set_buf(info, tlv_size);
+
+err_too_big:
+err_no_mem:
+    if (desc_ring_post_desc(ring, err)) {
+        rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
+    }
+
+    return err;
+}
+
+int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
+                               uint16_t vlan_id)
+{
+    DescRing *ring = r->rings[ROCKER_RING_EVENT];
+    DescInfo *info;
+    FpPort *fp_port;
+    uint32_t port;
+    RockerTlv *nest;
+    char *buf;
+    size_t tlv_size;
+    int pos;
+    int err;
+
+    if (!fp_port_from_pport(pport, &port)) {
+        return -ROCKER_EINVAL;
+    }
+    fp_port = r->fp_port[port];
+    if (!fp_port_get_learning(fp_port)) {
+        return ROCKER_OK;
+    }
+
+    info = desc_ring_fetch_desc(ring);
+    if (!info) {
+        return -ROCKER_ENOBUFS;
+    }
+
+    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* event type */
+               rocker_tlv_total_size(0) +                /* nest */
+               rocker_tlv_total_size(sizeof(uint32_t)) + /* pport */
+               rocker_tlv_total_size(ETH_ALEN) +         /* mac addr */
+               rocker_tlv_total_size(sizeof(uint16_t));  /* vlan_id */
+
+    if (tlv_size > desc_buf_size(info)) {
+        err = -ROCKER_EMSGSIZE;
+        goto err_too_big;
+    }
+
+    buf = desc_get_buf(info, false);
+    if (!buf) {
+        err = -ROCKER_ENOMEM;
+        goto err_no_mem;
+    }
+
+    pos = 0;
+    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
+                        ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
+    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
+    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
+    rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
+    rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
+    rocker_tlv_nest_end(buf, &pos, nest);
+
+    err = desc_set_buf(info, tlv_size);
+
+err_too_big:
+err_no_mem:
+    if (desc_ring_post_desc(ring, err)) {
+        rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
+    }
+
+    return err;
+}
+
+static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
+                                             uint32_t pport)
+{
+    return r->rings[(pport - 1) * 2 + 3];
+}
+
+int rx_produce(World *world, uint32_t pport,
+               const struct iovec *iov, int iovcnt)
+{
+    Rocker *r = world_rocker(world);
+    PCIDevice *dev = PCI_DEVICE(r);
+    DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
+    DescInfo *info = desc_ring_fetch_desc(ring);
+    char *data;
+    size_t data_size = iov_size(iov, iovcnt);
+    char *buf;
+    uint16_t rx_flags = 0;
+    uint16_t rx_csum = 0;
+    size_t tlv_size;
+    RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
+    hwaddr frag_addr;
+    uint16_t frag_max_len;
+    int pos;
+    int err;
+
+    if (!info) {
+        return -ROCKER_ENOBUFS;
+    }
+
+    buf = desc_get_buf(info, false);
+    if (!buf) {
+        err = -ROCKER_ENXIO;
+        goto out;
+    }
+    rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
+
+    if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
+        !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
+        err = -ROCKER_EINVAL;
+        goto out;
+    }
+
+    frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
+    frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
+
+    if (data_size > frag_max_len) {
+        err = -ROCKER_EMSGSIZE;
+        goto out;
+    }
+
+    /* XXX calc rx flags/csum */
+
+    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
+               rocker_tlv_total_size(sizeof(uint16_t)) + /* csum */
+               rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
+               rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
+               rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
+
+    if (tlv_size > desc_buf_size(info)) {
+        err = -ROCKER_EMSGSIZE;
+        goto out;
+    }
+
+    /* TODO:
+     * The iov dma write can be optimized in a similar way e1000 does it in
+     * e1000_receive_iov.  But maybe it would make sense to introduce
+     * a generic helper iov_dma_write.
+     */
+
+    data = g_malloc(data_size);
+    if (!data) {
+        err = -ROCKER_ENOMEM;
+        goto out;
+    }
+    iov_to_buf(iov, iovcnt, 0, data, data_size);
+    pci_dma_write(dev, frag_addr, data, data_size);
+    g_free(data);
+
+    pos = 0;
+    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
+    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
+    rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
+    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
+    rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
+
+    err = desc_set_buf(info, tlv_size);
+
+out:
+    if (desc_ring_post_desc(ring, err)) {
+        rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
+    }
+
+    return err;
+}
+
+int rocker_port_eg(Rocker *r, uint32_t pport,
+                   const struct iovec *iov, int iovcnt)
+{
+    FpPort *fp_port;
+    uint32_t port;
+
+    if (!fp_port_from_pport(pport, &port)) {
+        return -ROCKER_EINVAL;
+    }
+
+    fp_port = r->fp_port[port];
+
+    return fp_port_eg(fp_port, iov, iovcnt);
+}
+
+static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
+{
+    PCIDevice *dev = PCI_DEVICE(r);
+    char *buf;
+    int i;
+
+    buf = g_malloc(r->test_dma_size);
+
+    if (!buf) {
+        DPRINTF("test dma buffer alloc failed");
+        return;
+    }
+
+    switch (val) {
+    case ROCKER_TEST_DMA_CTRL_CLEAR:
+        memset(buf, 0, r->test_dma_size);
+        break;
+    case ROCKER_TEST_DMA_CTRL_FILL:
+        memset(buf, 0x96, r->test_dma_size);
+        break;
+    case ROCKER_TEST_DMA_CTRL_INVERT:
+        pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
+        for (i = 0; i < r->test_dma_size; i++) {
+            buf[i] = ~buf[i];
+        }
+        break;
+    default:
+        DPRINTF("unknown test dma control val=0x%08x\n", val);
+        goto err_out;
+    }
+    pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
+
+    rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
+
+err_out:
+    g_free(buf);
+}
+
+static void rocker_reset(DeviceState *dev);
+
+static void rocker_control(Rocker *r, uint32_t val)
+{
+    if (val & ROCKER_CONTROL_RESET) {
+        rocker_reset(DEVICE(r));
+    }
+}
+
+static int rocker_pci_ring_count(Rocker *r)
+{
+    /* There are:
+     * - command ring
+     * - event ring
+     * - tx and rx ring per each port
+     */
+    return 2 + (2 * r->fp_ports);
+}
+
+static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
+{
+    hwaddr start = ROCKER_DMA_DESC_BASE;
+    hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
+
+    return addr >= start && addr < end;
+}
+
+static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
+{
+    int i;
+    bool old_enabled;
+    bool new_enabled;
+    FpPort *fp_port;
+
+    for (i = 0; i < r->fp_ports; i++) {
+        fp_port = r->fp_port[i];
+        old_enabled = fp_port_enabled(fp_port);
+        new_enabled = (new >> (i + 1)) & 0x1;
+        if (new_enabled == old_enabled) {
+            continue;
+        }
+        if (new_enabled) {
+            fp_port_enable(r->fp_port[i]);
+        } else {
+            fp_port_disable(r->fp_port[i]);
+        }
+    }
+}
+
+static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
+{
+    Rocker *r = opaque;
+
+    if (rocker_addr_is_desc_reg(r, addr)) {
+        unsigned index = ROCKER_RING_INDEX(addr);
+        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
+
+        switch (offset) {
+        case ROCKER_DMA_DESC_ADDR_OFFSET:
+            r->lower32 = (uint64_t)val;
+            break;
+        case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
+            desc_ring_set_base_addr(r->rings[index],
+                                    ((uint64_t)val) << 32 | r->lower32);
+            r->lower32 = 0;
+            break;
+        case ROCKER_DMA_DESC_SIZE_OFFSET:
+            desc_ring_set_size(r->rings[index], val);
+            break;
+        case ROCKER_DMA_DESC_HEAD_OFFSET:
+            if (desc_ring_set_head(r->rings[index], val)) {
+                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
+            }
+            break;
+        case ROCKER_DMA_DESC_CTRL_OFFSET:
+            desc_ring_set_ctrl(r->rings[index], val);
+            break;
+        case ROCKER_DMA_DESC_CREDITS_OFFSET:
+            if (desc_ring_ret_credits(r->rings[index], val)) {
+                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
+            }
+            break;
+        default:
+            DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
+                    " val=0x%08x (ring %d, offset=0x%02x)\n",
+                    addr, val, index, offset);
+            break;
+        }
+        return;
+    }
+
+    switch (addr) {
+    case ROCKER_TEST_REG:
+        r->test_reg = val;
+        break;
+    case ROCKER_TEST_REG64:
+    case ROCKER_TEST_DMA_ADDR:
+    case ROCKER_PORT_PHYS_ENABLE:
+        r->lower32 = (uint64_t)val;
+        break;
+    case ROCKER_TEST_REG64 + 4:
+        r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
+        r->lower32 = 0;
+        break;
+    case ROCKER_TEST_IRQ:
+        rocker_msix_irq(r, val);
+        break;
+    case ROCKER_TEST_DMA_SIZE:
+        r->test_dma_size = val;
+        break;
+    case ROCKER_TEST_DMA_ADDR + 4:
+        r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
+        r->lower32 = 0;
+        break;
+    case ROCKER_TEST_DMA_CTRL:
+        rocker_test_dma_ctrl(r, val);
+        break;
+    case ROCKER_CONTROL:
+        rocker_control(r, val);
+        break;
+    case ROCKER_PORT_PHYS_ENABLE + 4:
+        rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
+        r->lower32 = 0;
+        break;
+    default:
+        DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
+                " val=0x%08x\n", addr, val);
+        break;
+    }
+}
+
+static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
+{
+    Rocker *r = opaque;
+
+    if (rocker_addr_is_desc_reg(r, addr)) {
+        unsigned index = ROCKER_RING_INDEX(addr);
+        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
+
+        switch (offset) {
+        case ROCKER_DMA_DESC_ADDR_OFFSET:
+            desc_ring_set_base_addr(r->rings[index], val);
+            break;
+        default:
+            DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
+                    " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
+                    addr, val, index, offset);
+            break;
+        }
+        return;
+    }
+
+    switch (addr) {
+    case ROCKER_TEST_REG64:
+        r->test_reg64 = val;
+        break;
+    case ROCKER_TEST_DMA_ADDR:
+        r->test_dma_addr = val;
+        break;
+    case ROCKER_PORT_PHYS_ENABLE:
+        rocker_port_phys_enable_write(r, val);
+        break;
+    default:
+        DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
+                " val=0x" TARGET_FMT_plx "\n", addr, val);
+        break;
+    }
+}
+
+#ifdef DEBUG_ROCKER
+#define regname(reg) case (reg): return #reg
+static const char *rocker_reg_name(void *opaque, hwaddr addr)
+{
+    Rocker *r = opaque;
+
+    if (rocker_addr_is_desc_reg(r, addr)) {
+        unsigned index = ROCKER_RING_INDEX(addr);
+        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
+        static char buf[100];
+        char ring_name[10];
+
+        switch (index) {
+        case 0:
+            sprintf(ring_name, "cmd");
+            break;
+        case 1:
+            sprintf(ring_name, "event");
+            break;
+        default:
+            sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
+                    (index - 2) / 2);
+        }
"rx" : "tx", + (index - 2) / 2); + } + + switch (offset) { + case ROCKER_DMA_DESC_ADDR_OFFSET: + sprintf(buf, "Ring[%s] ADDR", ring_name); + return buf; + case ROCKER_DMA_DESC_ADDR_OFFSET+4: + sprintf(buf, "Ring[%s] ADDR+4", ring_name); + return buf; + case ROCKER_DMA_DESC_SIZE_OFFSET: + sprintf(buf, "Ring[%s] SIZE", ring_name); + return buf; + case ROCKER_DMA_DESC_HEAD_OFFSET: + sprintf(buf, "Ring[%s] HEAD", ring_name); + return buf; + case ROCKER_DMA_DESC_TAIL_OFFSET: + sprintf(buf, "Ring[%s] TAIL", ring_name); + return buf; + case ROCKER_DMA_DESC_CTRL_OFFSET: + sprintf(buf, "Ring[%s] CTRL", ring_name); + return buf; + case ROCKER_DMA_DESC_CREDITS_OFFSET: + sprintf(buf, "Ring[%s] CREDITS", ring_name); + return buf; + default: + sprintf(buf, "Ring[%s] ???", ring_name); + return buf; + } + } else { + switch (addr) { + regname(ROCKER_BOGUS_REG0); + regname(ROCKER_BOGUS_REG1); + regname(ROCKER_BOGUS_REG2); + regname(ROCKER_BOGUS_REG3); + regname(ROCKER_TEST_REG); + regname(ROCKER_TEST_REG64); + regname(ROCKER_TEST_REG64+4); + regname(ROCKER_TEST_IRQ); + regname(ROCKER_TEST_DMA_ADDR); + regname(ROCKER_TEST_DMA_ADDR+4); + regname(ROCKER_TEST_DMA_SIZE); + regname(ROCKER_TEST_DMA_CTRL); + regname(ROCKER_CONTROL); + regname(ROCKER_PORT_PHYS_COUNT); + regname(ROCKER_PORT_PHYS_LINK_STATUS); + regname(ROCKER_PORT_PHYS_LINK_STATUS+4); + regname(ROCKER_PORT_PHYS_ENABLE); + regname(ROCKER_PORT_PHYS_ENABLE+4); + regname(ROCKER_SWITCH_ID); + regname(ROCKER_SWITCH_ID+4); + } + } + return "???"; +} +#else +static const char *rocker_reg_name(void *opaque, hwaddr addr) +{ + return NULL; +} +#endif + +static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ + DPRINTF("Write %s addr " TARGET_FMT_plx + ", size %u, val " TARGET_FMT_plx "\n", + rocker_reg_name(opaque, addr), addr, size, val); + + switch (size) { + case 4: + rocker_io_writel(opaque, addr, val); + break; + case 8: + rocker_io_writeq(opaque, addr, val); + break; + } +} + +static uint64_t rocker_port_phys_link_status(Rocker *r) +{ + int i; + uint64_t status = 0; + + for (i = 0; i < r->fp_ports; i++) { + FpPort *port = r->fp_port[i]; + + if (fp_port_get_link_up(port)) { + status |= 1 << (i + 1); + } + } + return status; +} + +static uint64_t rocker_port_phys_enable_read(Rocker *r) +{ + int i; + uint64_t ret = 0; + + for (i = 0; i < r->fp_ports; i++) { + FpPort *port = r->fp_port[i]; + + if (fp_port_enabled(port)) { + ret |= 1 << (i + 1); + } + } + return ret; +} + +static uint32_t rocker_io_readl(void *opaque, hwaddr addr) +{ + Rocker *r = opaque; + uint32_t ret; + + if (rocker_addr_is_desc_reg(r, addr)) { + unsigned index = ROCKER_RING_INDEX(addr); + unsigned offset = addr & ROCKER_DMA_DESC_MASK; + + switch (offset) { + case ROCKER_DMA_DESC_ADDR_OFFSET: + ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]); + break; + case ROCKER_DMA_DESC_ADDR_OFFSET + 4: + ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32); + break; + case ROCKER_DMA_DESC_SIZE_OFFSET: + ret = desc_ring_get_size(r->rings[index]); + break; + case ROCKER_DMA_DESC_HEAD_OFFSET: + ret = desc_ring_get_head(r->rings[index]); + break; + case ROCKER_DMA_DESC_TAIL_OFFSET: + ret = desc_ring_get_tail(r->rings[index]); + break; + case ROCKER_DMA_DESC_CREDITS_OFFSET: + ret = desc_ring_get_credits(r->rings[index]); + break; + default: + DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx + " (ring %d, addr=0x%02x)\n", addr, index, offset); + ret = 0; + break; + } + return ret; + } + + switch (addr) { + case ROCKER_BOGUS_REG0: + case 
ROCKER_BOGUS_REG1: + case ROCKER_BOGUS_REG2: + case ROCKER_BOGUS_REG3: + ret = 0xDEADBABE; + break; + case ROCKER_TEST_REG: + ret = r->test_reg * 2; + break; + case ROCKER_TEST_REG64: + ret = (uint32_t)(r->test_reg64 * 2); + break; + case ROCKER_TEST_REG64 + 4: + ret = (uint32_t)((r->test_reg64 * 2) >> 32); + break; + case ROCKER_TEST_DMA_SIZE: + ret = r->test_dma_size; + break; + case ROCKER_TEST_DMA_ADDR: + ret = (uint32_t)r->test_dma_addr; + break; + case ROCKER_TEST_DMA_ADDR + 4: + ret = (uint32_t)(r->test_dma_addr >> 32); + break; + case ROCKER_PORT_PHYS_COUNT: + ret = r->fp_ports; + break; + case ROCKER_PORT_PHYS_LINK_STATUS: + ret = (uint32_t)rocker_port_phys_link_status(r); + break; + case ROCKER_PORT_PHYS_LINK_STATUS + 4: + ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32); + break; + case ROCKER_PORT_PHYS_ENABLE: + ret = (uint32_t)rocker_port_phys_enable_read(r); + break; + case ROCKER_PORT_PHYS_ENABLE + 4: + ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32); + break; + case ROCKER_SWITCH_ID: + ret = (uint32_t)r->switch_id; + break; + case ROCKER_SWITCH_ID + 4: + ret = (uint32_t)(r->switch_id >> 32); + break; + default: + DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr); + ret = 0; + break; + } + return ret; +} + +static uint64_t rocker_io_readq(void *opaque, hwaddr addr) +{ + Rocker *r = opaque; + uint64_t ret; + + if (rocker_addr_is_desc_reg(r, addr)) { + unsigned index = ROCKER_RING_INDEX(addr); + unsigned offset = addr & ROCKER_DMA_DESC_MASK; + + switch (addr & ROCKER_DMA_DESC_MASK) { + case ROCKER_DMA_DESC_ADDR_OFFSET: + ret = desc_ring_get_base_addr(r->rings[index]); + break; + default: + DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx + " (ring %d, addr=0x%02x)\n", addr, index, offset); + ret = 0; + break; + } + return ret; + } + + switch (addr) { + case ROCKER_BOGUS_REG0: + case ROCKER_BOGUS_REG2: + ret = 0xDEADBABEDEADBABEULL; + break; + case ROCKER_TEST_REG64: + ret = r->test_reg64 * 2; + break; + case ROCKER_TEST_DMA_ADDR: + ret = r->test_dma_addr; + break; + case ROCKER_PORT_PHYS_LINK_STATUS: + ret = rocker_port_phys_link_status(r); + break; + case ROCKER_PORT_PHYS_ENABLE: + ret = rocker_port_phys_enable_read(r); + break; + case ROCKER_SWITCH_ID: + ret = r->switch_id; + break; + default: + DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr); + ret = 0; + break; + } + return ret; +} + +static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n", + rocker_reg_name(opaque, addr), addr, size); + + switch (size) { + case 4: + return rocker_io_readl(opaque, addr); + case 8: + return rocker_io_readq(opaque, addr); + } + + return -1; +} + +static const MemoryRegionOps rocker_mmio_ops = { + .read = rocker_mmio_read, + .write = rocker_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 8, + }, +}; + +static void rocker_msix_vectors_unuse(Rocker *r, + unsigned int num_vectors) +{ + PCIDevice *dev = PCI_DEVICE(r); + int i; + + for (i = 0; i < num_vectors; i++) { + msix_vector_unuse(dev, i); + } +} + +static int rocker_msix_vectors_use(Rocker *r, + unsigned int num_vectors) +{ + PCIDevice *dev = PCI_DEVICE(r); + int err; + int i; + + for (i = 0; i < num_vectors; i++) { + err = msix_vector_use(dev, i); + if (err) { + goto rollback; + } + } + return 0; + +rollback: + rocker_msix_vectors_unuse(r, i); + return err; +} + +static 
int rocker_msix_init(Rocker *r) +{ + PCIDevice *dev = PCI_DEVICE(r); + int err; + + err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports), + &r->msix_bar, + ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET, + &r->msix_bar, + ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET, + 0); + if (err) { + return err; + } + + err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports)); + if (err) { + goto err_msix_vectors_use; + } + + return 0; + +err_msix_vectors_use: + msix_uninit(dev, &r->msix_bar, &r->msix_bar); + return err; +} + +static void rocker_msix_uninit(Rocker *r) +{ + PCIDevice *dev = PCI_DEVICE(r); + + msix_uninit(dev, &r->msix_bar, &r->msix_bar); + rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports)); +} + +static int pci_rocker_init(PCIDevice *dev) +{ + Rocker *r = to_rocker(dev); + const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } }; + const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } }; + static int sw_index; + int i, err = 0; + + /* allocate worlds */ + + r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r); + r->world_dflt = r->worlds[ROCKER_WORLD_TYPE_OF_DPA]; + + for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) { + if (!r->worlds[i]) { + goto err_world_alloc; + } + } + + /* set up memory-mapped region at BAR0 */ + + memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r, + "rocker-mmio", ROCKER_PCI_BAR0_SIZE); + pci_register_bar(dev, ROCKER_PCI_BAR0_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio); + + /* set up memory-mapped region for MSI-X */ + + memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar", + ROCKER_PCI_MSIX_BAR_SIZE); + pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar); + + /* MSI-X init */ + + err = rocker_msix_init(r); + if (err) { + goto err_msix_init; + } + + /* validate switch properties */ + + if (!r->name) { + r->name = g_strdup(ROCKER); + } + + if (rocker_find(r->name)) { + err = -EEXIST; + goto err_duplicate; + } + + if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) { + memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt)); + r->fp_start_macaddr.a[4] += (sw_index++); + } + + if (!r->switch_id) { + memcpy(&r->switch_id, &r->fp_start_macaddr, + sizeof(r->fp_start_macaddr)); + } + + if (r->fp_ports > ROCKER_FP_PORTS_MAX) { + r->fp_ports = ROCKER_FP_PORTS_MAX; + } + + r->rings = g_malloc(sizeof(DescRing *) * rocker_pci_ring_count(r)); + if (!r->rings) { + goto err_rings_alloc; + } + + /* Rings are ordered like this: + * - command ring + * - event ring + * - port0 tx ring + * - port0 rx ring + * - port1 tx ring + * - port1 rx ring + * ..... 
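+ *
+ * For example, with two ports there are six rings: 0 cmd, 1 event,
+ * 2 port0-tx, 3 port0-rx, 4 port1-tx, 5 port1-rx. In general, for
+ * 0-based port p, the tx ring is 2 + 2*p and the rx ring is 3 + 2*p,
+ * which is what the ROCKER_MSIX_VEC_TX(p)/ROCKER_MSIX_VEC_RX(p)
+ * vector assignments below rely on.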
+ */ + + err = -ENOMEM; + for (i = 0; i < rocker_pci_ring_count(r); i++) { + DescRing *ring = desc_ring_alloc(r, i); + + if (!ring) { + goto err_ring_alloc; + } + + if (i == ROCKER_RING_CMD) { + desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD); + } else if (i == ROCKER_RING_EVENT) { + desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT); + } else if (i % 2 == 0) { + desc_ring_set_consume(ring, tx_consume, + ROCKER_MSIX_VEC_TX((i - 2) / 2)); + } else if (i % 2 == 1) { + desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2)); + } + + r->rings[i] = ring; + } + + for (i = 0; i < r->fp_ports; i++) { + FpPort *port = + fp_port_alloc(r, r->name, &r->fp_start_macaddr, + i, &r->fp_ports_peers[i]); + + if (!port) { + goto err_port_alloc; + } + + r->fp_port[i] = port; + fp_port_set_world(port, r->world_dflt); + } + + QLIST_INSERT_HEAD(&rockers, r, next); + + return 0; + +err_port_alloc: + for (--i; i >= 0; i--) { + FpPort *port = r->fp_port[i]; + fp_port_free(port); + } + i = rocker_pci_ring_count(r); +err_ring_alloc: + for (--i; i >= 0; i--) { + desc_ring_free(r->rings[i]); + } + g_free(r->rings); +err_rings_alloc: +err_duplicate: + rocker_msix_uninit(r); +err_msix_init: + object_unparent(OBJECT(&r->msix_bar)); + object_unparent(OBJECT(&r->mmio)); +err_world_alloc: + for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) { + if (r->worlds[i]) { + world_free(r->worlds[i]); + } + } + return err; +} + +static void pci_rocker_uninit(PCIDevice *dev) +{ + Rocker *r = to_rocker(dev); + int i; + + QLIST_REMOVE(r, next); + + for (i = 0; i < r->fp_ports; i++) { + FpPort *port = r->fp_port[i]; + + fp_port_free(port); + r->fp_port[i] = NULL; + } + + for (i = 0; i < rocker_pci_ring_count(r); i++) { + if (r->rings[i]) { + desc_ring_free(r->rings[i]); + } + } + g_free(r->rings); + + rocker_msix_uninit(r); + object_unparent(OBJECT(&r->msix_bar)); + object_unparent(OBJECT(&r->mmio)); + + for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) { + if (r->worlds[i]) { + world_free(r->worlds[i]); + } + } + g_free(r->fp_ports_peers); +} + +static void rocker_reset(DeviceState *dev) +{ + Rocker *r = to_rocker(dev); + int i; + + for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) { + if (r->worlds[i]) { + world_reset(r->worlds[i]); + } + } + for (i = 0; i < r->fp_ports; i++) { + fp_port_reset(r->fp_port[i]); + fp_port_set_world(r->fp_port[i], r->world_dflt); + } + + r->test_reg = 0; + r->test_reg64 = 0; + r->test_dma_addr = 0; + r->test_dma_size = 0; + + for (i = 0; i < rocker_pci_ring_count(r); i++) { + desc_ring_reset(r->rings[i]); + } + + DPRINTF("Reset done\n"); +} + +static Property rocker_properties[] = { + DEFINE_PROP_STRING("name", Rocker, name), + DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker, + fp_start_macaddr), + DEFINE_PROP_UINT64("switch_id", Rocker, + switch_id, 0), + DEFINE_PROP_ARRAY("ports", Rocker, fp_ports, + fp_ports_peers, qdev_prop_netdev, NICPeers), + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription rocker_vmsd = { + .name = ROCKER, + .unmigratable = 1, +}; + +static void rocker_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->init = pci_rocker_init; + k->exit = pci_rocker_uninit; + k->vendor_id = PCI_VENDOR_ID_REDHAT; + k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER; + k->revision = ROCKER_PCI_REVISION; + k->class_id = PCI_CLASS_NETWORK_OTHER; + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); + dc->desc = "Rocker Switch"; + dc->reset = rocker_reset; + dc->props = rocker_properties; + dc->vmsd 
= &rocker_vmsd; +} + +static const TypeInfo rocker_info = { + .name = ROCKER, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(Rocker), + .class_init = rocker_class_init, +}; + +static void rocker_register_types(void) +{ + type_register_static(&rocker_info); +} + +type_init(rocker_register_types) diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h new file mode 100644 index 0000000..b3310b6 --- /dev/null +++ b/hw/net/rocker/rocker.h @@ -0,0 +1,84 @@ +/* + * QEMU rocker switch emulation + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> + * Copyright (c) 2014 Neil Horman <nhorman@tuxdriver.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _ROCKER_H_ +#define _ROCKER_H_ + +#include "qemu/sockets.h" + +#if defined(DEBUG_ROCKER) +# define DPRINTF(fmt, ...) \ + do { \ + struct timeval tv; \ + char timestr[64]; \ + time_t now; \ + gettimeofday(&tv, NULL); \ + now = tv.tv_sec; \ + strftime(timestr, sizeof(timestr), "%T", localtime(&now)); \ + fprintf(stderr, "%s.%06ld ", timestr, tv.tv_usec); \ + fprintf(stderr, "ROCKER: " fmt, ## __VA_ARGS__); \ + } while (0) +#else +static inline GCC_FMT_ATTR(1, 2) int DPRINTF(const char *fmt, ...) +{ + return 0; +} +#endif + +#define __le16 uint16_t +#define __le32 uint32_t +#define __le64 uint64_t + +#define __be16 uint16_t +#define __be32 uint32_t +#define __be64 uint64_t + +static inline bool ipv4_addr_is_multicast(__be32 addr) +{ + return (addr & htonl(0xf0000000)) == htonl(0xe0000000); +} + +typedef struct ipv6_addr { + union { + uint8_t addr8[16]; + __be16 addr16[8]; + __be32 addr32[4]; + }; +} Ipv6Addr; + +static inline bool ipv6_addr_is_multicast(const Ipv6Addr *addr) +{ + return (addr->addr32[0] & htonl(0xFF000000)) == htonl(0xFF000000); +} + +typedef struct rocker Rocker; +typedef struct world World; +typedef struct desc_info DescInfo; +typedef struct desc_ring DescRing; + +Rocker *rocker_find(const char *name); +uint32_t rocker_fp_ports(Rocker *r); +int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up); +int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr, + uint16_t vlan_id); +int rx_produce(World *world, uint32_t pport, + const struct iovec *iov, int iovcnt); +int rocker_port_eg(Rocker *r, uint32_t pport, + const struct iovec *iov, int iovcnt); + +#endif /* _ROCKER_H_ */ diff --git a/hw/net/rocker/rocker_desc.c b/hw/net/rocker/rocker_desc.c new file mode 100644 index 0000000..9d896fe --- /dev/null +++ b/hw/net/rocker/rocker_desc.c @@ -0,0 +1,377 @@ +/* + * QEMU rocker switch emulation - Descriptor ring support + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "net/net.h" +#include "hw/hw.h" +#include "hw/pci/pci.h" + +#include "rocker.h" +#include "rocker_hw.h" +#include "rocker_desc.h" + +struct desc_ring { + hwaddr base_addr; + uint32_t size; + uint32_t head; + uint32_t tail; + uint32_t ctrl; + uint32_t credits; + Rocker *r; + DescInfo *info; + int index; + desc_ring_consume *consume; + unsigned msix_vector; +}; + +struct desc_info { + DescRing *ring; + RockerDesc desc; + char *buf; + size_t buf_size; +}; + +uint16_t desc_buf_size(DescInfo *info) +{ + return le16_to_cpu(info->desc.buf_size); +} + +uint16_t desc_tlv_size(DescInfo *info) +{ + return le16_to_cpu(info->desc.tlv_size); +} + +char *desc_get_buf(DescInfo *info, bool read_only) +{ + PCIDevice *dev = PCI_DEVICE(info->ring->r); + size_t size = read_only ? le16_to_cpu(info->desc.tlv_size) : + le16_to_cpu(info->desc.buf_size); + + if (size > info->buf_size) { + info->buf = g_realloc(info->buf, size); + info->buf_size = size; + } + + if (!info->buf) { + return NULL; + } + + if (pci_dma_read(dev, le64_to_cpu(info->desc.buf_addr), info->buf, size)) { + return NULL; + } + + return info->buf; +} + +int desc_set_buf(DescInfo *info, size_t tlv_size) +{ + PCIDevice *dev = PCI_DEVICE(info->ring->r); + + if (tlv_size > info->buf_size) { + DPRINTF("ERROR: trying to write more to desc buf than it " + "can hold buf_size %zu tlv_size %zu\n", + info->buf_size, tlv_size); + return -ROCKER_EMSGSIZE; + } + + info->desc.tlv_size = cpu_to_le16(tlv_size); + pci_dma_write(dev, le64_to_cpu(info->desc.buf_addr), info->buf, tlv_size); + + return ROCKER_OK; +} + +DescRing *desc_get_ring(DescInfo *info) +{ + return info->ring; +} + +int desc_ring_index(DescRing *ring) +{ + return ring->index; +} + +static bool desc_ring_empty(DescRing *ring) +{ + return ring->head == ring->tail; +} + +bool desc_ring_set_base_addr(DescRing *ring, uint64_t base_addr) +{ + if (base_addr & 0x7) { + DPRINTF("ERROR: ring[%d] desc base addr (0x" TARGET_FMT_plx + ") not 8-byte aligned\n", ring->index, base_addr); + return false; + } + + ring->base_addr = base_addr; + + return true; +} + +uint64_t desc_ring_get_base_addr(DescRing *ring) +{ + return ring->base_addr; +} + +bool desc_ring_set_size(DescRing *ring, uint32_t size) +{ + int i; + + if (size < 2 || size > 0x10000 || (size & (size - 1))) { + DPRINTF("ERROR: ring[%d] size (%d) not a power of 2 " + "or in range [2, 64K]\n", ring->index, size); + return false; + } + + for (i = 0; i < ring->size; i++) { + if (ring->info[i].buf) { + g_free(ring->info[i].buf); + } + } + + ring->size = size; + ring->head = ring->tail = 0; + + ring->info = g_realloc(ring->info, size * sizeof(DescInfo)); + if (!ring->info) { + return false; + } + + memset(ring->info, 0, size * sizeof(DescInfo)); + + for (i = 0; i < size; i++) { + ring->info[i].ring = ring; + } + + return true; +} + +uint32_t desc_ring_get_size(DescRing *ring) +{ + return ring->size; +} + +static DescInfo *desc_read(DescRing *ring, uint32_t index) +{ + PCIDevice *dev = PCI_DEVICE(ring->r); + DescInfo *info = &ring->info[index]; + hwaddr addr = ring->base_addr + (sizeof(RockerDesc) * index); + + pci_dma_read(dev, addr, &info->desc, sizeof(info->desc)); + + return info; +} + +static void desc_write(DescRing *ring, uint32_t index) +{ + PCIDevice *dev = PCI_DEVICE(ring->r); + 
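/* locate the shadow copy of descriptor 'index' and the guest-physical
+     * address of its ring slot, then DMA the descriptor back out
+     */
+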
DescInfo *info = &ring->info[index]; + hwaddr addr = ring->base_addr + (sizeof(RockerDesc) * index); + + pci_dma_write(dev, addr, &info->desc, sizeof(info->desc)); +} + +static bool desc_ring_base_addr_check(DescRing *ring) +{ + if (!ring->base_addr) { + DPRINTF("ERROR: ring[%d] not-initialized desc base address!\n", + ring->index); + return false; + } + return true; +} + +static DescInfo *__desc_ring_fetch_desc(DescRing *ring) +{ + return desc_read(ring, ring->tail); +} + +DescInfo *desc_ring_fetch_desc(DescRing *ring) +{ + if (desc_ring_empty(ring) || !desc_ring_base_addr_check(ring)) { + return NULL; + } + + return desc_read(ring, ring->tail); +} + +static bool __desc_ring_post_desc(DescRing *ring, int err) +{ + uint16_t comp_err = 0x8000 | (uint16_t)-err; + DescInfo *info = &ring->info[ring->tail]; + + info->desc.comp_err = cpu_to_le16(comp_err); + desc_write(ring, ring->tail); + ring->tail = (ring->tail + 1) % ring->size; + + /* return true if starting credit count */ + + return ring->credits++ == 0; +} + +bool desc_ring_post_desc(DescRing *ring, int err) +{ + if (desc_ring_empty(ring)) { + DPRINTF("ERROR: ring[%d] trying to post desc to empty ring\n", + ring->index); + return false; + } + + if (!desc_ring_base_addr_check(ring)) { + return false; + } + + return __desc_ring_post_desc(ring, err); +} + +static bool ring_pump(DescRing *ring) +{ + DescInfo *info; + bool primed = false; + int err; + + /* If the ring has a consumer, call consumer for each + * desc starting at tail and stopping when tail reaches + * head (the empty ring condition). + */ + + if (ring->consume) { + while (ring->head != ring->tail) { + info = __desc_ring_fetch_desc(ring); + err = ring->consume(ring->r, info); + if (__desc_ring_post_desc(ring, err)) { + primed = true; + } + } + } + + return primed; +} + +bool desc_ring_set_head(DescRing *ring, uint32_t new) +{ + uint32_t tail = ring->tail; + uint32_t head = ring->head; + + if (!desc_ring_base_addr_check(ring)) { + return false; + } + + if (new >= ring->size) { + DPRINTF("ERROR: trying to set head (%d) past ring[%d] size (%d)\n", + new, ring->index, ring->size); + return false; + } + + if (((head < tail) && ((new >= tail) || (new < head))) || + ((head > tail) && ((new >= tail) && (new < head)))) { + DPRINTF("ERROR: trying to wrap ring[%d] " + "(head %d, tail %d, new head %d)\n", + ring->index, head, tail, new); + return false; + } + + if (new == ring->head) { + DPRINTF("WARNING: setting head (%d) to current head position\n", new); + } + + ring->head = new; + + return ring_pump(ring); +} + +uint32_t desc_ring_get_head(DescRing *ring) +{ + return ring->head; +} + +uint32_t desc_ring_get_tail(DescRing *ring) +{ + return ring->tail; +} + +void desc_ring_set_ctrl(DescRing *ring, uint32_t val) +{ + if (val & ROCKER_DMA_DESC_CTRL_RESET) { + DPRINTF("ring[%d] resetting\n", ring->index); + desc_ring_reset(ring); + } +} + +bool desc_ring_ret_credits(DescRing *ring, uint32_t credits) +{ + if (credits > ring->credits) { + DPRINTF("ERROR: trying to return more credits (%d) " + "than are outstanding (%d)\n", credits, ring->credits); + ring->credits = 0; + return false; + } + + ring->credits -= credits; + + /* return true if credits are still outstanding */ + + return ring->credits > 0; +} + +uint32_t desc_ring_get_credits(DescRing *ring) +{ + return ring->credits; +} + +void desc_ring_set_consume(DescRing *ring, desc_ring_consume *consume, + unsigned vector) +{ + ring->consume = consume; + ring->msix_vector = vector; +} + +unsigned desc_ring_get_msix_vector(DescRing *ring) +{ + 
return ring->msix_vector; +} + +DescRing *desc_ring_alloc(Rocker *r, int index) +{ + DescRing *ring; + + ring = g_malloc0(sizeof(DescRing)); + if (!ring) { + return NULL; + } + + ring->r = r; + ring->index = index; + + return ring; +} + +void desc_ring_free(DescRing *ring) +{ + if (ring->info) { + g_free(ring->info); + } + g_free(ring); +} + +void desc_ring_reset(DescRing *ring) +{ + ring->base_addr = 0; + ring->size = 0; + ring->head = 0; + ring->tail = 0; + ring->ctrl = 0; + ring->credits = 0; +} diff --git a/hw/net/rocker/rocker_desc.h b/hw/net/rocker/rocker_desc.h new file mode 100644 index 0000000..d4041f5 --- /dev/null +++ b/hw/net/rocker/rocker_desc.h @@ -0,0 +1,53 @@ +/* + * QEMU rocker switch emulation - Descriptor ring support + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#ifndef _ROCKER_DESC_H_ +#define _ROCKER_DESC_H_ + +#include "rocker_hw.h" + +typedef int (desc_ring_consume)(Rocker *r, DescInfo *info); + +uint16_t desc_buf_size(DescInfo *info); +uint16_t desc_tlv_size(DescInfo *info); +char *desc_get_buf(DescInfo *info, bool read_only); +int desc_set_buf(DescInfo *info, size_t tlv_size); +DescRing *desc_get_ring(DescInfo *info); + +int desc_ring_index(DescRing *ring); +bool desc_ring_set_base_addr(DescRing *ring, uint64_t base_addr); +uint64_t desc_ring_get_base_addr(DescRing *ring); +bool desc_ring_set_size(DescRing *ring, uint32_t size); +uint32_t desc_ring_get_size(DescRing *ring); +bool desc_ring_set_head(DescRing *ring, uint32_t new); +uint32_t desc_ring_get_head(DescRing *ring); +uint32_t desc_ring_get_tail(DescRing *ring); +void desc_ring_set_ctrl(DescRing *ring, uint32_t val); +bool desc_ring_ret_credits(DescRing *ring, uint32_t credits); +uint32_t desc_ring_get_credits(DescRing *ring); + +DescInfo *desc_ring_fetch_desc(DescRing *ring); +bool desc_ring_post_desc(DescRing *ring, int status); + +void desc_ring_set_consume(DescRing *ring, desc_ring_consume *consume, + unsigned vector); +unsigned desc_ring_get_msix_vector(DescRing *ring); +DescRing *desc_ring_alloc(Rocker *r, int index); +void desc_ring_free(DescRing *ring); +void desc_ring_reset(DescRing *ring); + +#endif diff --git a/hw/net/rocker/rocker_fp.c b/hw/net/rocker/rocker_fp.c new file mode 100644 index 0000000..2f1e3b3 --- /dev/null +++ b/hw/net/rocker/rocker_fp.c @@ -0,0 +1,234 @@ +/* + * QEMU rocker switch emulation - front-panel ports + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include "net/clients.h" + +#include "rocker.h" +#include "rocker_hw.h" +#include "rocker_fp.h" +#include "rocker_world.h" + +enum duplex { + DUPLEX_HALF = 0, + DUPLEX_FULL +}; + +struct fp_port { + Rocker *r; + World *world; + unsigned int index; + char *name; + uint32_t pport; + bool enabled; + uint32_t speed; + uint8_t duplex; + uint8_t autoneg; + uint8_t learning; + NICState *nic; + NICConf conf; +}; + +bool fp_port_get_link_up(FpPort *port) +{ + return !qemu_get_queue(port->nic)->link_down; +} + +void fp_port_get_macaddr(FpPort *port, MACAddr *macaddr) +{ + memcpy(macaddr->a, port->conf.macaddr.a, sizeof(macaddr->a)); +} + +void fp_port_set_macaddr(FpPort *port, MACAddr *macaddr) +{ +/* XXX TODO implement and test setting mac addr + * XXX memcpy(port->conf.macaddr.a, macaddr.a, sizeof(port->conf.macaddr.a)); + */ +} + +uint8_t fp_port_get_learning(FpPort *port) +{ + return port->learning; +} + +void fp_port_set_learning(FpPort *port, uint8_t learning) +{ + port->learning = learning; +} + +int fp_port_get_settings(FpPort *port, uint32_t *speed, + uint8_t *duplex, uint8_t *autoneg) +{ + *speed = port->speed; + *duplex = port->duplex; + *autoneg = port->autoneg; + + return ROCKER_OK; +} + +int fp_port_set_settings(FpPort *port, uint32_t speed, + uint8_t duplex, uint8_t autoneg) +{ + /* XXX validate inputs */ + + port->speed = speed; + port->duplex = duplex; + port->autoneg = autoneg; + + return ROCKER_OK; +} + +bool fp_port_from_pport(uint32_t pport, uint32_t *port) +{ + if (pport < 1 || pport > ROCKER_FP_PORTS_MAX) { + return false; + } + *port = pport - 1; + return true; +} + +int fp_port_eg(FpPort *port, const struct iovec *iov, int iovcnt) +{ + NetClientState *nc = qemu_get_queue(port->nic); + + if (port->enabled) { + qemu_sendv_packet(nc, iov, iovcnt); + } + + return ROCKER_OK; +} + +static int fp_port_can_receive(NetClientState *nc) +{ + FpPort *port = qemu_get_nic_opaque(nc); + + return port->enabled; +} + +static ssize_t fp_port_receive_iov(NetClientState *nc, const struct iovec *iov, + int iovcnt) +{ + FpPort *port = qemu_get_nic_opaque(nc); + + return world_ingress(port->world, port->pport, iov, iovcnt); +} + +static ssize_t fp_port_receive(NetClientState *nc, const uint8_t *buf, + size_t size) +{ + const struct iovec iov = { + .iov_base = (uint8_t *)buf, + .iov_len = size + }; + + return fp_port_receive_iov(nc, &iov, 1); +} + +static void fp_port_cleanup(NetClientState *nc) +{ +} + +static void fp_port_set_link_status(NetClientState *nc) +{ + FpPort *port = qemu_get_nic_opaque(nc); + + rocker_event_link_changed(port->r, port->pport, !nc->link_down); +} + +static NetClientInfo fp_port_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = fp_port_can_receive, + .receive = fp_port_receive, + .receive_iov = fp_port_receive_iov, + .cleanup = fp_port_cleanup, + .link_status_changed = fp_port_set_link_status, +}; + +World *fp_port_get_world(FpPort *port) +{ + return port->world; +} + +void fp_port_set_world(FpPort *port, World *world) +{ + DPRINTF("port %d setting world \"%s\"\n", port->index, world_name(world)); + port->world = world; +} + +bool fp_port_enabled(FpPort *port) +{ + return port->enabled; +} + +void fp_port_enable(FpPort *port) +{ + port->enabled = true; + DPRINTF("port %d enabled\n", port->index); +} + +void fp_port_disable(FpPort *port) +{ + port->enabled = false; + DPRINTF("port %d disabled\n", port->index); +} + +FpPort *fp_port_alloc(Rocker *r, char *sw_name, + MACAddr *start_mac, unsigned int index, + NICPeers *peers) 
+{ + FpPort *port = g_malloc0(sizeof(FpPort)); + + if (!port) { + return NULL; + } + + port->r = r; + port->index = index; + port->pport = index + 1; + + /* front-panel switch port names are 1-based */ + + port->name = g_strdup_printf("%s.%d", sw_name, port->pport); + + memcpy(port->conf.macaddr.a, start_mac, sizeof(port->conf.macaddr.a)); + port->conf.macaddr.a[5] += index; + port->conf.bootindex = -1; + port->conf.peers = *peers; + + port->nic = qemu_new_nic(&fp_port_info, &port->conf, + sw_name, NULL, port); + qemu_format_nic_info_str(qemu_get_queue(port->nic), + port->conf.macaddr.a); + + fp_port_reset(port); + + return port; +} + +void fp_port_free(FpPort *port) +{ + qemu_del_nic(port->nic); + g_free(port->name); + g_free(port); +} + +void fp_port_reset(FpPort *port) +{ + fp_port_disable(port); + port->speed = 10000; /* 10Gbps */ + port->duplex = DUPLEX_FULL; + port->autoneg = 0; +} diff --git a/hw/net/rocker/rocker_fp.h b/hw/net/rocker/rocker_fp.h new file mode 100644 index 0000000..a5f28f1 --- /dev/null +++ b/hw/net/rocker/rocker_fp.h @@ -0,0 +1,51 @@ +/* + * QEMU rocker switch emulation - front-panel ports + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _ROCKER_FP_H_ +#define _ROCKER_FP_H_ + +#include "net/net.h" +#include "qemu/iov.h" + +#define ROCKER_FP_PORTS_MAX 62 + +typedef struct fp_port FpPort; + +int fp_port_eg(FpPort *port, const struct iovec *iov, int iovcnt); + +bool fp_port_get_link_up(FpPort *port); +void fp_port_get_macaddr(FpPort *port, MACAddr *macaddr); +void fp_port_set_macaddr(FpPort *port, MACAddr *macaddr); +uint8_t fp_port_get_learning(FpPort *port); +void fp_port_set_learning(FpPort *port, uint8_t learning); +int fp_port_get_settings(FpPort *port, uint32_t *speed, + uint8_t *duplex, uint8_t *autoneg); +int fp_port_set_settings(FpPort *port, uint32_t speed, + uint8_t duplex, uint8_t autoneg); +bool fp_port_from_pport(uint32_t pport, uint32_t *port); +World *fp_port_get_world(FpPort *port); +void fp_port_set_world(FpPort *port, World *world); +bool fp_port_enabled(FpPort *port); +void fp_port_enable(FpPort *port); +void fp_port_disable(FpPort *port); + +FpPort *fp_port_alloc(Rocker *r, char *sw_name, + MACAddr *start_mac, unsigned int index, + NICPeers *peers); +void fp_port_free(FpPort *port); +void fp_port_reset(FpPort *port); + +#endif /* _ROCKER_FP_H_ */ diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h new file mode 100644 index 0000000..c9c85a7 --- /dev/null +++ b/hw/net/rocker/rocker_hw.h @@ -0,0 +1,491 @@ +/* + * Rocker switch hardware register and descriptor definitions. 
+ * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> + * + */ + +#ifndef _ROCKER_HW_ +#define _ROCKER_HW_ + +#define __le16 uint16_t +#define __le32 uint32_t +#define __le64 uint64_t + +/* + * Return codes + */ + +enum { + ROCKER_OK = 0, + ROCKER_ENOENT = 2, + ROCKER_ENXIO = 6, + ROCKER_ENOMEM = 12, + ROCKER_EEXIST = 17, + ROCKER_EINVAL = 22, + ROCKER_EMSGSIZE = 90, + ROCKER_ENOTSUP = 95, + ROCKER_ENOBUFS = 105, +}; + +/* + * PCI configuration space + */ + +#define ROCKER_PCI_REVISION 0x1 +#define ROCKER_PCI_BAR0_IDX 0 +#define ROCKER_PCI_BAR0_SIZE 0x2000 +#define ROCKER_PCI_MSIX_BAR_IDX 1 +#define ROCKER_PCI_MSIX_BAR_SIZE 0x2000 +#define ROCKER_PCI_MSIX_TABLE_OFFSET 0x0000 +#define ROCKER_PCI_MSIX_PBA_OFFSET 0x1000 + +/* + * MSI-X vectors + */ + +enum { + ROCKER_MSIX_VEC_CMD, + ROCKER_MSIX_VEC_EVENT, + ROCKER_MSIX_VEC_TEST, + ROCKER_MSIX_VEC_RESERVED0, + __ROCKER_MSIX_VEC_TX, + __ROCKER_MSIX_VEC_RX, +#define ROCKER_MSIX_VEC_TX(port) \ + (__ROCKER_MSIX_VEC_TX + ((port) * 2)) +#define ROCKER_MSIX_VEC_RX(port) \ + (__ROCKER_MSIX_VEC_RX + ((port) * 2)) +#define ROCKER_MSIX_VEC_COUNT(portcnt) \ + (ROCKER_MSIX_VEC_RX((portcnt) - 1) + 1) +}; + +/* + * Rocker bogus registers + */ +#define ROCKER_BOGUS_REG0 0x0000 +#define ROCKER_BOGUS_REG1 0x0004 +#define ROCKER_BOGUS_REG2 0x0008 +#define ROCKER_BOGUS_REG3 0x000c + +/* + * Rocker test registers + */ +#define ROCKER_TEST_REG 0x0010 +#define ROCKER_TEST_REG64 0x0018 /* 8-byte */ +#define ROCKER_TEST_IRQ 0x0020 +#define ROCKER_TEST_DMA_ADDR 0x0028 /* 8-byte */ +#define ROCKER_TEST_DMA_SIZE 0x0030 +#define ROCKER_TEST_DMA_CTRL 0x0034 + +/* + * Rocker test register ctrl + */ +#define ROCKER_TEST_DMA_CTRL_CLEAR (1 << 0) +#define ROCKER_TEST_DMA_CTRL_FILL (1 << 1) +#define ROCKER_TEST_DMA_CTRL_INVERT (1 << 2) + +/* + * Rocker DMA ring register offsets + */ +#define ROCKER_DMA_DESC_BASE 0x1000 +#define ROCKER_DMA_DESC_SIZE 32 +#define ROCKER_DMA_DESC_MASK 0x1F +#define ROCKER_DMA_DESC_TOTAL_SIZE \ + (ROCKER_DMA_DESC_SIZE * 64) /* 62 ports + event + cmd */ +#define ROCKER_DMA_DESC_ADDR_OFFSET 0x00 /* 8-byte */ +#define ROCKER_DMA_DESC_SIZE_OFFSET 0x08 +#define ROCKER_DMA_DESC_HEAD_OFFSET 0x0c +#define ROCKER_DMA_DESC_TAIL_OFFSET 0x10 +#define ROCKER_DMA_DESC_CTRL_OFFSET 0x14 +#define ROCKER_DMA_DESC_CREDITS_OFFSET 0x18 +#define ROCKER_DMA_DESC_RSVD_OFFSET 0x1c + +/* + * Rocker dma ctrl register bits + */ +#define ROCKER_DMA_DESC_CTRL_RESET (1 << 0) + +/* + * Rocker ring indices + */ +#define ROCKER_RING_CMD 0 +#define ROCKER_RING_EVENT 1 + +/* + * Helper macro to do convert a dma ring register + * to its index. Based on the fact that the register + * group stride is 32 bytes. 
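+ *
+ * For example: ((0x1000 >> 5) & 0x7f) == 0 (cmd ring),
+ * ((0x1020 >> 5) & 0x7f) == 1 (event ring), and
+ * ((0x1040 >> 5) & 0x7f) == 2 (port 0 tx ring).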
+ */ +#define ROCKER_RING_INDEX(reg) ((reg >> 5) & 0x7F) + +/* + * Rocker DMA Descriptor + */ + +typedef struct rocker_desc { + __le64 buf_addr; + uint64_t cookie; + __le16 buf_size; + __le16 tlv_size; + __le16 rsvd[5]; /* pad to 32 bytes */ + __le16 comp_err; +} __attribute__((packed, aligned(8))) RockerDesc; + +/* + * Rocker TLV type fields + */ + +typedef struct rocker_tlv { + __le32 type; + __le16 len; + __le16 rsvd; +} __attribute__((packed, aligned(8))) RockerTlv; + +/* cmd msg */ +enum { + ROCKER_TLV_CMD_UNSPEC, + ROCKER_TLV_CMD_TYPE, /* u16 */ + ROCKER_TLV_CMD_INFO, /* nest */ + + __ROCKER_TLV_CMD_MAX, + ROCKER_TLV_CMD_MAX = __ROCKER_TLV_CMD_MAX - 1, +}; + +enum { + ROCKER_TLV_CMD_TYPE_UNSPEC, + ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS, + ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS, + ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD, + ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD, + ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL, + ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS, + ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD, + ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD, + ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL, + ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS, + + __ROCKER_TLV_CMD_TYPE_MAX, + ROCKER_TLV_CMD_TYPE_MAX = __ROCKER_TLV_CMD_TYPE_MAX - 1, +}; + +/* cmd info nested for set/get port settings */ +enum { + ROCKER_TLV_CMD_PORT_SETTINGS_UNSPEC, + ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, /* u32 */ + ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, /* u32 */ + ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, /* u8 */ + ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, /* u8 */ + ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR, /* binary */ + ROCKER_TLV_CMD_PORT_SETTINGS_MODE, /* u8 */ + ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING, /* u8 */ + + __ROCKER_TLV_CMD_PORT_SETTINGS_MAX, + ROCKER_TLV_CMD_PORT_SETTINGS_MAX = __ROCKER_TLV_CMD_PORT_SETTINGS_MAX - 1, +}; + +enum { + ROCKER_PORT_MODE_OF_DPA, +}; + +/* event msg */ +enum { + ROCKER_TLV_EVENT_UNSPEC, + ROCKER_TLV_EVENT_TYPE, /* u16 */ + ROCKER_TLV_EVENT_INFO, /* nest */ + + __ROCKER_TLV_EVENT_MAX, + ROCKER_TLV_EVENT_MAX = __ROCKER_TLV_EVENT_MAX - 1, +}; + +enum { + ROCKER_TLV_EVENT_TYPE_UNSPEC, + ROCKER_TLV_EVENT_TYPE_LINK_CHANGED, + ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN, + + __ROCKER_TLV_EVENT_TYPE_MAX, + ROCKER_TLV_EVENT_TYPE_MAX = __ROCKER_TLV_EVENT_TYPE_MAX - 1, +}; + +/* event info nested for link changed */ +enum { + ROCKER_TLV_EVENT_LINK_CHANGED_UNSPEC, + ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, /* u32 */ + ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP, /* u8 */ + + __ROCKER_TLV_EVENT_LINK_CHANGED_MAX, + ROCKER_TLV_EVENT_LINK_CHANGED_MAX = __ROCKER_TLV_EVENT_LINK_CHANGED_MAX - 1, +}; + +/* event info nested for MAC/VLAN */ +enum { + ROCKER_TLV_EVENT_MAC_VLAN_UNSPEC, + ROCKER_TLV_EVENT_MAC_VLAN_PPORT, /* u32 */ + ROCKER_TLV_EVENT_MAC_VLAN_MAC, /* binary */ + ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, /* __be16 */ + + __ROCKER_TLV_EVENT_MAC_VLAN_MAX, + ROCKER_TLV_EVENT_MAC_VLAN_MAX = __ROCKER_TLV_EVENT_MAC_VLAN_MAX - 1, +}; + +/* Rx msg */ +enum { + ROCKER_TLV_RX_UNSPEC, + ROCKER_TLV_RX_FLAGS, /* u16, see RX_FLAGS_ */ + ROCKER_TLV_RX_CSUM, /* u16 */ + ROCKER_TLV_RX_FRAG_ADDR, /* u64 */ + ROCKER_TLV_RX_FRAG_MAX_LEN, /* u16 */ + ROCKER_TLV_RX_FRAG_LEN, /* u16 */ + + __ROCKER_TLV_RX_MAX, + ROCKER_TLV_RX_MAX = __ROCKER_TLV_RX_MAX - 1, +}; + +#define ROCKER_RX_FLAGS_IPV4 (1 << 0) +#define ROCKER_RX_FLAGS_IPV6 (1 << 1) +#define ROCKER_RX_FLAGS_CSUM_CALC (1 << 2) +#define ROCKER_RX_FLAGS_IPV4_CSUM_GOOD (1 << 3) +#define ROCKER_RX_FLAGS_IP_FRAG (1 << 4) +#define ROCKER_RX_FLAGS_TCP (1 << 5) +#define ROCKER_RX_FLAGS_UDP (1 << 6) +#define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD (1 << 
7) + +/* Tx msg */ +enum { + ROCKER_TLV_TX_UNSPEC, + ROCKER_TLV_TX_OFFLOAD, /* u8, see TX_OFFLOAD_ */ + ROCKER_TLV_TX_L3_CSUM_OFF, /* u16 */ + ROCKER_TLV_TX_TSO_MSS, /* u16 */ + ROCKER_TLV_TX_TSO_HDR_LEN, /* u16 */ + ROCKER_TLV_TX_FRAGS, /* array */ + + __ROCKER_TLV_TX_MAX, + ROCKER_TLV_TX_MAX = __ROCKER_TLV_TX_MAX - 1, +}; + +#define ROCKER_TX_OFFLOAD_NONE 0 +#define ROCKER_TX_OFFLOAD_IP_CSUM 1 +#define ROCKER_TX_OFFLOAD_TCP_UDP_CSUM 2 +#define ROCKER_TX_OFFLOAD_L3_CSUM 3 +#define ROCKER_TX_OFFLOAD_TSO 4 + +#define ROCKER_TX_FRAGS_MAX 16 + +enum { + ROCKER_TLV_TX_FRAG_UNSPEC, + ROCKER_TLV_TX_FRAG, /* nest */ + + __ROCKER_TLV_TX_FRAG_MAX, + ROCKER_TLV_TX_FRAG_MAX = __ROCKER_TLV_TX_FRAG_MAX - 1, +}; + +enum { + ROCKER_TLV_TX_FRAG_ATTR_UNSPEC, + ROCKER_TLV_TX_FRAG_ATTR_ADDR, /* u64 */ + ROCKER_TLV_TX_FRAG_ATTR_LEN, /* u16 */ + + __ROCKER_TLV_TX_FRAG_ATTR_MAX, + ROCKER_TLV_TX_FRAG_ATTR_MAX = __ROCKER_TLV_TX_FRAG_ATTR_MAX - 1, +}; + +/* + * cmd info nested for OF-DPA msgs + */ + +enum { + ROCKER_TLV_OF_DPA_UNSPEC, + ROCKER_TLV_OF_DPA_TABLE_ID, /* u16 */ + ROCKER_TLV_OF_DPA_PRIORITY, /* u32 */ + ROCKER_TLV_OF_DPA_HARDTIME, /* u32 */ + ROCKER_TLV_OF_DPA_IDLETIME, /* u32 */ + ROCKER_TLV_OF_DPA_COOKIE, /* u64 */ + ROCKER_TLV_OF_DPA_IN_PPORT, /* u32 */ + ROCKER_TLV_OF_DPA_IN_PPORT_MASK, /* u32 */ + ROCKER_TLV_OF_DPA_OUT_PPORT, /* u32 */ + ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, /* u16 */ + ROCKER_TLV_OF_DPA_GROUP_ID, /* u32 */ + ROCKER_TLV_OF_DPA_GROUP_ID_LOWER, /* u32 */ + ROCKER_TLV_OF_DPA_GROUP_COUNT, /* u16 */ + ROCKER_TLV_OF_DPA_GROUP_IDS, /* u32 array */ + ROCKER_TLV_OF_DPA_VLAN_ID, /* __be16 */ + ROCKER_TLV_OF_DPA_VLAN_ID_MASK, /* __be16 */ + ROCKER_TLV_OF_DPA_VLAN_PCP, /* __be16 */ + ROCKER_TLV_OF_DPA_VLAN_PCP_MASK, /* __be16 */ + ROCKER_TLV_OF_DPA_VLAN_PCP_ACTION, /* u8 */ + ROCKER_TLV_OF_DPA_NEW_VLAN_ID, /* __be16 */ + ROCKER_TLV_OF_DPA_NEW_VLAN_PCP, /* u8 */ + ROCKER_TLV_OF_DPA_TUNNEL_ID, /* u32 */ + ROCKER_TLV_OF_DPA_TUNNEL_LPORT, /* u32 */ + ROCKER_TLV_OF_DPA_ETHERTYPE, /* __be16 */ + ROCKER_TLV_OF_DPA_DST_MAC, /* binary */ + ROCKER_TLV_OF_DPA_DST_MAC_MASK, /* binary */ + ROCKER_TLV_OF_DPA_SRC_MAC, /* binary */ + ROCKER_TLV_OF_DPA_SRC_MAC_MASK, /* binary */ + ROCKER_TLV_OF_DPA_IP_PROTO, /* u8 */ + ROCKER_TLV_OF_DPA_IP_PROTO_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_IP_DSCP, /* u8 */ + ROCKER_TLV_OF_DPA_IP_DSCP_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_IP_DSCP_ACTION, /* u8 */ + ROCKER_TLV_OF_DPA_NEW_IP_DSCP, /* u8 */ + ROCKER_TLV_OF_DPA_IP_ECN, /* u8 */ + ROCKER_TLV_OF_DPA_IP_ECN_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_DST_IP, /* __be32 */ + ROCKER_TLV_OF_DPA_DST_IP_MASK, /* __be32 */ + ROCKER_TLV_OF_DPA_SRC_IP, /* __be32 */ + ROCKER_TLV_OF_DPA_SRC_IP_MASK, /* __be32 */ + ROCKER_TLV_OF_DPA_DST_IPV6, /* binary */ + ROCKER_TLV_OF_DPA_DST_IPV6_MASK, /* binary */ + ROCKER_TLV_OF_DPA_SRC_IPV6, /* binary */ + ROCKER_TLV_OF_DPA_SRC_IPV6_MASK, /* binary */ + ROCKER_TLV_OF_DPA_SRC_ARP_IP, /* __be32 */ + ROCKER_TLV_OF_DPA_SRC_ARP_IP_MASK, /* __be32 */ + ROCKER_TLV_OF_DPA_L4_DST_PORT, /* __be16 */ + ROCKER_TLV_OF_DPA_L4_DST_PORT_MASK, /* __be16 */ + ROCKER_TLV_OF_DPA_L4_SRC_PORT, /* __be16 */ + ROCKER_TLV_OF_DPA_L4_SRC_PORT_MASK, /* __be16 */ + ROCKER_TLV_OF_DPA_ICMP_TYPE, /* u8 */ + ROCKER_TLV_OF_DPA_ICMP_TYPE_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_ICMP_CODE, /* u8 */ + ROCKER_TLV_OF_DPA_ICMP_CODE_MASK, /* u8 */ + ROCKER_TLV_OF_DPA_IPV6_LABEL, /* __be32 */ + ROCKER_TLV_OF_DPA_IPV6_LABEL_MASK, /* __be32 */ + ROCKER_TLV_OF_DPA_QUEUE_ID_ACTION, /* u8 */ + ROCKER_TLV_OF_DPA_NEW_QUEUE_ID, /* u8 */ + 
ROCKER_TLV_OF_DPA_CLEAR_ACTIONS, /* u32 */ + ROCKER_TLV_OF_DPA_POP_VLAN, /* u8 */ + ROCKER_TLV_OF_DPA_TTL_CHECK, /* u8 */ + ROCKER_TLV_OF_DPA_COPY_CPU_ACTION, /* u8 */ + + __ROCKER_TLV_OF_DPA_MAX, + ROCKER_TLV_OF_DPA_MAX = __ROCKER_TLV_OF_DPA_MAX - 1, +}; + +/* + * OF-DPA table IDs + */ + +enum rocker_of_dpa_table_id { + ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT = 0, + ROCKER_OF_DPA_TABLE_ID_VLAN = 10, + ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC = 20, + ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING = 30, + ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING = 40, + ROCKER_OF_DPA_TABLE_ID_BRIDGING = 50, + ROCKER_OF_DPA_TABLE_ID_ACL_POLICY = 60, +}; + +/* + * OF-DPA flow stats + */ + +enum { + ROCKER_TLV_OF_DPA_FLOW_STAT_UNSPEC, + ROCKER_TLV_OF_DPA_FLOW_STAT_DURATION, /* u32 */ + ROCKER_TLV_OF_DPA_FLOW_STAT_RX_PKTS, /* u64 */ + ROCKER_TLV_OF_DPA_FLOW_STAT_TX_PKTS, /* u64 */ + + __ROCKER_TLV_OF_DPA_FLOW_STAT_MAX, + ROCKER_TLV_OF_DPA_FLOW_STAT_MAX = __ROCKER_TLV_OF_DPA_FLOW_STAT_MAX - 1, +}; + +/* + * OF-DPA group types + */ + +enum rocker_of_dpa_group_type { + ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE = 0, + ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE, + ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST, + ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST, + ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD, + ROCKER_OF_DPA_GROUP_TYPE_L3_INTERFACE, + ROCKER_OF_DPA_GROUP_TYPE_L3_MCAST, + ROCKER_OF_DPA_GROUP_TYPE_L3_ECMP, + ROCKER_OF_DPA_GROUP_TYPE_L2_OVERLAY, +}; + +/* + * OF-DPA group L2 overlay types + */ + +enum rocker_of_dpa_overlay_type { + ROCKER_OF_DPA_OVERLAY_TYPE_FLOOD_UCAST = 0, + ROCKER_OF_DPA_OVERLAY_TYPE_FLOOD_MCAST, + ROCKER_OF_DPA_OVERLAY_TYPE_MCAST_UCAST, + ROCKER_OF_DPA_OVERLAY_TYPE_MCAST_MCAST, +}; + +/* + * OF-DPA group ID encoding + */ + +#define ROCKER_GROUP_TYPE_SHIFT 28 +#define ROCKER_GROUP_TYPE_MASK 0xf0000000 +#define ROCKER_GROUP_VLAN_ID_SHIFT 16 +#define ROCKER_GROUP_VLAN_ID_MASK 0x0fff0000 +#define ROCKER_GROUP_PORT_SHIFT 0 +#define ROCKER_GROUP_PORT_MASK 0x0000ffff +#define ROCKER_GROUP_TUNNEL_ID_SHIFT 12 +#define ROCKER_GROUP_TUNNEL_ID_MASK 0x0ffff000 +#define ROCKER_GROUP_SUBTYPE_SHIFT 10 +#define ROCKER_GROUP_SUBTYPE_MASK 0x00000c00 +#define ROCKER_GROUP_INDEX_SHIFT 0 +#define ROCKER_GROUP_INDEX_MASK 0x0000ffff +#define ROCKER_GROUP_INDEX_LONG_SHIFT 0 +#define ROCKER_GROUP_INDEX_LONG_MASK 0x0fffffff + +#define ROCKER_GROUP_TYPE_GET(group_id) \ + (((group_id) & ROCKER_GROUP_TYPE_MASK) >> ROCKER_GROUP_TYPE_SHIFT) +#define ROCKER_GROUP_TYPE_SET(type) \ + (((type) << ROCKER_GROUP_TYPE_SHIFT) & ROCKER_GROUP_TYPE_MASK) +#define ROCKER_GROUP_VLAN_GET(group_id) \ + (((group_id) & ROCKER_GROUP_VLAN_ID_MASK) >> ROCKER_GROUP_VLAN_ID_SHIFT) +#define ROCKER_GROUP_VLAN_SET(vlan_id) \ + (((vlan_id) << ROCKER_GROUP_VLAN_ID_SHIFT) & ROCKER_GROUP_VLAN_ID_MASK) +#define ROCKER_GROUP_PORT_GET(group_id) \ + (((group_id) & ROCKER_GROUP_PORT_MASK) >> ROCKER_GROUP_PORT_SHIFT) +#define ROCKER_GROUP_PORT_SET(port) \ + (((port) << ROCKER_GROUP_PORT_SHIFT) & ROCKER_GROUP_PORT_MASK) +#define ROCKER_GROUP_INDEX_GET(group_id) \ + (((group_id) & ROCKER_GROUP_INDEX_MASK) >> ROCKER_GROUP_INDEX_SHIFT) +#define ROCKER_GROUP_INDEX_SET(index) \ + (((index) << ROCKER_GROUP_INDEX_SHIFT) & ROCKER_GROUP_INDEX_MASK) +#define ROCKER_GROUP_INDEX_LONG_GET(group_id) \ + (((group_id) & ROCKER_GROUP_INDEX_LONG_MASK) >> \ + ROCKER_GROUP_INDEX_LONG_SHIFT) +#define ROCKER_GROUP_INDEX_LONG_SET(index) \ + (((index) << ROCKER_GROUP_INDEX_LONG_SHIFT) & \ + ROCKER_GROUP_INDEX_LONG_MASK) + +#define ROCKER_GROUP_NONE 0 +#define ROCKER_GROUP_L2_INTERFACE(vlan_id, port) \ + 
(ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) |\ + ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_PORT_SET(port)) +#define ROCKER_GROUP_L2_REWRITE(index) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE) |\ + ROCKER_GROUP_INDEX_LONG_SET(index)) +#define ROCKER_GROUP_L2_MCAST(vlan_id, index) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST) |\ + ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_INDEX_SET(index)) +#define ROCKER_GROUP_L2_FLOOD(vlan_id, index) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD) |\ + ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_INDEX_SET(index)) +#define ROCKER_GROUP_L3_UNICAST(index) \ + (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST) |\ + ROCKER_GROUP_INDEX_LONG_SET(index)) + +/* + * Rocker general purpose registers + */ +#define ROCKER_CONTROL 0x0300 +#define ROCKER_PORT_PHYS_COUNT 0x0304 +#define ROCKER_PORT_PHYS_LINK_STATUS 0x0310 /* 8-byte */ +#define ROCKER_PORT_PHYS_ENABLE 0x0318 /* 8-byte */ +#define ROCKER_SWITCH_ID 0x0320 /* 8-byte */ + +/* + * Rocker control bits + */ +#define ROCKER_CONTROL_RESET (1 << 0) + +#endif /* _ROCKER_HW_ */ diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c new file mode 100644 index 0000000..1bcb7af --- /dev/null +++ b/hw/net/rocker/rocker_of_dpa.c @@ -0,0 +1,2315 @@ +/* + * QEMU rocker switch emulation - OF-DPA flow processing support + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "net/eth.h" +#include "qemu/iov.h" +#include "qemu/timer.h" +#include "qmp-commands.h" + +#include "rocker.h" +#include "rocker_hw.h" +#include "rocker_fp.h" +#include "rocker_tlv.h" +#include "rocker_world.h" +#include "rocker_desc.h" +#include "rocker_of_dpa.h" + +static const MACAddr zero_mac = { .a = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }; +static const MACAddr ff_mac = { .a = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } }; + +typedef struct of_dpa { + World *world; + GHashTable *flow_tbl; + GHashTable *group_tbl; + unsigned int flow_tbl_max_size; + unsigned int group_tbl_max_size; +} OfDpa; + +/* flow_key stolen mostly from OVS + * + * Note: fields that compare with network packet header fields + * are stored in network order (BE) to avoid per-packet field + * byte-swaps. 
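+ *
+ * A flow entry matches a packet key when, for each 64-bit word w of
+ * the key, (entry_key[w] ^ pkt_key[w]) & entry_mask[w] == 0. The
+ * matcher below tests the equivalent per-word expression
+ * (~k & m & v) | (k & m & ~v) == 0.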
+ */ + +typedef struct of_dpa_flow_key { + uint32_t in_pport; /* ingress port */ + uint32_t tunnel_id; /* overlay tunnel id */ + uint32_t tbl_id; /* table id */ + struct { + __be16 vlan_id; /* 0 if no VLAN */ + MACAddr src; /* ethernet source address */ + MACAddr dst; /* ethernet destination address */ + __be16 type; /* ethernet frame type */ + } eth; + struct { + uint8_t proto; /* IP protocol or ARP opcode */ + uint8_t tos; /* IP ToS */ + uint8_t ttl; /* IP TTL/hop limit */ + uint8_t frag; /* one of FRAG_TYPE_* */ + } ip; + union { + struct { + struct { + __be32 src; /* IP source address */ + __be32 dst; /* IP destination address */ + } addr; + union { + struct { + __be16 src; /* TCP/UDP/SCTP source port */ + __be16 dst; /* TCP/UDP/SCTP destination port */ + __be16 flags; /* TCP flags */ + } tp; + struct { + MACAddr sha; /* ARP source hardware address */ + MACAddr tha; /* ARP target hardware address */ + } arp; + }; + } ipv4; + struct { + struct { + Ipv6Addr src; /* IPv6 source address */ + Ipv6Addr dst; /* IPv6 destination address */ + } addr; + __be32 label; /* IPv6 flow label */ + struct { + __be16 src; /* TCP/UDP/SCTP source port */ + __be16 dst; /* TCP/UDP/SCTP destination port */ + __be16 flags; /* TCP flags */ + } tp; + struct { + Ipv6Addr target; /* ND target address */ + MACAddr sll; /* ND source link layer address */ + MACAddr tll; /* ND target link layer address */ + } nd; + } ipv6; + }; + int width; /* how many uint64_t's in key? */ +} OfDpaFlowKey; + +/* Width of key which includes field 'f' in u64s, rounded up */ +#define FLOW_KEY_WIDTH(f) \ + ((offsetof(OfDpaFlowKey, f) + \ + sizeof(((OfDpaFlowKey *)0)->f) + \ + sizeof(uint64_t) - 1) / sizeof(uint64_t)) + +typedef struct of_dpa_flow_action { + uint32_t goto_tbl; + struct { + uint32_t group_id; + uint32_t tun_log_lport; + __be16 vlan_id; + } write; + struct { + __be16 new_vlan_id; + uint32_t out_pport; + uint8_t copy_to_cpu; + __be16 vlan_id; + } apply; +} OfDpaFlowAction; + +typedef struct of_dpa_flow { + uint32_t lpm; + uint32_t priority; + uint32_t hardtime; + uint32_t idletime; + uint64_t cookie; + OfDpaFlowKey key; + OfDpaFlowKey mask; + OfDpaFlowAction action; + struct { + uint64_t hits; + int64_t install_time; + int64_t refresh_time; + uint64_t rx_pkts; + uint64_t tx_pkts; + } stats; +} OfDpaFlow; + +typedef struct of_dpa_flow_pkt_fields { + uint32_t tunnel_id; + struct eth_header *ethhdr; + __be16 *h_proto; + struct vlan_header *vlanhdr; + struct ip_header *ipv4hdr; + struct ip6_header *ipv6hdr; + Ipv6Addr *ipv6_src_addr; + Ipv6Addr *ipv6_dst_addr; +} OfDpaFlowPktFields; + +typedef struct of_dpa_flow_context { + uint32_t in_pport; + uint32_t tunnel_id; + struct iovec *iov; + int iovcnt; + struct eth_header ethhdr_rewrite; + struct vlan_header vlanhdr_rewrite; + struct vlan_header vlanhdr; + OfDpa *of_dpa; + OfDpaFlowPktFields fields; + OfDpaFlowAction action_set; +} OfDpaFlowContext; + +typedef struct of_dpa_flow_match { + OfDpaFlowKey value; + OfDpaFlow *best; +} OfDpaFlowMatch; + +typedef struct of_dpa_group { + uint32_t id; + union { + struct { + uint32_t out_pport; + uint8_t pop_vlan; + } l2_interface; + struct { + uint32_t group_id; + MACAddr src_mac; + MACAddr dst_mac; + __be16 vlan_id; + } l2_rewrite; + struct { + uint16_t group_count; + uint32_t *group_ids; + } l2_flood; + struct { + uint32_t group_id; + MACAddr src_mac; + MACAddr dst_mac; + __be16 vlan_id; + uint8_t ttl_check; + } l3_unicast; + }; +} OfDpaGroup; + +static int of_dpa_mask2prefix(__be32 mask) +{ + int i; + int count = 32; + + for (i = 0; i < 
32; i++) {
+        if (!(ntohl(mask) & ((2 << i) - 1))) {
+            count--;
+        }
+    }
+
+    return count;
+}
+
+#if defined(DEBUG_ROCKER)
+static void of_dpa_flow_key_dump(OfDpaFlowKey *key, OfDpaFlowKey *mask)
+{
+    char buf[512], *b = buf, *mac;
+
+    b += sprintf(b, " tbl %2d", key->tbl_id);
+
+    if (key->in_pport || (mask && mask->in_pport)) {
+        b += sprintf(b, " in_pport %2d", key->in_pport);
+        if (mask && mask->in_pport != 0xffffffff) {
+            b += sprintf(b, "/0x%08x", mask->in_pport);
+        }
+    }
+
+    if (key->tunnel_id || (mask && mask->tunnel_id)) {
+        b += sprintf(b, " tun %8d", key->tunnel_id);
+        if (mask && mask->tunnel_id != 0xffffffff) {
+            b += sprintf(b, "/0x%08x", mask->tunnel_id);
+        }
+    }
+
+    if (key->eth.vlan_id || (mask && mask->eth.vlan_id)) {
+        b += sprintf(b, " vlan %4d", ntohs(key->eth.vlan_id));
+        if (mask && mask->eth.vlan_id != 0xffff) {
+            b += sprintf(b, "/0x%04x", ntohs(mask->eth.vlan_id));
+        }
+    }
+
+    if (memcmp(key->eth.src.a, zero_mac.a, ETH_ALEN) ||
+        (mask && memcmp(mask->eth.src.a, zero_mac.a, ETH_ALEN))) {
+        mac = qemu_mac_strdup_printf(key->eth.src.a);
+        b += sprintf(b, " src %s", mac);
+        g_free(mac);
+        if (mask && memcmp(mask->eth.src.a, ff_mac.a, ETH_ALEN)) {
+            mac = qemu_mac_strdup_printf(mask->eth.src.a);
+            b += sprintf(b, "/%s", mac);
+            g_free(mac);
+        }
+    }
+
+    if (memcmp(key->eth.dst.a, zero_mac.a, ETH_ALEN) ||
+        (mask && memcmp(mask->eth.dst.a, zero_mac.a, ETH_ALEN))) {
+        mac = qemu_mac_strdup_printf(key->eth.dst.a);
+        b += sprintf(b, " dst %s", mac);
+        g_free(mac);
+        if (mask && memcmp(mask->eth.dst.a, ff_mac.a, ETH_ALEN)) {
+            mac = qemu_mac_strdup_printf(mask->eth.dst.a);
+            b += sprintf(b, "/%s", mac);
+            g_free(mac);
+        }
+    }
+
+    if (key->eth.type || (mask && mask->eth.type)) {
+        b += sprintf(b, " type 0x%04x", ntohs(key->eth.type));
+        if (mask && mask->eth.type != 0xffff) {
+            b += sprintf(b, "/0x%04x", ntohs(mask->eth.type));
+        }
+        switch (ntohs(key->eth.type)) {
+        case 0x0800:
+        case 0x86dd:
+            if (key->ip.proto || (mask && mask->ip.proto)) {
+                b += sprintf(b, " ip proto %2d", key->ip.proto);
+                if (mask && mask->ip.proto != 0xff) {
+                    b += sprintf(b, "/0x%02x", mask->ip.proto);
+                }
+            }
+            if (key->ip.tos || (mask && mask->ip.tos)) {
+                b += sprintf(b, " ip tos %2d", key->ip.tos);
+                if (mask && mask->ip.tos != 0xff) {
+                    b += sprintf(b, "/0x%02x", mask->ip.tos);
+                }
+            }
+            break;
+        }
+        switch (ntohs(key->eth.type)) {
+        case 0x0800:
+            if (key->ipv4.addr.dst || (mask && mask->ipv4.addr.dst)) {
+                b += sprintf(b, " dst %s",
+                             inet_ntoa(*(struct in_addr *)&key->ipv4.addr.dst));
+                if (mask) {
+                    b += sprintf(b, "/%d",
+                                 of_dpa_mask2prefix(mask->ipv4.addr.dst));
+                }
+            }
+            break;
+        }
+    }
+
+    DPRINTF("%s\n", buf);
+}
+#else
+#define of_dpa_flow_key_dump(k, m)
+#endif
+
+static void _of_dpa_flow_match(void *key, void *value, void *user_data)
+{
+    OfDpaFlow *flow = value;
+    OfDpaFlowMatch *match = user_data;
+    uint64_t *k = (uint64_t *)&flow->key;
+    uint64_t *m = (uint64_t *)&flow->mask;
+    uint64_t *v = (uint64_t *)&match->value;
+    int i;
+
+    if (flow->key.tbl_id == match->value.tbl_id) {
+        of_dpa_flow_key_dump(&flow->key, &flow->mask);
+    }
+
+    if (flow->key.width > match->value.width) {
+        return;
+    }
+
+    for (i = 0; i < flow->key.width; i++, k++, m++, v++) {
+        if ((~*k & *m & *v) | (*k & *m & ~*v)) {
+            return;
+        }
+    }
+
+    DPRINTF("match\n");
+
+    if (!match->best ||
+        flow->priority > match->best->priority ||
+        flow->lpm > match->best->lpm) {
+        match->best = flow;
+    }
+}
+
+static OfDpaFlow *of_dpa_flow_match(OfDpa *of_dpa, OfDpaFlowMatch *match)
+{
+    DPRINTF("\nnew search\n");
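+    /* Dump the search key, then scan the whole flow table;
+     * _of_dpa_flow_match() records the best hit (highest priority,
+     * then longest prefix match) in match->best.
+     */
+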
+    of_dpa_flow_key_dump(&match->value, NULL);
+
+    g_hash_table_foreach(of_dpa->flow_tbl, _of_dpa_flow_match, match);
+
+    return match->best;
+}
+
+static OfDpaFlow *of_dpa_flow_find(OfDpa *of_dpa, uint64_t cookie)
+{
+    return g_hash_table_lookup(of_dpa->flow_tbl, &cookie);
+}
+
+static int of_dpa_flow_add(OfDpa *of_dpa, OfDpaFlow *flow)
+{
+    g_hash_table_insert(of_dpa->flow_tbl, &flow->cookie, flow);
+
+    return ROCKER_OK;
+}
+
+static void of_dpa_flow_del(OfDpa *of_dpa, OfDpaFlow *flow)
+{
+    g_hash_table_remove(of_dpa->flow_tbl, &flow->cookie);
+}
+
+static OfDpaFlow *of_dpa_flow_alloc(uint64_t cookie)
+{
+    OfDpaFlow *flow;
+    int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) / 1000;
+
+    flow = g_malloc0(sizeof(OfDpaFlow));
+    if (!flow) {
+        return NULL;
+    }
+
+    flow->cookie = cookie;
+    flow->mask.tbl_id = 0xffffffff;
+
+    flow->stats.install_time = flow->stats.refresh_time = now;
+
+    return flow;
+}
+
+static void of_dpa_flow_pkt_hdr_reset(OfDpaFlowContext *fc)
+{
+    OfDpaFlowPktFields *fields = &fc->fields;
+
+    fc->iov[0].iov_base = fields->ethhdr;
+    fc->iov[0].iov_len = sizeof(struct eth_header);
+    fc->iov[1].iov_base = fields->vlanhdr;
+    fc->iov[1].iov_len = fields->vlanhdr ? sizeof(struct vlan_header) : 0;
+}
+
+static void of_dpa_flow_pkt_parse(OfDpaFlowContext *fc,
+                                  const struct iovec *iov, int iovcnt)
+{
+    OfDpaFlowPktFields *fields = &fc->fields;
+    size_t sofar = 0;
+    int i;
+
+    sofar += sizeof(struct eth_header);
+    if (iov->iov_len < sofar) {
+        DPRINTF("flow_pkt_parse underrun on eth_header\n");
+        return;
+    }
+
+    fields->ethhdr = iov->iov_base;
+    fields->h_proto = &fields->ethhdr->h_proto;
+
+    if (ntohs(*fields->h_proto) == ETH_P_VLAN) {
+        sofar += sizeof(struct vlan_header);
+        if (iov->iov_len < sofar) {
+            DPRINTF("flow_pkt_parse underrun on vlan_header\n");
+            return;
+        }
+        fields->vlanhdr = (struct vlan_header *)(fields->ethhdr + 1);
+        fields->h_proto = &fields->vlanhdr->h_proto;
+    }
+
+    switch (ntohs(*fields->h_proto)) {
+    case ETH_P_IP:
+        sofar += sizeof(struct ip_header);
+        if (iov->iov_len < sofar) {
+            DPRINTF("flow_pkt_parse underrun on ip_header\n");
+            return;
+        }
+        fields->ipv4hdr = (struct ip_header *)(fields->h_proto + 1);
+        break;
+    case ETH_P_IPV6:
+        sofar += sizeof(struct ip6_header);
+        if (iov->iov_len < sofar) {
+            DPRINTF("flow_pkt_parse underrun on ip6_header\n");
+            return;
+        }
+        fields->ipv6hdr = (struct ip6_header *)(fields->h_proto + 1);
+        break;
+    }
+
+    /* To facilitate (potential) VLAN tag insertion, make a
+     * copy of the iov and insert two new vectors at the
+     * beginning for eth hdr and vlan hdr. No data is copied,
+     * just the vectors.
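+     *
+     * The resulting layout (descriptive of the assignments below) is:
+     *
+     *   iov[0]   eth header (possibly redirected to a rewrite copy later)
+     *   iov[1]   vlan header (zero length while the frame is untagged)
+     *   iov[2]   remainder of the first input vector
+     *   iov[3+]  the remaining input vectors, shifted up by two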
+ */ + + of_dpa_flow_pkt_hdr_reset(fc); + + fc->iov[2].iov_base = fields->h_proto + 1; + fc->iov[2].iov_len = iov->iov_len - fc->iov[0].iov_len - fc->iov[1].iov_len; + + for (i = 1; i < iovcnt; i++) { + fc->iov[i+2] = iov[i]; + } + + fc->iovcnt = iovcnt + 2; +} + +static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, __be16 vlan_id) +{ + OfDpaFlowPktFields *fields = &fc->fields; + uint16_t h_proto = fields->ethhdr->h_proto; + + if (fields->vlanhdr) { + DPRINTF("flow_pkt_insert_vlan packet already has vlan\n"); + return; + } + + fields->ethhdr->h_proto = htons(ETH_P_VLAN); + fields->vlanhdr = &fc->vlanhdr; + fields->vlanhdr->h_tci = vlan_id; + fields->vlanhdr->h_proto = h_proto; + fields->h_proto = &fields->vlanhdr->h_proto; + + fc->iov[1].iov_base = fields->vlanhdr; + fc->iov[1].iov_len = sizeof(struct vlan_header); +} + +static void of_dpa_flow_pkt_strip_vlan(OfDpaFlowContext *fc) +{ + OfDpaFlowPktFields *fields = &fc->fields; + + if (!fields->vlanhdr) { + return; + } + + fc->iov[0].iov_len -= sizeof(fields->ethhdr->h_proto); + fc->iov[1].iov_base = fields->h_proto; + fc->iov[1].iov_len = sizeof(fields->ethhdr->h_proto); +} + +static void of_dpa_flow_pkt_hdr_rewrite(OfDpaFlowContext *fc, + uint8_t *src_mac, uint8_t *dst_mac, + __be16 vlan_id) +{ + OfDpaFlowPktFields *fields = &fc->fields; + + if (src_mac || dst_mac) { + memcpy(&fc->ethhdr_rewrite, fields->ethhdr, sizeof(struct eth_header)); + if (src_mac && memcmp(src_mac, zero_mac.a, ETH_ALEN)) { + memcpy(fc->ethhdr_rewrite.h_source, src_mac, ETH_ALEN); + } + if (dst_mac && memcmp(dst_mac, zero_mac.a, ETH_ALEN)) { + memcpy(fc->ethhdr_rewrite.h_dest, dst_mac, ETH_ALEN); + } + fc->iov[0].iov_base = &fc->ethhdr_rewrite; + } + + if (vlan_id && fields->vlanhdr) { + fc->vlanhdr_rewrite = fc->vlanhdr; + fc->vlanhdr_rewrite.h_tci = vlan_id; + fc->iov[1].iov_base = &fc->vlanhdr_rewrite; + } +} + +static void of_dpa_flow_ig_tbl(OfDpaFlowContext *fc, uint32_t tbl_id); + +static void of_dpa_ig_port_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT; + match->value.in_pport = fc->in_pport; + match->value.width = FLOW_KEY_WIDTH(tbl_id); +} + +static void of_dpa_ig_port_miss(OfDpaFlowContext *fc) +{ + uint32_t port; + + /* The default on miss is for packets from physical ports + * to go to the VLAN Flow Table. There is no default rule + * for packets from logical ports, which are dropped on miss. 
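+     * (fp_port_from_pport() below succeeds only for front-panel,
+     * i.e. physical, pports, so frames from logical ports simply
+     * fall out of the pipeline here.)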
+ */ + + if (fp_port_from_pport(fc->in_pport, &port)) { + of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_VLAN); + } +} + +static void of_dpa_vlan_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_VLAN; + match->value.in_pport = fc->in_pport; + if (fc->fields.vlanhdr) { + match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci; + } + match->value.width = FLOW_KEY_WIDTH(eth.vlan_id); +} + +static void of_dpa_vlan_insert(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + if (flow->action.apply.new_vlan_id) { + of_dpa_flow_pkt_insert_vlan(fc, flow->action.apply.new_vlan_id); + } +} + +static void of_dpa_term_mac_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC; + match->value.in_pport = fc->in_pport; + match->value.eth.type = *fc->fields.h_proto; + match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci; + memcpy(match->value.eth.dst.a, fc->fields.ethhdr->h_dest, + sizeof(match->value.eth.dst.a)); + match->value.width = FLOW_KEY_WIDTH(eth.type); +} + +static void of_dpa_term_mac_miss(OfDpaFlowContext *fc) +{ + of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_BRIDGING); +} + +static void of_dpa_apply_actions(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + fc->action_set.apply.copy_to_cpu = flow->action.apply.copy_to_cpu; + fc->action_set.apply.vlan_id = flow->key.eth.vlan_id; +} + +static void of_dpa_bridging_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_BRIDGING; + if (fc->fields.vlanhdr) { + match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci; + } else if (fc->tunnel_id) { + match->value.tunnel_id = fc->tunnel_id; + } + memcpy(match->value.eth.dst.a, fc->fields.ethhdr->h_dest, + sizeof(match->value.eth.dst.a)); + match->value.width = FLOW_KEY_WIDTH(eth.dst); +} + +static void of_dpa_bridging_learn(OfDpaFlowContext *fc, + OfDpaFlow *dst_flow) +{ + OfDpaFlowMatch match = { { 0, }, }; + OfDpaFlow *flow; + uint8_t *addr; + uint16_t vlan_id; + int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) / 1000; + int64_t refresh_delay = 1; + + /* Do a lookup in bridge table by src_mac/vlan */ + + addr = fc->fields.ethhdr->h_source; + vlan_id = fc->fields.vlanhdr->h_tci; + + match.value.tbl_id = ROCKER_OF_DPA_TABLE_ID_BRIDGING; + match.value.eth.vlan_id = vlan_id; + memcpy(match.value.eth.dst.a, addr, sizeof(match.value.eth.dst.a)); + match.value.width = FLOW_KEY_WIDTH(eth.dst); + + flow = of_dpa_flow_match(fc->of_dpa, &match); + if (flow) { + if (!memcmp(flow->mask.eth.dst.a, ff_mac.a, + sizeof(flow->mask.eth.dst.a))) { + /* src_mac/vlan already learned; if in_port and out_port + * don't match, the end station has moved and the port + * needs updating */ + /* XXX implement the in_port/out_port check */ + if (now - flow->stats.refresh_time < refresh_delay) { + return; + } + flow->stats.refresh_time = now; + } + } + + /* Let driver know about mac/vlan. This may be a new mac/vlan + * or a refresh of existing mac/vlan that's been hit after the + * refresh_delay. 
+ */ + + rocker_event_mac_vlan_seen(world_rocker(fc->of_dpa->world), + fc->in_pport, addr, vlan_id); +} + +static void of_dpa_bridging_miss(OfDpaFlowContext *fc) +{ + of_dpa_bridging_learn(fc, NULL); + of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_ACL_POLICY); +} + +static void of_dpa_bridging_action_write(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + if (flow->action.write.group_id != ROCKER_GROUP_NONE) { + fc->action_set.write.group_id = flow->action.write.group_id; + } + fc->action_set.write.tun_log_lport = flow->action.write.tun_log_lport; +} + +static void of_dpa_unicast_routing_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING; + match->value.eth.type = *fc->fields.h_proto; + if (fc->fields.ipv4hdr) { + match->value.ipv4.addr.dst = fc->fields.ipv4hdr->ip_dst; + } + if (fc->fields.ipv6_dst_addr) { + memcpy(&match->value.ipv6.addr.dst, fc->fields.ipv6_dst_addr, + sizeof(match->value.ipv6.addr.dst)); + } + match->value.width = FLOW_KEY_WIDTH(ipv6.addr.dst); +} + +static void of_dpa_unicast_routing_miss(OfDpaFlowContext *fc) +{ + of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_ACL_POLICY); +} + +static void of_dpa_unicast_routing_action_write(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + if (flow->action.write.group_id != ROCKER_GROUP_NONE) { + fc->action_set.write.group_id = flow->action.write.group_id; + } +} + +static void +of_dpa_multicast_routing_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING; + match->value.eth.type = *fc->fields.h_proto; + match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci; + if (fc->fields.ipv4hdr) { + match->value.ipv4.addr.src = fc->fields.ipv4hdr->ip_src; + match->value.ipv4.addr.dst = fc->fields.ipv4hdr->ip_dst; + } + if (fc->fields.ipv6_src_addr) { + memcpy(&match->value.ipv6.addr.src, fc->fields.ipv6_src_addr, + sizeof(match->value.ipv6.addr.src)); + } + if (fc->fields.ipv6_dst_addr) { + memcpy(&match->value.ipv6.addr.dst, fc->fields.ipv6_dst_addr, + sizeof(match->value.ipv6.addr.dst)); + } + match->value.width = FLOW_KEY_WIDTH(ipv6.addr.dst); +} + +static void of_dpa_multicast_routing_miss(OfDpaFlowContext *fc) +{ + of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_ACL_POLICY); +} + +static void +of_dpa_multicast_routing_action_write(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + if (flow->action.write.group_id != ROCKER_GROUP_NONE) { + fc->action_set.write.group_id = flow->action.write.group_id; + } + fc->action_set.write.vlan_id = flow->action.write.vlan_id; +} + +static void of_dpa_acl_build_match(OfDpaFlowContext *fc, + OfDpaFlowMatch *match) +{ + match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; + match->value.in_pport = fc->in_pport; + memcpy(match->value.eth.src.a, fc->fields.ethhdr->h_source, + sizeof(match->value.eth.src.a)); + memcpy(match->value.eth.dst.a, fc->fields.ethhdr->h_dest, + sizeof(match->value.eth.dst.a)); + match->value.eth.type = *fc->fields.h_proto; + match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci; + match->value.width = FLOW_KEY_WIDTH(eth.type); + if (fc->fields.ipv4hdr) { + match->value.ip.proto = fc->fields.ipv4hdr->ip_p; + match->value.ip.tos = fc->fields.ipv4hdr->ip_tos; + match->value.width = FLOW_KEY_WIDTH(ip.tos); + } else if (fc->fields.ipv6hdr) { + match->value.ip.proto = + fc->fields.ipv6hdr->ip6_ctlun.ip6_un1.ip6_un1_nxt; + match->value.ip.tos = 0; /* XXX what goes here? 
*/ + match->value.width = FLOW_KEY_WIDTH(ip.tos); + } +} + +static void of_dpa_eg(OfDpaFlowContext *fc); +static void of_dpa_acl_hit(OfDpaFlowContext *fc, + OfDpaFlow *dst_flow) +{ + of_dpa_eg(fc); +} + +static void of_dpa_acl_action_write(OfDpaFlowContext *fc, + OfDpaFlow *flow) +{ + if (flow->action.write.group_id != ROCKER_GROUP_NONE) { + fc->action_set.write.group_id = flow->action.write.group_id; + } +} + +static void of_dpa_drop(OfDpaFlowContext *fc) +{ + /* drop packet */ +} + +static OfDpaGroup *of_dpa_group_find(OfDpa *of_dpa, + uint32_t group_id) +{ + return g_hash_table_lookup(of_dpa->group_tbl, &group_id); +} + +static int of_dpa_group_add(OfDpa *of_dpa, OfDpaGroup *group) +{ + g_hash_table_insert(of_dpa->group_tbl, &group->id, group); + + return 0; +} + +#if 0 +static int of_dpa_group_mod(OfDpa *of_dpa, OfDpaGroup *group) +{ + OfDpaGroup *old_group = of_dpa_group_find(of_dpa, group->id); + + if (!old_group) { + return -ENOENT; + } + + /* XXX */ + + return 0; +} +#endif + +static int of_dpa_group_del(OfDpa *of_dpa, OfDpaGroup *group) +{ + g_hash_table_remove(of_dpa->group_tbl, &group->id); + + return 0; +} + +#if 0 +static int of_dpa_group_get_stats(OfDpa *of_dpa, uint32_t id) +{ + OfDpaGroup *group = of_dpa_group_find(of_dpa, id); + + if (!group) { + return -ENOENT; + } + + /* XXX get/return stats */ + + return 0; +} +#endif + +static OfDpaGroup *of_dpa_group_alloc(uint32_t id) +{ + OfDpaGroup *group = g_malloc0(sizeof(OfDpaGroup)); + + if (!group) { + return NULL; + } + + group->id = id; + + return group; +} + +static void of_dpa_output_l2_interface(OfDpaFlowContext *fc, + OfDpaGroup *group) +{ + if (group->l2_interface.pop_vlan) { + of_dpa_flow_pkt_strip_vlan(fc); + } + + /* Note: By default, and as per the OpenFlow 1.3.1 + * specification, a packet cannot be forwarded back + * to the IN_PORT from which it came in. An action + * bucket that specifies the particular packet's + * egress port is not evaluated. 
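+     * (Concretely, in the code below: out_pport == 0 loops the frame
+     * back to the CPU via rx_produce(), while out_pport == in_pport
+     * matches neither branch and the frame is dropped.)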
+ */ + + if (group->l2_interface.out_pport == 0) { + rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt); + } else if (group->l2_interface.out_pport != fc->in_pport) { + rocker_port_eg(world_rocker(fc->of_dpa->world), + group->l2_interface.out_pport, + fc->iov, fc->iovcnt); + } +} + +static void of_dpa_output_l2_rewrite(OfDpaFlowContext *fc, + OfDpaGroup *group) +{ + OfDpaGroup *l2_group = + of_dpa_group_find(fc->of_dpa, group->l2_rewrite.group_id); + + if (!l2_group) { + return; + } + + of_dpa_flow_pkt_hdr_rewrite(fc, group->l2_rewrite.src_mac.a, + group->l2_rewrite.dst_mac.a, + group->l2_rewrite.vlan_id); + of_dpa_output_l2_interface(fc, l2_group); +} + +static void of_dpa_output_l2_flood(OfDpaFlowContext *fc, + OfDpaGroup *group) +{ + OfDpaGroup *l2_group; + int i; + + for (i = 0; i < group->l2_flood.group_count; i++) { + of_dpa_flow_pkt_hdr_reset(fc); + l2_group = of_dpa_group_find(fc->of_dpa, group->l2_flood.group_ids[i]); + if (!l2_group) { + continue; + } + switch (ROCKER_GROUP_TYPE_GET(l2_group->id)) { + case ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE: + of_dpa_output_l2_interface(fc, l2_group); + break; + case ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE: + of_dpa_output_l2_rewrite(fc, l2_group); + break; + } + } +} + +static void of_dpa_output_l3_unicast(OfDpaFlowContext *fc, OfDpaGroup *group) +{ + OfDpaGroup *l2_group = + of_dpa_group_find(fc->of_dpa, group->l3_unicast.group_id); + + if (!l2_group) { + return; + } + + of_dpa_flow_pkt_hdr_rewrite(fc, group->l3_unicast.src_mac.a, + group->l3_unicast.dst_mac.a, + group->l3_unicast.vlan_id); + /* XXX need ttl_check */ + of_dpa_output_l2_interface(fc, l2_group); +} + +static void of_dpa_eg(OfDpaFlowContext *fc) +{ + OfDpaFlowAction *set = &fc->action_set; + OfDpaGroup *group; + uint32_t group_id; + + /* send a copy of pkt to CPU (controller)? 
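+     * copy_to_cpu resolves the L2 interface group for out_pport 0,
+     * which of_dpa_output_l2_interface() forwards to the host CPU
+     * through rx_produce().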
*/ + + if (set->apply.copy_to_cpu) { + group_id = ROCKER_GROUP_L2_INTERFACE(set->apply.vlan_id, 0); + group = of_dpa_group_find(fc->of_dpa, group_id); + if (group) { + of_dpa_output_l2_interface(fc, group); + of_dpa_flow_pkt_hdr_reset(fc); + } + } + + /* process group write actions */ + + if (!set->write.group_id) { + return; + } + + group = of_dpa_group_find(fc->of_dpa, set->write.group_id); + if (!group) { + return; + } + + switch (ROCKER_GROUP_TYPE_GET(group->id)) { + case ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE: + of_dpa_output_l2_interface(fc, group); + break; + case ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE: + of_dpa_output_l2_rewrite(fc, group); + break; + case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: + case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: + of_dpa_output_l2_flood(fc, group); + break; + case ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST: + of_dpa_output_l3_unicast(fc, group); + break; + } +} + +typedef struct of_dpa_flow_tbl_ops { + void (*build_match)(OfDpaFlowContext *fc, OfDpaFlowMatch *match); + void (*hit)(OfDpaFlowContext *fc, OfDpaFlow *flow); + void (*miss)(OfDpaFlowContext *fc); + void (*hit_no_goto)(OfDpaFlowContext *fc); + void (*action_apply)(OfDpaFlowContext *fc, OfDpaFlow *flow); + void (*action_write)(OfDpaFlowContext *fc, OfDpaFlow *flow); +} OfDpaFlowTblOps; + +static OfDpaFlowTblOps of_dpa_tbl_ops[] = { + [ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT] = { + .build_match = of_dpa_ig_port_build_match, + .miss = of_dpa_ig_port_miss, + .hit_no_goto = of_dpa_drop, + }, + [ROCKER_OF_DPA_TABLE_ID_VLAN] = { + .build_match = of_dpa_vlan_build_match, + .hit_no_goto = of_dpa_drop, + .action_apply = of_dpa_vlan_insert, + }, + [ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC] = { + .build_match = of_dpa_term_mac_build_match, + .miss = of_dpa_term_mac_miss, + .hit_no_goto = of_dpa_drop, + .action_apply = of_dpa_apply_actions, + }, + [ROCKER_OF_DPA_TABLE_ID_BRIDGING] = { + .build_match = of_dpa_bridging_build_match, + .hit = of_dpa_bridging_learn, + .miss = of_dpa_bridging_miss, + .hit_no_goto = of_dpa_drop, + .action_apply = of_dpa_apply_actions, + .action_write = of_dpa_bridging_action_write, + }, + [ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING] = { + .build_match = of_dpa_unicast_routing_build_match, + .miss = of_dpa_unicast_routing_miss, + .hit_no_goto = of_dpa_drop, + .action_write = of_dpa_unicast_routing_action_write, + }, + [ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING] = { + .build_match = of_dpa_multicast_routing_build_match, + .miss = of_dpa_multicast_routing_miss, + .hit_no_goto = of_dpa_drop, + .action_write = of_dpa_multicast_routing_action_write, + }, + [ROCKER_OF_DPA_TABLE_ID_ACL_POLICY] = { + .build_match = of_dpa_acl_build_match, + .hit = of_dpa_acl_hit, + .miss = of_dpa_eg, + .action_apply = of_dpa_apply_actions, + .action_write = of_dpa_acl_action_write, + }, +}; + +static void of_dpa_flow_ig_tbl(OfDpaFlowContext *fc, uint32_t tbl_id) +{ + OfDpaFlowTblOps *ops = &of_dpa_tbl_ops[tbl_id]; + OfDpaFlowMatch match = { { 0, }, }; + OfDpaFlow *flow; + + if (ops->build_match) { + ops->build_match(fc, &match); + } else { + return; + } + + flow = of_dpa_flow_match(fc->of_dpa, &match); + if (!flow) { + if (ops->miss) { + ops->miss(fc); + } + return; + } + + flow->stats.hits++; + + if (ops->action_apply) { + ops->action_apply(fc, flow); + } + + if (ops->action_write) { + ops->action_write(fc, flow); + } + + if (ops->hit) { + ops->hit(fc, flow); + } + + if (flow->action.goto_tbl) { + of_dpa_flow_ig_tbl(fc, flow->action.goto_tbl); + } else if (ops->hit_no_goto) { + ops->hit_no_goto(fc); + } + + /* drop packet */ +} + 
+static ssize_t of_dpa_ig(World *world, uint32_t pport, + const struct iovec *iov, int iovcnt) +{ + struct iovec iov_copy[iovcnt + 2]; + OfDpaFlowContext fc = { + .of_dpa = world_private(world), + .in_pport = pport, + .iov = iov_copy, + .iovcnt = iovcnt + 2, + }; + + of_dpa_flow_pkt_parse(&fc, iov, iovcnt); + of_dpa_flow_ig_tbl(&fc, ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT); + + return iov_size(iov, iovcnt); +} + +#define ROCKER_TUNNEL_LPORT 0x00010000 + +static int of_dpa_cmd_add_ig_port(OfDpaFlow *flow, RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + bool overlay_tunnel; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT] || + !flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) { + return -ROCKER_EINVAL; + } + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT; + key->width = FLOW_KEY_WIDTH(tbl_id); + + key->in_pport = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT]); + if (flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]) { + mask->in_pport = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]); + } + + overlay_tunnel = !!(key->in_pport & ROCKER_TUNNEL_LPORT); + + action->goto_tbl = + rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]); + + if (!overlay_tunnel && action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_VLAN) { + return -ROCKER_EINVAL; + } + + if (overlay_tunnel && action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_BRIDGING) { + return -ROCKER_EINVAL; + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_vlan(OfDpaFlow *flow, RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + uint32_t port; + bool untagged; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT] || + !flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) { + DPRINTF("Must give in_pport and vlan_id to install VLAN tbl entry\n"); + return -ROCKER_EINVAL; + } + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_VLAN; + key->width = FLOW_KEY_WIDTH(eth.vlan_id); + + key->in_pport = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT]); + if (!fp_port_from_pport(key->in_pport, &port)) { + DPRINTF("in_pport (%d) not a front-panel port\n", key->in_pport); + return -ROCKER_EINVAL; + } + mask->in_pport = 0xffffffff; + + key->eth.vlan_id = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]); + + if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]) { + mask->eth.vlan_id = + rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]); + } + + if (key->eth.vlan_id) { + untagged = false; /* filtering */ + } else { + untagged = true; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) { + action->goto_tbl = + rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]); + if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC) { + DPRINTF("Goto tbl (%d) must be TERM_MAC\n", action->goto_tbl); + return -ROCKER_EINVAL; + } + } + + if (untagged) { + if (!flow_tlvs[ROCKER_TLV_OF_DPA_NEW_VLAN_ID]) { + DPRINTF("Must specify new vlan_id if untagged\n"); + return -ROCKER_EINVAL; + } + action->apply.new_vlan_id = + rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_NEW_VLAN_ID]); + if (1 > ntohs(action->apply.new_vlan_id) || + ntohs(action->apply.new_vlan_id) > 4095) { + DPRINTF("New vlan_id (%d) must be between 1 and 4095\n", + ntohs(action->apply.new_vlan_id)); + return -ROCKER_EINVAL; + } + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_term_mac(OfDpaFlow *flow, RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + 
OfDpaFlowAction *action = &flow->action; + const MACAddr ipv4_mcast = { .a = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 } }; + const MACAddr ipv4_mask = { .a = { 0xff, 0xff, 0xff, 0x80, 0x00, 0x00 } }; + const MACAddr ipv6_mcast = { .a = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 } }; + const MACAddr ipv6_mask = { .a = { 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 } }; + uint32_t port; + bool unicast = false; + bool multicast = false; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT] || + !flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK] || + !flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE] || + !flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC] || + !flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK] || + !flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID] || + !flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]) { + return -ROCKER_EINVAL; + } + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC; + key->width = FLOW_KEY_WIDTH(eth.type); + + key->in_pport = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT]); + if (!fp_port_from_pport(key->in_pport, &port)) { + return -ROCKER_EINVAL; + } + mask->in_pport = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]); + + key->eth.type = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]); + if (key->eth.type != htons(0x0800) && key->eth.type != htons(0x86dd)) { + return -ROCKER_EINVAL; + } + mask->eth.type = htons(0xffff); + + memcpy(key->eth.dst.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]), + sizeof(key->eth.dst.a)); + memcpy(mask->eth.dst.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]), + sizeof(mask->eth.dst.a)); + + if ((key->eth.dst.a[0] & 0x01) == 0x00) { + unicast = true; + } + + /* only two wildcard rules are acceptable for IPv4 and IPv6 multicast */ + if (memcmp(key->eth.dst.a, ipv4_mcast.a, sizeof(key->eth.dst.a)) == 0 && + memcmp(mask->eth.dst.a, ipv4_mask.a, sizeof(mask->eth.dst.a)) == 0) { + multicast = true; + } + if (memcmp(key->eth.dst.a, ipv6_mcast.a, sizeof(key->eth.dst.a)) == 0 && + memcmp(mask->eth.dst.a, ipv6_mask.a, sizeof(mask->eth.dst.a)) == 0) { + multicast = true; + } + + if (!unicast && !multicast) { + return -ROCKER_EINVAL; + } + + key->eth.vlan_id = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]); + mask->eth.vlan_id = + rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]); + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) { + action->goto_tbl = + rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]); + + if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING && + action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING) { + return -ROCKER_EINVAL; + } + + if (unicast && + action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING) { + return -ROCKER_EINVAL; + } + + if (multicast && + action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING) { + return -ROCKER_EINVAL; + } + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]) { + action->apply.copy_to_cpu = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]); + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_bridging(OfDpaFlow *flow, RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + bool unicast = false; + bool dst_mac = false; + bool dst_mac_mask = false; + enum { + BRIDGING_MODE_UNKNOWN, + BRIDGING_MODE_VLAN_UCAST, + BRIDGING_MODE_VLAN_MCAST, + BRIDGING_MODE_VLAN_DFLT, + BRIDGING_MODE_TUNNEL_UCAST, + BRIDGING_MODE_TUNNEL_MCAST, + BRIDGING_MODE_TUNNEL_DFLT, + } mode = BRIDGING_MODE_UNKNOWN; + + key->tbl_id = 
ROCKER_OF_DPA_TABLE_ID_BRIDGING;
+
+    if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) {
+        key->eth.vlan_id =
+            rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]);
+        mask->eth.vlan_id = 0xffff;
+        key->width = FLOW_KEY_WIDTH(eth.vlan_id);
+    }
+
+    if (flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_ID]) {
+        key->tunnel_id =
+            rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_ID]);
+        mask->tunnel_id = 0xffffffff;
+        key->width = FLOW_KEY_WIDTH(tunnel_id);
+    }
+
+    /* can't do VLAN bridging and tunnel bridging at same time */
+    if (key->eth.vlan_id && key->tunnel_id) {
+        DPRINTF("can't do VLAN bridging and tunnel bridging at same time\n");
+        return -ROCKER_EINVAL;
+    }
+
+    if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) {
+        memcpy(key->eth.dst.a,
+               rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]),
+               sizeof(key->eth.dst.a));
+        key->width = FLOW_KEY_WIDTH(eth.dst);
+        dst_mac = true;
+        unicast = (key->eth.dst.a[0] & 0x01) == 0x00;
+    }
+
+    if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]) {
+        memcpy(mask->eth.dst.a,
+               rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]),
+               sizeof(mask->eth.dst.a));
+        key->width = FLOW_KEY_WIDTH(eth.dst);
+        dst_mac_mask = true;
+    } else if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) {
+        memcpy(mask->eth.dst.a, ff_mac.a, sizeof(mask->eth.dst.a));
+    }
+
+    if (key->eth.vlan_id) {
+        if (dst_mac && !dst_mac_mask) {
+            mode = unicast ? BRIDGING_MODE_VLAN_UCAST :
+                             BRIDGING_MODE_VLAN_MCAST;
+        } else if ((dst_mac && dst_mac_mask) || !dst_mac) {
+            mode = BRIDGING_MODE_VLAN_DFLT;
+        }
+    } else if (key->tunnel_id) {
+        if (dst_mac && !dst_mac_mask) {
+            mode = unicast ? BRIDGING_MODE_TUNNEL_UCAST :
+                             BRIDGING_MODE_TUNNEL_MCAST;
+        } else if ((dst_mac && dst_mac_mask) || !dst_mac) {
+            mode = BRIDGING_MODE_TUNNEL_DFLT;
+        }
+    }
+
+    if (mode == BRIDGING_MODE_UNKNOWN) {
+        DPRINTF("Unknown bridging mode\n");
+        return -ROCKER_EINVAL;
+    }
+
+    if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) {
+        action->goto_tbl =
+            rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]);
+        if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_ACL_POLICY) {
+            DPRINTF("Bridging goto tbl must be ACL policy\n");
+            return -ROCKER_EINVAL;
+        }
+    }
+
+    if (flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) {
+        action->write.group_id =
+            rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]);
+        switch (mode) {
+        case BRIDGING_MODE_VLAN_UCAST:
+            if (ROCKER_GROUP_TYPE_GET(action->write.group_id) !=
+                ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) {
+                DPRINTF("Bridging mode vlan ucast needs L2 "
+                        "interface group (0x%08x)\n",
+                        action->write.group_id);
+                return -ROCKER_EINVAL;
+            }
+            break;
+        case BRIDGING_MODE_VLAN_MCAST:
+            if (ROCKER_GROUP_TYPE_GET(action->write.group_id) !=
+                ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST) {
+                DPRINTF("Bridging mode vlan mcast needs L2 "
+                        "mcast group (0x%08x)\n",
+                        action->write.group_id);
+                return -ROCKER_EINVAL;
+            }
+            break;
+        case BRIDGING_MODE_VLAN_DFLT:
+            if (ROCKER_GROUP_TYPE_GET(action->write.group_id) !=
+                ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD) {
+                DPRINTF("Bridging mode vlan dflt needs L2 "
+                        "flood group (0x%08x)\n",
+                        action->write.group_id);
+                return -ROCKER_EINVAL;
+            }
+            break;
+        case BRIDGING_MODE_TUNNEL_MCAST:
+            if (ROCKER_GROUP_TYPE_GET(action->write.group_id) !=
+                ROCKER_OF_DPA_GROUP_TYPE_L2_OVERLAY) {
+                DPRINTF("Bridging mode tunnel mcast needs L2 "
+                        "overlay group (0x%08x)\n",
+                        action->write.group_id);
+                return -ROCKER_EINVAL;
+            }
+            break;
+        case BRIDGING_MODE_TUNNEL_DFLT:
+            if (ROCKER_GROUP_TYPE_GET(action->write.group_id) !=
+                ROCKER_OF_DPA_GROUP_TYPE_L2_OVERLAY) {
+                DPRINTF("Bridging mode tunnel dflt 
needs L2 " + "overlay group (0x%08x)\n", + action->write.group_id); + return -ROCKER_EINVAL; + } + break; + default: + return -ROCKER_EINVAL; + } + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_LPORT]) { + action->write.tun_log_lport = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_LPORT]); + if (mode != BRIDGING_MODE_TUNNEL_UCAST) { + DPRINTF("Have tunnel logical port but not " + "in bridging tunnel mode\n"); + return -ROCKER_EINVAL; + } + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]) { + action->apply.copy_to_cpu = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]); + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_unicast_routing(OfDpaFlow *flow, + RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + enum { + UNICAST_ROUTING_MODE_UNKNOWN, + UNICAST_ROUTING_MODE_IPV4, + UNICAST_ROUTING_MODE_IPV6, + } mode = UNICAST_ROUTING_MODE_UNKNOWN; + uint8_t type; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]) { + return -ROCKER_EINVAL; + } + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING; + key->width = FLOW_KEY_WIDTH(ipv6.addr.dst); + + key->eth.type = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]); + switch (ntohs(key->eth.type)) { + case 0x0800: + mode = UNICAST_ROUTING_MODE_IPV4; + break; + case 0x86dd: + mode = UNICAST_ROUTING_MODE_IPV6; + break; + default: + return -ROCKER_EINVAL; + } + mask->eth.type = htons(0xffff); + + switch (mode) { + case UNICAST_ROUTING_MODE_IPV4: + if (!flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP]) { + return -ROCKER_EINVAL; + } + key->ipv4.addr.dst = + rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP]); + if (ipv4_addr_is_multicast(key->ipv4.addr.dst)) { + return -ROCKER_EINVAL; + } + flow->lpm = of_dpa_mask2prefix(htonl(0xffffffff)); + if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP_MASK]) { + mask->ipv4.addr.dst = + rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP_MASK]); + flow->lpm = of_dpa_mask2prefix(mask->ipv4.addr.dst); + } + break; + case UNICAST_ROUTING_MODE_IPV6: + if (!flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6]) { + return -ROCKER_EINVAL; + } + memcpy(&key->ipv6.addr.dst, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6]), + sizeof(key->ipv6.addr.dst)); + if (ipv6_addr_is_multicast(&key->ipv6.addr.dst)) { + return -ROCKER_EINVAL; + } + if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6_MASK]) { + memcpy(&mask->ipv6.addr.dst, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6_MASK]), + sizeof(mask->ipv6.addr.dst)); + } + break; + default: + return -ROCKER_EINVAL; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) { + action->goto_tbl = + rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]); + if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_ACL_POLICY) { + return -ROCKER_EINVAL; + } + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) { + action->write.group_id = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]); + type = ROCKER_GROUP_TYPE_GET(action->write.group_id); + if (type != ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE && + type != ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST && + type != ROCKER_OF_DPA_GROUP_TYPE_L3_ECMP) { + return -ROCKER_EINVAL; + } + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_multicast_routing(OfDpaFlow *flow, + RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + enum { + MULTICAST_ROUTING_MODE_UNKNOWN, + MULTICAST_ROUTING_MODE_IPV4, + MULTICAST_ROUTING_MODE_IPV6, + } mode = 
MULTICAST_ROUTING_MODE_UNKNOWN;
+
+    if (!flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE] ||
+        !flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) {
+        return -ROCKER_EINVAL;
+    }
+
+    key->tbl_id = ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING;
+    key->width = FLOW_KEY_WIDTH(ipv6.addr.dst);
+
+    key->eth.type = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]);
+    switch (ntohs(key->eth.type)) {
+    case 0x0800:
+        mode = MULTICAST_ROUTING_MODE_IPV4;
+        break;
+    case 0x86dd:
+        mode = MULTICAST_ROUTING_MODE_IPV6;
+        break;
+    default:
+        return -ROCKER_EINVAL;
+    }
+
+    key->eth.vlan_id = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]);
+
+    switch (mode) {
+    case MULTICAST_ROUTING_MODE_IPV4:
+
+        if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP]) {
+            key->ipv4.addr.src =
+                rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP]);
+        }
+
+        if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP_MASK]) {
+            mask->ipv4.addr.src =
+                rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP_MASK]);
+        }
+
+        if (!flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP]) {
+            if (mask->ipv4.addr.src != 0) {
+                return -ROCKER_EINVAL;
+            }
+        }
+
+        if (!flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP]) {
+            return -ROCKER_EINVAL;
+        }
+
+        key->ipv4.addr.dst =
+            rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP]);
+        if (!ipv4_addr_is_multicast(key->ipv4.addr.dst)) {
+            return -ROCKER_EINVAL;
+        }
+
+        break;
+
+    case MULTICAST_ROUTING_MODE_IPV6:
+
+        if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6]) {
+            memcpy(&key->ipv6.addr.src,
+                   rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6]),
+                   sizeof(key->ipv6.addr.src));
+        }
+
+        if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6_MASK]) {
+            memcpy(&mask->ipv6.addr.src,
+                   rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6_MASK]),
+                   sizeof(mask->ipv6.addr.src));
+        }
+
+        if (!flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6]) {
+            if (mask->ipv6.addr.src.addr32[0] != 0 ||
+                mask->ipv6.addr.src.addr32[1] != 0 ||
+                mask->ipv6.addr.src.addr32[2] != 0 ||
+                mask->ipv6.addr.src.addr32[3] != 0) {
+                return -ROCKER_EINVAL;
+            }
+        }
+
+        if (!flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6]) {
+            return -ROCKER_EINVAL;
+        }
+
+        memcpy(&key->ipv6.addr.dst,
+               rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6]),
+               sizeof(key->ipv6.addr.dst));
+        if (!ipv6_addr_is_multicast(&key->ipv6.addr.dst)) {
+            return -ROCKER_EINVAL;
+        }
+
+        break;
+
+    default:
+        return -ROCKER_EINVAL;
+    }
+
+    if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) {
+        action->goto_tbl =
+            rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]);
+        if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_ACL_POLICY) {
+            return -ROCKER_EINVAL;
+        }
+    }
+
+    if (flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) {
+        action->write.group_id =
+            rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]);
+        if (ROCKER_GROUP_TYPE_GET(action->write.group_id) !=
+            ROCKER_OF_DPA_GROUP_TYPE_L3_MCAST) {
+            return -ROCKER_EINVAL;
+        }
+        action->write.vlan_id = key->eth.vlan_id;
+    }
+
+    return ROCKER_OK;
+}
+
+static int of_dpa_cmd_add_acl_ip(OfDpaFlowKey *key, OfDpaFlowKey *mask,
+                                 RockerTlv **flow_tlvs)
+{
+    key->width = FLOW_KEY_WIDTH(ip.tos);
+
+    key->ip.proto = 0;
+    key->ip.tos = 0;
+    mask->ip.proto = 0;
+    mask->ip.tos = 0;
+
+    if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_PROTO]) {
+        key->ip.proto =
+            rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_PROTO]);
+    }
+    if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_PROTO_MASK]) {
+        mask->ip.proto =
+            rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_PROTO_MASK]);
+    }
+    if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_DSCP]) {
+        key->ip.tos =
+            rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_DSCP]);
+    }
+    if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_DSCP_MASK]) {
+
mask->ip.tos = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_DSCP_MASK]); + } + if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_ECN]) { + key->ip.tos |= + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_ECN]) << 6; + } + if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_ECN_MASK]) { + mask->ip.tos |= + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_ECN_MASK]) << 6; + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_add_acl(OfDpaFlow *flow, RockerTlv **flow_tlvs) +{ + OfDpaFlowKey *key = &flow->key; + OfDpaFlowKey *mask = &flow->mask; + OfDpaFlowAction *action = &flow->action; + enum { + ACL_MODE_UNKNOWN, + ACL_MODE_IPV4_VLAN, + ACL_MODE_IPV6_VLAN, + ACL_MODE_IPV4_TENANT, + ACL_MODE_IPV6_TENANT, + ACL_MODE_NON_IP_VLAN, + ACL_MODE_NON_IP_TENANT, + ACL_MODE_ANY_VLAN, + ACL_MODE_ANY_TENANT, + } mode = ACL_MODE_UNKNOWN; + int err = ROCKER_OK; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT] || + !flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]) { + return -ROCKER_EINVAL; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID] && + flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_ID]) { + return -ROCKER_EINVAL; + } + + key->tbl_id = ROCKER_OF_DPA_TABLE_ID_ACL_POLICY; + key->width = FLOW_KEY_WIDTH(eth.type); + + key->in_pport = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT]); + if (flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]) { + mask->in_pport = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]) { + memcpy(key->eth.src.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]), + sizeof(key->eth.src.a)); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC_MASK]) { + memcpy(mask->eth.src.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC_MASK]), + sizeof(mask->eth.src.a)); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) { + memcpy(key->eth.dst.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]), + sizeof(key->eth.dst.a)); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]) { + memcpy(mask->eth.dst.a, + rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]), + sizeof(mask->eth.dst.a)); + } + + key->eth.type = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]); + if (key->eth.type) { + mask->eth.type = 0xffff; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) { + key->eth.vlan_id = + rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]) { + mask->eth.vlan_id = + rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]); + } + + switch (ntohs(key->eth.type)) { + case 0x0000: + mode = (key->eth.vlan_id) ? ACL_MODE_ANY_VLAN : ACL_MODE_ANY_TENANT; + break; + case 0x0800: + mode = (key->eth.vlan_id) ? ACL_MODE_IPV4_VLAN : ACL_MODE_IPV4_TENANT; + break; + case 0x86dd: + mode = (key->eth.vlan_id) ? ACL_MODE_IPV6_VLAN : ACL_MODE_IPV6_TENANT; + break; + default: + mode = (key->eth.vlan_id) ? 
ACL_MODE_NON_IP_VLAN : + ACL_MODE_NON_IP_TENANT; + break; + } + + /* XXX only supporting VLAN modes for now */ + if (mode != ACL_MODE_IPV4_VLAN && + mode != ACL_MODE_IPV6_VLAN && + mode != ACL_MODE_NON_IP_VLAN && + mode != ACL_MODE_ANY_VLAN) { + return -ROCKER_EINVAL; + } + + switch (ntohs(key->eth.type)) { + case 0x0800: + case 0x86dd: + err = of_dpa_cmd_add_acl_ip(key, mask, flow_tlvs); + break; + } + + if (err) { + return err; + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) { + action->write.group_id = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]); + } + + if (flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]) { + action->apply.copy_to_cpu = + rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]); + } + + return ROCKER_OK; +} + +static int of_dpa_cmd_flow_add_mod(OfDpa *of_dpa, OfDpaFlow *flow, + RockerTlv **flow_tlvs) +{ + enum rocker_of_dpa_table_id tbl; + int err = ROCKER_OK; + + if (!flow_tlvs[ROCKER_TLV_OF_DPA_TABLE_ID] || + !flow_tlvs[ROCKER_TLV_OF_DPA_PRIORITY] || + !flow_tlvs[ROCKER_TLV_OF_DPA_HARDTIME]) { + return -ROCKER_EINVAL; + } + + tbl = rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_TABLE_ID]); + flow->priority = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_PRIORITY]); + flow->hardtime = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_HARDTIME]); + + if (flow_tlvs[ROCKER_TLV_OF_DPA_IDLETIME]) { + if (tbl == ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT || + tbl == ROCKER_OF_DPA_TABLE_ID_VLAN || + tbl == ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC) { + return -ROCKER_EINVAL; + } + flow->idletime = + rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IDLETIME]); + } + + switch (tbl) { + case ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT: + err = of_dpa_cmd_add_ig_port(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_VLAN: + err = of_dpa_cmd_add_vlan(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC: + err = of_dpa_cmd_add_term_mac(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_BRIDGING: + err = of_dpa_cmd_add_bridging(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING: + err = of_dpa_cmd_add_unicast_routing(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING: + err = of_dpa_cmd_add_multicast_routing(flow, flow_tlvs); + break; + case ROCKER_OF_DPA_TABLE_ID_ACL_POLICY: + err = of_dpa_cmd_add_acl(flow, flow_tlvs); + break; + } + + return err; +} + +static int of_dpa_cmd_flow_add(OfDpa *of_dpa, uint64_t cookie, + RockerTlv **flow_tlvs) +{ + OfDpaFlow *flow = of_dpa_flow_find(of_dpa, cookie); + int err = ROCKER_OK; + + if (flow) { + return -ROCKER_EEXIST; + } + + flow = of_dpa_flow_alloc(cookie); + if (!flow) { + return -ROCKER_ENOMEM; + } + + err = of_dpa_cmd_flow_add_mod(of_dpa, flow, flow_tlvs); + if (err) { + g_free(flow); + return err; + } + + return of_dpa_flow_add(of_dpa, flow); +} + +static int of_dpa_cmd_flow_mod(OfDpa *of_dpa, uint64_t cookie, + RockerTlv **flow_tlvs) +{ + OfDpaFlow *flow = of_dpa_flow_find(of_dpa, cookie); + + if (!flow) { + return -ROCKER_ENOENT; + } + + return of_dpa_cmd_flow_add_mod(of_dpa, flow, flow_tlvs); +} + +static int of_dpa_cmd_flow_del(OfDpa *of_dpa, uint64_t cookie) +{ + OfDpaFlow *flow = of_dpa_flow_find(of_dpa, cookie); + + if (!flow) { + return -ROCKER_ENOENT; + } + + of_dpa_flow_del(of_dpa, flow); + + return ROCKER_OK; +} + +static int of_dpa_cmd_flow_get_stats(OfDpa *of_dpa, uint64_t cookie, + struct desc_info *info, char *buf) +{ + OfDpaFlow *flow = of_dpa_flow_find(of_dpa, cookie); + size_t tlv_size; + int64_t now = 
qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) / 1000;
+    int pos;
+
+    if (!flow) {
+        return -ROCKER_ENOENT;
+    }
+
+    tlv_size = rocker_tlv_total_size(sizeof(uint32_t)) +  /* duration */
+               rocker_tlv_total_size(sizeof(uint64_t)) +  /* rx_pkts */
+               rocker_tlv_total_size(sizeof(uint64_t));   /* tx_pkts */
+
+    if (tlv_size > desc_buf_size(info)) {
+        return -ROCKER_EMSGSIZE;
+    }
+
+    pos = 0;
+    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_OF_DPA_FLOW_STAT_DURATION,
+                        (int32_t)(now - flow->stats.install_time));
+    rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_OF_DPA_FLOW_STAT_RX_PKTS,
+                        flow->stats.rx_pkts);
+    rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_OF_DPA_FLOW_STAT_TX_PKTS,
+                        flow->stats.tx_pkts);
+
+    return desc_set_buf(info, tlv_size);
+}
+
+static int of_dpa_flow_cmd(OfDpa *of_dpa, struct desc_info *info,
+                           char *buf, uint16_t cmd,
+                           RockerTlv **flow_tlvs)
+{
+    uint64_t cookie;
+
+    if (!flow_tlvs[ROCKER_TLV_OF_DPA_COOKIE]) {
+        return -ROCKER_EINVAL;
+    }
+
+    cookie = rocker_tlv_get_le64(flow_tlvs[ROCKER_TLV_OF_DPA_COOKIE]);
+
+    switch (cmd) {
+    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
+        return of_dpa_cmd_flow_add(of_dpa, cookie, flow_tlvs);
+    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
+        return of_dpa_cmd_flow_mod(of_dpa, cookie, flow_tlvs);
+    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
+        return of_dpa_cmd_flow_del(of_dpa, cookie);
+    case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
+        return of_dpa_cmd_flow_get_stats(of_dpa, cookie, info, buf);
+    }
+
+    return -ROCKER_ENOTSUP;
+}
+
+static int of_dpa_cmd_add_l2_interface(OfDpaGroup *group,
+                                       RockerTlv **group_tlvs)
+{
+    if (!group_tlvs[ROCKER_TLV_OF_DPA_OUT_PPORT] ||
+        !group_tlvs[ROCKER_TLV_OF_DPA_POP_VLAN]) {
+        return -ROCKER_EINVAL;
+    }
+
+    group->l2_interface.out_pport =
+        rocker_tlv_get_le32(group_tlvs[ROCKER_TLV_OF_DPA_OUT_PPORT]);
+    group->l2_interface.pop_vlan =
+        rocker_tlv_get_u8(group_tlvs[ROCKER_TLV_OF_DPA_POP_VLAN]);
+
+    return ROCKER_OK;
+}
+
+static int of_dpa_cmd_add_l2_rewrite(OfDpa *of_dpa, OfDpaGroup *group,
+                                     RockerTlv **group_tlvs)
+{
+    OfDpaGroup *l2_interface_group;
+
+    if (!group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID_LOWER]) {
+        return -ROCKER_EINVAL;
+    }
+
+    group->l2_rewrite.group_id =
+        rocker_tlv_get_le32(group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID_LOWER]);
+
+    l2_interface_group = of_dpa_group_find(of_dpa, group->l2_rewrite.group_id);
+    if (!l2_interface_group ||
+        ROCKER_GROUP_TYPE_GET(l2_interface_group->id) !=
+        ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) {
+        DPRINTF("l2 rewrite group needs a valid l2 interface group\n");
+        return -ROCKER_EINVAL;
+    }
+
+    if (group_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]) {
+        memcpy(group->l2_rewrite.src_mac.a,
+               rocker_tlv_data(group_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]),
+               sizeof(group->l2_rewrite.src_mac.a));
+    }
+
+    if (group_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) {
+        memcpy(group->l2_rewrite.dst_mac.a,
+               rocker_tlv_data(group_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]),
+               sizeof(group->l2_rewrite.dst_mac.a));
+    }
+
+    if (group_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) {
+        group->l2_rewrite.vlan_id =
+            rocker_tlv_get_u16(group_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]);
+        if (ROCKER_GROUP_VLAN_GET(l2_interface_group->id) !=
+            (ntohs(group->l2_rewrite.vlan_id) & VLAN_VID_MASK)) {
+            DPRINTF("Set VLAN ID must be same as L2 interface group\n");
+            return -ROCKER_EINVAL;
+        }
+    }
+
+    return ROCKER_OK;
+}
+
+static int of_dpa_cmd_add_l2_flood(OfDpa *of_dpa, OfDpaGroup *group,
+                                   RockerTlv **group_tlvs)
+{
+    OfDpaGroup *l2_group;
+    RockerTlv **tlvs;
+    int err;
+    int i;
+
+    if (!group_tlvs[ROCKER_TLV_OF_DPA_GROUP_COUNT] ||
+        !group_tlvs[ROCKER_TLV_OF_DPA_GROUP_IDS]) {
+        return -ROCKER_EINVAL;
+    }
+
+    group->l2_flood.group_count =
+        rocker_tlv_get_le16(group_tlvs[ROCKER_TLV_OF_DPA_GROUP_COUNT]);
+
+    tlvs = g_malloc0((group->l2_flood.group_count + 1) *
+                     sizeof(RockerTlv *));
+    if (!tlvs) {
+        return -ROCKER_ENOMEM;
+    }
+
+    g_free(group->l2_flood.group_ids);
+    group->l2_flood.group_ids =
+        g_malloc0(group->l2_flood.group_count * sizeof(uint32_t));
+    if (!group->l2_flood.group_ids) {
+        err = -ROCKER_ENOMEM;
+        goto err_out;
+    }
+
+    rocker_tlv_parse_nested(tlvs, group->l2_flood.group_count,
+                            group_tlvs[ROCKER_TLV_OF_DPA_GROUP_IDS]);
+
+    for (i = 0; i < group->l2_flood.group_count; i++) {
+        group->l2_flood.group_ids[i] = rocker_tlv_get_le32(tlvs[i + 1]);
+    }
+
+    /* All of the L2 interface groups referenced by the L2 flood
+     * group must have the same VLAN
+     */
+
+    for (i = 0; i < group->l2_flood.group_count; i++) {
+        l2_group = of_dpa_group_find(of_dpa, group->l2_flood.group_ids[i]);
+        if (!l2_group) {
+            continue;
+        }
+        if ((ROCKER_GROUP_TYPE_GET(l2_group->id) ==
+             ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) &&
+            (ROCKER_GROUP_VLAN_GET(l2_group->id) !=
+             ROCKER_GROUP_VLAN_GET(group->id))) {
+            DPRINTF("l2 interface group 0x%08x VLAN doesn't match l2 "
+                    "flood group 0x%08x\n",
+                    group->l2_flood.group_ids[i], group->id);
+            err = -ROCKER_EINVAL;
+            goto err_out;
+        }
+    }
+
+    g_free(tlvs);
+    return ROCKER_OK;
+
+err_out:
+    group->l2_flood.group_count = 0;
+    g_free(group->l2_flood.group_ids);
+    group->l2_flood.group_ids = NULL;
+    g_free(tlvs);
+
+    return err;
+}
+
+static int of_dpa_cmd_add_l3_unicast(OfDpaGroup *group, RockerTlv **group_tlvs)
+{
+    if (!group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID_LOWER]) {
+        return -ROCKER_EINVAL;
+    }
+
+    group->l3_unicast.group_id =
+        rocker_tlv_get_le32(group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID_LOWER]);
+
+    if (group_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]) {
+        memcpy(group->l3_unicast.src_mac.a,
+               rocker_tlv_data(group_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]),
+               sizeof(group->l3_unicast.src_mac.a));
+    }
+
+    if (group_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) {
+        memcpy(group->l3_unicast.dst_mac.a,
+               rocker_tlv_data(group_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]),
+               sizeof(group->l3_unicast.dst_mac.a));
+    }
+
+    if (group_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) {
+        group->l3_unicast.vlan_id =
+            rocker_tlv_get_u16(group_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]);
+    }
+
+    if (group_tlvs[ROCKER_TLV_OF_DPA_TTL_CHECK]) {
+        group->l3_unicast.ttl_check =
+            rocker_tlv_get_u8(group_tlvs[ROCKER_TLV_OF_DPA_TTL_CHECK]);
+    }
+
+    return ROCKER_OK;
+}
+
+static int of_dpa_cmd_group_do(OfDpa *of_dpa, uint32_t group_id,
+                               OfDpaGroup *group, RockerTlv **group_tlvs)
+{
+    uint8_t type = ROCKER_GROUP_TYPE_GET(group_id);
+
+    switch (type) {
+    case ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE:
+        return of_dpa_cmd_add_l2_interface(group, group_tlvs);
+    case ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE:
+        return of_dpa_cmd_add_l2_rewrite(of_dpa, group, group_tlvs);
+    case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD:
+        /* Treat an L2 multicast group the same as an L2 flood group */
+    case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST:
+        return of_dpa_cmd_add_l2_flood(of_dpa, group, group_tlvs);
+    case ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST:
+        return of_dpa_cmd_add_l3_unicast(group, group_tlvs);
+    }
+
+    return -ROCKER_ENOTSUP;
+}
+
+static int of_dpa_cmd_group_add(OfDpa *of_dpa, uint32_t group_id,
+                                RockerTlv **group_tlvs)
+{
+    OfDpaGroup *group = of_dpa_group_find(of_dpa, group_id);
+    int err;
+
+    if (group) {
+        return -ROCKER_EEXIST;
+    }
+
+    group = of_dpa_group_alloc(group_id);
+    if (!group) {
+        return -ROCKER_ENOMEM;
+    }
+
+    err = of_dpa_cmd_group_do(of_dpa, group_id, group, group_tlvs);
+    if (err) {
+        goto err_cmd_add;
+
} + + err = of_dpa_group_add(of_dpa, group); + if (err) { + goto err_cmd_add; + } + + return ROCKER_OK; + +err_cmd_add: + g_free(group); + return err; +} + +static int of_dpa_cmd_group_mod(OfDpa *of_dpa, uint32_t group_id, + RockerTlv **group_tlvs) +{ + OfDpaGroup *group = of_dpa_group_find(of_dpa, group_id); + + if (!group) { + return -ROCKER_ENOENT; + } + + return of_dpa_cmd_group_do(of_dpa, group_id, group, group_tlvs); +} + +static int of_dpa_cmd_group_del(OfDpa *of_dpa, uint32_t group_id) +{ + OfDpaGroup *group = of_dpa_group_find(of_dpa, group_id); + + if (!group) { + return -ROCKER_ENOENT; + } + + return of_dpa_group_del(of_dpa, group); +} + +static int of_dpa_cmd_group_get_stats(OfDpa *of_dpa, uint32_t group_id, + struct desc_info *info, char *buf) +{ + return -ROCKER_ENOTSUP; +} + +static int of_dpa_group_cmd(OfDpa *of_dpa, struct desc_info *info, + char *buf, uint16_t cmd, RockerTlv **group_tlvs) +{ + uint32_t group_id; + + if (!group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) { + return -ROCKER_EINVAL; + } + + group_id = rocker_tlv_get_le32(group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]); + + switch (cmd) { + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD: + return of_dpa_cmd_group_add(of_dpa, group_id, group_tlvs); + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD: + return of_dpa_cmd_group_mod(of_dpa, group_id, group_tlvs); + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL: + return of_dpa_cmd_group_del(of_dpa, group_id); + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS: + return of_dpa_cmd_group_get_stats(of_dpa, group_id, info, buf); + } + + return -ROCKER_ENOTSUP; +} + +static int of_dpa_cmd(World *world, struct desc_info *info, + char *buf, uint16_t cmd, RockerTlv *cmd_info_tlv) +{ + OfDpa *of_dpa = world_private(world); + RockerTlv *tlvs[ROCKER_TLV_OF_DPA_MAX + 1]; + + rocker_tlv_parse_nested(tlvs, ROCKER_TLV_OF_DPA_MAX, cmd_info_tlv); + + switch (cmd) { + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL: + case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS: + return of_dpa_flow_cmd(of_dpa, info, buf, cmd, tlvs); + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD: + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL: + case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS: + return of_dpa_group_cmd(of_dpa, info, buf, cmd, tlvs); + } + + return -ROCKER_ENOTSUP; +} + +static gboolean rocker_int64_equal(gconstpointer v1, gconstpointer v2) +{ + return *((const uint64_t *)v1) == *((const uint64_t *)v2); +} + +static guint rocker_int64_hash(gconstpointer v) +{ + return (guint)*(const uint64_t *)v; +} + +static int of_dpa_init(World *world) +{ + OfDpa *of_dpa = world_private(world); + + of_dpa->world = world; + + of_dpa->flow_tbl = g_hash_table_new_full(rocker_int64_hash, + rocker_int64_equal, + NULL, g_free); + if (!of_dpa->flow_tbl) { + return -ENOMEM; + } + + of_dpa->group_tbl = g_hash_table_new_full(g_int_hash, g_int_equal, + NULL, g_free); + if (!of_dpa->group_tbl) { + goto err_group_tbl; + } + + /* XXX hardcode some artificial table max values */ + of_dpa->flow_tbl_max_size = 100; + of_dpa->group_tbl_max_size = 100; + + return 0; + +err_group_tbl: + g_hash_table_destroy(of_dpa->flow_tbl); + return -ENOMEM; +} + +static void of_dpa_uninit(World *world) +{ + OfDpa *of_dpa = world_private(world); + + g_hash_table_destroy(of_dpa->group_tbl); + g_hash_table_destroy(of_dpa->flow_tbl); +} + +static WorldOps of_dpa_ops = { + .init = of_dpa_init, + .uninit = of_dpa_uninit, + .ig = of_dpa_ig, + .cmd = of_dpa_cmd, +}; 
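+
+/* For reference, a host-side sketch of the TLV set a driver would emit
+ * to install a VLAN-table entry, using the helpers from rocker_tlv.h
+ * (attribute requirements as enforced by of_dpa_cmd_flow_add_mod() and
+ * of_dpa_cmd_add_vlan(); buf, pport and vlan_id are placeholders):
+ *
+ *   int pos = 0;
+ *   rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_OF_DPA_COOKIE, cookie);
+ *   rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_OF_DPA_TABLE_ID,
+ *                       ROCKER_OF_DPA_TABLE_ID_VLAN);
+ *   rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_OF_DPA_PRIORITY, 1);
+ *   rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_OF_DPA_HARDTIME, 0);
+ *   rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_OF_DPA_IN_PPORT, pport);
+ *   rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_OF_DPA_VLAN_ID, htons(vlan_id));
+ *
+ * The set travels nested inside the command-info TLV that of_dpa_cmd()
+ * unpacks with rocker_tlv_parse_nested().
+ */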
+ +World *of_dpa_world_alloc(Rocker *r) +{ + return world_alloc(r, sizeof(OfDpa), ROCKER_WORLD_TYPE_OF_DPA, &of_dpa_ops); +} diff --git a/hw/net/rocker/rocker_of_dpa.h b/hw/net/rocker/rocker_of_dpa.h new file mode 100644 index 0000000..f3f6d77 --- /dev/null +++ b/hw/net/rocker/rocker_of_dpa.h @@ -0,0 +1,22 @@ +/* + * QEMU rocker switch emulation - OF-DPA flow processing support + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _ROCKER_OF_DPA_H_ +#define _ROCKER_OF_DPA_H_ + +World *of_dpa_world_alloc(Rocker *r); + +#endif /* _ROCKER_OF_DPA_H_ */ diff --git a/hw/net/rocker/rocker_tlv.h b/hw/net/rocker/rocker_tlv.h new file mode 100644 index 0000000..e3c4ab6 --- /dev/null +++ b/hw/net/rocker/rocker_tlv.h @@ -0,0 +1,244 @@ +/* + * QEMU rocker switch emulation - TLV parsing and composing + * + * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef _ROCKER_TLV_H_ +#define _ROCKER_TLV_H_ + +#define ROCKER_TLV_ALIGNTO 8U +#define ROCKER_TLV_ALIGN(len) \ + (((len) + ROCKER_TLV_ALIGNTO - 1) & ~(ROCKER_TLV_ALIGNTO - 1)) +#define ROCKER_TLV_HDRLEN ROCKER_TLV_ALIGN(sizeof(RockerTlv)) + +/* + * <------- ROCKER_TLV_HDRLEN -------> <--- ROCKER_TLV_ALIGN(payload) ---> + * +-----------------------------+- - -+- - - - - - - - - - - - - - -+- - -+ + * | Header | Pad | Payload | Pad | + * | (RockerTlv) | ing | | ing | + * +-----------------------------+- - -+- - - - - - - - - - - - - - -+- - -+ + * <--------------------------- tlv->len --------------------------> + */ + +static inline RockerTlv *rocker_tlv_next(const RockerTlv *tlv, int *remaining) +{ + int totlen = ROCKER_TLV_ALIGN(le16_to_cpu(tlv->len)); + + *remaining -= totlen; + return (RockerTlv *) ((char *) tlv + totlen); +} + +static inline int rocker_tlv_ok(const RockerTlv *tlv, int remaining) +{ + return remaining >= (int) ROCKER_TLV_HDRLEN && + le16_to_cpu(tlv->len) >= ROCKER_TLV_HDRLEN && + le16_to_cpu(tlv->len) <= remaining; +} + +#define rocker_tlv_for_each(pos, head, len, rem) \ + for (pos = head, rem = len; \ + rocker_tlv_ok(pos, rem); \ + pos = rocker_tlv_next(pos, &(rem))) + +#define rocker_tlv_for_each_nested(pos, tlv, rem) \ + rocker_tlv_for_each(pos, rocker_tlv_data(tlv), rocker_tlv_len(tlv), rem) + +static inline int rocker_tlv_size(int payload) +{ + return ROCKER_TLV_HDRLEN + payload; +} + +static inline int rocker_tlv_total_size(int payload) +{ + return ROCKER_TLV_ALIGN(rocker_tlv_size(payload)); +} + +static inline int rocker_tlv_padlen(int payload) +{ + return rocker_tlv_total_size(payload) - rocker_tlv_size(payload); +} + +static inline int rocker_tlv_type(const RockerTlv *tlv) +{ + return le32_to_cpu(tlv->type); +} + +static inline void *rocker_tlv_data(const RockerTlv *tlv) +{ + return (char *) tlv + ROCKER_TLV_HDRLEN; +} + +static inline int rocker_tlv_len(const RockerTlv *tlv) +{ + return le16_to_cpu(tlv->len) - ROCKER_TLV_HDRLEN; +} + +static inline uint8_t rocker_tlv_get_u8(const RockerTlv *tlv) +{ + return *(uint8_t *) rocker_tlv_data(tlv); +} + +static inline uint16_t rocker_tlv_get_u16(const RockerTlv *tlv) +{ + return *(uint16_t *) rocker_tlv_data(tlv); +} + +static inline uint32_t rocker_tlv_get_u32(const RockerTlv *tlv) +{ + return *(uint32_t *) rocker_tlv_data(tlv); +} + +static inline uint64_t rocker_tlv_get_u64(const RockerTlv *tlv) +{ + return *(uint64_t *) rocker_tlv_data(tlv); +} + +static inline uint16_t rocker_tlv_get_le16(const RockerTlv *tlv) +{ + return le16_to_cpup((uint16_t *) rocker_tlv_data(tlv)); +} + +static inline uint32_t rocker_tlv_get_le32(const RockerTlv *tlv) +{ + return le32_to_cpup((uint32_t *) rocker_tlv_data(tlv)); +} + +static inline uint64_t rocker_tlv_get_le64(const RockerTlv *tlv) +{ + return le64_to_cpup((uint64_t *) rocker_tlv_data(tlv)); +} + +static inline void rocker_tlv_parse(RockerTlv **tb, int maxtype, + const char *buf, int buf_len) +{ + const RockerTlv *tlv; + const RockerTlv *head = (const RockerTlv *) buf; + int rem; + + memset(tb, 0, sizeof(RockerTlv *) * (maxtype + 1)); + + rocker_tlv_for_each(tlv, head, buf_len, rem) { + uint32_t type = rocker_tlv_type(tlv); + + if (type > 0 && type <= maxtype) { + tb[type] = (RockerTlv *) tlv; + } + } +} + +static inline void rocker_tlv_parse_nested(RockerTlv **tb, int maxtype, + const RockerTlv *tlv) +{ + rocker_tlv_parse(tb, maxtype, rocker_tlv_data(tlv), rocker_tlv_len(tlv)); +} + +static inline RockerTlv *rocker_tlv_start(char *buf, int buf_pos) +{ + return 
+
+static inline RockerTlv *rocker_tlv_start(char *buf, int buf_pos)
+{
+    return (RockerTlv *) (buf + buf_pos);
+}
+
+static inline void rocker_tlv_put_iov(char *buf, int *buf_pos,
+                                      int type, const struct iovec *iov,
+                                      const unsigned int iovcnt)
+{
+    size_t len = iov_size(iov, iovcnt);
+    int total_size = rocker_tlv_total_size(len);
+    RockerTlv *tlv;
+
+    tlv = rocker_tlv_start(buf, *buf_pos);
+    *buf_pos += total_size;
+    tlv->type = cpu_to_le32(type);
+    tlv->len = cpu_to_le16(rocker_tlv_size(len));
+    iov_to_buf(iov, iovcnt, 0, rocker_tlv_data(tlv), len);
+    memset((char *) tlv + le16_to_cpu(tlv->len), 0, rocker_tlv_padlen(len));
+}
+
+static inline void rocker_tlv_put(char *buf, int *buf_pos,
+                                  int type, int len, void *data)
+{
+    struct iovec iov = {
+        .iov_base = data,
+        .iov_len = len,
+    };
+
+    rocker_tlv_put_iov(buf, buf_pos, type, &iov, 1);
+}
+
+static inline void rocker_tlv_put_u8(char *buf, int *buf_pos,
+                                     int type, uint8_t value)
+{
+    rocker_tlv_put(buf, buf_pos, type, sizeof(uint8_t), &value);
+}
+
+static inline void rocker_tlv_put_u16(char *buf, int *buf_pos,
+                                      int type, uint16_t value)
+{
+    rocker_tlv_put(buf, buf_pos, type, sizeof(uint16_t), &value);
+}
+
+static inline void rocker_tlv_put_u32(char *buf, int *buf_pos,
+                                      int type, uint32_t value)
+{
+    rocker_tlv_put(buf, buf_pos, type, sizeof(uint32_t), &value);
+}
+
+static inline void rocker_tlv_put_u64(char *buf, int *buf_pos,
+                                      int type, uint64_t value)
+{
+    rocker_tlv_put(buf, buf_pos, type, sizeof(uint64_t), &value);
+}
+
+static inline void rocker_tlv_put_le16(char *buf, int *buf_pos,
+                                       int type, uint16_t value)
+{
+    value = cpu_to_le16(value);
+    rocker_tlv_put(buf, buf_pos, type, sizeof(uint16_t), &value);
+}
+
+static inline void rocker_tlv_put_le32(char *buf, int *buf_pos,
+                                       int type, uint32_t value)
+{
+    value = cpu_to_le32(value);
+    rocker_tlv_put(buf, buf_pos, type, sizeof(uint32_t), &value);
+}
+
+static inline void rocker_tlv_put_le64(char *buf, int *buf_pos,
+                                       int type, uint64_t value)
+{
+    value = cpu_to_le64(value);
+    rocker_tlv_put(buf, buf_pos, type, sizeof(uint64_t), &value);
+}
+
+static inline RockerTlv *rocker_tlv_nest_start(char *buf, int *buf_pos,
+                                               int type)
+{
+    RockerTlv *start = rocker_tlv_start(buf, *buf_pos);
+
+    rocker_tlv_put(buf, buf_pos, type, 0, NULL);
+    return start;
+}
+
+static inline void rocker_tlv_nest_end(char *buf, int *buf_pos,
+                                       RockerTlv *start)
+{
+    /* store len little-endian, matching rocker_tlv_put_iov() */
+    start->len = cpu_to_le16((char *) rocker_tlv_start(buf, *buf_pos) -
+                             (char *) start);
+}
+
+static inline void rocker_tlv_nest_cancel(char *buf, int *buf_pos,
+                                          RockerTlv *start)
+{
+    *buf_pos = (char *) start - buf;
+}
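+
+/*
+ * Compose-side usage sketch, again with hypothetical TLV_EXAMPLE_*
+ * types.  The caller supplies a buffer large enough for everything it
+ * puts; buf_pos advances past each aligned attribute:
+ *
+ *    char buf[256];
+ *    int pos = 0;
+ *    RockerTlv *nest;
+ *
+ *    rocker_tlv_put_le16(buf, &pos, TLV_EXAMPLE_CMD, cmd);
+ *    nest = rocker_tlv_nest_start(buf, &pos, TLV_EXAMPLE_INFO);
+ *    rocker_tlv_put_le32(buf, &pos, TLV_EXAMPLE_PPORT, pport);
+ *    rocker_tlv_nest_end(buf, &pos, nest);
+ *
+ * buf[0..pos) now holds the finished attribute stream;
+ * rocker_tlv_nest_cancel() instead rewinds pos to drop the nest.
+ */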
+
+#endif /* _ROCKER_TLV_H_ */
diff --git a/hw/net/rocker/rocker_world.c b/hw/net/rocker/rocker_world.c
new file mode 100644
index 0000000..b991e87
--- /dev/null
+++ b/hw/net/rocker/rocker_world.c
@@ -0,0 +1,106 @@
+/*
+ * QEMU rocker switch emulation - switch worlds
+ *
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "qemu/iov.h"
+
+#include "rocker.h"
+#include "rocker_world.h"
+
+struct world {
+    Rocker *r;
+    enum rocker_world_type type;
+    WorldOps *ops;
+};
+
+ssize_t world_ingress(World *world, uint32_t pport,
+                      const struct iovec *iov, int iovcnt)
+{
+    if (world->ops->ig) {
+        return world->ops->ig(world, pport, iov, iovcnt);
+    }
+
+    /* no ingress handler: consume the packet silently */
+    return iov_size(iov, iovcnt);
+}
+
+int world_do_cmd(World *world, DescInfo *info,
+                 char *buf, uint16_t cmd, RockerTlv *cmd_info_tlv)
+{
+    if (world->ops->cmd) {
+        return world->ops->cmd(world, info, buf, cmd, cmd_info_tlv);
+    }
+
+    return -ROCKER_ENOTSUP;
+}
+
+World *world_alloc(Rocker *r, size_t sizeof_private,
+                   enum rocker_world_type type, WorldOps *ops)
+{
+    World *w = g_malloc0(sizeof(World) + sizeof_private);
+
+    if (w) {
+        w->r = r;
+        w->type = type;
+        w->ops = ops;
+        if (w->ops->init) {
+            w->ops->init(w);
+        }
+    }
+
+    return w;
+}
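+
+/*
+ * Sketch of a minimal world built on world_alloc().  The hyp_* names
+ * are hypothetical; the one real world wired up by this patch is
+ * OF-DPA, which registers itself the same way in of_dpa_world_alloc().
+ * Private per-world state follows the World struct and is reached with
+ * world_private():
+ *
+ *    typedef struct hyp_world {
+ *        uint64_t pkts;
+ *    } HypWorld;
+ *
+ *    static ssize_t hyp_ig(World *world, uint32_t pport,
+ *                          const struct iovec *iov, int iovcnt)
+ *    {
+ *        HypWorld *hw = world_private(world);
+ *
+ *        hw->pkts++;
+ *        return iov_size(iov, iovcnt);
+ *    }
+ *
+ *    static WorldOps hyp_ops = {
+ *        .ig = hyp_ig,
+ *    };
+ *
+ *    World *hyp_world_alloc(Rocker *r)
+ *    {
+ *        return world_alloc(r, sizeof(HypWorld),
+ *                           ROCKER_WORLD_TYPE_OF_DPA, &hyp_ops);
+ *    }
+ *
+ * Ops left NULL fall back to the defaults above: ingress consumes the
+ * packet, commands fail with -ROCKER_ENOTSUP.
+ */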
+
+void world_free(World *world)
+{
+    if (world->ops->uninit) {
+        world->ops->uninit(world);
+    }
+    g_free(world);
+}
+
+void world_reset(World *world)
+{
+    if (world->ops->uninit) {
+        world->ops->uninit(world);
+    }
+    if (world->ops->init) {
+        world->ops->init(world);
+    }
+}
+
+void *world_private(World *world)
+{
+    return world + 1;
+}
+
+Rocker *world_rocker(World *world)
+{
+    return world->r;
+}
+
+enum rocker_world_type world_type(World *world)
+{
+    return world->type;
+}
+
+const char *world_name(World *world)
+{
+    switch (world->type) {
+    case ROCKER_WORLD_TYPE_OF_DPA:
+        return "OF_DPA";
+    default:
+        return "unknown";
+    }
+}
diff --git a/hw/net/rocker/rocker_world.h b/hw/net/rocker/rocker_world.h
new file mode 100644
index 0000000..18d277b
--- /dev/null
+++ b/hw/net/rocker/rocker_world.h
@@ -0,0 +1,60 @@
+/*
+ * QEMU rocker switch emulation - switch worlds
+ *
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _ROCKER_WORLD_H_
+#define _ROCKER_WORLD_H_
+
+#include "rocker_hw.h"
+
+enum rocker_world_type {
+    ROCKER_WORLD_TYPE_OF_DPA = ROCKER_PORT_MODE_OF_DPA,
+    ROCKER_WORLD_TYPE_MAX,
+};
+
+typedef int (world_init)(World *world);
+typedef void (world_uninit)(World *world);
+typedef ssize_t (world_ig)(World *world, uint32_t pport,
+                           const struct iovec *iov, int iovcnt);
+typedef int (world_cmd)(World *world, DescInfo *info,
+                        char *buf, uint16_t cmd,
+                        RockerTlv *cmd_info_tlv);
+
+typedef struct world_ops {
+    world_init *init;
+    world_uninit *uninit;
+    world_ig *ig;
+    world_cmd *cmd;
+} WorldOps;
+
+ssize_t world_ingress(World *world, uint32_t pport,
+                      const struct iovec *iov, int iovcnt);
+int world_do_cmd(World *world, DescInfo *info,
+                 char *buf, uint16_t cmd, RockerTlv *cmd_info_tlv);
+
+World *world_alloc(Rocker *r, size_t sizeof_private,
+                   enum rocker_world_type type, WorldOps *ops);
+void world_free(World *world);
+void world_reset(World *world);
+
+void *world_private(World *world);
+Rocker *world_rocker(World *world);
+
+enum rocker_world_type world_type(World *world);
+const char *world_name(World *world);
+
+World *rocker_get_world(Rocker *r, enum rocker_world_type type);
+
+#endif /* _ROCKER_WORLD_H_ */
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index cc252ed..3af6faf 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -226,12 +226,6 @@ static void rxfilter_notify(NetClientState *nc)
     }
 }
 
-static char *mac_strdup_printf(const uint8_t *mac)
-{
-    return g_strdup_printf("%.2x:%.2x:%.2x:%.2x:%.2x:%.2x", mac[0],
-                           mac[1], mac[2], mac[3], mac[4], mac[5]);
-}
-
 static intList *get_vlan_table(VirtIONet *n)
 {
     intList *list, *entry;
@@ -284,12 +278,12 @@ static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
     info->multicast_overflow = n->mac_table.multi_overflow;
     info->unicast_overflow = n->mac_table.uni_overflow;
 
-    info->main_mac = mac_strdup_printf(n->mac);
+    info->main_mac = qemu_mac_strdup_printf(n->mac);
 
     str_list = NULL;
     for (i = 0; i < n->mac_table.first_multi; i++) {
         entry = g_malloc0(sizeof(*entry));
-        entry->value = mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
+        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
         entry->next = str_list;
         str_list = entry;
     }
@@ -298,7 +292,7 @@ static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
     str_list = NULL;
     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
         entry = g_malloc0(sizeof(*entry));
-        entry->value = mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
+        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
         entry->next = str_list;
         str_list = entry;
     }
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index d4ffead..5d050c8 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -88,6 +88,7 @@
 #define PCI_DEVICE_ID_REDHAT_SERIAL2     0x0003
 #define PCI_DEVICE_ID_REDHAT_SERIAL4     0x0004
 #define PCI_DEVICE_ID_REDHAT_TEST        0x0005
+#define PCI_DEVICE_ID_REDHAT_ROCKER      0x0006
 #define PCI_DEVICE_ID_REDHAT_SDHCI       0x0007
 #define PCI_DEVICE_ID_REDHAT_PCIE_HOST   0x0008
 #define PCI_DEVICE_ID_REDHAT_QXL         0x0100
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index d7be386..c6de710 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -23,6 +23,7 @@
 #define PCI_CLASS_STORAGE_OTHER          0x0180
 
 #define PCI_CLASS_NETWORK_ETHERNET       0x0200
+#define PCI_CLASS_NETWORK_OTHER          0x0280
 
 #define PCI_CLASS_DISPLAY_VGA            0x0300
 #define PCI_CLASS_DISPLAY_OTHER          0x0380
diff --git a/include/net/net.h b/include/net/net.h
index 50ffcb9..e66ca03 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -97,6 +97,7 @@
 typedef struct NICState {
     bool peer_deleted;
 } NICState;
+char *qemu_mac_strdup_printf(const uint8_t *macaddr);
 NetClientState *qemu_find_netdev(const char *id);
 int qemu_find_net_clients_except(const char *id, NetClientState **ncs,
                                  NetClientOptionsKind type, int max);
diff --git a/net/net.c b/net/net.c
--- a/net/net.c
+++ b/net/net.c
@@ -151,6 +151,13 @@ int parse_host_port(struct sockaddr_in *saddr, const char *str)
     return 0;
 }
 
+char *qemu_mac_strdup_printf(const uint8_t *macaddr)
+{
+    return g_strdup_printf("%.2x:%.2x:%.2x:%.2x:%.2x:%.2x",
+                           macaddr[0], macaddr[1], macaddr[2],
+                           macaddr[3], macaddr[4], macaddr[5]);
+}
+
 void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6])
 {
     snprintf(nc->info_str, sizeof(nc->info_str),
diff --git a/tests/rocker/README b/tests/rocker/README
new file mode 100644
index 0000000..531e673
--- /dev/null
+++ b/tests/rocker/README
@@ -0,0 +1,5 @@
+Tests require simp (simple network simulator) found here:
+
+https://github.com/scottfeldman/simp
+
+Run 'all' to run all tests.
diff --git a/tests/rocker/all b/tests/rocker/all
new file mode 100755
index 0000000..d5ae963
--- /dev/null
+++ b/tests/rocker/all
@@ -0,0 +1,19 @@
+echo -n "Running port test... "
+./port
+if [ $? -eq 0 ]; then echo "pass"; else echo "FAILED"; exit 1; fi
+
+echo -n "Running bridge test... "
+./bridge
+if [ $? -eq 0 ]; then echo "pass"; else echo "FAILED"; exit 1; fi
+
+echo -n "Running bridge STP test... "
+./bridge-stp
+if [ $? -eq 0 ]; then echo "pass"; else echo "FAILED"; exit 1; fi
+
+echo -n "Running bridge VLAN test... "
+./bridge-vlan
+if [ $? -eq 0 ]; then echo "pass"; else echo "FAILED"; exit 1; fi
+
+echo -n "Running bridge VLAN STP test... "
+./bridge-vlan-stp
+if [ $? -eq 0 ]; then echo "pass"; else echo "FAILED"; exit 1; fi
diff --git a/tests/rocker/bridge b/tests/rocker/bridge
new file mode 100755
index 0000000..7a03f9a
--- /dev/null
+++ b/tests/rocker/bridge
@@ -0,0 +1,48 @@
+simp destroy ".*"
+simp create -o sw1:rocker:sw1 tut tut.dot
+simp start tut
+sleep 10
+while ! simp ssh tut sw1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done
+while ! simp ssh tut h1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done
+while !
simp ssh tut h2 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done + +# configure a 2-port bridge + +simp ssh tut sw1 --cmd "sudo /sbin/ip link add name br0 type bridge" +simp ssh tut sw1 --cmd "sudo /sbin/ip link set dev swp1 master br0" +simp ssh tut sw1 --cmd "sudo /sbin/ip link set dev swp2 master br0" + +# turn off vlan default_pvid on br0 + +simp ssh tut sw1 --cmd "echo 0 | sudo dd of=/sys/class/net/br0/bridge/default_pvid 2> /dev/null" + +# turn off learning and flooding in SW + +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 learning off" +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 learning off" + +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 flood off" +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 flood off" + +# turn on learning in HW + +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 learning on self" +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 learning on self" + +# bring up bridge and ports + +simp ssh tut sw1 --cmd "sudo ifconfig br0 up" +simp ssh tut sw1 --cmd "sudo ifconfig swp1 up" +simp ssh tut sw1 --cmd "sudo ifconfig swp2 up" +simp ssh tut sw1 --cmd "sudo ifconfig br0 11.0.0.3/24" + +# config IP on hosts + +simp ssh tut h1 --cmd "sudo ifconfig swp1 11.0.0.1/24" +simp ssh tut h2 --cmd "sudo ifconfig swp1 11.0.0.2/24" + +# test... + +simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null" +if [ $? -ne 0 ]; then exit 1; fi +simp ssh tut h1 --cmd "ping -c10 11.0.0.3 >/dev/null" diff --git a/tests/rocker/bridge-stp b/tests/rocker/bridge-stp new file mode 100755 index 0000000..4a111a1 --- /dev/null +++ b/tests/rocker/bridge-stp @@ -0,0 +1,57 @@ +simp destroy ".*" +simp create -o sw1:rocker:sw1 tut tut.dot +simp start tut +sleep 10 +while ! simp ssh tut sw1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done +while ! simp ssh tut h1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done +while ! simp ssh tut h2 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done + +# configure a 2-port bridge + +simp ssh tut sw1 --cmd "sudo /sbin/ip link add name br0 type bridge" +simp ssh tut sw1 --cmd "sudo brctl stp br0 on" +simp ssh tut sw1 --cmd "sudo /sbin/ip link set dev swp1 master br0" +simp ssh tut sw1 --cmd "sudo /sbin/ip link set dev swp2 master br0" + +# turn off vlan default_pvid on br0 + +simp ssh tut sw1 --cmd "echo 0 | sudo dd of=/sys/class/net/br0/bridge/default_pvid 2> /dev/null" + +# turn off learning and flooding in SW + +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 learning off" +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 learning off" + +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 flood off" +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 flood off" + +# turn on learning in HW + +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 learning on self" +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 learning on self" + +# config IP on hosts + +simp ssh tut h1 --cmd "sudo ifconfig swp1 11.0.0.1/24" +simp ssh tut h2 --cmd "sudo ifconfig swp1 11.0.0.2/24" + +# bring up bridge and ports + +simp ssh tut sw1 --cmd "sudo ifconfig br0 up" +simp ssh tut sw1 --cmd "sudo ifconfig swp1 up" +simp ssh tut sw1 --cmd "sudo ifconfig swp2 up" + +# test... + +simp ssh tut h1 --cmd "ping -w 1 -c1 11.0.0.2 >/dev/null" +if [ $? 
-eq 0 ]; then exit 1; fi +sleep 10 +simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null" +sleep 10 +simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null" +sleep 10 +simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null" +sleep 10 +simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null" +sleep 10 +simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null" diff --git a/tests/rocker/bridge-vlan b/tests/rocker/bridge-vlan new file mode 100755 index 0000000..9fa3431 --- /dev/null +++ b/tests/rocker/bridge-vlan @@ -0,0 +1,57 @@ +simp destroy ".*" +simp create -o sw1:rocker:sw1 tut tut.dot +simp start tut +sleep 10 +while ! simp ssh tut sw1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done +while ! simp ssh tut h1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done +while ! simp ssh tut h2 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done + +# configure a 2-port bridge + +simp ssh tut sw1 --cmd "sudo /sbin/ip link add name br0 type bridge" +simp ssh tut sw1 --cmd "sudo /sbin/ip link set dev swp1 master br0" +simp ssh tut sw1 --cmd "sudo /sbin/ip link set dev swp2 master br0" + +# turn off vlan default_pvid on br0 +# turn on vlan filtering on br0 + +simp ssh tut sw1 --cmd "echo 0 | sudo dd of=/sys/class/net/br0/bridge/default_pvid 2> /dev/null" +simp ssh tut sw1 --cmd "echo 1 | sudo dd of=/sys/class/net/br0/bridge/vlan_filtering 2> /dev/null" + +# add both ports to VLAN 57 + +simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev swp1 master" +simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev swp2 master" + +# turn off learning and flooding in SW + +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 learning off" +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 learning off" + +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 flood off" +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 flood off" + +# turn on learning in HW + +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 learning on self" +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 learning on self" + +# bring up bridge and ports + +simp ssh tut sw1 --cmd "sudo ifconfig br0 up" +simp ssh tut sw1 --cmd "sudo ifconfig swp1 up" +simp ssh tut sw1 --cmd "sudo ifconfig swp2 up" + +# config IP on host VLANs + +simp ssh tut h1 --cmd "sudo vconfig add swp1 57 >/dev/null 2>&1" +simp ssh tut h1 --cmd "sudo ifconfig swp1 up" +simp ssh tut h1 --cmd "sudo ifconfig swp1.57 11.0.0.1/24" + +simp ssh tut h2 --cmd "sudo vconfig add swp1 57 >/dev/null 2>&1" +simp ssh tut h2 --cmd "sudo ifconfig swp1 up" +simp ssh tut h2 --cmd "sudo ifconfig swp1.57 11.0.0.2/24" + +# test... + +simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null" diff --git a/tests/rocker/bridge-vlan-stp b/tests/rocker/bridge-vlan-stp new file mode 100755 index 0000000..77ab67e --- /dev/null +++ b/tests/rocker/bridge-vlan-stp @@ -0,0 +1,69 @@ +simp destroy ".*" +simp create -o sw1:rocker:sw1 tut tut.dot +simp start tut +sleep 10 +while ! simp ssh tut sw1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done +while ! simp ssh tut h1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done +while ! 
simp ssh tut h2 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done + +# configure a 2-port bridge + +simp ssh tut sw1 --cmd "sudo /sbin/ip link add name br0 type bridge" +simp ssh tut sw1 --cmd "sudo brctl stp br0 on" +simp ssh tut sw1 --cmd "sudo /sbin/ip link set dev swp1 master br0" +simp ssh tut sw1 --cmd "sudo /sbin/ip link set dev swp2 master br0" + +# turn off vlan default_pvid on br0 +# turn on vlan filtering on br0 + +simp ssh tut sw1 --cmd "echo 0 | sudo dd of=/sys/class/net/br0/bridge/default_pvid 2> /dev/null" +simp ssh tut sw1 --cmd "echo 1 | sudo dd of=/sys/class/net/br0/bridge/vlan_filtering 2> /dev/null" + +# add both ports to VLAN 57 + +simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev swp1 master" +simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev swp2 master" + +# turn off learning and flooding in SW + +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 learning off" +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 learning off" + +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 flood off" +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 flood off" + +# turn on learning in HW + +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 learning on self" +simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 learning on self" + +# config IP on host VLANs + +simp ssh tut h1 --cmd "sudo vconfig add swp1 57 >/dev/null 2>&1" +simp ssh tut h1 --cmd "sudo ifconfig swp1 up" +simp ssh tut h1 --cmd "sudo ifconfig swp1.57 11.0.0.1/24" + +simp ssh tut h2 --cmd "sudo vconfig add swp1 57 >/dev/null 2>&1" +simp ssh tut h2 --cmd "sudo ifconfig swp1 up" +simp ssh tut h2 --cmd "sudo ifconfig swp1.57 11.0.0.2/24" + +# bring up bridge and ports + +simp ssh tut sw1 --cmd "sudo ifconfig br0 up" +simp ssh tut sw1 --cmd "sudo ifconfig swp1 up" +simp ssh tut sw1 --cmd "sudo ifconfig swp2 up" + +# test... + +simp ssh tut h1 --cmd "ping -w 1 -c1 11.0.0.2 >/dev/null" +if [ $? -eq 0 ]; then exit 1; fi +sleep 10 +simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null" +sleep 10 +simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null" +sleep 10 +simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null" +sleep 10 +simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null" +sleep 10 +simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null" diff --git a/tests/rocker/port b/tests/rocker/port new file mode 100755 index 0000000..3437f7d --- /dev/null +++ b/tests/rocker/port @@ -0,0 +1,22 @@ +simp destroy ".*" +simp create -o sw1:rocker:sw1 tut tut.dot +simp start tut +while ! simp ssh tut sw1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done +while ! simp ssh tut h1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done +while ! simp ssh tut h2 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done + +# bring up DUT ports + +simp ssh tut sw1 --cmd "sudo ifconfig swp1 11.0.0.1/24" +simp ssh tut sw1 --cmd "sudo ifconfig swp2 12.0.0.1/24" + +# config IP on hosts + +simp ssh tut h1 --cmd "sudo ifconfig swp1 11.0.0.2/24" +simp ssh tut h2 --cmd "sudo ifconfig swp1 12.0.0.2/24" + +# test... + +simp ssh tut h1 --cmd "ping -c10 11.0.0.1 >/dev/null" +if [ $? 
-ne 0 ]; then exit 1; fi
+simp ssh tut h2 --cmd "ping -c10 12.0.0.1 >/dev/null"
diff --git a/tests/rocker/tut.dot b/tests/rocker/tut.dot
new file mode 100644
index 0000000..87f7266
--- /dev/null
+++ b/tests/rocker/tut.dot
@@ -0,0 +1,8 @@
+graph G {
+    graph [hostidtype="hostname", version="1:0", date="04/12/2013"];
+    edge [dir=none, notify="log"];
+    sw1:swp1 -- h1:swp1;
+    sw1:swp2 -- h2:swp1;
+    sw1:swp3 -- h3:swp1;
+    sw1:swp4 -- h4:swp1;
+}