diff options
Diffstat (limited to 'src/net')
-rw-r--r-- | src/net/Makefile.objs | 17 | ||||
-rw-r--r-- | src/net/checksum.c | 121 | ||||
-rw-r--r-- | src/net/clients.h | 65 | ||||
-rw-r--r-- | src/net/dump.c | 362 | ||||
-rw-r--r-- | src/net/eth.c | 217 | ||||
-rw-r--r-- | src/net/filter-buffer.c | 186 | ||||
-rw-r--r-- | src/net/filter.c | 237 | ||||
-rw-r--r-- | src/net/hub.c | 354 | ||||
-rw-r--r-- | src/net/hub.h | 26 | ||||
-rw-r--r-- | src/net/l2tpv3.c | 744 | ||||
-rw-r--r-- | src/net/net.c | 1539 | ||||
-rw-r--r-- | src/net/netmap.c | 463 | ||||
-rw-r--r-- | src/net/queue.c | 278 | ||||
-rw-r--r-- | src/net/slirp.c | 805 | ||||
-rw-r--r-- | src/net/socket.c | 774 | ||||
-rw-r--r-- | src/net/tap-aix.c | 87 | ||||
-rw-r--r-- | src/net/tap-bsd.c | 255 | ||||
-rw-r--r-- | src/net/tap-haiku.c | 86 | ||||
-rw-r--r-- | src/net/tap-linux.c | 314 | ||||
-rw-r--r-- | src/net/tap-linux.h | 54 | ||||
-rw-r--r-- | src/net/tap-solaris.c | 254 | ||||
-rw-r--r-- | src/net/tap-win32.c | 820 | ||||
-rw-r--r-- | src/net/tap.c | 930 | ||||
-rw-r--r-- | src/net/tap_int.h | 49 | ||||
-rw-r--r-- | src/net/util.c | 60 | ||||
-rw-r--r-- | src/net/util.h | 32 | ||||
-rw-r--r-- | src/net/vde.c | 128 | ||||
-rw-r--r-- | src/net/vhost-user.c | 326 |
28 files changed, 9583 insertions, 0 deletions
diff --git a/src/net/Makefile.objs b/src/net/Makefile.objs new file mode 100644 index 0000000..5fa2f97 --- /dev/null +++ b/src/net/Makefile.objs @@ -0,0 +1,17 @@ +common-obj-y = net.o queue.o checksum.o util.o hub.o +common-obj-y += socket.o +common-obj-y += dump.o +common-obj-y += eth.o +common-obj-$(CONFIG_L2TPV3) += l2tpv3.o +common-obj-$(CONFIG_POSIX) += tap.o vhost-user.o +common-obj-$(CONFIG_LINUX) += tap-linux.o +common-obj-$(CONFIG_WIN32) += tap-win32.o +common-obj-$(CONFIG_BSD) += tap-bsd.o +common-obj-$(CONFIG_SOLARIS) += tap-solaris.o +common-obj-$(CONFIG_AIX) += tap-aix.o +common-obj-$(CONFIG_HAIKU) += tap-haiku.o +common-obj-$(CONFIG_SLIRP) += slirp.o +common-obj-$(CONFIG_VDE) += vde.o +common-obj-$(CONFIG_NETMAP) += netmap.o +common-obj-y += filter.o +common-obj-y += filter-buffer.o diff --git a/src/net/checksum.c b/src/net/checksum.c new file mode 100644 index 0000000..0942437 --- /dev/null +++ b/src/net/checksum.c @@ -0,0 +1,121 @@ +/* + * IP checksumming functions. + * (c) 2008 Gerd Hoffmann <kraxel@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 or later of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu-common.h" +#include "net/checksum.h" + +#define PROTO_TCP 6 +#define PROTO_UDP 17 + +uint32_t net_checksum_add_cont(int len, uint8_t *buf, int seq) +{ + uint32_t sum = 0; + int i; + + for (i = seq; i < seq + len; i++) { + if (i & 1) { + sum += (uint32_t)buf[i - seq]; + } else { + sum += (uint32_t)buf[i - seq] << 8; + } + } + return sum; +} + +uint16_t net_checksum_finish(uint32_t sum) +{ + while (sum>>16) + sum = (sum & 0xFFFF)+(sum >> 16); + return ~sum; +} + +uint16_t net_checksum_tcpudp(uint16_t length, uint16_t proto, + uint8_t *addrs, uint8_t *buf) +{ + uint32_t sum = 0; + + sum += net_checksum_add(length, buf); // payload + sum += net_checksum_add(8, addrs); // src + dst address + sum += proto + length; // protocol & length + return net_checksum_finish(sum); +} + +void net_checksum_calculate(uint8_t *data, int length) +{ + int hlen, plen, proto, csum_offset; + uint16_t csum; + + /* Ensure data has complete L2 & L3 headers. */ + if (length < 14 + 20) { + return; + } + + if ((data[14] & 0xf0) != 0x40) + return; /* not IPv4 */ + hlen = (data[14] & 0x0f) * 4; + plen = (data[16] << 8 | data[17]) - hlen; + proto = data[23]; + + switch (proto) { + case PROTO_TCP: + csum_offset = 16; + break; + case PROTO_UDP: + csum_offset = 6; + break; + default: + return; + } + + if (plen < csum_offset + 2 || 14 + hlen + plen > length) { + return; + } + + data[14+hlen+csum_offset] = 0; + data[14+hlen+csum_offset+1] = 0; + csum = net_checksum_tcpudp(plen, proto, data+14+12, data+14+hlen); + data[14+hlen+csum_offset] = csum >> 8; + data[14+hlen+csum_offset+1] = csum & 0xff; +} + +uint32_t +net_checksum_add_iov(const struct iovec *iov, const unsigned int iov_cnt, + uint32_t iov_off, uint32_t size) +{ + size_t iovec_off, buf_off; + unsigned int i; + uint32_t res = 0; + uint32_t seq = 0; + + iovec_off = 0; + buf_off = 0; + for (i = 0; i < iov_cnt && size; i++) { + if (iov_off < (iovec_off + iov[i].iov_len)) { + size_t len = MIN((iovec_off + iov[i].iov_len) - 
iov_off , size); + void *chunk_buf = iov[i].iov_base + (iov_off - iovec_off); + + res += net_checksum_add_cont(len, chunk_buf, seq); + seq += len; + + buf_off += len; + iov_off += len; + size -= len; + } + iovec_off += iov[i].iov_len; + } + return res; +} diff --git a/src/net/clients.h b/src/net/clients.h new file mode 100644 index 0000000..d47530e --- /dev/null +++ b/src/net/clients.h @@ -0,0 +1,65 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef QEMU_NET_CLIENTS_H +#define QEMU_NET_CLIENTS_H + +#include "net/net.h" +#include "qapi-types.h" + +int net_init_dump(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp); + +#ifdef CONFIG_SLIRP +int net_init_slirp(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp); +#endif + +int net_init_hubport(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp); + +int net_init_socket(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp); + +int net_init_tap(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp); + +int net_init_bridge(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp); + +int net_init_l2tpv3(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp); +#ifdef CONFIG_VDE +int net_init_vde(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp); +#endif + +#ifdef CONFIG_NETMAP +int net_init_netmap(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp); +#endif + +int net_init_vhost_user(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp); + +#endif /* QEMU_NET_CLIENTS_H */ diff --git a/src/net/dump.c b/src/net/dump.c new file mode 100644 index 0000000..347b5ca --- /dev/null +++ b/src/net/dump.c @@ -0,0 +1,362 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "clients.h" +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "qemu/iov.h" +#include "qemu/log.h" +#include "qemu/timer.h" +#include "qapi/visitor.h" +#include "net/filter.h" + +typedef struct DumpState { + int64_t start_ts; + int fd; + int pcap_caplen; +} DumpState; + +#define PCAP_MAGIC 0xa1b2c3d4 + +struct pcap_file_hdr { + uint32_t magic; + uint16_t version_major; + uint16_t version_minor; + int32_t thiszone; + uint32_t sigfigs; + uint32_t snaplen; + uint32_t linktype; +}; + +struct pcap_sf_pkthdr { + struct { + int32_t tv_sec; + int32_t tv_usec; + } ts; + uint32_t caplen; + uint32_t len; +}; + +static ssize_t dump_receive_iov(DumpState *s, const struct iovec *iov, int cnt) +{ + struct pcap_sf_pkthdr hdr; + int64_t ts; + int caplen; + size_t size = iov_size(iov, cnt); + struct iovec dumpiov[cnt + 1]; + + /* Early return in case of previous error. */ + if (s->fd < 0) { + return size; + } + + ts = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL); + caplen = size > s->pcap_caplen ? 
s->pcap_caplen : size; + + hdr.ts.tv_sec = ts / 1000000 + s->start_ts; + hdr.ts.tv_usec = ts % 1000000; + hdr.caplen = caplen; + hdr.len = size; + + dumpiov[0].iov_base = &hdr; + dumpiov[0].iov_len = sizeof(hdr); + cnt = iov_copy(&dumpiov[1], cnt, iov, cnt, 0, caplen); + + if (writev(s->fd, dumpiov, cnt + 1) != sizeof(hdr) + caplen) { + qemu_log("-net dump write error - stop dump\n"); + close(s->fd); + s->fd = -1; + } + + return size; +} + +static void dump_cleanup(DumpState *s) +{ + close(s->fd); + s->fd = -1; +} + +static int net_dump_state_init(DumpState *s, const char *filename, + int len, Error **errp) +{ + struct pcap_file_hdr hdr; + struct tm tm; + int fd; + + fd = open(filename, O_CREAT | O_TRUNC | O_WRONLY | O_BINARY, 0644); + if (fd < 0) { + error_setg_errno(errp, errno, "-net dump: can't open %s", filename); + return -1; + } + + hdr.magic = PCAP_MAGIC; + hdr.version_major = 2; + hdr.version_minor = 4; + hdr.thiszone = 0; + hdr.sigfigs = 0; + hdr.snaplen = len; + hdr.linktype = 1; + + if (write(fd, &hdr, sizeof(hdr)) < sizeof(hdr)) { + error_setg_errno(errp, errno, "-net dump write error"); + close(fd); + return -1; + } + + s->fd = fd; + s->pcap_caplen = len; + + qemu_get_timedate(&tm, 0); + s->start_ts = mktime(&tm); + + return 0; +} + +/* Dumping via VLAN netclient */ + +struct DumpNetClient { + NetClientState nc; + DumpState ds; +}; +typedef struct DumpNetClient DumpNetClient; + +static ssize_t dumpclient_receive(NetClientState *nc, const uint8_t *buf, + size_t size) +{ + DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc); + struct iovec iov = { + .iov_base = (void *)buf, + .iov_len = size + }; + + return dump_receive_iov(&dc->ds, &iov, 1); +} + +static ssize_t dumpclient_receive_iov(NetClientState *nc, + const struct iovec *iov, int cnt) +{ + DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc); + + return dump_receive_iov(&dc->ds, iov, cnt); +} + +static void dumpclient_cleanup(NetClientState *nc) +{ + DumpNetClient *dc = DO_UPCAST(DumpNetClient, 
nc, nc); + + dump_cleanup(&dc->ds); +} + +static NetClientInfo net_dump_info = { + .type = NET_CLIENT_OPTIONS_KIND_DUMP, + .size = sizeof(DumpNetClient), + .receive = dumpclient_receive, + .receive_iov = dumpclient_receive_iov, + .cleanup = dumpclient_cleanup, +}; + +int net_init_dump(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp) +{ + int len, rc; + const char *file; + char def_file[128]; + const NetdevDumpOptions *dump; + NetClientState *nc; + DumpNetClient *dnc; + + assert(opts->type == NET_CLIENT_OPTIONS_KIND_DUMP); + dump = opts->u.dump; + + assert(peer); + + if (dump->has_file) { + file = dump->file; + } else { + int id; + int ret; + + ret = net_hub_id_for_client(peer, &id); + assert(ret == 0); /* peer must be on a hub */ + + snprintf(def_file, sizeof(def_file), "qemu-vlan%d.pcap", id); + file = def_file; + } + + if (dump->has_len) { + if (dump->len > INT_MAX) { + error_setg(errp, "invalid length: %"PRIu64, dump->len); + return -1; + } + len = dump->len; + } else { + len = 65536; + } + + nc = qemu_new_net_client(&net_dump_info, peer, "dump", name); + snprintf(nc->info_str, sizeof(nc->info_str), + "dump to %s (len=%d)", file, len); + + dnc = DO_UPCAST(DumpNetClient, nc, nc); + rc = net_dump_state_init(&dnc->ds, file, len, errp); + if (rc) { + qemu_del_net_client(nc); + } + return rc; +} + +/* Dumping via filter */ + +#define TYPE_FILTER_DUMP "filter-dump" + +#define FILTER_DUMP(obj) \ + OBJECT_CHECK(NetFilterDumpState, (obj), TYPE_FILTER_DUMP) + +struct NetFilterDumpState { + NetFilterState nfs; + DumpState ds; + char *filename; + uint32_t maxlen; +}; +typedef struct NetFilterDumpState NetFilterDumpState; + +static ssize_t filter_dump_receive_iov(NetFilterState *nf, NetClientState *sndr, + unsigned flags, const struct iovec *iov, + int iovcnt, NetPacketSent *sent_cb) +{ + NetFilterDumpState *nfds = FILTER_DUMP(nf); + + dump_receive_iov(&nfds->ds, iov, iovcnt); + return 0; +} + +static void 
filter_dump_cleanup(NetFilterState *nf) +{ + NetFilterDumpState *nfds = FILTER_DUMP(nf); + + dump_cleanup(&nfds->ds); +} + +static void filter_dump_setup(NetFilterState *nf, Error **errp) +{ + NetFilterDumpState *nfds = FILTER_DUMP(nf); + + if (!nfds->filename) { + error_setg(errp, "dump filter needs 'file' property set!"); + return; + } + + net_dump_state_init(&nfds->ds, nfds->filename, nfds->maxlen, errp); +} + +static void filter_dump_get_maxlen(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + NetFilterDumpState *nfds = FILTER_DUMP(obj); + uint32_t value = nfds->maxlen; + + visit_type_uint32(v, &value, name, errp); +} + +static void filter_dump_set_maxlen(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + NetFilterDumpState *nfds = FILTER_DUMP(obj); + Error *local_err = NULL; + uint32_t value; + + visit_type_uint32(v, &value, name, &local_err); + if (local_err) { + goto out; + } + if (value == 0) { + error_setg(&local_err, "Property '%s.%s' doesn't take value '%u'", + object_get_typename(obj), name, value); + goto out; + } + nfds->maxlen = value; + +out: + error_propagate(errp, local_err); +} + +static char *file_dump_get_filename(Object *obj, Error **errp) +{ + NetFilterDumpState *nfds = FILTER_DUMP(obj); + + return g_strdup(nfds->filename); +} + +static void file_dump_set_filename(Object *obj, const char *value, Error **errp) +{ + NetFilterDumpState *nfds = FILTER_DUMP(obj); + + g_free(nfds->filename); + nfds->filename = g_strdup(value); +} + +static void filter_dump_instance_init(Object *obj) +{ + NetFilterDumpState *nfds = FILTER_DUMP(obj); + + nfds->maxlen = 65536; + + object_property_add(obj, "maxlen", "int", filter_dump_get_maxlen, + filter_dump_set_maxlen, NULL, NULL, NULL); + object_property_add_str(obj, "file", file_dump_get_filename, + file_dump_set_filename, NULL); +} + +static void filter_dump_instance_finalize(Object *obj) +{ + NetFilterDumpState *nfds = FILTER_DUMP(obj); + + 
g_free(nfds->filename); +} + +static void filter_dump_class_init(ObjectClass *oc, void *data) +{ + NetFilterClass *nfc = NETFILTER_CLASS(oc); + + nfc->setup = filter_dump_setup; + nfc->cleanup = filter_dump_cleanup; + nfc->receive_iov = filter_dump_receive_iov; +} + +static const TypeInfo filter_dump_info = { + .name = TYPE_FILTER_DUMP, + .parent = TYPE_NETFILTER, + .class_init = filter_dump_class_init, + .instance_init = filter_dump_instance_init, + .instance_finalize = filter_dump_instance_finalize, + .instance_size = sizeof(NetFilterDumpState), +}; + +static void filter_dump_register_types(void) +{ + type_register_static(&filter_dump_info); +} + +type_init(filter_dump_register_types); diff --git a/src/net/eth.c b/src/net/eth.c new file mode 100644 index 0000000..7c61132 --- /dev/null +++ b/src/net/eth.c @@ -0,0 +1,217 @@ +/* + * QEMU network structures definitions and helper functions + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman <dmitry@daynix.com> + * Tamir Shomer <tamirs@daynix.com> + * Yan Vugenfirer <yan@daynix.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include "net/eth.h" +#include "net/checksum.h" +#include "qemu-common.h" +#include "net/tap.h" + +void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag, + bool *is_new) +{ + struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr); + + switch (be16_to_cpu(ehdr->h_proto)) { + case ETH_P_VLAN: + case ETH_P_DVLAN: + /* vlan hdr exists */ + *is_new = false; + break; + + default: + /* No VLAN header, put a new one */ + vhdr->h_proto = ehdr->h_proto; + ehdr->h_proto = cpu_to_be16(ETH_P_VLAN); + *is_new = true; + break; + } + vhdr->h_tci = cpu_to_be16(vlan_tag); +} + +uint8_t +eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto) +{ + uint8_t ecn_state = 0; + + if (l3_proto == ETH_P_IP) { + struct ip_header *iphdr = (struct ip_header *) l3_hdr; + + if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) { + if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) { + ecn_state = VIRTIO_NET_HDR_GSO_ECN; + } + if (l4proto == IP_PROTO_TCP) { + return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state; + } else if (l4proto == IP_PROTO_UDP) { + return VIRTIO_NET_HDR_GSO_UDP | ecn_state; + } + } + } else if (l3_proto == ETH_P_IPV6) { + struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr; + + if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) { + ecn_state = VIRTIO_NET_HDR_GSO_ECN; + } + + if (l4proto == IP_PROTO_TCP) { + return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state; + } + } + + /* Unsupported offload */ + g_assert_not_reached(); + + return VIRTIO_NET_HDR_GSO_NONE | ecn_state; +} + +void eth_get_protocols(const uint8_t *headers, + uint32_t hdr_length, + bool *isip4, bool *isip6, + bool *isudp, bool *istcp) +{ + int proto; + size_t l2hdr_len = eth_get_l2_hdr_length(headers); + assert(hdr_length >= eth_get_l2_hdr_length(headers)); + *isip4 = *isip6 = *isudp = *istcp = false; + + proto = eth_get_l3_proto(headers, l2hdr_len); + if (proto == ETH_P_IP) { + *isip4 = true; + + struct ip_header *iphdr; + + assert(hdr_length >= + eth_get_l2_hdr_length(headers) + sizeof(struct 
ip_header)); + + iphdr = PKT_GET_IP_HDR(headers); + + if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) { + if (iphdr->ip_p == IP_PROTO_TCP) { + *istcp = true; + } else if (iphdr->ip_p == IP_PROTO_UDP) { + *isudp = true; + } + } + } else if (proto == ETH_P_IPV6) { + uint8_t l4proto; + size_t full_ip6hdr_len; + + struct iovec hdr_vec; + hdr_vec.iov_base = (void *) headers; + hdr_vec.iov_len = hdr_length; + + *isip6 = true; + if (eth_parse_ipv6_hdr(&hdr_vec, 1, l2hdr_len, + &l4proto, &full_ip6hdr_len)) { + if (l4proto == IP_PROTO_TCP) { + *istcp = true; + } else if (l4proto == IP_PROTO_UDP) { + *isudp = true; + } + } + } +} + +void +eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len, + void *l3hdr, size_t l3hdr_len, + size_t l3payload_len, + size_t frag_offset, bool more_frags) +{ + if (eth_get_l3_proto(l2hdr, l2hdr_len) == ETH_P_IP) { + uint16_t orig_flags; + struct ip_header *iphdr = (struct ip_header *) l3hdr; + uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE; + uint16_t new_ip_off; + + assert(frag_offset % IP_FRAG_UNIT_SIZE == 0); + assert((frag_off_units & ~IP_OFFMASK) == 0); + + orig_flags = be16_to_cpu(iphdr->ip_off) & ~(IP_OFFMASK|IP_MF); + new_ip_off = frag_off_units | orig_flags | (more_frags ? 
IP_MF : 0); + iphdr->ip_off = cpu_to_be16(new_ip_off); + iphdr->ip_len = cpu_to_be16(l3payload_len + l3hdr_len); + } +} + +void +eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len) +{ + struct ip_header *iphdr = (struct ip_header *) l3hdr; + iphdr->ip_sum = 0; + iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len)); +} + +uint32_t +eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl) +{ + struct ip_pseudo_header ipph; + ipph.ip_src = iphdr->ip_src; + ipph.ip_dst = iphdr->ip_dst; + ipph.ip_payload = cpu_to_be16(csl); + ipph.ip_proto = iphdr->ip_p; + ipph.zeros = 0; + return net_checksum_add(sizeof(ipph), (uint8_t *) &ipph); +} + +static bool +eth_is_ip6_extension_header_type(uint8_t hdr_type) +{ + switch (hdr_type) { + case IP6_HOP_BY_HOP: + case IP6_ROUTING: + case IP6_FRAGMENT: + case IP6_ESP: + case IP6_AUTHENTICATION: + case IP6_DESTINATON: + case IP6_MOBILITY: + return true; + default: + return false; + } +} + +bool eth_parse_ipv6_hdr(struct iovec *pkt, int pkt_frags, + size_t ip6hdr_off, uint8_t *l4proto, + size_t *full_hdr_len) +{ + struct ip6_header ip6_hdr; + struct ip6_ext_hdr ext_hdr; + size_t bytes_read; + + bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off, + &ip6_hdr, sizeof(ip6_hdr)); + if (bytes_read < sizeof(ip6_hdr)) { + return false; + } + + *full_hdr_len = sizeof(struct ip6_header); + + if (!eth_is_ip6_extension_header_type(ip6_hdr.ip6_nxt)) { + *l4proto = ip6_hdr.ip6_nxt; + return true; + } + + do { + bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + *full_hdr_len, + &ext_hdr, sizeof(ext_hdr)); + *full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY; + } while (eth_is_ip6_extension_header_type(ext_hdr.ip6r_nxt)); + + *l4proto = ext_hdr.ip6r_nxt; + return true; +} diff --git a/src/net/filter-buffer.c b/src/net/filter-buffer.c new file mode 100644 index 0000000..57be149 --- /dev/null +++ b/src/net/filter-buffer.c @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2015 FUJITSU LIMITED + * Author: Yang Hongyang 
<yanghy@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "net/filter.h" +#include "net/queue.h" +#include "qemu-common.h" +#include "qemu/timer.h" +#include "qemu/iov.h" +#include "qapi/qmp/qerror.h" +#include "qapi-visit.h" +#include "qom/object.h" + +#define TYPE_FILTER_BUFFER "filter-buffer" + +#define FILTER_BUFFER(obj) \ + OBJECT_CHECK(FilterBufferState, (obj), TYPE_FILTER_BUFFER) + +typedef struct FilterBufferState { + NetFilterState parent_obj; + + NetQueue *incoming_queue; + uint32_t interval; + QEMUTimer release_timer; +} FilterBufferState; + +static void filter_buffer_flush(NetFilterState *nf) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + if (!qemu_net_queue_flush(s->incoming_queue)) { + /* Unable to empty the queue, purge remaining packets */ + qemu_net_queue_purge(s->incoming_queue, nf->netdev); + } +} + +static void filter_buffer_release_timer(void *opaque) +{ + NetFilterState *nf = opaque; + FilterBufferState *s = FILTER_BUFFER(nf); + + /* + * Note: filter_buffer_flush() drops packets that can't be sent + * TODO: We should leave them queued. But currently there's no way + * for the next filter or receiver to notify us that it can receive + * more packets. + */ + filter_buffer_flush(nf); + /* Timer rearmed to fire again in s->interval microseconds. */ + timer_mod(&s->release_timer, + qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval); +} + +/* filter APIs */ +static ssize_t filter_buffer_receive_iov(NetFilterState *nf, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + /* + * We return size when buffer a packet, the sender will take it as + * a already sent packet, so sent_cb should not be called later. 
+ * + * FIXME: Even if the guest can't receive packets for some reasons, + * the filter can still accept packets until its internal queue is full. + * For example: + * For some reason, receiver could not receive more packets + * (.can_receive() returns zero). Without a filter, at most one packet + * will be queued in incoming queue and sender's poll will be disabled + * unit its sent_cb() was called. With a filter, it will keep receiving + * the packets without caring about the receiver. This is suboptimal. + * May need more thoughts (e.g keeping sent_cb). + */ + qemu_net_queue_append_iov(s->incoming_queue, sender, flags, + iov, iovcnt, NULL); + return iov_size(iov, iovcnt); +} + +static void filter_buffer_cleanup(NetFilterState *nf) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + if (s->interval) { + timer_del(&s->release_timer); + } + + /* flush packets */ + if (s->incoming_queue) { + filter_buffer_flush(nf); + g_free(s->incoming_queue); + } +} + +static void filter_buffer_setup(NetFilterState *nf, Error **errp) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + /* + * We may want to accept zero interval when VM FT solutions like MC + * or COLO use this filter to release packets on demand. + */ + if (!s->interval) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "interval", + "a non-zero interval"); + return; + } + + s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf); + if (s->interval) { + timer_init_us(&s->release_timer, QEMU_CLOCK_VIRTUAL, + filter_buffer_release_timer, nf); + /* Timer armed to fire in s->interval microseconds. 
*/ + timer_mod(&s->release_timer, + qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval); + } +} + +static void filter_buffer_class_init(ObjectClass *oc, void *data) +{ + NetFilterClass *nfc = NETFILTER_CLASS(oc); + + nfc->setup = filter_buffer_setup; + nfc->cleanup = filter_buffer_cleanup; + nfc->receive_iov = filter_buffer_receive_iov; +} + +static void filter_buffer_get_interval(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + FilterBufferState *s = FILTER_BUFFER(obj); + uint32_t value = s->interval; + + visit_type_uint32(v, &value, name, errp); +} + +static void filter_buffer_set_interval(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + FilterBufferState *s = FILTER_BUFFER(obj); + Error *local_err = NULL; + uint32_t value; + + visit_type_uint32(v, &value, name, &local_err); + if (local_err) { + goto out; + } + if (!value) { + error_setg(&local_err, "Property '%s.%s' requires a positive value", + object_get_typename(obj), name); + goto out; + } + s->interval = value; + +out: + error_propagate(errp, local_err); +} + +static void filter_buffer_init(Object *obj) +{ + object_property_add(obj, "interval", "int", + filter_buffer_get_interval, + filter_buffer_set_interval, NULL, NULL, NULL); +} + +static const TypeInfo filter_buffer_info = { + .name = TYPE_FILTER_BUFFER, + .parent = TYPE_NETFILTER, + .class_init = filter_buffer_class_init, + .instance_init = filter_buffer_init, + .instance_size = sizeof(FilterBufferState), +}; + +static void register_types(void) +{ + type_register_static(&filter_buffer_info); +} + +type_init(register_types); diff --git a/src/net/filter.c b/src/net/filter.c new file mode 100644 index 0000000..f777ba2 --- /dev/null +++ b/src/net/filter.c @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2015 FUJITSU LIMITED + * Author: Yang Hongyang <yanghy@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. 
+ */ + +#include "qemu-common.h" +#include "qapi/qmp/qerror.h" +#include "qemu/error-report.h" + +#include "net/filter.h" +#include "net/net.h" +#include "net/vhost_net.h" +#include "qom/object_interfaces.h" +#include "qemu/iov.h" +#include "qapi/string-output-visitor.h" + +ssize_t qemu_netfilter_receive(NetFilterState *nf, + NetFilterDirection direction, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) +{ + if (nf->direction == direction || + nf->direction == NET_FILTER_DIRECTION_ALL) { + return NETFILTER_GET_CLASS(OBJECT(nf))->receive_iov( + nf, sender, flags, iov, iovcnt, sent_cb); + } + + return 0; +} + +ssize_t qemu_netfilter_pass_to_next(NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + void *opaque) +{ + int ret = 0; + int direction; + NetFilterState *nf = opaque; + NetFilterState *next = QTAILQ_NEXT(nf, next); + + if (!sender || !sender->peer) { + /* no receiver, or sender been deleted, no need to pass it further */ + goto out; + } + + if (nf->direction == NET_FILTER_DIRECTION_ALL) { + if (sender == nf->netdev) { + /* This packet is sent by netdev itself */ + direction = NET_FILTER_DIRECTION_TX; + } else { + direction = NET_FILTER_DIRECTION_RX; + } + } else { + direction = nf->direction; + } + + while (next) { + /* + * if qemu_netfilter_pass_to_next been called, means that + * the packet has been hold by filter and has already retured size + * to the sender, so sent_cb shouldn't be called later, just + * pass NULL to next. + */ + ret = qemu_netfilter_receive(next, direction, sender, flags, iov, + iovcnt, NULL); + if (ret) { + return ret; + } + next = QTAILQ_NEXT(next, next); + } + + /* + * We have gone through all filters, pass it to receiver. + * Do the valid check again incase sender or receiver been + * deleted while we go through filters. 
+ */ + if (sender && sender->peer) { + qemu_net_queue_send_iov(sender->peer->incoming_queue, + sender, flags, iov, iovcnt, NULL); + } + +out: + /* no receiver, or sender been deleted */ + return iov_size(iov, iovcnt); +} + +static char *netfilter_get_netdev_id(Object *obj, Error **errp) +{ + NetFilterState *nf = NETFILTER(obj); + + return g_strdup(nf->netdev_id); +} + +static void netfilter_set_netdev_id(Object *obj, const char *str, Error **errp) +{ + NetFilterState *nf = NETFILTER(obj); + + nf->netdev_id = g_strdup(str); +} + +static int netfilter_get_direction(Object *obj, Error **errp G_GNUC_UNUSED) +{ + NetFilterState *nf = NETFILTER(obj); + return nf->direction; +} + +static void netfilter_set_direction(Object *obj, int direction, Error **errp) +{ + NetFilterState *nf = NETFILTER(obj); + nf->direction = direction; +} + +static void netfilter_init(Object *obj) +{ + object_property_add_str(obj, "netdev", + netfilter_get_netdev_id, netfilter_set_netdev_id, + NULL); + object_property_add_enum(obj, "queue", "NetFilterDirection", + NetFilterDirection_lookup, + netfilter_get_direction, netfilter_set_direction, + NULL); +} + +static void netfilter_complete(UserCreatable *uc, Error **errp) +{ + NetFilterState *nf = NETFILTER(uc); + NetClientState *ncs[MAX_QUEUE_NUM]; + NetFilterClass *nfc = NETFILTER_GET_CLASS(uc); + int queues; + Error *local_err = NULL; + char *str, *info; + ObjectProperty *prop; + ObjectPropertyIterator *iter; + StringOutputVisitor *ov; + + if (!nf->netdev_id) { + error_setg(errp, "Parameter 'netdev' is required"); + return; + } + + queues = qemu_find_net_clients_except(nf->netdev_id, ncs, + NET_CLIENT_OPTIONS_KIND_NIC, + MAX_QUEUE_NUM); + if (queues < 1) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "netdev", + "a network backend id"); + return; + } else if (queues > 1) { + error_setg(errp, "multiqueue is not supported"); + return; + } + + if (get_vhost_net(ncs[0])) { + error_setg(errp, "Vhost is not supported"); + return; + } + + nf->netdev 
= ncs[0]; + + if (nfc->setup) { + nfc->setup(nf, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + QTAILQ_INSERT_TAIL(&nf->netdev->filters, nf, next); + + /* generate info str */ + iter = object_property_iter_init(OBJECT(nf)); + while ((prop = object_property_iter_next(iter))) { + if (!strcmp(prop->name, "type")) { + continue; + } + ov = string_output_visitor_new(false); + object_property_get(OBJECT(nf), string_output_get_visitor(ov), + prop->name, errp); + str = string_output_get_string(ov); + string_output_visitor_cleanup(ov); + info = g_strdup_printf(",%s=%s", prop->name, str); + g_strlcat(nf->info_str, info, sizeof(nf->info_str)); + g_free(str); + g_free(info); + } + object_property_iter_free(iter); +} + +static void netfilter_finalize(Object *obj) +{ + NetFilterState *nf = NETFILTER(obj); + NetFilterClass *nfc = NETFILTER_GET_CLASS(obj); + + if (nfc->cleanup) { + nfc->cleanup(nf); + } + + if (nf->netdev && !QTAILQ_EMPTY(&nf->netdev->filters)) { + QTAILQ_REMOVE(&nf->netdev->filters, nf, next); + } + g_free(nf->netdev_id); +} + +static void netfilter_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + + ucc->complete = netfilter_complete; +} + +static const TypeInfo netfilter_info = { + .name = TYPE_NETFILTER, + .parent = TYPE_OBJECT, + .abstract = true, + .class_size = sizeof(NetFilterClass), + .class_init = netfilter_class_init, + .instance_size = sizeof(NetFilterState), + .instance_init = netfilter_init, + .instance_finalize = netfilter_finalize, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static void register_types(void) +{ + type_register_static(&netfilter_info); +} + +type_init(register_types); diff --git a/src/net/hub.c b/src/net/hub.c new file mode 100644 index 0000000..9ae9f01 --- /dev/null +++ b/src/net/hub.c @@ -0,0 +1,354 @@ +/* + * Hub net client + * + * Copyright IBM, Corp. 
2012 + * + * Authors: + * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> + * Zhi Yong Wu <wuzhy@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "monitor/monitor.h" +#include "net/net.h" +#include "clients.h" +#include "hub.h" +#include "qemu/iov.h" + +/* + * A hub broadcasts incoming packets to all its ports except the source port. + * Hubs can be used to provide independent network segments, also confusingly + * named the QEMU 'vlan' feature. + */ + +typedef struct NetHub NetHub; + +typedef struct NetHubPort { + NetClientState nc; + QLIST_ENTRY(NetHubPort) next; + NetHub *hub; + int id; +} NetHubPort; + +struct NetHub { + int id; + QLIST_ENTRY(NetHub) next; + int num_ports; + QLIST_HEAD(, NetHubPort) ports; +}; + +static QLIST_HEAD(, NetHub) hubs = QLIST_HEAD_INITIALIZER(&hubs); + +static ssize_t net_hub_receive(NetHub *hub, NetHubPort *source_port, + const uint8_t *buf, size_t len) +{ + NetHubPort *port; + + QLIST_FOREACH(port, &hub->ports, next) { + if (port == source_port) { + continue; + } + + qemu_send_packet(&port->nc, buf, len); + } + return len; +} + +static ssize_t net_hub_receive_iov(NetHub *hub, NetHubPort *source_port, + const struct iovec *iov, int iovcnt) +{ + NetHubPort *port; + ssize_t len = iov_size(iov, iovcnt); + + QLIST_FOREACH(port, &hub->ports, next) { + if (port == source_port) { + continue; + } + + qemu_sendv_packet(&port->nc, iov, iovcnt); + } + return len; +} + +static NetHub *net_hub_new(int id) +{ + NetHub *hub; + + hub = g_malloc(sizeof(*hub)); + hub->id = id; + hub->num_ports = 0; + QLIST_INIT(&hub->ports); + + QLIST_INSERT_HEAD(&hubs, hub, next); + + return hub; +} + +static int net_hub_port_can_receive(NetClientState *nc) +{ + NetHubPort *port; + NetHubPort *src_port = DO_UPCAST(NetHubPort, nc, nc); + NetHub *hub = src_port->hub; + + QLIST_FOREACH(port, &hub->ports, next) { + if (port == src_port) { + 
continue; + } + + if (qemu_can_send_packet(&port->nc)) { + return 1; + } + } + + return 0; +} + +static ssize_t net_hub_port_receive(NetClientState *nc, + const uint8_t *buf, size_t len) +{ + NetHubPort *port = DO_UPCAST(NetHubPort, nc, nc); + + return net_hub_receive(port->hub, port, buf, len); +} + +static ssize_t net_hub_port_receive_iov(NetClientState *nc, + const struct iovec *iov, int iovcnt) +{ + NetHubPort *port = DO_UPCAST(NetHubPort, nc, nc); + + return net_hub_receive_iov(port->hub, port, iov, iovcnt); +} + +static void net_hub_port_cleanup(NetClientState *nc) +{ + NetHubPort *port = DO_UPCAST(NetHubPort, nc, nc); + + QLIST_REMOVE(port, next); +} + +static NetClientInfo net_hub_port_info = { + .type = NET_CLIENT_OPTIONS_KIND_HUBPORT, + .size = sizeof(NetHubPort), + .can_receive = net_hub_port_can_receive, + .receive = net_hub_port_receive, + .receive_iov = net_hub_port_receive_iov, + .cleanup = net_hub_port_cleanup, +}; + +static NetHubPort *net_hub_port_new(NetHub *hub, const char *name) +{ + NetClientState *nc; + NetHubPort *port; + int id = hub->num_ports++; + char default_name[128]; + + if (!name) { + snprintf(default_name, sizeof(default_name), + "hub%dport%d", hub->id, id); + name = default_name; + } + + nc = qemu_new_net_client(&net_hub_port_info, NULL, "hub", name); + port = DO_UPCAST(NetHubPort, nc, nc); + port->id = id; + port->hub = hub; + + QLIST_INSERT_HEAD(&hub->ports, port, next); + + return port; +} + +/** + * Create a port on a given hub + * @name: Net client name or NULL for default name. + * + * If there is no existing hub with the given id then a new hub is created. 
+ */ +NetClientState *net_hub_add_port(int hub_id, const char *name) +{ + NetHub *hub; + NetHubPort *port; + + QLIST_FOREACH(hub, &hubs, next) { + if (hub->id == hub_id) { + break; + } + } + + /* the check below relies on the loop leaving hub NULL when no id matched */ + if (!hub) { + hub = net_hub_new(hub_id); + } + + port = net_hub_port_new(hub, name); + return &port->nc; +} + +/** + * Find a specific client on a hub + * + * Returns the peer of a port on hub @hub_id whose name equals @name, + * or NULL if no such peer exists. + */ +NetClientState *net_hub_find_client_by_name(int hub_id, const char *name) +{ + NetHub *hub; + NetHubPort *port; + NetClientState *peer; + + QLIST_FOREACH(hub, &hubs, next) { + if (hub->id == hub_id) { + QLIST_FOREACH(port, &hub->ports, next) { + peer = port->nc.peer; + + /* ports without a peer are skipped */ + if (peer && strcmp(peer->name, name) == 0) { + return peer; + } + } + } + } + return NULL; +} + +/** + * Find an available port on a hub; otherwise create a new port + * + * A port counts as available when it has no peer. If hub @hub_id has no + * such port (or does not exist) a port is added via net_hub_add_port(). + */ +NetClientState *net_hub_port_find(int hub_id) +{ + NetHub *hub; + NetHubPort *port; + NetClientState *nc; + + QLIST_FOREACH(hub, &hubs, next) { + if (hub->id == hub_id) { + QLIST_FOREACH(port, &hub->ports, next) { + nc = port->nc.peer; + if (!nc) { + return &(port->nc); + } + } + /* matching hub found but all of its ports have peers */ + break; + } + } + + nc = net_hub_add_port(hub_id, NULL); + return nc; +} + +/** + * Print hub configuration + * + * Writes one line per hub and one per port to @mon; a port with a peer + * is followed by that peer's description from print_net_client(). + */ +void net_hub_info(Monitor *mon) +{ + NetHub *hub; + NetHubPort *port; + + QLIST_FOREACH(hub, &hubs, next) { + monitor_printf(mon, "hub %d\n", hub->id); + QLIST_FOREACH(port, &hub->ports, next) { + monitor_printf(mon, " \\ %s", port->nc.name); + if (port->nc.peer) { + monitor_printf(mon, ": "); + print_net_client(mon, port->nc.peer); + } else { + monitor_printf(mon, "\n"); + } + } + } +} + +/** + * Get the hub id that a client is connected to + * + * @id: Pointer for hub id output, may be NULL + */ +int net_hub_id_for_client(NetClientState *nc, int *id) +{ + NetHubPort *port; + + /* @nc may be a hub port itself or a client whose peer is a hub port */ + if (nc->info->type == NET_CLIENT_OPTIONS_KIND_HUBPORT) { + port = DO_UPCAST(NetHubPort, nc, nc); + } else if (nc->peer != NULL && nc->peer->info->type == + NET_CLIENT_OPTIONS_KIND_HUBPORT) { + port = DO_UPCAST(NetHubPort, 
nc, nc->peer); + } else { + return -ENOENT; + } + + if (id) { + *id = port->hub->id; + } + return 0; +} + +int net_init_hubport(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp) +{ + const NetdevHubPortOptions *hubport; + + assert(opts->type == NET_CLIENT_OPTIONS_KIND_HUBPORT); + assert(!peer); + hubport = opts->u.hubport; + + net_hub_add_port(hubport->hubid, name); + return 0; +} + +/** + * Warn if hub configurations are likely wrong + */ +void net_hub_check_clients(void) +{ + NetHub *hub; + NetHubPort *port; + NetClientState *peer; + + QLIST_FOREACH(hub, &hubs, next) { + int has_nic = 0, has_host_dev = 0; + + QLIST_FOREACH(port, &hub->ports, next) { + peer = port->nc.peer; + if (!peer) { + fprintf(stderr, "Warning: hub port %s has no peer\n", + port->nc.name); + continue; + } + + switch (peer->info->type) { + case NET_CLIENT_OPTIONS_KIND_NIC: + has_nic = 1; + break; + case NET_CLIENT_OPTIONS_KIND_USER: + case NET_CLIENT_OPTIONS_KIND_TAP: + case NET_CLIENT_OPTIONS_KIND_SOCKET: + case NET_CLIENT_OPTIONS_KIND_VDE: + case NET_CLIENT_OPTIONS_KIND_VHOST_USER: + has_host_dev = 1; + break; + default: + break; + } + } + if (has_host_dev && !has_nic) { + fprintf(stderr, "Warning: vlan %d with no nics\n", hub->id); + } + if (has_nic && !has_host_dev) { + fprintf(stderr, + "Warning: vlan %d is not connected to host network\n", + hub->id); + } + } +} + +bool net_hub_flush(NetClientState *nc) +{ + NetHubPort *port; + NetHubPort *source_port = DO_UPCAST(NetHubPort, nc, nc); + int ret = 0; + + QLIST_FOREACH(port, &source_port->hub->ports, next) { + if (port != source_port) { + ret += qemu_net_queue_flush(port->nc.incoming_queue); + } + } + return ret ? true : false; +} diff --git a/src/net/hub.h b/src/net/hub.h new file mode 100644 index 0000000..a625eff --- /dev/null +++ b/src/net/hub.h @@ -0,0 +1,26 @@ +/* + * Hub net client + * + * Copyright IBM, Corp. 
2012 + * + * Authors: + * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> + * Zhi Yong Wu <wuzhy@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef NET_HUB_H +#define NET_HUB_H + +#include "qemu-common.h" + +NetClientState *net_hub_add_port(int hub_id, const char *name); +NetClientState *net_hub_find_client_by_name(int hub_id, const char *name); +void net_hub_info(Monitor *mon); +void net_hub_check_clients(void); +bool net_hub_flush(NetClientState *nc); + +#endif /* NET_HUB_H */ diff --git a/src/net/l2tpv3.c b/src/net/l2tpv3.c new file mode 100644 index 0000000..8e68e54 --- /dev/null +++ b/src/net/l2tpv3.c @@ -0,0 +1,744 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2012-2014 Cisco Systems + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include <linux/ip.h> +#include <netdb.h> +#include "config-host.h" +#include "net/net.h" +#include "clients.h" +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "qemu/option.h" +#include "qemu/sockets.h" +#include "qemu/iov.h" +#include "qemu/main-loop.h" + + +/* The buffer size needs to be investigated for optimum numbers and + * optimum means of paging in on different systems. This size is + * chosen to be sufficient to accommodate one packet with some headers + */ + +#define BUFFER_ALIGN sysconf(_SC_PAGESIZE) +#define BUFFER_SIZE 2048 +#define IOVSIZE 2 +#define MAX_L2TPV3_MSGCNT 64 +#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE) + +/* Header set to 0x30000 signifies a data packet */ + +#define L2TPV3_DATA_PACKET 0x30000 + +/* IANA-assigned IP protocol ID for L2TPv3 */ + +#ifndef IPPROTO_L2TP +#define IPPROTO_L2TP 0x73 +#endif + +typedef struct NetL2TPV3State { + NetClientState nc; + int fd; + + /* + * these are used for xmit - that happens packet a time + * and for first sign of life packet (easier to parse that once) + */ + + uint8_t *header_buf; + struct iovec *vec; + + /* + * these are used for receive - try to "eat" up to 32 packets at a time + */ + + struct mmsghdr *msgvec; + + /* + * peer address + */ + + struct sockaddr_storage *dgram_dst; + uint32_t dst_size; + + /* + * L2TPv3 parameters + */ + + uint64_t rx_cookie; + uint64_t tx_cookie; + uint32_t rx_session; + uint32_t tx_session; + uint32_t header_size; + uint32_t counter; + + /* + * DOS avoidance in error handling + */ + + bool header_mismatch; + + /* + * Ring buffer handling + */ + + int queue_head; + int queue_tail; + int queue_depth; + + /* + * Precomputed offsets + */ + + uint32_t offset; + uint32_t cookie_offset; + uint32_t counter_offset; + uint32_t session_offset; + + /* Poll Control */ + + bool read_poll; + bool write_poll; + + /* Flags */ + + bool ipv6; + bool udp; + bool has_counter; + bool pin_counter; + bool cookie; + bool cookie_is_64; + +} 
NetL2TPV3State; + +static void net_l2tpv3_send(void *opaque); +static void l2tpv3_writable(void *opaque); + +static void l2tpv3_update_fd_handler(NetL2TPV3State *s) +{ + qemu_set_fd_handler(s->fd, + s->read_poll ? net_l2tpv3_send : NULL, + s->write_poll ? l2tpv3_writable : NULL, + s); +} + +static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable) +{ + if (s->read_poll != enable) { + s->read_poll = enable; + l2tpv3_update_fd_handler(s); + } +} + +static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable) +{ + if (s->write_poll != enable) { + s->write_poll = enable; + l2tpv3_update_fd_handler(s); + } +} + +static void l2tpv3_writable(void *opaque) +{ + NetL2TPV3State *s = opaque; + l2tpv3_write_poll(s, false); + qemu_flush_queued_packets(&s->nc); +} + +static void l2tpv3_send_completed(NetClientState *nc, ssize_t len) +{ + NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); + l2tpv3_read_poll(s, true); +} + +static void l2tpv3_poll(NetClientState *nc, bool enable) +{ + NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); + l2tpv3_write_poll(s, enable); + l2tpv3_read_poll(s, enable); +} + +static void l2tpv3_form_header(NetL2TPV3State *s) +{ + uint32_t *counter; + + if (s->udp) { + stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET); + } + stl_be_p( + (uint32_t *) (s->header_buf + s->session_offset), + s->tx_session + ); + if (s->cookie) { + if (s->cookie_is_64) { + stq_be_p( + (uint64_t *)(s->header_buf + s->cookie_offset), + s->tx_cookie + ); + } else { + stl_be_p( + (uint32_t *) (s->header_buf + s->cookie_offset), + s->tx_cookie + ); + } + } + if (s->has_counter) { + counter = (uint32_t *)(s->header_buf + s->counter_offset); + if (s->pin_counter) { + *counter = 0; + } else { + stl_be_p(counter, ++s->counter); + } + } +} + +static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc, + const struct iovec *iov, + int iovcnt) +{ + NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); + + struct msghdr message; + int ret; + + if (iovcnt > 
MAX_L2TPV3_IOVCNT - 1) { + error_report( + "iovec too long %d > %d, change l2tpv3.h", + iovcnt, MAX_L2TPV3_IOVCNT + ); + return -1; + } + l2tpv3_form_header(s); + memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec)); + s->vec->iov_base = s->header_buf; + s->vec->iov_len = s->offset; + message.msg_name = s->dgram_dst; + message.msg_namelen = s->dst_size; + message.msg_iov = s->vec; + message.msg_iovlen = iovcnt + 1; + message.msg_control = NULL; + message.msg_controllen = 0; + message.msg_flags = 0; + do { + ret = sendmsg(s->fd, &message, 0); + } while ((ret == -1) && (errno == EINTR)); + if (ret > 0) { + ret -= s->offset; + } else if (ret == 0) { + /* belt and braces - should not occur on DGRAM + * we should get an error and never a 0 send + */ + ret = iov_size(iov, iovcnt); + } else { + /* signal upper layer that socket buffer is full */ + ret = -errno; + if (ret == -EAGAIN || ret == -ENOBUFS) { + l2tpv3_write_poll(s, true); + ret = 0; + } + } + return ret; +} + +static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc, + const uint8_t *buf, + size_t size) +{ + NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); + + struct iovec *vec; + struct msghdr message; + ssize_t ret = 0; + + l2tpv3_form_header(s); + vec = s->vec; + vec->iov_base = s->header_buf; + vec->iov_len = s->offset; + vec++; + vec->iov_base = (void *) buf; + vec->iov_len = size; + message.msg_name = s->dgram_dst; + message.msg_namelen = s->dst_size; + message.msg_iov = s->vec; + message.msg_iovlen = 2; + message.msg_control = NULL; + message.msg_controllen = 0; + message.msg_flags = 0; + do { + ret = sendmsg(s->fd, &message, 0); + } while ((ret == -1) && (errno == EINTR)); + if (ret > 0) { + ret -= s->offset; + } else if (ret == 0) { + /* belt and braces - should not occur on DGRAM + * we should get an error and never a 0 send + */ + ret = size; + } else { + ret = -errno; + if (ret == -EAGAIN || ret == -ENOBUFS) { + /* signal upper layer that socket buffer is full */ + l2tpv3_write_poll(s, 
true); + ret = 0; + } + } + return ret; +} + +static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf) +{ + + uint32_t *session; + uint64_t cookie; + + if ((!s->udp) && (!s->ipv6)) { + buf += sizeof(struct iphdr) /* fix for ipv4 raw */; + } + + /* we do not do a strict check for "data" packets as per + * the RFC spec because the pure IP spec does not have + * that anyway. + */ + + if (s->cookie) { + if (s->cookie_is_64) { + cookie = ldq_be_p(buf + s->cookie_offset); + } else { + cookie = ldl_be_p(buf + s->cookie_offset); + } + if (cookie != s->rx_cookie) { + if (!s->header_mismatch) { + error_report("unknown cookie id"); + } + return -1; + } + } + session = (uint32_t *) (buf + s->session_offset); + if (ldl_be_p(session) != s->rx_session) { + if (!s->header_mismatch) { + error_report("session mismatch"); + } + return -1; + } + return 0; +} + +static void net_l2tpv3_process_queue(NetL2TPV3State *s) +{ + int size = 0; + struct iovec *vec; + bool bad_read; + int data_size; + struct mmsghdr *msgvec; + + /* go into ring mode only if there is a "pending" tail */ + if (s->queue_depth > 0) { + do { + msgvec = s->msgvec + s->queue_tail; + if (msgvec->msg_len > 0) { + data_size = msgvec->msg_len - s->header_size; + vec = msgvec->msg_hdr.msg_iov; + if ((data_size > 0) && + (l2tpv3_verify_header(s, vec->iov_base) == 0)) { + vec++; + /* Use the legacy delivery for now, we will + * switch to using our own ring as a queueing mechanism + * at a later date + */ + size = qemu_send_packet_async( + &s->nc, + vec->iov_base, + data_size, + l2tpv3_send_completed + ); + if (size == 0) { + l2tpv3_read_poll(s, false); + } + bad_read = false; + } else { + bad_read = true; + if (!s->header_mismatch) { + /* report error only once */ + error_report("l2tpv3 header verification failed"); + s->header_mismatch = true; + } + } + } else { + bad_read = true; + } + s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT; + s->queue_depth--; + } while ( + (s->queue_depth > 0) && + 
qemu_can_send_packet(&s->nc) && + ((size > 0) || bad_read) + ); + } +} + +static void net_l2tpv3_send(void *opaque) +{ + NetL2TPV3State *s = opaque; + int target_count, count; + struct mmsghdr *msgvec; + + /* go into ring mode only if there is a "pending" tail */ + + if (s->queue_depth) { + + /* The ring buffer we use has variable intake + * count of how much we can read varies - adjust accordingly + */ + + target_count = MAX_L2TPV3_MSGCNT - s->queue_depth; + + /* Ensure we do not overrun the ring when we have + * a lot of enqueued packets + */ + + if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) { + target_count = MAX_L2TPV3_MSGCNT - s->queue_head; + } + } else { + + /* we do not have any pending packets - we can use + * the whole message vector linearly instead of using + * it as a ring + */ + + s->queue_head = 0; + s->queue_tail = 0; + target_count = MAX_L2TPV3_MSGCNT; + } + + msgvec = s->msgvec + s->queue_head; + if (target_count > 0) { + do { + count = recvmmsg( + s->fd, + msgvec, + target_count, MSG_DONTWAIT, NULL); + } while ((count == -1) && (errno == EINTR)); + if (count < 0) { + /* Recv error - we still need to flush packets here, + * (re)set queue head to current position + */ + count = 0; + } + s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT; + s->queue_depth += count; + } + net_l2tpv3_process_queue(s); +} + +static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount) +{ + int i, j; + struct iovec *iov; + struct mmsghdr *cleanup = msgvec; + if (cleanup) { + for (i = 0; i < count; i++) { + if (cleanup->msg_hdr.msg_iov) { + iov = cleanup->msg_hdr.msg_iov; + for (j = 0; j < iovcount; j++) { + g_free(iov->iov_base); + iov++; + } + g_free(cleanup->msg_hdr.msg_iov); + } + cleanup++; + } + g_free(msgvec); + } +} + +static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count) +{ + int i; + struct iovec *iov; + struct mmsghdr *msgvec, *result; + + msgvec = g_new(struct mmsghdr, count); + result = msgvec; + for (i = 0; 
i < count ; i++) { + msgvec->msg_hdr.msg_name = NULL; + msgvec->msg_hdr.msg_namelen = 0; + iov = g_new(struct iovec, IOVSIZE); + msgvec->msg_hdr.msg_iov = iov; + iov->iov_base = g_malloc(s->header_size); + iov->iov_len = s->header_size; + iov++ ; + iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE); + iov->iov_len = BUFFER_SIZE; + msgvec->msg_hdr.msg_iovlen = 2; + msgvec->msg_hdr.msg_control = NULL; + msgvec->msg_hdr.msg_controllen = 0; + msgvec->msg_hdr.msg_flags = 0; + msgvec++; + } + return result; +} + +static void net_l2tpv3_cleanup(NetClientState *nc) +{ + NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); + qemu_purge_queued_packets(nc); + l2tpv3_read_poll(s, false); + l2tpv3_write_poll(s, false); + if (s->fd >= 0) { + close(s->fd); + } + destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE); + g_free(s->vec); + g_free(s->header_buf); + g_free(s->dgram_dst); +} + +static NetClientInfo net_l2tpv3_info = { + .type = NET_CLIENT_OPTIONS_KIND_L2TPV3, + .size = sizeof(NetL2TPV3State), + .receive = net_l2tpv3_receive_dgram, + .receive_iov = net_l2tpv3_receive_dgram_iov, + .poll = l2tpv3_poll, + .cleanup = net_l2tpv3_cleanup, +}; + +int net_init_l2tpv3(const NetClientOptions *opts, + const char *name, + NetClientState *peer, Error **errp) +{ + /* FIXME error_setg(errp, ...) 
on failure */ + const NetdevL2TPv3Options *l2tpv3; + NetL2TPV3State *s; + NetClientState *nc; + int fd = -1, gairet; + struct addrinfo hints; + struct addrinfo *result = NULL; + char *srcport, *dstport; + + nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name); + + s = DO_UPCAST(NetL2TPV3State, nc, nc); + + /* + * The client state is zero-filled on allocation, so fd would read as 0 + * and net_l2tpv3_cleanup() (which closes any fd >= 0) would close + * descriptor 0 if we bail out before a socket exists. Mark it unset. + */ + s->fd = -1; + s->queue_head = 0; + s->queue_tail = 0; + s->header_mismatch = false; + + assert(opts->type == NET_CLIENT_OPTIONS_KIND_L2TPV3); + l2tpv3 = opts->u.l2tpv3; + + if (l2tpv3->has_ipv6 && l2tpv3->ipv6) { + s->ipv6 = l2tpv3->ipv6; + } else { + s->ipv6 = false; + } + + if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) { + error_report("l2tpv3_open : offset must be less than 256 bytes"); + goto outerr; + } + + if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) { + if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) { + s->cookie = true; + } else { + /* cookies are used in both directions or not at all */ + error_report("l2tpv3_open : need both rxcookie and txcookie"); + goto outerr; + } + } else { + s->cookie = false; + } + + /* an explicit cookie64=off must select 32 bit cookies */ + if (l2tpv3->has_cookie64 && l2tpv3->cookie64) { + s->cookie_is_64 = true; + } else { + s->cookie_is_64 = false; + } + + if (l2tpv3->has_udp && l2tpv3->udp) { + s->udp = true; + if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) { + error_report("l2tpv3_open : need both src and dst port for udp"); + goto outerr; + } else { + srcport = l2tpv3->srcport; + dstport = l2tpv3->dstport; + } + } else { + s->udp = false; + srcport = NULL; + dstport = NULL; + } + + + /* base layout: 4 byte session id at offset 0, cookie/counter after it */ + s->offset = 4; + s->session_offset = 0; + s->cookie_offset = 4; + s->counter_offset = 4; + + s->tx_session = l2tpv3->txsession; + if (l2tpv3->has_rxsession) { + s->rx_session = l2tpv3->rxsession; + } else { + s->rx_session = s->tx_session; + } + + if (s->cookie) { + s->rx_cookie = l2tpv3->rxcookie; + s->tx_cookie = l2tpv3->txcookie; + /* the cookie sits before the counter, so it shifts both offsets */ + if (s->cookie_is_64 == true) { + /* 64 bit cookie */ + s->offset += 8; + s->counter_offset += 8; + } else { + /* 32 bit cookie */ + s->offset += 4; + s->counter_offset += 4; + } + } + + memset(&hints, 0, sizeof(hints)); + + if (s->ipv6) { + hints.ai_family = AF_INET6; + } else 
{ + hints.ai_family = AF_INET; + } + if (s->udp) { + hints.ai_socktype = SOCK_DGRAM; + hints.ai_protocol = 0; + s->offset += 4; + s->counter_offset += 4; + s->session_offset += 4; + s->cookie_offset += 4; + } else { + hints.ai_socktype = SOCK_RAW; + hints.ai_protocol = IPPROTO_L2TP; + } + + gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result); + + if ((gairet != 0) || (result == NULL)) { + error_report( + "l2tpv3_open : could not resolve src, errno = %s", + gai_strerror(gairet) + ); + goto outerr; + } + fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol); + if (fd == -1) { + fd = -errno; + error_report("l2tpv3_open : socket creation failed, errno = %d", -fd); + goto outerr; + } + if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) { + error_report("l2tpv3_open : could not bind socket err=%i", errno); + goto outerr; + } + if (result) { + freeaddrinfo(result); + } + + memset(&hints, 0, sizeof(hints)); + + if (s->ipv6) { + hints.ai_family = AF_INET6; + } else { + hints.ai_family = AF_INET; + } + if (s->udp) { + hints.ai_socktype = SOCK_DGRAM; + hints.ai_protocol = 0; + } else { + hints.ai_socktype = SOCK_RAW; + hints.ai_protocol = IPPROTO_L2TP; + } + + result = NULL; + gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result); + if ((gairet != 0) || (result == NULL)) { + error_report( + "l2tpv3_open : could not resolve dst, error = %s", + gai_strerror(gairet) + ); + goto outerr; + } + + s->dgram_dst = g_new0(struct sockaddr_storage, 1); + memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen); + s->dst_size = result->ai_addrlen; + + if (result) { + freeaddrinfo(result); + } + + if (l2tpv3->has_counter && l2tpv3->counter) { + s->has_counter = true; + s->offset += 4; + } else { + s->has_counter = false; + } + + if (l2tpv3->has_pincounter && l2tpv3->pincounter) { + s->has_counter = true; /* pin counter implies that there is counter */ + s->pin_counter = true; + } else { + s->pin_counter = false; + } + + if 
(l2tpv3->has_offset) { + /* extra offset */ + s->offset += l2tpv3->offset; + } + + if ((s->ipv6) || (s->udp)) { + s->header_size = s->offset; + } else { + s->header_size = s->offset + sizeof(struct iphdr); + } + + s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT); + s->vec = g_new(struct iovec, MAX_L2TPV3_IOVCNT); + s->header_buf = g_malloc(s->header_size); + + qemu_set_nonblock(fd); + + s->fd = fd; + s->counter = 0; + + l2tpv3_read_poll(s, true); + + snprintf(s->nc.info_str, sizeof(s->nc.info_str), + "l2tpv3: connected"); + return 0; +outerr: + qemu_del_net_client(nc); + if (fd >= 0) { + close(fd); + } + if (result) { + freeaddrinfo(result); + } + return -1; +} + diff --git a/src/net/net.c b/src/net/net.c new file mode 100644 index 0000000..ade6051 --- /dev/null +++ b/src/net/net.c @@ -0,0 +1,1539 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "config-host.h" + +#include "net/net.h" +#include "clients.h" +#include "hub.h" +#include "net/slirp.h" +#include "net/eth.h" +#include "util.h" + +#include "monitor/monitor.h" +#include "qemu-common.h" +#include "qapi/qmp/qerror.h" +#include "qemu/error-report.h" +#include "qemu/sockets.h" +#include "qemu/config-file.h" +#include "qmp-commands.h" +#include "hw/qdev.h" +#include "qemu/iov.h" +#include "qemu/main-loop.h" +#include "qapi-visit.h" +#include "qapi/opts-visitor.h" +#include "qapi/dealloc-visitor.h" +#include "sysemu/sysemu.h" +#include "net/filter.h" + +/* Net bridge is currently not supported for W32. */ +#if !defined(_WIN32) +# define CONFIG_NET_BRIDGE +#endif + +static VMChangeStateEntry *net_change_state_entry; +static QTAILQ_HEAD(, NetClientState) net_clients; + +const char *host_net_devices[] = { + "tap", + "socket", + "dump", +#ifdef CONFIG_NET_BRIDGE + "bridge", +#endif +#ifdef CONFIG_NETMAP + "netmap", +#endif +#ifdef CONFIG_SLIRP + "user", +#endif +#ifdef CONFIG_VDE + "vde", +#endif + "vhost-user", + NULL, +}; + +int default_net = 1; + +/***********************************************************/ +/* network device redirectors */ + +#if defined(DEBUG_NET) +static void hex_dump(FILE *f, const uint8_t *buf, int size) +{ + int len, i, j, c; + + for(i=0;i<size;i+=16) { + len = size - i; + if (len > 16) + len = 16; + fprintf(f, "%08x ", i); + for(j=0;j<16;j++) { + if (j < len) + fprintf(f, " %02x", buf[i+j]); + else + fprintf(f, " "); + } + fprintf(f, " "); + for(j=0;j<len;j++) { + c = buf[i+j]; + if (c < ' ' || c > '~') + c = '.'; + fprintf(f, "%c", c); + } + fprintf(f, "\n"); + } +} +#endif + +static int get_str_sep(char *buf, int buf_size, const char **pp, int sep) +{ + const char *p, *p1; + int len; + p = *pp; + p1 = strchr(p, sep); + if (!p1) + return -1; + len = p1 - p; + p1++; + if (buf_size > 0) { + if (len > buf_size - 1) + len = buf_size - 1; + memcpy(buf, p, len); + buf[len] = '\0'; + } + *pp = p1; + return 0; +} + +int 
parse_host_port(struct sockaddr_in *saddr, const char *str) +{ + char buf[512]; + struct hostent *he; + const char *p, *r; + long port; + + /* @str is "host:port"; an empty host part selects the zero address */ + p = str; + if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) + return -1; + saddr->sin_family = AF_INET; + if (buf[0] == '\0') { + saddr->sin_addr.s_addr = 0; + } else { + if (qemu_isdigit(buf[0])) { + if (!inet_aton(buf, &saddr->sin_addr)) + return -1; + } else { + if ((he = gethostbyname(buf)) == NULL) + return - 1; + saddr->sin_addr = *(struct in_addr *)he->h_addr; + } + } + port = strtol(p, (char **)&r, 0); + if (r == p) + return -1; + /* reject values htons() would silently truncate to another port */ + if (port < 0 || port > 65535) { + return -1; + } + saddr->sin_port = htons(port); + return 0; +} + +/* returns a newly allocated "xx:xx:xx:xx:xx:xx" string for the 6 byte address */ +char *qemu_mac_strdup_printf(const uint8_t *macaddr) +{ + return g_strdup_printf("%.2x:%.2x:%.2x:%.2x:%.2x:%.2x", + macaddr[0], macaddr[1], macaddr[2], + macaddr[3], macaddr[4], macaddr[5]); +} + +void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6]) +{ + snprintf(nc->info_str, sizeof(nc->info_str), + "model=%s,macaddr=%02x:%02x:%02x:%02x:%02x:%02x", + nc->model, + macaddr[0], macaddr[1], macaddr[2], + macaddr[3], macaddr[4], macaddr[5]); +} + +/* use counts indexed by the last address byte; only 0x56..0xFE are touched */ +static int mac_table[256] = {0}; + +static void qemu_macaddr_set_used(MACAddr *macaddr) +{ + int index; + + for (index = 0x56; index < 0xFF; index++) { + if (macaddr->a[5] == index) { + mac_table[index]++; + } + } +} + +static void qemu_macaddr_set_free(MACAddr *macaddr) +{ + int index; + static const MACAddr base = { .a = { 0x52, 0x54, 0x00, 0x12, 0x34, 0 } }; + + /* only addresses sharing the first five bytes of base are tracked */ + if (memcmp(macaddr->a, &base.a, (sizeof(base.a) - 1)) != 0) { + return; + } + for (index = 0x56; index < 0xFF; index++) { + if (macaddr->a[5] == index) { + mac_table[index]--; + } + } +} + +/* returns the lowest last-byte value with a zero use count, or -1 if none */ +static int qemu_macaddr_get_free(void) +{ + int index; + + for (index = 0x56; index < 0xFF; index++) { + if (mac_table[index] == 0) { + return index; + } + } + + return -1; +} + +void qemu_macaddr_default_if_unset(MACAddr *macaddr) +{ + static const MACAddr zero = { .a = { 0,0,0,0,0,0 } }; + static const MACAddr base = { .a = { 0x52, 0x54, 
0x00, 0x12, 0x34, 0 } }; + + if (memcmp(macaddr, &zero, sizeof(zero)) != 0) { + if (memcmp(macaddr->a, &base.a, (sizeof(base.a) - 1)) != 0) { + return; + } else { + qemu_macaddr_set_used(macaddr); + return; + } + } + + macaddr->a[0] = 0x52; + macaddr->a[1] = 0x54; + macaddr->a[2] = 0x00; + macaddr->a[3] = 0x12; + macaddr->a[4] = 0x34; + macaddr->a[5] = qemu_macaddr_get_free(); + qemu_macaddr_set_used(macaddr); +} + +/** + * Generate a name for net client + * + * Only net clients created with the legacy -net option and NICs need this. + */ +static char *assign_name(NetClientState *nc1, const char *model) +{ + NetClientState *nc; + int id = 0; + + QTAILQ_FOREACH(nc, &net_clients, next) { + if (nc == nc1) { + continue; + } + if (strcmp(nc->model, model) == 0) { + id++; + } + } + + return g_strdup_printf("%s.%d", model, id); +} + +static void qemu_net_client_destructor(NetClientState *nc) +{ + g_free(nc); +} + +static void qemu_net_client_setup(NetClientState *nc, + NetClientInfo *info, + NetClientState *peer, + const char *model, + const char *name, + NetClientDestructor *destructor) +{ + nc->info = info; + nc->model = g_strdup(model); + if (name) { + nc->name = g_strdup(name); + } else { + nc->name = assign_name(nc, model); + } + + if (peer) { + assert(!peer->peer); + nc->peer = peer; + peer->peer = nc; + } + QTAILQ_INSERT_TAIL(&net_clients, nc, next); + + nc->incoming_queue = qemu_new_net_queue(qemu_deliver_packet_iov, nc); + nc->destructor = destructor; + QTAILQ_INIT(&nc->filters); +} + +NetClientState *qemu_new_net_client(NetClientInfo *info, + NetClientState *peer, + const char *model, + const char *name) +{ + NetClientState *nc; + + assert(info->size >= sizeof(NetClientState)); + + nc = g_malloc0(info->size); + qemu_net_client_setup(nc, info, peer, model, name, + qemu_net_client_destructor); + + return nc; +} + +NICState *qemu_new_nic(NetClientInfo *info, + NICConf *conf, + const char *model, + const char *name, + void *opaque) +{ + NetClientState **peers = 
conf->peers.ncs; + NICState *nic; + int i, queues = MAX(1, conf->peers.queues); + + assert(info->type == NET_CLIENT_OPTIONS_KIND_NIC); + assert(info->size >= sizeof(NICState)); + + nic = g_malloc0(info->size + sizeof(NetClientState) * queues); + nic->ncs = (void *)nic + info->size; + nic->conf = conf; + nic->opaque = opaque; + + for (i = 0; i < queues; i++) { + qemu_net_client_setup(&nic->ncs[i], info, peers[i], model, name, + NULL); + nic->ncs[i].queue_index = i; + } + + return nic; +} + +NetClientState *qemu_get_subqueue(NICState *nic, int queue_index) +{ + return nic->ncs + queue_index; +} + +NetClientState *qemu_get_queue(NICState *nic) +{ + return qemu_get_subqueue(nic, 0); +} + +NICState *qemu_get_nic(NetClientState *nc) +{ + NetClientState *nc0 = nc - nc->queue_index; + + return (NICState *)((void *)nc0 - nc->info->size); +} + +void *qemu_get_nic_opaque(NetClientState *nc) +{ + NICState *nic = qemu_get_nic(nc); + + return nic->opaque; +} + +static void qemu_cleanup_net_client(NetClientState *nc) +{ + QTAILQ_REMOVE(&net_clients, nc, next); + + if (nc->info->cleanup) { + nc->info->cleanup(nc); + } +} + +static void qemu_free_net_client(NetClientState *nc) +{ + if (nc->incoming_queue) { + qemu_del_net_queue(nc->incoming_queue); + } + if (nc->peer) { + nc->peer->peer = NULL; + } + g_free(nc->name); + g_free(nc->model); + if (nc->destructor) { + nc->destructor(nc); + } +} + +void qemu_del_net_client(NetClientState *nc) +{ + NetClientState *ncs[MAX_QUEUE_NUM]; + int queues, i; + NetFilterState *nf, *next; + + assert(nc->info->type != NET_CLIENT_OPTIONS_KIND_NIC); + + /* If the NetClientState belongs to a multiqueue backend, we will change all + * other NetClientStates also. 
+ */ + queues = qemu_find_net_clients_except(nc->name, ncs, + NET_CLIENT_OPTIONS_KIND_NIC, + MAX_QUEUE_NUM); + assert(queues != 0); + + QTAILQ_FOREACH_SAFE(nf, &nc->filters, next, next) { + object_unparent(OBJECT(nf)); + } + + /* If there is a peer NIC, delete and cleanup client, but do not free. */ + if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_NIC) { + NICState *nic = qemu_get_nic(nc->peer); + if (nic->peer_deleted) { + return; + } + nic->peer_deleted = true; + + for (i = 0; i < queues; i++) { + ncs[i]->peer->link_down = true; + } + + if (nc->peer->info->link_status_changed) { + nc->peer->info->link_status_changed(nc->peer); + } + + for (i = 0; i < queues; i++) { + qemu_cleanup_net_client(ncs[i]); + } + + return; + } + + for (i = 0; i < queues; i++) { + qemu_cleanup_net_client(ncs[i]); + qemu_free_net_client(ncs[i]); + } +} + +void qemu_del_nic(NICState *nic) +{ + int i, queues = MAX(nic->conf->peers.queues, 1); + + qemu_macaddr_set_free(&nic->conf->macaddr); + + /* If this is a peer NIC and peer has already been deleted, free it now. 
*/ + if (nic->peer_deleted) { + for (i = 0; i < queues; i++) { + qemu_free_net_client(qemu_get_subqueue(nic, i)->peer); + } + } + + for (i = queues - 1; i >= 0; i--) { + NetClientState *nc = qemu_get_subqueue(nic, i); + + qemu_cleanup_net_client(nc); + qemu_free_net_client(nc); + } + + g_free(nic); +} + +void qemu_foreach_nic(qemu_nic_foreach func, void *opaque) +{ + NetClientState *nc; + + QTAILQ_FOREACH(nc, &net_clients, next) { + if (nc->info->type == NET_CLIENT_OPTIONS_KIND_NIC) { + if (nc->queue_index == 0) { + func(qemu_get_nic(nc), opaque); + } + } + } +} + +bool qemu_has_ufo(NetClientState *nc) +{ + if (!nc || !nc->info->has_ufo) { + return false; + } + + return nc->info->has_ufo(nc); +} + +bool qemu_has_vnet_hdr(NetClientState *nc) +{ + if (!nc || !nc->info->has_vnet_hdr) { + return false; + } + + return nc->info->has_vnet_hdr(nc); +} + +bool qemu_has_vnet_hdr_len(NetClientState *nc, int len) +{ + if (!nc || !nc->info->has_vnet_hdr_len) { + return false; + } + + return nc->info->has_vnet_hdr_len(nc, len); +} + +void qemu_using_vnet_hdr(NetClientState *nc, bool enable) +{ + if (!nc || !nc->info->using_vnet_hdr) { + return; + } + + nc->info->using_vnet_hdr(nc, enable); +} + +void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6, + int ecn, int ufo) +{ + if (!nc || !nc->info->set_offload) { + return; + } + + nc->info->set_offload(nc, csum, tso4, tso6, ecn, ufo); +} + +void qemu_set_vnet_hdr_len(NetClientState *nc, int len) +{ + if (!nc || !nc->info->set_vnet_hdr_len) { + return; + } + + nc->info->set_vnet_hdr_len(nc, len); +} + +int qemu_set_vnet_le(NetClientState *nc, bool is_le) +{ +#ifdef HOST_WORDS_BIGENDIAN + if (!nc || !nc->info->set_vnet_le) { + return -ENOSYS; + } + + return nc->info->set_vnet_le(nc, is_le); +#else + return 0; +#endif +} + +int qemu_set_vnet_be(NetClientState *nc, bool is_be) +{ +#ifdef HOST_WORDS_BIGENDIAN + return 0; +#else + if (!nc || !nc->info->set_vnet_be) { + return -ENOSYS; + } + + return 
nc->info->set_vnet_be(nc, is_be); +#endif +} + +int qemu_can_send_packet(NetClientState *sender) +{ + int vm_running = runstate_is_running(); + + if (!vm_running) { + return 0; + } + + if (!sender->peer) { + return 1; + } + + if (sender->peer->receive_disabled) { + return 0; + } else if (sender->peer->info->can_receive && + !sender->peer->info->can_receive(sender->peer)) { + return 0; + } + return 1; +} + +static ssize_t filter_receive_iov(NetClientState *nc, + NetFilterDirection direction, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) +{ + ssize_t ret = 0; + NetFilterState *nf = NULL; + + QTAILQ_FOREACH(nf, &nc->filters, next) { + ret = qemu_netfilter_receive(nf, direction, sender, flags, iov, + iovcnt, sent_cb); + if (ret) { + return ret; + } + } + + return ret; +} + +static ssize_t filter_receive(NetClientState *nc, + NetFilterDirection direction, + NetClientState *sender, + unsigned flags, + const uint8_t *data, + size_t size, + NetPacketSent *sent_cb) +{ + struct iovec iov = { + .iov_base = (void *)data, + .iov_len = size + }; + + return filter_receive_iov(nc, direction, sender, flags, &iov, 1, sent_cb); +} + +void qemu_purge_queued_packets(NetClientState *nc) +{ + if (!nc->peer) { + return; + } + + qemu_net_queue_purge(nc->peer->incoming_queue, nc); +} + +static +void qemu_flush_or_purge_queued_packets(NetClientState *nc, bool purge) +{ + nc->receive_disabled = 0; + + if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_HUBPORT) { + if (net_hub_flush(nc->peer)) { + qemu_notify_event(); + } + } + if (qemu_net_queue_flush(nc->incoming_queue)) { + /* We emptied the queue successfully, signal to the IO thread to repoll + * the file descriptor (for tap, for example). 
+ */ + qemu_notify_event(); + } else if (purge) { + /* Unable to empty the queue, purge remaining packets */ + qemu_net_queue_purge(nc->incoming_queue, nc); + } +} + +void qemu_flush_queued_packets(NetClientState *nc) +{ + qemu_flush_or_purge_queued_packets(nc, false); +} + +static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender, + unsigned flags, + const uint8_t *buf, int size, + NetPacketSent *sent_cb) +{ + NetQueue *queue; + int ret; + +#ifdef DEBUG_NET + printf("qemu_send_packet_async:\n"); + hex_dump(stdout, buf, size); +#endif + + if (sender->link_down || !sender->peer) { + return size; + } + + /* Let filters handle the packet first */ + ret = filter_receive(sender, NET_FILTER_DIRECTION_TX, + sender, flags, buf, size, sent_cb); + if (ret) { + return ret; + } + + ret = filter_receive(sender->peer, NET_FILTER_DIRECTION_RX, + sender, flags, buf, size, sent_cb); + if (ret) { + return ret; + } + + queue = sender->peer->incoming_queue; + + return qemu_net_queue_send(queue, sender, flags, buf, size, sent_cb); +} + +ssize_t qemu_send_packet_async(NetClientState *sender, + const uint8_t *buf, int size, + NetPacketSent *sent_cb) +{ + return qemu_send_packet_async_with_flags(sender, QEMU_NET_PACKET_FLAG_NONE, + buf, size, sent_cb); +} + +void qemu_send_packet(NetClientState *nc, const uint8_t *buf, int size) +{ + qemu_send_packet_async(nc, buf, size, NULL); +} + +ssize_t qemu_send_packet_raw(NetClientState *nc, const uint8_t *buf, int size) +{ + return qemu_send_packet_async_with_flags(nc, QEMU_NET_PACKET_FLAG_RAW, + buf, size, NULL); +} + +static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov, + int iovcnt, unsigned flags) +{ + uint8_t buf[NET_BUFSIZE]; + uint8_t *buffer; + size_t offset; + + if (iovcnt == 1) { + buffer = iov[0].iov_base; + offset = iov[0].iov_len; + } else { + buffer = buf; + offset = iov_to_buf(iov, iovcnt, 0, buf, sizeof(buf)); + } + + if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { + return 
nc->info->receive_raw(nc, buffer, offset); + } else { + return nc->info->receive(nc, buffer, offset); + } +} + +ssize_t qemu_deliver_packet_iov(NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + void *opaque) +{ + NetClientState *nc = opaque; + int ret; + + if (nc->link_down) { + return iov_size(iov, iovcnt); + } + + if (nc->receive_disabled) { + return 0; + } + + if (nc->info->receive_iov) { + ret = nc->info->receive_iov(nc, iov, iovcnt); + } else { + ret = nc_sendv_compat(nc, iov, iovcnt, flags); + } + + if (ret == 0) { + nc->receive_disabled = 1; + } + + return ret; +} + +ssize_t qemu_sendv_packet_async(NetClientState *sender, + const struct iovec *iov, int iovcnt, + NetPacketSent *sent_cb) +{ + NetQueue *queue; + int ret; + + if (sender->link_down || !sender->peer) { + return iov_size(iov, iovcnt); + } + + /* Let filters handle the packet first */ + ret = filter_receive_iov(sender, NET_FILTER_DIRECTION_TX, sender, + QEMU_NET_PACKET_FLAG_NONE, iov, iovcnt, sent_cb); + if (ret) { + return ret; + } + + ret = filter_receive_iov(sender->peer, NET_FILTER_DIRECTION_RX, sender, + QEMU_NET_PACKET_FLAG_NONE, iov, iovcnt, sent_cb); + if (ret) { + return ret; + } + + queue = sender->peer->incoming_queue; + + return qemu_net_queue_send_iov(queue, sender, + QEMU_NET_PACKET_FLAG_NONE, + iov, iovcnt, sent_cb); +} + +ssize_t +qemu_sendv_packet(NetClientState *nc, const struct iovec *iov, int iovcnt) +{ + return qemu_sendv_packet_async(nc, iov, iovcnt, NULL); +} + +NetClientState *qemu_find_netdev(const char *id) +{ + NetClientState *nc; + + QTAILQ_FOREACH(nc, &net_clients, next) { + if (nc->info->type == NET_CLIENT_OPTIONS_KIND_NIC) + continue; + if (!strcmp(nc->name, id)) { + return nc; + } + } + + return NULL; +} + +int qemu_find_net_clients_except(const char *id, NetClientState **ncs, + NetClientOptionsKind type, int max) +{ + NetClientState *nc; + int ret = 0; + + QTAILQ_FOREACH(nc, &net_clients, next) { + if (nc->info->type == type) { + 
continue; + } + if (!id || !strcmp(nc->name, id)) { + if (ret < max) { + ncs[ret] = nc; + } + ret++; + } + } + + return ret; +} + +static int nic_get_free_idx(void) +{ + int index; + + for (index = 0; index < MAX_NICS; index++) + if (!nd_table[index].used) + return index; + return -1; +} + +int qemu_show_nic_models(const char *arg, const char *const *models) +{ + int i; + + if (!arg || !is_help_option(arg)) { + return 0; + } + + fprintf(stderr, "qemu: Supported NIC models: "); + for (i = 0 ; models[i]; i++) + fprintf(stderr, "%s%c", models[i], models[i+1] ? ',' : '\n'); + return 1; +} + +void qemu_check_nic_model(NICInfo *nd, const char *model) +{ + const char *models[2]; + + models[0] = model; + models[1] = NULL; + + if (qemu_show_nic_models(nd->model, models)) + exit(0); + if (qemu_find_nic_model(nd, models, model) < 0) + exit(1); +} + +int qemu_find_nic_model(NICInfo *nd, const char * const *models, + const char *default_model) +{ + int i; + + if (!nd->model) + nd->model = g_strdup(default_model); + + for (i = 0 ; models[i]; i++) { + if (strcmp(nd->model, models[i]) == 0) + return i; + } + + error_report("Unsupported NIC model: %s", nd->model); + return -1; +} + +static int net_init_nic(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp) +{ + int idx; + NICInfo *nd; + const NetLegacyNicOptions *nic; + + assert(opts->type == NET_CLIENT_OPTIONS_KIND_NIC); + nic = opts->u.nic; + + idx = nic_get_free_idx(); + if (idx == -1 || nb_nics >= MAX_NICS) { + error_setg(errp, "too many NICs"); + return -1; + } + + nd = &nd_table[idx]; + + memset(nd, 0, sizeof(*nd)); + + if (nic->has_netdev) { + nd->netdev = qemu_find_netdev(nic->netdev); + if (!nd->netdev) { + error_setg(errp, "netdev '%s' not found", nic->netdev); + return -1; + } + } else { + assert(peer); + nd->netdev = peer; + } + nd->name = g_strdup(name); + if (nic->has_model) { + nd->model = g_strdup(nic->model); + } + if (nic->has_addr) { + nd->devaddr = g_strdup(nic->addr); + } + + 
if (nic->has_macaddr && + net_parse_macaddr(nd->macaddr.a, nic->macaddr) < 0) { + error_setg(errp, "invalid syntax for ethernet address"); + return -1; + } + if (nic->has_macaddr && + is_multicast_ether_addr(nd->macaddr.a)) { + error_setg(errp, + "NIC cannot have multicast MAC address (odd 1st byte)"); + return -1; + } + qemu_macaddr_default_if_unset(&nd->macaddr); + + if (nic->has_vectors) { + if (nic->vectors > 0x7ffffff) { + error_setg(errp, "invalid # of vectors: %"PRIu32, nic->vectors); + return -1; + } + nd->nvectors = nic->vectors; + } else { + nd->nvectors = DEV_NVECTORS_UNSPECIFIED; + } + + nd->used = 1; + nb_nics++; + + return idx; +} + + +static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])( + const NetClientOptions *opts, + const char *name, + NetClientState *peer, Error **errp) = { + [NET_CLIENT_OPTIONS_KIND_NIC] = net_init_nic, +#ifdef CONFIG_SLIRP + [NET_CLIENT_OPTIONS_KIND_USER] = net_init_slirp, +#endif + [NET_CLIENT_OPTIONS_KIND_TAP] = net_init_tap, + [NET_CLIENT_OPTIONS_KIND_SOCKET] = net_init_socket, +#ifdef CONFIG_VDE + [NET_CLIENT_OPTIONS_KIND_VDE] = net_init_vde, +#endif +#ifdef CONFIG_NETMAP + [NET_CLIENT_OPTIONS_KIND_NETMAP] = net_init_netmap, +#endif + [NET_CLIENT_OPTIONS_KIND_DUMP] = net_init_dump, +#ifdef CONFIG_NET_BRIDGE + [NET_CLIENT_OPTIONS_KIND_BRIDGE] = net_init_bridge, +#endif + [NET_CLIENT_OPTIONS_KIND_HUBPORT] = net_init_hubport, +#ifdef CONFIG_VHOST_NET_USED + [NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user, +#endif +#ifdef CONFIG_L2TPV3 + [NET_CLIENT_OPTIONS_KIND_L2TPV3] = net_init_l2tpv3, +#endif +}; + + +static int net_client_init1(const void *object, int is_netdev, Error **errp) +{ + const NetClientOptions *opts; + const char *name; + NetClientState *peer = NULL; + + if (is_netdev) { + const Netdev *netdev = object; + opts = netdev->opts; + name = netdev->id; + + if (opts->type == NET_CLIENT_OPTIONS_KIND_DUMP || + opts->type == NET_CLIENT_OPTIONS_KIND_NIC || + !net_client_init_fun[opts->type]) { 
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", + "a netdev backend type"); + return -1; + } + } else { + const NetLegacy *net = object; + opts = net->opts; + /* missing optional values have been initialized to "all bits zero" */ + name = net->has_id ? net->id : net->name; + + if (opts->type == NET_CLIENT_OPTIONS_KIND_NONE) { + return 0; /* nothing to do */ + } + if (opts->type == NET_CLIENT_OPTIONS_KIND_HUBPORT) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", + "a net type"); + return -1; + } + + if (!net_client_init_fun[opts->type]) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", + "a net backend type (maybe it is not compiled " + "into this binary)"); + return -1; + } + + /* Do not add to a vlan if it's a nic with a netdev= parameter. */ + if (opts->type != NET_CLIENT_OPTIONS_KIND_NIC || + !opts->u.nic->has_netdev) { + peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL); + } + } + + if (net_client_init_fun[opts->type](opts, name, peer, errp) < 0) { + /* FIXME drop when all init functions store an Error */ + if (errp && !*errp) { + error_setg(errp, QERR_DEVICE_INIT_FAILED, + NetClientOptionsKind_lookup[opts->type]); + } + return -1; + } + return 0; +} + + +static void net_visit(Visitor *v, int is_netdev, void **object, Error **errp) +{ + if (is_netdev) { + visit_type_Netdev(v, (Netdev **)object, NULL, errp); + } else { + visit_type_NetLegacy(v, (NetLegacy **)object, NULL, errp); + } +} + + +int net_client_init(QemuOpts *opts, int is_netdev, Error **errp) +{ + void *object = NULL; + Error *err = NULL; + int ret = -1; + + { + OptsVisitor *ov = opts_visitor_new(opts); + + net_visit(opts_get_visitor(ov), is_netdev, &object, &err); + opts_visitor_cleanup(ov); + } + + if (!err) { + ret = net_client_init1(object, is_netdev, &err); + } + + if (object) { + QapiDeallocVisitor *dv = qapi_dealloc_visitor_new(); + + net_visit(qapi_dealloc_get_visitor(dv), is_netdev, &object, NULL); + qapi_dealloc_visitor_cleanup(dv); + } + + 
error_propagate(errp, err); + return ret; +} + + +static int net_host_check_device(const char *device) +{ + int i; + for (i = 0; host_net_devices[i]; i++) { + if (!strncmp(host_net_devices[i], device, + strlen(host_net_devices[i]))) { + return 1; + } + } + + return 0; +} + +void hmp_host_net_add(Monitor *mon, const QDict *qdict) +{ + const char *device = qdict_get_str(qdict, "device"); + const char *opts_str = qdict_get_try_str(qdict, "opts"); + Error *local_err = NULL; + QemuOpts *opts; + + if (!net_host_check_device(device)) { + monitor_printf(mon, "invalid host network device %s\n", device); + return; + } + + opts = qemu_opts_parse_noisily(qemu_find_opts("net"), + opts_str ? opts_str : "", false); + if (!opts) { + return; + } + + qemu_opt_set(opts, "type", device, &error_abort); + + net_client_init(opts, 0, &local_err); + if (local_err) { + error_report_err(local_err); + monitor_printf(mon, "adding host network device %s failed\n", device); + } +} + +void hmp_host_net_remove(Monitor *mon, const QDict *qdict) +{ + NetClientState *nc; + int vlan_id = qdict_get_int(qdict, "vlan_id"); + const char *device = qdict_get_str(qdict, "device"); + + nc = net_hub_find_client_by_name(vlan_id, device); + if (!nc) { + error_report("Host network device '%s' on hub '%d' not found", + device, vlan_id); + return; + } + if (nc->info->type == NET_CLIENT_OPTIONS_KIND_NIC) { + error_report("invalid host network device '%s'", device); + return; + } + + qemu_del_net_client(nc->peer); + qemu_del_net_client(nc); +} + +void netdev_add(QemuOpts *opts, Error **errp) +{ + net_client_init(opts, 1, errp); +} + +void qmp_netdev_add(QDict *qdict, QObject **ret, Error **errp) +{ + Error *local_err = NULL; + QemuOptsList *opts_list; + QemuOpts *opts; + + opts_list = qemu_find_opts_err("netdev", &local_err); + if (local_err) { + goto out; + } + + opts = qemu_opts_from_qdict(opts_list, qdict, &local_err); + if (local_err) { + goto out; + } + + netdev_add(opts, &local_err); + if (local_err) { + 
qemu_opts_del(opts); + goto out; + } + +out: + error_propagate(errp, local_err); +} + +void qmp_netdev_del(const char *id, Error **errp) +{ + NetClientState *nc; + QemuOpts *opts; + + nc = qemu_find_netdev(id); + if (!nc) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "Device '%s' not found", id); + return; + } + + opts = qemu_opts_find(qemu_find_opts_err("netdev", NULL), id); + if (!opts) { + error_setg(errp, "Device '%s' is not a netdev", id); + return; + } + + qemu_del_net_client(nc); + qemu_opts_del(opts); +} + +void print_net_client(Monitor *mon, NetClientState *nc) +{ + NetFilterState *nf; + + monitor_printf(mon, "%s: index=%d,type=%s,%s\n", nc->name, + nc->queue_index, + NetClientOptionsKind_lookup[nc->info->type], + nc->info_str); + if (!QTAILQ_EMPTY(&nc->filters)) { + monitor_printf(mon, "filters:\n"); + } + QTAILQ_FOREACH(nf, &nc->filters, next) { + char *path = object_get_canonical_path_component(OBJECT(nf)); + monitor_printf(mon, " - %s: type=%s%s\n", path, + object_get_typename(OBJECT(nf)), + nf->info_str); + g_free(path); + } +} + +RxFilterInfoList *qmp_query_rx_filter(bool has_name, const char *name, + Error **errp) +{ + NetClientState *nc; + RxFilterInfoList *filter_list = NULL, *last_entry = NULL; + + QTAILQ_FOREACH(nc, &net_clients, next) { + RxFilterInfoList *entry; + RxFilterInfo *info; + + if (has_name && strcmp(nc->name, name) != 0) { + continue; + } + + /* only query rx-filter information of NIC */ + if (nc->info->type != NET_CLIENT_OPTIONS_KIND_NIC) { + if (has_name) { + error_setg(errp, "net client(%s) isn't a NIC", name); + return NULL; + } + continue; + } + + /* only query information on queue 0 since the info is per nic, + * not per queue + */ + if (nc->queue_index != 0) + continue; + + if (nc->info->query_rx_filter) { + info = nc->info->query_rx_filter(nc); + entry = g_malloc0(sizeof(*entry)); + entry->value = info; + + if (!filter_list) { + filter_list = entry; + } else { + last_entry->next = entry; + } + last_entry = entry; + } 
else if (has_name) { + error_setg(errp, "net client(%s) doesn't support" + " rx-filter querying", name); + return NULL; + } + + if (has_name) { + break; + } + } + + if (filter_list == NULL && has_name) { + error_setg(errp, "invalid net client name: %s", name); + } + + return filter_list; +} + +void hmp_info_network(Monitor *mon, const QDict *qdict) +{ + NetClientState *nc, *peer; + NetClientOptionsKind type; + + net_hub_info(mon); + + QTAILQ_FOREACH(nc, &net_clients, next) { + peer = nc->peer; + type = nc->info->type; + + /* Skip if already printed in hub info */ + if (net_hub_id_for_client(nc, NULL) == 0) { + continue; + } + + if (!peer || type == NET_CLIENT_OPTIONS_KIND_NIC) { + print_net_client(mon, nc); + } /* else it's a netdev connected to a NIC, printed with the NIC */ + if (peer && type == NET_CLIENT_OPTIONS_KIND_NIC) { + monitor_printf(mon, " \\ "); + print_net_client(mon, peer); + } + } +} + +void qmp_set_link(const char *name, bool up, Error **errp) +{ + NetClientState *ncs[MAX_QUEUE_NUM]; + NetClientState *nc; + int queues, i; + + queues = qemu_find_net_clients_except(name, ncs, + NET_CLIENT_OPTIONS_KIND_MAX, + MAX_QUEUE_NUM); + + if (queues == 0) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "Device '%s' not found", name); + return; + } + nc = ncs[0]; + + for (i = 0; i < queues; i++) { + ncs[i]->link_down = !up; + } + + if (nc->info->link_status_changed) { + nc->info->link_status_changed(nc); + } + + if (nc->peer) { + /* Change peer link only if the peer is NIC and then notify peer. + * If the peer is a HUBPORT or a backend, we do not change the + * link status. + * + * This behavior is compatible with qemu vlans where there could be + * multiple clients that can still communicate with each other in + * disconnected mode. For now maintain this compatibility. 
+ */ + if (nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_NIC) { + for (i = 0; i < queues; i++) { + ncs[i]->peer->link_down = !up; + } + } + if (nc->peer->info->link_status_changed) { + nc->peer->info->link_status_changed(nc->peer); + } + } +} + +static void net_vm_change_state_handler(void *opaque, int running, + RunState state) +{ + NetClientState *nc; + NetClientState *tmp; + + QTAILQ_FOREACH_SAFE(nc, &net_clients, next, tmp) { + if (running) { + /* Flush queued packets and wake up backends. */ + if (nc->peer && qemu_can_send_packet(nc)) { + qemu_flush_queued_packets(nc->peer); + } + } else { + /* Complete all queued packets, to guarantee we don't modify + * state later when VM is not running. + */ + qemu_flush_or_purge_queued_packets(nc, true); + } + } +} + +void net_cleanup(void) +{ + NetClientState *nc; + + /* We may del multiple entries during qemu_del_net_client(), + * so QTAILQ_FOREACH_SAFE() is also not safe here. + */ + while (!QTAILQ_EMPTY(&net_clients)) { + nc = QTAILQ_FIRST(&net_clients); + if (nc->info->type == NET_CLIENT_OPTIONS_KIND_NIC) { + qemu_del_nic(qemu_get_nic(nc)); + } else { + qemu_del_net_client(nc); + } + } + + qemu_del_vm_change_state_handler(net_change_state_entry); +} + +void net_check_clients(void) +{ + NetClientState *nc; + int i; + + /* Don't warn about the default network setup that you get if + * no command line -net or -netdev options are specified. There + * are two cases that we would otherwise complain about: + * (1) board doesn't support a NIC but the implicit "-net nic" + * requested one + * (2) CONFIG_SLIRP not set, in which case the implicit "-net nic" + * sets up a nic that isn't connected to anything. + */ + if (default_net) { + return; + } + + net_hub_check_clients(); + + QTAILQ_FOREACH(nc, &net_clients, next) { + if (!nc->peer) { + fprintf(stderr, "Warning: %s %s has no peer\n", + nc->info->type == NET_CLIENT_OPTIONS_KIND_NIC ? 
+ "nic" : "netdev", nc->name); + } + } + + /* Check that all NICs requested via -net nic actually got created. + * NICs created via -device don't need to be checked here because + * they are always instantiated. + */ + for (i = 0; i < MAX_NICS; i++) { + NICInfo *nd = &nd_table[i]; + if (nd->used && !nd->instantiated) { + fprintf(stderr, "Warning: requested NIC (%s, model %s) " + "was not created (not supported by this machine?)\n", + nd->name ? nd->name : "anonymous", + nd->model ? nd->model : "unspecified"); + } + } +} + +static int net_init_client(void *dummy, QemuOpts *opts, Error **errp) +{ + Error *local_err = NULL; + + net_client_init(opts, 0, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } + + return 0; +} + +static int net_init_netdev(void *dummy, QemuOpts *opts, Error **errp) +{ + Error *local_err = NULL; + int ret; + + ret = net_client_init(opts, 1, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } + + return ret; +} + +int net_init_clients(void) +{ + QemuOptsList *net = qemu_find_opts("net"); + + if (default_net) { + /* if no clients, we use a default config */ + qemu_opts_set(net, NULL, "type", "nic", &error_abort); +#ifdef CONFIG_SLIRP + qemu_opts_set(net, NULL, "type", "user", &error_abort); +#endif + } + + net_change_state_entry = + qemu_add_vm_change_state_handler(net_vm_change_state_handler, NULL); + + QTAILQ_INIT(&net_clients); + + if (qemu_opts_foreach(qemu_find_opts("netdev"), + net_init_netdev, NULL, NULL)) { + return -1; + } + + if (qemu_opts_foreach(net, net_init_client, NULL, NULL)) { + return -1; + } + + return 0; +} + +int net_client_parse(QemuOptsList *opts_list, const char *optarg) +{ +#if defined(CONFIG_SLIRP) + int ret; + if (net_slirp_parse_legacy(opts_list, optarg, &ret)) { + return ret; + } +#endif + + if (!qemu_opts_parse_noisily(opts_list, optarg, true)) { + return -1; + } + + default_net = 0; + return 0; +} + +/* From FreeBSD */ +/* XXX: optimize */ +unsigned 
compute_mcast_idx(const uint8_t *ep) +{ + uint32_t crc; + int carry, i, j; + uint8_t b; + + crc = 0xffffffff; + for (i = 0; i < 6; i++) { + b = *ep++; + for (j = 0; j < 8; j++) { + carry = ((crc & 0x80000000L) ? 1 : 0) ^ (b & 0x01); + crc <<= 1; + b >>= 1; + if (carry) { + crc = ((crc ^ POLYNOMIAL) | carry); + } + } + } + return crc >> 26; +} + +QemuOptsList qemu_netdev_opts = { + .name = "netdev", + .implied_opt_name = "type", + .head = QTAILQ_HEAD_INITIALIZER(qemu_netdev_opts.head), + .desc = { + /* + * no elements => accept any params + * validation will happen later + */ + { /* end of list */ } + }, +}; + +QemuOptsList qemu_net_opts = { + .name = "net", + .implied_opt_name = "type", + .head = QTAILQ_HEAD_INITIALIZER(qemu_net_opts.head), + .desc = { + /* + * no elements => accept any params + * validation will happen later + */ + { /* end of list */ } + }, +}; diff --git a/src/net/netmap.c b/src/net/netmap.c new file mode 100644 index 0000000..5558368 --- /dev/null +++ b/src/net/netmap.c @@ -0,0 +1,463 @@ +/* + * netmap access for qemu + * + * Copyright (c) 2012-2013 Luigi Rizzo + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
#ifndef __FreeBSD__
#define pkt_copy bcopy
#else
/*
 * Fast copy of @l bytes from @_src to @_dst for non-overlapping buffers.
 *
 * Data is moved in 64-byte steps (eight 64-bit words each), so a length that
 * is not a multiple of 64 is effectively rounded up: both buffers must be
 * able to absorb that.  Copies of 1024 bytes or more are handed to bcopy().
 */
static inline void
pkt_copy(const void *_src, void *_dst, int l)
{
    const uint64_t *from = _src;
    uint64_t *to = _dst;
    int word;

    if (unlikely(l >= 1024)) {
        bcopy(from, to, l);
        return;
    }

    while (l > 0) {
        /* One 64-byte chunk per pass. */
        for (word = 0; word < 8; word++) {
            *to++ = *from++;
        }
        l -= 64;
    }
}
#endif /* __FreeBSD__ */
+ */ +static void netmap_open(NetmapPriv *me, Error **errp) +{ + int fd; + int err; + size_t l; + struct nmreq req; + + me->fd = fd = open(me->fdname, O_RDWR); + if (fd < 0) { + error_setg_file_open(errp, errno, me->fdname); + return; + } + memset(&req, 0, sizeof(req)); + pstrcpy(req.nr_name, sizeof(req.nr_name), me->ifname); + req.nr_ringid = NETMAP_NO_TX_POLL; + req.nr_version = NETMAP_API; + err = ioctl(fd, NIOCREGIF, &req); + if (err) { + error_setg_errno(errp, errno, "Unable to register %s", me->ifname); + goto error; + } + l = me->memsize = req.nr_memsize; + + me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); + if (me->mem == MAP_FAILED) { + error_setg_errno(errp, errno, "Unable to mmap netmap shared memory"); + me->mem = NULL; + goto error; + } + + me->nifp = NETMAP_IF(me->mem, req.nr_offset); + me->tx = NETMAP_TXRING(me->nifp, 0); + me->rx = NETMAP_RXRING(me->nifp, 0); + + return; + +error: + close(me->fd); +} + +static void netmap_send(void *opaque); +static void netmap_writable(void *opaque); + +/* Set the event-loop handlers for the netmap backend. */ +static void netmap_update_fd_handler(NetmapState *s) +{ + qemu_set_fd_handler(s->me.fd, + s->read_poll ? netmap_send : NULL, + s->write_poll ? netmap_writable : NULL, + s); +} + +/* Update the read handler. */ +static void netmap_read_poll(NetmapState *s, bool enable) +{ + if (s->read_poll != enable) { /* Do nothing if not changed. */ + s->read_poll = enable; + netmap_update_fd_handler(s); + } +} + +/* Update the write handler. 
*/ +static void netmap_write_poll(NetmapState *s, bool enable) +{ + if (s->write_poll != enable) { + s->write_poll = enable; + netmap_update_fd_handler(s); + } +} + +static void netmap_poll(NetClientState *nc, bool enable) +{ + NetmapState *s = DO_UPCAST(NetmapState, nc, nc); + + if (s->read_poll != enable || s->write_poll != enable) { + s->write_poll = enable; + s->read_poll = enable; + netmap_update_fd_handler(s); + } +} + +/* + * The fd_write() callback, invoked if the fd is marked as + * writable after a poll. Unregister the handler and flush any + * buffered packets. + */ +static void netmap_writable(void *opaque) +{ + NetmapState *s = opaque; + + netmap_write_poll(s, false); + qemu_flush_queued_packets(&s->nc); +} + +static ssize_t netmap_receive(NetClientState *nc, + const uint8_t *buf, size_t size) +{ + NetmapState *s = DO_UPCAST(NetmapState, nc, nc); + struct netmap_ring *ring = s->me.tx; + uint32_t i; + uint32_t idx; + uint8_t *dst; + + if (unlikely(!ring)) { + /* Drop. */ + return size; + } + + if (unlikely(size > ring->nr_buf_size)) { + RD(5, "[netmap_receive] drop packet of size %d > %d\n", + (int)size, ring->nr_buf_size); + return size; + } + + if (nm_ring_empty(ring)) { + /* No available slots in the netmap TX ring. */ + netmap_write_poll(s, true); + return 0; + } + + i = ring->cur; + idx = ring->slot[i].buf_idx; + dst = (uint8_t *)NETMAP_BUF(ring, idx); + + ring->slot[i].len = size; + ring->slot[i].flags = 0; + pkt_copy(buf, dst, size); + ring->cur = ring->head = nm_ring_next(ring, i); + ioctl(s->me.fd, NIOCTXSYNC, NULL); + + return size; +} + +static ssize_t netmap_receive_iov(NetClientState *nc, + const struct iovec *iov, int iovcnt) +{ + NetmapState *s = DO_UPCAST(NetmapState, nc, nc); + struct netmap_ring *ring = s->me.tx; + uint32_t last; + uint32_t idx; + uint8_t *dst; + int j; + uint32_t i; + + if (unlikely(!ring)) { + /* Drop the packet. 
*/ + return iov_size(iov, iovcnt); + } + + last = i = ring->cur; + + if (nm_ring_space(ring) < iovcnt) { + /* Not enough netmap slots. */ + netmap_write_poll(s, true); + return 0; + } + + for (j = 0; j < iovcnt; j++) { + int iov_frag_size = iov[j].iov_len; + int offset = 0; + int nm_frag_size; + + /* Split each iovec fragment over more netmap slots, if + necessary. */ + while (iov_frag_size) { + nm_frag_size = MIN(iov_frag_size, ring->nr_buf_size); + + if (unlikely(nm_ring_empty(ring))) { + /* We run out of netmap slots while splitting the + iovec fragments. */ + netmap_write_poll(s, true); + return 0; + } + + idx = ring->slot[i].buf_idx; + dst = (uint8_t *)NETMAP_BUF(ring, idx); + + ring->slot[i].len = nm_frag_size; + ring->slot[i].flags = NS_MOREFRAG; + pkt_copy(iov[j].iov_base + offset, dst, nm_frag_size); + + last = i; + i = nm_ring_next(ring, i); + + offset += nm_frag_size; + iov_frag_size -= nm_frag_size; + } + } + /* The last slot must not have NS_MOREFRAG set. */ + ring->slot[last].flags &= ~NS_MOREFRAG; + + /* Now update ring->cur and ring->head. */ + ring->cur = ring->head = i; + + ioctl(s->me.fd, NIOCTXSYNC, NULL); + + return iov_size(iov, iovcnt); +} + +/* Complete a previous send (backend --> guest) and enable the + fd_read callback. */ +static void netmap_send_completed(NetClientState *nc, ssize_t len) +{ + NetmapState *s = DO_UPCAST(NetmapState, nc, nc); + + netmap_read_poll(s, true); +} + +static void netmap_send(void *opaque) +{ + NetmapState *s = opaque; + struct netmap_ring *ring = s->me.rx; + + /* Keep sending while there are available packets into the netmap + RX ring and the forwarding path towards the peer is open. 
*/ + while (!nm_ring_empty(ring)) { + uint32_t i; + uint32_t idx; + bool morefrag; + int iovcnt = 0; + int iovsize; + + do { + i = ring->cur; + idx = ring->slot[i].buf_idx; + morefrag = (ring->slot[i].flags & NS_MOREFRAG); + s->iov[iovcnt].iov_base = (u_char *)NETMAP_BUF(ring, idx); + s->iov[iovcnt].iov_len = ring->slot[i].len; + iovcnt++; + + ring->cur = ring->head = nm_ring_next(ring, i); + } while (!nm_ring_empty(ring) && morefrag); + + if (unlikely(nm_ring_empty(ring) && morefrag)) { + RD(5, "[netmap_send] ran out of slots, with a pending" + "incomplete packet\n"); + } + + iovsize = qemu_sendv_packet_async(&s->nc, s->iov, iovcnt, + netmap_send_completed); + + if (iovsize == 0) { + /* The peer does not receive anymore. Packet is queued, stop + * reading from the backend until netmap_send_completed() + */ + netmap_read_poll(s, false); + break; + } + } +} + +/* Flush and close. */ +static void netmap_cleanup(NetClientState *nc) +{ + NetmapState *s = DO_UPCAST(NetmapState, nc, nc); + + qemu_purge_queued_packets(nc); + + netmap_poll(nc, false); + munmap(s->me.mem, s->me.memsize); + close(s->me.fd); + + s->me.fd = -1; +} + +/* Offloading manipulation support callbacks. */ +static bool netmap_has_ufo(NetClientState *nc) +{ + return true; +} + +static bool netmap_has_vnet_hdr(NetClientState *nc) +{ + return true; +} + +static bool netmap_has_vnet_hdr_len(NetClientState *nc, int len) +{ + return len == 0 || len == sizeof(struct virtio_net_hdr) || + len == sizeof(struct virtio_net_hdr_mrg_rxbuf); +} + +static void netmap_using_vnet_hdr(NetClientState *nc, bool enable) +{ +} + +static void netmap_set_vnet_hdr_len(NetClientState *nc, int len) +{ + NetmapState *s = DO_UPCAST(NetmapState, nc, nc); + int err; + struct nmreq req; + + /* Issue a NETMAP_BDG_VNET_HDR command to change the virtio-net header + * length for the netmap adapter associated to 'me->ifname'. 
+ */ + memset(&req, 0, sizeof(req)); + pstrcpy(req.nr_name, sizeof(req.nr_name), s->me.ifname); + req.nr_version = NETMAP_API; + req.nr_cmd = NETMAP_BDG_VNET_HDR; + req.nr_arg1 = len; + err = ioctl(s->me.fd, NIOCREGIF, &req); + if (err) { + error_report("Unable to execute NETMAP_BDG_VNET_HDR on %s: %s", + s->me.ifname, strerror(errno)); + } else { + /* Keep track of the current length. */ + s->vnet_hdr_len = len; + } +} + +static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6, + int ecn, int ufo) +{ + NetmapState *s = DO_UPCAST(NetmapState, nc, nc); + + /* Setting a virtio-net header length greater than zero automatically + * enables the offloadings. + */ + if (!s->vnet_hdr_len) { + netmap_set_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr)); + } +} + +/* NetClientInfo methods */ +static NetClientInfo net_netmap_info = { + .type = NET_CLIENT_OPTIONS_KIND_NETMAP, + .size = sizeof(NetmapState), + .receive = netmap_receive, + .receive_iov = netmap_receive_iov, + .poll = netmap_poll, + .cleanup = netmap_cleanup, + .has_ufo = netmap_has_ufo, + .has_vnet_hdr = netmap_has_vnet_hdr, + .has_vnet_hdr_len = netmap_has_vnet_hdr_len, + .using_vnet_hdr = netmap_using_vnet_hdr, + .set_offload = netmap_set_offload, + .set_vnet_hdr_len = netmap_set_vnet_hdr_len, +}; + +/* The exported init function + * + * ... -net netmap,ifname="..." + */ +int net_init_netmap(const NetClientOptions *opts, + const char *name, NetClientState *peer, Error **errp) +{ + const NetdevNetmapOptions *netmap_opts = opts->u.netmap; + NetClientState *nc; + Error *err = NULL; + NetmapPriv me; + NetmapState *s; + + pstrcpy(me.fdname, sizeof(me.fdname), + netmap_opts->has_devname ? netmap_opts->devname : "/dev/netmap"); + /* Set default name for the port if not supplied. */ + pstrcpy(me.ifname, sizeof(me.ifname), netmap_opts->ifname); + netmap_open(&me, &err); + if (err) { + error_propagate(errp, err); + return -1; + } + /* Create the object. 
*/ + nc = qemu_new_net_client(&net_netmap_info, peer, "netmap", name); + s = DO_UPCAST(NetmapState, nc, nc); + s->me = me; + s->vnet_hdr_len = 0; + netmap_read_poll(s, true); /* Initially only poll for reads. */ + + return 0; +} + diff --git a/src/net/queue.c b/src/net/queue.c new file mode 100644 index 0000000..de8b9d3 --- /dev/null +++ b/src/net/queue.c @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2009 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "net/queue.h" +#include "qemu/queue.h" +#include "net/net.h" + +/* The delivery handler may only return zero if it will call + * qemu_net_queue_flush() when it determines that it is once again able + * to deliver packets. It must also call qemu_net_queue_purge() in its + * cleanup path. 
+ * + * If a sent callback is provided to send(), the caller must handle a + * zero return from the delivery handler by not sending any more packets + * until we have invoked the callback. Only in that case will we queue + * the packet. + * + * If a sent callback isn't provided, we just drop the packet to avoid + * unbounded queueing. + */ + +struct NetPacket { + QTAILQ_ENTRY(NetPacket) entry; + NetClientState *sender; + unsigned flags; + int size; + NetPacketSent *sent_cb; + uint8_t data[0]; +}; + +struct NetQueue { + void *opaque; + uint32_t nq_maxlen; + uint32_t nq_count; + NetQueueDeliverFunc *deliver; + + QTAILQ_HEAD(packets, NetPacket) packets; + + unsigned delivering : 1; +}; + +NetQueue *qemu_new_net_queue(NetQueueDeliverFunc *deliver, void *opaque) +{ + NetQueue *queue; + + queue = g_new0(NetQueue, 1); + + queue->opaque = opaque; + queue->nq_maxlen = 10000; + queue->nq_count = 0; + queue->deliver = deliver; + + QTAILQ_INIT(&queue->packets); + + queue->delivering = 0; + + return queue; +} + +void qemu_del_net_queue(NetQueue *queue) +{ + NetPacket *packet, *next; + + QTAILQ_FOREACH_SAFE(packet, &queue->packets, entry, next) { + QTAILQ_REMOVE(&queue->packets, packet, entry); + g_free(packet); + } + + g_free(queue); +} + +static void qemu_net_queue_append(NetQueue *queue, + NetClientState *sender, + unsigned flags, + const uint8_t *buf, + size_t size, + NetPacketSent *sent_cb) +{ + NetPacket *packet; + + if (queue->nq_count >= queue->nq_maxlen && !sent_cb) { + return; /* drop if queue full and no callback */ + } + packet = g_malloc(sizeof(NetPacket) + size); + packet->sender = sender; + packet->flags = flags; + packet->size = size; + packet->sent_cb = sent_cb; + memcpy(packet->data, buf, size); + + queue->nq_count++; + QTAILQ_INSERT_TAIL(&queue->packets, packet, entry); +} + +void qemu_net_queue_append_iov(NetQueue *queue, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) +{ + NetPacket *packet; + 
size_t max_len = 0; + int i; + + if (queue->nq_count >= queue->nq_maxlen && !sent_cb) { + return; /* drop if queue full and no callback */ + } + for (i = 0; i < iovcnt; i++) { + max_len += iov[i].iov_len; + } + + packet = g_malloc(sizeof(NetPacket) + max_len); + packet->sender = sender; + packet->sent_cb = sent_cb; + packet->flags = flags; + packet->size = 0; + + for (i = 0; i < iovcnt; i++) { + size_t len = iov[i].iov_len; + + memcpy(packet->data + packet->size, iov[i].iov_base, len); + packet->size += len; + } + + queue->nq_count++; + QTAILQ_INSERT_TAIL(&queue->packets, packet, entry); +} + +static ssize_t qemu_net_queue_deliver(NetQueue *queue, + NetClientState *sender, + unsigned flags, + const uint8_t *data, + size_t size) +{ + ssize_t ret = -1; + struct iovec iov = { + .iov_base = (void *)data, + .iov_len = size + }; + + queue->delivering = 1; + ret = queue->deliver(sender, flags, &iov, 1, queue->opaque); + queue->delivering = 0; + + return ret; +} + +static ssize_t qemu_net_queue_deliver_iov(NetQueue *queue, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt) +{ + ssize_t ret = -1; + + queue->delivering = 1; + ret = queue->deliver(sender, flags, iov, iovcnt, queue->opaque); + queue->delivering = 0; + + return ret; +} + +ssize_t qemu_net_queue_send(NetQueue *queue, + NetClientState *sender, + unsigned flags, + const uint8_t *data, + size_t size, + NetPacketSent *sent_cb) +{ + ssize_t ret; + + if (queue->delivering || !qemu_can_send_packet(sender)) { + qemu_net_queue_append(queue, sender, flags, data, size, sent_cb); + return 0; + } + + ret = qemu_net_queue_deliver(queue, sender, flags, data, size); + if (ret == 0) { + qemu_net_queue_append(queue, sender, flags, data, size, sent_cb); + return 0; + } + + qemu_net_queue_flush(queue); + + return ret; +} + +ssize_t qemu_net_queue_send_iov(NetQueue *queue, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) +{ + ssize_t ret; 
+ + if (queue->delivering || !qemu_can_send_packet(sender)) { + qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt, sent_cb); + return 0; + } + + ret = qemu_net_queue_deliver_iov(queue, sender, flags, iov, iovcnt); + if (ret == 0) { + qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt, sent_cb); + return 0; + } + + qemu_net_queue_flush(queue); + + return ret; +} + +void qemu_net_queue_purge(NetQueue *queue, NetClientState *from) +{ + NetPacket *packet, *next; + + QTAILQ_FOREACH_SAFE(packet, &queue->packets, entry, next) { + if (packet->sender == from) { + QTAILQ_REMOVE(&queue->packets, packet, entry); + queue->nq_count--; + if (packet->sent_cb) { + packet->sent_cb(packet->sender, 0); + } + g_free(packet); + } + } +} + +bool qemu_net_queue_flush(NetQueue *queue) +{ + while (!QTAILQ_EMPTY(&queue->packets)) { + NetPacket *packet; + int ret; + + packet = QTAILQ_FIRST(&queue->packets); + QTAILQ_REMOVE(&queue->packets, packet, entry); + queue->nq_count--; + + ret = qemu_net_queue_deliver(queue, + packet->sender, + packet->flags, + packet->data, + packet->size); + if (ret == 0) { + queue->nq_count++; + QTAILQ_INSERT_HEAD(&queue->packets, packet, entry); + return false; + } + + if (packet->sent_cb) { + packet->sent_cb(packet->sender, ret); + } + + g_free(packet); + } + return true; +} diff --git a/src/net/slirp.c b/src/net/slirp.c new file mode 100644 index 0000000..f505570 --- /dev/null +++ b/src/net/slirp.c @@ -0,0 +1,805 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: 
+ * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "net/slirp.h" + +#include "config-host.h" + +#ifndef _WIN32 +#include <pwd.h> +#include <sys/wait.h> +#endif +#include "net/net.h" +#include "clients.h" +#include "hub.h" +#include "monitor/monitor.h" +#include "qemu/error-report.h" +#include "qemu/sockets.h" +#include "slirp/libslirp.h" +#include "sysemu/char.h" + +static int get_str_sep(char *buf, int buf_size, const char **pp, int sep) +{ + const char *p, *p1; + int len; + p = *pp; + p1 = strchr(p, sep); + if (!p1) + return -1; + len = p1 - p; + p1++; + if (buf_size > 0) { + if (len > buf_size - 1) + len = buf_size - 1; + memcpy(buf, p, len); + buf[len] = '\0'; + } + *pp = p1; + return 0; +} + +/* slirp network adapter */ + +#define SLIRP_CFG_HOSTFWD 1 +#define SLIRP_CFG_LEGACY 2 + +struct slirp_config_str { + struct slirp_config_str *next; + int flags; + char str[1024]; + int legacy_format; +}; + +typedef struct SlirpState { + NetClientState nc; + QTAILQ_ENTRY(SlirpState) entry; + Slirp *slirp; +#ifndef _WIN32 + char smb_dir[128]; +#endif +} SlirpState; + +static struct slirp_config_str *slirp_configs; +const char *legacy_tftp_prefix; +const char *legacy_bootp_filename; +static QTAILQ_HEAD(slirp_stacks, SlirpState) slirp_stacks = + QTAILQ_HEAD_INITIALIZER(slirp_stacks); + +static int slirp_hostfwd(SlirpState *s, const char *redir_str, + int legacy_format); +static int 
slirp_guestfwd(SlirpState *s, const char *config_str, + int legacy_format); + +#ifndef _WIN32 +static const char *legacy_smb_export; + +static int slirp_smb(SlirpState *s, const char *exported_dir, + struct in_addr vserver_addr); +static void slirp_smb_cleanup(SlirpState *s); +#else +static inline void slirp_smb_cleanup(SlirpState *s) { } +#endif + +void slirp_output(void *opaque, const uint8_t *pkt, int pkt_len) +{ + SlirpState *s = opaque; + + qemu_send_packet(&s->nc, pkt, pkt_len); +} + +static ssize_t net_slirp_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + SlirpState *s = DO_UPCAST(SlirpState, nc, nc); + + slirp_input(s->slirp, buf, size); + + return size; +} + +static void net_slirp_cleanup(NetClientState *nc) +{ + SlirpState *s = DO_UPCAST(SlirpState, nc, nc); + + slirp_cleanup(s->slirp); + slirp_smb_cleanup(s); + QTAILQ_REMOVE(&slirp_stacks, s, entry); +} + +static NetClientInfo net_slirp_info = { + .type = NET_CLIENT_OPTIONS_KIND_USER, + .size = sizeof(SlirpState), + .receive = net_slirp_receive, + .cleanup = net_slirp_cleanup, +}; + +static int net_slirp_init(NetClientState *peer, const char *model, + const char *name, int restricted, + const char *vnetwork, const char *vhost, + const char *vhostname, const char *tftp_export, + const char *bootfile, const char *vdhcp_start, + const char *vnameserver, const char *smb_export, + const char *vsmbserver, const char **dnssearch) +{ + /* default settings according to historic slirp */ + struct in_addr net = { .s_addr = htonl(0x0a000200) }; /* 10.0.2.0 */ + struct in_addr mask = { .s_addr = htonl(0xffffff00) }; /* 255.255.255.0 */ + struct in_addr host = { .s_addr = htonl(0x0a000202) }; /* 10.0.2.2 */ + struct in_addr dhcp = { .s_addr = htonl(0x0a00020f) }; /* 10.0.2.15 */ + struct in_addr dns = { .s_addr = htonl(0x0a000203) }; /* 10.0.2.3 */ +#ifndef _WIN32 + struct in_addr smbsrv = { .s_addr = 0 }; +#endif + NetClientState *nc; + SlirpState *s; + char buf[20]; + uint32_t addr; + int shift; + 
char *end; + struct slirp_config_str *config; + + if (!tftp_export) { + tftp_export = legacy_tftp_prefix; + } + if (!bootfile) { + bootfile = legacy_bootp_filename; + } + + if (vnetwork) { + if (get_str_sep(buf, sizeof(buf), &vnetwork, '/') < 0) { + if (!inet_aton(vnetwork, &net)) { + return -1; + } + addr = ntohl(net.s_addr); + if (!(addr & 0x80000000)) { + mask.s_addr = htonl(0xff000000); /* class A */ + } else if ((addr & 0xfff00000) == 0xac100000) { + mask.s_addr = htonl(0xfff00000); /* priv. 172.16.0.0/12 */ + } else if ((addr & 0xc0000000) == 0x80000000) { + mask.s_addr = htonl(0xffff0000); /* class B */ + } else if ((addr & 0xffff0000) == 0xc0a80000) { + mask.s_addr = htonl(0xffff0000); /* priv. 192.168.0.0/16 */ + } else if ((addr & 0xffff0000) == 0xc6120000) { + mask.s_addr = htonl(0xfffe0000); /* tests 198.18.0.0/15 */ + } else if ((addr & 0xe0000000) == 0xe0000000) { + mask.s_addr = htonl(0xffffff00); /* class C */ + } else { + mask.s_addr = htonl(0xfffffff0); /* multicast/reserved */ + } + } else { + if (!inet_aton(buf, &net)) { + return -1; + } + shift = strtol(vnetwork, &end, 10); + if (*end != '\0') { + if (!inet_aton(vnetwork, &mask)) { + return -1; + } + } else if (shift < 4 || shift > 32) { + return -1; + } else { + mask.s_addr = htonl(0xffffffff << (32 - shift)); + } + } + net.s_addr &= mask.s_addr; + host.s_addr = net.s_addr | (htonl(0x0202) & ~mask.s_addr); + dhcp.s_addr = net.s_addr | (htonl(0x020f) & ~mask.s_addr); + dns.s_addr = net.s_addr | (htonl(0x0203) & ~mask.s_addr); + } + + if (vhost && !inet_aton(vhost, &host)) { + return -1; + } + if ((host.s_addr & mask.s_addr) != net.s_addr) { + return -1; + } + + if (vnameserver && !inet_aton(vnameserver, &dns)) { + return -1; + } + if ((dns.s_addr & mask.s_addr) != net.s_addr || + dns.s_addr == host.s_addr) { + return -1; + } + + if (vdhcp_start && !inet_aton(vdhcp_start, &dhcp)) { + return -1; + } + if ((dhcp.s_addr & mask.s_addr) != net.s_addr || + dhcp.s_addr == host.s_addr || dhcp.s_addr == 
dns.s_addr) { + return -1; + } + +#ifndef _WIN32 + if (vsmbserver && !inet_aton(vsmbserver, &smbsrv)) { + return -1; + } +#endif + + nc = qemu_new_net_client(&net_slirp_info, peer, model, name); + + snprintf(nc->info_str, sizeof(nc->info_str), + "net=%s,restrict=%s", inet_ntoa(net), + restricted ? "on" : "off"); + + s = DO_UPCAST(SlirpState, nc, nc); + + s->slirp = slirp_init(restricted, net, mask, host, vhostname, + tftp_export, bootfile, dhcp, dns, dnssearch, s); + QTAILQ_INSERT_TAIL(&slirp_stacks, s, entry); + + for (config = slirp_configs; config; config = config->next) { + if (config->flags & SLIRP_CFG_HOSTFWD) { + if (slirp_hostfwd(s, config->str, + config->flags & SLIRP_CFG_LEGACY) < 0) + goto error; + } else { + if (slirp_guestfwd(s, config->str, + config->flags & SLIRP_CFG_LEGACY) < 0) + goto error; + } + } +#ifndef _WIN32 + if (!smb_export) { + smb_export = legacy_smb_export; + } + if (smb_export) { + if (slirp_smb(s, smb_export, smbsrv) < 0) + goto error; + } +#endif + + return 0; + +error: + qemu_del_net_client(nc); + return -1; +} + +static SlirpState *slirp_lookup(Monitor *mon, const char *vlan, + const char *stack) +{ + + if (vlan) { + NetClientState *nc; + nc = net_hub_find_client_by_name(strtol(vlan, NULL, 0), stack); + if (!nc) { + monitor_printf(mon, "unrecognized (vlan-id, stackname) pair\n"); + return NULL; + } + if (strcmp(nc->model, "user")) { + monitor_printf(mon, "invalid device specified\n"); + return NULL; + } + return DO_UPCAST(SlirpState, nc, nc); + } else { + if (QTAILQ_EMPTY(&slirp_stacks)) { + monitor_printf(mon, "user mode network stack not in use\n"); + return NULL; + } + return QTAILQ_FIRST(&slirp_stacks); + } +} + +void hmp_hostfwd_remove(Monitor *mon, const QDict *qdict) +{ + struct in_addr host_addr = { .s_addr = INADDR_ANY }; + int host_port; + char buf[256]; + const char *src_str, *p; + SlirpState *s; + int is_udp = 0; + int err; + const char *arg1 = qdict_get_str(qdict, "arg1"); + const char *arg2 = qdict_get_try_str(qdict, 
"arg2"); + const char *arg3 = qdict_get_try_str(qdict, "arg3"); + + if (arg2) { + s = slirp_lookup(mon, arg1, arg2); + src_str = arg3; + } else { + s = slirp_lookup(mon, NULL, NULL); + src_str = arg1; + } + if (!s) { + return; + } + + p = src_str; + if (!p || get_str_sep(buf, sizeof(buf), &p, ':') < 0) { + goto fail_syntax; + } + + if (!strcmp(buf, "tcp") || buf[0] == '\0') { + is_udp = 0; + } else if (!strcmp(buf, "udp")) { + is_udp = 1; + } else { + goto fail_syntax; + } + + if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) { + goto fail_syntax; + } + if (buf[0] != '\0' && !inet_aton(buf, &host_addr)) { + goto fail_syntax; + } + + host_port = atoi(p); + + err = slirp_remove_hostfwd(s->slirp, is_udp, host_addr, host_port); + + monitor_printf(mon, "host forwarding rule for %s %s\n", src_str, + err ? "not found" : "removed"); + return; + + fail_syntax: + monitor_printf(mon, "invalid format\n"); +} + +static int slirp_hostfwd(SlirpState *s, const char *redir_str, + int legacy_format) +{ + struct in_addr host_addr = { .s_addr = INADDR_ANY }; + struct in_addr guest_addr = { .s_addr = 0 }; + int host_port, guest_port; + const char *p; + char buf[256]; + int is_udp; + char *end; + + p = redir_str; + if (!p || get_str_sep(buf, sizeof(buf), &p, ':') < 0) { + goto fail_syntax; + } + if (!strcmp(buf, "tcp") || buf[0] == '\0') { + is_udp = 0; + } else if (!strcmp(buf, "udp")) { + is_udp = 1; + } else { + goto fail_syntax; + } + + if (!legacy_format) { + if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) { + goto fail_syntax; + } + if (buf[0] != '\0' && !inet_aton(buf, &host_addr)) { + goto fail_syntax; + } + } + + if (get_str_sep(buf, sizeof(buf), &p, legacy_format ? 
':' : '-') < 0) { + goto fail_syntax; + } + host_port = strtol(buf, &end, 0); + if (*end != '\0' || host_port < 1 || host_port > 65535) { + goto fail_syntax; + } + + if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) { + goto fail_syntax; + } + if (buf[0] != '\0' && !inet_aton(buf, &guest_addr)) { + goto fail_syntax; + } + + guest_port = strtol(p, &end, 0); + if (*end != '\0' || guest_port < 1 || guest_port > 65535) { + goto fail_syntax; + } + + if (slirp_add_hostfwd(s->slirp, is_udp, host_addr, host_port, guest_addr, + guest_port) < 0) { + error_report("could not set up host forwarding rule '%s'", + redir_str); + return -1; + } + return 0; + + fail_syntax: + error_report("invalid host forwarding rule '%s'", redir_str); + return -1; +} + +void hmp_hostfwd_add(Monitor *mon, const QDict *qdict) +{ + const char *redir_str; + SlirpState *s; + const char *arg1 = qdict_get_str(qdict, "arg1"); + const char *arg2 = qdict_get_try_str(qdict, "arg2"); + const char *arg3 = qdict_get_try_str(qdict, "arg3"); + + if (arg2) { + s = slirp_lookup(mon, arg1, arg2); + redir_str = arg3; + } else { + s = slirp_lookup(mon, NULL, NULL); + redir_str = arg1; + } + if (s) { + slirp_hostfwd(s, redir_str, 0); + } + +} + +int net_slirp_redir(const char *redir_str) +{ + struct slirp_config_str *config; + + if (QTAILQ_EMPTY(&slirp_stacks)) { + config = g_malloc(sizeof(*config)); + pstrcpy(config->str, sizeof(config->str), redir_str); + config->flags = SLIRP_CFG_HOSTFWD | SLIRP_CFG_LEGACY; + config->next = slirp_configs; + slirp_configs = config; + return 0; + } + + return slirp_hostfwd(QTAILQ_FIRST(&slirp_stacks), redir_str, 1); +} + +#ifndef _WIN32 + +/* automatic user mode samba server configuration */ +static void slirp_smb_cleanup(SlirpState *s) +{ + char cmd[128]; + int ret; + + if (s->smb_dir[0] != '\0') { + snprintf(cmd, sizeof(cmd), "rm -rf %s", s->smb_dir); + ret = system(cmd); + if (ret == -1 || !WIFEXITED(ret)) { + error_report("'%s' failed.", cmd); + } else if (WEXITSTATUS(ret)) { + 
error_report("'%s' failed. Error code: %d", + cmd, WEXITSTATUS(ret)); + } + s->smb_dir[0] = '\0'; + } +} + +static int slirp_smb(SlirpState* s, const char *exported_dir, + struct in_addr vserver_addr) +{ + char smb_conf[128]; + char smb_cmdline[128]; + struct passwd *passwd; + FILE *f; + + passwd = getpwuid(geteuid()); + if (!passwd) { + error_report("failed to retrieve user name"); + return -1; + } + + if (access(CONFIG_SMBD_COMMAND, F_OK)) { + error_report("could not find '%s', please install it", + CONFIG_SMBD_COMMAND); + return -1; + } + + if (access(exported_dir, R_OK | X_OK)) { + error_report("error accessing shared directory '%s': %s", + exported_dir, strerror(errno)); + return -1; + } + + snprintf(s->smb_dir, sizeof(s->smb_dir), "/tmp/qemu-smb.XXXXXX"); + if (!mkdtemp(s->smb_dir)) { + error_report("could not create samba server dir '%s'", s->smb_dir); + s->smb_dir[0] = 0; + return -1; + } + snprintf(smb_conf, sizeof(smb_conf), "%s/%s", s->smb_dir, "smb.conf"); + + f = fopen(smb_conf, "w"); + if (!f) { + slirp_smb_cleanup(s); + error_report("could not create samba server configuration file '%s'", + smb_conf); + return -1; + } + fprintf(f, + "[global]\n" + "private dir=%s\n" + "interfaces=127.0.0.1\n" + "bind interfaces only=yes\n" + "pid directory=%s\n" + "lock directory=%s\n" + "state directory=%s\n" + "cache directory=%s\n" + "ncalrpc dir=%s/ncalrpc\n" + "log file=%s/log.smbd\n" + "smb passwd file=%s/smbpasswd\n" + "security = user\n" + "map to guest = Bad User\n" + "load printers = no\n" + "printing = bsd\n" + "disable spoolss = yes\n" + "usershare max shares = 0\n" + "[qemu]\n" + "path=%s\n" + "read only=no\n" + "guest ok=yes\n" + "force user=%s\n", + s->smb_dir, + s->smb_dir, + s->smb_dir, + s->smb_dir, + s->smb_dir, + s->smb_dir, + s->smb_dir, + s->smb_dir, + exported_dir, + passwd->pw_name + ); + fclose(f); + + snprintf(smb_cmdline, sizeof(smb_cmdline), "%s -l %s -s %s", + CONFIG_SMBD_COMMAND, s->smb_dir, smb_conf); + + if (slirp_add_exec(s->slirp, 0, 
smb_cmdline, &vserver_addr, 139) < 0 || + slirp_add_exec(s->slirp, 0, smb_cmdline, &vserver_addr, 445) < 0) { + slirp_smb_cleanup(s); + error_report("conflicting/invalid smbserver address"); + return -1; + } + return 0; +} + +/* automatic user mode samba server configuration (legacy interface) */ +int net_slirp_smb(const char *exported_dir) +{ + struct in_addr vserver_addr = { .s_addr = 0 }; + + if (legacy_smb_export) { + fprintf(stderr, "-smb given twice\n"); + return -1; + } + legacy_smb_export = exported_dir; + if (!QTAILQ_EMPTY(&slirp_stacks)) { + return slirp_smb(QTAILQ_FIRST(&slirp_stacks), exported_dir, + vserver_addr); + } + return 0; +} + +#endif /* !defined(_WIN32) */ + +struct GuestFwd { + CharDriverState *hd; + struct in_addr server; + int port; + Slirp *slirp; +}; + +static int guestfwd_can_read(void *opaque) +{ + struct GuestFwd *fwd = opaque; + return slirp_socket_can_recv(fwd->slirp, fwd->server, fwd->port); +} + +static void guestfwd_read(void *opaque, const uint8_t *buf, int size) +{ + struct GuestFwd *fwd = opaque; + slirp_socket_recv(fwd->slirp, fwd->server, fwd->port, buf, size); +} + +static int slirp_guestfwd(SlirpState *s, const char *config_str, + int legacy_format) +{ + struct in_addr server = { .s_addr = 0 }; + struct GuestFwd *fwd; + const char *p; + char buf[128]; + char *end; + int port; + + p = config_str; + if (legacy_format) { + if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) { + goto fail_syntax; + } + } else { + if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) { + goto fail_syntax; + } + if (strcmp(buf, "tcp") && buf[0] != '\0') { + goto fail_syntax; + } + if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) { + goto fail_syntax; + } + if (buf[0] != '\0' && !inet_aton(buf, &server)) { + goto fail_syntax; + } + if (get_str_sep(buf, sizeof(buf), &p, '-') < 0) { + goto fail_syntax; + } + } + port = strtol(buf, &end, 10); + if (*end != '\0' || port < 1 || port > 65535) { + goto fail_syntax; + } + + snprintf(buf, sizeof(buf), 
"guestfwd.tcp.%d", port); + + if ((strlen(p) > 4) && !strncmp(p, "cmd:", 4)) { + if (slirp_add_exec(s->slirp, 0, &p[4], &server, port) < 0) { + error_report("conflicting/invalid host:port in guest forwarding " + "rule '%s'", config_str); + return -1; + } + } else { + fwd = g_new(struct GuestFwd, 1); + fwd->hd = qemu_chr_new(buf, p, NULL); + if (!fwd->hd) { + error_report("could not open guest forwarding device '%s'", buf); + g_free(fwd); + return -1; + } + + if (slirp_add_exec(s->slirp, 3, fwd->hd, &server, port) < 0) { + error_report("conflicting/invalid host:port in guest forwarding " + "rule '%s'", config_str); + g_free(fwd); + return -1; + } + fwd->server = server; + fwd->port = port; + fwd->slirp = s->slirp; + + qemu_chr_fe_claim_no_fail(fwd->hd); + qemu_chr_add_handlers(fwd->hd, guestfwd_can_read, guestfwd_read, + NULL, fwd); + } + return 0; + + fail_syntax: + error_report("invalid guest forwarding rule '%s'", config_str); + return -1; +} + +void hmp_info_usernet(Monitor *mon, const QDict *qdict) +{ + SlirpState *s; + + QTAILQ_FOREACH(s, &slirp_stacks, entry) { + int id; + bool got_vlan_id = net_hub_id_for_client(&s->nc, &id) == 0; + monitor_printf(mon, "VLAN %d (%s):\n", + got_vlan_id ? 
id : -1, + s->nc.name); + slirp_connection_info(s->slirp, mon); + } +} + +static void +net_init_slirp_configs(const StringList *fwd, int flags) +{ + while (fwd) { + struct slirp_config_str *config; + + config = g_malloc0(sizeof(*config)); + pstrcpy(config->str, sizeof(config->str), fwd->value->str); + config->flags = flags; + config->next = slirp_configs; + slirp_configs = config; + + fwd = fwd->next; + } +} + +static const char **slirp_dnssearch(const StringList *dnsname) +{ + const StringList *c = dnsname; + size_t i = 0, num_opts = 0; + const char **ret; + + while (c) { + num_opts++; + c = c->next; + } + + if (num_opts == 0) { + return NULL; + } + + ret = g_malloc((num_opts + 1) * sizeof(*ret)); + c = dnsname; + while (c) { + ret[i++] = c->value->str; + c = c->next; + } + ret[i] = NULL; + return ret; +} + +int net_init_slirp(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp) +{ + /* FIXME error_setg(errp, ...) on failure */ + struct slirp_config_str *config; + char *vnet; + int ret; + const NetdevUserOptions *user; + const char **dnssearch; + + assert(opts->type == NET_CLIENT_OPTIONS_KIND_USER); + user = opts->u.user; + + vnet = user->has_net ? g_strdup(user->net) : + user->has_ip ? 
g_strdup_printf("%s/24", user->ip) : + NULL; + + dnssearch = slirp_dnssearch(user->dnssearch); + + /* all optional fields are initialized to "all bits zero" */ + + net_init_slirp_configs(user->hostfwd, SLIRP_CFG_HOSTFWD); + net_init_slirp_configs(user->guestfwd, 0); + + ret = net_slirp_init(peer, "user", name, user->q_restrict, vnet, + user->host, user->hostname, user->tftp, + user->bootfile, user->dhcpstart, user->dns, user->smb, + user->smbserver, dnssearch); + + while (slirp_configs) { + config = slirp_configs; + slirp_configs = config->next; + g_free(config); + } + + g_free(vnet); + g_free(dnssearch); + + return ret; +} + +int net_slirp_parse_legacy(QemuOptsList *opts_list, const char *optarg, int *ret) +{ + if (strcmp(opts_list->name, "net") != 0 || + strncmp(optarg, "channel,", strlen("channel,")) != 0) { + return 0; + } + + /* handle legacy -net channel,port:chr */ + optarg += strlen("channel,"); + + if (QTAILQ_EMPTY(&slirp_stacks)) { + struct slirp_config_str *config; + + config = g_malloc(sizeof(*config)); + pstrcpy(config->str, sizeof(config->str), optarg); + config->flags = SLIRP_CFG_LEGACY; + config->next = slirp_configs; + slirp_configs = config; + *ret = 0; + } else { + *ret = slirp_guestfwd(QTAILQ_FIRST(&slirp_stacks), optarg, 1); + } + + return 1; +} + diff --git a/src/net/socket.c b/src/net/socket.c new file mode 100644 index 0000000..e8605d4 --- /dev/null +++ b/src/net/socket.c @@ -0,0 +1,774 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "config-host.h" + +#include "net/net.h" +#include "clients.h" +#include "monitor/monitor.h" +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "qemu/option.h" +#include "qemu/sockets.h" +#include "qemu/iov.h" +#include "qemu/main-loop.h" + +typedef struct NetSocketState { + NetClientState nc; + int listen_fd; + int fd; + int state; /* 0 = getting length, 1 = getting data */ + unsigned int index; + unsigned int packet_len; + unsigned int send_index; /* number of bytes sent (only SOCK_STREAM) */ + uint8_t buf[NET_BUFSIZE]; + struct sockaddr_in dgram_dst; /* contains inet host and port destination iff connectionless (SOCK_DGRAM) */ + IOHandler *send_fn; /* differs between SOCK_STREAM/SOCK_DGRAM */ + bool read_poll; /* waiting to receive data? */ + bool write_poll; /* waiting to transmit data? */ +} NetSocketState; + +static void net_socket_accept(void *opaque); +static void net_socket_writable(void *opaque); + +static void net_socket_update_fd_handler(NetSocketState *s) +{ + qemu_set_fd_handler(s->fd, + s->read_poll ? s->send_fn : NULL, + s->write_poll ? 
net_socket_writable : NULL, + s); +} + +static void net_socket_read_poll(NetSocketState *s, bool enable) +{ + s->read_poll = enable; + net_socket_update_fd_handler(s); +} + +static void net_socket_write_poll(NetSocketState *s, bool enable) +{ + s->write_poll = enable; + net_socket_update_fd_handler(s); +} + +static void net_socket_writable(void *opaque) +{ + NetSocketState *s = opaque; + + net_socket_write_poll(s, false); + + qemu_flush_queued_packets(&s->nc); +} + +static ssize_t net_socket_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc); + uint32_t len = htonl(size); + struct iovec iov[] = { + { + .iov_base = &len, + .iov_len = sizeof(len), + }, { + .iov_base = (void *)buf, + .iov_len = size, + }, + }; + size_t remaining; + ssize_t ret; + + remaining = iov_size(iov, 2) - s->send_index; + ret = iov_send(s->fd, iov, 2, s->send_index, remaining); + + if (ret == -1 && errno == EAGAIN) { + ret = 0; /* handled further down */ + } + if (ret == -1) { + s->send_index = 0; + return -errno; + } + if (ret < (ssize_t)remaining) { + s->send_index += ret; + net_socket_write_poll(s, true); + return 0; + } + s->send_index = 0; + return size; +} + +static ssize_t net_socket_receive_dgram(NetClientState *nc, const uint8_t *buf, size_t size) +{ + NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc); + ssize_t ret; + + do { + ret = qemu_sendto(s->fd, buf, size, 0, + (struct sockaddr *)&s->dgram_dst, + sizeof(s->dgram_dst)); + } while (ret == -1 && errno == EINTR); + + if (ret == -1 && errno == EAGAIN) { + net_socket_write_poll(s, true); + return 0; + } + return ret; +} + +static void net_socket_send_completed(NetClientState *nc, ssize_t len) +{ + NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc); + + if (!s->read_poll) { + net_socket_read_poll(s, true); + } +} + +static void net_socket_send(void *opaque) +{ + NetSocketState *s = opaque; + int size, err; + unsigned l; + uint8_t buf1[NET_BUFSIZE]; + const 
uint8_t *buf; + + size = qemu_recv(s->fd, buf1, sizeof(buf1), 0); + if (size < 0) { + err = socket_error(); + if (err != EWOULDBLOCK) + goto eoc; + } else if (size == 0) { + /* end of connection */ + eoc: + net_socket_read_poll(s, false); + net_socket_write_poll(s, false); + if (s->listen_fd != -1) { + qemu_set_fd_handler(s->listen_fd, net_socket_accept, NULL, s); + } + closesocket(s->fd); + + s->fd = -1; + s->state = 0; + s->index = 0; + s->packet_len = 0; + s->nc.link_down = true; + memset(s->buf, 0, sizeof(s->buf)); + memset(s->nc.info_str, 0, sizeof(s->nc.info_str)); + + return; + } + buf = buf1; + while (size > 0) { + /* reassemble a packet from the network */ + switch(s->state) { + case 0: + l = 4 - s->index; + if (l > size) + l = size; + memcpy(s->buf + s->index, buf, l); + buf += l; + size -= l; + s->index += l; + if (s->index == 4) { + /* got length */ + s->packet_len = ntohl(*(uint32_t *)s->buf); + s->index = 0; + s->state = 1; + } + break; + case 1: + l = s->packet_len - s->index; + if (l > size) + l = size; + if (s->index + l <= sizeof(s->buf)) { + memcpy(s->buf + s->index, buf, l); + } else { + fprintf(stderr, "serious error: oversized packet received," + "connection terminated.\n"); + s->state = 0; + goto eoc; + } + + s->index += l; + buf += l; + size -= l; + if (s->index >= s->packet_len) { + s->index = 0; + s->state = 0; + if (qemu_send_packet_async(&s->nc, s->buf, s->packet_len, + net_socket_send_completed) == 0) { + net_socket_read_poll(s, false); + break; + } + } + break; + } + } +} + +static void net_socket_send_dgram(void *opaque) +{ + NetSocketState *s = opaque; + int size; + + size = qemu_recv(s->fd, s->buf, sizeof(s->buf), 0); + if (size < 0) + return; + if (size == 0) { + /* end of connection */ + net_socket_read_poll(s, false); + net_socket_write_poll(s, false); + return; + } + if (qemu_send_packet_async(&s->nc, s->buf, size, + net_socket_send_completed) == 0) { + net_socket_read_poll(s, false); + } +} + +static int 
net_socket_mcast_create(struct sockaddr_in *mcastaddr, struct in_addr *localaddr) +{ + struct ip_mreq imr; + int fd; + int val, ret; +#ifdef __OpenBSD__ + unsigned char loop; +#else + int loop; +#endif + + if (!IN_MULTICAST(ntohl(mcastaddr->sin_addr.s_addr))) { + fprintf(stderr, "qemu: error: specified mcastaddr \"%s\" (0x%08x) " + "does not contain a multicast address\n", + inet_ntoa(mcastaddr->sin_addr), + (int)ntohl(mcastaddr->sin_addr.s_addr)); + return -1; + + } + fd = qemu_socket(PF_INET, SOCK_DGRAM, 0); + if (fd < 0) { + perror("socket(PF_INET, SOCK_DGRAM)"); + return -1; + } + + /* Allow multiple sockets to bind the same multicast ip and port by setting + * SO_REUSEADDR. This is the only situation where SO_REUSEADDR should be set + * on windows. Use socket_set_fast_reuse otherwise as it sets SO_REUSEADDR + * only on posix systems. + */ + val = 1; + ret = qemu_setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); + if (ret < 0) { + perror("setsockopt(SOL_SOCKET, SO_REUSEADDR)"); + goto fail; + } + + ret = bind(fd, (struct sockaddr *)mcastaddr, sizeof(*mcastaddr)); + if (ret < 0) { + perror("bind"); + goto fail; + } + + /* Add host to multicast group */ + imr.imr_multiaddr = mcastaddr->sin_addr; + if (localaddr) { + imr.imr_interface = *localaddr; + } else { + imr.imr_interface.s_addr = htonl(INADDR_ANY); + } + + ret = qemu_setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, + &imr, sizeof(struct ip_mreq)); + if (ret < 0) { + perror("setsockopt(IP_ADD_MEMBERSHIP)"); + goto fail; + } + + /* Force mcast msgs to loopback (eg. 
several QEMUs in same host */ + loop = 1; + ret = qemu_setsockopt(fd, IPPROTO_IP, IP_MULTICAST_LOOP, + &loop, sizeof(loop)); + if (ret < 0) { + perror("setsockopt(SOL_IP, IP_MULTICAST_LOOP)"); + goto fail; + } + + /* If a bind address is given, only send packets from that address */ + if (localaddr != NULL) { + ret = qemu_setsockopt(fd, IPPROTO_IP, IP_MULTICAST_IF, + localaddr, sizeof(*localaddr)); + if (ret < 0) { + perror("setsockopt(IP_MULTICAST_IF)"); + goto fail; + } + } + + qemu_set_nonblock(fd); + return fd; +fail: + if (fd >= 0) + closesocket(fd); + return -1; +} + +static void net_socket_cleanup(NetClientState *nc) +{ + NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc); + if (s->fd != -1) { + net_socket_read_poll(s, false); + net_socket_write_poll(s, false); + close(s->fd); + s->fd = -1; + } + if (s->listen_fd != -1) { + qemu_set_fd_handler(s->listen_fd, NULL, NULL, NULL); + closesocket(s->listen_fd); + s->listen_fd = -1; + } +} + +static NetClientInfo net_dgram_socket_info = { + .type = NET_CLIENT_OPTIONS_KIND_SOCKET, + .size = sizeof(NetSocketState), + .receive = net_socket_receive_dgram, + .cleanup = net_socket_cleanup, +}; + +static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer, + const char *model, + const char *name, + int fd, int is_connected) +{ + struct sockaddr_in saddr; + int newfd; + socklen_t saddr_len = sizeof(saddr); + NetClientState *nc; + NetSocketState *s; + + /* fd passed: multicast: "learn" dgram_dst address from bound address and save it + * Because this may be "shared" socket from a "master" process, datagrams would be recv() + * by ONLY ONE process: we must "clone" this dgram socket --jjo + */ + + if (is_connected) { + if (getsockname(fd, (struct sockaddr *) &saddr, &saddr_len) == 0) { + /* must be bound */ + if (saddr.sin_addr.s_addr == 0) { + fprintf(stderr, "qemu: error: init_dgram: fd=%d unbound, " + "cannot setup multicast dst addr\n", fd); + goto err; + } + /* clone dgram socket */ + newfd = 
net_socket_mcast_create(&saddr, NULL); + if (newfd < 0) { + /* error already reported by net_socket_mcast_create() */ + goto err; + } + /* clone newfd to fd, close newfd */ + dup2(newfd, fd); + close(newfd); + + } else { + fprintf(stderr, + "qemu: error: init_dgram: fd=%d failed getsockname(): %s\n", + fd, strerror(errno)); + goto err; + } + } + + nc = qemu_new_net_client(&net_dgram_socket_info, peer, model, name); + + s = DO_UPCAST(NetSocketState, nc, nc); + + s->fd = fd; + s->listen_fd = -1; + s->send_fn = net_socket_send_dgram; + net_socket_read_poll(s, true); + + /* mcast: save bound address as dst */ + if (is_connected) { + s->dgram_dst = saddr; + snprintf(nc->info_str, sizeof(nc->info_str), + "socket: fd=%d (cloned mcast=%s:%d)", + fd, inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); + } else { + snprintf(nc->info_str, sizeof(nc->info_str), + "socket: fd=%d", fd); + } + + return s; + +err: + closesocket(fd); + return NULL; +} + +static void net_socket_connect(void *opaque) +{ + NetSocketState *s = opaque; + s->send_fn = net_socket_send; + net_socket_read_poll(s, true); +} + +static NetClientInfo net_socket_info = { + .type = NET_CLIENT_OPTIONS_KIND_SOCKET, + .size = sizeof(NetSocketState), + .receive = net_socket_receive, + .cleanup = net_socket_cleanup, +}; + +static NetSocketState *net_socket_fd_init_stream(NetClientState *peer, + const char *model, + const char *name, + int fd, int is_connected) +{ + NetClientState *nc; + NetSocketState *s; + + nc = qemu_new_net_client(&net_socket_info, peer, model, name); + + snprintf(nc->info_str, sizeof(nc->info_str), "socket: fd=%d", fd); + + s = DO_UPCAST(NetSocketState, nc, nc); + + s->fd = fd; + s->listen_fd = -1; + + /* Disable Nagle algorithm on TCP sockets to reduce latency */ + socket_set_nodelay(fd); + + if (is_connected) { + net_socket_connect(s); + } else { + qemu_set_fd_handler(s->fd, NULL, net_socket_connect, s); + } + return s; +} + +static NetSocketState *net_socket_fd_init(NetClientState *peer, + const 
char *model, const char *name, + int fd, int is_connected) +{ + int so_type = -1, optlen=sizeof(so_type); + + if(getsockopt(fd, SOL_SOCKET, SO_TYPE, (char *)&so_type, + (socklen_t *)&optlen)< 0) { + fprintf(stderr, "qemu: error: getsockopt(SO_TYPE) for fd=%d failed\n", + fd); + closesocket(fd); + return NULL; + } + switch(so_type) { + case SOCK_DGRAM: + return net_socket_fd_init_dgram(peer, model, name, fd, is_connected); + case SOCK_STREAM: + return net_socket_fd_init_stream(peer, model, name, fd, is_connected); + default: + /* who knows ... this could be a eg. a pty, do warn and continue as stream */ + fprintf(stderr, "qemu: warning: socket type=%d for fd=%d is not SOCK_DGRAM or SOCK_STREAM\n", so_type, fd); + return net_socket_fd_init_stream(peer, model, name, fd, is_connected); + } + return NULL; +} + +static void net_socket_accept(void *opaque) +{ + NetSocketState *s = opaque; + struct sockaddr_in saddr; + socklen_t len; + int fd; + + for(;;) { + len = sizeof(saddr); + fd = qemu_accept(s->listen_fd, (struct sockaddr *)&saddr, &len); + if (fd < 0 && errno != EINTR) { + return; + } else if (fd >= 0) { + qemu_set_fd_handler(s->listen_fd, NULL, NULL, NULL); + break; + } + } + + s->fd = fd; + s->nc.link_down = false; + net_socket_connect(s); + snprintf(s->nc.info_str, sizeof(s->nc.info_str), + "socket: connection from %s:%d", + inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); +} + +static int net_socket_listen_init(NetClientState *peer, + const char *model, + const char *name, + const char *host_str) +{ + NetClientState *nc; + NetSocketState *s; + struct sockaddr_in saddr; + int fd, ret; + + if (parse_host_port(&saddr, host_str) < 0) + return -1; + + fd = qemu_socket(PF_INET, SOCK_STREAM, 0); + if (fd < 0) { + perror("socket"); + return -1; + } + qemu_set_nonblock(fd); + + socket_set_fast_reuse(fd); + + ret = bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)); + if (ret < 0) { + perror("bind"); + closesocket(fd); + return -1; + } + ret = listen(fd, 0); + if (ret 
< 0) { + perror("listen"); + closesocket(fd); + return -1; + } + + nc = qemu_new_net_client(&net_socket_info, peer, model, name); + s = DO_UPCAST(NetSocketState, nc, nc); + s->fd = -1; + s->listen_fd = fd; + s->nc.link_down = true; + + qemu_set_fd_handler(s->listen_fd, net_socket_accept, NULL, s); + return 0; +} + +static int net_socket_connect_init(NetClientState *peer, + const char *model, + const char *name, + const char *host_str) +{ + NetSocketState *s; + int fd, connected, ret, err; + struct sockaddr_in saddr; + + if (parse_host_port(&saddr, host_str) < 0) + return -1; + + fd = qemu_socket(PF_INET, SOCK_STREAM, 0); + if (fd < 0) { + perror("socket"); + return -1; + } + qemu_set_nonblock(fd); + + connected = 0; + for(;;) { + ret = connect(fd, (struct sockaddr *)&saddr, sizeof(saddr)); + if (ret < 0) { + err = socket_error(); + if (err == EINTR || err == EWOULDBLOCK) { + } else if (err == EINPROGRESS) { + break; +#ifdef _WIN32 + } else if (err == WSAEALREADY || err == WSAEINVAL) { + break; +#endif + } else { + perror("connect"); + closesocket(fd); + return -1; + } + } else { + connected = 1; + break; + } + } + s = net_socket_fd_init(peer, model, name, fd, connected); + if (!s) + return -1; + snprintf(s->nc.info_str, sizeof(s->nc.info_str), + "socket: connect to %s:%d", + inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); + return 0; +} + +static int net_socket_mcast_init(NetClientState *peer, + const char *model, + const char *name, + const char *host_str, + const char *localaddr_str) +{ + NetSocketState *s; + int fd; + struct sockaddr_in saddr; + struct in_addr localaddr, *param_localaddr; + + if (parse_host_port(&saddr, host_str) < 0) + return -1; + + if (localaddr_str != NULL) { + if (inet_aton(localaddr_str, &localaddr) == 0) + return -1; + param_localaddr = &localaddr; + } else { + param_localaddr = NULL; + } + + fd = net_socket_mcast_create(&saddr, param_localaddr); + if (fd < 0) + return -1; + + s = net_socket_fd_init(peer, model, name, fd, 0); + if (!s) 
+ return -1; + + s->dgram_dst = saddr; + + snprintf(s->nc.info_str, sizeof(s->nc.info_str), + "socket: mcast=%s:%d", + inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); + return 0; + +} + +static int net_socket_udp_init(NetClientState *peer, + const char *model, + const char *name, + const char *rhost, + const char *lhost) +{ + NetSocketState *s; + int fd, ret; + struct sockaddr_in laddr, raddr; + + if (parse_host_port(&laddr, lhost) < 0) { + return -1; + } + + if (parse_host_port(&raddr, rhost) < 0) { + return -1; + } + + fd = qemu_socket(PF_INET, SOCK_DGRAM, 0); + if (fd < 0) { + perror("socket(PF_INET, SOCK_DGRAM)"); + return -1; + } + + ret = socket_set_fast_reuse(fd); + if (ret < 0) { + closesocket(fd); + return -1; + } + ret = bind(fd, (struct sockaddr *)&laddr, sizeof(laddr)); + if (ret < 0) { + perror("bind"); + closesocket(fd); + return -1; + } + qemu_set_nonblock(fd); + + s = net_socket_fd_init(peer, model, name, fd, 0); + if (!s) { + return -1; + } + + s->dgram_dst = raddr; + + snprintf(s->nc.info_str, sizeof(s->nc.info_str), + "socket: udp=%s:%d", + inet_ntoa(raddr.sin_addr), ntohs(raddr.sin_port)); + return 0; +} + +int net_init_socket(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp) +{ + /* FIXME error_setg(errp, ...) 
on failure */ + Error *err = NULL; + const NetdevSocketOptions *sock; + + assert(opts->type == NET_CLIENT_OPTIONS_KIND_SOCKET); + sock = opts->u.socket; + + if (sock->has_fd + sock->has_listen + sock->has_connect + sock->has_mcast + + sock->has_udp != 1) { + error_report("exactly one of fd=, listen=, connect=, mcast= or udp=" + " is required"); + return -1; + } + + if (sock->has_localaddr && !sock->has_mcast && !sock->has_udp) { + error_report("localaddr= is only valid with mcast= or udp="); + return -1; + } + + if (sock->has_fd) { + int fd; + + fd = monitor_fd_param(cur_mon, sock->fd, &err); + if (fd == -1) { + error_report_err(err); + return -1; + } + qemu_set_nonblock(fd); + if (!net_socket_fd_init(peer, "socket", name, fd, 1)) { + return -1; + } + return 0; + } + + if (sock->has_listen) { + if (net_socket_listen_init(peer, "socket", name, sock->listen) == -1) { + return -1; + } + return 0; + } + + if (sock->has_connect) { + if (net_socket_connect_init(peer, "socket", name, sock->connect) == + -1) { + return -1; + } + return 0; + } + + if (sock->has_mcast) { + /* if sock->localaddr is missing, it has been initialized to "all bits + * zero" */ + if (net_socket_mcast_init(peer, "socket", name, sock->mcast, + sock->localaddr) == -1) { + return -1; + } + return 0; + } + + assert(sock->has_udp); + if (!sock->has_localaddr) { + error_report("localaddr= is mandatory with udp="); + return -1; + } + if (net_socket_udp_init(peer, "socket", name, sock->udp, sock->localaddr) == + -1) { + return -1; + } + return 0; +} diff --git a/src/net/tap-aix.c b/src/net/tap-aix.c new file mode 100644 index 0000000..e84fc39 --- /dev/null +++ b/src/net/tap-aix.c @@ -0,0 +1,87 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation 
the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "tap_int.h" +#include <stdio.h> + +int tap_open(char *ifname, int ifname_size, int *vnet_hdr, + int vnet_hdr_required, int mq_required, Error **errp) +{ + error_setg(errp, "no tap on AIX"); + return -1; +} + +void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp) +{ +} + +int tap_probe_vnet_hdr(int fd) +{ + return 0; +} + +int tap_probe_has_ufo(int fd) +{ + return 0; +} + +int tap_probe_vnet_hdr_len(int fd, int len) +{ + return 0; +} + +void tap_fd_set_vnet_hdr_len(int fd, int len) +{ +} + +int tap_fd_set_vnet_le(int fd, int is_le) +{ + return -EINVAL; +} + +int tap_fd_set_vnet_be(int fd, int is_be) +{ + return -EINVAL; +} + +void tap_fd_set_offload(int fd, int csum, int tso4, + int tso6, int ecn, int ufo) +{ +} + +int tap_fd_enable(int fd) +{ + return -1; +} + +int tap_fd_disable(int fd) +{ + return -1; +} + +int tap_fd_get_ifname(int fd, char *ifname) +{ + return -1; +} + diff --git a/src/net/tap-bsd.c b/src/net/tap-bsd.c new file mode 100644 index 0000000..0103a97 --- /dev/null +++ b/src/net/tap-bsd.c @@ -0,0 +1,255 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice 
Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "tap_int.h" +#include "qemu-common.h" +#include "sysemu/sysemu.h" +#include "qemu/error-report.h" + +#if defined(__NetBSD__) || defined(__FreeBSD__) +#include <sys/ioctl.h> +#include <net/if.h> +#include <net/if_tap.h> +#endif + +#ifndef __FreeBSD__ +int tap_open(char *ifname, int ifname_size, int *vnet_hdr, + int vnet_hdr_required, int mq_required, Error **errp) +{ + int fd; +#ifdef TAPGIFNAME + struct ifreq ifr; +#else + char *dev; + struct stat s; +#endif + + /* if no ifname is given, always start the search from tap0/tun0. 
*/ + int i; + char dname[100]; + + for (i = 0; i < 10; i++) { + if (*ifname) { + snprintf(dname, sizeof dname, "/dev/%s", ifname); + } else { +#if defined(__OpenBSD__) + snprintf(dname, sizeof dname, "/dev/tun%d", i); +#else + snprintf(dname, sizeof dname, "/dev/tap%d", i); +#endif + } + TFR(fd = open(dname, O_RDWR)); + if (fd >= 0) { + break; + } + else if (errno == ENXIO || errno == ENOENT) { + break; + } + if (*ifname) { + break; + } + } + if (fd < 0) { + error_setg_errno(errp, errno, "could not open %s", dname); + return -1; + } + +#ifdef TAPGIFNAME + if (ioctl(fd, TAPGIFNAME, (void *)&ifr) < 0) { + error_setg_errno(errp, errno, "could not get tap name"); + return -1; + } + pstrcpy(ifname, ifname_size, ifr.ifr_name); +#else + if (fstat(fd, &s) < 0) { + error_setg_errno(errp, errno, "could not stat %s", dname); + return -1; + } + dev = devname(s.st_rdev, S_IFCHR); + pstrcpy(ifname, ifname_size, dev); +#endif + + if (*vnet_hdr) { + /* BSD doesn't have IFF_VNET_HDR */ + *vnet_hdr = 0; + + if (vnet_hdr_required && !*vnet_hdr) { + error_setg(errp, "vnet_hdr=1 requested, but no kernel " + "support for IFF_VNET_HDR available"); + close(fd); + return -1; + } + } + fcntl(fd, F_SETFL, O_NONBLOCK); + return fd; +} + +#else /* __FreeBSD__ */ + +#define PATH_NET_TAP "/dev/tap" + +static int tap_open_clone(char *ifname, int ifname_size, Error **errp) +{ + int fd, s, ret; + struct ifreq ifr; + + TFR(fd = open(PATH_NET_TAP, O_RDWR)); + if (fd < 0) { + error_setg_errno(errp, errno, "could not open %s", PATH_NET_TAP); + return -1; + } + + memset(&ifr, 0, sizeof(ifr)); + + ret = ioctl(fd, TAPGIFNAME, (void *)&ifr); + if (ret < 0) { + error_setg_errno(errp, errno, "could not get tap interface name"); + close(fd); + return -1; + } + + if (ifname[0] != '\0') { + /* User requested the interface to have a specific name */ + s = socket(AF_LOCAL, SOCK_DGRAM, 0); + if (s < 0) { + error_setg_errno(errp, errno, + "could not open socket to set interface name"); + close(fd); + return -1; + } 
+ ifr.ifr_data = ifname; + ret = ioctl(s, SIOCSIFNAME, (void *)&ifr); + close(s); + if (ret < 0) { + error_setg(errp, "could not set tap interface name"); + close(fd); + return -1; + } + } else { + pstrcpy(ifname, ifname_size, ifr.ifr_name); + } + + return fd; +} + +int tap_open(char *ifname, int ifname_size, int *vnet_hdr, + int vnet_hdr_required, int mq_required, Error **errp) +{ + int fd = -1; + + /* If the specified tap device already exists just use it. */ + if (ifname[0] != '\0') { + char dname[100]; + snprintf(dname, sizeof dname, "/dev/%s", ifname); + TFR(fd = open(dname, O_RDWR)); + if (fd < 0 && errno != ENOENT) { + error_setg_errno(errp, errno, "could not open %s", dname); + return -1; + } + } + + if (fd < 0) { + /* Tap device not specified or does not exist. */ + if ((fd = tap_open_clone(ifname, ifname_size, errp)) < 0) { + return -1; + } + } + + if (*vnet_hdr) { + /* BSD doesn't have IFF_VNET_HDR */ + *vnet_hdr = 0; + + if (vnet_hdr_required && !*vnet_hdr) { + error_setg(errp, "vnet_hdr=1 requested, but no kernel " + "support for IFF_VNET_HDR available"); + goto error; + } + } + if (mq_required) { + error_setg(errp, "mq_required requested, but no kernel support" + " for IFF_MULTI_QUEUE available"); + goto error; + } + + fcntl(fd, F_SETFL, O_NONBLOCK); + return fd; + +error: + close(fd); + return -1; +} +#endif /* __FreeBSD__ */ + +void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp) +{ +} + +int tap_probe_vnet_hdr(int fd) +{ + return 0; +} + +int tap_probe_has_ufo(int fd) +{ + return 0; +} + +int tap_probe_vnet_hdr_len(int fd, int len) +{ + return 0; +} + +void tap_fd_set_vnet_hdr_len(int fd, int len) +{ +} + +int tap_fd_set_vnet_le(int fd, int is_le) +{ + return -EINVAL; +} + +int tap_fd_set_vnet_be(int fd, int is_be) +{ + return -EINVAL; +} + +void tap_fd_set_offload(int fd, int csum, int tso4, + int tso6, int ecn, int ufo) +{ +} + +int tap_fd_enable(int fd) +{ + return -1; +} + +int tap_fd_disable(int fd) +{ + return -1; +} + +int 
tap_fd_get_ifname(int fd, char *ifname) +{ + return -1; +} diff --git a/src/net/tap-haiku.c b/src/net/tap-haiku.c new file mode 100644 index 0000000..2e738ec --- /dev/null +++ b/src/net/tap-haiku.c @@ -0,0 +1,86 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "tap_int.h" +#include <stdio.h> + +int tap_open(char *ifname, int ifname_size, int *vnet_hdr, + int vnet_hdr_required, int mq_required, Error **errp) +{ + error_setg(errp, "no tap on Haiku"); + return -1; +} + +void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp) +{ +} + +int tap_probe_vnet_hdr(int fd) +{ + return 0; +} + +int tap_probe_has_ufo(int fd) +{ + return 0; +} + +int tap_probe_vnet_hdr_len(int fd, int len) +{ + return 0; +} + +void tap_fd_set_vnet_hdr_len(int fd, int len) +{ +} + +int tap_fd_set_vnet_le(int fd, int is_le) +{ + return -EINVAL; +} + +int tap_fd_set_vnet_be(int fd, int is_be) +{ + return -EINVAL; +} + +void tap_fd_set_offload(int fd, int csum, int tso4, + int tso6, int ecn, int ufo) +{ +} + +int tap_fd_enable(int fd) +{ + return -1; +} + +int tap_fd_disable(int fd) +{ + return -1; +} + +int tap_fd_get_ifname(int fd, char *ifname) +{ + return -1; +} diff --git a/src/net/tap-linux.c b/src/net/tap-linux.c new file mode 100644 index 0000000..5bd9d21 --- /dev/null +++ b/src/net/tap-linux.c @@ -0,0 +1,314 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2009 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "tap_int.h" +#include "tap-linux.h" +#include "net/tap.h" + +#include <net/if.h> +#include <sys/ioctl.h> + +#include "sysemu/sysemu.h" +#include "qemu-common.h" +#include "qemu/error-report.h" + +#define PATH_NET_TUN "/dev/net/tun" + +int tap_open(char *ifname, int ifname_size, int *vnet_hdr, + int vnet_hdr_required, int mq_required, Error **errp) +{ + struct ifreq ifr; + int fd, ret; + int len = sizeof(struct virtio_net_hdr); + unsigned int features; + + TFR(fd = open(PATH_NET_TUN, O_RDWR)); + if (fd < 0) { + error_setg_errno(errp, errno, "could not open %s", PATH_NET_TUN); + return -1; + } + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + + if (ioctl(fd, TUNGETFEATURES, &features) == -1) { + error_report("warning: TUNGETFEATURES failed: %s", strerror(errno)); + features = 0; + } + + if (features & IFF_ONE_QUEUE) { + ifr.ifr_flags |= IFF_ONE_QUEUE; + } + + if (*vnet_hdr) { + if (features & IFF_VNET_HDR) { + *vnet_hdr = 1; + ifr.ifr_flags |= IFF_VNET_HDR; + } else { + *vnet_hdr = 0; + } + + if (vnet_hdr_required && !*vnet_hdr) { + error_setg(errp, "vnet_hdr=1 requested, but no kernel " + "support for IFF_VNET_HDR available"); + close(fd); + return -1; + } + /* + * Make sure vnet header size has the default value: for a persistent + * tap it might have been modified e.g. by another instance of qemu. + * Ignore errors since old kernels do not support this ioctl: in this + * case the header size implicitly has the correct value. 
+ */ + ioctl(fd, TUNSETVNETHDRSZ, &len); + } + + if (mq_required) { + if (!(features & IFF_MULTI_QUEUE)) { + error_setg(errp, "multiqueue required, but no kernel " + "support for IFF_MULTI_QUEUE available"); + close(fd); + return -1; + } else { + ifr.ifr_flags |= IFF_MULTI_QUEUE; + } + } + + if (ifname[0] != '\0') + pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname); + else + pstrcpy(ifr.ifr_name, IFNAMSIZ, "tap%d"); + ret = ioctl(fd, TUNSETIFF, (void *) &ifr); + if (ret != 0) { + if (ifname[0] != '\0') { + error_setg_errno(errp, errno, "could not configure %s (%s)", + PATH_NET_TUN, ifr.ifr_name); + } else { + error_setg_errno(errp, errno, "could not configure %s", + PATH_NET_TUN); + } + close(fd); + return -1; + } + pstrcpy(ifname, ifname_size, ifr.ifr_name); + fcntl(fd, F_SETFL, O_NONBLOCK); + return fd; +} + +/* sndbuf implements a kind of flow control for tap. + * Unfortunately when it's enabled, and packets are sent + * to other guests on the same host, the receiver + * can lock up the transmitter indefinitely. + * + * To avoid packet loss, sndbuf should be set to a value lower than the tx + * queue capacity of any destination network interface. + * Ethernet NICs generally have txqueuelen=1000, so 1Mb is + * a good value, given a 1500 byte MTU. + */ +#define TAP_DEFAULT_SNDBUF 0 + +void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp) +{ + int sndbuf; + + sndbuf = !tap->has_sndbuf ? TAP_DEFAULT_SNDBUF : + tap->sndbuf > INT_MAX ? 
INT_MAX : + tap->sndbuf; + + if (!sndbuf) { + sndbuf = INT_MAX; + } + + if (ioctl(fd, TUNSETSNDBUF, &sndbuf) == -1 && tap->has_sndbuf) { + error_setg_errno(errp, errno, "TUNSETSNDBUF ioctl failed"); + } +} + +int tap_probe_vnet_hdr(int fd) +{ + struct ifreq ifr; + + if (ioctl(fd, TUNGETIFF, &ifr) != 0) { + error_report("TUNGETIFF ioctl() failed: %s", strerror(errno)); + return 0; + } + + return ifr.ifr_flags & IFF_VNET_HDR; +} + +int tap_probe_has_ufo(int fd) +{ + unsigned offload; + + offload = TUN_F_CSUM | TUN_F_UFO; + + if (ioctl(fd, TUNSETOFFLOAD, offload) < 0) + return 0; + + return 1; +} + +/* Verify that we can assign given length */ +int tap_probe_vnet_hdr_len(int fd, int len) +{ + int orig; + if (ioctl(fd, TUNGETVNETHDRSZ, &orig) == -1) { + return 0; + } + if (ioctl(fd, TUNSETVNETHDRSZ, &len) == -1) { + return 0; + } + /* Restore original length: we can't handle failure. */ + if (ioctl(fd, TUNSETVNETHDRSZ, &orig) == -1) { + fprintf(stderr, "TUNGETVNETHDRSZ ioctl() failed: %s. Exiting.\n", + strerror(errno)); + abort(); + return -errno; + } + return 1; +} + +void tap_fd_set_vnet_hdr_len(int fd, int len) +{ + if (ioctl(fd, TUNSETVNETHDRSZ, &len) == -1) { + fprintf(stderr, "TUNSETVNETHDRSZ ioctl() failed: %s. Exiting.\n", + strerror(errno)); + abort(); + } +} + +int tap_fd_set_vnet_le(int fd, int is_le) +{ + int arg = is_le ? 1 : 0; + + if (!ioctl(fd, TUNSETVNETLE, &arg)) { + return 0; + } + + /* Check if our kernel supports TUNSETVNETLE */ + if (errno == EINVAL) { + return -errno; + } + + error_report("TUNSETVNETLE ioctl() failed: %s.", strerror(errno)); + abort(); +} + +int tap_fd_set_vnet_be(int fd, int is_be) +{ + int arg = is_be ? 
1 : 0; + + if (!ioctl(fd, TUNSETVNETBE, &arg)) { + return 0; + } + + /* Check if our kernel supports TUNSETVNETBE */ + if (errno == EINVAL) { + return -errno; + } + + error_report("TUNSETVNETBE ioctl() failed: %s.", strerror(errno)); + abort(); +} + +void tap_fd_set_offload(int fd, int csum, int tso4, + int tso6, int ecn, int ufo) +{ + unsigned int offload = 0; + + /* Check if our kernel supports TUNSETOFFLOAD */ + if (ioctl(fd, TUNSETOFFLOAD, 0) != 0 && errno == EINVAL) { + return; + } + + if (csum) { + offload |= TUN_F_CSUM; + if (tso4) + offload |= TUN_F_TSO4; + if (tso6) + offload |= TUN_F_TSO6; + if ((tso4 || tso6) && ecn) + offload |= TUN_F_TSO_ECN; + if (ufo) + offload |= TUN_F_UFO; + } + + if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) { + offload &= ~TUN_F_UFO; + if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) { + fprintf(stderr, "TUNSETOFFLOAD ioctl() failed: %s\n", + strerror(errno)); + } + } +} + +/* Enable a specific queue of tap. */ +int tap_fd_enable(int fd) +{ + struct ifreq ifr; + int ret; + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_flags = IFF_ATTACH_QUEUE; + ret = ioctl(fd, TUNSETQUEUE, (void *) &ifr); + + if (ret != 0) { + error_report("could not enable queue"); + } + + return ret; +} + +/* Disable a specific queue of tap/ */ +int tap_fd_disable(int fd) +{ + struct ifreq ifr; + int ret; + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_flags = IFF_DETACH_QUEUE; + ret = ioctl(fd, TUNSETQUEUE, (void *) &ifr); + + if (ret != 0) { + error_report("could not disable queue"); + } + + return ret; +} + +int tap_fd_get_ifname(int fd, char *ifname) +{ + struct ifreq ifr; + + if (ioctl(fd, TUNGETIFF, &ifr) != 0) { + error_report("TUNGETIFF ioctl() failed: %s", + strerror(errno)); + return -1; + } + + pstrcpy(ifname, sizeof(ifr.ifr_name), ifr.ifr_name); + return 0; +} diff --git a/src/net/tap-linux.h b/src/net/tap-linux.h new file mode 100644 index 0000000..01dc6f8 --- /dev/null +++ b/src/net/tap-linux.h @@ -0,0 +1,54 @@ +/* + * Universal TUN/TAP device driver. 
+ * Copyright (C) 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef QEMU_TAP_LINUX_H +#define QEMU_TAP_LINUX_H + +#include <stdint.h> +#ifdef __linux__ + +#include <linux/ioctl.h> + +/* Ioctl defines */ +#define TUNSETIFF _IOW('T', 202, int) +#define TUNGETFEATURES _IOR('T', 207, unsigned int) +#define TUNSETOFFLOAD _IOW('T', 208, unsigned int) +#define TUNGETIFF _IOR('T', 210, unsigned int) +#define TUNSETSNDBUF _IOW('T', 212, int) +#define TUNGETVNETHDRSZ _IOR('T', 215, int) +#define TUNSETVNETHDRSZ _IOW('T', 216, int) +#define TUNSETQUEUE _IOW('T', 217, int) +#define TUNSETVNETLE _IOW('T', 220, int) +#define TUNSETVNETBE _IOW('T', 222, int) + +#endif + +/* TUNSETIFF ifr flags */ +#define IFF_TAP 0x0002 +#define IFF_NO_PI 0x1000 +#define IFF_ONE_QUEUE 0x2000 +#define IFF_VNET_HDR 0x4000 +#define IFF_MULTI_QUEUE 0x0100 +#define IFF_ATTACH_QUEUE 0x0200 +#define IFF_DETACH_QUEUE 0x0400 + +/* Features for GSO (TUNSETOFFLOAD). */ +#define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. */ +#define TUN_F_TSO4 0x02 /* I can handle TSO for IPv4 packets */ +#define TUN_F_TSO6 0x04 /* I can handle TSO for IPv6 packets */ +#define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. 
*/ +#define TUN_F_UFO 0x10 /* I can handle UFO packets */ + +#endif /* QEMU_TAP_LINUX_H */ diff --git a/src/net/tap-solaris.c b/src/net/tap-solaris.c new file mode 100644 index 0000000..0f60f78 --- /dev/null +++ b/src/net/tap-solaris.c @@ -0,0 +1,254 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "tap_int.h" +#include "sysemu/sysemu.h" + +#include <sys/stat.h> +#include <sys/ethernet.h> +#include <sys/sockio.h> +#include <netinet/arp.h> +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/ip_icmp.h> // must come after ip.h +#include <netinet/udp.h> +#include <netinet/tcp.h> +#include <net/if.h> +#include <stropts.h> +#include "qemu/error-report.h" + +ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen) +{ + struct strbuf sbuf; + int f = 0; + + sbuf.maxlen = maxlen; + sbuf.buf = (char *)buf; + + return getmsg(tapfd, NULL, &sbuf, &f) >= 0 ? sbuf.len : -1; +} + +#define TUNNEWPPA (('T'<<16) | 0x0001) +/* + * Allocate TAP device, returns opened fd. + * Stores dev name in the first arg(must be large enough). + */ +static int tap_alloc(char *dev, size_t dev_size, Error **errp) +{ + /* FIXME leaks like a sieve on error paths */ + /* FIXME suspicious: many errors are reported, then ignored */ + int tap_fd, if_fd, ppa = -1; + static int ip_fd = 0; + char *ptr; + + static int arp_fd = 0; + int ip_muxid, arp_muxid; + struct strioctl strioc_if, strioc_ppa; + int link_type = I_PLINK; + struct lifreq ifr; + char actual_name[32] = ""; + + memset(&ifr, 0x0, sizeof(ifr)); + + if( *dev ){ + ptr = dev; + while( *ptr && !qemu_isdigit((int)*ptr) ) ptr++; + ppa = atoi(ptr); + } + + /* Check if IP device was opened */ + if( ip_fd ) + close(ip_fd); + + TFR(ip_fd = open("/dev/udp", O_RDWR, 0)); + if (ip_fd < 0) { + error_setg(errp, "Can't open /dev/ip (actually /dev/udp)"); + return -1; + } + + TFR(tap_fd = open("/dev/tap", O_RDWR, 0)); + if (tap_fd < 0) { + error_setg(errp, "Can't open /dev/tap"); + return -1; + } + + /* Assign a new PPA and get its unit number. 
*/ + strioc_ppa.ic_cmd = TUNNEWPPA; + strioc_ppa.ic_timout = 0; + strioc_ppa.ic_len = sizeof(ppa); + strioc_ppa.ic_dp = (char *)&ppa; + if ((ppa = ioctl (tap_fd, I_STR, &strioc_ppa)) < 0) + error_report("Can't assign new interface"); + + TFR(if_fd = open("/dev/tap", O_RDWR, 0)); + if (if_fd < 0) { + error_setg(errp, "Can't open /dev/tap (2)"); + return -1; + } + if(ioctl(if_fd, I_PUSH, "ip") < 0){ + error_setg(errp, "Can't push IP module"); + return -1; + } + + if (ioctl(if_fd, SIOCGLIFFLAGS, &ifr) < 0) + error_report("Can't get flags"); + + snprintf (actual_name, 32, "tap%d", ppa); + pstrcpy(ifr.lifr_name, sizeof(ifr.lifr_name), actual_name); + + ifr.lifr_ppa = ppa; + /* Assign ppa according to the unit number returned by tun device */ + + if (ioctl (if_fd, SIOCSLIFNAME, &ifr) < 0) + error_report("Can't set PPA %d", ppa); + if (ioctl(if_fd, SIOCGLIFFLAGS, &ifr) <0) + error_report("Can't get flags"); + /* Push arp module to if_fd */ + if (ioctl (if_fd, I_PUSH, "arp") < 0) + error_report("Can't push ARP module (2)"); + + /* Push arp module to ip_fd */ + if (ioctl (ip_fd, I_POP, NULL) < 0) + error_report("I_POP failed"); + if (ioctl (ip_fd, I_PUSH, "arp") < 0) + error_report("Can't push ARP module (3)"); + /* Open arp_fd */ + TFR(arp_fd = open ("/dev/tap", O_RDWR, 0)); + if (arp_fd < 0) + error_report("Can't open %s", "/dev/tap"); + + /* Set ifname to arp */ + strioc_if.ic_cmd = SIOCSLIFNAME; + strioc_if.ic_timout = 0; + strioc_if.ic_len = sizeof(ifr); + strioc_if.ic_dp = (char *)𝔦 + if (ioctl(arp_fd, I_STR, &strioc_if) < 0){ + error_report("Can't set ifname to arp"); + } + + if((ip_muxid = ioctl(ip_fd, I_LINK, if_fd)) < 0){ + error_setg(errp, "Can't link TAP device to IP"); + return -1; + } + + if ((arp_muxid = ioctl (ip_fd, link_type, arp_fd)) < 0) + error_report("Can't link TAP device to ARP"); + + close (if_fd); + + memset(&ifr, 0x0, sizeof(ifr)); + pstrcpy(ifr.lifr_name, sizeof(ifr.lifr_name), actual_name); + ifr.lifr_ip_muxid = ip_muxid; + ifr.lifr_arp_muxid = 
arp_muxid; + + if (ioctl (ip_fd, SIOCSLIFMUXID, &ifr) < 0) + { + ioctl (ip_fd, I_PUNLINK , arp_muxid); + ioctl (ip_fd, I_PUNLINK, ip_muxid); + error_report("Can't set multiplexor id"); + } + + snprintf(dev, dev_size, "tap%d", ppa); + return tap_fd; +} + +int tap_open(char *ifname, int ifname_size, int *vnet_hdr, + int vnet_hdr_required, int mq_required, Error **errp) +{ + char dev[10]=""; + int fd; + + fd = tap_alloc(dev, sizeof(dev), errp); + if (fd < 0) { + return -1; + } + pstrcpy(ifname, ifname_size, dev); + if (*vnet_hdr) { + /* Solaris doesn't have IFF_VNET_HDR */ + *vnet_hdr = 0; + + if (vnet_hdr_required && !*vnet_hdr) { + error_setg(errp, "vnet_hdr=1 requested, but no kernel " + "support for IFF_VNET_HDR available"); + close(fd); + return -1; + } + } + fcntl(fd, F_SETFL, O_NONBLOCK); + return fd; +} + +void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp) +{ +} + +int tap_probe_vnet_hdr(int fd) +{ + return 0; +} + +int tap_probe_has_ufo(int fd) +{ + return 0; +} + +int tap_probe_vnet_hdr_len(int fd, int len) +{ + return 0; +} + +void tap_fd_set_vnet_hdr_len(int fd, int len) +{ +} + +int tap_fd_set_vnet_le(int fd, int is_le) +{ + return -EINVAL; +} + +int tap_fd_set_vnet_be(int fd, int is_be) +{ + return -EINVAL; +} + +void tap_fd_set_offload(int fd, int csum, int tso4, + int tso6, int ecn, int ufo) +{ +} + +int tap_fd_enable(int fd) +{ + return -1; +} + +int tap_fd_disable(int fd) +{ + return -1; +} + +int tap_fd_get_ifname(int fd, char *ifname) +{ + return -1; +} diff --git a/src/net/tap-win32.c b/src/net/tap-win32.c new file mode 100644 index 0000000..7fddb20 --- /dev/null +++ b/src/net/tap-win32.c @@ -0,0 +1,820 @@ +/* + * TAP-Win32 -- A kernel driver to provide virtual tap device functionality + * on Windows. Originally derived from the CIPE-Win32 + * project by Damion K. Wilson, with extensive modifications by + * James Yonan. + * + * All source code which derives from the CIPE-Win32 project is + * Copyright (C) Damion K. 
Wilson, 2003, and is released under the + * GPL version 2 (see below). + * + * All other source code is Copyright (C) James Yonan, 2003-2004, + * and is released under the GPL version 2 (see below). + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (see the file COPYING included with this + * distribution); if not, see <http://www.gnu.org/licenses/>. + */ + +#include "tap_int.h" + +#include "qemu-common.h" +#include "clients.h" /* net_init_tap */ +#include "net/net.h" +#include "net/tap.h" /* tap_has_ufo, ... 
*/ +#include "sysemu/sysemu.h" +#include "qemu/error-report.h" +#include <stdio.h> +#include <windows.h> +#include <winioctl.h> + +//============= +// TAP IOCTLs +//============= + +#define TAP_CONTROL_CODE(request,method) \ + CTL_CODE (FILE_DEVICE_UNKNOWN, request, method, FILE_ANY_ACCESS) + +#define TAP_IOCTL_GET_MAC TAP_CONTROL_CODE (1, METHOD_BUFFERED) +#define TAP_IOCTL_GET_VERSION TAP_CONTROL_CODE (2, METHOD_BUFFERED) +#define TAP_IOCTL_GET_MTU TAP_CONTROL_CODE (3, METHOD_BUFFERED) +#define TAP_IOCTL_GET_INFO TAP_CONTROL_CODE (4, METHOD_BUFFERED) +#define TAP_IOCTL_CONFIG_POINT_TO_POINT TAP_CONTROL_CODE (5, METHOD_BUFFERED) +#define TAP_IOCTL_SET_MEDIA_STATUS TAP_CONTROL_CODE (6, METHOD_BUFFERED) +#define TAP_IOCTL_CONFIG_DHCP_MASQ TAP_CONTROL_CODE (7, METHOD_BUFFERED) +#define TAP_IOCTL_GET_LOG_LINE TAP_CONTROL_CODE (8, METHOD_BUFFERED) +#define TAP_IOCTL_CONFIG_DHCP_SET_OPT TAP_CONTROL_CODE (9, METHOD_BUFFERED) + +//================= +// Registry keys +//================= + +#define ADAPTER_KEY "SYSTEM\\CurrentControlSet\\Control\\Class\\{4D36E972-E325-11CE-BFC1-08002BE10318}" + +#define NETWORK_CONNECTIONS_KEY "SYSTEM\\CurrentControlSet\\Control\\Network\\{4D36E972-E325-11CE-BFC1-08002BE10318}" + +//====================== +// Filesystem prefixes +//====================== + +#define USERMODEDEVICEDIR "\\\\.\\Global\\" +#define TAPSUFFIX ".tap" + + +//====================== +// Compile time configuration +//====================== + +//#define DEBUG_TAP_WIN32 + +/* FIXME: The asynch write path appears to be broken at + * present. WriteFile() ignores the lpNumberOfBytesWritten parameter + * for overlapped writes, with the result we return zero bytes sent, + * and after handling a single packet, receive is disabled for this + * interface. */ +/* #define TUN_ASYNCHRONOUS_WRITES 1 */ + +#define TUN_BUFFER_SIZE 1560 +#define TUN_MAX_BUFFER_COUNT 32 + +/* + * The data member "buffer" must be the first element in the tun_buffer + * structure. 
See the function, tap_win32_free_buffer. + */ +typedef struct tun_buffer_s { + unsigned char buffer [TUN_BUFFER_SIZE]; + unsigned long read_size; + struct tun_buffer_s* next; +} tun_buffer_t; + +typedef struct tap_win32_overlapped { + HANDLE handle; + HANDLE read_event; + HANDLE write_event; + HANDLE output_queue_semaphore; + HANDLE free_list_semaphore; + HANDLE tap_semaphore; + CRITICAL_SECTION output_queue_cs; + CRITICAL_SECTION free_list_cs; + OVERLAPPED read_overlapped; + OVERLAPPED write_overlapped; + tun_buffer_t buffers[TUN_MAX_BUFFER_COUNT]; + tun_buffer_t* free_list; + tun_buffer_t* output_queue_front; + tun_buffer_t* output_queue_back; +} tap_win32_overlapped_t; + +static tap_win32_overlapped_t tap_overlapped; + +static tun_buffer_t* get_buffer_from_free_list(tap_win32_overlapped_t* const overlapped) +{ + tun_buffer_t* buffer = NULL; + WaitForSingleObject(overlapped->free_list_semaphore, INFINITE); + EnterCriticalSection(&overlapped->free_list_cs); + buffer = overlapped->free_list; +// assert(buffer != NULL); + overlapped->free_list = buffer->next; + LeaveCriticalSection(&overlapped->free_list_cs); + buffer->next = NULL; + return buffer; +} + +static void put_buffer_on_free_list(tap_win32_overlapped_t* const overlapped, tun_buffer_t* const buffer) +{ + EnterCriticalSection(&overlapped->free_list_cs); + buffer->next = overlapped->free_list; + overlapped->free_list = buffer; + LeaveCriticalSection(&overlapped->free_list_cs); + ReleaseSemaphore(overlapped->free_list_semaphore, 1, NULL); +} + +static tun_buffer_t* get_buffer_from_output_queue(tap_win32_overlapped_t* const overlapped, const int block) +{ + tun_buffer_t* buffer = NULL; + DWORD result, timeout = block ? INFINITE : 0L; + + // Non-blocking call + result = WaitForSingleObject(overlapped->output_queue_semaphore, timeout); + + switch (result) + { + // The semaphore object was signaled. 
+ case WAIT_OBJECT_0: + EnterCriticalSection(&overlapped->output_queue_cs); + + buffer = overlapped->output_queue_front; + overlapped->output_queue_front = buffer->next; + + if(overlapped->output_queue_front == NULL) { + overlapped->output_queue_back = NULL; + } + + LeaveCriticalSection(&overlapped->output_queue_cs); + break; + + // Semaphore was nonsignaled, so a time-out occurred. + case WAIT_TIMEOUT: + // Cannot open another window. + break; + } + + return buffer; +} + +static tun_buffer_t* get_buffer_from_output_queue_immediate (tap_win32_overlapped_t* const overlapped) +{ + return get_buffer_from_output_queue(overlapped, 0); +} + +static void put_buffer_on_output_queue(tap_win32_overlapped_t* const overlapped, tun_buffer_t* const buffer) +{ + EnterCriticalSection(&overlapped->output_queue_cs); + + if(overlapped->output_queue_front == NULL && overlapped->output_queue_back == NULL) { + overlapped->output_queue_front = overlapped->output_queue_back = buffer; + } else { + buffer->next = NULL; + overlapped->output_queue_back->next = buffer; + overlapped->output_queue_back = buffer; + } + + LeaveCriticalSection(&overlapped->output_queue_cs); + + ReleaseSemaphore(overlapped->output_queue_semaphore, 1, NULL); +} + + +static int is_tap_win32_dev(const char *guid) +{ + HKEY netcard_key; + LONG status; + DWORD len; + int i = 0; + + status = RegOpenKeyEx( + HKEY_LOCAL_MACHINE, + ADAPTER_KEY, + 0, + KEY_READ, + &netcard_key); + + if (status != ERROR_SUCCESS) { + return FALSE; + } + + for (;;) { + char enum_name[256]; + char unit_string[256]; + HKEY unit_key; + char component_id_string[] = "ComponentId"; + char component_id[256]; + char net_cfg_instance_id_string[] = "NetCfgInstanceId"; + char net_cfg_instance_id[256]; + DWORD data_type; + + len = sizeof (enum_name); + status = RegEnumKeyEx( + netcard_key, + i, + enum_name, + &len, + NULL, + NULL, + NULL, + NULL); + + if (status == ERROR_NO_MORE_ITEMS) + break; + else if (status != ERROR_SUCCESS) { + return FALSE; + } + + 
snprintf (unit_string, sizeof(unit_string), "%s\\%s", + ADAPTER_KEY, enum_name); + + status = RegOpenKeyEx( + HKEY_LOCAL_MACHINE, + unit_string, + 0, + KEY_READ, + &unit_key); + + if (status != ERROR_SUCCESS) { + return FALSE; + } else { + len = sizeof (component_id); + status = RegQueryValueEx( + unit_key, + component_id_string, + NULL, + &data_type, + (LPBYTE)component_id, + &len); + + if (!(status != ERROR_SUCCESS || data_type != REG_SZ)) { + len = sizeof (net_cfg_instance_id); + status = RegQueryValueEx( + unit_key, + net_cfg_instance_id_string, + NULL, + &data_type, + (LPBYTE)net_cfg_instance_id, + &len); + + if (status == ERROR_SUCCESS && data_type == REG_SZ) { + if (/* !strcmp (component_id, TAP_COMPONENT_ID) &&*/ + !strcmp (net_cfg_instance_id, guid)) { + RegCloseKey (unit_key); + RegCloseKey (netcard_key); + return TRUE; + } + } + } + RegCloseKey (unit_key); + } + ++i; + } + + RegCloseKey (netcard_key); + return FALSE; +} + +static int get_device_guid( + char *name, + int name_size, + char *actual_name, + int actual_name_size) +{ + LONG status; + HKEY control_net_key; + DWORD len; + int i = 0; + int stop = 0; + + status = RegOpenKeyEx( + HKEY_LOCAL_MACHINE, + NETWORK_CONNECTIONS_KEY, + 0, + KEY_READ, + &control_net_key); + + if (status != ERROR_SUCCESS) { + return -1; + } + + while (!stop) + { + char enum_name[256]; + char connection_string[256]; + HKEY connection_key; + char name_data[256]; + DWORD name_type; + const char name_string[] = "Name"; + + len = sizeof (enum_name); + status = RegEnumKeyEx( + control_net_key, + i, + enum_name, + &len, + NULL, + NULL, + NULL, + NULL); + + if (status == ERROR_NO_MORE_ITEMS) + break; + else if (status != ERROR_SUCCESS) { + return -1; + } + + snprintf(connection_string, + sizeof(connection_string), + "%s\\%s\\Connection", + NETWORK_CONNECTIONS_KEY, enum_name); + + status = RegOpenKeyEx( + HKEY_LOCAL_MACHINE, + connection_string, + 0, + KEY_READ, + &connection_key); + + if (status == ERROR_SUCCESS) { + len = sizeof 
(name_data); + status = RegQueryValueEx( + connection_key, + name_string, + NULL, + &name_type, + (LPBYTE)name_data, + &len); + + if (status != ERROR_SUCCESS || name_type != REG_SZ) { + ++i; + continue; + } + else { + if (is_tap_win32_dev(enum_name)) { + snprintf(name, name_size, "%s", enum_name); + if (actual_name) { + if (strcmp(actual_name, "") != 0) { + if (strcmp(name_data, actual_name) != 0) { + RegCloseKey (connection_key); + ++i; + continue; + } + } + else { + snprintf(actual_name, actual_name_size, "%s", name_data); + } + } + stop = 1; + } + } + + RegCloseKey (connection_key); + } + ++i; + } + + RegCloseKey (control_net_key); + + if (stop == 0) + return -1; + + return 0; +} + +static int tap_win32_set_status(HANDLE handle, int status) +{ + unsigned long len = 0; + + return DeviceIoControl(handle, TAP_IOCTL_SET_MEDIA_STATUS, + &status, sizeof (status), + &status, sizeof (status), &len, NULL); +} + +static void tap_win32_overlapped_init(tap_win32_overlapped_t* const overlapped, const HANDLE handle) +{ + overlapped->handle = handle; + + overlapped->read_event = CreateEvent(NULL, FALSE, FALSE, NULL); + overlapped->write_event = CreateEvent(NULL, FALSE, FALSE, NULL); + + overlapped->read_overlapped.Offset = 0; + overlapped->read_overlapped.OffsetHigh = 0; + overlapped->read_overlapped.hEvent = overlapped->read_event; + + overlapped->write_overlapped.Offset = 0; + overlapped->write_overlapped.OffsetHigh = 0; + overlapped->write_overlapped.hEvent = overlapped->write_event; + + InitializeCriticalSection(&overlapped->output_queue_cs); + InitializeCriticalSection(&overlapped->free_list_cs); + + overlapped->output_queue_semaphore = CreateSemaphore( + NULL, // default security attributes + 0, // initial count + TUN_MAX_BUFFER_COUNT, // maximum count + NULL); // unnamed semaphore + + if(!overlapped->output_queue_semaphore) { + fprintf(stderr, "error creating output queue semaphore!\n"); + } + + overlapped->free_list_semaphore = CreateSemaphore( + NULL, // default 
security attributes + TUN_MAX_BUFFER_COUNT, // initial count + TUN_MAX_BUFFER_COUNT, // maximum count + NULL); // unnamed semaphore + + if(!overlapped->free_list_semaphore) { + fprintf(stderr, "error creating free list semaphore!\n"); + } + + overlapped->free_list = overlapped->output_queue_front = overlapped->output_queue_back = NULL; + + { + unsigned index; + for(index = 0; index < TUN_MAX_BUFFER_COUNT; index++) { + tun_buffer_t* element = &overlapped->buffers[index]; + element->next = overlapped->free_list; + overlapped->free_list = element; + } + } + /* To count buffers, initially no-signal. */ + overlapped->tap_semaphore = CreateSemaphore(NULL, 0, TUN_MAX_BUFFER_COUNT, NULL); + if(!overlapped->tap_semaphore) + fprintf(stderr, "error creating tap_semaphore.\n"); +} + +static int tap_win32_write(tap_win32_overlapped_t *overlapped, + const void *buffer, unsigned long size) +{ + unsigned long write_size; + BOOL result; + DWORD error; + +#ifdef TUN_ASYNCHRONOUS_WRITES + result = GetOverlappedResult( overlapped->handle, &overlapped->write_overlapped, + &write_size, FALSE); + + if (!result && GetLastError() == ERROR_IO_INCOMPLETE) + WaitForSingleObject(overlapped->write_event, INFINITE); +#endif + + result = WriteFile(overlapped->handle, buffer, size, + &write_size, &overlapped->write_overlapped); + +#ifdef TUN_ASYNCHRONOUS_WRITES + /* FIXME: we can't sensibly set write_size here, without waiting + * for the IO to complete! Moreover, we can't return zero, + * because that will disable receive on this interface, and we + * also can't assume it will succeed and return the full size, + * because that will result in the buffer being reclaimed while + * the IO is in progress. */ +#error Async writes are broken. Please disable TUN_ASYNCHRONOUS_WRITES. 
+#else /* !TUN_ASYNCHRONOUS_WRITES */ + if (!result) { + error = GetLastError(); + if (error == ERROR_IO_PENDING) { + result = GetOverlappedResult(overlapped->handle, + &overlapped->write_overlapped, + &write_size, TRUE); + } + } +#endif + + if (!result) { +#ifdef DEBUG_TAP_WIN32 + LPTSTR msgbuf; + error = GetLastError(); + FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM, + NULL, error, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + &msgbuf, 0, NULL); + fprintf(stderr, "Tap-Win32: Error WriteFile %d - %s\n", error, msgbuf); + LocalFree(msgbuf); +#endif + return 0; + } + + return write_size; +} + +static DWORD WINAPI tap_win32_thread_entry(LPVOID param) +{ + tap_win32_overlapped_t *overlapped = (tap_win32_overlapped_t*)param; + unsigned long read_size; + BOOL result; + DWORD dwError; + tun_buffer_t* buffer = get_buffer_from_free_list(overlapped); + + + for (;;) { + result = ReadFile(overlapped->handle, + buffer->buffer, + sizeof(buffer->buffer), + &read_size, + &overlapped->read_overlapped); + if (!result) { + dwError = GetLastError(); + if (dwError == ERROR_IO_PENDING) { + WaitForSingleObject(overlapped->read_event, INFINITE); + result = GetOverlappedResult( overlapped->handle, &overlapped->read_overlapped, + &read_size, FALSE); + if (!result) { +#ifdef DEBUG_TAP_WIN32 + LPVOID lpBuffer; + dwError = GetLastError(); + FormatMessage( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, + NULL, dwError, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR) & lpBuffer, 0, NULL ); + fprintf(stderr, "Tap-Win32: Error GetOverlappedResult %d - %s\n", dwError, lpBuffer); + LocalFree( lpBuffer ); +#endif + } + } else { +#ifdef DEBUG_TAP_WIN32 + LPVOID lpBuffer; + FormatMessage( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, + NULL, dwError, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR) & lpBuffer, 0, NULL ); + fprintf(stderr, "Tap-Win32: Error ReadFile %d - %s\n", dwError, lpBuffer); + LocalFree( lpBuffer ); +#endif + } + } 
+ + if(read_size > 0) { + buffer->read_size = read_size; + put_buffer_on_output_queue(overlapped, buffer); + ReleaseSemaphore(overlapped->tap_semaphore, 1, NULL); + buffer = get_buffer_from_free_list(overlapped); + } + } + + return 0; +} + +static int tap_win32_read(tap_win32_overlapped_t *overlapped, + uint8_t **pbuf, int max_size) +{ + int size = 0; + + tun_buffer_t* buffer = get_buffer_from_output_queue_immediate(overlapped); + + if(buffer != NULL) { + *pbuf = buffer->buffer; + size = (int)buffer->read_size; + if(size > max_size) { + size = max_size; + } + } + + return size; +} + +static void tap_win32_free_buffer(tap_win32_overlapped_t *overlapped, + uint8_t *pbuf) +{ + tun_buffer_t* buffer = (tun_buffer_t*)pbuf; + put_buffer_on_free_list(overlapped, buffer); +} + +static int tap_win32_open(tap_win32_overlapped_t **phandle, + const char *preferred_name) +{ + char device_path[256]; + char device_guid[0x100]; + int rc; + HANDLE handle; + BOOL bret; + char name_buffer[0x100] = {0, }; + struct { + unsigned long major; + unsigned long minor; + unsigned long debug; + } version; + DWORD version_len; + DWORD idThread; + + if (preferred_name != NULL) { + snprintf(name_buffer, sizeof(name_buffer), "%s", preferred_name); + } + + rc = get_device_guid(device_guid, sizeof(device_guid), name_buffer, sizeof(name_buffer)); + if (rc) + return -1; + + snprintf (device_path, sizeof(device_path), "%s%s%s", + USERMODEDEVICEDIR, + device_guid, + TAPSUFFIX); + + handle = CreateFile ( + device_path, + GENERIC_READ | GENERIC_WRITE, + 0, + 0, + OPEN_EXISTING, + FILE_ATTRIBUTE_SYSTEM | FILE_FLAG_OVERLAPPED, + 0 ); + + if (handle == INVALID_HANDLE_VALUE) { + return -1; + } + + bret = DeviceIoControl(handle, TAP_IOCTL_GET_VERSION, + &version, sizeof (version), + &version, sizeof (version), &version_len, NULL); + + if (bret == FALSE) { + CloseHandle(handle); + return -1; + } + + if (!tap_win32_set_status(handle, TRUE)) { + return -1; + } + + tap_win32_overlapped_init(&tap_overlapped, 
handle); + + *phandle = &tap_overlapped; + + CreateThread(NULL, 0, tap_win32_thread_entry, + (LPVOID)&tap_overlapped, 0, &idThread); + return 0; +} + +/********************************************/ + + typedef struct TAPState { + NetClientState nc; + tap_win32_overlapped_t *handle; + } TAPState; + +static void tap_cleanup(NetClientState *nc) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + + qemu_del_wait_object(s->handle->tap_semaphore, NULL, NULL); + + /* FIXME: need to kill thread and close file handle: + tap_win32_close(s); + */ +} + +static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + + return tap_win32_write(s->handle, buf, size); +} + +static void tap_win32_send(void *opaque) +{ + TAPState *s = opaque; + uint8_t *buf; + int max_size = 4096; + int size; + + size = tap_win32_read(s->handle, &buf, max_size); + if (size > 0) { + qemu_send_packet(&s->nc, buf, size); + tap_win32_free_buffer(s->handle, buf); + } +} + +static bool tap_has_ufo(NetClientState *nc) +{ + return false; +} + +static bool tap_has_vnet_hdr(NetClientState *nc) +{ + return false; +} + +int tap_probe_vnet_hdr_len(int fd, int len) +{ + return 0; +} + +void tap_fd_set_vnet_hdr_len(int fd, int len) +{ +} + +int tap_fd_set_vnet_le(int fd, int is_le) +{ + return -EINVAL; +} + +int tap_fd_set_vnet_be(int fd, int is_be) +{ + return -EINVAL; +} + +static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr) +{ +} + +static void tap_set_offload(NetClientState *nc, int csum, int tso4, + int tso6, int ecn, int ufo) +{ +} + +struct vhost_net *tap_get_vhost_net(NetClientState *nc) +{ + return NULL; +} + +static bool tap_has_vnet_hdr_len(NetClientState *nc, int len) +{ + return false; +} + +static void tap_set_vnet_hdr_len(NetClientState *nc, int len) +{ + abort(); +} + +static NetClientInfo net_tap_win32_info = { + .type = NET_CLIENT_OPTIONS_KIND_TAP, + .size = sizeof(TAPState), + .receive = tap_receive, + .cleanup = 
tap_cleanup, + .has_ufo = tap_has_ufo, + .has_vnet_hdr = tap_has_vnet_hdr, + .has_vnet_hdr_len = tap_has_vnet_hdr_len, + .using_vnet_hdr = tap_using_vnet_hdr, + .set_offload = tap_set_offload, + .set_vnet_hdr_len = tap_set_vnet_hdr_len, +}; + +static int tap_win32_init(NetClientState *peer, const char *model, + const char *name, const char *ifname) +{ + NetClientState *nc; + TAPState *s; + tap_win32_overlapped_t *handle; + + if (tap_win32_open(&handle, ifname) < 0) { + printf("tap: Could not open '%s'\n", ifname); + return -1; + } + + nc = qemu_new_net_client(&net_tap_win32_info, peer, model, name); + + s = DO_UPCAST(TAPState, nc, nc); + + snprintf(s->nc.info_str, sizeof(s->nc.info_str), + "tap: ifname=%s", ifname); + + s->handle = handle; + + qemu_add_wait_object(s->handle->tap_semaphore, tap_win32_send, s); + + return 0; +} + +int net_init_tap(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp) +{ + /* FIXME error_setg(errp, ...) on failure */ + const NetdevTapOptions *tap; + + assert(opts->type == NET_CLIENT_OPTIONS_KIND_TAP); + tap = opts->u.tap; + + if (!tap->has_ifname) { + error_report("tap: no interface name"); + return -1; + } + + if (tap_win32_init(peer, "tap", name, tap->ifname) == -1) { + return -1; + } + + return 0; +} + +int tap_enable(NetClientState *nc) +{ + abort(); +} + +int tap_disable(NetClientState *nc) +{ + abort(); +} diff --git a/src/net/tap.c b/src/net/tap.c new file mode 100644 index 0000000..85c4142 --- /dev/null +++ b/src/net/tap.c @@ -0,0 +1,930 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2009 Red Hat, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "tap_int.h" + +#include "config-host.h" + +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <sys/socket.h> +#include <net/if.h> + +#include "net/net.h" +#include "clients.h" +#include "monitor/monitor.h" +#include "sysemu/sysemu.h" +#include "qemu-common.h" +#include "qemu/error-report.h" + +#include "net/tap.h" + +#include "net/vhost_net.h" + +typedef struct TAPState { + NetClientState nc; + int fd; + char down_script[1024]; + char down_script_arg[128]; + uint8_t buf[NET_BUFSIZE]; + bool read_poll; + bool write_poll; + bool using_vnet_hdr; + bool has_ufo; + bool enabled; + VHostNetState *vhost_net; + unsigned host_vnet_hdr_len; +} TAPState; + +static void launch_script(const char *setup_script, const char *ifname, + int fd, Error **errp); + +static void tap_send(void *opaque); +static void tap_writable(void *opaque); + +static void tap_update_fd_handler(TAPState *s) +{ + qemu_set_fd_handler(s->fd, + s->read_poll && s->enabled ? tap_send : NULL, + s->write_poll && s->enabled ? 
tap_writable : NULL, + s); +} + +static void tap_read_poll(TAPState *s, bool enable) +{ + s->read_poll = enable; + tap_update_fd_handler(s); +} + +static void tap_write_poll(TAPState *s, bool enable) +{ + s->write_poll = enable; + tap_update_fd_handler(s); +} + +static void tap_writable(void *opaque) +{ + TAPState *s = opaque; + + tap_write_poll(s, false); + + qemu_flush_queued_packets(&s->nc); +} + +static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt) +{ + ssize_t len; + + do { + len = writev(s->fd, iov, iovcnt); + } while (len == -1 && errno == EINTR); + + if (len == -1 && errno == EAGAIN) { + tap_write_poll(s, true); + return 0; + } + + return len; +} + +static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov, + int iovcnt) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + const struct iovec *iovp = iov; + struct iovec iov_copy[iovcnt + 1]; + struct virtio_net_hdr_mrg_rxbuf hdr = { }; + + if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { + iov_copy[0].iov_base = &hdr; + iov_copy[0].iov_len = s->host_vnet_hdr_len; + memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov)); + iovp = iov_copy; + iovcnt++; + } + + return tap_write_packet(s, iovp, iovcnt); +} + +static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + struct iovec iov[2]; + int iovcnt = 0; + struct virtio_net_hdr_mrg_rxbuf hdr = { }; + + if (s->host_vnet_hdr_len) { + iov[iovcnt].iov_base = &hdr; + iov[iovcnt].iov_len = s->host_vnet_hdr_len; + iovcnt++; + } + + iov[iovcnt].iov_base = (char *)buf; + iov[iovcnt].iov_len = size; + iovcnt++; + + return tap_write_packet(s, iov, iovcnt); +} + +static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + struct iovec iov[1]; + + if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { + return tap_receive_raw(nc, buf, size); + } + + iov[0].iov_base = (char *)buf; + iov[0].iov_len = 
size; + + return tap_write_packet(s, iov, 1); +} + +#ifndef __sun__ +ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen) +{ + return read(tapfd, buf, maxlen); +} +#endif + +static void tap_send_completed(NetClientState *nc, ssize_t len) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + tap_read_poll(s, true); +} + +static void tap_send(void *opaque) +{ + TAPState *s = opaque; + int size; + int packets = 0; + + while (true) { + uint8_t *buf = s->buf; + + size = tap_read_packet(s->fd, s->buf, sizeof(s->buf)); + if (size <= 0) { + break; + } + + if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { + buf += s->host_vnet_hdr_len; + size -= s->host_vnet_hdr_len; + } + + size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed); + if (size == 0) { + tap_read_poll(s, false); + break; + } else if (size < 0) { + break; + } + + /* + * When the host keeps receiving more packets while tap_send() is + * running we can hog the QEMU global mutex. Limit the number of + * packets that are processed per tap_send() callback to prevent + * stalling the guest. 
+ */ + packets++; + if (packets >= 50) { + break; + } + } +} + +static bool tap_has_ufo(NetClientState *nc) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); + + return s->has_ufo; +} + +static bool tap_has_vnet_hdr(NetClientState *nc) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); + + return !!s->host_vnet_hdr_len; +} + +static bool tap_has_vnet_hdr_len(NetClientState *nc, int len) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); + + return !!tap_probe_vnet_hdr_len(s->fd, len); +} + +static void tap_set_vnet_hdr_len(NetClientState *nc, int len) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); + assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) || + len == sizeof(struct virtio_net_hdr)); + + tap_fd_set_vnet_hdr_len(s->fd, len); + s->host_vnet_hdr_len = len; +} + +static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); + assert(!!s->host_vnet_hdr_len == using_vnet_hdr); + + s->using_vnet_hdr = using_vnet_hdr; +} + +static int tap_set_vnet_le(NetClientState *nc, bool is_le) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + + return tap_fd_set_vnet_le(s->fd, is_le); +} + +static int tap_set_vnet_be(NetClientState *nc, bool is_be) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + + return tap_fd_set_vnet_be(s->fd, is_be); +} + +static void tap_set_offload(NetClientState *nc, int csum, int tso4, + int tso6, int ecn, int ufo) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + if (s->fd < 0) { + return; + } + + tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo); +} + +static void tap_cleanup(NetClientState *nc) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + Error *err = NULL; + + if (s->vhost_net) { + 
vhost_net_cleanup(s->vhost_net); + s->vhost_net = NULL; + } + + qemu_purge_queued_packets(nc); + + if (s->down_script[0]) { + launch_script(s->down_script, s->down_script_arg, s->fd, &err); + if (err) { + error_report_err(err); + } + } + + tap_read_poll(s, false); + tap_write_poll(s, false); + close(s->fd); + s->fd = -1; +} + +static void tap_poll(NetClientState *nc, bool enable) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + tap_read_poll(s, enable); + tap_write_poll(s, enable); +} + +int tap_get_fd(NetClientState *nc) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); + return s->fd; +} + +/* fd support */ + +static NetClientInfo net_tap_info = { + .type = NET_CLIENT_OPTIONS_KIND_TAP, + .size = sizeof(TAPState), + .receive = tap_receive, + .receive_raw = tap_receive_raw, + .receive_iov = tap_receive_iov, + .poll = tap_poll, + .cleanup = tap_cleanup, + .has_ufo = tap_has_ufo, + .has_vnet_hdr = tap_has_vnet_hdr, + .has_vnet_hdr_len = tap_has_vnet_hdr_len, + .using_vnet_hdr = tap_using_vnet_hdr, + .set_offload = tap_set_offload, + .set_vnet_hdr_len = tap_set_vnet_hdr_len, + .set_vnet_le = tap_set_vnet_le, + .set_vnet_be = tap_set_vnet_be, +}; + +static TAPState *net_tap_fd_init(NetClientState *peer, + const char *model, + const char *name, + int fd, + int vnet_hdr) +{ + NetClientState *nc; + TAPState *s; + + nc = qemu_new_net_client(&net_tap_info, peer, model, name); + + s = DO_UPCAST(TAPState, nc, nc); + + s->fd = fd; + s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0; + s->using_vnet_hdr = false; + s->has_ufo = tap_probe_has_ufo(s->fd); + s->enabled = true; + tap_set_offload(&s->nc, 0, 0, 0, 0, 0); + /* + * Make sure host header length is set correctly in tap: + * it might have been modified by another instance of qemu. 
+ */ + if (tap_probe_vnet_hdr_len(s->fd, s->host_vnet_hdr_len)) { + tap_fd_set_vnet_hdr_len(s->fd, s->host_vnet_hdr_len); + } + tap_read_poll(s, true); + s->vhost_net = NULL; + return s; +} + +static void launch_script(const char *setup_script, const char *ifname, + int fd, Error **errp) +{ + int pid, status; + char *args[3]; + char **parg; + + /* try to launch network script */ + pid = fork(); + if (pid < 0) { + error_setg_errno(errp, errno, "could not launch network script %s", + setup_script); + return; + } + if (pid == 0) { + int open_max = sysconf(_SC_OPEN_MAX), i; + + for (i = 3; i < open_max; i++) { + if (i != fd) { + close(i); + } + } + parg = args; + *parg++ = (char *)setup_script; + *parg++ = (char *)ifname; + *parg = NULL; + execv(setup_script, args); + _exit(1); + } else { + while (waitpid(pid, &status, 0) != pid) { + /* loop */ + } + + if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { + return; + } + error_setg(errp, "network script %s failed with status %d", + setup_script, status); + } +} + +static int recv_fd(int c) +{ + int fd; + uint8_t msgbuf[CMSG_SPACE(sizeof(fd))]; + struct msghdr msg = { + .msg_control = msgbuf, + .msg_controllen = sizeof(msgbuf), + }; + struct cmsghdr *cmsg; + struct iovec iov; + uint8_t req[1]; + ssize_t len; + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fd)); + msg.msg_controllen = cmsg->cmsg_len; + + iov.iov_base = req; + iov.iov_len = sizeof(req); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + len = recvmsg(c, &msg, 0); + if (len > 0) { + memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd)); + return fd; + } + + return len; +} + +static int net_bridge_run_helper(const char *helper, const char *bridge, + Error **errp) +{ + sigset_t oldmask, mask; + int pid, status; + char *args[5]; + char **parg; + int sv[2]; + + sigemptyset(&mask); + sigaddset(&mask, SIGCHLD); + sigprocmask(SIG_BLOCK, &mask, &oldmask); + + if (socketpair(PF_UNIX, SOCK_STREAM, 
0, sv) == -1) { + error_setg_errno(errp, errno, "socketpair() failed"); + return -1; + } + + /* try to launch bridge helper */ + pid = fork(); + if (pid < 0) { + error_setg_errno(errp, errno, "Can't fork bridge helper"); + return -1; + } + if (pid == 0) { + int open_max = sysconf(_SC_OPEN_MAX), i; + char fd_buf[6+10]; + char br_buf[6+IFNAMSIZ] = {0}; + char helper_cmd[PATH_MAX + sizeof(fd_buf) + sizeof(br_buf) + 15]; + + for (i = 3; i < open_max; i++) { + if (i != sv[1]) { + close(i); + } + } + + snprintf(fd_buf, sizeof(fd_buf), "%s%d", "--fd=", sv[1]); + + if (strrchr(helper, ' ') || strrchr(helper, '\t')) { + /* assume helper is a command */ + + if (strstr(helper, "--br=") == NULL) { + snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge); + } + + snprintf(helper_cmd, sizeof(helper_cmd), "%s %s %s %s", + helper, "--use-vnet", fd_buf, br_buf); + + parg = args; + *parg++ = (char *)"sh"; + *parg++ = (char *)"-c"; + *parg++ = helper_cmd; + *parg++ = NULL; + + execv("/bin/sh", args); + } else { + /* assume helper is just the executable path name */ + + snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge); + + parg = args; + *parg++ = (char *)helper; + *parg++ = (char *)"--use-vnet"; + *parg++ = fd_buf; + *parg++ = br_buf; + *parg++ = NULL; + + execv(helper, args); + } + _exit(1); + + } else { + int fd; + int saved_errno; + + close(sv[1]); + + do { + fd = recv_fd(sv[0]); + } while (fd == -1 && errno == EINTR); + saved_errno = errno; + + close(sv[0]); + + while (waitpid(pid, &status, 0) != pid) { + /* loop */ + } + sigprocmask(SIG_SETMASK, &oldmask, NULL); + if (fd < 0) { + error_setg_errno(errp, saved_errno, + "failed to recv file descriptor"); + return -1; + } + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + error_setg(errp, "bridge helper failed"); + return -1; + } + return fd; + } +} + +int net_init_bridge(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp) +{ + const NetdevBridgeOptions *bridge; + const char 
*helper, *br; + TAPState *s; + int fd, vnet_hdr; + + assert(opts->type == NET_CLIENT_OPTIONS_KIND_BRIDGE); + bridge = opts->u.bridge; + + helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER; + br = bridge->has_br ? bridge->br : DEFAULT_BRIDGE_INTERFACE; + + fd = net_bridge_run_helper(helper, br, errp); + if (fd == -1) { + return -1; + } + + fcntl(fd, F_SETFL, O_NONBLOCK); + vnet_hdr = tap_probe_vnet_hdr(fd); + s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr); + + snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper, + br); + + return 0; +} + +static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr, + const char *setup_script, char *ifname, + size_t ifname_sz, int mq_required, Error **errp) +{ + Error *err = NULL; + int fd, vnet_hdr_required; + + if (tap->has_vnet_hdr) { + *vnet_hdr = tap->vnet_hdr; + vnet_hdr_required = *vnet_hdr; + } else { + *vnet_hdr = 1; + vnet_hdr_required = 0; + } + + TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required, + mq_required, errp)); + if (fd < 0) { + return -1; + } + + if (setup_script && + setup_script[0] != '\0' && + strcmp(setup_script, "no") != 0) { + launch_script(setup_script, ifname, fd, &err); + if (err) { + error_propagate(errp, err); + close(fd); + return -1; + } + } + + return fd; +} + +#define MAX_TAP_QUEUES 1024 + +static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, + const char *model, const char *name, + const char *ifname, const char *script, + const char *downscript, const char *vhostfdname, + int vnet_hdr, int fd, Error **errp) +{ + Error *err = NULL; + TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr); + int vhostfd; + + tap_set_sndbuf(s->fd, tap, &err); + if (err) { + error_propagate(errp, err); + return; + } + + if (tap->has_fd || tap->has_fds) { + snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd); + } else if (tap->has_helper) { + snprintf(s->nc.info_str, sizeof(s->nc.info_str), 
"helper=%s", + tap->helper); + } else { + snprintf(s->nc.info_str, sizeof(s->nc.info_str), + "ifname=%s,script=%s,downscript=%s", ifname, script, + downscript); + + if (strcmp(downscript, "no") != 0) { + snprintf(s->down_script, sizeof(s->down_script), "%s", downscript); + snprintf(s->down_script_arg, sizeof(s->down_script_arg), + "%s", ifname); + } + } + + if (tap->has_vhost ? tap->vhost : + vhostfdname || (tap->has_vhostforce && tap->vhostforce)) { + VhostNetOptions options; + + options.backend_type = VHOST_BACKEND_TYPE_KERNEL; + options.net_backend = &s->nc; + + if (tap->has_vhostfd || tap->has_vhostfds) { + vhostfd = monitor_fd_param(cur_mon, vhostfdname, &err); + if (vhostfd == -1) { + error_propagate(errp, err); + return; + } + } else { + vhostfd = open("/dev/vhost-net", O_RDWR); + if (vhostfd < 0) { + error_setg_errno(errp, errno, + "tap: open vhost char device failed"); + return; + } + } + options.opaque = (void *)(uintptr_t)vhostfd; + + s->vhost_net = vhost_net_init(&options); + if (!s->vhost_net) { + error_setg(errp, + "vhost-net requested but could not be initialized"); + return; + } + } else if (tap->has_vhostfd || tap->has_vhostfds) { + error_setg(errp, "vhostfd= is not valid without vhost"); + } +} + +static int get_fds(char *str, char *fds[], int max) +{ + char *ptr = str, *this; + size_t len = strlen(str); + int i = 0; + + while (i < max && ptr < str + len) { + this = strchr(ptr, ':'); + + if (this == NULL) { + fds[i] = g_strdup(ptr); + } else { + fds[i] = g_strndup(ptr, this - ptr); + } + + i++; + if (this == NULL) { + break; + } else { + ptr = this + 1; + } + } + + return i; +} + +int net_init_tap(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp) +{ + const NetdevTapOptions *tap; + int fd, vnet_hdr = 0, i = 0, queues; + /* for the no-fd, no-helper case */ + const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */ + const char *downscript = NULL; + Error *err = NULL; + const char *vhostfdname; + 
char ifname[128]; + + assert(opts->type == NET_CLIENT_OPTIONS_KIND_TAP); + tap = opts->u.tap; + queues = tap->has_queues ? tap->queues : 1; + vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL; + + /* QEMU vlans does not support multiqueue tap, in this case peer is set. + * For -netdev, peer is always NULL. */ + if (peer && (tap->has_queues || tap->has_fds || tap->has_vhostfds)) { + error_setg(errp, "Multiqueue tap cannot be used with QEMU vlans"); + return -1; + } + + if (tap->has_fd) { + if (tap->has_ifname || tap->has_script || tap->has_downscript || + tap->has_vnet_hdr || tap->has_helper || tap->has_queues || + tap->has_fds || tap->has_vhostfds) { + error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, " + "helper=, queues=, fds=, and vhostfds= " + "are invalid with fd="); + return -1; + } + + fd = monitor_fd_param(cur_mon, tap->fd, &err); + if (fd == -1) { + error_propagate(errp, err); + return -1; + } + + fcntl(fd, F_SETFL, O_NONBLOCK); + + vnet_hdr = tap_probe_vnet_hdr(fd); + + net_init_tap_one(tap, peer, "tap", name, NULL, + script, downscript, + vhostfdname, vnet_hdr, fd, &err); + if (err) { + error_propagate(errp, err); + return -1; + } + } else if (tap->has_fds) { + char *fds[MAX_TAP_QUEUES]; + char *vhost_fds[MAX_TAP_QUEUES]; + int nfds, nvhosts; + + if (tap->has_ifname || tap->has_script || tap->has_downscript || + tap->has_vnet_hdr || tap->has_helper || tap->has_queues || + tap->has_vhostfd) { + error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, " + "helper=, queues=, and vhostfd= " + "are invalid with fds="); + return -1; + } + + nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES); + if (tap->has_vhostfds) { + nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES); + if (nfds != nvhosts) { + error_setg(errp, "The number of fds passed does not match " + "the number of vhostfds passed"); + return -1; + } + } + + for (i = 0; i < nfds; i++) { + fd = monitor_fd_param(cur_mon, fds[i], &err); + if (fd == -1) { + error_propagate(errp, err); + 
return -1; + } + + fcntl(fd, F_SETFL, O_NONBLOCK); + + if (i == 0) { + vnet_hdr = tap_probe_vnet_hdr(fd); + } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) { + error_setg(errp, + "vnet_hdr not consistent across given tap fds"); + return -1; + } + + net_init_tap_one(tap, peer, "tap", name, ifname, + script, downscript, + tap->has_vhostfds ? vhost_fds[i] : NULL, + vnet_hdr, fd, &err); + if (err) { + error_propagate(errp, err); + return -1; + } + } + } else if (tap->has_helper) { + if (tap->has_ifname || tap->has_script || tap->has_downscript || + tap->has_vnet_hdr || tap->has_queues || tap->has_vhostfds) { + error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, " + "queues=, and vhostfds= are invalid with helper="); + return -1; + } + + fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE, + errp); + if (fd == -1) { + return -1; + } + + fcntl(fd, F_SETFL, O_NONBLOCK); + vnet_hdr = tap_probe_vnet_hdr(fd); + + net_init_tap_one(tap, peer, "bridge", name, ifname, + script, downscript, vhostfdname, + vnet_hdr, fd, &err); + if (err) { + error_propagate(errp, err); + close(fd); + return -1; + } + } else { + if (tap->has_vhostfds) { + error_setg(errp, "vhostfds= is invalid if fds= wasn't specified"); + return -1; + } + script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT; + downscript = tap->has_downscript ? tap->downscript : + DEFAULT_NETWORK_DOWN_SCRIPT; + + if (tap->has_ifname) { + pstrcpy(ifname, sizeof ifname, tap->ifname); + } else { + ifname[0] = '\0'; + } + + for (i = 0; i < queues; i++) { + fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script, + ifname, sizeof ifname, queues > 1, errp); + if (fd == -1) { + return -1; + } + + if (queues > 1 && i == 0 && !tap->has_ifname) { + if (tap_fd_get_ifname(fd, ifname)) { + error_setg(errp, "Fail to get ifname"); + close(fd); + return -1; + } + } + + net_init_tap_one(tap, peer, "tap", name, ifname, + i >= 1 ? "no" : script, + i >= 1 ? 
"no" : downscript, + vhostfdname, vnet_hdr, fd, &err); + if (err) { + error_propagate(errp, err); + close(fd); + return -1; + } + } + } + + return 0; +} + +VHostNetState *tap_get_vhost_net(NetClientState *nc) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); + return s->vhost_net; +} + +int tap_enable(NetClientState *nc) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + int ret; + + if (s->enabled) { + return 0; + } else { + ret = tap_fd_enable(s->fd); + if (ret == 0) { + s->enabled = true; + tap_update_fd_handler(s); + } + return ret; + } +} + +int tap_disable(NetClientState *nc) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + int ret; + + if (s->enabled == 0) { + return 0; + } else { + ret = tap_fd_disable(s->fd); + if (ret == 0) { + qemu_purge_queued_packets(nc); + s->enabled = false; + tap_update_fd_handler(s); + } + return ret; + } +} diff --git a/src/net/tap_int.h b/src/net/tap_int.h new file mode 100644 index 0000000..2378021 --- /dev/null +++ b/src/net/tap_int.h @@ -0,0 +1,49 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2009 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_TAP_H +#define QEMU_TAP_H + +#include "qemu-common.h" +#include "qapi-types.h" + +int tap_open(char *ifname, int ifname_size, int *vnet_hdr, + int vnet_hdr_required, int mq_required, Error **errp); + +ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen); + +void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp); +int tap_probe_vnet_hdr(int fd); +int tap_probe_vnet_hdr_len(int fd, int len); +int tap_probe_has_ufo(int fd); +void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo); +void tap_fd_set_vnet_hdr_len(int fd, int len); +int tap_fd_set_vnet_le(int fd, int vnet_is_le); +int tap_fd_set_vnet_be(int fd, int vnet_is_be); +int tap_fd_enable(int fd); +int tap_fd_disable(int fd); +int tap_fd_get_ifname(int fd, char *ifname); + +#endif /* QEMU_TAP_H */ diff --git a/src/net/util.c b/src/net/util.c new file mode 100644 index 0000000..7e95076 --- /dev/null +++ b/src/net/util.c @@ -0,0 +1,60 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "util.h" +#include <errno.h> +#include <stdlib.h> + +int net_parse_macaddr(uint8_t *macaddr, const char *p) +{ + int i; + char *last_char; + long int offset; + + errno = 0; + offset = strtol(p, &last_char, 0); + if (errno == 0 && *last_char == '\0' && + offset >= 0 && offset <= 0xFFFFFF) { + macaddr[3] = (offset & 0xFF0000) >> 16; + macaddr[4] = (offset & 0xFF00) >> 8; + macaddr[5] = offset & 0xFF; + return 0; + } + + for (i = 0; i < 6; i++) { + macaddr[i] = strtol(p, (char **)&p, 16); + if (i == 5) { + if (*p != '\0') { + return -1; + } + } else { + if (*p != ':' && *p != '-') { + return -1; + } + p++; + } + } + + return 0; +} diff --git a/src/net/util.h b/src/net/util.h new file mode 100644 index 0000000..10c7da9 --- /dev/null +++ b/src/net/util.h @@ -0,0 +1,32 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_NET_UTIL_H +#define QEMU_NET_UTIL_H + +#include <stdint.h> + +int net_parse_macaddr(uint8_t *macaddr, const char *p); + +#endif /* QEMU_NET_UTIL_H */ diff --git a/src/net/vde.c b/src/net/vde.c new file mode 100644 index 0000000..4475d92 --- /dev/null +++ b/src/net/vde.c @@ -0,0 +1,128 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "config-host.h" + +#include <libvdeplug.h> + +#include "net/net.h" +#include "clients.h" +#include "qemu-common.h" +#include "qemu/option.h" +#include "qemu/main-loop.h" + +typedef struct VDEState { + NetClientState nc; + VDECONN *vde; +} VDEState; + +static void vde_to_qemu(void *opaque) +{ + VDEState *s = opaque; + uint8_t buf[NET_BUFSIZE]; + int size; + + size = vde_recv(s->vde, (char *)buf, sizeof(buf), 0); + if (size > 0) { + qemu_send_packet(&s->nc, buf, size); + } +} + +static ssize_t vde_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + VDEState *s = DO_UPCAST(VDEState, nc, nc); + ssize_t ret; + + do { + ret = vde_send(s->vde, (const char *)buf, size, 0); + } while (ret < 0 && errno == EINTR); + + return ret; +} + +static void vde_cleanup(NetClientState *nc) +{ + VDEState *s = DO_UPCAST(VDEState, nc, nc); + qemu_set_fd_handler(vde_datafd(s->vde), NULL, NULL, NULL); + vde_close(s->vde); +} + +static NetClientInfo net_vde_info = { + .type = NET_CLIENT_OPTIONS_KIND_VDE, + .size = sizeof(VDEState), + .receive = vde_receive, + .cleanup = vde_cleanup, +}; + +static int net_vde_init(NetClientState *peer, const char *model, + const char *name, const char *sock, + int port, const char *group, int mode) +{ + NetClientState *nc; + VDEState *s; + VDECONN *vde; + char *init_group = (char *)group; + char *init_sock = (char *)sock; + + struct vde_open_args args = { + .port = port, + .group = init_group, + .mode = mode, + }; + + vde = vde_open(init_sock, (char *)"QEMU", &args); + if (!vde){ + return -1; + } + + nc = qemu_new_net_client(&net_vde_info, peer, model, name); + + snprintf(nc->info_str, sizeof(nc->info_str), "sock=%s,fd=%d", + sock, vde_datafd(vde)); + + s = DO_UPCAST(VDEState, nc, nc); + + s->vde = vde; + + qemu_set_fd_handler(vde_datafd(s->vde), vde_to_qemu, NULL, s); + + return 0; +} + +int net_init_vde(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp) +{ + /* FIXME error_setg(errp, ...) 
on failure */ + const NetdevVdeOptions *vde; + + assert(opts->type == NET_CLIENT_OPTIONS_KIND_VDE); + vde = opts->u.vde; + + /* missing optional values have been initialized to "all bits zero" */ + if (net_vde_init(peer, "vde", name, vde->sock, vde->port, vde->group, + vde->has_mode ? vde->mode : 0700) == -1) { + return -1; + } + + return 0; +} diff --git a/src/net/vhost-user.c b/src/net/vhost-user.c new file mode 100644 index 0000000..b368a90 --- /dev/null +++ b/src/net/vhost-user.c @@ -0,0 +1,326 @@ +/* + * vhost-user.c + * + * Copyright (c) 2013 Virtual Open Systems Sarl. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "clients.h" +#include "net/vhost_net.h" +#include "net/vhost-user.h" +#include "sysemu/char.h" +#include "qemu/config-file.h" +#include "qemu/error-report.h" +#include "qmp-commands.h" +#include "trace.h" + +typedef struct VhostUserState { + NetClientState nc; + CharDriverState *chr; + VHostNetState *vhost_net; +} VhostUserState; + +typedef struct VhostUserChardevProps { + bool is_socket; + bool is_unix; + bool is_server; +} VhostUserChardevProps; + +VHostNetState *vhost_user_get_vhost_net(NetClientState *nc) +{ + VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc); + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + return s->vhost_net; +} + +static int vhost_user_running(VhostUserState *s) +{ + return (s->vhost_net) ? 
1 : 0; +} + +static void vhost_user_stop(int queues, NetClientState *ncs[]) +{ + VhostUserState *s; + int i; + + for (i = 0; i < queues; i++) { + assert (ncs[i]->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + + s = DO_UPCAST(VhostUserState, nc, ncs[i]); + if (!vhost_user_running(s)) { + continue; + } + + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + s->vhost_net = NULL; + } + } +} + +static int vhost_user_start(int queues, NetClientState *ncs[]) +{ + VhostNetOptions options; + VhostUserState *s; + int max_queues; + int i; + + options.backend_type = VHOST_BACKEND_TYPE_USER; + + for (i = 0; i < queues; i++) { + assert (ncs[i]->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + + s = DO_UPCAST(VhostUserState, nc, ncs[i]); + if (vhost_user_running(s)) { + continue; + } + + options.net_backend = ncs[i]; + options.opaque = s->chr; + s->vhost_net = vhost_net_init(&options); + if (!s->vhost_net) { + error_report("failed to init vhost_net for queue %d\n", i); + goto err; + } + + if (i == 0) { + max_queues = vhost_net_get_max_queues(s->vhost_net); + if (queues > max_queues) { + error_report("you are asking more queues than " + "supported: %d\n", max_queues); + goto err; + } + } + } + + return 0; + +err: + vhost_user_stop(i + 1, ncs); + return -1; +} + +static ssize_t vhost_user_receive(NetClientState *nc, const uint8_t *buf, + size_t size) +{ + /* In case of RARP (message size is 60) notify backup to send a fake RARP. + This fake RARP will be sent by backend only for guest + without GUEST_ANNOUNCE capability. 
+ */ + if (size == 60) { + VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc); + int r; + static int display_rarp_failure = 1; + char mac_addr[6]; + + /* extract guest mac address from the RARP message */ + memcpy(mac_addr, &buf[6], 6); + + r = vhost_net_notify_migration_done(s->vhost_net, mac_addr); + + if ((r != 0) && (display_rarp_failure)) { + fprintf(stderr, + "Vhost user backend fails to broadcast fake RARP\n"); + fflush(stderr); + display_rarp_failure = 0; + } + } + + return size; +} + +static void vhost_user_cleanup(NetClientState *nc) +{ + VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc); + + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + s->vhost_net = NULL; + } + + qemu_purge_queued_packets(nc); +} + +static bool vhost_user_has_vnet_hdr(NetClientState *nc) +{ + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + + return true; +} + +static bool vhost_user_has_ufo(NetClientState *nc) +{ + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + + return true; +} + +static NetClientInfo net_vhost_user_info = { + .type = NET_CLIENT_OPTIONS_KIND_VHOST_USER, + .size = sizeof(VhostUserState), + .receive = vhost_user_receive, + .cleanup = vhost_user_cleanup, + .has_vnet_hdr = vhost_user_has_vnet_hdr, + .has_ufo = vhost_user_has_ufo, +}; + +static void net_vhost_user_event(void *opaque, int event) +{ + const char *name = opaque; + NetClientState *ncs[MAX_QUEUE_NUM]; + VhostUserState *s; + Error *err = NULL; + int queues; + + queues = qemu_find_net_clients_except(name, ncs, + NET_CLIENT_OPTIONS_KIND_NIC, + MAX_QUEUE_NUM); + s = DO_UPCAST(VhostUserState, nc, ncs[0]); + trace_vhost_user_event(s->chr->label, event); + switch (event) { + case CHR_EVENT_OPENED: + if (vhost_user_start(queues, ncs) < 0) { + exit(1); + } + qmp_set_link(name, true, &err); + break; + case CHR_EVENT_CLOSED: + qmp_set_link(name, false, &err); + vhost_user_stop(queues, ncs); + break; + } + + if (err) { + error_report_err(err); + } +} + +static int 
net_vhost_user_init(NetClientState *peer, const char *device, + const char *name, CharDriverState *chr, + int queues) +{ + NetClientState *nc; + VhostUserState *s; + int i; + + for (i = 0; i < queues; i++) { + nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name); + + snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s", + i, chr->label); + + nc->queue_index = i; + + s = DO_UPCAST(VhostUserState, nc, nc); + s->chr = chr; + } + + qemu_chr_add_handlers(chr, NULL, NULL, net_vhost_user_event, (void*)name); + + return 0; +} + +static int net_vhost_chardev_opts(void *opaque, + const char *name, const char *value, + Error **errp) +{ + VhostUserChardevProps *props = opaque; + + if (strcmp(name, "backend") == 0 && strcmp(value, "socket") == 0) { + props->is_socket = true; + } else if (strcmp(name, "path") == 0) { + props->is_unix = true; + } else if (strcmp(name, "server") == 0) { + props->is_server = true; + } else { + error_setg(errp, + "vhost-user does not support a chardev with option %s=%s", + name, value); + return -1; + } + return 0; +} + +static CharDriverState *net_vhost_parse_chardev( + const NetdevVhostUserOptions *opts, Error **errp) +{ + CharDriverState *chr = qemu_chr_find(opts->chardev); + VhostUserChardevProps props; + + if (chr == NULL) { + error_setg(errp, "chardev \"%s\" not found", opts->chardev); + return NULL; + } + + /* inspect chardev opts */ + memset(&props, 0, sizeof(props)); + if (qemu_opt_foreach(chr->opts, net_vhost_chardev_opts, &props, errp)) { + return NULL; + } + + if (!props.is_socket || !props.is_unix) { + error_setg(errp, "chardev \"%s\" is not a unix socket", + opts->chardev); + return NULL; + } + + qemu_chr_fe_claim_no_fail(chr); + + return chr; +} + +static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp) +{ + const char *name = opaque; + const char *driver, *netdev; + const char virtio_name[] = "virtio-net-"; + + driver = qemu_opt_get(opts, "driver"); + netdev = qemu_opt_get(opts, 
"netdev"); + + if (!driver || !netdev) { + return 0; + } + + if (strcmp(netdev, name) == 0 && + strncmp(driver, virtio_name, strlen(virtio_name)) != 0) { + error_setg(errp, "vhost-user requires frontend driver virtio-net-*"); + return -1; + } + + return 0; +} + +int net_init_vhost_user(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp) +{ + int queues; + const NetdevVhostUserOptions *vhost_user_opts; + CharDriverState *chr; + + assert(opts->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + vhost_user_opts = opts->u.vhost_user; + + chr = net_vhost_parse_chardev(vhost_user_opts, errp); + if (!chr) { + return -1; + } + + /* verify net frontend */ + if (qemu_opts_foreach(qemu_find_opts("device"), net_vhost_check_net, + (char *)name, errp)) { + return -1; + } + + queues = vhost_user_opts->has_queues ? vhost_user_opts->queues : 1; + if (queues < 1) { + error_setg(errp, + "vhost-user number of queues must be bigger than zero"); + return -1; + } + + return net_vhost_user_init(peer, "vhost_user", name, chr, queues); +} |