#include <sys/socket.h>

#define VHOST_USER_DEBUG_SOCKET 0

#if VHOST_USER_DEBUG_SOCKET == 1
#define DBG_SOCK(args...) clib_warning(args);
#else
#define DBG_SOCK(args...)
#endif

/* Message names indexed by vhost-user request id; used only by DBG_SOCK. */
static const char *vhost_message_str[] __attribute__ ((unused)) =
{
  /* ... */
};
static int dpdk_vhost_user_set_vring_enable (u32 hw_if_index, u8 idx,
                                             int enable);
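/*
 * Address translation helper. Guest addresses reach VPP in QEMU's virtual
 * address space; each registered memory region records where that range
 * sits in guest-physical space and where it was mmap()ed locally, so a
 * region lookup plus one add/subtract rebases the pointer. Sketch of the
 * rebase, assuming the containing region has been found:
 *
 *   vhost_va = qemu_va + region->guest_phys_address
 *            + region->address_offset - region->userspace_address;
 */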
static uword
qva_to_vva (struct virtio_net *dev, uword qemu_va)
{
  struct virtio_memory_regions *region;
  u32 regionidx;
  uword vhost_va = 0;

  /* find the region that contains qemu_va and rebase it */
  for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++)
    {
      region = &dev->mem->regions[regionidx];
      if ((qemu_va >= region->userspace_address) &&
          (qemu_va <= region->userspace_address + region->memory_size))
        {
          vhost_va = qemu_va + region->guest_phys_address +
            region->address_offset - region->userspace_address;
          break;
        }
    }
  return vhost_va;
}
static dpdk_device_t *
dpdk_vhost_user_device_from_hw_if_index (u32 hw_if_index)
{
  /* ... */
}

static dpdk_device_t *
dpdk_vhost_user_device_from_sw_if_index (u32 sw_if_index)
{
  /* ... */
  return dpdk_vhost_user_device_from_hw_if_index (sw->hw_if_index);
}
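/*
 * Queue state helpers. A vring is only polled while its 'enabled' flag is
 * set, so clearing the flag is how the control path pauses the data path
 * for one queue (stop_processing_packets) or all of them (disable_interface).
 */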
static void
stop_processing_packets (u32 hw_if_index, u8 idx)
{
  dpdk_device_t *xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index);
  /* ... */
  xd->vu_vhost_dev.virtqueue[idx]->enabled = 0;
}

static void
disable_interface (dpdk_device_t * xd)
{
  u8 idx;
  int numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
  for (idx = 0; idx < numqs; idx++)
    xd->vu_vhost_dev.virtqueue[idx]->enabled = 0;

  xd->vu_is_running = 0;
}
static inline void *
map_guest_mem (dpdk_device_t * xd, uword addr)
{
  dpdk_vu_intf_t *vui = xd->vu_intf;
  struct virtio_memory *mem = xd->vu_vhost_dev.mem;
  int i;

  for (i = 0; i < mem->nregions; i++)
    {
      if ((mem->regions[i].guest_phys_address <= addr) &&
          ((mem->regions[i].guest_phys_address +
            mem->regions[i].memory_size) > addr))
        {
          return (void *) ((uword) vui->region_addr[i] + addr -
                           (uword) mem->regions[i].guest_phys_address);
        }
    }
  DBG_SOCK ("failed to map guest mem addr %lx", addr);
  return 0;
}
static int
dpdk_create_vhost_user_if_internal (u32 * hw_if_index, u32 if_id, u8 * hwaddr)
{
  dpdk_vu_intf_t *vui = NULL;
  /* ... */

  if (inactive_cnt > 0)
    {
      /* ... */
      DBG_SOCK ("reusing inactive vhost-user interface sw_if_index %d",
                /* ... */);
      /* ... */
      clib_warning
        ("error: inactive vhost-user interface sw_if_index %d not VHOST_USER type!",
         /* ... */);
      /* ... */
      if (if_id != (u32) ~ 0)
        xd->vu_if_id = if_id;
      /* ... */
      for (j = 0; j < num_qpairs * VIRTIO_QNUM; j++)
        {
          memset (xd->vu_vhost_dev.virtqueue[j], 0,
                  sizeof (struct vhost_virtqueue));
          xd->vu_vhost_dev.virtqueue[j]->kickfd = -1;
          xd->vu_vhost_dev.virtqueue[j]->callfd = -1;
          xd->vu_vhost_dev.virtqueue[j]->backend = -1;
          vui->vrings[j].packets = 0;
          vui->vrings[j].bytes = 0;
        }
    }
  else
    {
      xd->vu_vhost_dev.virt_qp_nb = num_qpairs;
      /* ... */
      if (if_id == (u32) ~ 0)
        /* ... */
      else
        xd->vu_if_id = if_id;
      /* ... */
      xd->vu_vhost_dev.mem =
        clib_mem_alloc (sizeof (struct virtio_memory) +
                        VHOST_MEMORY_MAX_NREGIONS *
                        sizeof (struct virtio_memory_regions));
      /* ... */
      xd->vu_vhost_dev.mem->nregions = 0;
      /* ... */
      for (j = 0; j < num_qpairs * VIRTIO_QNUM; j++)
        {
          xd->vu_vhost_dev.virtqueue[j] = /* ... */;
          memset (xd->vu_vhost_dev.virtqueue[j], 0,
                  sizeof (struct vhost_virtqueue));
          xd->vu_vhost_dev.virtqueue[j]->kickfd = -1;
          xd->vu_vhost_dev.virtqueue[j]->callfd = -1;
          xd->vu_vhost_dev.virtqueue[j]->backend = -1;
          vui->vrings[j].packets = 0;
          vui->vrings[j].bytes = 0;
        }
      /* ... */
      DBG_SOCK ("tm->n_vlib_mains: %d. TX %d, RX: %d, num_qpairs: %d, Lock: %p",
                /* ... */);
    }
  /* ... */

  /* use the clock to seed random number generation */
  rnd = (u32) (now * 1e6);
  /* ... */
  DBG_SOCK ("xd->device_index: %d, dm->input_cpu_count: %d, "
            /* ... */);

  /* put the dpdk input node into polling mode on the assigned threads */
  for (q = 0; q < num_qpairs; q++)
    {
      /* ... */
      vlib_node_set_state (/* ... */, VLIB_NODE_STATE_POLLING);
      /* ... */
      vlib_node_set_state (/* ... */, VLIB_NODE_STATE_POLLING);
    }
  /* ... */
}
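/*
 * Per-message handlers follow. Each one resolves hw_if_index back to the
 * DPDK device and updates the struct virtio_net embedded in it, which is
 * the same state the rte_vhost data path reads.
 */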
#if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
static int
dpdk_vhost_user_set_protocol_features (u32 hw_if_index, u64 prot_features)
{
  dpdk_device_t *xd;

  xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index);
  /* ... */
  xd->vu_vhost_dev.protocol_features = prot_features;
  /* ... */
}
#endif
static int
dpdk_vhost_user_get_features (u32 hw_if_index, u64 * features)
{
  *features = rte_vhost_feature_get ();

#if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
#define OFFLOAD_FEATURES ((1ULL << VIRTIO_NET_F_HOST_TSO4) |  \
                          (1ULL << VIRTIO_NET_F_HOST_TSO6) |  \
                          (1ULL << VIRTIO_NET_F_CSUM)      |  \
                          (1ULL << VIRTIO_NET_F_GUEST_CSUM) | \
                          (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
                          (1ULL << VIRTIO_NET_F_GUEST_TSO6))

  /* TSO and checksum offload are not yet supported: mask those bits out */
  *features &= (~OFFLOAD_FEATURES);
#endif

  DBG_SOCK ("supported features: 0x%lx", *features);
  return 0;
}
static int
dpdk_vhost_user_set_features (u32 hw_if_index, u64 features)
{
  dpdk_device_t *xd;
  u16 hdr_len = sizeof (struct virtio_net_hdr);

  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
    {
      /* ... */
    }

  xd->vu_vhost_dev.features = features;

  if (xd->vu_vhost_dev.features & (1 << VIRTIO_NET_F_MRG_RXBUF))
    hdr_len = sizeof (struct virtio_net_hdr_mrg_rxbuf);

  int numqs = VIRTIO_QNUM;
  /* ... */
  numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
  for (idx = 0; idx < numqs; idx++)
    {
      xd->vu_vhost_dev.virtqueue[idx]->vhost_hlen = hdr_len;
      /* ... */
      dpdk_vhost_user_set_vring_enable (hw_if_index, idx, 1);
      /* ... */
    }
  return 0;
}
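/*
 * VHOST_USER_SET_MEM_TABLE: each guest memory region arrives with a file
 * descriptor; mmap() it shared, record the mapping, and compute
 * address_offset so qva_to_vva() and map_guest_mem() can translate guest
 * addresses into local pointers.
 */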
static int
dpdk_vhost_user_set_mem_table (u32 hw_if_index, /* ... */ int fd[])
{
  struct virtio_memory *mem;
  /* ... */

  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
    {
      /* ... */
    }
  /* ... */
  mem = xd->vu_vhost_dev.mem;
  /* ... */

  for (i = 0; i < mem->nregions; i++)
    {
      u64 mapped_size, mapped_address;
      /* ... */
      mem->regions[i].guest_phys_address_end =
        /* ... */;
      /* ... */
      mapped_address = pointer_to_uword (mmap (NULL, mapped_size,
                                               PROT_READ | PROT_WRITE,
                                               MAP_SHARED, fd[i], 0));
      /* ... */
      vui->region_addr[i] = mapped_address;
      vui->region_fd[i] = fd[i];
      /* ... */
      mem->regions[i].address_offset =
        mapped_address - mem->regions[i].guest_phys_address;

      DBG_SOCK ("map memory region %d addr 0x%lx off 0x%lx len 0x%lx",
                i, vui->region_addr[i], vui->region_offset[i], mapped_size);
      /* ... */
      mem->mapped_address = mem->regions[i].address_offset;
    }

  /* packet handling must stop until the new mapping is in place */
  disable_interface (xd);
  return 0;
}
static int
dpdk_vhost_user_set_vring_num (u32 hw_if_index, u8 idx, u32 num)
{
  dpdk_device_t *xd;
  struct vhost_virtqueue *vq;

  DBG_SOCK ("idx %u num %u", idx, num);

  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
    {
      /* ... */
    }

  vq = xd->vu_vhost_dev.virtqueue[idx];
  /* ... */
  stop_processing_packets (hw_if_index, idx);
  return 0;
}
static int
dpdk_vhost_user_set_vring_addr (u32 hw_if_index, u8 idx, uword desc,
                                uword used, uword avail, uword log)
{
  dpdk_device_t *xd;
  struct vhost_virtqueue *vq;

  DBG_SOCK ("idx %u desc 0x%lx used 0x%lx avail 0x%lx log 0x%lx",
            idx, desc, used, avail, log);

  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
    {
      /* ... */
    }

  vq = xd->vu_vhost_dev.virtqueue[idx];

  vq->desc = (struct vring_desc *) qva_to_vva (&xd->vu_vhost_dev, desc);
  vq->used = (struct vring_used *) qva_to_vva (&xd->vu_vhost_dev, used);
  vq->avail = (struct vring_avail *) qva_to_vva (&xd->vu_vhost_dev, avail);
#if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
  vq->log_guest_addr = log;
#endif

  if (!(vq->desc && vq->used && vq->avail))
    {
      /* ... */
    }

  if (vq->last_used_idx != vq->used->idx)
    {
      clib_warning ("last_used_idx (%u) and vq->used->idx (%u) mismatches; "
                    "some packets maybe resent for Tx and dropped for Rx",
                    vq->last_used_idx, vq->used->idx);
      vq->last_used_idx = vq->used->idx;
      vq->last_used_idx_res = vq->used->idx;
    }
  /* ... */
  rte_vhost_enable_guest_notification (&xd->vu_vhost_dev, idx, 0);
  stop_processing_packets (hw_if_index, idx);
  return 0;
}
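/*
 * VHOST_USER_GET_VRING_BASE doubles as the "stop this vring" message: the
 * reply carries last_used_idx, and the vring's local state (callfd, enable
 * flag, dirty-log address) is torn down. When no enabled queue remains the
 * whole device is marked not running.
 */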
static int
dpdk_vhost_user_get_vring_base (u32 hw_if_index, u8 idx, u32 * num)
{
  dpdk_device_t *xd;
  struct vhost_virtqueue *vq;

  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
    {
      /* ... */
    }

  vq = xd->vu_vhost_dev.virtqueue[idx];
  *num = vq->last_used_idx;
  /* ... */
  DBG_SOCK ("Stopping vring Q %u of device %d", idx, hw_if_index);
  dpdk_vu_intf_t *vui = xd->vu_intf;
  /* ... */
  if (vui->vrings[idx].callfd > 0)
    {
      /* ... */
      unix_file_del (&unix_main,
                     unix_main.file_pool + vui->vrings[idx].callfd_idx);
    }
  /* ... */
  vui->vrings[idx].enabled = 0;
  vui->vrings[idx].callfd = -1;
  /* ... */
#if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
  vq->log_guest_addr = 0;
#endif
  /* ... */
  int numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
  for (idx = 0; idx < numqs; idx++)
    {
      if (xd->vu_vhost_dev.virtqueue[idx]->enabled)
        {
          /* ... */
        }
    }
  /* ... */
  DBG_SOCK ("Device %d disabled", hw_if_index);
  xd->vu_is_running = 0;
  /* ... */
  return 0;
}
static int
dpdk_vhost_user_set_vring_base (u32 hw_if_index, u8 idx, u32 num)
{
  dpdk_device_t *xd;
  struct vhost_virtqueue *vq;

  DBG_SOCK ("idx %u num %u", idx, num);

  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
    {
      /* ... */
    }

  vq = xd->vu_vhost_dev.virtqueue[idx];
  vq->last_used_idx = num;
  vq->last_used_idx_res = num;
  /* ... */
  stop_processing_packets (hw_if_index, idx);
  return 0;
}
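/*
 * A queue pair is two consecutive vrings, RX (even index) and TX (odd
 * index), and only counts as usable when both are enabled. The device is
 * marked running as soon as one complete pair is up, which in turn lets
 * the admin-up check bring the link up.
 */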
static int
dpdk_vhost_user_set_vring_kick (u32 hw_if_index, u8 idx, int fd)
{
  dpdk_device_t *xd;
  dpdk_vu_vring *vring;
  struct vhost_virtqueue *vq0, *vq1, *vq;
  int index, vu_is_running = 0;

  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
    {
      /* ... */
    }

  vq = xd->vu_vhost_dev.virtqueue[idx];
  /* ... */
  vring = &xd->vu_intf->vrings[idx];
  vq->enabled = (vq->desc && vq->avail && vq->used && vring->enabled) ? 1 : 0;

  int numqs = VIRTIO_QNUM;
  numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;

  for (index = 0; index < numqs; index += 2)
    {
      vq0 = xd->vu_vhost_dev.virtqueue[index];     /* RX */
      vq1 = xd->vu_vhost_dev.virtqueue[index + 1]; /* TX */
      if (vq0->enabled && vq1->enabled)
        {
          vu_is_running = 1;
          break;
        }
    }
  DBG_SOCK ("SET_VRING_KICK - idx %d, running %d, fd: %d",
            idx, vu_is_running, fd);

  xd->vu_is_running = vu_is_running;
  if (xd->vu_is_running && xd->admin_up)
    {
      vnet_hw_interface_set_flags (/* ... */
                                   VNET_HW_INTERFACE_FLAG_LINK_UP |
                                   ETH_LINK_FULL_DUPLEX);
    }
  return 0;
}
static int
dpdk_vhost_user_set_vring_enable (u32 hw_if_index, u8 idx, int enable)
{
  dpdk_device_t *xd;
  struct vhost_virtqueue *vq;

  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
    {
      /* ... */
    }

  dpdk_vu_intf_t *vui = xd->vu_intf;
  /* ... */
  vui->vrings[idx].enabled = enable;

  int numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
  /* ... */
  if (!vui->vrings[numqs].enabled)
    {
      /* ... */
    }
  /* ... */
  vq = xd->vu_vhost_dev.virtqueue[idx];
  if (vq->desc && vq->avail && vq->used)
    xd->vu_vhost_dev.virtqueue[idx]->enabled = enable;

  return 0;
}
static clib_error_t *
dpdk_vhost_user_callfd_read_ready (unix_file_t * uf)
{
  __attribute__ ((unused)) int n;
  u8 buff[8];

  /* drain the eventfd so it can signal again */
  n = read (uf->file_descriptor, ((char *) &buff), 8);
  return 0;
}
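/*
 * SET_VRING_CALL: the guest supplies an eventfd; the backend writes to it
 * to interrupt the guest. Any callfd previously registered for this vring
 * is deregistered from the unix poller first.
 */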
static int
dpdk_vhost_user_set_vring_call (u32 hw_if_index, u8 idx, int fd)
{
  dpdk_device_t *xd;
  struct vhost_virtqueue *vq;
  unix_file_t template = { 0 };

  DBG_SOCK ("SET_VRING_CALL - idx %d, fd %d", idx, fd);

  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
    {
      /* ... */
    }

  dpdk_vu_intf_t *vui = xd->vu_intf;

  /* if there is an old callfd, delete it */
  if (vui->vrings[idx].callfd > -1)
    {
      /* ... */
      unix_file_del (&unix_main,
                     unix_main.file_pool + vui->vrings[idx].callfd_idx);
    }

  vui->vrings[idx].callfd = fd;
  template.read_function = dpdk_vhost_user_callfd_read_ready;
  template.file_descriptor = fd;
  /* ... */
  vq = xd->vu_vhost_dev.virtqueue[idx];
  /* ... */
  return 0;
}
u8
dpdk_vhost_user_want_interrupt (dpdk_device_t * xd, int idx)
{
  dpdk_vu_intf_t *vui = xd->vu_intf;
  /* ... */
  dpdk_vu_vring *vring = &(vui->vrings[idx]);
  struct vhost_virtqueue *vq = xd->vu_vhost_dev.virtqueue[idx];

  /* interrupt only when a callfd exists and the guest has not opted out */
  return (vring->callfd > -1)
    && !(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
}

void
dpdk_vhost_user_send_interrupt (vlib_main_t * vm, dpdk_device_t * xd, int idx)
{
  dpdk_vu_intf_t *vui = xd->vu_intf;
  /* ... */
  dpdk_vu_vring *vring = &(vui->vrings[idx]);
  struct vhost_virtqueue *vq = xd->vu_vhost_dev.virtqueue[idx];
  /* ... */
  if ((vring->callfd > -1)
      && !(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
    {
      eventfd_write (vring->callfd, (eventfd_t) 1);
      vring->n_since_last_int = 0;
      vring->int_deadline = /* ... */;
    }
}
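/*
 * Socket-side initialization: wipe the per-interface state, remember the
 * socket fd and role (server/client), and start every vring disabled with
 * kickfd/callfd at -1 until the peer configures them.
 */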
static void
dpdk_vhost_user_vui_init (vnet_main_t * vnm, dpdk_device_t * xd,
                          int sockfd, const char *sock_filename,
                          u8 is_server, u64 feature_mask, u32 * sw_if_index)
{
  dpdk_vu_intf_t *vui = xd->vu_intf;
  memset (vui, 0, sizeof (*vui));
  /* ... */
  vui->unix_fd = sockfd;
  vui->num_vrings = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
  DBG_SOCK ("dpdk_vhost_user_vui_init VRINGS: %d", vui->num_vrings);
  vui->sock_is_server = is_server;
  strncpy (vui->sock_filename, sock_filename,
           /* ... */);
  /* ... */
  vui->feature_mask = feature_mask;
  /* ... */
  vui->unix_file_index = ~0;
  /* ... */
  for (q = 0; q < vui->num_vrings; q++)
    {
      vui->vrings[q].enabled = 0;
      vui->vrings[q].callfd = -1;
      vui->vrings[q].kickfd = -1;
    }
  /* ... */
}
static void
dpdk_vhost_user_vui_register (vlib_main_t * vm, dpdk_device_t * xd)
{
  /* ... */
  dpdk_vu_intf_t *vui = xd->vu_intf;
  /* ... */
}
static void
dpdk_unmap_all_mem_regions (dpdk_device_t * xd)
{
  int i;
  dpdk_vu_intf_t *vui = xd->vu_intf;
  struct virtio_memory *mem = xd->vu_vhost_dev.mem;

  for (i = 0; i < mem->nregions; i++)
    {
      if (vui->region_addr[i] != -1)
        {
          long page_sz = get_huge_page_size (vui->region_fd[i]);

          ssize_t map_sz = RTE_ALIGN_CEIL (mem->regions[i].memory_size +
                                           vui->region_offset[i], page_sz);

          munmap ((void *) (vui->region_addr[i] - vui->region_offset[i]),
                  map_sz);

          DBG_SOCK
            ("unmap memory region %d addr 0x%lx off 0x%lx len 0x%lx page_sz 0x%x",
             i, vui->region_addr[i], vui->region_offset[i], map_sz, page_sz);

          vui->region_addr[i] = -1;
        }
      /* ... */
      close (vui->region_fd[i]);
    }
}
static void
dpdk_vhost_user_if_disconnect (dpdk_device_t * xd)
{
  dpdk_vu_intf_t *vui = xd->vu_intf;
  /* ... */
  struct vhost_virtqueue *vq;
  /* ... */

  if (vui->unix_file_index != ~0)
    {
      /* ... */
      vui->unix_file_index = ~0;
    }
  /* ... */
  close (vui->unix_fd);
  /* ... */
  for (q = 0; q < vui->num_vrings; q++)
    {
      vq = xd->vu_vhost_dev.virtqueue[q];
      if (vui->vrings[q].callfd > -1)
        {
          /* ... */
          unix_file_del (&unix_main,
                         unix_main.file_pool + vui->vrings[q].callfd_idx);
        }
      /* ... */
      if (vui->vrings[q].kickfd > -1)
        {
          close (vui->vrings[q].kickfd);
          vui->vrings[q].kickfd = -1;
        }
      /* ... */
      vui->vrings[q].enabled = 0;
      vui->vrings[q].callfd = -1;
      /* ... */
#if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
      vq->log_guest_addr = 0;
#endif
      /* ... */
    }

  xd->vu_is_running = 0;
  /* ... */
  dpdk_unmap_all_mem_regions (xd);
  /* ... */
}
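/*
 * Control-channel reader. Every vhost-user message starts with a fixed
 * header (request, flags, size) followed by a request-specific payload;
 * any file descriptors arrive on the same recvmsg() as SCM_RIGHTS
 * ancillary data, so the control buffer below is sized for
 * VHOST_MEMORY_MAX_NREGIONS fds.
 */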
static clib_error_t *
dpdk_vhost_user_socket_read (unix_file_t * uf)
{
  int n;
  int fd, number_of_fds = 0;
  int fds[VHOST_MEMORY_MAX_NREGIONS];
  vhost_user_msg_t msg;
  struct msghdr mh;
  struct iovec iov[1];
  dpdk_device_t *xd;
  dpdk_vu_intf_t *vui;
  struct cmsghdr *cmsg;
  u8 q;
  /* ... */

  xd = dpdk_vhost_user_device_from_sw_if_index (p[0]);
  /* ... */
  vui = xd->vu_intf;

  char control[CMSG_SPACE (VHOST_MEMORY_MAX_NREGIONS * sizeof (int))];

  memset (&mh, 0, sizeof (mh));
  memset (control, 0, sizeof (control));

  /* the fixed-size message header is read first */
  iov[0].iov_base = (void *) &msg;
  iov[0].iov_len = VHOST_USER_MSG_HDR_SZ;

  mh.msg_iov = iov;
  mh.msg_iovlen = 1;
  mh.msg_control = control;
  mh.msg_controllen = sizeof (control);

  n = recvmsg (uf->file_descriptor, &mh, 0);
  /* ... */

  if (mh.msg_flags & MSG_CTRUNC)
    goto close_socket;

  cmsg = CMSG_FIRSTHDR (&mh);

  if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) &&
      (cmsg->cmsg_type == SCM_RIGHTS) &&
      (cmsg->cmsg_len - CMSG_LEN (0) <=
       VHOST_MEMORY_MAX_NREGIONS * sizeof (int)))
    {
      number_of_fds = (cmsg->cmsg_len - CMSG_LEN (0)) / sizeof (int);
      clib_memcpy (fds, CMSG_DATA (cmsg), number_of_fds * sizeof (int));
    }
  /* ... */

  /* version bits of msg.flags must be 1 */
  if ((msg.flags & 7) != 1)
    {
      DBG_SOCK ("malformed message received. closing socket");
      goto close_socket;
    }

  {
    int rv __attribute__ ((unused));
    /* ... read the request-specific body of the message ... */
  }
  /* ... */

  DBG_SOCK ("VPP VHOST message %s", vhost_message_str[msg.request]);
  switch (msg.request)
    {
    case VHOST_USER_GET_FEATURES:
      /* ... */
      msg.u64 &= vui->feature_mask;
      msg.size = sizeof (msg.u64);
      break;

    case VHOST_USER_SET_FEATURES:
      DBG_SOCK ("if %d msg VHOST_USER_SET_FEATURES features 0x%016lx",
                /* ... */);
      /* ... */
      break;

    case VHOST_USER_SET_MEM_TABLE:
      DBG_SOCK ("if %d msg VHOST_USER_SET_MEM_TABLE nregions %d",
                /* ... */);

      if ((msg.memory.nregions < 1) ||
          (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS))
        {
          DBG_SOCK ("number of mem regions must be between 1 and %i",
                    VHOST_MEMORY_MAX_NREGIONS);
          goto close_socket;
        }

      if (msg.memory.nregions != number_of_fds)
        {
          DBG_SOCK ("each memory region must have FD");
          goto close_socket;
        }
      /* ... */
      dpdk_unmap_all_mem_regions (xd);
      /* ... */
      break;

    case VHOST_USER_SET_VRING_NUM:
      DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d",
                /* ... */);

      if ((msg.state.num > 32768) ||    /* maximum ring size is 32768 */
          (msg.state.num == 0) ||       /* it cannot be zero */
          (msg.state.num % 2))          /* it must be even */
        goto close_socket;
      /* ... */
      break;

    case VHOST_USER_SET_VRING_ADDR:
      DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ADDR idx %d",
                /* ... */);
      dpdk_vhost_user_set_vring_addr (/* ... */,
                                      msg.addr.desc_user_addr,
                                      msg.addr.used_user_addr,
                                      msg.addr.avail_user_addr,
                                      msg.addr.log_guest_addr);
      break;

    case VHOST_USER_SET_VRING_CALL:
      q = (u8) (msg.u64 & 0xFF);
      DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_CALL u64 %lx, idx: %d",
                /* ... */);

      if (!(msg.u64 & 0x100))   /* 0x100 set means no fd attached */
        {
          if (number_of_fds != 1)
            goto close_socket;
          fd = fds[0];
          /* ... */
        }
      /* ... */
      break;

    case VHOST_USER_SET_VRING_KICK:
      q = (u8) (msg.u64 & 0xFF);
      DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_KICK u64 %lx, idx: %d",
                /* ... */);

      if (!(msg.u64 & 0x100))
        {
          if (number_of_fds != 1)
            goto close_socket;

          if (vui->vrings[q].kickfd > -1)
            close (vui->vrings[q].kickfd);
          /* ... */
          vui->vrings[q].kickfd = fds[0];
          /* ... */
        }
      else
        vui->vrings[q].kickfd = -1;
      /* ... */
      DBG_SOCK (/* ... */ vui->vrings[q].kickfd);
      break;

    case VHOST_USER_SET_VRING_ERR:
      q = (u8) (msg.u64 & 0xFF);
      DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ERR u64 %lx, idx: %d",
                /* ... */);

      if (!(msg.u64 & 0x100))
        {
          if (number_of_fds != 1)
            goto close_socket;
          fd = fds[0];
          /* ... */
        }
      /* ... */
      vui->vrings[q].errfd = fd;
      break;

    case VHOST_USER_SET_VRING_BASE:
      DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_BASE idx %d num %d",
                /* ... */);
      /* ... */
      break;

    case VHOST_USER_GET_VRING_BASE:
      DBG_SOCK ("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d",
                /* ... */);
      /* ... */
      msg.size = sizeof (msg.state);
      break;

    case VHOST_USER_SET_LOG_BASE:
#if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
      if (msg.size != sizeof (msg.log))
        {
          DBG_SOCK
            ("invalid msg size for VHOST_USER_SET_LOG_BASE: %u instead of %lu",
             msg.size, sizeof (msg.log));
          goto close_socket;
        }

      if (!(xd->vu_vhost_dev.protocol_features &
            (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD)))
        {
          DBG_SOCK
            ("VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but VHOST_USER_SET_LOG_BASE received");
          goto close_socket;
        }

      fd = fds[0];
      /* ... */
      ssize_t map_sz =
        RTE_ALIGN_CEIL (msg.log.size + msg.log.offset, page_sz);

      void *addr = mmap (0, map_sz, PROT_READ | PROT_WRITE,
                         MAP_SHARED, fd, 0);

      DBG_SOCK ("map log region addr 0 len 0x%lx off 0x%lx fd %d mapped %p",
                map_sz, msg.log.offset, fd, addr);

      if (addr == MAP_FAILED)
        {
          clib_warning ("failed to map memory. errno is %d", errno);
          goto close_socket;
        }
      /* ... */
      xd->vu_vhost_dev.log_size = msg.log.size;
      /* ... */
      msg.size = sizeof (msg.u64);
#else
      DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_BASE Not-Implemented",
                /* ... */);
#endif
      break;

    case VHOST_USER_GET_PROTOCOL_FEATURES:
      DBG_SOCK ("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES",
                /* ... */);
      /* ... */
      DBG_SOCK ("VHOST_USER_PROTOCOL_FEATURES: %llx",
                /* ... */);
      msg.size = sizeof (msg.u64);
      break;

    case VHOST_USER_SET_PROTOCOL_FEATURES:
      DBG_SOCK ("if %d msg VHOST_USER_SET_PROTOCOL_FEATURES",
                /* ... */);
      DBG_SOCK ("VHOST_USER_SET_PROTOCOL_FEATURES: 0x%lx", msg.u64);
      /* ... */
      break;

    case VHOST_USER_SET_VRING_ENABLE:
      DBG_SOCK ("%d VPP VHOST_USER_SET_VRING_ENABLE IDX: %d, Enable: %d",
                /* ... */);
      dpdk_vhost_user_set_vring_enable
        (/* ... */);
      break;

    case VHOST_USER_GET_QUEUE_NUM:
      /* ... */
      msg.u64 = xd->vu_vhost_dev.virt_qp_nb;
      msg.size = sizeof (msg.u64);
      break;

    default:
      DBG_SOCK ("unknown vhost-user message %d received. closing socket",
                msg.request);
      goto close_socket;
    }

  /* ... send the reply when the message expects one ... */
  return 0;

close_socket:
  dpdk_vhost_user_if_disconnect (xd);
  /* ... */
  return 0;
}
static clib_error_t *
dpdk_vhost_user_socket_error (unix_file_t * uf)
{
  /* ... */
  xd = dpdk_vhost_user_device_from_sw_if_index (p[0]);
  /* ... */
  dpdk_vhost_user_if_disconnect (xd);
  return 0;
}
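/*
 * Server-mode accept path: each server interface owns a listening socket;
 * the accepted connection is registered with the same read/error handlers
 * as a client-mode connection.
 */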
static clib_error_t *
dpdk_vhost_user_socksvr_accept_ready (unix_file_t * uf)
{
  int client_fd, client_len;
  struct sockaddr_un client;
  unix_file_t template = { 0 };
  dpdk_device_t *xd;
  dpdk_vu_intf_t *vui;
  /* ... */

  xd = dpdk_vhost_user_device_from_sw_if_index (p[0]);
  /* ... */

  client_len = sizeof (client);
  client_fd = accept (uf->file_descriptor,
                      (struct sockaddr *) &client,
                      (socklen_t *) & client_len);
  /* ... */

  template.read_function = dpdk_vhost_user_socket_read;
  template.error_function = dpdk_vhost_user_socket_error;
  template.file_descriptor = client_fd;
  /* ... */
  vui->client_fd = client_fd;
  /* ... */
  return 0;
}
static int
dpdk_vhost_user_init_server_sock (const char *sock_filename, int *sockfd)
{
  int rv = 0, fd;
  struct sockaddr_un un = { };
  unix_file_t template = { 0 };

  fd = socket (AF_UNIX, SOCK_STREAM, 0);
  if (fd < 0)
    return VNET_API_ERROR_SYSCALL_ERROR_1;

  un.sun_family = AF_UNIX;
  strcpy ((char *) un.sun_path, (char *) sock_filename);

  /* remove the socket file if it already exists */
  unlink ((char *) sock_filename);

  if (bind (fd, (struct sockaddr *) &un, sizeof (un)) == -1)
    {
      rv = VNET_API_ERROR_SYSCALL_ERROR_2;
      goto error;
    }

  if (listen (fd, 1) == -1)
    {
      rv = VNET_API_ERROR_SYSCALL_ERROR_3;
      goto error;
    }
  /* ... */
  template.read_function = dpdk_vhost_user_socksvr_accept_ready;
  template.file_descriptor = fd;
  /* ... */
  *sockfd = fd;
  return rv;

error:
  close (fd);
  return rv;
}
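/*
 * External entry points. When the DPDK vhost driver is not in use these
 * defer to the vnet vhost-user implementation (vhost_user_create_if and
 * friends); otherwise the DPDK device is created, modified, or deleted in
 * place.
 */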
int
dpdk_vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
                           const char *sock_filename,
                           u8 is_server, u32 * sw_if_index, u64 feature_mask,
                           u8 renumber, u32 custom_dev_instance, u8 * hwaddr)
{
  dpdk_device_t *xd;
  u32 hw_if_idx = ~0;
  int sockfd = -1;
  int rv = 0;
  /* ... */

  /* defer to the vnet vhost-user driver when DPDK vhost is disabled */
  if (/* ... */)
    return vhost_user_create_if (vnm, vm, sock_filename, is_server,
                                 sw_if_index, feature_mask, renumber,
                                 custom_dev_instance, hwaddr);

  if (is_server)
    {
      if ((rv =
           dpdk_vhost_user_init_server_sock (sock_filename, &sockfd)) != 0)
        return rv;
    }
  /* ... */
  if (renumber)
    {
      dpdk_create_vhost_user_if_internal (&hw_if_idx, custom_dev_instance,
                                          hwaddr);
      /* ... */
    }
  else
    dpdk_create_vhost_user_if_internal (&hw_if_idx, (u32) ~ 0, hwaddr);
  DBG_SOCK ("dpdk vhost-user interface created hw_if_index %d", hw_if_idx);

  xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_idx);
  /* ... */
  dpdk_vhost_user_vui_init (vnm, xd, sockfd, sock_filename, is_server,
                            feature_mask, sw_if_index);
  /* ... */
  dpdk_vhost_user_vui_register (vm, xd);
  return rv;
}
int
dpdk_vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
                           const char *sock_filename,
                           u8 is_server, u32 sw_if_index, u64 feature_mask,
                           u8 renumber, u32 custom_dev_instance)
{
  dpdk_device_t *xd;
  dpdk_vu_intf_t *vui = NULL;
  u32 sw_if_idx = ~0;
  int sockfd = -1;
  int rv = 0;
  /* ... */

  /* defer to the vnet vhost-user driver when DPDK vhost is disabled */
  if (/* ... */)
    return vhost_user_modify_if (vnm, vm, sock_filename, is_server,
                                 sw_if_index, feature_mask, renumber,
                                 custom_dev_instance);

  xd = dpdk_vhost_user_device_from_sw_if_index (sw_if_index);
  if (xd == NULL)
    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
  /* ... */
  dpdk_vhost_user_if_disconnect (xd);
  /* ... */
  if (is_server)
    {
      if ((rv =
           dpdk_vhost_user_init_server_sock (sock_filename, &sockfd)) != 0)
        return rv;
    }
  /* ... */
  dpdk_vhost_user_vui_init (vnm, xd, sockfd, sock_filename, is_server,
                            feature_mask, &sw_if_idx);
  /* ... */
  dpdk_vhost_user_vui_register (vm, xd);
  /* ... */
  return rv;
}
int
dpdk_vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm,
                           u32 sw_if_index)
{
  dpdk_device_t *xd;
  dpdk_vu_intf_t *vui;
  int rv = 0;
  /* ... */

  xd = dpdk_vhost_user_device_from_sw_if_index (sw_if_index);
  if (xd == NULL)
    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
  /* ... */
  dpdk_vhost_user_if_disconnect (xd);
  /* ... */
  DBG_SOCK ("deleted (deactivated) vhost-user interface sw_if_index %d",
            sw_if_index);
  return rv;
}
int
dpdk_vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
                          vhost_user_intf_details_t ** out_vuids)
{
  dpdk_device_t *xd;
  dpdk_vu_intf_t *vui;
  struct virtio_net *vhost_dev;
  vhost_user_intf_details_t *r_vuids = NULL;
  vhost_user_intf_details_t *vuid = NULL;
  u32 *hw_if_indices = 0;
  int i;
  /* ... */

  for (i = 0; i < vec_len (hw_if_indices); i++)
    {
      /* ... */
      xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_indices[i]);
      if (!xd)
        {
          clib_warning ("invalid vhost-user interface hw_if_index %d",
                        hw_if_indices[i]);
          continue;
        }

      vui = xd->vu_intf;
      vhost_dev = &xd->vu_vhost_dev;
      u32 virtio_net_hdr_sz = (vui->num_vrings > 0 ?
                               vhost_dev->virtqueue[0]->vhost_hlen : 0);
      /* ... */
      vuid->features = vhost_dev->features;
      /* ... */
      vuid->num_regions =
        (vhost_dev->mem != NULL ? vhost_dev->mem->nregions : 0);
      /* ... */
      strncpy ((char *) vuid->sock_filename, (char *) vui->sock_filename,
               /* ... */);
      /* ... */
      strncpy ((char *) vuid->if_name, (char *) s,
               /* ... */);
      /* ... */
    }

  /* ... */
  *out_vuids = r_vuids;
  /* ... */
  return 0;
}
typedef struct
{
  struct sockaddr_un sun;
  int sockfd;
  unix_file_t template;
  uword *event_data;
} dpdk_vu_process_state;

void
dpdk_vhost_user_process_init (void **ctx)
{
  dpdk_vu_process_state *state =
    clib_mem_alloc (sizeof (dpdk_vu_process_state));
  memset (state, 0, sizeof (*state));
  state->sockfd = socket (AF_UNIX, SOCK_STREAM, 0);
  state->sun.sun_family = AF_UNIX;
  state->template.read_function = dpdk_vhost_user_socket_read;
  state->template.error_function = dpdk_vhost_user_socket_error;
  state->event_data = 0;
  *ctx = state;
}
void
dpdk_vhost_user_process_cleanup (void *ctx)
{
  clib_mem_free (ctx);
}

void
dpdk_vhost_user_process_if (vlib_main_t * vm, dpdk_device_t * xd, void *ctx)
{
  dpdk_vu_process_state *state = (dpdk_vu_process_state *) ctx;
  dpdk_vu_intf_t *vui = xd->vu_intf;

  if (vui->sock_is_server || !vui->active)
    return;

  if (vui->unix_fd == -1)
    {
      /* try to connect */
      strncpy (state->sun.sun_path, (char *) vui->sock_filename,
               sizeof (state->sun.sun_path) - 1);

      if (connect (state->sockfd, (struct sockaddr *) &(state->sun),
                   sizeof (struct sockaddr_un)) == 0)
        {
          vui->sock_errno = 0;
          vui->unix_fd = state->sockfd;
          state->template.file_descriptor = state->sockfd;
          vui->unix_file_index =
            unix_file_add (&unix_main, &(state->template));
          /* ... */

          /* grab a fresh socket for the next connection attempt */
          state->sockfd = socket (AF_UNIX, SOCK_STREAM, 0);
          if (state->sockfd < 0)
            return;
        }
      else
        {
          vui->sock_errno = errno;
        }
    }
  else
    {
      /* check if the socket is still alive */
      int error = 0;
      socklen_t len = sizeof (error);
      getsockopt (vui->unix_fd, SOL_SOCKET, SO_ERROR, &error, &len);

      if (error)
        dpdk_vhost_user_if_disconnect (xd);
    }
}
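/*
 * CLI entry points follow; each parses its arguments with unformat and
 * calls the corresponding dpdk_vhost_user_* function above.
 */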
static clib_error_t *
dpdk_vhost_user_connect_command_fn (vlib_main_t * vm,
                                    unformat_input_t * input,
                                    vlib_cli_command_t * cmd)
{
  u8 *sock_filename = NULL;
  u32 sw_if_index;
  u8 is_server = 0;
  u64 feature_mask = (u64) ~ 0;
  u8 renumber = 0;
  u32 custom_dev_instance = ~0;
  u8 *hw = NULL;
  /* ... */

  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (line_input, "socket %s", &sock_filename))
        ;
      else if (unformat (line_input, "server"))
        is_server = 1;
      else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask))
        ;
      /* ... */
      else if (unformat (line_input, "renumber %d", &custom_dev_instance))
        {
          renumber = 1;
        }
      /* ... */
    }
  /* ... */

  if (sock_filename == NULL)
    {
      /* ... */
    }

  dpdk_vhost_user_create_if (vnm, vm, (char *) sock_filename,
                             is_server, &sw_if_index, feature_mask,
                             renumber, custom_dev_instance, hw);
  /* ... */
}

VLIB_CLI_COMMAND (dpdk_vhost_user_connect_command, static) = {
  .path = "create vhost-user",
  .short_help = "create vhost-user socket <socket-filename> [server] "
    "[feature-mask <hex>] [renumber <dev_instance>]",
  .function = dpdk_vhost_user_connect_command_fn,
};
static clib_error_t *
dpdk_vhost_user_delete_command_fn (vlib_main_t * vm,
                                   unformat_input_t * input,
                                   vlib_cli_command_t * cmd)
{
  u32 sw_if_index = ~0;
  /* ... */

  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (line_input, "sw_if_index %d", &sw_if_index))
        ;
      /* ... */
    }
  /* ... */

  if (sw_if_index == ~0)
    {
      /* ... */
    }

  dpdk_vhost_user_delete_if (vnm, vm, sw_if_index);
  /* ... */
}

VLIB_CLI_COMMAND (dpdk_vhost_user_delete_command, static) = {
  .path = "delete vhost-user",
  .short_help = "delete vhost-user sw_if_index <nn>",
  .function = dpdk_vhost_user_delete_command_fn,
};
#define foreach_dpdk_vhost_feature      \
  _ (VIRTIO_NET_F_MRG_RXBUF)            \
  _ (VIRTIO_NET_F_CTRL_VQ)              \
  _ (VIRTIO_NET_F_CTRL_RX)

static clib_error_t *
show_dpdk_vhost_user_command_fn (vlib_main_t * vm,
                                 unformat_input_t * input,
                                 vlib_cli_command_t * cmd)
{
  dpdk_device_t *xd;
  dpdk_vu_intf_t *vui;
  struct virtio_net *vhost_dev;
  u32 hw_if_index, *hw_if_indices = 0;
  int i, j, q;
  int show_descr = 0;
  struct virtio_memory *mem;
  /* ... */
  struct feat_struct
  {
    u8 bit;
    char *str;
  };
  struct feat_struct *feat_entry;

  static struct feat_struct feat_array[] = {
#define _(f) { .str = #f, .bit = f, },
    foreach_dpdk_vhost_feature
#undef _
    {.str = NULL}
  };
  /* ... */
  vec_add1 (hw_if_indices, hw_if_index);
  /* ... */
  if (vec_len (hw_if_indices) == 0)
    {
      /* ... */
    }
  /* ... */

  for (i = 0; i < vec_len (hw_if_indices); i++)
    {
      /* ... */
      if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_indices[i])))
        {
          /* ... */
        }
      /* ... */
      vui = xd->vu_intf;
      vhost_dev = &xd->vu_vhost_dev;
      mem = vhost_dev->mem;
      u32 virtio_net_hdr_sz = (vui->num_vrings > 0 ?
                               vhost_dev->virtqueue[0]->vhost_hlen : 0);

      vlib_cli_output (vm, /* ... */
                       hi->name, hw_if_indices[i]);
      vlib_cli_output (vm, /* ... */
                       virtio_net_hdr_sz, xd->vu_vhost_dev.features);

      feat_entry = (struct feat_struct *) &feat_array;
      while (feat_entry->str)
        {
          if (xd->vu_vhost_dev.features & (1 << feat_entry->bit))
            {
              /* ... */
            }
          feat_entry++;
        }
      /* ... */
      vlib_cli_output (vm, /* ... */
                       vui->sock_is_server ? "server" : "client",
                       strerror (vui->sock_errno));
      /* ... */
      vlib_cli_output (vm,
                       " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n");
      vlib_cli_output (vm,
                       " ====== ===== ================== ================== ================== ================== ==================\n");
      for (j = 0; j < mem->nregions; j++)
        {
          vlib_cli_output (vm,
                           " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
                           j, vui->region_fd[j],
                           mem->regions[j].guest_phys_address,
                           mem->regions[j].memory_size,
                           mem->regions[j].userspace_address,
                           mem->regions[j].address_offset,
                           vui->region_addr[j]);
        }
      for (q = 0; q < vui->num_vrings; q++)
        {
          struct vhost_virtqueue *vq = vhost_dev->virtqueue[q];
          const char *qtype = (q & 1) ? "TX" : "RX";
          /* ... */
          vlib_cli_output (vm,
                           " qsz %d last_used_idx %d last_used_idx_res %d\n",
                           vq->size, vq->last_used_idx,
                           vq->last_used_idx_res);

          if (vq->avail && vq->used)
            vlib_cli_output (vm,
                             " avail.flags %x avail.idx %d used.flags %x used.idx %d\n",
                             vq->avail->flags, vq->avail->idx,
                             vq->used->flags, vq->used->idx);
          /* ... */
          vlib_cli_output (vm, /* ... */
                           vq->kickfd, vq->callfd, vui->vrings[q].errfd,
                           /* ... */);

          if (show_descr && vq->enabled)
            {
              /* ... */
              vlib_cli_output (vm,
                               " id addr len flags next user_addr\n");
              vlib_cli_output (vm,
                               " ===== ================== ===== ====== ===== ==================\n");
              for (j = 0; j < vq->size; j++)
                {
                  vlib_cli_output (vm,
                                   " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n",
                                   j, vq->desc[j].addr, vq->desc[j].len,
                                   vq->desc[j].flags, vq->desc[j].next,
                                   pointer_to_uword (map_guest_mem
                                                     (xd,
                                                      vq->desc[j].addr)));
                }
            }
        }
      /* ... */
    }
  /* ... */
}

VLIB_CLI_COMMAND (show_vhost_user_command, static) = {
  .path = "show vhost-user",
  .short_help = "show vhost-user interface",
  .function = show_dpdk_vhost_user_command_fn,
};