FD.io VPP  v16.12-rc0-258-g513da53
Vector Packet Processing
All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
vhost_user.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <assert.h>
16 #include <sys/socket.h>
17 #include <sys/un.h>
18 #include <sys/stat.h>
19 #include <sys/vfs.h>
20 
21 #include <vlib/vlib.h>
22 #include <vlib/unix/unix.h>
23 
24 #include <vnet/vnet.h>
25 #include <vppinfra/vec.h>
26 #include <vppinfra/error.h>
27 #include <vppinfra/format.h>
28 
29 #include <vnet/ethernet/ethernet.h>
30 #include <vnet/devices/dpdk/dpdk.h>
31 
33 
/* Set to 1 to route vhost-user socket tracing through clib_warning(). */
#define VHOST_USER_DEBUG_SOCKET 0

/*
 * DBG_SOCK (fmt, ...) - socket-level debug trace.
 *
 * The expansion deliberately carries no trailing semicolon: the caller's
 * own ';' terminates the statement, so "if (x) DBG_SOCK (...); else ..."
 * parses identically whether tracing is compiled in or not.
 */
#if VHOST_USER_DEBUG_SOCKET == 1
#define DBG_SOCK(args...) clib_warning(args)
#else
#define DBG_SOCK(args...)
#endif
41 
42 #if DPDK_VHOST_USER
43 
44 /* *INDENT-OFF* */
45 static const char *vhost_message_str[] __attribute__ ((unused)) =
46 {
47  [VHOST_USER_NONE] = "VHOST_USER_NONE",
48  [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
49  [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
50  [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
51  [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
52  [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
53  [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
54  [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
55  [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
56  [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
57  [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
58  [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
59  [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
60  [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
61  [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
62  [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
63  [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
64  [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
65  [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
66 };
67 /* *INDENT-ON* */
68 
69 static int dpdk_vhost_user_set_vring_enable (u32 hw_if_index,
70  u8 idx, int enable);
71 
72 /*
73  * DPDK vhost-user functions
74  */
75 
76 /* portions taken from dpdk
77  * BSD LICENSE
78  *
79  * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
80  * All rights reserved.
81  *
82  * Redistribution and use in source and binary forms, with or without
83  * modification, are permitted provided that the following conditions
84  * are met:
85  *
86  * * Redistributions of source code must retain the above copyright
87  * notice, this list of conditions and the following disclaimer.
88  * * Redistributions in binary form must reproduce the above copyright
89  * notice, this list of conditions and the following disclaimer in
90  * the documentation and/or other materials provided with the
91  * distribution.
92  * * Neither the name of Intel Corporation nor the names of its
93  * contributors may be used to endorse or promote products derived
94  * from this software without specific prior written permission.
95  *
96  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
97  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
98  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
99  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
100  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
101  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
102  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
103  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
104  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
105  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
106  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
107  */
108 
109 
110 static uword
111 qva_to_vva (struct virtio_net *dev, uword qemu_va)
112 {
113  struct virtio_memory_regions *region;
114  uword vhost_va = 0;
115  uint32_t regionidx = 0;
116 
117  /* Find the region where the address lives. */
118  for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++)
119  {
120  region = &dev->mem->regions[regionidx];
121  if ((qemu_va >= region->userspace_address) &&
122  (qemu_va <= region->userspace_address + region->memory_size))
123  {
124  vhost_va = qemu_va + region->guest_phys_address +
125  region->address_offset - region->userspace_address;
126  break;
127  }
128  }
129  return vhost_va;
130 }
131 
132 static dpdk_device_t *
133 dpdk_vhost_user_device_from_hw_if_index (u32 hw_if_index)
134 {
135  vnet_main_t *vnm = vnet_get_main ();
136  dpdk_main_t *dm = &dpdk_main;
137  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
139 
140  if ((xd->flags DPDK_DEVICE_FLAG_VHOST_USER) == 0)
141  return 0;
142 
143  return xd;
144 }
145 
146 static dpdk_device_t *
147 dpdk_vhost_user_device_from_sw_if_index (u32 sw_if_index)
148 {
149  vnet_main_t *vnm = vnet_get_main ();
150  vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
152 
153  return dpdk_vhost_user_device_from_hw_if_index (sw->hw_if_index);
154 }
155 
156 static void
157 stop_processing_packets (u32 hw_if_index, u8 idx)
158 {
159  dpdk_device_t *xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index);
160  assert (xd);
161  xd->vu_vhost_dev.virtqueue[idx]->enabled = 0;
162 }
163 
164 static void
165 disable_interface (dpdk_device_t * xd)
166 {
167  u8 idx;
168  int numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
169  for (idx = 0; idx < numqs; idx++)
170  xd->vu_vhost_dev.virtqueue[idx]->enabled = 0;
171 
172  xd->vu_is_running = 0;
173 }
174 
175 static inline void *
176 map_guest_mem (dpdk_device_t * xd, uword addr)
177 {
178  dpdk_vu_intf_t *vui = xd->vu_intf;
179  struct virtio_memory *mem = xd->vu_vhost_dev.mem;
180  int i;
181  for (i = 0; i < mem->nregions; i++)
182  {
183  if ((mem->regions[i].guest_phys_address <= addr) &&
184  ((mem->regions[i].guest_phys_address +
185  mem->regions[i].memory_size) > addr))
186  {
187  return (void *) ((uword) vui->region_addr[i] + addr -
188  (uword) mem->regions[i].guest_phys_address);
189  }
190  }
191  DBG_SOCK ("failed to map guest mem addr %lx", addr);
192  return 0;
193 }
194 
/*
 * Create a vhost-user interface, or re-initialise one taken from the tail
 * of dm->vu_inactive_interfaces_device_index.
 *
 * hw_if_index  out: receives xd->vlib_hw_if_index of the interface.
 * if_id        interface id; (u32) ~0 means "no preference": a new
 *              interface then takes dm->next_vu_if_id++, a reused one keeps
 *              its current vu_if_id.
 * hwaddr       optional 6-byte MAC address; when NULL an address is
 *              generated (02:fe followed by four random bytes seeded from
 *              vlib_time_now()).
 *
 * Returns the error produced by the interface registration step when it is
 * non-NULL, otherwise 0.
 *
 * NOTE(review): the embedded listing numbers in this view skip values
 * (e.g. 200-201, 203, 213, 224, 232, 263-264, 285-286, 374, 382), so several
 * declarations and statements - including those that introduce `tm`, `sw`
 * and `dq`, and the calls whose argument lists are visible below - are not
 * shown here. Comments describe only what is visible.
 */
195 static clib_error_t *
196 dpdk_create_vhost_user_if_internal (u32 * hw_if_index, u32 if_id, u8 * hwaddr)
197 {
198  dpdk_main_t *dm = &dpdk_main;
199  vlib_main_t *vm = vlib_get_main ();
202  clib_error_t *error;
204  int num_qpairs = 1;
205  dpdk_vu_intf_t *vui = NULL;
206 
/* One queue pair unless dm->use_rss is at least 1, in which case one per
 * tm->n_vlib_mains. */
207  num_qpairs = dm->use_rss < 1 ? 1 : tm->n_vlib_mains;
208 
209  dpdk_device_t *xd = NULL;
210  u8 addr[6];
211  int j;
212 
214 
215  int inactive_cnt = vec_len (dm->vu_inactive_interfaces_device_index);
216  // if there are any inactive ifaces
217  if (inactive_cnt > 0)
218  {
219  // take last
220  u32 vui_idx = dm->vu_inactive_interfaces_device_index[inactive_cnt - 1];
221  if (vec_len (dm->devices) > vui_idx)
222  {
223  xd = vec_elt_at_index (dm->devices, vui_idx);
/* NOTE(review): the condition selecting between the two branches below is
 * on a line not visible here; judging by the messages it presumably tests
 * whether the device is of vhost-user type - confirm. */
225  {
226  DBG_SOCK
227  ("reusing inactive vhost-user interface sw_if_index %d",
228  xd->vlib_sw_if_index);
229  }
230  else
231  {
233  ("error: inactive vhost-user interface sw_if_index %d not VHOST_USER type!",
234  xd->vlib_sw_if_index);
235  // reset so new interface is created
236  xd = NULL;
237  }
238  }
239  // "remove" from inactive list
240  _vec_len (dm->vu_inactive_interfaces_device_index) -= 1;
241  }
242 
243  if (xd)
244  {
245  // existing interface used - do not overwrite if_id if not needed
246  if (if_id != (u32) ~ 0)
247  xd->vu_if_id = if_id;
248 
249  // reset virtqueues
250  vui = xd->vu_intf;
/* The existing virtqueue allocations are zeroed in place; the three
 * descriptors are then set to -1 and the per-ring counters cleared. */
251  for (j = 0; j < num_qpairs * VIRTIO_QNUM; j++)
252  {
253  memset (xd->vu_vhost_dev.virtqueue[j], 0,
254  sizeof (struct vhost_virtqueue));
255  xd->vu_vhost_dev.virtqueue[j]->kickfd = -1;
256  xd->vu_vhost_dev.virtqueue[j]->callfd = -1;
257  xd->vu_vhost_dev.virtqueue[j]->backend = -1;
258  vui->vrings[j].packets = 0;
259  vui->vrings[j].bytes = 0;
260  }
261 
262  // reset lockp
265 
266  // reset tx vectors
267  for (j = 0; j < tm->n_vlib_mains; j++)
268  {
271  vec_reset_length (xd->tx_vectors[j]);
272  }
273 
274  // reset rx vector
275  for (j = 0; j < xd->rx_q_used; j++)
276  {
279  vec_reset_length (xd->rx_vectors[j]);
280  }
281  }
282  else
283  {
284  // vui was not retrieved from inactive ifaces - create new
/* NOTE(review): xd is NULL on entry to this branch in the visible code; the
 * statement that obtains the new dpdk_device_t is presumably on a line not
 * shown here - confirm. */
287  xd->rx_q_used = num_qpairs;
288  xd->tx_q_used = num_qpairs;
289  xd->vu_vhost_dev.virt_qp_nb = num_qpairs;
290 
293 
294  if (if_id == (u32) ~ 0)
295  xd->vu_if_id = dm->next_vu_if_id++;
296  else
297  xd->vu_if_id = if_id;
298 
/* device_index is the element's position inside the dm->devices vector. */
299  xd->device_index = xd - dm->devices;
300  xd->per_interface_next_index = ~0;
301  xd->vu_intf = clib_mem_alloc (sizeof (*(xd->vu_intf)));
302 
303  xd->vu_vhost_dev.mem = clib_mem_alloc (sizeof (struct virtio_memory) +
305  sizeof (struct
306  virtio_memory_regions));
307 
308  /* Will be set when guest sends VHOST_USER_SET_MEM_TABLE cmd */
309  xd->vu_vhost_dev.mem->nregions = 0;
310 
311  /*
312  * New virtqueue structure is an array of VHOST_MAX_QUEUE_PAIRS * 2
313  * We need to allocate numq pairs.
314  */
315  vui = xd->vu_intf;
/* Only the first num_qpairs * VIRTIO_QNUM virtqueue slots are allocated
 * here. */
316  for (j = 0; j < num_qpairs * VIRTIO_QNUM; j++)
317  {
318  xd->vu_vhost_dev.virtqueue[j] =
319  clib_mem_alloc (sizeof (struct vhost_virtqueue));
320  memset (xd->vu_vhost_dev.virtqueue[j], 0,
321  sizeof (struct vhost_virtqueue));
322  xd->vu_vhost_dev.virtqueue[j]->kickfd = -1;
323  xd->vu_vhost_dev.virtqueue[j]->callfd = -1;
324  xd->vu_vhost_dev.virtqueue[j]->backend = -1;
325  vui->vrings[j].packets = 0;
326  vui->vrings[j].bytes = 0;
327  }
328 
330 
331  DBG_SOCK
332  ("tm->n_vlib_mains: %d. TX %d, RX: %d, num_qpairs: %d, Lock: %p",
333  tm->n_vlib_mains, xd->tx_q_used, xd->rx_q_used, num_qpairs,
334  xd->lockp);
335 
338 
339  for (j = 0; j < tm->n_vlib_mains; j++)
340  {
343  vec_reset_length (xd->tx_vectors[j]);
344  }
345 
346  // reset rx vector
347  for (j = 0; j < xd->rx_q_used; j++)
348  {
351  vec_reset_length (xd->rx_vectors[j]);
352  }
353 
354  }
355  /*
356  * Generate random MAC address for the interface
357  */
358  if (hwaddr)
359  {
360  clib_memcpy (addr, hwaddr, sizeof (addr));
361  }
362  else
363  {
364  f64 now = vlib_time_now (vm);
365  u32 rnd;
366  rnd = (u32) (now * 1e6);
367  rnd = random_u32 (&rnd);
368 
/* Bytes 2..5 take the random value; bytes 0..1 are fixed to 02:fe. */
369  clib_memcpy (addr + 2, &rnd, sizeof (rnd));
370  addr[0] = 2;
371  addr[1] = 0xfe;
372  }
373 
/* NOTE(review): the callee of the argument list below (whose result is
 * presumably stored in `error`) is on a line not visible here. */
375  (dm->vnet_main, dpdk_device_class.index, xd->device_index,
376  /* ethernet address */ addr,
377  &xd->vlib_hw_if_index, 0);
378 
379  if (error)
380  return error;
381 
383  xd->vlib_sw_if_index = sw->sw_if_index;
384 
385  *hw_if_index = xd->vlib_hw_if_index;
386 
387  DBG_SOCK ("xd->device_index: %d, dm->input_cpu_count: %d, "
388  "dm->input_cpu_first_index: %d\n", xd->device_index,
390 
/* Each queue pair is assigned to an input CPU, cycling through
 * dm->input_cpu_count CPUs starting at dm->input_cpu_first_index. For every
 * queue the socket id reported by rte_lcore_to_socket_id() is recorded and a
 * (device, queue_id) entry is appended to dm->devices_by_cpu[cpu]. */
391  int q, next_cpu = 0;
392  for (q = 0; q < num_qpairs; q++)
393  {
394  int cpu = dm->input_cpu_first_index + (next_cpu % dm->input_cpu_count);
395 
396  unsigned lcore = vlib_worker_threads[cpu].lcore_id;
398  xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id (lcore);
399 
400  vec_add2 (dm->devices_by_cpu[cpu], dq, 1);
401  dq->device = xd->device_index;
402  dq->queue_id = q;
403  DBG_SOCK ("CPU for %d = %d. QID: %d", *hw_if_index, cpu, dq->queue_id);
404 
405  // start polling if it was not started yet (because of no phys ifaces)
406  if (tm->n_vlib_mains == 1
407  && dpdk_input_node.state != VLIB_NODE_STATE_POLLING)
409  VLIB_NODE_STATE_POLLING);
410 
411  if (tm->n_vlib_mains > 1)
413  VLIB_NODE_STATE_POLLING);
414  next_cpu++;
415  }
416 
418  return 0;
419 }
420 
421 #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
/*
 * Return the block size (f_bsize) of the filesystem that backs fd.
 *
 * When fstatfs() fails, 1 is returned instead of reading an uninitialised
 * struct statfs: an alignment of 1 leaves any size rounded up to it
 * unchanged.
 */
static long
get_huge_page_size (int fd)
{
  struct statfs s;

  if (fstatfs (fd, &s) != 0)
    return 1;

  return s.f_bsize;
}
429 #endif
430 
431 static clib_error_t *
432 dpdk_vhost_user_set_protocol_features (u32 hw_if_index, u64 prot_features)
433 {
434  dpdk_device_t *xd;
435  xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index);
436  assert (xd);
437  xd->vu_vhost_dev.protocol_features = prot_features;
438  return 0;
439 }
440 
441 static clib_error_t *
442 dpdk_vhost_user_get_features (u32 hw_if_index, u64 * features)
443 {
444  *features = rte_vhost_feature_get ();
445 
446 #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
447 #define OFFLOAD_FEATURES ((1ULL << VIRTIO_NET_F_HOST_TSO4) | \
448  (1ULL << VIRTIO_NET_F_HOST_TSO6) | \
449  (1ULL << VIRTIO_NET_F_CSUM) | \
450  (1ULL << VIRTIO_NET_F_GUEST_CSUM) | \
451  (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
452  (1ULL << VIRTIO_NET_F_GUEST_TSO6))
453 
454  /* These are not suppoted as bridging/tunneling VHOST
455  * interfaces with hardware interfaces/drivers that does
456  * not support offloading breaks L4 traffic.
457  */
458  *features &= (~OFFLOAD_FEATURES);
459 #endif
460 
461  DBG_SOCK ("supported features: 0x%lx", *features);
462  return 0;
463 }
464 
465 static clib_error_t *
466 dpdk_vhost_user_set_features (u32 hw_if_index, u64 features)
467 {
468  dpdk_device_t *xd;
469  u16 hdr_len = sizeof (struct virtio_net_hdr);
470 
471 
472  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
473  {
474  clib_warning ("not a vhost-user interface");
475  return 0;
476  }
477 
478  xd->vu_vhost_dev.features = features;
479 
480  if (xd->vu_vhost_dev.features & (1 << VIRTIO_NET_F_MRG_RXBUF))
481  hdr_len = sizeof (struct virtio_net_hdr_mrg_rxbuf);
482 
483  int numqs = VIRTIO_QNUM;
484  u8 idx;
485  int prot_feature = features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES);
486  numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
487  for (idx = 0; idx < numqs; idx++)
488  {
489  xd->vu_vhost_dev.virtqueue[idx]->vhost_hlen = hdr_len;
490  /*
491  * Spec says, if F_PROTOCOL_FEATURE is not set by the
492  * slave, then all the vrings should start off as
493  * enabled. If slave negotiates F_PROTOCOL_FEATURE, then
494  * slave is responsible to enable it.
495  */
496  if (!prot_feature)
497  dpdk_vhost_user_set_vring_enable (hw_if_index, idx, 1);
498  }
499 
500  return 0;
501 }
502 
503 static clib_error_t *
504 dpdk_vhost_user_set_mem_table (u32 hw_if_index, vhost_user_memory_t * vum,
505  int fd[])
506 {
507  struct virtio_memory *mem;
508  int i;
509  dpdk_device_t *xd;
510  dpdk_vu_intf_t *vui;
511 
512  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
513  {
514  clib_warning ("not a vhost-user interface");
515  return 0;
516  }
517 
518  vui = xd->vu_intf;
519  mem = xd->vu_vhost_dev.mem;
520 
521  mem->nregions = vum->nregions;
522 
523  for (i = 0; i < mem->nregions; i++)
524  {
525  u64 mapped_size, mapped_address;
526 
527  mem->regions[i].guest_phys_address = vum->regions[i].guest_phys_addr;
528  mem->regions[i].guest_phys_address_end =
530  mem->regions[i].memory_size = vum->regions[i].memory_size;
531  mem->regions[i].userspace_address = vum->regions[i].userspace_addr;
532 
533  mapped_size = mem->regions[i].memory_size + vum->regions[i].mmap_offset;
534  mapped_address =
535  pointer_to_uword (mmap
536  (NULL, mapped_size, PROT_READ | PROT_WRITE,
537  MAP_SHARED, fd[i], 0));
538 
539  if (uword_to_pointer (mapped_address, void *) == MAP_FAILED)
540  {
541  clib_warning ("mmap error");
542  return 0;
543  }
544 
545  mapped_address += vum->regions[i].mmap_offset;
546  vui->region_addr[i] = mapped_address;
547  vui->region_fd[i] = fd[i];
548  vui->region_offset[i] = vum->regions[i].mmap_offset;
549  mem->regions[i].address_offset =
550  mapped_address - mem->regions[i].guest_phys_address;
551 
552  DBG_SOCK ("map memory region %d addr 0x%lx off 0x%lx len 0x%lx",
553  i, vui->region_addr[i], vui->region_offset[i], mapped_size);
554 
555  if (vum->regions[i].guest_phys_addr == 0)
556  {
557  mem->base_address = vum->regions[i].userspace_addr;
558  mem->mapped_address = mem->regions[i].address_offset;
559  }
560  }
561 
562  disable_interface (xd);
563  return 0;
564 }
565 
566 static clib_error_t *
567 dpdk_vhost_user_set_vring_num (u32 hw_if_index, u8 idx, u32 num)
568 {
569  dpdk_device_t *xd;
570  struct vhost_virtqueue *vq;
571 
572  DBG_SOCK ("idx %u num %u", idx, num);
573 
574  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
575  {
576  clib_warning ("not a vhost-user interface");
577  return 0;
578  }
579  vq = xd->vu_vhost_dev.virtqueue[idx];
580  vq->size = num;
581 
582  stop_processing_packets (hw_if_index, idx);
583 
584  return 0;
585 }
586 
587 static clib_error_t *
588 dpdk_vhost_user_set_vring_addr (u32 hw_if_index, u8 idx, uword desc,
589  uword used, uword avail, uword log)
590 {
591  dpdk_device_t *xd;
592  struct vhost_virtqueue *vq;
593 
594  DBG_SOCK ("idx %u desc 0x%lx used 0x%lx avail 0x%lx log 0x%lx",
595  idx, desc, used, avail, log);
596 
597  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
598  {
599  clib_warning ("not a vhost-user interface");
600  return 0;
601  }
602  vq = xd->vu_vhost_dev.virtqueue[idx];
603 
604  vq->desc = (struct vring_desc *) qva_to_vva (&xd->vu_vhost_dev, desc);
605  vq->used = (struct vring_used *) qva_to_vva (&xd->vu_vhost_dev, used);
606  vq->avail = (struct vring_avail *) qva_to_vva (&xd->vu_vhost_dev, avail);
607 #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
608  vq->log_guest_addr = log;
609 #endif
610 
611  if (!(vq->desc && vq->used && vq->avail))
612  {
613  clib_warning ("falied to set vring addr");
614  }
615 
616  if (vq->last_used_idx != vq->used->idx)
617  {
618  clib_warning ("last_used_idx (%u) and vq->used->idx (%u) mismatches; "
619  "some packets maybe resent for Tx and dropped for Rx",
620  vq->last_used_idx, vq->used->idx);
621  vq->last_used_idx = vq->used->idx;
622  vq->last_used_idx_res = vq->used->idx;
623  }
624 
625  /*
626  * Inform the guest that there is no need to inform (kick) the
627  * host when it adds buffers. kick results in vmexit and will
628  * incur performance degradation.
629  *
630  * The below function sets a flag in used table. Therefore,
631  * should be initialized after initializing vq->used.
632  */
633  rte_vhost_enable_guest_notification (&xd->vu_vhost_dev, idx, 0);
634  stop_processing_packets (hw_if_index, idx);
635 
636  return 0;
637 }
638 
639 static clib_error_t *
640 dpdk_vhost_user_get_vring_base (u32 hw_if_index, u8 idx, u32 * num)
641 {
642  dpdk_device_t *xd;
643  struct vhost_virtqueue *vq;
644 
645  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
646  {
647  clib_warning ("not a vhost-user interface");
648  return 0;
649  }
650 
651  vq = xd->vu_vhost_dev.virtqueue[idx];
652  *num = vq->last_used_idx;
653 
654 /*
655  * From spec:
656  * Client must start ring upon receiving a kick
657  * (that is, detecting that file descriptor is readable)
658  * on the descriptor specified by VHOST_USER_SET_VRING_KICK,
659  * and stop ring upon receiving VHOST_USER_GET_VRING_BASE.
660  */
661  DBG_SOCK ("Stopping vring Q %u of device %d", idx, hw_if_index);
662  dpdk_vu_intf_t *vui = xd->vu_intf;
663 
664  /* if there is old fd, delete it */
665  if (vui->vrings[idx].callfd > 0)
666  {
668  vui->vrings[idx].callfd_idx);
669  unix_file_del (&unix_main, uf);
670  }
671 
672  vui->vrings[idx].enabled = 0; /* Reset local copy */
673  vui->vrings[idx].callfd = -1; /* Reset FD */
674  vq->enabled = 0;
675  vq->desc = NULL;
676  vq->used = NULL;
677  vq->avail = NULL;
678 #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
679  vq->log_guest_addr = 0;
680 #endif
681 
682  /* Check if all Qs are disabled */
683  int numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
684  for (idx = 0; idx < numqs; idx++)
685  {
686  if (xd->vu_vhost_dev.virtqueue[idx]->enabled)
687  break;
688  }
689 
690  /* If all vrings are disabed then disable device */
691  if (idx == numqs)
692  {
693  DBG_SOCK ("Device %d disabled", hw_if_index);
694  xd->vu_is_running = 0;
695  }
696 
697  return 0;
698 }
699 
700 static clib_error_t *
701 dpdk_vhost_user_set_vring_base (u32 hw_if_index, u8 idx, u32 num)
702 {
703  dpdk_device_t *xd;
704  struct vhost_virtqueue *vq;
705 
706  DBG_SOCK ("idx %u num %u", idx, num);
707 
708  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
709  {
710  clib_warning ("not a vhost-user interface");
711  return 0;
712  }
713 
714  vq = xd->vu_vhost_dev.virtqueue[idx];
715  vq->last_used_idx = num;
716  vq->last_used_idx_res = num;
717 
718  stop_processing_packets (hw_if_index, idx);
719 
720  return 0;
721 }
722 
723 static clib_error_t *
724 dpdk_vhost_user_set_vring_kick (u32 hw_if_index, u8 idx, int fd)
725 {
726  dpdk_main_t *dm = &dpdk_main;
727  dpdk_device_t *xd;
728  dpdk_vu_vring *vring;
729  struct vhost_virtqueue *vq0, *vq1, *vq;
730  int index, vu_is_running = 0;
731 
732  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
733  {
734  clib_warning ("not a vhost-user interface");
735  return 0;
736  }
737 
738  vq = xd->vu_vhost_dev.virtqueue[idx];
739  vq->kickfd = fd;
740 
741  vring = &xd->vu_intf->vrings[idx];
742  vq->enabled = (vq->desc && vq->avail && vq->used && vring->enabled) ? 1 : 0;
743 
744  /*
745  * Set xd->vu_is_running if at least one pair of
746  * RX/TX queues are enabled.
747  */
748  int numqs = VIRTIO_QNUM;
749  numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
750 
751  for (index = 0; index < numqs; index += 2)
752  {
753  vq0 = xd->vu_vhost_dev.virtqueue[index]; /* RX */
754  vq1 = xd->vu_vhost_dev.virtqueue[index + 1]; /* TX */
755  if (vq0->enabled && vq1->enabled)
756  {
757  vu_is_running = 1;
758  break;
759  }
760  }
761  DBG_SOCK ("SET_VRING_KICK - idx %d, running %d, fd: %d",
762  idx, vu_is_running, fd);
763 
764  xd->vu_is_running = vu_is_running;
765  if (xd->vu_is_running && xd->admin_up)
766  {
768  xd->vlib_hw_if_index,
770  ETH_LINK_FULL_DUPLEX);
771  }
772 
773  return 0;
774 }
775 
776 static int
777 dpdk_vhost_user_set_vring_enable (u32 hw_if_index, u8 idx, int enable)
778 {
779  dpdk_device_t *xd;
780  struct vhost_virtqueue *vq;
781  dpdk_vu_intf_t *vui;
782 
783  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
784  {
785  clib_warning ("not a vhost-user interface");
786  return 0;
787  }
788 
789  vui = xd->vu_intf;
790  /*
791  * Guest vhost driver wrongly enables queue before
792  * setting the vring address. Therefore, save a
793  * local copy. Reflect it in vq structure if addresses
794  * are set. If not, vq will be enabled when vring
795  * is kicked.
796  */
797  vui->vrings[idx].enabled = enable; /* Save local copy */
798 
799  int numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
800  while (numqs--)
801  {
802  if (!vui->vrings[numqs].enabled)
803  break;
804  }
805 
806  if (numqs == -1) /* All Qs are enabled */
807  xd->need_txlock = 0;
808  else
809  xd->need_txlock = 1;
810 
811  vq = xd->vu_vhost_dev.virtqueue[idx];
812  if (vq->desc && vq->avail && vq->used)
813  xd->vu_vhost_dev.virtqueue[idx]->enabled = enable;
814 
815  return 0;
816 }
817 
818 static clib_error_t *
819 dpdk_vhost_user_callfd_read_ready (unix_file_t * uf)
820 {
821  __attribute__ ((unused)) int n;
822  u8 buff[8];
823  n = read (uf->file_descriptor, ((char *) &buff), 8);
824  return 0;
825 }
826 
827 static clib_error_t *
828 dpdk_vhost_user_set_vring_call (u32 hw_if_index, u8 idx, int fd)
829 {
830  dpdk_device_t *xd;
831  struct vhost_virtqueue *vq;
832  unix_file_t template = { 0 };
833 
834  DBG_SOCK ("SET_VRING_CALL - idx %d, fd %d", idx, fd);
835 
836  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
837  {
838  clib_warning ("not a vhost-user interface");
839  return 0;
840  }
841 
842  dpdk_vu_intf_t *vui = xd->vu_intf;
843 
844  /* if there is old fd, delete it */
845  if (vui->vrings[idx].callfd > -1)
846  {
848  vui->vrings[idx].callfd_idx);
849  unix_file_del (&unix_main, uf);
850  }
851  vui->vrings[idx].callfd = fd;
852  template.read_function = dpdk_vhost_user_callfd_read_ready;
853  template.file_descriptor = fd;
854  vui->vrings[idx].callfd_idx = unix_file_add (&unix_main, &template);
855 
856  vq = xd->vu_vhost_dev.virtqueue[idx];
857  vq->callfd = -1; /* We use locally saved vring->callfd; */
858 
859  return 0;
860 }
861 
862 u8
863 dpdk_vhost_user_want_interrupt (dpdk_device_t * xd, int idx)
864 {
865  dpdk_vu_intf_t *vui = xd->vu_intf;
866  ASSERT (vui != NULL);
867 
868  if (PREDICT_FALSE (vui->num_vrings <= 0))
869  return 0;
870 
871  dpdk_vu_vring *vring = &(vui->vrings[idx]);
872  struct vhost_virtqueue *vq = xd->vu_vhost_dev.virtqueue[idx];
873 
874  /* return if vm is interested in interrupts */
875  return (vring->callfd > -1)
876  && !(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
877 }
878 
879 void
880 dpdk_vhost_user_send_interrupt (vlib_main_t * vm, dpdk_device_t * xd, int idx)
881 {
882  dpdk_main_t *dm = &dpdk_main;
883  dpdk_vu_intf_t *vui = xd->vu_intf;
884  ASSERT (vui != NULL);
885 
886  if (PREDICT_FALSE (vui->num_vrings <= 0))
887  return;
888 
889  dpdk_vu_vring *vring = &(vui->vrings[idx]);
890  struct vhost_virtqueue *vq = xd->vu_vhost_dev.virtqueue[idx];
891 
892  /* if vm is interested in interrupts */
893  if ((vring->callfd > -1)
894  && !(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
895  {
896  eventfd_write (vring->callfd, (eventfd_t) 1);
897  vring->n_since_last_int = 0;
898  vring->int_deadline =
900  }
901 }
902 
903 /*
904  * vhost-user interface management functions
905  */
906 
907 // initialize vui with specified attributes
908 static void
909 dpdk_vhost_user_vui_init (vnet_main_t * vnm,
910  dpdk_device_t * xd, int sockfd,
911  const char *sock_filename,
912  u8 is_server, u64 feature_mask, u32 * sw_if_index)
913 {
914  int q;
915  dpdk_vu_intf_t *vui = xd->vu_intf;
916  memset (vui, 0, sizeof (*vui));
917 
918  vui->unix_fd = sockfd;
919  vui->num_vrings = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
920  DBG_SOCK ("dpdk_vhost_user_vui_init VRINGS: %d", vui->num_vrings);
921  vui->sock_is_server = is_server;
922  strncpy (vui->sock_filename, sock_filename,
923  ARRAY_LEN (vui->sock_filename) - 1);
924  vui->sock_errno = 0;
925  vui->is_up = 0;
926  vui->feature_mask = feature_mask;
927  vui->active = 1;
928  vui->unix_file_index = ~0;
929 
930  for (q = 0; q < vui->num_vrings; q++)
931  {
932  vui->vrings[q].enabled = 0;
933  vui->vrings[q].callfd = -1;
934  vui->vrings[q].kickfd = -1;
935  }
936 
938 
939  if (sw_if_index)
940  *sw_if_index = xd->vlib_sw_if_index;
941 }
942 
943 // register vui and start polling on it
944 static void
945 dpdk_vhost_user_vui_register (vlib_main_t * vm, dpdk_device_t * xd)
946 {
947  dpdk_main_t *dm = &dpdk_main;
948  dpdk_vu_intf_t *vui = xd->vu_intf;
949 
950  hash_set (dm->vu_sw_if_index_by_listener_fd, vui->unix_fd,
951  xd->vlib_sw_if_index);
952 }
953 
954 static void
955 dpdk_unmap_all_mem_regions (dpdk_device_t * xd)
956 {
957  int i, r;
958  dpdk_vu_intf_t *vui = xd->vu_intf;
959  struct virtio_memory *mem = xd->vu_vhost_dev.mem;
960 
961  for (i = 0; i < mem->nregions; i++)
962  {
963  if (vui->region_addr[i] != -1)
964  {
965 
966  long page_sz = get_huge_page_size (vui->region_fd[i]);
967 
968  ssize_t map_sz = RTE_ALIGN_CEIL (mem->regions[i].memory_size +
969  vui->region_offset[i], page_sz);
970 
971  r =
972  munmap ((void *) (vui->region_addr[i] - vui->region_offset[i]),
973  map_sz);
974 
975  DBG_SOCK
976  ("unmap memory region %d addr 0x%lx off 0x%lx len 0x%lx page_sz 0x%x",
977  i, vui->region_addr[i], vui->region_offset[i], map_sz, page_sz);
978 
979  vui->region_addr[i] = -1;
980 
981  if (r == -1)
982  {
983  clib_unix_warning ("failed to unmap memory region");
984  }
985  close (vui->region_fd[i]);
986  }
987  }
988  mem->nregions = 0;
989 }
990 
991 static inline void
992 dpdk_vhost_user_if_disconnect (dpdk_device_t * xd)
993 {
994  dpdk_vu_intf_t *vui = xd->vu_intf;
995  vnet_main_t *vnm = vnet_get_main ();
996  dpdk_main_t *dm = &dpdk_main;
997  struct vhost_virtqueue *vq;
998  int q;
999 
1000  xd->admin_up = 0;
1002 
1003  if (vui->unix_file_index != ~0)
1004  {
1005  unix_file_del (&unix_main, unix_main.file_pool + vui->unix_file_index);
1006  vui->unix_file_index = ~0;
1007  }
1008 
1009  hash_unset (dm->vu_sw_if_index_by_sock_fd, vui->unix_fd);
1010  hash_unset (dm->vu_sw_if_index_by_listener_fd, vui->unix_fd);
1011  close (vui->unix_fd);
1012  vui->unix_fd = -1;
1013  vui->is_up = 0;
1014 
1015  for (q = 0; q < vui->num_vrings; q++)
1016  {
1017  vq = xd->vu_vhost_dev.virtqueue[q];
1018  if (vui->vrings[q].callfd > -1)
1019  {
1021  vui->vrings[q].callfd_idx);
1022  unix_file_del (&unix_main, uf);
1023  }
1024 
1025  if (vui->vrings[q].kickfd > -1)
1026  {
1027  close (vui->vrings[q].kickfd);
1028  vui->vrings[q].kickfd = -1;
1029  }
1030 
1031  vui->vrings[q].enabled = 0; /* Reset local copy */
1032  vui->vrings[q].callfd = -1; /* Reset FD */
1033  vq->enabled = 0;
1034 #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
1035  vq->log_guest_addr = 0;
1036 #endif
1037  vq->desc = NULL;
1038  vq->used = NULL;
1039  vq->avail = NULL;
1040  }
1041  xd->vu_is_running = 0;
1042 
1043  dpdk_unmap_all_mem_regions (xd);
1044  DBG_SOCK ("interface ifindex %d disconnected", xd->vlib_sw_if_index);
1045 }
1046 
1047 static clib_error_t *
1048 dpdk_vhost_user_socket_read (unix_file_t * uf)
1049 {
1050  int n;
1051  int fd, number_of_fds = 0;
1052  int fds[VHOST_MEMORY_MAX_NREGIONS];
1053  vhost_user_msg_t msg;
1054  struct msghdr mh;
1055  struct iovec iov[1];
1056  dpdk_main_t *dm = &dpdk_main;
1057  dpdk_device_t *xd;
1058  dpdk_vu_intf_t *vui;
1059  struct cmsghdr *cmsg;
1060  uword *p;
1061  u8 q;
1062  vnet_main_t *vnm = vnet_get_main ();
1063 
1065  if (p == 0)
1066  {
1067  DBG_SOCK ("FD %d doesn't belong to any interface", uf->file_descriptor);
1068  return 0;
1069  }
1070  else
1071  xd = dpdk_vhost_user_device_from_sw_if_index (p[0]);
1072 
1073  ASSERT (xd != NULL);
1074  vui = xd->vu_intf;
1075 
1076  char control[CMSG_SPACE (VHOST_MEMORY_MAX_NREGIONS * sizeof (int))];
1077 
1078  memset (&mh, 0, sizeof (mh));
1079  memset (control, 0, sizeof (control));
1080 
1081  /* set the payload */
1082  iov[0].iov_base = (void *) &msg;
1083  iov[0].iov_len = VHOST_USER_MSG_HDR_SZ;
1084 
1085  mh.msg_iov = iov;
1086  mh.msg_iovlen = 1;
1087  mh.msg_control = control;
1088  mh.msg_controllen = sizeof (control);
1089 
1090  n = recvmsg (uf->file_descriptor, &mh, 0);
1091 
1092  if (n != VHOST_USER_MSG_HDR_SZ)
1093  goto close_socket;
1094 
1095  if (mh.msg_flags & MSG_CTRUNC)
1096  {
1097  goto close_socket;
1098  }
1099 
1100  cmsg = CMSG_FIRSTHDR (&mh);
1101 
1102  if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) &&
1103  (cmsg->cmsg_type == SCM_RIGHTS) &&
1104  (cmsg->cmsg_len - CMSG_LEN (0) <=
1105  VHOST_MEMORY_MAX_NREGIONS * sizeof (int)))
1106  {
1107  number_of_fds = (cmsg->cmsg_len - CMSG_LEN (0)) / sizeof (int);
1108  clib_memcpy (fds, CMSG_DATA (cmsg), number_of_fds * sizeof (int));
1109  }
1110 
1111  /* version 1, no reply bit set */
1112  if ((msg.flags & 7) != 1)
1113  {
1114  DBG_SOCK ("malformed message received. closing socket");
1115  goto close_socket;
1116  }
1117 
1118  {
1119  int rv __attribute__ ((unused));
1120  /* $$$$ pay attention to rv */
1121  rv = read (uf->file_descriptor, ((char *) &msg) + n, msg.size);
1122  }
1123 
1124  DBG_SOCK ("VPP VHOST message %s", vhost_message_str[msg.request]);
1125  switch (msg.request)
1126  {
1128  DBG_SOCK ("if %d msg VHOST_USER_GET_FEATURES", xd->vlib_hw_if_index);
1129 
1130  msg.flags |= VHOST_USER_REPLY_MASK;
1131 
1132  dpdk_vhost_user_get_features (xd->vlib_hw_if_index, &msg.u64);
1133  msg.u64 &= vui->feature_mask;
1134  msg.size = sizeof (msg.u64);
1135  break;
1136 
1138  DBG_SOCK ("if %d msg VHOST_USER_SET_FEATURES features 0x%016lx",
1139  xd->vlib_hw_if_index, msg.u64);
1140 
1141  dpdk_vhost_user_set_features (xd->vlib_hw_if_index, msg.u64);
1142  break;
1143 
1145  DBG_SOCK ("if %d msg VHOST_USER_SET_MEM_TABLE nregions %d",
1146  xd->vlib_hw_if_index, msg.memory.nregions);
1147 
1148  if ((msg.memory.nregions < 1) ||
1149  (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS))
1150  {
1151 
1152  DBG_SOCK ("number of mem regions must be between 1 and %i",
1154 
1155  goto close_socket;
1156  }
1157 
1158  if (msg.memory.nregions != number_of_fds)
1159  {
1160  DBG_SOCK ("each memory region must have FD");
1161  goto close_socket;
1162  }
1163 
1164  /* Unmap previously configured memory if necessary */
1165  dpdk_unmap_all_mem_regions (xd);
1166 
1167  dpdk_vhost_user_set_mem_table (xd->vlib_hw_if_index, &msg.memory, fds);
1168  break;
1169 
1171  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d",
1172  xd->vlib_hw_if_index, msg.state.index, msg.state.num);
1173 
1174  if ((msg.state.num > 32768) || /* maximum ring size is 32768 */
1175  (msg.state.num == 0) || /* it cannot be zero */
1176  (msg.state.num % 2)) /* must be power of 2 */
1177  goto close_socket;
1178 
1179  dpdk_vhost_user_set_vring_num (xd->vlib_hw_if_index, msg.state.index,
1180  msg.state.num);
1181  break;
1182 
1184  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ADDR idx %d",
1185  xd->vlib_hw_if_index, msg.state.index);
1186 
1187  dpdk_vhost_user_set_vring_addr (xd->vlib_hw_if_index, msg.state.index,
1188  msg.addr.desc_user_addr,
1189  msg.addr.used_user_addr,
1190  msg.addr.avail_user_addr,
1191  msg.addr.log_guest_addr);
1192  break;
1193 
1194  case VHOST_USER_SET_OWNER:
1195  DBG_SOCK ("if %d msg VHOST_USER_SET_OWNER", xd->vlib_hw_if_index);
1196  break;
1197 
1199  DBG_SOCK ("if %d msg VHOST_USER_RESET_OWNER", xd->vlib_hw_if_index);
1200  break;
1201 
1203  q = (u8) (msg.u64 & 0xFF);
1204 
1205  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_CALL u64 %lx, idx: %d",
1206  xd->vlib_hw_if_index, msg.u64, q);
1207 
1208  if (!(msg.u64 & 0x100))
1209  {
1210  if (number_of_fds != 1)
1211  goto close_socket;
1212  fd = fds[0];
1213  }
1214  else
1215  {
1216  fd = -1;
1217  }
1218  dpdk_vhost_user_set_vring_call (xd->vlib_hw_if_index, q, fd);
1219 
1220  break;
1221 
1223 
1224  q = (u8) (msg.u64 & 0xFF);
1225 
1226  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_KICK u64 %lx, idx: %d",
1227  xd->vlib_hw_if_index, msg.u64, q);
1228 
1229  if (!(msg.u64 & 0x100))
1230  {
1231  if (number_of_fds != 1)
1232  goto close_socket;
1233 
1234  if (vui->vrings[q].kickfd > -1)
1235  close (vui->vrings[q].kickfd);
1236 
1237  vui->vrings[q].kickfd = fds[0];
1238  }
1239  else
1240  vui->vrings[q].kickfd = -1;
1241 
1242  dpdk_vhost_user_set_vring_kick (xd->vlib_hw_if_index, q,
1243  vui->vrings[q].kickfd);
1244  break;
1245 
1247 
1248  q = (u8) (msg.u64 & 0xFF);
1249 
1250  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ERR u64 %lx, idx: %d",
1251  xd->vlib_hw_if_index, msg.u64, q);
1252 
1253  if (!(msg.u64 & 0x100))
1254  {
1255  if (number_of_fds != 1)
1256  goto close_socket;
1257 
1258  fd = fds[0];
1259  }
1260  else
1261  fd = -1;
1262 
1263  vui->vrings[q].errfd = fd;
1264  break;
1265 
1267  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_BASE idx %d num %d",
1268  xd->vlib_hw_if_index, msg.state.index, msg.state.num);
1269 
1270  dpdk_vhost_user_set_vring_base (xd->vlib_hw_if_index, msg.state.index,
1271  msg.state.num);
1272  break;
1273 
1275  DBG_SOCK ("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d",
1276  xd->vlib_hw_if_index, msg.state.index, msg.state.num);
1277 
1278  msg.flags |= VHOST_USER_REPLY_MASK;
1279  msg.size = sizeof (msg.state);
1280 
1281  dpdk_vhost_user_get_vring_base (xd->vlib_hw_if_index, msg.state.index,
1282  &msg.state.num);
1283  break;
1284 
1285  case VHOST_USER_NONE:
1286  DBG_SOCK ("if %d msg VHOST_USER_NONE", xd->vlib_hw_if_index);
1287  break;
1288 
1290 #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
1291  DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_BASE", xd->vlib_hw_if_index);
1292 
1293  if (msg.size != sizeof (msg.log))
1294  {
1295  DBG_SOCK
1296  ("invalid msg size for VHOST_USER_SET_LOG_BASE: %u instead of %lu",
1297  msg.size, sizeof (msg.log));
1298  goto close_socket;
1299  }
1300 
1301  if (!
1302  (xd->vu_vhost_dev.protocol_features & (1 <<
1304  {
1305  DBG_SOCK
1306  ("VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but VHOST_USER_SET_LOG_BASE received");
1307  goto close_socket;
1308  }
1309 
1310  fd = fds[0];
1311  /* align size to 2M page */
1312  long page_sz = get_huge_page_size (fd);
1313  ssize_t map_sz =
1314  RTE_ALIGN_CEIL (msg.log.size + msg.log.offset, page_sz);
1315 
1316  void *addr = mmap (0, map_sz, PROT_READ | PROT_WRITE,
1317  MAP_SHARED, fd, 0);
1318 
1319  DBG_SOCK ("map log region addr 0 len 0x%lx off 0x%lx fd %d mapped %p",
1320  map_sz, msg.log.offset, fd, addr);
1321 
1322  if (addr == MAP_FAILED)
1323  {
1324  clib_warning ("failed to map memory. errno is %d", errno);
1325  goto close_socket;
1326  }
1327 
1328  xd->vu_vhost_dev.log_base += pointer_to_uword (addr) + msg.log.offset;
1329  xd->vu_vhost_dev.log_size = msg.log.size;
1331  msg.size = sizeof (msg.u64);
1332 #else
1333  DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_BASE Not-Implemented",
1334  xd->vlib_hw_if_index);
1335 #endif
1336  break;
1337 
1338  case VHOST_USER_SET_LOG_FD:
1339  DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_FD", xd->vlib_hw_if_index);
1340  break;
1341 
1343  DBG_SOCK ("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES",
1344  xd->vlib_hw_if_index);
1345 
1346  msg.flags |= VHOST_USER_REPLY_MASK;
1347  msg.u64 = VHOST_USER_PROTOCOL_FEATURES;
1348  DBG_SOCK ("VHOST_USER_PROTOCOL_FEATURES: %llx",
1350  msg.size = sizeof (msg.u64);
1351  break;
1352 
1354  DBG_SOCK ("if %d msg VHOST_USER_SET_PROTOCOL_FEATURES",
1355  xd->vlib_hw_if_index);
1356 
1357  DBG_SOCK ("VHOST_USER_SET_PROTOCOL_FEATURES: 0x%lx", msg.u64);
1358  dpdk_vhost_user_set_protocol_features (xd->vlib_hw_if_index, msg.u64);
1359  break;
1360 
1362  DBG_SOCK ("%d VPP VHOST_USER_SET_VRING_ENABLE IDX: %d, Enable: %d",
1363  xd->vlib_hw_if_index, msg.state.index, msg.state.num);
1364  dpdk_vhost_user_set_vring_enable
1365  (xd->vlib_hw_if_index, msg.state.index, msg.state.num);
1366  break;
1367 
1369  DBG_SOCK ("if %d msg VHOST_USER_GET_QUEUE_NUM:", xd->vlib_hw_if_index);
1370 
1371  msg.flags |= VHOST_USER_REPLY_MASK;
1372  msg.u64 = xd->vu_vhost_dev.virt_qp_nb;
1373  msg.size = sizeof (msg.u64);
1374  break;
1375 
1376  default:
1377  DBG_SOCK ("unknown vhost-user message %d received. closing socket",
1378  msg.request);
1379  goto close_socket;
1380  }
1381 
1382  /* if we have pointers to descriptor table, go up */
1383  if (!vui->is_up &&
1384  xd->vu_vhost_dev.virtqueue[VHOST_NET_VRING_IDX_TX]->desc &&
1385  xd->vu_vhost_dev.virtqueue[VHOST_NET_VRING_IDX_RX]->desc)
1386  {
1387 
1388  DBG_SOCK ("interface %d connected", xd->vlib_sw_if_index);
1389 
1392  vui->is_up = 1;
1393  xd->admin_up = 1;
1394  }
1395 
1396  /* if we need to reply */
1397  if (msg.flags & VHOST_USER_REPLY_MASK)
1398  {
1399  n =
1400  send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
1401  if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
1402  goto close_socket;
1403  }
1404 
1405  return 0;
1406 
1407 close_socket:
1408  DBG_SOCK ("error: close_socket");
1409  dpdk_vhost_user_if_disconnect (xd);
1410  return 0;
1411 }
1412 
/*
 * Error callback installed on vhost-user sockets (it is assigned to
 * template.error_function elsewhere in this file).
 *
 * Resolves the interface that owns uf->file_descriptor and disconnects it.
 * Returns 0 on every path.
 */
1413 static clib_error_t *
1414 dpdk_vhost_user_socket_error (unix_file_t * uf)
1415 {
1416  dpdk_main_t *dm = &dpdk_main;
1417  dpdk_device_t *xd;
1418  uword *p;
1419 
/* NOTE(review): listing line 1420, which assigns p, is missing from this
 * extract; presumably a lookup of uf->file_descriptor in
 * dm->vu_sw_if_index_by_sock_fd -- confirm against the real source. */
1421  if (p == 0)
1422  {
1423  DBG_SOCK ("FD %d doesn't belong to any interface", uf->file_descriptor);
1424  return 0;
1425  }
1426  else
1427  xd = dpdk_vhost_user_device_from_sw_if_index (p[0]);
1428 
/* NOTE(review): xd is not checked for NULL here, although the other
 * socket callbacks ASSERT (xd != NULL) after the same lookup. */
1429  dpdk_vhost_user_if_disconnect (xd);
1430  return 0;
1431 }
1432 
/*
 * Read callback for a listening (server-mode) vhost-user socket.
 *
 * Accepts one pending connection, registers the new descriptor with the
 * unix file poller using the vhost-user read/error callbacks, and records
 * the descriptor -> sw_if_index mapping in dm->vu_sw_if_index_by_sock_fd.
 *
 * Returns 0 on success or when the listening descriptor is unknown, and a
 * unix clib error when accept() fails.
 */
1433 static clib_error_t *
1434 dpdk_vhost_user_socksvr_accept_ready (unix_file_t * uf)
1435 {
1436  int client_fd, client_len;
1437  struct sockaddr_un client;
1438  unix_file_t template = { 0 };
1439  dpdk_main_t *dm = &dpdk_main;
1440  dpdk_device_t *xd = NULL;
1441  dpdk_vu_intf_t *vui;
1442  uword *p;
1443 
/* NOTE(review): listing line 1444, which assigns p, is missing from this
 * extract; presumably a lookup of uf->file_descriptor in
 * dm->vu_sw_if_index_by_sock_fd -- confirm against the real source. */
1445  if (p == 0)
1446  {
1447  DBG_SOCK ("fd %d doesn't belong to any interface", uf->file_descriptor);
1448  return 0;
1449  }
1450 
1451  xd = dpdk_vhost_user_device_from_sw_if_index (p[0]);
1452  ASSERT (xd != NULL);
1453  vui = xd->vu_intf;
1454 
/* client_len is an int whose address is cast to socklen_t * for accept() */
1455  client_len = sizeof (client);
1456  client_fd = accept (uf->file_descriptor,
1457  (struct sockaddr *) &client,
1458  (socklen_t *) & client_len);
1459 
1460  if (client_fd < 0)
1461  return clib_error_return_unix (0, "accept");
1462 
1463  template.read_function = dpdk_vhost_user_socket_read;
1464  template.error_function = dpdk_vhost_user_socket_error;
1465  template.file_descriptor = client_fd;
1466  vui->unix_file_index = unix_file_add (&unix_main, &template);
1467 
/* NOTE(review): any previous vui->client_fd / vui->unix_file_index is
 * overwritten here without being closed or unregistered in this function;
 * confirm that a second connection cannot arrive while one is active. */
1468  vui->client_fd = client_fd;
1469  hash_set (dm->vu_sw_if_index_by_sock_fd, vui->client_fd,
1470  xd->vlib_sw_if_index);
1471 
1472  return 0;
1473 }
1474 
1475 // init server socket on specified sock_filename
1476 static int
1477 dpdk_vhost_user_init_server_sock (const char *sock_filename, int *sockfd)
1478 {
1479  int rv = 0;
1480  struct sockaddr_un un = { };
1481  int fd;
1482  /* create listening socket */
1483  fd = socket (AF_UNIX, SOCK_STREAM, 0);
1484 
1485  if (fd < 0)
1486  {
1487  return VNET_API_ERROR_SYSCALL_ERROR_1;
1488  }
1489 
1490  un.sun_family = AF_UNIX;
1491  strcpy ((char *) un.sun_path, (char *) sock_filename);
1492 
1493  /* remove if exists */
1494  unlink ((char *) sock_filename);
1495 
1496  if (bind (fd, (struct sockaddr *) &un, sizeof (un)) == -1)
1497  {
1498  rv = VNET_API_ERROR_SYSCALL_ERROR_2;
1499  goto error;
1500  }
1501 
1502  if (listen (fd, 1) == -1)
1503  {
1504  rv = VNET_API_ERROR_SYSCALL_ERROR_3;
1505  goto error;
1506  }
1507 
1508  unix_file_t template = { 0 };
1509  template.read_function = dpdk_vhost_user_socksvr_accept_ready;
1510  template.file_descriptor = fd;
1511  unix_file_add (&unix_main, &template);
1512  *sockfd = fd;
1513  return rv;
1514 
1515 error:
1516  close (fd);
1517  return rv;
1518 }
1519 
1520 /*
1521  * vhost-user interface control functions used from vpe api
1522  */
1523 
1524 int
1525 dpdk_vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
1526  const char *sock_filename,
1527  u8 is_server,
1528  u32 * sw_if_index,
1529  u64 feature_mask,
1530  u8 renumber, u32 custom_dev_instance, u8 * hwaddr)
1531 {
1532  dpdk_main_t *dm = &dpdk_main;
1533  dpdk_device_t *xd;
1534  u32 hw_if_idx = ~0;
1535  int sockfd = -1;
1536  int rv = 0;
1537 
1538  // using virtio vhost user?
1539  if (dm->conf->use_virtio_vhost)
1540  {
1541  return vhost_user_create_if (vnm, vm, sock_filename, is_server,
1542  sw_if_index, feature_mask, renumber,
1543  custom_dev_instance, hwaddr);
1544  }
1545 
1546  if (is_server)
1547  {
1548  if ((rv =
1549  dpdk_vhost_user_init_server_sock (sock_filename, &sockfd)) != 0)
1550  {
1551  return rv;
1552  }
1553  }
1554 
1555  if (renumber)
1556  {
1557  // set next vhost-user if id if custom one is higher or equal
1558  if (custom_dev_instance >= dm->next_vu_if_id)
1559  dm->next_vu_if_id = custom_dev_instance + 1;
1560 
1561  dpdk_create_vhost_user_if_internal (&hw_if_idx, custom_dev_instance,
1562  hwaddr);
1563  }
1564  else
1565  dpdk_create_vhost_user_if_internal (&hw_if_idx, (u32) ~ 0, hwaddr);
1566  DBG_SOCK ("dpdk vhost-user interface created hw_if_index %d", hw_if_idx);
1567 
1568  xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_idx);
1569  ASSERT (xd != NULL);
1570 
1571  dpdk_vhost_user_vui_init (vnm, xd, sockfd, sock_filename, is_server,
1572  feature_mask, sw_if_index);
1573 
1574  dpdk_vhost_user_vui_register (vm, xd);
1575  return rv;
1576 }
1577 
1578 int
1579 dpdk_vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
1580  const char *sock_filename,
1581  u8 is_server,
1582  u32 sw_if_index,
1583  u64 feature_mask,
1584  u8 renumber, u32 custom_dev_instance)
1585 {
1586  dpdk_main_t *dm = &dpdk_main;
1587  dpdk_device_t *xd;
1588  dpdk_vu_intf_t *vui = NULL;
1589  u32 sw_if_idx = ~0;
1590  int sockfd = -1;
1591  int rv = 0;
1592 
1593  // using virtio vhost user?
1594  if (dm->conf->use_virtio_vhost)
1595  {
1596  return vhost_user_modify_if (vnm, vm, sock_filename, is_server,
1597  sw_if_index, feature_mask, renumber,
1598  custom_dev_instance);
1599  }
1600 
1601  xd = dpdk_vhost_user_device_from_sw_if_index (sw_if_index);
1602 
1603  if (xd == NULL)
1604  return VNET_API_ERROR_INVALID_SW_IF_INDEX;
1605 
1606  vui = xd->vu_intf;
1607 
1608  // interface is inactive
1609  vui->active = 0;
1610  // disconnect interface sockets
1611  dpdk_vhost_user_if_disconnect (xd);
1612 
1613  if (is_server)
1614  {
1615  if ((rv =
1616  dpdk_vhost_user_init_server_sock (sock_filename, &sockfd)) != 0)
1617  {
1618  return rv;
1619  }
1620  }
1621 
1622  dpdk_vhost_user_vui_init (vnm, xd, sockfd, sock_filename, is_server,
1623  feature_mask, &sw_if_idx);
1624 
1625  if (renumber)
1626  {
1627  vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
1628  }
1629 
1630  dpdk_vhost_user_vui_register (vm, xd);
1631 
1632  return rv;
1633 }
1634 
/*
 * Delete (deactivate) a DPDK vhost-user interface.
 *
 * When dm->conf->use_virtio_vhost is set the request is forwarded to
 * vhost_user_delete_if(). Otherwise the interface is marked inactive and
 * its sockets are disconnected.
 *
 * Returns VNET_API_ERROR_INVALID_SW_IF_INDEX when sw_if_index does not
 * resolve to a DPDK vhost-user device, otherwise rv (never changed from 0
 * in the lines visible here).
 */
1635 int
1636 dpdk_vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm,
1637  u32 sw_if_index)
1638 {
1639  dpdk_main_t *dm = &dpdk_main;
1640  dpdk_device_t *xd = NULL;
1641  dpdk_vu_intf_t *vui;
1642  int rv = 0;
1643 
1644  // using virtio vhost user?
1645  if (dm->conf->use_virtio_vhost)
1646  {
1647  return vhost_user_delete_if (vnm, vm, sw_if_index);
1648  }
1649 
1650  xd = dpdk_vhost_user_device_from_sw_if_index (sw_if_index);
1651 
1652  if (xd == NULL)
1653  return VNET_API_ERROR_INVALID_SW_IF_INDEX;
1654 
1655  vui = xd->vu_intf;
1656 
1657  // interface is inactive
1658  vui->active = 0;
1659  // disconnect interface sockets
1660  dpdk_vhost_user_if_disconnect (xd);
1661  // add to inactive interface list
/* NOTE(review): listing lines 1662 and 1664 are missing from this extract;
 * presumably they perform the "add to inactive interface list" step named
 * above -- confirm against the real source. */
1663 
1665  DBG_SOCK ("deleted (deactivated) vhost-user interface sw_if_index %d",
1666  sw_if_index);
1667 
1668  return rv;
1669 }
1670 
/*
 * Build a vector describing every active DPDK vhost-user interface.
 *
 * out_vuids receives the vector (NULL when there is nothing to report).
 *
 * Returns -1 when out_vuids is NULL, the result of vhost_user_dump_ifs()
 * when dm->conf->use_virtio_vhost is set, and otherwise rv, which stays 0.
 */
1671 int
1672 dpdk_vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
1673  vhost_user_intf_details_t ** out_vuids)
1674 {
1675  int rv = 0;
1676  dpdk_main_t *dm = &dpdk_main;
1677  dpdk_device_t *xd;
1678  dpdk_vu_intf_t *vui;
1679  struct virtio_net *vhost_dev;
1680  vhost_user_intf_details_t *r_vuids = NULL;
/* NOTE(review): listing lines 1681 and 1683 are missing from this extract;
 * presumably they declare `vuid` and `hi`, which are used below. */
1682  u32 *hw_if_indices = 0;
1684  u8 *s = NULL;
1685  int i;
1686 
1687  if (!out_vuids)
1688  return -1;
1689 
1690  // using virtio vhost user?
1691  if (dm->conf->use_virtio_vhost)
1692  {
1693  return vhost_user_dump_ifs (vnm, vm, out_vuids);
1694  }
1695 
/* pass 1: collect the hw_if_index of every active vhost-user device */
1696  vec_foreach (xd, dm->devices)
1697  {
1698  if ((xd->flags & DPDK_DEVICE_FLAG_VHOST_USER) && xd->vu_intf->active)
1699  vec_add1 (hw_if_indices, xd->vlib_hw_if_index);
1700  }
1701 
/* pass 2: fill in one details record per collected interface */
1702  for (i = 0; i < vec_len (hw_if_indices); i++)
1703  {
1704  hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
1705  xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_indices[i]);
1706  if (!xd)
1707  {
1708  clib_warning ("invalid vhost-user interface hw_if_index %d",
1709  hw_if_indices[i]);
1710  continue;
1711  }
1712 
1713  vui = xd->vu_intf;
1714  ASSERT (vui != NULL);
1715  vhost_dev = &xd->vu_vhost_dev;
/* header size is read from virtqueue 0, or reported as 0 when no vrings exist */
1716  u32 virtio_net_hdr_sz = (vui->num_vrings > 0 ?
1717  vhost_dev->virtqueue[0]->vhost_hlen : 0);
1718 
1719  vec_add2 (r_vuids, vuid, 1);
1720  vuid->sw_if_index = xd->vlib_sw_if_index;
1721  vuid->virtio_net_hdr_sz = virtio_net_hdr_sz;
1722  vuid->features = vhost_dev->features;
1723  vuid->is_server = vui->sock_is_server;
1724  vuid->num_regions =
1725  (vhost_dev->mem != NULL ? vhost_dev->mem->nregions : 0);
1726  vuid->sock_errno = vui->sock_errno;
/* copies at most ARRAY_LEN - 1 bytes; termination relies on the last byte
 * of the new element already being 0 -- TODO confirm vec_add2 zero-fills */
1727  strncpy ((char *) vuid->sock_filename, (char *) vui->sock_filename,
1728  ARRAY_LEN (vuid->sock_filename) - 1);
1729 
/* render the interface name into the scratch vector s with a trailing NUL */
1730  s = format (s, "%v%c", hi->name, 0);
1731 
1732  strncpy ((char *) vuid->if_name, (char *) s,
1733  ARRAY_LEN (vuid->if_name) - 1);
/* reset the scratch vector length so it is reused on the next iteration */
1734  _vec_len (s) = 0;
1735  }
1736 
1737  vec_free (s);
1738  vec_free (hw_if_indices);
1739 
1740  *out_vuids = r_vuids;
1741 
1742  return rv;
1743 }
1744 
1745 /*
1746  * Processing functions called from dpdk process fn
1747  */
1748 
/*
 * State carried between calls of the dpdk_vhost_user_process_* functions;
 * allocated by dpdk_vhost_user_process_init() and handed back as ctx.
 */
1749 typedef struct
1750 {
/* address passed to connect(); sun_path is rewritten for each interface */
1751  struct sockaddr_un sun;
/* spare socket used for the next client connection attempt */
1752  int sockfd;
/* unix_file_t with the vhost-user read/error callbacks, passed to unix_file_add() */
1753  unix_file_t template;
/* set to 0 at init; not otherwise used by the functions in this section */
1754  uword *event_data;
1755 } dpdk_vu_process_state;
1756 
1757 void
1758 dpdk_vhost_user_process_init (void **ctx)
1759 {
1760  dpdk_vu_process_state *state =
1761  clib_mem_alloc (sizeof (dpdk_vu_process_state));
1762  memset (state, 0, sizeof (*state));
1763  state->sockfd = socket (AF_UNIX, SOCK_STREAM, 0);
1764  state->sun.sun_family = AF_UNIX;
1765  state->template.read_function = dpdk_vhost_user_socket_read;
1766  state->template.error_function = dpdk_vhost_user_socket_error;
1767  state->event_data = 0;
1768  *ctx = state;
1769 }
1770 
1771 void
1772 dpdk_vhost_user_process_cleanup (void *ctx)
1773 {
1774  clib_mem_free (ctx);
1775 }
1776 
1777 uword
1778 dpdk_vhost_user_process_if (vlib_main_t * vm, dpdk_device_t * xd, void *ctx)
1779 {
1780  dpdk_main_t *dm = &dpdk_main;
1781  dpdk_vu_process_state *state = (dpdk_vu_process_state *) ctx;
1782  dpdk_vu_intf_t *vui = xd->vu_intf;
1783 
1784  if (vui->sock_is_server || !vui->active)
1785  return 0;
1786 
1787  if (vui->unix_fd == -1)
1788  {
1789  /* try to connect */
1790  strncpy (state->sun.sun_path, (char *) vui->sock_filename,
1791  sizeof (state->sun.sun_path) - 1);
1792 
1793  if (connect
1794  (state->sockfd, (struct sockaddr *) &(state->sun),
1795  sizeof (struct sockaddr_un)) == 0)
1796  {
1797  vui->sock_errno = 0;
1798  vui->unix_fd = state->sockfd;
1799  state->template.file_descriptor = state->sockfd;
1800  vui->unix_file_index =
1801  unix_file_add (&unix_main, &(state->template));
1802  hash_set (dm->vu_sw_if_index_by_sock_fd, state->sockfd,
1803  xd->vlib_sw_if_index);
1804 
1805  state->sockfd = socket (AF_UNIX, SOCK_STREAM, 0);
1806  if (state->sockfd < 0)
1807  return -1;
1808  }
1809  else
1810  {
1811  vui->sock_errno = errno;
1812  }
1813  }
1814  else
1815  {
1816  /* check if socket is alive */
1817  int error = 0;
1818  socklen_t len = sizeof (error);
1819  int retval =
1820  getsockopt (vui->unix_fd, SOL_SOCKET, SO_ERROR, &error, &len);
1821 
1822  if (retval)
1823  dpdk_vhost_user_if_disconnect (xd);
1824  }
1825  return 0;
1826 }
1827 #endif
1828 
1829 /*
1830  * CLI functions
1831  */
1832 
/*
 * CLI handler that creates a vhost-user interface.
 *
 * NOTE(review): listing line 1834 (the function name and first parameter)
 * is missing from this extract.
 *
 * Without DPDK_VHOST_USER the whole body reduces to the call to
 * vhost_user_connect_command_fn(). With it, that call is made only when
 * dm->conf->use_virtio_vhost is set; otherwise the line is parsed for
 * "socket", "server", "feature-mask", "hwaddr" and "renumber" and the
 * interface is created through dpdk_vhost_user_create_if().
 */
1833 static clib_error_t *
1835  unformat_input_t * input,
1836  vlib_cli_command_t * cmd)
1837 {
1838 #if DPDK_VHOST_USER
1839  dpdk_main_t *dm = &dpdk_main;
1840  unformat_input_t _line_input, *line_input = &_line_input;
1841  u8 *sock_filename = NULL;
1842  u32 sw_if_index;
1843  u8 is_server = 0;
1844  u64 feature_mask = (u64) ~ 0;
1845  u8 renumber = 0;
1846  u32 custom_dev_instance = ~0;
1847  u8 hwaddr[6];
1848  u8 *hw = NULL;
1849 
1850  if (dm->conf->use_virtio_vhost)
1851  {
1852 #endif
1853  return vhost_user_connect_command_fn (vm, input, cmd);
1854 #if DPDK_VHOST_USER
1855  }
1856 
1857  /* Get a line of input. */
1858  if (!unformat_user (input, unformat_line_input, line_input))
1859  return 0;
1860 
1861  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
1862  {
1863  if (unformat (line_input, "socket %s", &sock_filename))
1864  ;
1865  else if (unformat (line_input, "server"))
1866  is_server = 1;
1867  else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask))
1868  ;
1869  else
1870  if (unformat
1871  (line_input, "hwaddr %U", unformat_ethernet_address, hwaddr))
1872  hw = hwaddr;
1873  else if (unformat (line_input, "renumber %d", &custom_dev_instance))
1874  {
1875  renumber = 1;
1876  }
1877  else
/* NOTE(review): this return skips unformat_free (line_input) and
 * vec_free (sock_filename), both of which run only on the success path. */
1878  return clib_error_return (0, "unknown input `%U'",
1879  format_unformat_error, input);
1880  }
1881  unformat_free (line_input);
1882 
1883  vnet_main_t *vnm = vnet_get_main ();
1884  if (sock_filename == NULL)
1885  return clib_error_return (0, "missing socket file");
1886 
/* NOTE(review): the return value of dpdk_vhost_user_create_if() is
 * discarded, so a failed creation is not reported and sw_if_index may be
 * used below without having been written. */
1887  dpdk_vhost_user_create_if (vnm, vm, (char *) sock_filename,
1888  is_server, &sw_if_index, feature_mask,
1889  renumber, custom_dev_instance, hw);
1890 
1891  vec_free (sock_filename);
/* NOTE(review): listing line 1892 is missing from this extract; the line
 * below is the tail of a call that takes sw_if_index as its last argument. */
1893  sw_if_index);
1894  return 0;
1895 #endif
1896 }
1897 
1898 /* *INDENT-OFF* */
/*
 * Registration of the "create vhost-user" CLI command.
 * NOTE(review): listing line 1902 (presumably the .function member) is
 * missing from this extract. If the handler is the create function in this
 * file, short_help omits the "hwaddr" option that it parses -- confirm.
 */
1899 VLIB_CLI_COMMAND (dpdk_vhost_user_connect_command, static) = {
1900  .path = "create vhost-user",
1901  .short_help = "create vhost-user socket <socket-filename> [server] [feature-mask <hex>] [renumber <dev_instance>]",
1903 };
1904 /* *INDENT-ON* */
1905 
/*
 * CLI handler that deletes a vhost-user interface by sw_if_index.
 *
 * NOTE(review): listing line 1907 (the function name and first parameter)
 * is missing from this extract.
 *
 * When dm->conf->use_virtio_vhost is set the request is forwarded to
 * vhost_user_delete_command_fn(). Otherwise "sw_if_index <n>" is parsed and
 * the interface is deleted; an error is returned for unknown input or when
 * no sw_if_index was supplied.
 */
1906 static clib_error_t *
1908  unformat_input_t * input,
1909  vlib_cli_command_t * cmd)
1910 {
1911  dpdk_main_t *dm = &dpdk_main;
1912  clib_error_t *error = 0;
1913  unformat_input_t _line_input, *line_input = &_line_input;
1914  u32 sw_if_index = ~0;
1915 
1916  if (dm->conf->use_virtio_vhost)
1917  {
1918  return vhost_user_delete_command_fn (vm, input, cmd);
1919  }
1920 
1921  /* Get a line of input. */
1922  if (!unformat_user (input, unformat_line_input, line_input))
1923  return 0;
1924 
1925  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
1926  {
1927  if (unformat (line_input, "sw_if_index %d", &sw_if_index))
1928  ;
1929  else
/* NOTE(review): this return skips unformat_free (line_input). */
1930  return clib_error_return (0, "unknown input `%U'",
1931  format_unformat_error, input);
1932  }
1933  unformat_free (line_input);
1934 
/* ~0 is the initial value, i.e. no sw_if_index was parsed */
1935  if (sw_if_index == ~0)
1936  {
/* NOTE(review): the format string has no conversion, so the two trailing
 * arguments are unused. */
1937  error = clib_error_return (0, "invalid sw_if_index",
1938  format_unformat_error, input);
1939  return error;
1940  }
1941 
1942  vnet_main_t *vnm = vnet_get_main ();
1943 
/* the result of the delete call is discarded; the command always reports success */
1944 #if DPDK_VHOST_USER
1945  dpdk_vhost_user_delete_if (vnm, vm, sw_if_index);
1946 #else
1947  vhost_user_delete_if (vnm, vm, sw_if_index);
1948 #endif
1949 
1950  return 0;
1951 }
1952 
1953 /* *INDENT-OFF* */
/*
 * Registration of the "delete vhost-user" CLI command.
 * NOTE(review): listing line 1957 (presumably the .function member) is
 * missing from this extract.
 */
1954 VLIB_CLI_COMMAND (dpdk_vhost_user_delete_command, static) = {
1955  .path = "delete vhost-user",
1956  .short_help = "delete vhost-user sw_if_index <nn>",
1958 };
1959 /* *INDENT-ON* */
1960 
/*
 * X-macro listing the virtio-net feature bits of interest: each entry is
 * passed to whatever `_` is defined as at the point of expansion.
 */
1961 #define foreach_dpdk_vhost_feature \
1962  _ (VIRTIO_NET_F_MRG_RXBUF) \
1963  _ (VIRTIO_NET_F_CTRL_VQ) \
1964  _ (VIRTIO_NET_F_CTRL_RX)
1965 
/*
 * CLI handler for "show vhost-user".
 *
 * NOTE(review): listing line 1967 (the function name and first parameter)
 * is missing from this extract, as are lines 1979, 1992, 2032 and 2128.
 *
 * Without DPDK_VHOST_USER the whole body reduces to the call to
 * show_vhost_user_command_fn(). With it, that call is made only when
 * dm->conf->use_virtio_vhost is set; otherwise the handler prints, for each
 * requested interface (or every active vhost-user device when none is
 * named): negotiated features, socket information, memory regions and
 * per-virtqueue state, plus the descriptor tables when "descriptors" or
 * "desc" was given.
 *
 * Returns 0 on success or a clib error for unknown input or for an
 * interface that is not a DPDK vhost-user device.
 */
1966 static clib_error_t *
1968  unformat_input_t * input,
1969  vlib_cli_command_t * cmd)
1970 {
1971 #if DPDK_VHOST_USER
1972  clib_error_t *error = 0;
1973  dpdk_main_t *dm = &dpdk_main;
1974  vnet_main_t *vnm = vnet_get_main ();
1975  dpdk_device_t *xd;
1976  dpdk_vu_intf_t *vui;
1977  struct virtio_net *vhost_dev;
1978  u32 hw_if_index, *hw_if_indices = 0;
/* NOTE(review): listing line 1979 is missing; presumably it declares `hi`. */
1980  int i, j, q;
1981  int show_descr = 0;
1982  struct virtio_memory *mem;
/* name/bit pair used to print the symbolic names of feature bits */
1983  struct feat_struct
1984  {
1985  u8 bit;
1986  char *str;
1987  };
1988  struct feat_struct *feat_entry;
1989 
/* table terminated by an entry whose .str is NULL */
1990  static struct feat_struct feat_array[] = {
1991 #define _(f) { .str = #f, .bit = f, },
/* NOTE(review): listing line 1992 is missing; presumably it expands
 * foreach_dpdk_vhost_feature to populate the table. */
1993 #undef _
1994  {.str = NULL}
1995  };
1996 
1997  if (dm->conf->use_virtio_vhost)
1998  {
1999 #endif
2000  return show_vhost_user_command_fn (vm, input, cmd);
2001 #if DPDK_VHOST_USER
2002  }
2003 
2004  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2005  {
2006  if (unformat
2007  (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
2008  {
2009  vec_add1 (hw_if_indices, hw_if_index);
2010  vlib_cli_output (vm, "add %d", hw_if_index);
2011  }
2012  else if (unformat (input, "descriptors") || unformat (input, "desc"))
2013  show_descr = 1;
2014  else
2015  {
2016  error = clib_error_return (0, "unknown input `%U'",
2017  format_unformat_error, input);
2018  goto done;
2019  }
2020  }
/* no interface named: default to every active vhost-user device */
2021  if (vec_len (hw_if_indices) == 0)
2022  {
2023  vec_foreach (xd, dm->devices)
2024  {
/* NOTE(review): the operator between xd->flags and the flag constant was
 * lost in this extract; the same test elsewhere in the file uses `&`. */
2025  if ((xd->flags DPDK_DEVICE_FLAG_VHOST_USER) && xd->vu_intf->active)
2026  vec_add1 (hw_if_indices, xd->vlib_hw_if_index);
2027  }
2028  }
2029 
2030  vlib_cli_output (vm, "DPDK vhost-user interfaces");
2031  vlib_cli_output (vm, "Global:\n coalesce frames %d time %e\n\n",
/* NOTE(review): listing line 2032 (the first value argument) is missing. */
2033  dm->conf->vhost_coalesce_time);
2034 
2035  for (i = 0; i < vec_len (hw_if_indices); i++)
2036  {
2037  hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
2038 
2039  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_indices[i])))
2040  {
/* NOTE(review): hi->name is printed with %s here but with %v below. */
2041  error = clib_error_return (0, "not dpdk vhost-user interface: '%s'",
2042  hi->name);
2043  goto done;
2044  }
2045  vui = xd->vu_intf;
2046  vhost_dev = &xd->vu_vhost_dev;
2047  mem = vhost_dev->mem;
2048  u32 virtio_net_hdr_sz = (vui->num_vrings > 0 ?
2049  vhost_dev->virtqueue[0]->vhost_hlen : 0);
2050 
2051  vlib_cli_output (vm, "Interface: %v (ifindex %d)",
2052  hi->name, hw_if_indices[i]);
2053 
2054  vlib_cli_output (vm, "virtio_net_hdr_sz %d\n features (0x%llx): \n",
2055  virtio_net_hdr_sz, xd->vu_vhost_dev.features);
2056 
/* walk the feature table and print each bit that is set */
2057  feat_entry = (struct feat_struct *) &feat_array;
2058  while (feat_entry->str)
2059  {
/* NOTE(review): `1 << bit` is an int shift applied to a features word
 * printed as 64-bit; fine only while every listed bit is below 31. */
2060  if (xd->vu_vhost_dev.features & (1 << feat_entry->bit))
2061  vlib_cli_output (vm, " %s (%d)", feat_entry->str,
2062  feat_entry->bit);
2063  feat_entry++;
2064  }
2065 
2066  vlib_cli_output (vm, "\n");
2067 
2068  vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n",
2069  vui->sock_filename,
2070  vui->sock_is_server ? "server" : "client",
2071  strerror (vui->sock_errno));
2072 
/* NOTE(review): mem is dereferenced without a NULL check, whereas
 * dpdk_vhost_user_dump_ifs() tests vhost_dev->mem != NULL first. */
2073  vlib_cli_output (vm, " Memory regions (total %d)\n", mem->nregions);
2074 
2075  if (mem->nregions)
2076  {
2077  vlib_cli_output (vm,
2078  " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n");
2079  vlib_cli_output (vm,
2080  " ====== ===== ================== ================== ================== ================== ==================\n");
2081  }
2082  for (j = 0; j < mem->nregions; j++)
2083  {
2084  vlib_cli_output (vm,
2085  " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
2086  j, vui->region_fd[j],
2087  mem->regions[j].guest_phys_address,
2088  mem->regions[j].memory_size,
2089  mem->regions[j].userspace_address,
2090  mem->regions[j].address_offset,
2091  vui->region_addr[j]);
2092  }
2093  for (q = 0; q < vui->num_vrings; q++)
2094  {
2095  struct vhost_virtqueue *vq = vhost_dev->virtqueue[q];
/* odd ring indices are labelled TX and even ones RX; q / 2 is the pair number */
2096  const char *qtype = (q & 1) ? "TX" : "RX";
2097 
2098  vlib_cli_output (vm, "\n Virtqueue %d (%s)\n", q / 2, qtype);
2099 
2100  vlib_cli_output (vm,
2101  " qsz %d last_used_idx %d last_used_idx_res %d\n",
2102  vq->size, vq->last_used_idx,
2103  vq->last_used_idx_res);
2104 
2105  if (vq->avail && vq->used)
2106  vlib_cli_output (vm,
2107  " avail.flags %x avail.idx %d used.flags %x used.idx %d\n",
2108  vq->avail->flags, vq->avail->idx,
2109  vq->used->flags, vq->used->idx);
2110 
2111  vlib_cli_output (vm, " kickfd %d callfd %d errfd %d enabled %d\n",
2112  vq->kickfd, vq->callfd, vui->vrings[q].errfd,
2113  vq->enabled);
2114 
/* descriptor tables are dumped only on request and only for enabled rings */
2115  if (show_descr && vq->enabled)
2116  {
2117  vlib_cli_output (vm, "\n descriptor table:\n");
2118  vlib_cli_output (vm,
2119  " id addr len flags next user_addr\n");
2120  vlib_cli_output (vm,
2121  " ===== ================== ===== ====== ===== ==================\n");
2122  for (j = 0; j < vq->size; j++)
2123  {
2124  vlib_cli_output (vm,
2125  " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n",
2126  j, vq->desc[j].addr, vq->desc[j].len,
2127  vq->desc[j].flags, vq->desc[j].next,
/* NOTE(review): listing line 2128 is missing; the line below is the tail
 * of a call that takes (xd, vq->desc[j].addr) and supplies user_addr. */
2129  (xd, vq->desc[j].addr)));
2130  }
2131  }
2132  }
2133  vlib_cli_output (vm, "\n");
2134  }
/* single exit for the DPDK path: release the index vector */
2135 done:
2136  vec_free (hw_if_indices);
2137  return error;
2138 #endif
2139 }
2140 
2141 /* *INDENT-OFF* */
/*
 * Registration of the "show vhost-user" CLI command, handled by
 * show_dpdk_vhost_user_command_fn.
 */
2142 VLIB_CLI_COMMAND (show_vhost_user_command, static) = {
2143  .path = "show vhost-user",
2144  .short_help = "show vhost-user interface",
2145  .function = show_dpdk_vhost_user_command_fn,
2146 };
2147 /* *INDENT-ON* */
2148 
2149 /*
2150  * fd.io coding-style-patch-verification: ON
2151  *
2152  * Local Variables:
2153  * eval: (c-set-style "gnu")
2154  * End:
2155  */
unformat_function_t unformat_vnet_hw_interface
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
Definition: vec.h:396
unix_file_t * file_pool
Definition: unix.h:89
void dpdk_device_lock_free(dpdk_device_t *xd)
Definition: init.c:228
vmrglw vmrglh hi
#define hash_set(h, key, value)
Definition: hash.h:254
sll srl srl sll sra u16x4 i
Definition: vector_sse2.h:343
uword unformat(unformat_input_t *i, char *fmt,...)
Definition: unformat.c:966
clib_error_t * vnet_hw_interface_set_flags(vnet_main_t *vnm, u32 hw_if_index, u32 flags)
Definition: interface.c:522
unix_file_function_t * read_function
Definition: unix.h:62
#define hash_unset(h, key)
Definition: hash.h:260
void ethernet_delete_interface(vnet_main_t *vnm, u32 hw_if_index)
Definition: interface.c:254
dpdk_main_t dpdk_main
Definition: dpdk.h:522
static vlib_main_t * vlib_get_main(void)
Definition: global_funcs.h:23
u8 use_rss
Definition: dpdk.h:499
unsigned int uint32_t
Definition: fix_types.h:29
vnet_device_class_t dpdk_device_class
u32 vhost_coalesce_frames
Definition: dpdk.h:434
u8 need_txlock
Definition: dpdk.h:298
#define UNFORMAT_END_OF_INPUT
Definition: format.h:143
#define NULL
Definition: clib.h:55
static f64 vlib_time_now(vlib_main_t *vm)
Definition: main.h:182
#define vec_add2_aligned(V, P, N, A)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:533
u16 flags
Definition: dpdk.h:237
static vnet_hw_interface_t * vnet_get_hw_interface(vnet_main_t *vnm, u32 hw_if_index)
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:482
int vnet_interface_name_renumber(u32 sw_if_index, u32 new_show_dev_instance)
Definition: interface.c:1177
#define VHOST_USER_MSG_HDR_SZ
Definition: vhost-user.h:20
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:521
u32 per_interface_next_index
Definition: dpdk.h:225
vlib_worker_thread_t * vlib_worker_threads
Definition: threads.h:112
static vnet_sw_interface_t * vnet_get_sw_interface(vnet_main_t *vnm, u32 sw_if_index)
#define VHOST_USER_PROTOCOL_FEATURES
Definition: vhost-user.h:40
clib_error_t * show_vhost_user_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost-user.c:2262
u32 next_vu_if_id
Definition: dpdk.h:488
#define VNET_HW_INTERFACE_FLAG_LINK_UP
Definition: interface.h:348
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:407
format_function_t format_vnet_sw_if_index_name
static uword unix_file_add(unix_main_t *um, unix_file_t *template)
Definition: unix.h:136
#define VHOST_USER_REPLY_MASK
Definition: vhost-user.h:28
#define vec_reset_length(v)
Reset vector length to zero NULL-pointer tolerant.
static vnet_sw_interface_t * vnet_get_hw_sw_interface(vnet_main_t *vnm, u32 hw_if_index)
vnet_main_t * vnet_get_main(void)
Definition: misc.c:46
struct rte_mbuf *** tx_vectors
Definition: dpdk.h:228
f64 vhost_coalesce_time
Definition: dpdk.h:435
vlib_node_registration_t dpdk_input_node
(constructor) VLIB_REGISTER_NODE (dpdk_input_node)
Definition: node.c:774
int input_cpu_first_index
Definition: dpdk.h:502
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
u16 rx_q_used
Definition: dpdk.h:256
static void unformat_free(unformat_input_t *i)
Definition: format.h:161
#define foreach_dpdk_vhost_feature
Definition: vhost_user.c:1961
#define clib_warning(format, args...)
Definition: error.h:59
unsigned long u64
Definition: types.h:89
uword unformat_user(unformat_input_t *input, unformat_function_t *func,...)
Definition: unformat.c:977
vhost_user_log_t log
Definition: vhost-user.h:87
u32 device_index
Definition: dpdk.h:219
static uword pointer_to_uword(const void *p)
Definition: types.h:131
vlib_main_t ** vlib_mains
Definition: dpdk_buffer.c:157
u32 vlib_sw_if_index
Definition: dpdk.h:222
static void unix_file_del(unix_main_t *um, unix_file_t *f)
Definition: unix.h:146
#define hash_get(h, key)
Definition: hash.h:248
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
Definition: pool.h:369
u32 file_descriptor
Definition: unix.h:52
dpdk_device_and_queue_t ** devices_by_cpu
Definition: dpdk.h:451
int vhost_user_delete_if(vnet_main_t *vnm, vlib_main_t *vm, u32 sw_if_index)
Definition: vhost-user.c:1809
u32 vlib_hw_if_index
Definition: dpdk.h:221
#define clib_error_return_unix(e, args...)
Definition: error.h:114
#define PREDICT_FALSE(x)
Definition: clib.h:97
int input_cpu_count
Definition: dpdk.h:503
#define VLIB_FRAME_SIZE
Definition: node.h:328
u16 tx_q_used
Definition: dpdk.h:255
void vlib_cli_output(vlib_main_t *vm, char *fmt,...)
Definition: cli.c:575
int vhost_user_dump_ifs(vnet_main_t *vnm, vlib_main_t *vm, vhost_user_intf_details_t **out_vuids)
Definition: vhost-user.c:2209
#define uword_to_pointer(u, type)
Definition: types.h:136
int vhost_user_create_if(vnet_main_t *vnm, vlib_main_t *vm, const char *sock_filename, u8 is_server, u32 *sw_if_index, u64 feature_mask, u8 renumber, u32 custom_dev_instance, u8 *hwaddr)
Definition: vhost-user.c:2031
dpdk_device_t * devices
Definition: dpdk.h:450
u16 * cpu_socket_id_by_queue
Definition: dpdk.h:258
#define vec_free(V)
Free vector's memory (no header).
Definition: vec.h:300
volatile u32 ** lockp
Definition: dpdk.h:216
vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS]
Definition: vhost-user.h:92
static vlib_thread_main_t * vlib_get_thread_main()
Definition: global_funcs.h:32
struct rte_mbuf *** rx_vectors
Definition: dpdk.h:229
int vhost_user_modify_if(vnet_main_t *vnm, vlib_main_t *vm, const char *sock_filename, u8 is_server, u32 sw_if_index, u64 feature_mask, u8 renumber, u32 custom_dev_instance)
Definition: vhost-user.c:2072
#define clib_memcpy(a, b, c)
Definition: string.h:64
#define VHOST_MEMORY_MAX_NREGIONS
Definition: vhost-user.h:19
static_always_inline void * map_guest_mem(vhost_user_intf_t *vui, uword addr, u32 *hint)
Definition: vhost-user.c:152
#define DPDK_DEVICE_FLAG_VHOST_USER
Definition: dpdk.h:242
#define clib_unix_warning(format, args...)
Definition: error.h:68
static clib_error_t * dpdk_vhost_user_connect_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost_user.c:1834
void vlib_worker_thread_barrier_sync(vlib_main_t *vm)
Definition: threads.c:1182
#define ARRAY_LEN(x)
Definition: clib.h:59
static clib_error_t * show_dpdk_vhost_user_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost_user.c:1967
#define VHOST_USER_PROTOCOL_F_LOG_SHMFD
Definition: vhost-user.h:31
#define vec_validate_ha(V, I, H, A)
Make sure vector is long enough for given index (general version).
Definition: vec.h:376
#define VLIB_CLI_COMMAND(x,...)
Definition: cli.h:154
uword unformat_ethernet_address(unformat_input_t *input, va_list *args)
Definition: format.c:245
#define ASSERT(truth)
unsigned int u32
Definition: types.h:88
u32 * vu_inactive_interfaces_device_index
Definition: dpdk.h:486
u8 * format_unformat_error(u8 *s, va_list *va)
Definition: unformat.c:91
static long get_huge_page_size(int fd)
Definition: vhost-user.c:237
vhost_vring_state_t state
Definition: vhost-user.h:84
clib_error_t * vhost_user_delete_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost-user.c:2180
static void clib_mem_free(void *p)
Definition: mem.h:176
void dpdk_device_lock_init(dpdk_device_t *xd)
Definition: init.c:214
clib_error_t * ethernet_register_interface(vnet_main_t *vnm, u32 dev_class_index, u32 dev_instance, u8 *address, u32 *hw_if_index_return, ethernet_flag_change_function_t flag_change)
Definition: interface.c:205
u8 use_virtio_vhost
Definition: dpdk.h:431
static void vlib_node_set_state(vlib_main_t *vm, u32 node_index, vlib_node_state_t new_state)
Set node dispatch state.
Definition: node_funcs.h:144
unix_main_t unix_main
Definition: main.c:57
static void * clib_mem_alloc(uword size)
Definition: mem.h:109
u64 uword
Definition: types.h:112
unsigned short u16
Definition: types.h:57
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
double f64
Definition: types.h:142
unsigned char u8
Definition: types.h:56
#define VHOST_NET_VRING_IDX_RX
Definition: vhost-user.h:22
Definition: unix.h:49
#define VHOST_USER_F_PROTOCOL_FEATURES
Definition: vhost-user.h:34
static uword unformat_check_input(unformat_input_t *i)
Definition: format.h:169
static u32 random_u32(u32 *seed)
32-bit random number generator
Definition: random.h:69
#define VHOST_NET_VRING_IDX_TX
Definition: vhost-user.h:23
void vlib_worker_thread_barrier_release(vlib_main_t *vm)
Definition: threads.c:1214
vnet_sw_interface_type_t type
Definition: interface.h:485
u8 * format(u8 *s, const char *fmt,...)
Definition: format.c:418
#define vec_foreach(var, vec)
Vector iterator.
#define clib_error_return(e, args...)
Definition: error.h:111
struct _unformat_input_t unformat_input_t
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:67
vnet_main_t * vnet_main
Definition: dpdk.h:518
u16 nb_tx_desc
Definition: dpdk.h:246
unformat_function_t unformat_line_input
Definition: format.h:281
static clib_error_t * dpdk_vhost_user_delete_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost_user.c:1907
clib_error_t * vhost_user_connect_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost-user.c:2122
#define DBG_SOCK(args...)
Definition: vhost_user.c:39
uword * vu_sw_if_index_by_listener_fd
Definition: dpdk.h:484
CLIB vectors are ubiquitous dynamically resized arrays with user-defined "headers".
dpdk_config_main_t * conf
Definition: dpdk.h:519
uword * vu_sw_if_index_by_sock_fd
Definition: dpdk.h:485