FD.io VPP  v18.01-8-g0eacf49
Vector Packet Processing
init.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <vnet/vnet.h>
16 #include <vppinfra/vec.h>
17 #include <vppinfra/error.h>
18 #include <vppinfra/format.h>
19 #include <vppinfra/bitmap.h>
20 #include <vppinfra/linux/sysfs.h>
21 #include <vlib/unix/unix.h>
22 
23 #include <vnet/ethernet/ethernet.h>
24 #include <dpdk/device/dpdk.h>
25 #include <vlib/pci/pci.h>
26 
27 #include <rte_ring.h>
28 
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <sys/stat.h>
33 #include <sys/mount.h>
34 #include <string.h>
35 #include <fcntl.h>
36 
37 #include <dpdk/device/dpdk_priv.h>
38 
41 
42 #define LINK_STATE_ELOGS 0
43 
44 /* Port configuration, mildly modified Intel app values */
45 
/*
 * Baseline rte_eth_conf for every port. Note that rxmode.jumbo_frame and
 * rxmode.enable_scatter of this shared template are overwritten at run time
 * (elsewhere in this file) according to the no_multi_seg setting.
 */
46 static struct rte_eth_conf port_conf_template = {
47  .rxmode = {
48  .split_hdr_size = 0,
49  .header_split = 0, /**< Header Split disabled */
50  .hw_ip_checksum = 0, /**< IP checksum offload disabled */
51  .hw_vlan_filter = 0, /**< VLAN filtering disabled */
52  .hw_strip_crc = 0, /**< CRC stripping by hardware disabled by default; enabled per PMD elsewhere in this file */
53  },
54  .txmode = {
55  .mq_mode = ETH_MQ_TX_NONE,
56  },
57 };
58 
/*
 * Derive a dpdk_port_type_t from the speed capability bits in
 * dev_info->speed_capa. The bits are tested from 100G down to 1G, so the
 * first (highest) advertised speed selects the branch.
 *
 * NOTE(review): the statement executed by each branch and the fallback after
 * the chain are not present in this listing; confirm against the full source.
 */
59 static dpdk_port_type_t
60 port_type_from_speed_capa (struct rte_eth_dev_info *dev_info)
61 {
62 
63  if (dev_info->speed_capa & ETH_LINK_SPEED_100G)
65  else if (dev_info->speed_capa & ETH_LINK_SPEED_50G)
67  else if (dev_info->speed_capa & ETH_LINK_SPEED_40G)
69  else if (dev_info->speed_capa & ETH_LINK_SPEED_25G)
71  else if (dev_info->speed_capa & ETH_LINK_SPEED_10G)
73  else if (dev_info->speed_capa & ETH_LINK_SPEED_1G)
75 
77 }
78 
79 
80 static u32
82 {
83  dpdk_main_t *dm = &dpdk_main;
85  u32 old = 0;
86 
88  {
89  old = (xd->flags & DPDK_DEVICE_FLAG_PROMISC) != 0;
90 
93  else
95 
97  {
99  rte_eth_promiscuous_enable (xd->device_index);
100  else
101  rte_eth_promiscuous_disable (xd->device_index);
102  }
103  }
104  else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU (flags))
105  {
106  xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes;
107  dpdk_device_setup (xd);
108  }
109  return old;
110 }
111 
112 static void
114 {
115  int q;
116  vec_validate (xd->lockp, xd->tx_q_used - 1);
117  for (q = 0; q < xd->tx_q_used; q++)
118  {
121  memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES);
122  }
123 }
124 
/*
 * Look up a mempool ops entry by name.
 *
 * Walks rte_mempool_ops_table.ops[0 .. num_ops-1] and compares each entry's
 * name with ops_name using strcmp.
 *
 * @param ops_name  NUL-terminated name to search for.
 * @return pointer to the first matching table entry, or 0 when no entry
 *         matches.
 *
 * NOTE(review): the visible caller in this file writes through the returned
 * pointer without checking it for 0.
 */
125 static struct rte_mempool_ops *
126 get_ops_by_name (i8 * ops_name)
127 {
128  u32 i;
129 
130  for (i = 0; i < rte_mempool_ops_table.num_ops; i++)
131  {
132  if (!strcmp (ops_name, rte_mempool_ops_table.ops[i].name))
133  return &rte_mempool_ops_table.ops[i];
134  }
135 
 /* no entry with that name */
136  return 0;
137 }
138 
/*
 * Mempool "alloc" callback that backs a mempool with an rte_ring whose
 * memory comes from clib_mem_alloc_aligned().
 *
 * Steps, as visible here:
 *  - build the ring name from mp->name with RTE_MEMPOOL_MZ_FORMAT;
 *  - translate MEMPOOL_F_SP_PUT / MEMPOOL_F_SC_GET into
 *    RING_F_SP_ENQ / RING_F_SC_DEQ;
 *  - size the ring as rte_align32pow2 (mp->size + 1);
 *  - allocate, rte_ring_init() the ring and store it in mp->pool_data.
 *
 * @param mp  mempool being populated.
 * @return 0 on success, -ENAMETOOLONG when the ring name does not fit in
 *         rg_name (or snprintf fails), otherwise the non-zero value returned
 *         by rte_ring_init().
 *
 * NOTE(review): the continuation line of the clib_mem_alloc_aligned() call is
 * missing from this listing. In the visible code the allocation result is
 * handed to rte_ring_init() without a null check and is not released when
 * rte_ring_init() fails - confirm against the full source.
 */
139 static int
140 dpdk_ring_alloc (struct rte_mempool *mp)
141 {
142  u32 rg_flags = 0, count;
143  i32 ret;
144  i8 rg_name[RTE_RING_NAMESIZE];
145  struct rte_ring *r;
146 
 /* snprintf returning >= sizeof (rg_name) means the name was truncated */
147  ret = snprintf (rg_name, sizeof (rg_name), RTE_MEMPOOL_MZ_FORMAT, mp->name);
148  if (ret < 0 || ret >= (i32) sizeof (rg_name))
149  return -ENAMETOOLONG;
150 
151  /* ring flags */
152  if (mp->flags & MEMPOOL_F_SP_PUT)
153  rg_flags |= RING_F_SP_ENQ;
154  if (mp->flags & MEMPOOL_F_SC_GET)
155  rg_flags |= RING_F_SC_DEQ;
156 
 /* ring slot count: mp->size + 1 rounded by rte_align32pow2 */
157  count = rte_align32pow2 (mp->size + 1);
158  /*
159  * Allocate the ring that will be used to store objects.
160  * Ring functions will return appropriate errors if we are
161  * running as a secondary process etc., so no checks made
162  * in this function for that condition.
163  */
164  /* XXX can we get memory from the right socket? */
165  r = clib_mem_alloc_aligned (rte_ring_get_memsize (count),
167 
168  /* XXX rte_ring_lookup will not work */
169 
170  ret = rte_ring_init (r, rg_name, count, rg_flags);
171  if (ret)
172  return ret;
173 
174  mp->pool_data = r;
175 
176  return 0;
177 }
178 
179 static clib_error_t *
181 {
182  u32 nports;
183  u32 nb_desc = 0;
184  int i;
185  clib_error_t *error;
191  dpdk_device_t *xd;
192  vlib_pci_addr_t last_pci_addr;
193  u32 last_pci_addr_port = 0;
195  uword *p_hqos;
196 
197  u32 next_hqos_cpu = 0;
198  u8 af_packet_port_id = 0;
199  u8 bond_ether_port_id = 0;
200  last_pci_addr.as_u32 = ~0;
201 
202  dm->hqos_cpu_first_index = 0;
203  dm->hqos_cpu_count = 0;
204 
205  /* find out which cpus will be used for I/O TX */
206  p_hqos = hash_get_mem (tm->thread_registrations_by_name, "hqos-threads");
207  tr_hqos = p_hqos ? (vlib_thread_registration_t *) p_hqos[0] : 0;
208 
209  if (tr_hqos && tr_hqos->count > 0)
210  {
211  dm->hqos_cpu_first_index = tr_hqos->first_index;
212  dm->hqos_cpu_count = tr_hqos->count;
213  }
214 
217 
218  nports = rte_eth_dev_count ();
219  if (nports < 1)
220  {
221  clib_warning ("DPDK drivers found no ports...");
222  }
223 
224  if (CLIB_DEBUG > 0)
225  clib_warning ("DPDK drivers found %d ports...", nports);
226 
227  /*
228  * All buffers are allocated from the same rte_mempool.
229  * Thus they all have the same number of data bytes.
230  */
234  "dpdk rx");
235 
236  if (dm->conf->enable_tcp_udp_checksum)
237  dm->buffer_flags_template &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT
238  | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
239 
240  /* vlib_buffer_t template */
243  for (i = 0; i < tm->n_vlib_mains; i++)
244  {
247  fl = vlib_buffer_get_free_list (vm,
250  bt->flags = dm->buffer_flags_template;
251  vnet_buffer (bt)->sw_if_index[VLIB_TX] = (u32) ~ 0;
252  }
253 
254  for (i = 0; i < nports; i++)
255  {
256  u8 addr[6];
257  u8 vlan_strip = 0;
258  int j;
259  struct rte_eth_dev_info dev_info;
260  struct rte_eth_link l;
261  dpdk_device_config_t *devconf = 0;
262  vlib_pci_addr_t pci_addr;
263  uword *p = 0;
264 
265  rte_eth_dev_info_get (i, &dev_info);
266  if (dev_info.pci_dev) /* bonded interface has no pci info */
267  {
268  pci_addr.domain = dev_info.pci_dev->addr.domain;
269  pci_addr.bus = dev_info.pci_dev->addr.bus;
270  pci_addr.slot = dev_info.pci_dev->addr.devid;
271  pci_addr.function = dev_info.pci_dev->addr.function;
272  p =
274  pci_addr.as_u32);
275  }
276 
277  if (p)
278  devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
279  else
280  devconf = &dm->conf->default_devconf;
281 
282  /* Create vnet interface */
286  xd->cpu_socket = (i8) rte_eth_dev_socket_id (i);
287 
288  /* Handle interface naming for devices with multiple ports sharing same PCI ID */
289  if (dev_info.pci_dev)
290  {
291  struct rte_eth_dev_info di = { 0 };
292  rte_eth_dev_info_get (i + 1, &di);
293  if (di.pci_dev && pci_addr.as_u32 != last_pci_addr.as_u32 &&
294  memcmp (&dev_info.pci_dev->addr, &di.pci_dev->addr,
295  sizeof (struct rte_pci_addr)) == 0)
296  {
297  xd->interface_name_suffix = format (0, "0");
298  last_pci_addr.as_u32 = pci_addr.as_u32;
299  last_pci_addr_port = i;
300  }
301  else if (pci_addr.as_u32 == last_pci_addr.as_u32)
302  {
304  format (0, "%u", i - last_pci_addr_port);
305  }
306  else
307  {
308  last_pci_addr.as_u32 = ~0;
309  }
310  }
311  else
312  last_pci_addr.as_u32 = ~0;
313 
314  clib_memcpy (&xd->tx_conf, &dev_info.default_txconf,
315  sizeof (struct rte_eth_txconf));
316 
317  if (dm->conf->no_multi_seg)
318  {
319  xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
320  port_conf_template.rxmode.jumbo_frame = 0;
321  port_conf_template.rxmode.enable_scatter = 0;
322  }
323  else
324  {
325  xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS;
326  port_conf_template.rxmode.jumbo_frame = 1;
327  port_conf_template.rxmode.enable_scatter = 1;
329  }
330 
332  sizeof (struct rte_eth_conf));
333 
334  xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains);
335 
336  if (devconf->num_tx_queues > 0
337  && devconf->num_tx_queues < xd->tx_q_used)
338  xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues);
339 
340  if (devconf->num_rx_queues > 1
341  && dev_info.max_rx_queues >= devconf->num_rx_queues)
342  {
343  xd->rx_q_used = devconf->num_rx_queues;
344  xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
345  if (devconf->rss_fn == 0)
346  xd->port_conf.rx_adv_conf.rss_conf.rss_hf =
347  ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP;
348  else
349  xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn;
350  }
351  else
352  xd->rx_q_used = 1;
353 
355 
356  /* workaround for drivers not setting driver_name */
357  if ((!dev_info.driver_name) && (dev_info.pci_dev))
358  dev_info.driver_name = dev_info.pci_dev->driver->driver.name;
359 
360  ASSERT (dev_info.driver_name);
361 
362  if (!xd->pmd)
363  {
364 
365 
366 #define _(s,f) else if (dev_info.driver_name && \
367  !strcmp(dev_info.driver_name, s)) \
368  xd->pmd = VNET_DPDK_PMD_##f;
369  if (0)
370  ;
372 #undef _
373  else
375 
379 
380  switch (xd->pmd)
381  {
382  /* Drivers with valid speed_capa set */
383  case VNET_DPDK_PMD_E1000EM:
384  case VNET_DPDK_PMD_IGB:
385  case VNET_DPDK_PMD_IXGBE:
386  case VNET_DPDK_PMD_I40E:
387  xd->port_type = port_type_from_speed_capa (&dev_info);
388  if (dm->conf->no_tx_checksum_offload == 0)
389  {
390  xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOXSUMS;
391  xd->flags |=
394  }
395 
396 
397  break;
398  case VNET_DPDK_PMD_CXGBE:
399  case VNET_DPDK_PMD_MLX4:
400  case VNET_DPDK_PMD_MLX5:
401  xd->port_type = port_type_from_speed_capa (&dev_info);
402  break;
403 
404  /* SR-IOV VFs */
405  case VNET_DPDK_PMD_IGBVF:
406  case VNET_DPDK_PMD_IXGBEVF:
407  case VNET_DPDK_PMD_I40EVF:
409  xd->port_conf.rxmode.hw_strip_crc = 1;
410  break;
411 
412  case VNET_DPDK_PMD_THUNDERX:
414  xd->port_conf.rxmode.hw_strip_crc = 1;
415  break;
416 
417  case VNET_DPDK_PMD_ENA:
419  break;
420 
421  case VNET_DPDK_PMD_DPAA2:
423  break;
424 
425  /* Cisco VIC */
426  case VNET_DPDK_PMD_ENIC:
427  rte_eth_link_get_nowait (i, &l);
428  if (l.link_speed == 40000)
430  else
432  break;
433 
434  /* Intel Red Rock Canyon */
435  case VNET_DPDK_PMD_FM10K:
437  xd->port_conf.rxmode.hw_strip_crc = 1;
438  break;
439 
440  /* virtio */
441  case VNET_DPDK_PMD_VIRTIO:
445  break;
446 
447  /* vmxnet3 */
448  case VNET_DPDK_PMD_VMXNET3:
450  xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
451  break;
452 
453  case VNET_DPDK_PMD_AF_PACKET:
455  xd->port_id = af_packet_port_id++;
456  break;
457 
458  case VNET_DPDK_PMD_BOND:
460  xd->port_id = bond_ether_port_id++;
461  break;
462 
463  case VNET_DPDK_PMD_VIRTIO_USER:
465  break;
466 
467  default:
469  }
470 
471  if (devconf->num_rx_desc)
472  xd->nb_rx_desc = devconf->num_rx_desc;
473 
474  if (devconf->num_tx_desc)
475  xd->nb_tx_desc = devconf->num_tx_desc;
476  }
477 
478  /*
479  * Ensure default mtu is not > the mtu read from the hardware.
480  * Otherwise rte_eth_dev_configure() will fail and the port will
481  * not be available.
482  */
483  if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen)
484  {
485  /*
486  * This device does not support the platform's max frame
487  * size. Use its advertised mru instead.
488  */
489  xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen;
490  }
491  else
492  {
493  xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES;
494 
495  /*
496  * Some platforms do not account for Ethernet FCS (4 bytes) in
497  * MTU calculations. To interop with them increase mru but only
498  * if the device's settings can support it.
499  */
500  if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) &&
501  xd->port_conf.rxmode.hw_strip_crc)
502  {
503  /*
504  * Allow additional 4 bytes (for Ethernet FCS). These bytes are
505  * stripped by h/w and so will not consume any buffer memory.
506  */
507  xd->port_conf.rxmode.max_rx_pkt_len += 4;
508  }
509  }
510 
511  if (xd->pmd == VNET_DPDK_PMD_AF_PACKET)
512  {
513  f64 now = vlib_time_now (vm);
514  u32 rnd;
515  rnd = (u32) (now * 1e6);
516  rnd = random_u32 (&rnd);
517  clib_memcpy (addr + 2, &rnd, sizeof (rnd));
518  addr[0] = 2;
519  addr[1] = 0xfe;
520  }
521  else
522  rte_eth_macaddr_get (i, (struct ether_addr *) addr);
523 
524  if (xd->tx_q_used < tm->n_vlib_mains)
526 
527  xd->device_index = xd - dm->devices;
528  ASSERT (i == xd->device_index);
529  xd->per_interface_next_index = ~0;
530 
531  /* assign interface to input thread */
532  int q;
533 
534  if (devconf->hqos_enabled)
535  {
537 
538  int cpu;
539  if (devconf->hqos.hqos_thread_valid)
540  {
541  if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count)
542  return clib_error_return (0, "invalid HQoS thread index");
543 
544  cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread;
545  }
546  else
547  {
548  if (dm->hqos_cpu_count == 0)
549  return clib_error_return (0, "no HQoS threads available");
550 
551  cpu = dm->hqos_cpu_first_index + next_hqos_cpu;
552 
553  next_hqos_cpu++;
554  if (next_hqos_cpu == dm->hqos_cpu_count)
555  next_hqos_cpu = 0;
556 
557  devconf->hqos.hqos_thread_valid = 1;
558  devconf->hqos.hqos_thread = cpu;
559  }
560 
562  vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1);
563  dq->device = xd->device_index;
564  dq->queue_id = 0;
565  }
566 
569  for (j = 0; j < tm->n_vlib_mains; j++)
570  {
573  vec_reset_length (xd->tx_vectors[j]);
574  }
575 
578  for (j = 0; j < xd->rx_q_used; j++)
579  {
582  vec_reset_length (xd->rx_vectors[j]);
583  }
584 
587 
588 
589  /* count the number of descriptors used for this device */
590  nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used;
591 
593  (dm->vnet_main, dpdk_device_class.index, xd->device_index,
594  /* ethernet address */ addr,
596  if (error)
597  return error;
598 
600  xd->vlib_sw_if_index = sw->sw_if_index;
602  dpdk_input_node.index);
603 
604  if (devconf->workers)
605  {
606  int i;
607  q = 0;
608  /* *INDENT-OFF* */
609  clib_bitmap_foreach (i, devconf->workers, ({
610  vnet_hw_interface_assign_rx_thread (dm->vnet_main, xd->hw_if_index, q++,
611  vdm->first_worker_thread_index + i);
612  }));
613  /* *INDENT-ON* */
614  }
615  else
616  for (q = 0; q < xd->rx_q_used; q++)
617  {
619  ~1);
620  }
621 
623 
624  if (dm->conf->no_tx_checksum_offload == 0)
627 
628  dpdk_device_setup (xd);
629 
630  if (vec_len (xd->errors))
631  clib_warning ("setup failed for device %U. Errors:\n %U",
634 
635  if (devconf->hqos_enabled)
636  {
637  clib_error_t *rv;
638  rv = dpdk_port_setup_hqos (xd, &devconf->hqos);
639  if (rv)
640  return rv;
641  }
642 
643  /*
644  * For cisco VIC vNIC, set default to VLAN strip enabled, unless
645  * specified otherwise in the startup config.
646  * For other NICs default to VLAN strip disabled, unless specified
647  * otherwise in the startup config.
648  */
649  if (xd->pmd == VNET_DPDK_PMD_ENIC)
650  {
651  if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF)
652  vlan_strip = 1; /* remove vlan tag from VIC port by default */
653  else
654  clib_warning ("VLAN strip disabled for interface\n");
655  }
656  else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON)
657  vlan_strip = 1;
658 
659  if (vlan_strip)
660  {
661  int vlan_off;
662  vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index);
663  vlan_off |= ETH_VLAN_STRIP_OFFLOAD;
664  xd->port_conf.rxmode.hw_vlan_strip = vlan_off;
665  if (rte_eth_dev_set_vlan_offload (xd->device_index, vlan_off) == 0)
666  clib_warning ("VLAN strip enabled for interface\n");
667  else
668  clib_warning ("VLAN strip cannot be supported by interface\n");
669  }
670 
672  xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t);
673 
674  rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes);
675  }
676 
677  if (nb_desc > dm->conf->num_mbufs)
678  clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n",
679  dm->conf->num_mbufs, nb_desc);
680 
681  return 0;
682 }
683 
684 static void
686 {
687  clib_error_t *error;
688  u8 *pci_addr = 0;
689  int num_whitelisted = vec_len (conf->dev_confs);
690  vlib_pci_device_info_t *d = 0;
691  vlib_pci_addr_t *addr = 0, *addrs;
692 
693  addrs = vlib_pci_get_all_dev_addrs ();
694  /* *INDENT-OFF* */
695  vec_foreach (addr, addrs)
696  {
697  dpdk_device_config_t * devconf = 0;
698  vec_reset_length (pci_addr);
699  pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, addr, 0);
700  if (d)
701  {
703  d = 0;
704  }
705  d = vlib_pci_get_device_info (addr, &error);
706  if (error)
707  {
708  clib_error_report (error);
709  continue;
710  }
711 
713  continue;
714 
715  if (num_whitelisted)
716  {
717  uword * p = hash_get (conf->device_config_index_by_pci_addr, addr->as_u32);
718 
719  if (!p)
720  continue;
721 
722  devconf = pool_elt_at_index (conf->dev_confs, p[0]);
723  }
724 
725  /* virtio */
726  if (d->vendor_id == 0x1af4 &&
729  ;
730  /* vmxnet3 */
731  else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0)
732  ;
733  /* all Intel network devices */
734  else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_NETWORK_ETHERNET)
735  ;
736  /* all Intel QAT devices VFs */
737  else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_PROCESSOR_CO &&
738  (d->device_id == 0x0443 || d->device_id == 0x37c9 || d->device_id == 0x19e3))
739  ;
740  /* Cisco VIC */
741  else if (d->vendor_id == 0x1137 && d->device_id == 0x0043)
742  ;
743  /* Chelsio T4/T5 */
744  else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000)
745  ;
746  /* Amazon Elastic Network Adapter */
747  else if (d->vendor_id == 0x1d0f && d->device_id >= 0xec20 && d->device_id <= 0xec21)
748  ;
749  /* Mellanox */
750  else if (d->vendor_id == 0x15b3 && d->device_id >= 0x1013 && d->device_id <= 0x101a)
751  {
752  continue;
753  }
754  else
755  {
756  clib_warning ("Unsupported PCI device 0x%04x:0x%04x found "
757  "at PCI address %s\n", (u16) d->vendor_id, (u16) d->device_id,
758  pci_addr);
759  continue;
760  }
761 
762  error = vlib_pci_bind_to_uio (addr, (char *) conf->uio_driver_name);
763 
764  if (error)
765  {
766  if (devconf == 0)
767  {
768  pool_get (conf->dev_confs, devconf);
769  hash_set (conf->device_config_index_by_pci_addr, addr->as_u32,
770  devconf - conf->dev_confs);
771  devconf->pci_addr.as_u32 = addr->as_u32;
772  }
773  devconf->is_blacklisted = 1;
774  clib_error_report (error);
775  }
776  }
777  /* *INDENT-ON* */
778  vec_free (pci_addr);
780 }
781 
/*
 * Parse one "dev ..." configuration stanza into a dpdk_device_config_t.
 *
 * @param conf        configuration main holding default_devconf, the
 *                    dev_confs pool and device_config_index_by_pci_addr.
 * @param pci_addr    PCI address the stanza applies to; also stored into the
 *                    selected devconf (including the default one).
 * @param input       sub-input holding the per-device options, or 0 when the
 *                    stanza has no option block.
 * @param is_default  non-zero to fill conf->default_devconf instead of
 *                    allocating a per-address entry.
 * @return 0 on success, otherwise a clib_error_t describing a duplicate
 *         PCI address, an error from unformat_rss_fn / unformat_hqos, an
 *         unknown option, or a workers / num-rx-queues mismatch.
 *
 * NOTE(review): several lines (the parse-loop header, the vlan-strip-offload
 * assignments and the start of the final error construction) are missing
 * from this listing.
 */
782 static clib_error_t *
783 dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr,
784  unformat_input_t * input, u8 is_default)
785 {
786  clib_error_t *error = 0;
787  uword *p;
788  dpdk_device_config_t *devconf;
789  unformat_input_t sub_input;
790 
791  if (is_default)
792  {
793  devconf = &conf->default_devconf;
794  }
795  else
796  {
 /* a PCI address may be configured only once */
797  p = hash_get (conf->device_config_index_by_pci_addr, pci_addr.as_u32);
798 
799  if (!p)
800  {
 /* new entry: remember its pool index under the PCI address */
801  pool_get (conf->dev_confs, devconf);
802  hash_set (conf->device_config_index_by_pci_addr, pci_addr.as_u32,
803  devconf - conf->dev_confs);
804  }
805  else
806  return clib_error_return (0,
807  "duplicate configuration for PCI address %U",
808  format_vlib_pci_addr, &pci_addr);
809  }
810 
811  devconf->pci_addr.as_u32 = pci_addr.as_u32;
812  devconf->hqos_enabled = 0;
814 
 /* stanza without an option block: nothing more to parse */
815  if (!input)
816  return 0;
817 
820  {
821  if (unformat (input, "num-rx-queues %u", &devconf->num_rx_queues))
822  ;
823  else if (unformat (input, "num-tx-queues %u", &devconf->num_tx_queues))
824  ;
825  else if (unformat (input, "num-rx-desc %u", &devconf->num_rx_desc))
826  ;
827  else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc))
828  ;
829  else if (unformat (input, "workers %U", unformat_bitmap_list,
830  &devconf->workers))
831  ;
832  else
833  if (unformat
834  (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input))
835  {
836  error = unformat_rss_fn (&sub_input, &devconf->rss_fn);
837  if (error)
838  break;
839  }
840  else if (unformat (input, "vlan-strip-offload off"))
842  else if (unformat (input, "vlan-strip-offload on"))
844  else
845  if (unformat
846  (input, "hqos %U", unformat_vlib_cli_sub_input, &sub_input))
847  {
848  devconf->hqos_enabled = 1;
849  error = unformat_hqos (&sub_input, &devconf->hqos);
850  if (error)
851  break;
852  }
853  else if (unformat (input, "hqos"))
854  {
855  devconf->hqos_enabled = 1;
856  }
857  else
858  {
859  error = clib_error_return (0, "unknown input `%U'",
860  format_unformat_error, input);
861  break;
862  }
863  }
864 
865  if (error)
866  return error;
867 
 /*
  * With a worker bitmap and no explicit rx queue count, use one rx queue per
  * set bit; with both given, the two counts must agree.
  * NOTE(review): "threadds" in the message below is a typo; it is runtime
  * text and is left untouched here.
  */
868  if (devconf->workers && devconf->num_rx_queues == 0)
869  devconf->num_rx_queues = clib_bitmap_count_set_bits (devconf->workers);
870  else if (devconf->workers &&
871  clib_bitmap_count_set_bits (devconf->workers) !=
872  devconf->num_rx_queues)
873  error =
875  "%U: number of worker threadds must be "
876  "equal to number of rx queues", format_vlib_pci_addr,
877  &pci_addr);
878 
879  return error;
880 }
881 
882 static clib_error_t *
884 {
885  clib_error_t *error = 0;
886  dpdk_main_t *dm = &dpdk_main;
889  dpdk_device_config_t *devconf;
890  vlib_pci_addr_t pci_addr;
891  unformat_input_t sub_input;
892  uword x;
893  u8 *s, *tmp = 0;
894  u8 *rte_cmd = 0, *ethname = 0;
895  u32 log_level;
896  int ret, i;
897  int num_whitelisted = 0;
898  u8 no_pci = 0;
899  u8 no_huge = 0;
900  u8 huge_dir = 0;
901  u8 file_prefix = 0;
902  u8 *socket_mem = 0;
903  u8 *huge_dir_path = 0;
904 
905  huge_dir_path =
906  format (0, "%s/hugepages%c", vlib_unix_get_runtime_dir (), 0);
907 
908  conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword));
909  log_level = RTE_LOG_NOTICE;
910 
912  {
913  /* Prime the pump */
914  if (unformat (input, "no-hugetlb"))
915  {
916  vec_add1 (conf->eal_init_args, (u8 *) "no-huge");
917  no_huge = 1;
918  }
919 
920  else if (unformat (input, "enable-tcp-udp-checksum"))
921  conf->enable_tcp_udp_checksum = 1;
922 
923  else if (unformat (input, "no-tx-checksum-offload"))
924  conf->no_tx_checksum_offload = 1;
925 
926  else if (unformat (input, "decimal-interface-names"))
928 
929  else if (unformat (input, "log-level %U", unformat_dpdk_log_level, &x))
930  log_level = x;
931 
932  else if (unformat (input, "no-multi-seg"))
933  conf->no_multi_seg = 1;
934 
935  else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input,
936  &sub_input))
937  {
938  error =
939  dpdk_device_config (conf, (vlib_pci_addr_t) (u32) ~ 1, &sub_input,
940  1);
941 
942  if (error)
943  return error;
944  }
945  else
946  if (unformat
947  (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr,
948  unformat_vlib_cli_sub_input, &sub_input))
949  {
950  error = dpdk_device_config (conf, pci_addr, &sub_input, 0);
951 
952  if (error)
953  return error;
954 
955  num_whitelisted++;
956  }
957  else if (unformat (input, "dev %U", unformat_vlib_pci_addr, &pci_addr))
958  {
959  error = dpdk_device_config (conf, pci_addr, 0, 0);
960 
961  if (error)
962  return error;
963 
964  num_whitelisted++;
965  }
966  else if (unformat (input, "num-mbufs %d", &conf->num_mbufs))
967  ;
968  else if (unformat (input, "uio-driver %s", &conf->uio_driver_name))
969  ;
970  else if (unformat (input, "socket-mem %s", &socket_mem))
971  ;
972  else if (unformat (input, "no-pci"))
973  {
974  no_pci = 1;
975  tmp = format (0, "--no-pci%c", 0);
976  vec_add1 (conf->eal_init_args, tmp);
977  }
978  else if (unformat (input, "poll-sleep %d", &dm->poll_sleep_usec))
979  ;
980 
981 #define _(a) \
982  else if (unformat(input, #a)) \
983  { \
984  tmp = format (0, "--%s%c", #a, 0); \
985  vec_add1 (conf->eal_init_args, tmp); \
986  }
988 #undef _
989 #define _(a) \
990  else if (unformat(input, #a " %s", &s)) \
991  { \
992  if (!strncmp(#a, "huge-dir", 8)) \
993  huge_dir = 1; \
994  else if (!strncmp(#a, "file-prefix", 11)) \
995  file_prefix = 1; \
996  tmp = format (0, "--%s%c", #a, 0); \
997  vec_add1 (conf->eal_init_args, tmp); \
998  vec_add1 (s, 0); \
999  if (!strncmp(#a, "vdev", 4)) \
1000  if (strstr((char*)s, "af_packet")) \
1001  clib_warning ("af_packet obsoleted. Use CLI 'create host-interface'."); \
1002  vec_add1 (conf->eal_init_args, s); \
1003  }
1005 #undef _
1006 #define _(a,b) \
1007  else if (unformat(input, #a " %s", &s)) \
1008  { \
1009  tmp = format (0, "-%s%c", #b, 0); \
1010  vec_add1 (conf->eal_init_args, tmp); \
1011  vec_add1 (s, 0); \
1012  vec_add1 (conf->eal_init_args, s); \
1013  }
1015 #undef _
1016 #define _(a,b) \
1017  else if (unformat(input, #a " %s", &s)) \
1018  { \
1019  tmp = format (0, "-%s%c", #b, 0); \
1020  vec_add1 (conf->eal_init_args, tmp); \
1021  vec_add1 (s, 0); \
1022  vec_add1 (conf->eal_init_args, s); \
1023  conf->a##_set_manually = 1; \
1024  }
1026 #undef _
1027  else if (unformat (input, "default"))
1028  ;
1029 
1030  else if (unformat_skip_white_space (input))
1031  ;
1032  else
1033  {
1034  error = clib_error_return (0, "unknown input `%U'",
1035  format_unformat_error, input);
1036  goto done;
1037  }
1038  }
1039 
1040  if (!conf->uio_driver_name)
1041  conf->uio_driver_name = format (0, "auto%c", 0);
1042 
1043  /*
1044  * Use 1G huge pages if available.
1045  */
1046  if (!no_huge && !huge_dir)
1047  {
1048  u32 x, *mem_by_socket = 0;
1049  uword c = 0;
1050  int rv;
1051 
1052  umount ((char *) huge_dir_path);
1053 
1054  /* Process "socket-mem" parameter value */
1055  if (vec_len (socket_mem))
1056  {
1057  unformat_input_t in;
1058  unformat_init_vector (&in, socket_mem);
1060  {
1061  if (unformat (&in, "%u,", &x))
1062  ;
1063  else if (unformat (&in, "%u", &x))
1064  ;
1065  else if (unformat (&in, ","))
1066  x = 0;
1067  else
1068  break;
1069 
1070  vec_add1 (mem_by_socket, x);
1071  }
1072  /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... */
1073  unformat_free (&in);
1074  socket_mem = 0;
1075  }
1076  else
1077  {
1078  /* *INDENT-OFF* */
1080  {
1081  vec_validate(mem_by_socket, c);
1082  mem_by_socket[c] = 64; /* default per-socket mem */
1083  }
1084  ));
1085  /* *INDENT-ON* */
1086  }
1087 
1088  /* *INDENT-OFF* */
1090  {
1091  clib_error_t *e;
1092 
1093  vec_validate(mem_by_socket, c);
1094 
1095  e = clib_sysfs_prealloc_hugepages(c, 2 << 10, mem_by_socket[c] / 2);
1096  if (e)
1097  clib_error_report (e);
1098  }));
1099  /* *INDENT-ON* */
1100 
1101  if (mem_by_socket == 0)
1102  {
1103  error = clib_error_return (0, "mem_by_socket NULL");
1104  goto done;
1105  }
1106  _vec_len (mem_by_socket) = c + 1;
1107 
1108  /* regenerate socket_mem string */
1109  vec_foreach_index (x, mem_by_socket)
1110  socket_mem = format (socket_mem, "%s%u",
1111  socket_mem ? "," : "", mem_by_socket[x]);
1112  socket_mem = format (socket_mem, "%c", 0);
1113 
1114  vec_free (mem_by_socket);
1115 
1116  error = vlib_unix_recursive_mkdir ((char *) huge_dir_path);
1117  if (error)
1118  {
1119  goto done;
1120  }
1121 
1122  rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, NULL);
1123 
1124  if (rv)
1125  {
1126  error = clib_error_return (0, "mount failed %d", errno);
1127  goto done;
1128  }
1129 
1130  tmp = format (0, "--huge-dir%c", 0);
1131  vec_add1 (conf->eal_init_args, tmp);
1132  tmp = format (0, "%s%c", huge_dir_path, 0);
1133  vec_add1 (conf->eal_init_args, tmp);
1134  if (!file_prefix)
1135  {
1136  tmp = format (0, "--file-prefix%c", 0);
1137  vec_add1 (conf->eal_init_args, tmp);
1138  tmp = format (0, "vpp%c", 0);
1139  vec_add1 (conf->eal_init_args, tmp);
1140  }
1141  }
1142 
1143  vec_free (rte_cmd);
1144  vec_free (ethname);
1145 
1146  if (error)
1147  return error;
1148 
1149  /* I'll bet that -c and -n must be the first and second args... */
1150  if (!conf->coremask_set_manually)
1151  {
1153  uword *coremask = 0;
1154  int i;
1155 
1156  /* main thread core */
1157  coremask = clib_bitmap_set (coremask, tm->main_lcore, 1);
1158 
1159  for (i = 0; i < vec_len (tm->registrations); i++)
1160  {
1161  tr = tm->registrations[i];
1162  coremask = clib_bitmap_or (coremask, tr->coremask);
1163  }
1164 
1165  vec_insert (conf->eal_init_args, 2, 1);
1166  conf->eal_init_args[1] = (u8 *) "-c";
1167  tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0);
1168  conf->eal_init_args[2] = tmp;
1169  clib_bitmap_free (coremask);
1170  }
1171 
1172  if (!conf->nchannels_set_manually)
1173  {
1174  vec_insert (conf->eal_init_args, 2, 3);
1175  conf->eal_init_args[3] = (u8 *) "-n";
1176  tmp = format (0, "%d", conf->nchannels);
1177  conf->eal_init_args[4] = tmp;
1178  }
1179 
1180  if (no_pci == 0 && geteuid () == 0)
1181  dpdk_bind_devices_to_uio (conf);
1182 
1183 #define _(x) \
1184  if (devconf->x == 0 && conf->default_devconf.x > 0) \
1185  devconf->x = conf->default_devconf.x ;
1186 
1187  /* *INDENT-OFF* */
1188  pool_foreach (devconf, conf->dev_confs, ({
1189 
1190  /* default per-device config items */
1191  foreach_dpdk_device_config_item
1192 
1193  /* add DPDK EAL whitelist/blacklist entry */
1194  if (num_whitelisted > 0 && devconf->is_blacklisted == 0)
1195  {
1196  tmp = format (0, "-w%c", 0);
1197  vec_add1 (conf->eal_init_args, tmp);
1198  tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
1199  vec_add1 (conf->eal_init_args, tmp);
1200  }
1201  else if (num_whitelisted == 0 && devconf->is_blacklisted != 0)
1202  {
1203  tmp = format (0, "-b%c", 0);
1204  vec_add1 (conf->eal_init_args, tmp);
1205  tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
1206  vec_add1 (conf->eal_init_args, tmp);
1207  }
1208  }));
1209  /* *INDENT-ON* */
1210 
1211 #undef _
1212 
1213  /* set master-lcore */
1214  tmp = format (0, "--master-lcore%c", 0);
1215  vec_add1 (conf->eal_init_args, tmp);
1216  tmp = format (0, "%u%c", tm->main_lcore, 0);
1217  vec_add1 (conf->eal_init_args, tmp);
1218 
1219  /* set socket-mem */
1220  tmp = format (0, "--socket-mem%c", 0);
1221  vec_add1 (conf->eal_init_args, tmp);
1222  tmp = format (0, "%s%c", socket_mem, 0);
1223  vec_add1 (conf->eal_init_args, tmp);
1224 
1225  /* NULL terminate the "argv" vector, in case of stupidity */
1226  vec_add1 (conf->eal_init_args, 0);
1227  _vec_len (conf->eal_init_args) -= 1;
1228 
1229  /* Set up DPDK eal and packet mbuf pool early. */
1230 
1231  rte_log_set_global_level (log_level);
1232 
1233  vm = vlib_get_main ();
1234 
1235  /* make copy of args as rte_eal_init tends to mess up with arg array */
1236  for (i = 1; i < vec_len (conf->eal_init_args); i++)
1237  conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ",
1238  conf->eal_init_args[i]);
1239 
1240  clib_warning ("EAL init args: %s", conf->eal_init_args_str);
1241  ret =
1242  rte_eal_init (vec_len (conf->eal_init_args),
1243  (char **) conf->eal_init_args);
1244 
1245  /* lazy umount hugepages */
1246  umount2 ((char *) huge_dir_path, MNT_DETACH);
1247  rmdir ((char *) huge_dir_path);
1248  vec_free (huge_dir_path);
1249 
1250  if (ret < 0)
1251  return clib_error_return (0, "rte_eal_init returned %d", ret);
1252 
1253  /* Dump the physical memory layout prior to creating the mbuf_pool */
1254  fprintf (stdout, "DPDK physical memory layout:\n");
1255  rte_dump_physmem_layout (stdout);
1256 
1257  /* set custom ring memory allocator */
1258  {
1259  struct rte_mempool_ops *ops = NULL;
1260 
1261  ops = get_ops_by_name ("ring_sp_sc");
1262  ops->alloc = dpdk_ring_alloc;
1263 
1264  ops = get_ops_by_name ("ring_mp_sc");
1265  ops->alloc = dpdk_ring_alloc;
1266 
1267  ops = get_ops_by_name ("ring_sp_mc");
1268  ops->alloc = dpdk_ring_alloc;
1269 
1270  ops = get_ops_by_name ("ring_mp_mc");
1271  ops->alloc = dpdk_ring_alloc;
1272  }
1273 
1274  /* main thread 1st */
1275  error = dpdk_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ());
1276  if (error)
1277  return error;
1278 
1279  for (i = 0; i < RTE_MAX_LCORE; i++)
1280  {
1281  error = dpdk_buffer_pool_create (vm, conf->num_mbufs,
1282  rte_lcore_to_socket_id (i));
1283  if (error)
1284  return error;
1285  }
1286 
1287 done:
1288  return error;
1289 }
1290 
1292 
/*
 * Refresh the cached DPDK link state of one device and report changes
 * to vnet.
 *
 * The previous xd->link is saved, xd->link is zeroed and re-read with
 * rte_eth_link_get_nowait (), and hw_flags is rebuilt starting from zero.
 * vnet_hw_interface_set_flags () is called only when hw_flags_chg was set.
 * Passing now == 0 leaves xd->time_last_link_update unchanged.
 *
 * NOTE(review): this listing omits several numbered lines (1294, 1311,
 * 1329, 1333, 1335, 1347, 1350, 1362, 1365, 1371, 1374, 1387). The
 * cross-reference index at the end of the page gives the line-1294
 * signature as: dpdk_update_link_state (dpdk_device_t *xd, f64 now).
 */
1293 void
1295 {
1296  vnet_main_t *vnm = vnet_get_main ();
1297  struct rte_eth_link prev_link = xd->link;
1298  u32 hw_flags = 0;
1299  u8 hw_flags_chg = 0;
1300 
1301  /* only update link state for PMD interfaces */
1302  if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
1303  return;
1304 
1305  xd->time_last_link_update = now ? now : xd->time_last_link_update;
1306  memset (&xd->link, 0, sizeof (xd->link));
1307  rte_eth_link_get_nowait (xd->device_index, &xd->link);
1308 
/*
 * Optional event-log record; inactive while LINK_STATE_ELOGS is defined
 * as 0. NOTE(review): 'vm' is used below but its declaration (listing
 * line 1311) is not shown, and the right-hand side of old_link_state
 * (listing line 1329) is missing.
 */
1309  if (LINK_STATE_ELOGS)
1310  {
1312  ELOG_TYPE_DECLARE (e) =
1313  {
1314  .format =
1315  "update-link-state: sw_if_index %d, admin_up %d,"
1316  "old link_state %d new link_state %d",.format_args = "i4i1i1i1",};
1317 
1318  struct
1319  {
1320  u32 sw_if_index;
1321  u8 admin_up;
1322  u8 old_link_state;
1323  u8 new_link_state;
1324  } *ed;
1325  ed = ELOG_DATA (&vm->elog_main, e);
1326  ed->sw_if_index = xd->vlib_sw_if_index;
1327  ed->admin_up = (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) != 0;
1328  ed->old_link_state = (u8)
1330  ed->new_link_state = (u8) xd->link.link_status;
1331  }
1332 
/*
 * Link up/down change detection.
 * NOTE(review): the opening 'if (' part and the final operand of this
 * condition (listing lines 1333 and 1335) are missing; only the XOR
 * against the new link status is visible -- TODO confirm against the
 * real file.
 */
1334  && ((xd->link.link_status != 0) ^
1336  {
1337  hw_flags_chg = 1;
1338  hw_flags |= (xd->link.link_status ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
1339  }
1340 
/*
 * Because hw_flags starts at zero, duplex is re-evaluated whenever any
 * earlier change was detected, not only when duplex itself changed.
 * NOTE(review): the statements of both duplex cases (listing lines 1347
 * and 1350) are missing.
 */
1341  if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex))
1342  {
1343  hw_flags_chg = 1;
1344  switch (xd->link.link_duplex)
1345  {
1346  case ETH_LINK_HALF_DUPLEX:
1348  break;
1349  case ETH_LINK_FULL_DUPLEX:
1351  break;
1352  default:
1353  break;
1354  }
1355  }
/*
 * Same pattern for speed. A speed of 0 is accepted silently; any other
 * unrecognised value is logged with clib_warning ().
 * NOTE(review): only the 1G case body is visible; the statements for
 * 10M, 100M, 10G and 40G (listing lines 1362, 1365, 1371, 1374) are
 * missing.
 */
1356  if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed))
1357  {
1358  hw_flags_chg = 1;
1359  switch (xd->link.link_speed)
1360  {
1361  case ETH_SPEED_NUM_10M:
1363  break;
1364  case ETH_SPEED_NUM_100M:
1366  break;
1367  case ETH_SPEED_NUM_1G:
1368  hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G;
1369  break;
1370  case ETH_SPEED_NUM_10G:
1372  break;
1373  case ETH_SPEED_NUM_40G:
1375  break;
1376  case 0:
1377  break;
1378  default:
1379  clib_warning ("unknown link speed %d", xd->link.link_speed);
1380  break;
1381  }
1382  }
/* Publish the rebuilt flag set only if something actually changed. */
1383  if (hw_flags_chg)
1384  {
/*
 * NOTE(review): 'vm' is used in this elog block as well; listing line
 * 1387, presumably its declaration, is missing.
 */
1385  if (LINK_STATE_ELOGS)
1386  {
1388 
1389  ELOG_TYPE_DECLARE (e) =
1390  {
1391  .format =
1392  "update-link-state: sw_if_index %d, new flags %d",.format_args
1393  = "i4i4",};
1394 
1395  struct
1396  {
1397  u32 sw_if_index;
1398  u32 flags;
1399  } *ed;
1400  ed = ELOG_DATA (&vm->elog_main, e);
1401  ed->sw_if_index = xd->vlib_sw_if_index;
1402  ed->flags = hw_flags;
1403  }
1404  vnet_hw_interface_set_flags (vnm, xd->hw_if_index, hw_flags);
1405  }
1406 }
1407 
/*
 * Body of the DPDK process node.
 *
 * Steps, in order:
 *   1. Call dpdk_lib_init (); an error is passed to clib_error_report ()
 *      and execution continues.
 *   2. Set tm->worker_thread_release to 1.
 *   3. Take an initial link-state snapshot of every device.
 *   4. For each bonded port, copy the first slave's MAC to the bond and
 *      to the other slaves, register a link-state-change callback on each
 *      slave, and clamp the bond's packet-size limits to the slaves'.
 *   5. Loop forever, polling counters and link state per device whenever
 *      the respective interval has elapsed.
 *
 * NOTE(review): this listing omits several numbered lines (1409, 1414,
 * 1416, 1480-1483, 1503, 1505, 1516-1517, 1522-1525, 1542). The
 * cross-reference index at the end of the page gives the line-1409
 * signature as:
 *   dpdk_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f).
 * 'em' and 'tm' are used below but declared on lines that are not shown.
 */
1408 static uword
1410 {
1411  clib_error_t *error;
1412  vnet_main_t *vnm = vnet_get_main ();
1413  dpdk_main_t *dm = &dpdk_main;
1415  dpdk_device_t *xd;
1417  int i;
1418 
1419  error = dpdk_lib_init (dm);
1420 
1421  if (error)
1422  clib_error_report (error);
1423 
1424  tm->worker_thread_release = 1;
1425 
/* Initial link-state snapshot for all devices. */
1426  f64 now = vlib_time_now (vm);
1427  vec_foreach (xd, dm->devices)
1428  {
1429  dpdk_update_link_state (xd, now);
1430  }
1431 
1432  {
1433  /*
1434  * Extra set up for bond interfaces:
1435  * 1. Setup MACs for bond interfaces and their slave links which was set
1436  * in dpdk_device_setup() but needs to be done again here to take
1437  * effect.
1438  * 2. Set up info and register slave link state change callback handling.
1439  * 3. Set up info for bond interface related CLI support.
1440  */
1441  int nports = rte_eth_dev_count ();
1442  if (nports > 0)
1443  {
/* dm->devices is indexed by DPDK port number here (see the ASSERT). */
1444  for (i = 0; i < nports; i++)
1445  {
1446  xd = &dm->devices[i];
1447  ASSERT (i == xd->device_index);
1448  if (xd->pmd == VNET_DPDK_PMD_BOND)
1449  {
1450  u8 addr[6];
/* At most 16 slave port ids are requested from DPDK. */
1451  dpdk_portid_t slink[16];
1452  int nlink = rte_eth_bond_slaves_get (i, slink, 16);
1453  if (nlink > 0)
1454  {
1455  vnet_hw_interface_t *bhi;
1456  ethernet_interface_t *bei;
1457  int rv;
1458 
1459  /* Get MAC of 1st slave link */
1460  rte_eth_macaddr_get
1461  (slink[0], (struct ether_addr *) addr);
1462 
1463  /* Set MAC of bounded interface to that of 1st slave link */
1464  clib_warning ("Set MAC for bond port %d BondEthernet%d",
1465  i, xd->port_id);
1466  rv = rte_eth_bond_mac_address_set
1467  (i, (struct ether_addr *) addr);
1468  if (rv)
1469  clib_warning ("Set MAC addr failure rv=%d", rv);
1470 
1471  /* Populate MAC of bonded interface in VPP hw tables */
1472  bhi = vnet_get_hw_interface
1473  (vnm, dm->devices[i].hw_if_index);
1474  bei = pool_elt_at_index
1475  (em->interfaces, bhi->hw_instance);
1476  clib_memcpy (bhi->hw_address, addr, 6);
1477  clib_memcpy (bei->address, addr, 6);
1478 
1479  /* Init l3 packet size allowed on bonded interface */
/*
 * NOTE(review): the statements announced by the comment above (listing
 * lines 1480-1483) are missing from this listing.
 */
/*
 * nlink is pre-decremented, so slaves are visited from last to first
 * and 'if (nlink)' below is false only for slink[0].
 */
1484  while (nlink >= 1)
1485  { /* for all slave links */
1486  int slave = slink[--nlink];
1487  dpdk_device_t *sdev = &dm->devices[slave];
1488  vnet_hw_interface_t *shi;
1489  vnet_sw_interface_t *ssi;
1490  ethernet_interface_t *sei;
1491  /* Add MAC to all slave links except the first one */
1492  if (nlink)
1493  {
1494  clib_warning ("Add MAC for slave port %d", slave);
1495  rv = rte_eth_dev_mac_addr_add
1496  (slave, (struct ether_addr *) addr, 0);
1497  if (rv)
1498  clib_warning ("Add MAC addr failure rv=%d", rv);
1499  }
1500  /* Setup slave link state change callback handling */
/*
 * NOTE(review): the remaining arguments of this call (listing line
 * 1503) are missing.
 */
1501  rte_eth_dev_callback_register
1502  (slave, RTE_ETH_EVENT_INTR_LSC,
/*
 * sxd points at the same element as sdev above.
 * NOTE(review): listing line 1505 is missing.
 */
1504  dpdk_device_t *sxd = &dm->devices[slave];
1506  sxd->bond_port = i;
1507  /* Set slaves bitmap for bonded interface */
1508  bhi->bond_info = clib_bitmap_set
1509  (bhi->bond_info, sdev->hw_if_index, 1);
1510  /* Set MACs and slave link flags on slave interface */
1511  shi = vnet_get_hw_interface (vnm, sdev->hw_if_index);
1512  ssi = vnet_get_sw_interface
1513  (vnm, sdev->vlib_sw_if_index);
1514  sei = pool_elt_at_index
1515  (em->interfaces, shi->hw_instance);
/*
 * NOTE(review): listing lines 1516-1517 are missing; presumably they
 * set the slave flags mentioned in the comment above -- TODO confirm.
 */
1518  clib_memcpy (shi->hw_address, addr, 6);
1519  clib_memcpy (sei->address, addr, 6);
1520  /* Set l3 packet size allowed as the lowest of slave */
/*
 * NOTE(review): the rest of this 'if' and its body (listing lines
 * 1522-1525) are missing.
 */
1521  if (bhi->max_l3_packet_bytes[VLIB_RX] >
1526  /* Set max packet size allowed as the lowest of slave */
1527  if (bhi->max_packet_bytes > shi->max_packet_bytes)
1528  bhi->max_packet_bytes = shi->max_packet_bytes;
1529  }
1530  }
1531  }
1532  }
1533  }
1534  }
1535 
/* Periodic poll loop; it contains no break, so it never exits. */
1536  while (1)
1537  {
1538  /*
1539  * check each time through the loop in case intervals are changed
1540  */
/*
 * NOTE(review): the two branches of this conditional expression
 * (listing line 1542) are missing; presumably it selects the smaller of
 * the two poll intervals -- TODO confirm.
 */
1541  f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ?
1543 
1544  vlib_process_wait_for_event_or_clock (vm, min_wait);
1545 
1546  if (dm->admin_up_down_in_progress)
1547  /* skip the poll if an admin up down is in progress (on any interface) */
1548  continue;
1549 
/* 'now' is sampled once per device, and each interval is checked separately. */
1550  vec_foreach (xd, dm->devices)
1551  {
1552  f64 now = vlib_time_now (vm);
1553  if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval)
1554  dpdk_update_counters (xd, now);
1555  if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval)
1556  dpdk_update_link_state (xd, now);
1557 
1558  }
1559  }
1560 
/* Not reached: the loop above has no exit path. */
1561  return 0;
1562 }
1563 
/*
 * Registration of the process node whose body is dpdk_process, under the
 * name "dpdk-process".
 *
 * NOTE(review): the opening line of this initializer (listing line 1565)
 * is missing; the cross-reference index at the end of the page lists it
 * as VLIB_REGISTER_NODE (dpdk_process_node).
 */
1564 /* *INDENT-OFF* */
1566  .function = dpdk_process,
1567  .type = VLIB_NODE_TYPE_PROCESS,
1568  .name = "dpdk-process",
/* Stack size given as a log2 value; presumably 2^17 bytes -- TODO confirm. */
1569  .process_log2_n_stack_bytes = 17,
1570 };
1571 /* *INDENT-ON* */
1572 
/*
 * Init function for the DPDK plugin state.
 *
 * Checks structure layout at compile time, fills in the dpdk_main
 * back-pointers (vlib_main, vnet_main, conf), seeds default configuration
 * values, and finally runs dpdk_cli_init through vlib_call_init_function ().
 * Returns the error from that call, or 0 if it succeeded.
 *
 * NOTE(review): this listing omits several numbered lines (1574, 1578,
 * 1584, 1601, 1605-1606). The cross-reference index at the end of the
 * page gives the line-1574 signature as: dpdk_init (vlib_main_t *vm).
 * 'tm' is used below but declared on a line that is not shown.
 */
1573 static clib_error_t *
1575 {
1576  dpdk_main_t *dm = &dpdk_main;
1577  clib_error_t *error = 0;
1579 
1580  /* verify that structs are cacheline aligned */
1581  STATIC_ASSERT (offsetof (dpdk_device_t, cacheline0) == 0,
1582  "Cache line marker must be 1st element in dpdk_device_t");
/*
 * NOTE(review): the right-hand side of this comparison (listing line
 * 1584) is missing.
 */
1583  STATIC_ASSERT (offsetof (dpdk_device_t, cacheline1) ==
1585  "Data in cache line 0 is bigger than cache line size");
1586  STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0,
1587  "Cache line marker must be 1st element in frame_queue_trace_t");
1588 
1589  dm->vlib_main = vm;
1590  dm->vnet_main = vnet_get_main ();
1591  dm->conf = &dpdk_config_main;
1592 
/* Defaults: a num_mbufs value that is already non-zero is kept, else NB_MBUF. */
1593  dm->conf->nchannels = 4;
1594  dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF;
/* First EAL argument; presumably plays the role of argv[0] -- TODO confirm. */
1595  vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet");
1596 
/* Make dm->recycle long enough to be indexed up to n_thread_stacks - 1. */
1597  vec_validate (dm->recycle, tm->n_thread_stacks - 1);
1598 
1599  /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */
/*
 * NOTE(review): the first operand(s) of this expression (listing line
 * 1601) are missing.
 */
1600  dm->buffer_flags_template =
1602  | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED |
1603  VNET_BUFFER_F_L4_CHECKSUM_CORRECT | VNET_BUFFER_F_L2_HDR_OFFSET_VALID);
1604 
/* NOTE(review): listing lines 1605-1606 are missing here. */
1607 
1608  /* init CLI */
1609  if ((error = vlib_call_init_function (vm, dpdk_cli_init)))
1610  return error;
1611 
1612  return error;
1613 }
1614 
1616 
1617 
1618 /*
1619  * fd.io coding-style-patch-verification: ON
1620  *
1621  * Local Variables:
1622  * eval: (c-set-style "gnu")
1623  * End:
1624  */
u32 ** d_trace_buffers
Definition: dpdk.h:176
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
Definition: vec.h:432
#define DPDK_DEVICE_FLAG_PROMISC
Definition: dpdk.h:183
static void dpdk_bind_devices_to_uio(dpdk_config_main_t *conf)
Definition: init.c:685
f64 time_last_link_update
Definition: dpdk.h:223
vmrglw vmrglh hi
static u8 * format_bitmap_hex(u8 *s, va_list *args)
Format a bitmap as a string of hex bytes.
Definition: bitmap.h:744
format_function_t format_vlib_pci_addr
Definition: pci.h:256
#define vec_foreach_index(var, v)
Iterate over vector indices.
#define hash_set(h, key, value)
Definition: hash.h:254
#define VIRTIO_PCI_MODERN_DEVICEID_NET
Definition: pci_config.h:169
sll srl srl sll sra u16x4 i
Definition: vector_sse2.h:337
#define clib_min(x, y)
Definition: clib.h:340
#define VNET_HW_INTERFACE_FLAG_SPEED_1G
Definition: interface.h:407
ethernet_main_t ethernet_main
Definition: init.c:45
clib_error_t * vnet_hw_interface_set_flags(vnet_main_t *vnm, u32 hw_if_index, u32 flags)
Definition: interface.c:538
static f64 vlib_process_wait_for_event_or_clock(vlib_main_t *vm, f64 dt)
Suspend a cooperative multi-tasking thread. Waits for an event, or for the indicated number of seconds...
Definition: node_funcs.h:699
u8 interface_name_format_decimal
Definition: dpdk.h:338
vnet_main_t * vnet_get_main(void)
Definition: misc.c:47
#define NB_MBUF
Definition: dpdk.h:57
vnet_device_class_t dpdk_device_class
#define DPDK_DEVICE_VLAN_STRIP_OFF
Definition: dpdk.h:305
#define DPDK_DEVICE_FLAG_TX_OFFLOAD
Definition: dpdk.h:191
#define NULL
Definition: clib.h:55
static f64 vlib_time_now(vlib_main_t *vm)
Definition: main.h:224
#define vec_add2_aligned(V, P, N, A)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:569
static uword * clib_bitmap_or(uword *ai, uword *bi)
Logical operator across two bitmaps.
static u32 dpdk_flag_change(vnet_main_t *vnm, vnet_hw_interface_t *hi, u32 flags)
Definition: init.c:81
void dpdk_update_link_state(dpdk_device_t *xd, f64 now)
Definition: init.c:1294
vlib_pci_device_info_t * vlib_pci_get_device_info(vlib_pci_addr_t *addr, clib_error_t **error)
Definition: pci.c:156
u16 flags
Definition: dpdk.h:181
static vnet_hw_interface_t * vnet_get_hw_interface(vnet_main_t *vnm, u32 hw_if_index)
#define LINK_STATE_ELOGS
Definition: init.c:42
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:518
dpdk_device_and_queue_t ** devices_by_hqos_cpu
Definition: dpdk.h:354
#define DPDK_NB_RX_DESC_VIRTIO
Definition: dpdk_priv.h:21
clib_error_t * errors
Definition: dpdk.h:237
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:557
#define DPDK_DEVICE_FLAG_HQOS
Definition: dpdk.h:188
u32 per_interface_next_index
Definition: dpdk.h:169
u8 enable_tcp_udp_checksum
Definition: dpdk.h:324
static uword * clib_bitmap_set(uword *ai, uword i, uword value)
Sets the ith bit of a bitmap to new_value. Removes trailing zeros from the bitmap. ...
Definition: bitmap.h:167
#define DPDK_DEVICE_VLAN_STRIP_ON
Definition: dpdk.h:306
static vnet_sw_interface_t * vnet_get_sw_interface(vnet_main_t *vnm, u32 sw_if_index)
vlib_buffer_t * buffer_templates
Definition: dpdk.h:360
u8 * format(u8 *s, const char *fmt,...)
Definition: format.c:419
#define DPDK_NB_TX_DESC_DEFAULT
Definition: dpdk_priv.h:20
#define foreach_eal_double_hyphen_predicate_arg
Definition: dpdk_priv.h:32
unformat_function_t unformat_vlib_pci_addr
Definition: pci.h:255
#define VNET_HW_INTERFACE_FLAG_LINK_UP
Definition: interface.h:394
int dpdk_port_state_callback(dpdk_portid_t port_id, enum rte_eth_event_type type, void *param, void *ret_param)
Definition: common.c:301
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:443
dpdk_device_config_hqos_t hqos
Definition: dpdk.h:313
vlib_pci_addr_t * vlib_pci_get_all_dev_addrs()
Definition: pci.c:1027
#define pool_get(P, E)
Allocate an object E from a pool P (unspecified alignment).
Definition: pool.h:225
dpdk_config_main_t dpdk_config_main
Definition: init.c:40
#define vec_reset_length(v)
Reset vector length to zero NULL-pointer tolerant.
#define DPDK_DEVICE_FLAG_PMD
Definition: dpdk.h:184
static vnet_sw_interface_t * vnet_get_hw_sw_interface(vnet_main_t *vnm, u32 hw_if_index)
#define DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM
Definition: dpdk.h:192
struct rte_mbuf *** tx_vectors
Definition: dpdk.h:172
foreach_dpdk_device_config_item clib_bitmap_t * workers
Definition: dpdk.h:311
#define pool_foreach(VAR, POOL, BODY)
Iterate through pool.
Definition: pool.h:438
#define VLIB_INIT_FUNCTION(x)
Definition: init.h:111
dpdk_portid_t port_id
Definition: dpdk.h:216
vlib_node_registration_t dpdk_input_node
(constructor) VLIB_REGISTER_NODE (dpdk_input_node)
Definition: node.c:619
dpdk_device_config_t default_devconf
Definition: dpdk.h:341
f64 stat_poll_interval
Definition: dpdk.h:387
static char * vlib_unix_get_runtime_dir(void)
Definition: unix.h:138
static dpdk_port_type_t port_type_from_speed_capa(struct rte_eth_dev_info *dev_info)
Definition: init.c:60
#define VLIB_BUFFER_TOTAL_LENGTH_VALID
Definition: buffer.h:97
int i32
Definition: types.h:81
clib_error_t * vlib_pci_bind_to_uio(vlib_pci_addr_t *addr, char *uio_drv_name)
Definition: pci.c:330
char i8
Definition: types.h:45
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
#define clib_error_return(e, args...)
Definition: error.h:99
u16 rx_q_used
Definition: dpdk.h:204
#define VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES
Definition: buffer.h:435
#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG
Definition: dpdk.h:186
void dpdk_device_setup(dpdk_device_t *xd)
Definition: common.c:39
#define vlib_call_init_function(vm, x)
Definition: init.h:162
clib_error_t * unformat_rss_fn(unformat_input_t *input, uword *rss_fn)
Definition: format.c:757
#define DPDK_NB_TX_DESC_VIRTIO
Definition: dpdk_priv.h:22
struct rte_eth_conf port_conf
Definition: dpdk.h:208
static clib_error_t * dpdk_init(vlib_main_t *vm)
Definition: init.c:1574
#define fl(x, y)
f64 time_last_stats_update
Definition: dpdk.h:230
u32 vlib_sw_if_index
Definition: dpdk.h:166
struct rte_eth_txconf tx_conf
Definition: dpdk.h:209
#define hash_get(h, key)
Definition: hash.h:248
#define clib_bitmap_foreach(i, ai, body)
Macro to iterate across set bits in a bitmap.
Definition: bitmap.h:361
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
Definition: pool.h:459
#define vec_insert(V, N, M)
Insert N vector elements starting at element M, initialize new elements to zero (no header...
Definition: vec.h:682
vlib_pci_addr_t pci_addr
Definition: dpdk.h:301
clib_error_t * dpdk_buffer_pool_create(vlib_main_t *vm, unsigned num_mbufs, unsigned socket_id)
Definition: buffer.c:514
static clib_error_t * dpdk_config(vlib_main_t *vm, unformat_input_t *input)
Definition: init.c:883
#define foreach_eal_double_hyphen_arg
Definition: dpdk_priv.h:48
dpdk_portid_t bond_port
Definition: dpdk.h:220
#define ETHERNET_INTERFACE_FLAG_CONFIG_MTU(flags)
Definition: ethernet.h:125
u8 ** eal_init_args
Definition: dpdk.h:320
#define VNET_HW_INTERFACE_FLAG_SPEED_10M
Definition: interface.h:405
#define VNET_SW_INTERFACE_FLAG_BOND_SLAVE
Definition: interface.h:585
#define foreach_eal_single_hyphen_mandatory_arg
Definition: dpdk_priv.h:38
struct _unformat_input_t unformat_input_t
#define VNET_HW_INTERFACE_FLAG_HALF_DUPLEX
Definition: interface.h:397
#define foreach_dpdk_pmd
Definition: dpdk.h:62
#define VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX
Definition: buffer.h:434
#define ELOG_DATA(em, f)
Definition: elog.h:481
#define VIRTIO_PCI_LEGACY_DEVICEID_NET
Definition: pci_config.h:168
dpdk_port_type_t port_type
Definition: dpdk.h:231
#define VLIB_CONFIG_FUNCTION(x, n,...)
Definition: init.h:119
#define VLIB_FRAME_SIZE
Definition: node.h:328
u16 tx_q_used
Definition: dpdk.h:203
u16 nb_rx_desc
Definition: dpdk.h:205
uint16_t dpdk_portid_t
Definition: dpdk.h:125
u32 hw_if_index
Definition: dpdk.h:165
void unformat_init_vector(unformat_input_t *input, u8 *vector_string)
Definition: unformat.c:1031
#define DPDK_DEVICE_FLAG_ADMIN_UP
Definition: dpdk.h:182
u32 ** recycle
Definition: dpdk.h:357
#define VNET_HW_INTERFACE_BOND_INFO_SLAVE
Definition: interface.h:486
#define foreach_eal_single_hyphen_arg
Definition: dpdk_priv.h:42
#define DPDK_NB_RX_DESC_DEFAULT
Definition: dpdk_priv.h:19
#define UNFORMAT_END_OF_INPUT
Definition: format.h:143
svmdb_client_t * c
static clib_error_t * dpdk_device_config(dpdk_config_main_t *conf, vlib_pci_addr_t pci_addr, unformat_input_t *input, u8 is_default)
Definition: init.c:783
dpdk_device_t * devices
Definition: dpdk.h:353
vlib_main_t * vm
Definition: buffer.c:283
u32 vlib_buffer_get_or_create_free_list(vlib_main_t *vm, u32 n_data_bytes, char *fmt,...)
Definition: buffer.c:438
static void dpdk_update_counters(dpdk_device_t *xd, f64 now)
Definition: dpdk_priv.h:86
u8 nchannels_set_manually
Definition: dpdk.h:329
#define vec_free(V)
Free vector's memory (no header).
Definition: vec.h:336
volatile u32 ** lockp
Definition: dpdk.h:160
dpdk_device_config_t * dev_confs
Definition: dpdk.h:342
struct rte_mbuf *** rx_vectors
Definition: dpdk.h:173
#define clib_warning(format, args...)
Definition: error.h:59
#define clib_memcpy(a, b, c)
Definition: string.h:75
dpdk_pmd_t pmd
Definition: dpdk.h:178
format_function_t format_dpdk_device_errors
Definition: dpdk.h:456
elog_main_t elog_main
Definition: main.h:155
#define ETHERNET_INTERFACE_FLAG_ACCEPT_ALL
Definition: ethernet.h:119
#define VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD
Definition: interface.h:423
u8 coremask_set_manually
Definition: dpdk.h:328
#define ELOG_TYPE_DECLARE(f)
Definition: elog.h:439
#define VNET_HW_INTERFACE_FLAG_SPEED_10G
Definition: interface.h:408
#define vec_validate_ha(V, I, H, A)
Make sure vector is long enough for given index (general version).
Definition: vec.h:412
static void dpdk_device_lock_init(dpdk_device_t *xd)
Definition: init.c:113
u8 * interface_name_suffix
Definition: dpdk.h:197
#define hash_create(elts, value_bytes)
Definition: hash.h:681
#define VNET_HW_INTERFACE_FLAG_FULL_DUPLEX
Definition: interface.h:398
u32 max_l3_packet_bytes[VLIB_N_RX_TX]
Definition: interface.h:472
#define ASSERT(truth)
void dpdk_device_config_hqos_default(dpdk_device_config_hqos_t *hqos)
Definition: hqos.c:205
format_function_t format_dpdk_device_name
Definition: dpdk.h:454
unsigned int u32
Definition: types.h:88
int hqos_cpu_count
Definition: dpdk.h:383
void vnet_hw_interface_assign_rx_thread(vnet_main_t *vnm, u32 hw_if_index, u16 queue_id, uword thread_index)
Definition: devices.c:138
u32 poll_sleep_usec
Definition: dpdk.h:390
static uword dpdk_process(vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
Definition: init.c:1409
Bitmaps built as vectors of machine words.
clib_error_t * ethernet_register_interface(vnet_main_t *vnm, u32 dev_class_index, u32 dev_instance, u8 *address, u32 *hw_if_index_return, ethernet_flag_change_function_t flag_change)
Definition: interface.c:273
#define clib_error_report(e)
Definition: error.h:113
#define clib_bitmap_free(v)
Free a bitmap.
Definition: bitmap.h:92
#define DPDK_LINK_POLL_INTERVAL
Definition: dpdk.h:243
size_t count
Definition: vapi.c:42
dpdk_main_t dpdk_main
Definition: init.c:39
uword * thread_registrations_by_name
Definition: threads.h:297
clib_error_t * dpdk_cli_init(vlib_main_t *vm)
Definition: cli.c:1942
dpdk_portid_t device_index
Definition: dpdk.h:163
struct rte_eth_link link
Definition: dpdk.h:222
static vlib_main_t * vlib_get_main(void)
Definition: global_funcs.h:23
clib_error_t * dpdk_port_setup_hqos(dpdk_device_t *xd, dpdk_device_config_hqos_t *hqos)
Definition: hqos.c:247
u64 uword
Definition: types.h:112
dpdk_port_type_t
Definition: dpdk.h:94
static uword clib_bitmap_count_set_bits(uword *ai)
Return the number of set bits in a bitmap.
Definition: bitmap.h:441
Definition: defs.h:47
clib_error_t * vlib_unix_recursive_mkdir(char *path)
Definition: util.c:102
#define DPDK_STATS_POLL_INTERVAL
Definition: dpdk.h:240
unsigned short u16
Definition: types.h:57
#define VNET_HW_INTERFACE_FLAG_SPEED_100M
Definition: interface.h:406
static vlib_node_registration_t dpdk_process_node
(constructor) VLIB_REGISTER_NODE (dpdk_process_node)
Definition: init.c:1565
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
double f64
Definition: types.h:142
unsigned char u8
Definition: types.h:56
uword unformat_vlib_cli_sub_input(unformat_input_t *i, va_list *args)
Definition: cli.c:152
u8 admin_up_down_in_progress
Definition: dpdk.h:379
#define STATIC_ASSERT(truth,...)
static struct rte_eth_conf port_conf_template
Definition: init.c:46
u8 no_tx_checksum_offload
Definition: dpdk.h:325
static uword unformat_bitmap_list(unformat_input_t *input, va_list *va)
unformat a list of bit ranges into a bitmap (eg "0-3,5-7,11" )
Definition: bitmap.h:693
static void unformat_free(unformat_input_t *i)
Definition: format.h:161
#define DPDK_DEVICE_FLAG_BOND_SLAVE
Definition: dpdk.h:189
static clib_error_t * dpdk_lib_init(dpdk_main_t *dm)
Definition: init.c:180
static struct rte_mempool_ops * get_ops_by_name(i8 *ops_name)
Definition: init.c:126
#define hash_get_mem(h, key)
Definition: hash.h:268
u32 buffer_flags_template
Definition: dpdk.h:363
static void * clib_mem_alloc_aligned(uword size, uword align)
Definition: mem.h:120
#define VLIB_BUFFER_EXT_HDR_VALID
Definition: buffer.h:101
static void vlib_buffer_init_for_free_list(vlib_buffer_t *dst, vlib_buffer_free_list_t *fl)
Definition: buffer_funcs.h:809
#define vnet_buffer(b)
Definition: buffer.h:326
static u32 random_u32(u32 *seed)
32-bit random number generator
Definition: random.h:69
#define VNET_HW_INTERFACE_FLAG_SPEED_40G
Definition: interface.h:409
u8 * format_unformat_error(u8 *s, va_list *va)
Definition: unformat.c:91
u32 vlib_buffer_free_list_index
Definition: dpdk.h:366
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:143
int hqos_cpu_first_index
Definition: dpdk.h:382
static vlib_thread_main_t * vlib_get_thread_main()
Definition: global_funcs.h:32
#define ETHERNET_MAX_PACKET_BYTES
Definition: ethernet.h:112
#define vec_foreach(var, vec)
Vector iterator.
i8 cpu_socket
Definition: dpdk.h:179
#define ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC(flags)
Definition: ethernet.h:120
uword * cpu_socket_bitmap
Definition: threads.h:332
static int dpdk_ring_alloc(struct rte_mempool *mp)
Definition: init.c:140
vhost_vring_addr_t addr
Definition: vhost-user.h:83
static vlib_buffer_free_list_t * vlib_buffer_get_free_list(vlib_main_t *vm, u32 free_list_index)
Definition: buffer_funcs.h:451
u8 * uio_driver_name
Definition: dpdk.h:322
vlib_thread_registration_t ** registrations
Definition: threads.h:295
static void vlib_pci_free_device_info(vlib_pci_device_info_t *di)
Definition: pci.h:104
u32 flags
Definition: vhost-user.h:77
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:67
unformat_function_t unformat_dpdk_log_level
Definition: dpdk.h:461
ethernet_interface_t * interfaces
Definition: ethernet.h:250
u32 flags
buffer flags: VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index, VLIB_BUFFER_IS_TRACED: trace this buffer.
Definition: buffer.h:75
vnet_main_t * vnet_main
Definition: dpdk.h:394
u16 nb_tx_desc
Definition: dpdk.h:194
clib_error_t * unformat_hqos(unformat_input_t *input, dpdk_device_config_hqos_t *hqos)
Definition: format.c:794
u16 device_class
Definition: pci.h:69
uword * device_config_index_by_pci_addr
Definition: dpdk.h:343
uword unformat_skip_white_space(unformat_input_t *input)
Definition: unformat.c:815
static uword vnet_hw_interface_is_link_up(vnet_main_t *vnm, u32 hw_if_index)
volatile u32 worker_thread_release
Definition: threads.h:338
static void vnet_hw_interface_set_input_node(vnet_main_t *vnm, u32 hw_if_index, u32 node_index)
Definition: devices.h:79
vnet_device_main_t vnet_device_main
Definition: devices.c:22
uword unformat(unformat_input_t *i, const char *fmt,...)
Definition: unformat.c:972
Definition: defs.h:46
f64 link_state_poll_interval
Definition: dpdk.h:386
CLIB vectors are ubiquitous dynamically resized arrays with user-defined "headers".
static uword unformat_check_input(unformat_input_t *i)
Definition: format.h:169
dpdk_config_main_t * conf
Definition: dpdk.h:395
vlib_main_t * vlib_main
Definition: dpdk.h:393