FD.io VPP  v17.10-9-gd594711
Vector Packet Processing
init.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <vnet/vnet.h>
16 #include <vppinfra/vec.h>
17 #include <vppinfra/error.h>
18 #include <vppinfra/format.h>
19 #include <vppinfra/bitmap.h>
20 #include <vppinfra/linux/sysfs.h>
21 #include <vlib/unix/unix.h>
22 
23 #include <vnet/ethernet/ethernet.h>
24 #include <dpdk/device/dpdk.h>
25 #include <vlib/pci/pci.h>
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <unistd.h>
30 #include <sys/stat.h>
31 #include <sys/mount.h>
32 #include <string.h>
33 #include <fcntl.h>
34 
35 #include <dpdk/device/dpdk_priv.h>
36 
38 
39 #define LINK_STATE_ELOGS 0
40 
41 /* Port configuration, mildly modified Intel app values */
42 
/*
 * Template rte_eth_conf for DPDK ports. Code later in this file writes
 * rxmode.jumbo_frame and rxmode.enable_scatter on this template depending
 * on dm->conf->no_multi_seg. Presumably the template is then copied into
 * each device port_conf (the copy statement is not visible in this
 * listing) - TODO confirm.
 */
43 static struct rte_eth_conf port_conf_template = {
44  .rxmode = {
45  .split_hdr_size = 0,
46  .header_split = 0, /**< Header Split disabled */
47  .hw_ip_checksum = 0, /**< IP checksum offload disabled */
48  .hw_vlan_filter = 0, /**< VLAN filtering disabled */
49  .hw_strip_crc = 0, /**< CRC stripping by hardware disabled */
50  },
51  .txmode = {
52  .mq_mode = ETH_MQ_TX_NONE,
53  },
54 };
55 
/*
 * Derive a dpdk_port_type_t from the speed capability bits in
 * dev_info->speed_capa. The bits are tested from the fastest (100G) down
 * to the slowest (1G), so the first matching, i.e. highest, speed wins.
 *
 * NOTE(review): the statement that follows each test and the final
 * fallback (listing lines 61, 63, 65, 67, 69, 71 and 73) are missing from
 * this listing, so the concrete values returned cannot be confirmed here.
 */
56 static dpdk_port_type_t
57 port_type_from_speed_capa (struct rte_eth_dev_info *dev_info)
58 {
59 
60  if (dev_info->speed_capa & ETH_LINK_SPEED_100G)
62  else if (dev_info->speed_capa & ETH_LINK_SPEED_50G)
64  else if (dev_info->speed_capa & ETH_LINK_SPEED_40G)
66  else if (dev_info->speed_capa & ETH_LINK_SPEED_25G)
68  else if (dev_info->speed_capa & ETH_LINK_SPEED_10G)
70  else if (dev_info->speed_capa & ETH_LINK_SPEED_1G)
72 
74 }
75 
76 
/*
 * Interface flag-change handler.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 78) and several statements (81, 84, 88-89, 91, 93, 95,
 * 107, 114, 120) are missing from this listing. The body uses the names
 * hi, flags and xd, which presumably come from the missing lines.
 *
 * Visible behaviour:
 *  - first branch: records in old whether DPDK_DEVICE_FLAG_PROMISC was set
 *    in xd->flags, then enables or disables promiscuous mode on the port;
 *  - MTU branch: copies hi->max_packet_bytes into the port configuration,
 *    re-runs rte_eth_dev_configure () and sets the MTU.
 *
 * Returns old, which stays 0 unless the first branch ran.
 */
77 static u32
79 {
80  dpdk_main_t *dm = &dpdk_main;
82  u32 old = 0;
83 
85  {
 /* remember the promiscuous state that was in effect before this call */
86  old = (xd->flags & DPDK_DEVICE_FLAG_PROMISC) != 0;
87 
90  else
92 
94  {
96  rte_eth_promiscuous_enable (xd->device_index);
97  else
98  rte_eth_promiscuous_disable (xd->device_index);
99  }
100  }
101  else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU (flags))
102  {
103  int rv;
104 
105  xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes;
106 
 /* NOTE(review): listing line 107 is missing; the stop below may be
    conditional on it - confirm against the full source */
108  dpdk_device_stop (xd);
109 
 /* re-apply the port configuration with the unchanged queue counts */
110  rv = rte_eth_dev_configure
111  (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf);
112 
113  if (rv < 0)
115  "rte_eth_dev_configure[%d]: err %d",
116  xd->device_index, rv);
117 
118  rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes);
119 
 /* NOTE(review): listing line 120 is missing; the start below may be
    conditional on it - confirm against the full source */
121  dpdk_device_start (xd);
122 
123  }
124  return old;
125 }
126 
/*
 * Prepare the per-tx-queue entries of xd->lockp.
 *
 * The lockp vector is grown so that indices 0 .. xd->tx_q_used - 1 are
 * valid, and CLIB_CACHE_LINE_BYTES bytes behind each lockp[q] are zeroed.
 *
 * NOTE(review): the signature (listing line 128) and the statements that
 * presumably allocate each lockp[q] (listing lines 134-135) are missing
 * from this listing.
 */
127 static void
129 {
130  int q;
131  vec_validate (xd->lockp, xd->tx_q_used - 1);
132  for (q = 0; q < xd->tx_q_used; q++)
133  {
136  memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES);
137  }
138 }
139 
/*
 * Device discovery and setup. Presumably this is dpdk_lib_init, which is
 * called with dm later in this file; the signature (listing line 141) is
 * not shown.
 *
 * Visible behaviour:
 *  - records the first index and count of the hqos-threads registration;
 *  - for every port reported by rte_eth_dev_count () selects the device
 *    configuration by PCI address (or the default one), chooses tx/rx
 *    queue counts, applies per-PMD settings, clamps max_rx_pkt_len,
 *    optionally assigns an HQoS thread, calls dpdk_device_setup () and
 *    configures VLAN strip offload and the MTU;
 *  - warns when the accumulated descriptor count exceeds
 *    dm->conf->num_mbufs.
 *
 * Returns 0 on success, or a clib_error_t pointer on the visible failure
 * paths.
 *
 * NOTE(review): many statements are missing from this listing (see the
 * gaps in the embedded line numbers), so the comments here describe the
 * visible code only.
 */
140 static clib_error_t *
142 {
143  u32 nports;
144  u32 nb_desc = 0;
145  int i;
146  clib_error_t *error;
152  dpdk_device_t *xd;
153  vlib_pci_addr_t last_pci_addr;
154  u32 last_pci_addr_port = 0;
156  uword *p_hqos;
157 
158  u32 next_hqos_cpu = 0;
159  u8 af_packet_port_id = 0;
160  u8 bond_ether_port_id = 0;
 /* ~0 is used as the marker for no previous PCI address */
161  last_pci_addr.as_u32 = ~0;
162 
163  dm->hqos_cpu_first_index = 0;
164  dm->hqos_cpu_count = 0;
165 
166  /* find out which cpus will be used for I/O TX */
167  p_hqos = hash_get_mem (tm->thread_registrations_by_name, "hqos-threads");
168  tr_hqos = p_hqos ? (vlib_thread_registration_t *) p_hqos[0] : 0;
169 
170  if (tr_hqos && tr_hqos->count > 0)
171  {
172  dm->hqos_cpu_first_index = tr_hqos->first_index;
173  dm->hqos_cpu_count = tr_hqos->count;
174  }
175 
178 
179  nports = rte_eth_dev_count ();
180  if (nports < 1)
181  {
182  clib_warning ("DPDK drivers found no ports...");
183  }
184 
185  if (CLIB_DEBUG > 0)
186  clib_warning ("DPDK drivers found %d ports...", nports);
187 
188  /*
189  * All buffers are all allocated from the same rte_mempool.
190  * Thus they all have the same number of data bytes.
191  */
195  "dpdk rx");
196 
 /* with checksum handling enabled, drop the L4 checksum flags from the
    buffer flags template */
197  if (dm->conf->enable_tcp_udp_checksum)
198  dm->buffer_flags_template &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT
199  | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
200 
201  /* vlib_buffer_t template */
204  for (i = 0; i < tm->n_vlib_mains; i++)
205  {
208  fl = vlib_buffer_get_free_list (vm,
211  bt->flags = dm->buffer_flags_template;
212  bt->current_data = -RTE_PKTMBUF_HEADROOM;
213  vnet_buffer (bt)->sw_if_index[VLIB_TX] = (u32) ~ 0;
214  }
215 
 /* per-port setup */
216  for (i = 0; i < nports; i++)
217  {
218  u8 addr[6];
219  u8 vlan_strip = 0;
220  int j;
221  struct rte_eth_dev_info dev_info;
222  struct rte_eth_link l;
223  dpdk_device_config_t *devconf = 0;
224  vlib_pci_addr_t pci_addr;
225  uword *p = 0;
226 
227  rte_eth_dev_info_get (i, &dev_info);
228  if (dev_info.pci_dev) /* bonded interface has no pci info */
229  {
230  pci_addr.domain = dev_info.pci_dev->addr.domain;
231  pci_addr.bus = dev_info.pci_dev->addr.bus;
232  pci_addr.slot = dev_info.pci_dev->addr.devid;
233  pci_addr.function = dev_info.pci_dev->addr.function;
234  p =
236  pci_addr.as_u32);
237  }
238 
 /* use the device-specific configuration when one was found for this
    PCI address, otherwise the default configuration */
239  if (p)
240  devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
241  else
242  devconf = &dm->conf->default_devconf;
243 
244  /* Create vnet interface */
248  xd->cpu_socket = (i8) rte_eth_dev_socket_id (i);
249 
250  /* Handle interface naming for devices with multiple ports sharing same PCI ID */
251  if (dev_info.pci_dev)
252  {
253  struct rte_eth_dev_info di = { 0 };
 /* NOTE(review): on the last iteration i + 1 equals nports, so this
    queries a port index beyond those counted above. di is
    zero-initialized, so di.pci_dev stays NULL only if the call leaves di
    untouched for an invalid port - TODO confirm against the DPDK API. */
254  rte_eth_dev_info_get (i + 1, &di);
255  if (di.pci_dev && pci_addr.as_u32 != last_pci_addr.as_u32 &&
256  memcmp (&dev_info.pci_dev->addr, &di.pci_dev->addr,
257  sizeof (struct rte_pci_addr)) == 0)
258  {
 /* first port of a group: the next port has the same PCI address */
259  xd->interface_name_suffix = format (0, "0");
260  last_pci_addr.as_u32 = pci_addr.as_u32;
261  last_pci_addr_port = i;
262  }
263  else if (pci_addr.as_u32 == last_pci_addr.as_u32)
264  {
 /* later port of the group: suffix is the offset from the first port */
266  format (0, "%u", i - last_pci_addr_port);
267  }
268  else
269  {
270  last_pci_addr.as_u32 = ~0;
271  }
272  }
273  else
274  last_pci_addr.as_u32 = ~0;
275 
276  clib_memcpy (&xd->tx_conf, &dev_info.default_txconf,
277  sizeof (struct rte_eth_txconf));
 /* note: this writes to the file-scope port_conf_template, not to a
    per-device copy */
278  if (dm->conf->no_multi_seg)
279  {
280  xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
281  port_conf_template.rxmode.jumbo_frame = 0;
282  port_conf_template.rxmode.enable_scatter = 0;
283  }
284  else
285  {
286  xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS;
287  port_conf_template.rxmode.jumbo_frame = 1;
288  port_conf_template.rxmode.enable_scatter = 1;
290  }
291 
293  sizeof (struct rte_eth_conf));
294 
 /* tx queues: at most one per vlib main, capped by the device limit and
    by a smaller configured num_tx_queues */
295  xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains);
296 
297  if (devconf->num_tx_queues > 0
298  && devconf->num_tx_queues < xd->tx_q_used)
299  xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues);
300 
301  if (devconf->num_rx_queues > 1 && dm->use_rss == 0)
302  {
303  dm->use_rss = 1;
304  }
305 
 /* multiple rx queues are used only when the device supports the
    requested count; otherwise a single rx queue is used */
306  if (devconf->num_rx_queues > 1
307  && dev_info.max_rx_queues >= devconf->num_rx_queues)
308  {
309  xd->rx_q_used = devconf->num_rx_queues;
310  xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
311  if (devconf->rss_fn == 0)
312  xd->port_conf.rx_adv_conf.rss_conf.rss_hf =
313  ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP;
314  else
315  xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn;
316  }
317  else
318  xd->rx_q_used = 1;
319 
321 
322  /* workaround for drivers not setting driver_name */
323  if ((!dev_info.driver_name) && (dev_info.pci_dev))
324  dev_info.driver_name = dev_info.pci_dev->driver->driver.name;
325 
326  ASSERT (dev_info.driver_name);
327 
328  if (!xd->pmd)
329  {
330 
331 
 /* the macro below expands to one else-if per known driver name, setting
    xd->pmd; the line that invokes it (listing line 337) is missing from
    this listing */
332 #define _(s,f) else if (dev_info.driver_name && \
333  !strcmp(dev_info.driver_name, s)) \
334  xd->pmd = VNET_DPDK_PMD_##f;
335  if (0)
336  ;
338 #undef _
339  else
341 
345 
 /* per-PMD settings; several assignments in the cases below are missing
    from this listing */
346  switch (xd->pmd)
347  {
348  /* Drivers with valid speed_capa set */
349  case VNET_DPDK_PMD_E1000EM:
350  case VNET_DPDK_PMD_IGB:
351  case VNET_DPDK_PMD_IXGBE:
352  case VNET_DPDK_PMD_I40E:
353  xd->port_type = port_type_from_speed_capa (&dev_info);
356 
357  break;
358  case VNET_DPDK_PMD_CXGBE:
359  case VNET_DPDK_PMD_MLX4:
360  case VNET_DPDK_PMD_MLX5:
361  xd->port_type = port_type_from_speed_capa (&dev_info);
362  break;
363 
364  /* SR-IOV VFs */
365  case VNET_DPDK_PMD_IGBVF:
366  case VNET_DPDK_PMD_IXGBEVF:
367  case VNET_DPDK_PMD_I40EVF:
369  xd->port_conf.rxmode.hw_strip_crc = 1;
370  break;
371 
372  case VNET_DPDK_PMD_THUNDERX:
374  xd->port_conf.rxmode.hw_strip_crc = 1;
375  break;
376 
377  case VNET_DPDK_PMD_DPAA2:
379  break;
380 
381  /* Cisco VIC */
382  case VNET_DPDK_PMD_ENIC:
383  rte_eth_link_get_nowait (i, &l);
384  if (l.link_speed == 40000)
386  else
388  break;
389 
390  /* Intel Red Rock Canyon */
391  case VNET_DPDK_PMD_FM10K:
393  xd->port_conf.rxmode.hw_strip_crc = 1;
394  break;
395 
396  /* virtio */
397  case VNET_DPDK_PMD_VIRTIO:
401  break;
402 
403  /* vmxnet3 */
404  case VNET_DPDK_PMD_VMXNET3:
406  xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
407  break;
408 
409  case VNET_DPDK_PMD_AF_PACKET:
411  xd->port_id = af_packet_port_id++;
412  break;
413 
414  case VNET_DPDK_PMD_BOND:
416  xd->port_id = bond_ether_port_id++;
417  break;
418 
419  case VNET_DPDK_PMD_VIRTIO_USER:
421  break;
422 
423  case VNET_DPDK_PMD_VHOST_ETHER:
425  break;
426 
427  default:
429  }
430 
 /* configured descriptor counts override whatever was set above */
431  if (devconf->num_rx_desc)
432  xd->nb_rx_desc = devconf->num_rx_desc;
433 
434  if (devconf->num_tx_desc)
435  xd->nb_tx_desc = devconf->num_tx_desc;
436  }
437 
438  /*
439  * Ensure default mtu is not > the mtu read from the hardware.
440  * Otherwise rte_eth_dev_configure() will fail and the port will
441  * not be available.
442  */
443  if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen)
444  {
445  /*
446  * This device does not support the platform's max frame
447  * size. Use its advertised mru instead.
448  */
449  xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen;
450  }
451  else
452  {
453  xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES;
454 
455  /*
456  * Some platforms do not account for Ethernet FCS (4 bytes) in
457  * MTU calculations. To interop with them increase mru but only
458  * if the device's settings can support it.
459  */
460  if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) &&
461  xd->port_conf.rxmode.hw_strip_crc)
462  {
463  /*
464  * Allow additional 4 bytes (for Ethernet FCS). These bytes are
465  * stripped by h/w and so will not consume any buffer memory.
466  */
467  xd->port_conf.rxmode.max_rx_pkt_len += 4;
468  }
469  }
470 
 /* af_packet ports get a generated MAC address: bytes 02:fe followed by
    four pseudo-random bytes seeded from the current time; all other
    ports use the address reported by the driver */
471  if (xd->pmd == VNET_DPDK_PMD_AF_PACKET)
472  {
473  f64 now = vlib_time_now (vm);
474  u32 rnd;
475  rnd = (u32) (now * 1e6);
476  rnd = random_u32 (&rnd);
477  clib_memcpy (addr + 2, &rnd, sizeof (rnd));
478  addr[0] = 2;
479  addr[1] = 0xfe;
480  }
481  else
482  rte_eth_macaddr_get (i, (struct ether_addr *) addr);
483 
484  if (xd->tx_q_used < tm->n_vlib_mains)
486 
 /* the device index is the position of xd in dm->devices and is expected
    to equal the DPDK port number */
487  xd->device_index = xd - dm->devices;
488  ASSERT (i == xd->device_index);
489  xd->per_interface_next_index = ~0;
490 
491  /* assign interface to input thread */
493  int q;
494 
495  if (devconf->hqos_enabled)
496  {
498 
499  if (devconf->hqos.hqos_thread_valid)
500  {
 /* here hqos_thread is treated as an offset from hqos_cpu_first_index */
501  int cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread;
502 
503  if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count)
504  return clib_error_return (0, "invalid HQoS thread index");
505 
506  vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1);
507  dq->device = xd->device_index;
508  dq->queue_id = 0;
509  }
510  else
511  {
 /* no thread configured: hand out HQoS threads round-robin */
512  int cpu = dm->hqos_cpu_first_index + next_hqos_cpu;
513 
514  if (dm->hqos_cpu_count == 0)
515  return clib_error_return (0, "no HQoS threads available");
516 
517  vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1);
518  dq->device = xd->device_index;
519  dq->queue_id = 0;
520 
521  next_hqos_cpu++;
522  if (next_hqos_cpu == dm->hqos_cpu_count)
523  next_hqos_cpu = 0;
524 
 /* NOTE(review): cpu already includes hqos_cpu_first_index, while the
    branch above adds hqos_cpu_first_index to hqos_thread again. If this
    devconf is shared by several ports (e.g. the default devconf) the
    stored value may be misinterpreted - confirm. */
525  devconf->hqos.hqos_thread_valid = 1;
526  devconf->hqos.hqos_thread = cpu;
527  }
528  }
529 
532  for (j = 0; j < tm->n_vlib_mains; j++)
533  {
536  vec_reset_length (xd->tx_vectors[j]);
537  }
538 
541  for (j = 0; j < xd->rx_q_used; j++)
542  {
545  vec_reset_length (xd->rx_vectors[j]);
546  }
547 
550 
551 
552  /* count the number of descriptors used for this device */
553  nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used;
554 
556  (dm->vnet_main, dpdk_device_class.index, xd->device_index,
557  /* ethernet address */ addr,
559  if (error)
560  return error;
561 
563  xd->vlib_sw_if_index = sw->sw_if_index;
565  dpdk_input_node.index);
566 
 /* with a configured workers bitmap, rx queues are assigned to those
    workers in bitmap order */
567  if (devconf->workers)
568  {
 /* NOTE(review): this i shadows the port loop index i declared at
    function scope */
569  int i;
570  q = 0;
571  /* *INDENT-OFF* */
572  clib_bitmap_foreach (i, devconf->workers, ({
573  vnet_hw_interface_assign_rx_thread (dm->vnet_main, xd->hw_if_index, q++,
574  vdm->first_worker_thread_index + i);
575  }));
576  /* *INDENT-ON* */
577  }
578  else
579  for (q = 0; q < xd->rx_q_used; q++)
580  {
582  ~1);
583  }
584 
586 
589 
590  dpdk_device_setup (xd);
591 
592  if (vec_len (xd->errors))
593  clib_warning ("setup failed for device %U. Errors:\n %U",
596 
597  if (devconf->hqos_enabled)
598  {
599  clib_error_t *rv;
600  rv = dpdk_port_setup_hqos (xd, &devconf->hqos);
601  if (rv)
602  return rv;
603  }
604 
605  /*
606  * For cisco VIC vNIC, set default to VLAN strip enabled, unless
607  * specified otherwise in the startup config.
608  * For other NICs default to VLAN strip disabled, unless specified
609  * otherwise in the startup config.
610  */
611  if (xd->pmd == VNET_DPDK_PMD_ENIC)
612  {
613  if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF)
614  vlan_strip = 1; /* remove vlan tag from VIC port by default */
615  else
616  clib_warning ("VLAN strip disabled for interface\n");
617  }
618  else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON)
619  vlan_strip = 1;
620 
621  if (vlan_strip)
622  {
623  int vlan_off;
 /* add the strip bit to the offload flags currently reported for the port */
624  vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index);
625  vlan_off |= ETH_VLAN_STRIP_OFFLOAD;
626  xd->port_conf.rxmode.hw_vlan_strip = vlan_off;
627  if (rte_eth_dev_set_vlan_offload (xd->device_index, vlan_off) == 0)
628  clib_warning ("VLAN strip enabled for interface\n");
629  else
630  clib_warning ("VLAN strip cannot be supported by interface\n");
631  }
632 
634  xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t);
635 
636  rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes);
637  }
638 
 /* warn only: more ring descriptors than configured mbufs */
639  if (nb_desc > dm->conf->num_mbufs)
640  clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n",
641  dm->conf->num_mbufs, nb_desc);
642 
643  return 0;
644 }
645 
/*
 * Walk the PCI devices in pci_main and bind the supported ones to the
 * configured uio driver. Presumably this is dpdk_bind_devices_to_uio,
 * which is called with conf later in this file; the signature (listing
 * line 647) is not shown.
 *
 * Visible behaviour per device:
 *  - devices whose class is neither PCI_CLASS_NETWORK_ETHERNET nor
 *    PCI_CLASS_PROCESSOR_CO are skipped;
 *  - when conf->dev_confs is non-empty it acts as a whitelist: devices
 *    without an entry in device_config_index_by_pci_addr are skipped;
 *  - devices matching one of the vendor/device id tests below are bound
 *    with vlib_pci_bind_to_uio (); Mellanox devices in the listed id range
 *    are skipped without binding; anything else is reported as
 *    unsupported and skipped;
 *  - if binding fails, the device configuration (created on demand) is
 *    marked is_blacklisted and the error is reported.
 */
646 static void
648 {
649  vlib_pci_main_t *pm = &pci_main;
650  clib_error_t *error;
652  u8 *pci_addr = 0;
653  int num_whitelisted = vec_len (conf->dev_confs);
654 
655  /* *INDENT-OFF* */
656  pool_foreach (d, pm->pci_devs, ({
657  dpdk_device_config_t * devconf = 0;
 /* pci_addr is a reused scratch vector holding the NUL-terminated
    printable address for the warning below */
658  vec_reset_length (pci_addr);
659  pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, &d->bus_address, 0);
660 
661  if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO)
662  continue;
663 
664  if (num_whitelisted)
665  {
666  uword * p = hash_get (conf->device_config_index_by_pci_addr, d->bus_address.as_u32);
667 
668  if (!p)
669  continue;
670 
671  devconf = pool_elt_at_index (conf->dev_confs, p[0]);
672  }
673 
674  /* virtio */
675  if (d->vendor_id == 0x1af4 && d->device_id == 0x1000)
676  ;
677  /* vmxnet3 */
678  else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0)
679  ;
680  /* all Intel network devices */
681  else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_NETWORK_ETHERNET)
682  ;
683  /* all Intel QAT devices VFs */
684  else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_PROCESSOR_CO &&
685  (d->device_id == 0x0443 || d->device_id == 0x37c9 || d->device_id == 0x19e3))
686  ;
687  /* Cisco VIC */
688  else if (d->vendor_id == 0x1137 && d->device_id == 0x0043)
689  ;
690  /* Chelsio T4/T5 */
691  else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000)
692  ;
693  /* Mellanox */
694  else if (d->vendor_id == 0x15b3 && d->device_id >= 0x1013 && d->device_id <= 0x101a)
695  {
 /* recognised, but deliberately not bound to the uio driver */
696  continue;
697  }
698  else
699  {
700  clib_warning ("Unsupported PCI device 0x%04x:0x%04x found "
701  "at PCI address %s\n", (u16) d->vendor_id, (u16) d->device_id,
702  pci_addr);
703  continue;
704  }
705 
706  error = vlib_pci_bind_to_uio (d, (char *) conf->uio_driver_name);
707 
708  if (error)
709  {
 /* bind failed: make sure a devconf exists, then blacklist the device */
710  if (devconf == 0)
711  {
712  pool_get (conf->dev_confs, devconf);
714  devconf - conf->dev_confs);
715  devconf->pci_addr.as_u32 = d->bus_address.as_u32;
716  }
717  devconf->is_blacklisted = 1;
718  clib_error_report (error);
719  }
720  }));
721  /* *INDENT-ON* */
722  vec_free (pci_addr);
723 }
724 
/*
 * Create and fill the configuration entry for one device, or fill the
 * default device configuration.
 *
 * conf       - configuration container; new entries are taken from
 *              conf->dev_confs and indexed by PCI address in
 *              conf->device_config_index_by_pci_addr.
 * pci_addr   - PCI address the entry is registered under; it is also
 *              stored in the entry when is_default is set.
 * input      - per-device option text, or 0 when there are no options.
 * is_default - non-zero to fill conf->default_devconf instead of creating
 *              a new entry.
 *
 * Returns 0 on success. Returns an error for a duplicate PCI address, an
 * unknown option, a failing rss or hqos sub-parser, or when both workers
 * and num-rx-queues are given and their counts differ.
 *
 * NOTE(review): listing lines 756, 761-762, 784, 786 and 817 are missing
 * from this listing; they presumably hold the vlan-strip assignments and
 * the head of the parse loop - confirm against the full source.
 */
725 static clib_error_t *
726 dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr,
727  unformat_input_t * input, u8 is_default)
728 {
729  clib_error_t *error = 0;
730  uword *p;
731  dpdk_device_config_t *devconf;
732  unformat_input_t sub_input;
733 
734  if (is_default)
735  {
736  devconf = &conf->default_devconf;
737  }
738  else
739  {
740  p = hash_get (conf->device_config_index_by_pci_addr, pci_addr.as_u32);
741 
 /* each PCI address may be configured only once */
742  if (!p)
743  {
744  pool_get (conf->dev_confs, devconf);
745  hash_set (conf->device_config_index_by_pci_addr, pci_addr.as_u32,
746  devconf - conf->dev_confs);
747  }
748  else
749  return clib_error_return (0,
750  "duplicate configuration for PCI address %U",
751  format_vlib_pci_addr, &pci_addr);
752  }
753 
754  devconf->pci_addr.as_u32 = pci_addr.as_u32;
755  devconf->hqos_enabled = 0;
757 
 /* no option text: the entry keeps the values set above */
758  if (!input)
759  return 0;
760 
763  {
764  if (unformat (input, "num-rx-queues %u", &devconf->num_rx_queues))
765  ;
766  else if (unformat (input, "num-tx-queues %u", &devconf->num_tx_queues))
767  ;
768  else if (unformat (input, "num-rx-desc %u", &devconf->num_rx_desc))
769  ;
770  else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc))
771  ;
772  else if (unformat (input, "workers %U", unformat_bitmap_list,
773  &devconf->workers))
774  ;
775  else
776  if (unformat
777  (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input))
778  {
779  error = unformat_rss_fn (&sub_input, &devconf->rss_fn);
780  if (error)
781  break;
782  }
783  else if (unformat (input, "vlan-strip-offload off"))
785  else if (unformat (input, "vlan-strip-offload on"))
787  else
788  if (unformat
789  (input, "hqos %U", unformat_vlib_cli_sub_input, &sub_input))
790  {
791  devconf->hqos_enabled = 1;
792  error = unformat_hqos (&sub_input, &devconf->hqos);
793  if (error)
794  break;
795  }
796  else if (unformat (input, "hqos"))
797  {
798  devconf->hqos_enabled = 1;
799  }
800  else
801  {
802  error = clib_error_return (0, "unknown input `%U'",
803  format_unformat_error, input);
804  break;
805  }
806  }
807 
808  if (error)
809  return error;
810 
 /* a workers list without an explicit rx queue count implies one rx
    queue per listed worker; with both given, the counts must agree */
811  if (devconf->workers && devconf->num_rx_queues == 0)
812  devconf->num_rx_queues = clib_bitmap_count_set_bits (devconf->workers);
813  else if (devconf->workers &&
814  clib_bitmap_count_set_bits (devconf->workers) !=
815  devconf->num_rx_queues)
 /* NOTE(review): the message below spells threads as threadds; it is
    runtime text and is left untouched here */
816  error =
818  "%U: number of worker threadds must be "
819  "equal to number of rx queues", format_vlib_pci_addr,
820  &pci_addr);
821 
822  return error;
823 }
824 
/*
 * Parse the dpdk configuration section, build the EAL argument vector and
 * initialise the DPDK EAL and buffer pools.
 *
 * NOTE(review): the signature (listing line 826) and a number of
 * statements are missing from this listing, among them the declarations
 * of conf, tm, tr and vm, the head of the parse loop, the macro invocation
 * lines that follow each #define _ below, and the loop heads around
 * listing lines 1002, 1025 and 1036. The comments describe the visible
 * code only.
 *
 * Visible phases:
 *  1. parse options into conf and conf->eal_init_args;
 *  2. unless no-hugetlb or huge-dir was given, work out per-socket memory,
 *     choose 1G or 2M pages, mount hugetlbfs on huge_dir_path and add
 *     --huge-dir (and --file-prefix when not given) to the EAL arguments;
 *  3. insert -c and -n unless they were set manually;
 *  4. bind devices to uio when PCI is enabled and the effective uid is 0;
 *  5. apply default per-device items and add -w / -b entries;
 *  6. add --master-lcore and --socket-mem, call rte_eal_init (), then
 *     detach and remove the hugepage mount directory;
 *  7. create buffer pools.
 *
 * Returns 0 on success or a clib_error_t pointer on failure.
 */
825 static clib_error_t *
827 {
828  clib_error_t *error = 0;
829  dpdk_main_t *dm = &dpdk_main;
832  dpdk_device_config_t *devconf;
833  vlib_pci_addr_t pci_addr;
834  unformat_input_t sub_input;
835  uword x;
836  u8 *s, *tmp = 0;
837  u8 *rte_cmd = 0, *ethname = 0;
838  u32 log_level;
839  int ret, i;
840  int num_whitelisted = 0;
841  u8 no_pci = 0;
842  u8 no_huge = 0;
843  u8 huge_dir = 0;
844  u8 file_prefix = 0;
845  u8 *socket_mem = 0;
846  u8 *huge_dir_path = 0;
847 
 /* NUL-terminated path of the hugepage mount point under the runtime dir */
848  huge_dir_path =
849  format (0, "%s/hugepages%c", vlib_unix_get_runtime_dir (), 0);
850 
851  conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword));
852  log_level = RTE_LOG_NOTICE;
853 
855  {
856  /* Prime the pump */
857  if (unformat (input, "no-hugetlb"))
858  {
 /* NOTE(review): every other long EAL option in this function is added
    with a leading -- (for example --no-pci); this one is added as
    no-huge without it. Presumably it should be --no-huge - confirm
    against the EAL option list. */
859  vec_add1 (conf->eal_init_args, (u8 *) "no-huge");
860  no_huge = 1;
861  }
862 
863  else if (unformat (input, "enable-tcp-udp-checksum"))
864  conf->enable_tcp_udp_checksum = 1;
865 
866  else if (unformat (input, "decimal-interface-names"))
868 
869  else if (unformat (input, "log-level %U", unformat_dpdk_log_level, &x))
870  log_level = x;
871 
872  else if (unformat (input, "no-multi-seg"))
873  conf->no_multi_seg = 1;
874 
875  else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input,
876  &sub_input))
877  {
 /* the default entry is not keyed by address; ~1 is passed as a
    placeholder PCI address */
878  error =
879  dpdk_device_config (conf, (vlib_pci_addr_t) (u32) ~ 1, &sub_input,
880  1);
881 
882  if (error)
883  return error;
884  }
885  else
886  if (unformat
887  (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr,
888  unformat_vlib_cli_sub_input, &sub_input))
889  {
890  error = dpdk_device_config (conf, pci_addr, &sub_input, 0);
891 
892  if (error)
893  return error;
894 
895  num_whitelisted++;
896  }
897  else if (unformat (input, "dev %U", unformat_vlib_pci_addr, &pci_addr))
898  {
899  error = dpdk_device_config (conf, pci_addr, 0, 0);
900 
901  if (error)
902  return error;
903 
904  num_whitelisted++;
905  }
906  else if (unformat (input, "num-mbufs %d", &conf->num_mbufs))
907  ;
908  else if (unformat (input, "uio-driver %s", &conf->uio_driver_name))
909  ;
910  else if (unformat (input, "socket-mem %s", &socket_mem))
911  ;
912  else if (unformat (input, "no-pci"))
913  {
914  no_pci = 1;
915  tmp = format (0, "--no-pci%c", 0);
916  vec_add1 (conf->eal_init_args, tmp);
917  }
918  else if (unformat (input, "poll-sleep %d", &dm->poll_sleep_usec))
919  ;
920 
 /* options without a value: passed through as --name */
921 #define _(a) \
922  else if (unformat(input, #a)) \
923  { \
924  tmp = format (0, "--%s%c", #a, 0); \
925  vec_add1 (conf->eal_init_args, tmp); \
926  }
928 #undef _
 /* options with a string value: passed through as --name value; huge-dir
    and file-prefix are remembered, and a vdev value containing af_packet
    triggers a warning */
929 #define _(a) \
930  else if (unformat(input, #a " %s", &s)) \
931  { \
932  if (!strncmp(#a, "huge-dir", 8)) \
933  huge_dir = 1; \
934  else if (!strncmp(#a, "file-prefix", 11)) \
935  file_prefix = 1; \
936  tmp = format (0, "--%s%c", #a, 0); \
937  vec_add1 (conf->eal_init_args, tmp); \
938  vec_add1 (s, 0); \
939  if (!strncmp(#a, "vdev", 4)) \
940  if (strstr((char*)s, "af_packet")) \
941  clib_warning ("af_packet obsoleted. Use CLI 'create host-interface'."); \
942  vec_add1 (conf->eal_init_args, s); \
943  }
945 #undef _
 /* options mapped to a single-hyphen EAL flag b followed by the value */
946 #define _(a,b) \
947  else if (unformat(input, #a " %s", &s)) \
948  { \
949  tmp = format (0, "-%s%c", #b, 0); \
950  vec_add1 (conf->eal_init_args, tmp); \
951  vec_add1 (s, 0); \
952  vec_add1 (conf->eal_init_args, s); \
953  }
955 #undef _
 /* as above, and additionally records in conf that option a was set
    manually */
956 #define _(a,b) \
957  else if (unformat(input, #a " %s", &s)) \
958  { \
959  tmp = format (0, "-%s%c", #b, 0); \
960  vec_add1 (conf->eal_init_args, tmp); \
961  vec_add1 (s, 0); \
962  vec_add1 (conf->eal_init_args, s); \
963  conf->a##_set_manually = 1; \
964  }
966 #undef _
967  else if (unformat (input, "default"))
968  ;
969 
970  else if (unformat_skip_white_space (input))
971  ;
972  else
973  {
974  error = clib_error_return (0, "unknown input `%U'",
975  format_unformat_error, input);
976  goto done;
977  }
978  }
979 
980  if (!conf->uio_driver_name)
981  conf->uio_driver_name = format (0, "uio_pci_generic%c", 0);
982 
983  /*
984  * Use 1G huge pages if available.
985  */
986  if (!no_huge && !huge_dir)
987  {
988  u32 x, *mem_by_socket = 0;
989  uword c = 0;
990  u8 use_1g = 1;
991  u8 use_2m = 1;
992  u8 less_than_1g = 1;
993  int rv;
994 
 /* the return value of umount is not checked here */
995  umount ((char *) huge_dir_path);
996 
997  /* Process "socket-mem" parameter value */
998  if (vec_len (socket_mem))
999  {
1000  unformat_input_t in;
1001  unformat_init_vector (&in, socket_mem);
1003  {
 /* a number is one socket value; a bare comma is an empty field
    counted as 0 */
1004  if (unformat (&in, "%u,", &x))
1005  ;
1006  else if (unformat (&in, "%u", &x))
1007  ;
1008  else if (unformat (&in, ","))
1009  x = 0;
1010  else
1011  break;
1012 
1013  vec_add1 (mem_by_socket, x);
1014 
1015  if (x > 1023)
1016  less_than_1g = 0;
1017  }
1018  /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... */
1019  unformat_free (&in);
1020  socket_mem = 0;
1021  }
1022  else
1023  {
1024  /* *INDENT-OFF* */
1026  {
1027  vec_validate(mem_by_socket, c);
1028  mem_by_socket[c] = 256; /* default per-socket mem */
1029  }
1030  ));
1031  /* *INDENT-ON* */
1032  }
1033 
1034  /* check if available enough 1GB pages for each socket */
1035  /* *INDENT-OFF* */
1037  {
1038  int pages_avail, page_size, mem;
1039  clib_error_t *e = 0;
1040 
1041  vec_validate(mem_by_socket, c);
1042  mem = mem_by_socket[c];
1043 
 /* page_size is multiplied by 1024 for the sysfs query and compared
    directly against mem; 1024 stands for the 1G page size */
1044  page_size = 1024;
1045  e = clib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail);
1046 
1047  if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem)
1048  use_1g = 0;
1049 
1050  if (e)
1051  clib_error_free (e);
1052 
 /* same check for the 2M page size */
1053  page_size = 2;
1054  e = clib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail);
1055 
1056  if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem)
1057  use_2m = 0;
1058 
1059  if (e)
1060  clib_error_free (e);
1061  }));
1062  /* *INDENT-ON* */
1063 
1064  if (mem_by_socket == 0)
1065  {
1066  error = clib_error_return (0, "mem_by_socket NULL");
1067  goto done;
1068  }
1069  _vec_len (mem_by_socket) = c + 1;
1070 
1071  /* regenerate socket_mem string */
1072  vec_foreach_index (x, mem_by_socket)
1073  socket_mem = format (socket_mem, "%s%u",
1074  socket_mem ? "," : "", mem_by_socket[x]);
1075  socket_mem = format (socket_mem, "%c", 0);
1076 
1077  vec_free (mem_by_socket);
1078 
1079  error = vlib_unix_recursive_mkdir ((char *) huge_dir_path);
1080  if (error)
1081  {
1082  goto done;
1083  }
1084 
 /* 1G pages are used when enough are free, unless every socket asked
    for less than 1024 and 2M pages also suffice */
1085  if (use_1g && !(less_than_1g && use_2m))
1086  {
1087  rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0,
1088  "pagesize=1G");
1089  }
1090  else if (use_2m)
1091  {
1092  rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, NULL);
1093  }
1094  else
1095  {
 /* NOTE(review): this returns directly instead of using goto done as
    the neighbouring error paths do; done only returns error, so the
    result is the same */
1096  return clib_error_return (0, "not enough free huge pages");
1097  }
1098 
1099  if (rv)
1100  {
1101  error = clib_error_return (0, "mount failed %d", errno);
1102  goto done;
1103  }
1104 
1105  tmp = format (0, "--huge-dir%c", 0);
1106  vec_add1 (conf->eal_init_args, tmp);
1107  tmp = format (0, "%s%c", huge_dir_path, 0);
1108  vec_add1 (conf->eal_init_args, tmp);
1109  if (!file_prefix)
1110  {
1111  tmp = format (0, "--file-prefix%c", 0);
1112  vec_add1 (conf->eal_init_args, tmp);
1113  tmp = format (0, "vpp%c", 0);
1114  vec_add1 (conf->eal_init_args, tmp);
1115  }
1116  }
1117 
 /* rte_cmd and ethname are never assigned in the visible code */
1118  vec_free (rte_cmd);
1119  vec_free (ethname);
1120 
1121  if (error)
1122  return error;
1123 
1124  /* I'll bet that -c and -n must be the first and second args... */
1125  if (!conf->coremask_set_manually)
1126  {
1128  uword *coremask = 0;
1129  int i;
1130 
1131  /* main thread core */
1132  coremask = clib_bitmap_set (coremask, tm->main_lcore, 1);
1133 
 /* add the cores of every registered thread type */
1134  for (i = 0; i < vec_len (tm->registrations); i++)
1135  {
1136  tr = tm->registrations[i];
1137  coremask = clib_bitmap_or (coremask, tr->coremask);
1138  }
1139 
 /* make room for two entries at index 1 and fill them with -c and the
    hexadecimal core mask */
1140  vec_insert (conf->eal_init_args, 2, 1);
1141  conf->eal_init_args[1] = (u8 *) "-c";
1142  tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0);
1143  conf->eal_init_args[2] = tmp;
1144  clib_bitmap_free (coremask);
1145  }
1146 
1147  if (!conf->nchannels_set_manually)
1148  {
1149  vec_insert (conf->eal_init_args, 2, 3);
1150  conf->eal_init_args[3] = (u8 *) "-n";
 /* NOTE(review): unlike every other argument string built in this
    function, this format call has no trailing %c with 0, so the vector
    does not appear to be NUL-terminated before it is handed to
    rte_eal_init () as a C string - confirm and terminate if needed */
1151  tmp = format (0, "%d", conf->nchannels);
1152  conf->eal_init_args[4] = tmp;
1153  }
1154 
1155  if (no_pci == 0 && geteuid () == 0)
1156  dpdk_bind_devices_to_uio (conf);
1157 
 /* copy a default item into a device entry only when the entry left it
    at 0 and the default is positive */
1158 #define _(x) \
1159  if (devconf->x == 0 && conf->default_devconf.x > 0) \
1160  devconf->x = conf->default_devconf.x ;
1161 
1162  /* *INDENT-OFF* */
1163  pool_foreach (devconf, conf->dev_confs, ({
1164 
1165  /* default per-device config items */
1166  foreach_dpdk_device_config_item
1167 
1168  /* add DPDK EAL whitelist/blacklist entry */
1169  if (num_whitelisted > 0 && devconf->is_blacklisted == 0)
1170  {
1171  tmp = format (0, "-w%c", 0);
1172  vec_add1 (conf->eal_init_args, tmp);
1173  tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
1174  vec_add1 (conf->eal_init_args, tmp);
1175  }
1176  else if (num_whitelisted == 0 && devconf->is_blacklisted != 0)
1177  {
1178  tmp = format (0, "-b%c", 0);
1179  vec_add1 (conf->eal_init_args, tmp);
1180  tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
1181  vec_add1 (conf->eal_init_args, tmp);
1182  }
1183  }));
1184  /* *INDENT-ON* */
1185 
1186 #undef _
1187 
1188  /* set master-lcore */
1189  tmp = format (0, "--master-lcore%c", 0);
1190  vec_add1 (conf->eal_init_args, tmp);
1191  tmp = format (0, "%u%c", tm->main_lcore, 0);
1192  vec_add1 (conf->eal_init_args, tmp);
1193 
1194  /* set socket-mem */
1195  tmp = format (0, "--socket-mem%c", 0);
1196  vec_add1 (conf->eal_init_args, tmp);
1197  tmp = format (0, "%s%c", socket_mem, 0);
1198  vec_add1 (conf->eal_init_args, tmp);
1199 
1200  /* NULL terminate the "argv" vector, in case of stupidity */
1201  vec_add1 (conf->eal_init_args, 0);
1202  _vec_len (conf->eal_init_args) -= 1;
1203 
1204  /* Set up DPDK eal and packet mbuf pool early. */
1205 
1206  rte_log_set_global_level (log_level);
1207 
1208  vm = vlib_get_main ();
1209 
1210  /* make copy of args as rte_eal_init tends to mess up with arg array */
 /* the printable copy starts at index 1; element 0 is left out */
1211  for (i = 1; i < vec_len (conf->eal_init_args); i++)
1212  conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ",
1213  conf->eal_init_args[i]);
1214 
1215  clib_warning ("EAL init args: %s", conf->eal_init_args_str);
1216  ret =
1217  rte_eal_init (vec_len (conf->eal_init_args),
1218  (char **) conf->eal_init_args);
1219 
1220  /* lazy umount hugepages */
1221  umount2 ((char *) huge_dir_path, MNT_DETACH);
1222  rmdir ((char *) huge_dir_path);
1223  vec_free (huge_dir_path);
1224 
1225  if (ret < 0)
1226  return clib_error_return (0, "rte_eal_init returned %d", ret);
1227 
1228  /* Dump the physical memory layout prior to creating the mbuf_pool */
1229  fprintf (stdout, "DPDK physical memory layout:\n");
1230  rte_dump_physmem_layout (stdout);
1231 
1232  /* main thread 1st */
1233  error = dpdk_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ());
1234  if (error)
1235  return error;
1236 
 /* then once per lcore index, passing the socket id that lcore maps to */
1237  for (i = 0; i < RTE_MAX_LCORE; i++)
1238  {
1239  error = dpdk_buffer_pool_create (vm, conf->num_mbufs,
1240  rte_lcore_to_socket_id (i));
1241  if (error)
1242  return error;
1243  }
1244 
1245 done:
1246  return error;
1247 }
1248 
1250 
/*
 * Refresh the cached link state of a device and push changes to the vnet
 * hardware interface flags.
 *
 * NOTE(review): the line with the function name and parameters (listing
 * line 1252) is missing from this listing; the body uses xd (the device)
 * and now (a timestamp). Several statements are also missing, among them
 * the duplex and most speed flag assignments and part of the link-status
 * comparison.
 *
 * Visible behaviour:
 *  - returns immediately for devices without DPDK_DEVICE_FLAG_PMD;
 *  - stores now in xd->time_last_link_update unless now is 0;
 *  - re-reads xd->link with rte_eth_link_get_nowait ();
 *  - once a change is detected, hw_flags_chg stays set so that the duplex
 *    and speed sections below also run and contribute to hw_flags;
 *  - calls vnet_hw_interface_set_flags () only when something changed.
 */
1251 void
1253 {
1254  vnet_main_t *vnm = vnet_get_main ();
 /* snapshot taken before xd->link is overwritten below */
1255  struct rte_eth_link prev_link = xd->link;
1256  u32 hw_flags = 0;
1257  u8 hw_flags_chg = 0;
1258 
1259  /* only update link state for PMD interfaces */
1260  if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
1261  return;
1262 
 /* a zero now keeps the previous timestamp */
1263  xd->time_last_link_update = now ? now : xd->time_last_link_update;
1264  memset (&xd->link, 0, sizeof (xd->link));
1265  rte_eth_link_get_nowait (xd->device_index, &xd->link);
1266 
 /* event logging is compiled in only when LINK_STATE_ELOGS is non-zero */
1267  if (LINK_STATE_ELOGS)
1268  {
1270  ELOG_TYPE_DECLARE (e) =
1271  {
1272  .format =
1273  "update-link-state: sw_if_index %d, admin_up %d,"
1274  "old link_state %d new link_state %d",.format_args = "i4i1i1i1",};
1275 
1276  struct
1277  {
1278  u32 sw_if_index;
1279  u8 admin_up;
1280  u8 old_link_state;
1281  u8 new_link_state;
1282  } *ed;
1283  ed = ELOG_DATA (&vm->elog_main, e);
1284  ed->sw_if_index = xd->vlib_sw_if_index;
1285  ed->admin_up = (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) != 0;
1286  ed->old_link_state = (u8)
1288  ed->new_link_state = (u8) xd->link.link_status;
1289  }
1290 
1292  && ((xd->link.link_status != 0) ^
1294  {
1295  hw_flags_chg = 1;
1296  hw_flags |= (xd->link.link_status ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
1297  }
1298 
1299  if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex))
1300  {
1301  hw_flags_chg = 1;
1302  switch (xd->link.link_duplex)
1303  {
1304  case ETH_LINK_HALF_DUPLEX:
1306  break;
1307  case ETH_LINK_FULL_DUPLEX:
1309  break;
1310  default:
1311  break;
1312  }
1313  }
1314  if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed))
1315  {
1316  hw_flags_chg = 1;
1317  switch (xd->link.link_speed)
1318  {
1319  case ETH_SPEED_NUM_10M:
1321  break;
1322  case ETH_SPEED_NUM_100M:
1324  break;
1325  case ETH_SPEED_NUM_1G:
1326  hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G;
1327  break;
1328  case ETH_SPEED_NUM_10G:
1330  break;
1331  case ETH_SPEED_NUM_40G:
1333  break;
 /* speed 0 sets no speed flag and is not reported as unknown */
1334  case 0:
1335  break;
1336  default:
1337  clib_warning ("unknown link speed %d", xd->link.link_speed);
1338  break;
1339  }
1340  }
1341  if (hw_flags_chg)
1342  {
1343  if (LINK_STATE_ELOGS)
1344  {
1346 
1347  ELOG_TYPE_DECLARE (e) =
1348  {
1349  .format =
1350  "update-link-state: sw_if_index %d, new flags %d",.format_args
1351  = "i4i4",};
1352 
1353  struct
1354  {
1355  u32 sw_if_index;
1356  u32 flags;
1357  } *ed;
1358  ed = ELOG_DATA (&vm->elog_main, e);
1359  ed->sw_if_index = xd->vlib_sw_if_index;
1360  ed->flags = hw_flags;
1361  }
1362  vnet_hw_interface_set_flags (vnm, xd->hw_if_index, hw_flags);
1363  }
1364 }
1365 
/*
 * dpdk_process
 *
 * Prototype as recorded in this listing's cross-reference index:
 *   static uword dpdk_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
 *                              vlib_frame_t *f)                [init.c:1367]
 *
 * Function behind the "dpdk-process" node registered further down.
 * Steps visible in this listing:
 *   1. Call dpdk_lib_init (dm); a returned error is passed to
 *      clib_error_report() and there is no return statement on that path.
 *   2. Set tm->worker_thread_release to 1.
 *   3. Call dpdk_update_link_state() once for every entry of dm->devices.
 *   4. For every port whose pmd is VNET_DPDK_PMD_BOND, copy the MAC of the
 *      first slave to the bond port and to the slave interfaces, and
 *      record bond bookkeeping on both sides.
 *   5. Loop forever: wait, then refresh counters and link state of each
 *      device whose respective poll interval has elapsed.
 *
 * The closing 'return 0' follows a while (1) that contains no break in the
 * visible lines.
 *
 * NOTE(review): 'tm' and 'em' are used below but their declarations are not
 * visible (source lines 1372 and 1374 are missing), as are several other
 * numbered lines; nothing is claimed here about the missing statements.
 */
1366 static uword
1368 {
1369  clib_error_t *error;
1370  vnet_main_t *vnm = vnet_get_main ();
1371  dpdk_main_t *dm = &dpdk_main;
1373  dpdk_device_t *xd;
1375  int i;
1376 
1377  error = dpdk_lib_init (dm);
1378 
1379  if (error)
1380  clib_error_report (error);
1381 
1382  tm->worker_thread_release = 1;
1383 
/* Initial link-state refresh for every device, all stamped with one 'now'. */
1384  f64 now = vlib_time_now (vm);
1385  vec_foreach (xd, dm->devices)
1386  {
1387  dpdk_update_link_state (xd, now);
1388  }
1389 
1390  {
1391  /*
1392  * Extra set up for bond interfaces:
1393  * 1. Setup MACs for bond interfaces and their slave links which was set
1394  * in dpdk_device_setup() but needs to be done again here to take
1395  * effect.
1396  * 2. Set up info and register slave link state change callback handling.
1397  * 3. Set up info for bond interface related CLI support.
1398  */
1399  int nports = rte_eth_dev_count ();
1400  if (nports > 0)
1401  {
1402  for (i = 0; i < nports; i++)
1403  {
/* dm->devices is indexed by port number here; the ASSERT checks that. */
1404  xd = &dm->devices[i];
1405  ASSERT (i == xd->device_index);
1406  if (xd->pmd == VNET_DPDK_PMD_BOND)
1407  {
1408  u8 addr[6];
/* At most 16 slave port ids are requested for one bond port. */
1409  u8 slink[16];
1410  int nlink = rte_eth_bond_slaves_get (i, slink, 16);
1411  if (nlink > 0)
1412  {
1413  vnet_hw_interface_t *bhi;
1414  ethernet_interface_t *bei;
1415  int rv;
1416 
1417  /* Get MAC of 1st slave link */
1418  rte_eth_macaddr_get
1419  (slink[0], (struct ether_addr *) addr);
1420 
1421  /* Set MAC of bounded interface to that of 1st slave link */
1422  clib_warning ("Set MAC for bond port %d BondEthernet%d",
1423  i, xd->port_id);
1424  rv = rte_eth_bond_mac_address_set
1425  (i, (struct ether_addr *) addr);
/* A failure is only logged; setup continues. */
1426  if (rv)
1427  clib_warning ("Set MAC addr failure rv=%d", rv);
1428 
1429  /* Populate MAC of bonded interface in VPP hw tables */
1430  bhi = vnet_get_hw_interface
1431  (vnm, dm->devices[i].hw_if_index);
1432  bei = pool_elt_at_index
1433  (em->interfaces, bhi->hw_instance);
1434  clib_memcpy (bhi->hw_address, addr, 6);
1435  clib_memcpy (bei->address, addr, 6);
1436 
/* NOTE(review): the statements for the comment below (source lines 1438-1441) are missing from this listing. */
1437  /* Init l3 packet size allowed on bonded interface */
/*
 * Walks slink[] from the last entry down to index 0. After the
 * pre-decrement nlink equals the index just taken, so 'if (nlink)'
 * below is false only for slink[0].
 */
1442  while (nlink >= 1)
1443  { /* for all slave links */
1444  int slave = slink[--nlink];
1445  dpdk_device_t *sdev = &dm->devices[slave];
1446  vnet_hw_interface_t *shi;
1447  vnet_sw_interface_t *ssi;
1448  ethernet_interface_t *sei;
1449  /* Add MAC to all slave links except the first one */
1450  if (nlink)
1451  {
1452  clib_warning ("Add MAC for slave port %d", slave);
1453  rv = rte_eth_dev_mac_addr_add
1454  (slave, (struct ether_addr *) addr, 0);
1455  if (rv)
1456  clib_warning ("Add MAC addr failure rv=%d", rv);
1457  }
1458  /* Setup slave link state change callback handling */
/* NOTE(review): the remaining arguments of this call (source line 1461) and source line 1463 are missing from this listing. */
1459  rte_eth_dev_callback_register
1460  (slave, RTE_ETH_EVENT_INTR_LSC,
/* sxd points at the same dm->devices[slave] element as sdev above. */
1462  dpdk_device_t *sxd = &dm->devices[slave];
1464  sxd->bond_port = i;
1465  /* Set slaves bitmap for bonded interface */
1466  bhi->bond_info = clib_bitmap_set
1467  (bhi->bond_info, sdev->hw_if_index, 1);
1468  /* Set MACs and slave link flags on slave interface */
1469  shi = vnet_get_hw_interface (vnm, sdev->hw_if_index);
1470  ssi = vnet_get_sw_interface
1471  (vnm, sdev->vlib_sw_if_index);
1472  sei = pool_elt_at_index
1473  (em->interfaces, shi->hw_instance);
/* NOTE(review): source lines 1474-1475 are missing; 'ssi' is not used in any visible line. */
1476  clib_memcpy (shi->hw_address, addr, 6);
1477  clib_memcpy (sei->address, addr, 6);
1478  /* Set l3 packet size allowed as the lowest of slave */
/* NOTE(review): the rest of this 'if' (source lines 1480-1483) is missing from this listing. */
1479  if (bhi->max_l3_packet_bytes[VLIB_RX] >
1484  /* Set max packet size allowed as the lowest of slave */
1485  if (bhi->max_packet_bytes > shi->max_packet_bytes)
1486  bhi->max_packet_bytes = shi->max_packet_bytes;
1487  }
1488  }
1489  }
1490  }
1491  }
1492  }
1493 
1494  while (1)
1495  {
1496  /*
1497  * check each time through the loop in case intervals are changed
1498  */
/* NOTE(review): both arms of this conditional expression (source line 1500) are missing; presumably the smaller of the two intervals is chosen - confirm against the full source. */
1499  f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ?
1501 
1502  vlib_process_wait_for_event_or_clock (vm, min_wait);
1503 
1504  if (dm->admin_up_down_in_progress)
1505  /* skip the poll if an admin up down is in progress (on any interface) */
1506  continue;
1507 
1508  vec_foreach (xd, dm->devices)
1509  {
/* 'now' is sampled per device; each update runs only once its own interval has elapsed. */
1510  f64 now = vlib_time_now (vm);
1511  if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval)
1512  dpdk_update_counters (xd, now);
1513  if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval)
1514  dpdk_update_link_state (xd, now);
1515 
1516  }
1517  }
1518 
1519  return 0;
1520 }
1521 
/*
 * Node registration initializer for dpdk_process_node.
 *
 * NOTE(review): the opening line of this initializer (source line 1523) is
 * missing from the listing; the cross-reference index lists it as
 *   static vlib_node_registration_t dpdk_process_node
 *   (constructor) VLIB_REGISTER_NODE (dpdk_process_node)      [init.c:1523]
 *
 * Visible fields: the node runs dpdk_process, has type
 * VLIB_NODE_TYPE_PROCESS and is named "dpdk-process".
 * process_log2_n_stack_bytes = 17 presumably requests a 2^17-byte stack,
 * going by the field name - confirm against the vlib node documentation.
 */
1522 /* *INDENT-OFF* */
1524  .function = dpdk_process,
1525  .type = VLIB_NODE_TYPE_PROCESS,
1526  .name = "dpdk-process",
1527  .process_log2_n_stack_bytes = 17,
1528 };
1529 /* *INDENT-ON* */
1530 
/*
 * dpdk_init
 *
 * Prototype as recorded in this listing's cross-reference index:
 *   static clib_error_t *dpdk_init (vlib_main_t *vm)          [init.c:1532]
 *
 * Fills in dpdk_main before the DPDK library itself is brought up:
 *   - compile-time checks on the cache-line marker members;
 *   - stores vm, vnet_get_main () and &dpdk_config_main in dpdk_main;
 *   - looks up the "ethernet-input" node and records its index;
 *   - sets configuration defaults (nchannels, num_mbufs) and appends "vnet"
 *     to eal_init_args;
 *   - sizes dm->recycle and sets the buffer flags template;
 *   - runs dpdk_cli_init through vlib_call_init_function.
 *
 * Returns 0 on success, an error from clib_error_return() when the
 * "ethernet-input" node cannot be found, or the error produced by the
 * dpdk_cli_init call.
 *
 * NOTE(review): 'tm' is used below but its declaration is not visible
 * (source line 1537 is missing), and source lines 1543, 1566, 1570 and 1571
 * are missing as well.
 */
1531 static clib_error_t *
1533 {
1534  dpdk_main_t *dm = &dpdk_main;
1535  vlib_node_t *ei;
1536  clib_error_t *error = 0;
1538 
1539  /* verify that structs are cacheline aligned */
1540  STATIC_ASSERT (offsetof (dpdk_device_t, cacheline0) == 0,
1541  "Cache line marker must be 1st element in dpdk_device_t");
/* NOTE(review): the right-hand side of this comparison (source line 1543) is missing from this listing. */
1542  STATIC_ASSERT (offsetof (dpdk_device_t, cacheline1) ==
1544  "Data in cache line 0 is bigger than cache line size");
1545  STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0,
1546  "Cache line marker must be 1st element in frame_queue_trace_t");
1547 
1548  dm->vlib_main = vm;
1549  dm->vnet_main = vnet_get_main ();
1550  dm->conf = &dpdk_config_main;
1551 
/* Without the "ethernet-input" node initialization stops with an error. */
1552  ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input");
1553  if (ei == 0)
1554  return clib_error_return (0, "ethernet-input node AWOL");
1555 
1556  dm->ethernet_input_node_index = ei->index;
1557 
1558  dm->conf->nchannels = 4;
/* A num_mbufs of zero is replaced by NB_MBUF; a non-zero value is kept. */
1559  dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF;
/* Presumably the program-name slot of the EAL argument vector - TODO confirm. */
1560  vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet");
1561 
/* Make dm->recycle valid up to index n_thread_stacks - 1. */
1562  vec_validate (dm->recycle, tm->n_thread_stacks - 1);
1563 
1564  /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */
/* NOTE(review): the first operand of this expression (source line 1566) is missing from this listing. */
1565  dm->buffer_flags_template =
1567  | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED |
1568  VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1569 
1572 
1573  /* init CLI */
/* Both paths below return 'error'; it is non-zero only on the first. */
1574  if ((error = vlib_call_init_function (vm, dpdk_cli_init)))
1575  return error;
1576 
1577  return error;
1578 }
1579 
1581 
1582 
1583 /*
1584  * fd.io coding-style-patch-verification: ON
1585  *
1586  * Local Variables:
1587  * eval: (c-set-style "gnu")
1588  * End:
1589  */
u32 ** d_trace_buffers
Definition: dpdk.h:166
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
Definition: vec.h:432
#define DPDK_DEVICE_FLAG_PROMISC
Definition: dpdk.h:173
static void dpdk_bind_devices_to_uio(dpdk_config_main_t *conf)
Definition: init.c:647
f64 time_last_link_update
Definition: dpdk.h:212
vmrglw vmrglh hi
static u8 * format_bitmap_hex(u8 *s, va_list *args)
Format a bitmap as a string of hex bytes.
Definition: bitmap.h:744
format_function_t format_vlib_pci_addr
Definition: pci.h:238
#define vec_foreach_index(var, v)
Iterate over vector indices.
#define hash_set(h, key, value)
Definition: hash.h:254
sll srl srl sll sra u16x4 i
Definition: vector_sse2.h:337
#define clib_min(x, y)
Definition: clib.h:332
#define VNET_HW_INTERFACE_FLAG_SPEED_1G
Definition: interface.h:407
ethernet_main_t ethernet_main
Definition: init.c:45
clib_error_t * vnet_hw_interface_set_flags(vnet_main_t *vnm, u32 hw_if_index, u32 flags)
Definition: interface.c:538
static f64 vlib_process_wait_for_event_or_clock(vlib_main_t *vm, f64 dt)
Suspend a cooperative multi-tasking thread. Waits for an event, or for the indicated number of seconds...
Definition: node_funcs.h:699
u8 interface_name_format_decimal
Definition: dpdk.h:326
vnet_main_t * vnet_get_main(void)
Definition: misc.c:46
#define NB_MBUF
Definition: dpdk.h:54
u8 use_rss
Definition: dpdk.h:372
vnet_device_class_t dpdk_device_class
#define DPDK_DEVICE_VLAN_STRIP_OFF
Definition: dpdk.h:294
#define DPDK_DEVICE_FLAG_TX_OFFLOAD
Definition: dpdk.h:181
#define NULL
Definition: clib.h:55
u32 index
Definition: node.h:237
static f64 vlib_time_now(vlib_main_t *vm)
Definition: main.h:221
#define vec_add2_aligned(V, P, N, A)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:569
static uword * clib_bitmap_or(uword *ai, uword *bi)
Logical operator across two bitmaps.
static u32 dpdk_flag_change(vnet_main_t *vnm, vnet_hw_interface_t *hi, u32 flags)
Definition: init.c:78
void dpdk_update_link_state(dpdk_device_t *xd, f64 now)
Definition: init.c:1252
u16 flags
Definition: dpdk.h:171
static vnet_hw_interface_t * vnet_get_hw_interface(vnet_main_t *vnm, u32 hw_if_index)
#define LINK_STATE_ELOGS
Definition: init.c:39
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:518
dpdk_device_and_queue_t ** devices_by_hqos_cpu
Definition: dpdk.h:342
#define DPDK_NB_RX_DESC_VIRTIO
Definition: dpdk_priv.h:21
clib_error_t * errors
Definition: dpdk.h:226
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:557
#define DPDK_DEVICE_FLAG_HQOS
Definition: dpdk.h:178
u32 per_interface_next_index
Definition: dpdk.h:159
u8 enable_tcp_udp_checksum
Definition: dpdk.h:313
static uword * clib_bitmap_set(uword *ai, uword i, uword value)
Sets the ith bit of a bitmap to new_value. Removes trailing zeros from the bitmap. ...
Definition: bitmap.h:167
#define DPDK_DEVICE_VLAN_STRIP_ON
Definition: dpdk.h:295
static vnet_sw_interface_t * vnet_get_sw_interface(vnet_main_t *vnm, u32 sw_if_index)
vlib_buffer_t * buffer_templates
Definition: dpdk.h:348
u8 * format(u8 *s, const char *fmt,...)
Definition: format.c:419
#define DPDK_NB_TX_DESC_DEFAULT
Definition: dpdk_priv.h:20
#define foreach_eal_double_hyphen_predicate_arg
Definition: dpdk_priv.h:32
unformat_function_t unformat_vlib_pci_addr
Definition: pci.h:237
#define VNET_HW_INTERFACE_FLAG_LINK_UP
Definition: interface.h:394
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:443
u16 device_id
Definition: pci.h:80
dpdk_device_config_hqos_t hqos
Definition: dpdk.h:302
#define pool_get(P, E)
Allocate an object E from a pool P (unspecified alignment).
Definition: pool.h:225
vlib_pci_addr_t bus_address
Definition: pci.h:58
vlib_pci_main_t pci_main
Definition: pci.c:53
#define vec_reset_length(v)
Reset vector length to zero NULL-pointer tolerant.
#define DPDK_DEVICE_FLAG_PMD
Definition: dpdk.h:174
u8 port_id
Definition: dpdk.h:205
static vnet_sw_interface_t * vnet_get_hw_sw_interface(vnet_main_t *vnm, u32 hw_if_index)
#define DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM
Definition: dpdk.h:182
struct rte_mbuf *** tx_vectors
Definition: dpdk.h:162
foreach_dpdk_device_config_item clib_bitmap_t * workers
Definition: dpdk.h:300
i16 current_data
signed offset in data[], pre_data[] that we are currently processing.
Definition: buffer.h:68
#define pool_foreach(VAR, POOL, BODY)
Iterate through pool.
Definition: pool.h:437
#define VLIB_INIT_FUNCTION(x)
Definition: init.h:111
dpdk_config_main_t dpdk_config_main
Definition: dpdk.h:335
vlib_node_registration_t dpdk_input_node
(constructor) VLIB_REGISTER_NODE (dpdk_input_node)
Definition: node.c:679
dpdk_device_config_t default_devconf
Definition: dpdk.h:329
f64 stat_poll_interval
Definition: dpdk.h:380
static char * vlib_unix_get_runtime_dir(void)
Definition: unix.h:138
static dpdk_port_type_t port_type_from_speed_capa(struct rte_eth_dev_info *dev_info)
Definition: init.c:57
#define VLIB_BUFFER_TOTAL_LENGTH_VALID
Definition: buffer.h:97
char i8
Definition: types.h:45
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
#define clib_error_return(e, args...)
Definition: error.h:99
u16 rx_q_used
Definition: dpdk.h:194
#define VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES
Definition: buffer.h:423
#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG
Definition: dpdk.h:176
void dpdk_device_setup(dpdk_device_t *xd)
Definition: common.c:39
u16 vendor_id
Definition: pci.h:79
#define vlib_call_init_function(vm, x)
Definition: init.h:162
clib_error_t * unformat_rss_fn(unformat_input_t *input, uword *rss_fn)
Definition: format.c:742
#define DPDK_NB_TX_DESC_VIRTIO
Definition: dpdk_priv.h:22
u32 device_index
Definition: dpdk.h:153
struct rte_eth_conf port_conf
Definition: dpdk.h:197
static clib_error_t * dpdk_init(vlib_main_t *vm)
Definition: init.c:1532
u16 device_class
Definition: pci.h:78
#define fl(x, y)
f64 time_last_stats_update
Definition: dpdk.h:219
int dpdk_port_state_callback(uint8_t port_id, enum rte_eth_event_type type, void *param, void *ret_param)
Definition: common.c:301
u32 vlib_sw_if_index
Definition: dpdk.h:156
struct rte_eth_txconf tx_conf
Definition: dpdk.h:198
#define hash_get(h, key)
Definition: hash.h:248
void dpdk_device_start(dpdk_device_t *xd)
Definition: common.c:119
#define clib_bitmap_foreach(i, ai, body)
Macro to iterate across set bits in a bitmap.
Definition: bitmap.h:361
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
Definition: pool.h:458
#define vec_insert(V, N, M)
Insert N vector elements starting at element M, initialize new elements to zero (no header...
Definition: vec.h:682
vlib_pci_addr_t pci_addr
Definition: dpdk.h:290
clib_error_t * dpdk_buffer_pool_create(vlib_main_t *vm, unsigned num_mbufs, unsigned socket_id)
Definition: buffer.c:413
static clib_error_t * dpdk_config(vlib_main_t *vm, unformat_input_t *input)
Definition: init.c:826
#define foreach_eal_double_hyphen_arg
Definition: dpdk_priv.h:48
#define ETHERNET_INTERFACE_FLAG_CONFIG_MTU(flags)
Definition: ethernet.h:119
u8 ** eal_init_args
Definition: dpdk.h:309
#define VNET_HW_INTERFACE_FLAG_SPEED_10M
Definition: interface.h:405
#define VNET_SW_INTERFACE_FLAG_BOND_SLAVE
Definition: interface.h:581
#define foreach_eal_single_hyphen_mandatory_arg
Definition: dpdk_priv.h:38
vlib_pci_device_t * pci_devs
Definition: pci.h:116
struct _unformat_input_t unformat_input_t
#define VNET_HW_INTERFACE_FLAG_HALF_DUPLEX
Definition: interface.h:397
#define foreach_dpdk_pmd
Definition: dpdk.h:59
#define VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX
Definition: buffer.h:422
#define ELOG_DATA(em, f)
Definition: elog.h:481
dpdk_port_type_t port_type
Definition: dpdk.h:220
#define VLIB_CONFIG_FUNCTION(x, n,...)
Definition: init.h:119
#define VLIB_FRAME_SIZE
Definition: node.h:328
u16 tx_q_used
Definition: dpdk.h:193
u16 nb_rx_desc
Definition: dpdk.h:195
u32 hw_if_index
Definition: dpdk.h:155
void unformat_init_vector(unformat_input_t *input, u8 *vector_string)
Definition: unformat.c:1031
#define DPDK_DEVICE_FLAG_ADMIN_UP
Definition: dpdk.h:172
u32 ** recycle
Definition: dpdk.h:345
#define VNET_HW_INTERFACE_BOND_INFO_SLAVE
Definition: interface.h:482
u8 bond_port
Definition: dpdk.h:209
#define foreach_eal_single_hyphen_arg
Definition: dpdk_priv.h:42
#define DPDK_NB_RX_DESC_DEFAULT
Definition: dpdk_priv.h:19
#define UNFORMAT_END_OF_INPUT
Definition: format.h:143
svmdb_client_t * c
static clib_error_t * dpdk_device_config(dpdk_config_main_t *conf, vlib_pci_addr_t pci_addr, unformat_input_t *input, u8 is_default)
Definition: init.c:726
dpdk_device_t * devices
Definition: dpdk.h:341
vlib_main_t * vm
Definition: buffer.c:283
u32 vlib_buffer_get_or_create_free_list(vlib_main_t *vm, u32 n_data_bytes, char *fmt,...)
Definition: buffer.c:436
static void dpdk_update_counters(dpdk_device_t *xd, f64 now)
Definition: dpdk_priv.h:78
u8 nchannels_set_manually
Definition: dpdk.h:317
#define vec_free(V)
Free vector's memory (no header).
Definition: vec.h:336
volatile u32 ** lockp
Definition: dpdk.h:150
dpdk_device_config_t * dev_confs
Definition: dpdk.h:330
struct rte_mbuf *** rx_vectors
Definition: dpdk.h:163
#define clib_warning(format, args...)
Definition: error.h:59
#define clib_memcpy(a, b, c)
Definition: string.h:69
dpdk_pmd_t pmd
Definition: dpdk.h:168
format_function_t format_dpdk_device_errors
Definition: dpdk.h:453
elog_main_t elog_main
Definition: main.h:155
#define ETHERNET_INTERFACE_FLAG_ACCEPT_ALL
Definition: ethernet.h:113
#define VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD
Definition: interface.h:423
u8 coremask_set_manually
Definition: dpdk.h:316
#define ELOG_TYPE_DECLARE(f)
Definition: elog.h:439
vlib_node_t * vlib_get_node_by_name(vlib_main_t *vm, u8 *name)
Definition: node.c:45
#define VNET_HW_INTERFACE_FLAG_SPEED_10G
Definition: interface.h:408
#define vec_validate_ha(V, I, H, A)
Make sure vector is long enough for given index (general version).
Definition: vec.h:412
static void dpdk_device_lock_init(dpdk_device_t *xd)
Definition: init.c:128
void dpdk_device_stop(dpdk_device_t *xd)
Definition: common.c:163
u8 * interface_name_suffix
Definition: dpdk.h:187
#define hash_create(elts, value_bytes)
Definition: hash.h:658
#define VNET_HW_INTERFACE_FLAG_FULL_DUPLEX
Definition: interface.h:398
u32 max_l3_packet_bytes[VLIB_N_RX_TX]
Definition: interface.h:468
#define ASSERT(truth)
void dpdk_device_config_hqos_default(dpdk_device_config_hqos_t *hqos)
Definition: hqos.c:205
format_function_t format_dpdk_device_name
Definition: dpdk.h:451
unsigned int u32
Definition: types.h:88
int hqos_cpu_count
Definition: dpdk.h:376
void vnet_hw_interface_assign_rx_thread(vnet_main_t *vnm, u32 hw_if_index, u16 queue_id, uword thread_index)
Definition: devices.c:128
u32 poll_sleep_usec
Definition: dpdk.h:383
static uword dpdk_process(vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
Definition: init.c:1367
Bitmaps built as vectors of machine words.
clib_error_t * ethernet_register_interface(vnet_main_t *vnm, u32 dev_class_index, u32 dev_instance, u8 *address, u32 *hw_if_index_return, ethernet_flag_change_function_t flag_change)
Definition: interface.c:273
#define clib_error_report(e)
Definition: error.h:113
#define clib_bitmap_free(v)
Free a bitmap.
Definition: bitmap.h:92
#define DPDK_LINK_POLL_INTERVAL
Definition: dpdk.h:232
dpdk_main_t dpdk_main
Definition: init.c:37
uword * thread_registrations_by_name
Definition: threads.h:297
clib_error_t * dpdk_cli_init(vlib_main_t *vm)
Definition: cli.c:1942
struct rte_eth_link link
Definition: dpdk.h:211
static vlib_main_t * vlib_get_main(void)
Definition: global_funcs.h:23
clib_error_t * dpdk_port_setup_hqos(dpdk_device_t *xd, dpdk_device_config_hqos_t *hqos)
Definition: hqos.c:247
u64 uword
Definition: types.h:112
dpdk_port_type_t
Definition: dpdk.h:90
static uword clib_bitmap_count_set_bits(uword *ai)
Return the number of set bits in a bitmap.
Definition: bitmap.h:441
Definition: defs.h:47
clib_error_t * vlib_unix_recursive_mkdir(char *path)
Definition: util.c:102
#define DPDK_STATS_POLL_INTERVAL
Definition: dpdk.h:229
#define VNET_HW_INTERFACE_FLAG_SPEED_100M
Definition: interface.h:406
clib_error_t * vlib_pci_bind_to_uio(vlib_pci_device_t *d, char *uio_driver_name)
Definition: pci.c:97
static vlib_node_registration_t dpdk_process_node
(constructor) VLIB_REGISTER_NODE (dpdk_process_node)
Definition: init.c:1523
u32 ethernet_input_node_index
Definition: dpdk.h:357
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
double f64
Definition: types.h:142
unsigned char u8
Definition: types.h:56
uword unformat_vlib_cli_sub_input(unformat_input_t *i, va_list *args)
Definition: cli.c:152
u8 admin_up_down_in_progress
Definition: dpdk.h:370
#define STATIC_ASSERT(truth,...)
static struct rte_eth_conf port_conf_template
Definition: init.c:43
static uword unformat_bitmap_list(unformat_input_t *input, va_list *va)
unformat a list of bit ranges into a bitmap (eg "0-3,5-7,11" )
Definition: bitmap.h:693
static void unformat_free(unformat_input_t *i)
Definition: format.h:161
#define DPDK_DEVICE_FLAG_BOND_SLAVE
Definition: dpdk.h:179
static clib_error_t * dpdk_lib_init(dpdk_main_t *dm)
Definition: init.c:141
#define hash_get_mem(h, key)
Definition: hash.h:268
u32 buffer_flags_template
Definition: dpdk.h:351
static void * clib_mem_alloc_aligned(uword size, uword align)
Definition: mem.h:120
#define VLIB_BUFFER_EXT_HDR_VALID
Definition: buffer.h:101
static void vlib_buffer_init_for_free_list(vlib_buffer_t *dst, vlib_buffer_free_list_t *fl)
Definition: buffer_funcs.h:770
#define vnet_buffer(b)
Definition: buffer.h:306
static u32 random_u32(u32 *seed)
32-bit random number generator
Definition: random.h:69
#define VNET_HW_INTERFACE_FLAG_SPEED_40G
Definition: interface.h:409
u8 * format_unformat_error(u8 *s, va_list *va)
Definition: unformat.c:91
u32 vlib_buffer_free_list_index
Definition: dpdk.h:354
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:143
int hqos_cpu_first_index
Definition: dpdk.h:375
static vlib_thread_main_t * vlib_get_thread_main()
Definition: global_funcs.h:32
#define ETHERNET_MAX_PACKET_BYTES
Definition: ethernet.h:106
#define vec_foreach(var, vec)
Vector iterator.
i8 cpu_socket
Definition: dpdk.h:169
#define ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC(flags)
Definition: ethernet.h:114
uword * cpu_socket_bitmap
Definition: threads.h:332
vhost_vring_addr_t addr
Definition: vhost-user.h:83
static vlib_buffer_free_list_t * vlib_buffer_get_free_list(vlib_main_t *vm, u32 free_list_index)
Definition: buffer_funcs.h:412
u8 * uio_driver_name
Definition: dpdk.h:311
vlib_thread_registration_t ** registrations
Definition: threads.h:295
u32 flags
Definition: vhost-user.h:77
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:67
unformat_function_t unformat_dpdk_log_level
Definition: dpdk.h:458
ethernet_interface_t * interfaces
Definition: ethernet.h:244
u32 flags
buffer flags: VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index, VLIB_BUFFER_IS_TRACED: trace this buffer.
Definition: buffer.h:75
vnet_main_t * vnet_main
Definition: dpdk.h:387
u16 nb_tx_desc
Definition: dpdk.h:184
clib_error_t * unformat_hqos(unformat_input_t *input, dpdk_device_config_hqos_t *hqos)
Definition: format.c:779
void vlib_cli_output(vlib_main_t *vm, char *fmt,...)
Definition: cli.c:680
uword * device_config_index_by_pci_addr
Definition: dpdk.h:331
uword unformat_skip_white_space(unformat_input_t *input)
Definition: unformat.c:815
static uword vnet_hw_interface_is_link_up(vnet_main_t *vnm, u32 hw_if_index)
volatile u32 worker_thread_release
Definition: threads.h:338
static void vnet_hw_interface_set_input_node(vnet_main_t *vnm, u32 hw_if_index, u32 node_index)
Definition: devices.h:78
vnet_device_main_t vnet_device_main
Definition: devices.c:22
uword unformat(unformat_input_t *i, const char *fmt,...)
Definition: unformat.c:972
Definition: defs.h:46
f64 link_state_poll_interval
Definition: dpdk.h:379
CLIB vectors are ubiquitous dynamically resized arrays with user defined "headers".
static uword unformat_check_input(unformat_input_t *i)
Definition: format.h:169
dpdk_config_main_t * conf
Definition: dpdk.h:388
vlib_main_t * vlib_main
Definition: dpdk.h:386