FD.io VPP  v21.06
Vector Packet Processing
init.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <vnet/vnet.h>
16 #include <vppinfra/vec.h>
17 #include <vppinfra/error.h>
18 #include <vppinfra/format.h>
19 #include <vppinfra/bitmap.h>
20 #include <vppinfra/linux/sysfs.h>
21 #include <vlib/unix/unix.h>
22 #include <vlib/log.h>
23 
24 #include <vnet/ethernet/ethernet.h>
26 #include <dpdk/buffer.h>
27 #include <dpdk/device/dpdk.h>
29 #include <vlib/pci/pci.h>
30 #include <vlib/vmbus/vmbus.h>
31 
32 #include <rte_ring.h>
33 #include <rte_vect.h>
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <unistd.h>
38 #include <sys/stat.h>
39 #include <sys/mount.h>
40 #include <string.h>
41 #include <fcntl.h>
42 #include <dirent.h>
43 
44 #include <dpdk/device/dpdk_priv.h>
45 
46 #define ETHER_MAX_LEN 1518 /**< Maximum frame len, including CRC. */
47 
50 
51 #define LINK_STATE_ELOGS 0
52 
53 /* Port configuration, mildly modified Intel app values */
54 
/*
 * Choose a dpdk_port_type_t from the speed capability bits advertised in
 * dev_info->speed_capa. The bits are tested from ETH_LINK_SPEED_100G down to
 * ETH_LINK_SPEED_1G, so the first (highest) matching bit decides.
 *
 * NOTE(review): this listing has lost the statement that follows each
 * condition and the final fallback statement; only the conditions are
 * visible here.
 */
55 static dpdk_port_type_t
56 port_type_from_speed_capa (struct rte_eth_dev_info *dev_info)
57 {
58 
59  if (dev_info->speed_capa & ETH_LINK_SPEED_100G)
61  else if (dev_info->speed_capa & ETH_LINK_SPEED_56G)
63  else if (dev_info->speed_capa & ETH_LINK_SPEED_50G)
65  else if (dev_info->speed_capa & ETH_LINK_SPEED_40G)
67  else if (dev_info->speed_capa & ETH_LINK_SPEED_25G)
69  else if (dev_info->speed_capa & ETH_LINK_SPEED_20G)
71  else if (dev_info->speed_capa & ETH_LINK_SPEED_10G)
73  else if (dev_info->speed_capa & ETH_LINK_SPEED_5G)
75  else if (dev_info->speed_capa & ETH_LINK_SPEED_2_5G)
77  else if (dev_info->speed_capa & ETH_LINK_SPEED_1G)
79 
81 }
82 
/*
 * Map a numeric link speed (one of the ETH_SPEED_NUM_* values switched on
 * below) to a dpdk_port_type_t; unknown speeds take the default label.
 *
 * NOTE(review): the signature line and the statement under every case label
 * are absent from this listing. A caller further down invokes
 * port_type_from_link_speed (l.link_speed), which is presumably this
 * function - confirm against the real file.
 */
83 static dpdk_port_type_t
85 {
86  switch (link_speed)
87  {
88  case ETH_SPEED_NUM_1G:
90  case ETH_SPEED_NUM_2_5G:
92  case ETH_SPEED_NUM_5G:
94  case ETH_SPEED_NUM_10G:
96  case ETH_SPEED_NUM_20G:
98  case ETH_SPEED_NUM_25G:
100  case ETH_SPEED_NUM_40G:
102  case ETH_SPEED_NUM_50G:
104  case ETH_SPEED_NUM_56G:
106  case ETH_SPEED_NUM_100G:
108  default:
110  }
111 }
112 
/*
 * Interface flag-change handler for a DPDK device.
 *
 * Depending on `flags` it clears or sets DPDK_DEVICE_FLAG_PROMISC on the
 * device, or copies hi->max_packet_bytes into the port's max_rx_pkt_len and
 * re-runs dpdk_device_setup().
 *
 * Returns the promiscuous state that was in effect before the call (0 or 1)
 * for the two promisc cases, 0 after a max-rx-length change, and ~0 for any
 * other flag value.
 *
 * NOTE(review): the signature line, the declaration of xd and the three case
 * labels are absent from this listing.
 */
113 static u32
115 {
116  dpdk_main_t *dm = &dpdk_main;
/* Remember the current promiscuous state so it can be handed back. */
118  u32 old = (xd->flags & DPDK_DEVICE_FLAG_PROMISC) != 0;
119 
120  switch (flags)
121  {
123  /* set to L3/non-promisc mode */
124  xd->flags &= ~DPDK_DEVICE_FLAG_PROMISC;
125  break;
127  xd->flags |= DPDK_DEVICE_FLAG_PROMISC;
128  break;
130  xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes;
131  dpdk_device_setup (xd);
132  return 0;
133  default:
134  return ~0;
135  }
136 
/* The port itself is only touched while the device is admin up. */
137  if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
138  {
139  if (xd->flags & DPDK_DEVICE_FLAG_PROMISC)
140  rte_eth_promiscuous_enable (xd->port_id);
141  else
142  rte_eth_promiscuous_disable (xd->port_id);
143  }
144 
145  return old;
146 }
147 
/*
 * Return 1 when DEV_RX_OFFLOAD_KEEP_CRC is not set in the device's rx
 * offloads, and 0 when it is set.
 *
 * NOTE(review): the signature line is absent from this listing; callers
 * further down use dpdk_port_crc_strip_enabled (xd), which is presumably
 * this function.
 */
148 static int
150 {
151  return !(xd->port_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC);
152 }
153 
154 /* The function check_l3cache helps check if Level 3 cache exists or not on current CPUs
155  return value 1: exist.
156  return value 0: not exist.
157 */
158 static int
160 {
161 
162  struct dirent *dp;
163  clib_error_t *err;
164  const char *sys_cache_dir = "/sys/devices/system/cpu/cpu0/cache";
165  DIR *dir_cache = opendir (sys_cache_dir);
166 
167  if (dir_cache == NULL)
168  return -1;
169 
170  while ((dp = readdir (dir_cache)) != NULL)
171  {
172  if (dp->d_type == DT_DIR)
173  {
174  u8 *p = NULL;
175  int level_cache = -1;
176 
177  p = format (p, "%s/%s/%s%c", sys_cache_dir, dp->d_name, "level", 0);
178  if ((err = clib_sysfs_read ((char *) p, "%d", &level_cache)))
179  clib_error_free (err);
180 
181  if (level_cache == 3)
182  {
183  closedir (dir_cache);
184  return 1;
185  }
186  }
187  }
188 
189  if (dir_cache != NULL)
190  closedir (dir_cache);
191 
192  return 0;
193 }
194 
/*
 * Turn on TCP and UDP tx checksum offload in the device's port
 * configuration and mark the device with the TX_OFFLOAD and
 * INTEL_PHDR_CKSUM flags.
 *
 * NOTE(review): the signature line is absent from this listing.
 */
195 static void
197 {
198  xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
199  xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
200  xd->flags |= DPDK_DEVICE_FLAG_TX_OFFLOAD |
201  DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM;
202 }
203 
204 static clib_error_t *
206 {
207  u32 nports;
208  u32 mtu, max_rx_frame;
209  int i;
216  dpdk_device_t *xd;
217  vlib_pci_addr_t last_pci_addr;
218  u32 last_pci_addr_port = 0;
219  u8 af_packet_instance_num = 0;
220  last_pci_addr.as_u32 = ~0;
221 
222  nports = rte_eth_dev_count_avail ();
223 
224  if (nports < 1)
225  {
226  dpdk_log_notice ("DPDK drivers found no Ethernet devices...");
227  }
228 
229  if (CLIB_DEBUG > 0)
230  dpdk_log_notice ("DPDK drivers found %d ports...", nports);
231 
232  if (dm->conf->enable_tcp_udp_checksum)
233  dm->buffer_flags_template &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT
234  | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
235 
236  /* vlib_buffer_t template */
239  for (i = 0; i < tm->n_vlib_mains; i++)
240  {
242  clib_memset (&ptd->buffer_template, 0, sizeof (vlib_buffer_t));
244  vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_TX] = (u32) ~ 0;
245  }
246 
247  /* *INDENT-OFF* */
248  RTE_ETH_FOREACH_DEV(i)
249  {
250  u8 addr[6];
251  int vlan_off;
252  struct rte_eth_dev_info dev_info;
253  struct rte_pci_device *pci_dev;
254  struct rte_vmbus_device *vmbus_dev;
255  dpdk_portid_t next_port_id;
256  dpdk_device_config_t *devconf = 0;
257  vlib_pci_addr_t pci_addr;
258  vlib_vmbus_addr_t vmbus_addr;
259  uword *p = 0;
260 
261  if (!rte_eth_dev_is_valid_port(i))
262  continue;
263 
264  rte_eth_dev_info_get (i, &dev_info);
265 
266  if (dev_info.device == 0)
267  {
268  dpdk_log_notice ("DPDK bug: missing device info. Skipping %s device",
269  dev_info.driver_name);
270  continue;
271  }
272 
273  pci_dev = dpdk_get_pci_device (&dev_info);
274 
275  if (pci_dev)
276  {
277  pci_addr.domain = pci_dev->addr.domain;
278  pci_addr.bus = pci_dev->addr.bus;
279  pci_addr.slot = pci_dev->addr.devid;
280  pci_addr.function = pci_dev->addr.function;
282  pci_addr.as_u32);
283  }
284 
285  vmbus_dev = dpdk_get_vmbus_device (&dev_info);
286 
287  if (vmbus_dev)
288  {
289  unformat_input_t input_vmbus;
290 
291  unformat_init_vector (&input_vmbus, (u8 *) dev_info.device->name);
292  if (unformat (&input_vmbus, "%U", unformat_vlib_vmbus_addr,
293  &vmbus_addr))
294  {
296  &vmbus_addr);
297  }
298  }
299 
300  if (p)
301  {
302  devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
303  /* If device is blacklisted, we should skip it */
304  if (devconf->is_blacklisted)
305  {
306  continue;
307  }
308  }
309  else
310  devconf = &dm->conf->default_devconf;
311 
312  /* Create vnet interface */
316  xd->cpu_socket = (i8) rte_eth_dev_socket_id (i);
317  if (p)
318  {
319  xd->name = devconf->name;
320  }
321 
322  /* Handle representor devices that share the same PCI ID */
323  if (dev_info.switch_info.domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
324  {
325  if (dev_info.switch_info.port_id != (uint16_t)-1)
326  xd->interface_name_suffix = format (0, "%d", dev_info.switch_info.port_id);
327  }
328  /* Handle interface naming for devices with multiple ports sharing same PCI ID */
329  else if (pci_dev &&
330  ((next_port_id = rte_eth_find_next (i + 1)) != RTE_MAX_ETHPORTS))
331  {
332  struct rte_eth_dev_info di = { 0 };
333  struct rte_pci_device *next_pci_dev;
334  rte_eth_dev_info_get (next_port_id, &di);
335  next_pci_dev = di.device ? RTE_DEV_TO_PCI (di.device) : 0;
336  if (next_pci_dev &&
337  pci_addr.as_u32 != last_pci_addr.as_u32 &&
338  memcmp (&pci_dev->addr, &next_pci_dev->addr,
339  sizeof (struct rte_pci_addr)) == 0)
340  {
341  xd->interface_name_suffix = format (0, "0");
342  last_pci_addr.as_u32 = pci_addr.as_u32;
343  last_pci_addr_port = i;
344  }
345  else if (pci_addr.as_u32 == last_pci_addr.as_u32)
346  {
348  format (0, "%u", i - last_pci_addr_port);
349  }
350  else
351  {
352  last_pci_addr.as_u32 = ~0;
353  }
354  }
355  else
356  last_pci_addr.as_u32 = ~0;
357 
358  clib_memcpy (&xd->tx_conf, &dev_info.default_txconf,
359  sizeof (struct rte_eth_txconf));
360 
361  if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
362  {
363  xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_IPV4_CKSUM;
364  xd->flags |= DPDK_DEVICE_FLAG_RX_IP4_CKSUM;
365  }
366 
367  if (dm->conf->enable_tcp_udp_checksum)
368  {
369  if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM)
370  xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_UDP_CKSUM;
371  if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)
372  xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_TCP_CKSUM;
373  }
374 
375  if (dm->conf->no_multi_seg)
376  {
377  xd->port_conf.txmode.offloads &= ~DEV_TX_OFFLOAD_MULTI_SEGS;
378  xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
379  xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_SCATTER;
380  }
381  else
382  {
383  xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS;
384  xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
385  xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
386  xd->flags |= DPDK_DEVICE_FLAG_MAYBE_MULTISEG;
387  }
388 
389  xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains);
390 
391  if (devconf->num_tx_queues > 0
392  && devconf->num_tx_queues < xd->tx_q_used)
393  xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues);
394 
395  if (devconf->num_rx_queues > 1
396  && dev_info.max_rx_queues >= devconf->num_rx_queues)
397  {
398  xd->rx_q_used = devconf->num_rx_queues;
399  xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
400  if (devconf->rss_fn == 0)
401  xd->port_conf.rx_adv_conf.rss_conf.rss_hf =
402  ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP;
403  else
404  {
405  u64 unsupported_bits;
406  xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn;
407  unsupported_bits = xd->port_conf.rx_adv_conf.rss_conf.rss_hf;
408  unsupported_bits &= ~dev_info.flow_type_rss_offloads;
409  if (unsupported_bits)
410  dpdk_log_warn ("Unsupported RSS hash functions: %U",
411  format_dpdk_rss_hf_name, unsupported_bits);
412  }
413  xd->port_conf.rx_adv_conf.rss_conf.rss_hf &=
414  dev_info.flow_type_rss_offloads;
415  }
416  else
417  xd->rx_q_used = 1;
418 
421 
422  xd->flags |= DPDK_DEVICE_FLAG_PMD;
423 
424  /* workaround for drivers not setting driver_name */
425  if ((!dev_info.driver_name) && (pci_dev))
426  dev_info.driver_name = pci_dev->driver->driver.name;
427 
428  ASSERT (dev_info.driver_name);
429 
430  if (!xd->pmd)
431  {
432 
433 
434 #define _(s,f) else if (dev_info.driver_name && \
435  !strcmp(dev_info.driver_name, s)) \
436  xd->pmd = VNET_DPDK_PMD_##f;
437  if (0)
438  ;
440 #undef _
441  else
443 
447 
448  switch (xd->pmd)
449  {
450  /* Drivers with valid speed_capa set */
451  case VNET_DPDK_PMD_I40E:
452  xd->flags |= DPDK_DEVICE_FLAG_INT_UNMASKABLE;
453  case VNET_DPDK_PMD_E1000EM:
454  case VNET_DPDK_PMD_IGB:
455  case VNET_DPDK_PMD_IXGBE:
456  case VNET_DPDK_PMD_ICE:
457  xd->port_type = port_type_from_speed_capa (&dev_info);
458  xd->supported_flow_actions = VNET_FLOW_ACTION_MARK |
459  VNET_FLOW_ACTION_REDIRECT_TO_NODE |
460  VNET_FLOW_ACTION_REDIRECT_TO_QUEUE |
461  VNET_FLOW_ACTION_BUFFER_ADVANCE |
462  VNET_FLOW_ACTION_COUNT | VNET_FLOW_ACTION_DROP |
463  VNET_FLOW_ACTION_RSS;
464 
465  if (dm->conf->no_tx_checksum_offload == 0)
466  {
467  xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
468  xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
469  xd->flags |=
470  DPDK_DEVICE_FLAG_TX_OFFLOAD |
471  DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM;
472  }
473 
474  xd->port_conf.intr_conf.rxq = 1;
475  break;
476  case VNET_DPDK_PMD_CXGBE:
477  case VNET_DPDK_PMD_MLX4:
478  case VNET_DPDK_PMD_MLX5:
479  case VNET_DPDK_PMD_QEDE:
480  case VNET_DPDK_PMD_BNXT:
481  xd->port_type = port_type_from_speed_capa (&dev_info);
482  break;
483 
484  /* SR-IOV VFs */
485  case VNET_DPDK_PMD_I40EVF:
486  xd->flags |= DPDK_DEVICE_FLAG_INT_UNMASKABLE;
487  case VNET_DPDK_PMD_IGBVF:
488  case VNET_DPDK_PMD_IXGBEVF:
490  if (dm->conf->no_tx_checksum_offload == 0)
491  {
492  xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
493  xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
494  xd->flags |=
495  DPDK_DEVICE_FLAG_TX_OFFLOAD |
496  DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM;
497  }
498  /* DPDK bug in multiqueue... */
499  /* xd->port_conf.intr_conf.rxq = 1; */
500  break;
501 
502  /* iAVF */
503  case VNET_DPDK_PMD_IAVF:
504  xd->flags |= DPDK_DEVICE_FLAG_INT_UNMASKABLE;
507  VNET_FLOW_ACTION_MARK | VNET_FLOW_ACTION_REDIRECT_TO_NODE |
508  VNET_FLOW_ACTION_REDIRECT_TO_QUEUE |
509  VNET_FLOW_ACTION_BUFFER_ADVANCE | VNET_FLOW_ACTION_COUNT |
510  VNET_FLOW_ACTION_DROP | VNET_FLOW_ACTION_RSS;
511 
512  if (dm->conf->no_tx_checksum_offload == 0)
513  {
514  xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
515  xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
516  xd->flags |=
517  DPDK_DEVICE_FLAG_TX_OFFLOAD |
518  DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM;
519  }
520  /* DPDK bug in multiqueue... */
521  /* xd->port_conf.intr_conf.rxq = 1; */
522  break;
523 
524  case VNET_DPDK_PMD_THUNDERX:
526 
527  if (dm->conf->no_tx_checksum_offload == 0)
528  {
529  xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
530  xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
531  xd->flags |= DPDK_DEVICE_FLAG_TX_OFFLOAD;
532  }
533  break;
534 
535  case VNET_DPDK_PMD_ENA:
537  xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_SCATTER;
538  xd->port_conf.intr_conf.rxq = 1;
539  break;
540 
541  case VNET_DPDK_PMD_DPAA2:
543  break;
544 
545  /* Cisco VIC */
546  case VNET_DPDK_PMD_ENIC:
547  {
548  struct rte_eth_link l;
549  rte_eth_link_get_nowait (i, &l);
550  xd->port_type = port_type_from_link_speed (l.link_speed);
551  if (dm->conf->enable_tcp_udp_checksum)
553  }
554  break;
555 
556  /* Intel Red Rock Canyon */
557  case VNET_DPDK_PMD_FM10K:
559  break;
560 
561  /* virtio */
562  case VNET_DPDK_PMD_VIRTIO:
563  xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
567  break;
568 
569  /* vmxnet3 */
570  case VNET_DPDK_PMD_VMXNET3:
572  xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS;
573  break;
574 
575  case VNET_DPDK_PMD_AF_PACKET:
577  xd->af_packet_instance_num = af_packet_instance_num++;
578  break;
579 
580  case VNET_DPDK_PMD_VIRTIO_USER:
582  break;
583 
584  case VNET_DPDK_PMD_VHOST_ETHER:
586  break;
587 
588  case VNET_DPDK_PMD_LIOVF_ETHER:
590  break;
591 
592  case VNET_DPDK_PMD_FAILSAFE:
594  xd->port_conf.intr_conf.lsc = 1;
595  break;
596 
597  case VNET_DPDK_PMD_NETVSC:
598  {
599  struct rte_eth_link l;
600  rte_eth_link_get_nowait (i, &l);
602  }
603  break;
604 
605  default:
607  }
608 
609  if (devconf->num_rx_desc)
610  xd->nb_rx_desc = devconf->num_rx_desc;
611  else {
612 
613  /* If num_rx_desc is not specified by VPP user, the current CPU is working
614  with 2M page and has no L3 cache, default num_rx_desc is changed to 512
615  from original 1024 to help reduce TLB misses.
616  */
617  if ((clib_mem_get_default_hugepage_size () == 2 << 20)
618  && check_l3cache() == 0)
619  xd->nb_rx_desc = 512;
620  }
621 
622  if (devconf->num_tx_desc)
623  xd->nb_tx_desc = devconf->num_tx_desc;
624  else {
625 
626  /* If num_tx_desc is not specified by VPP user, the current CPU is working
627  with 2M page and has no L3 cache, default num_tx_desc is changed to 512
628  from original 1024 to help reduce TLB misses.
629  */
630  if ((clib_mem_get_default_hugepage_size () == 2 << 20)
631  && check_l3cache() == 0)
632  xd->nb_tx_desc = 512;
633  }
634  }
635 
636  if (xd->pmd == VNET_DPDK_PMD_AF_PACKET)
637  {
638  f64 now = vlib_time_now (vm);
639  u32 rnd;
640  rnd = (u32) (now * 1e6);
641  rnd = random_u32 (&rnd);
642  clib_memcpy (addr + 2, &rnd, sizeof (rnd));
643  addr[0] = 2;
644  addr[1] = 0xfe;
645  }
646  else
647  rte_eth_macaddr_get (i, (void *) addr);
648 
649  xd->port_id = i;
650  xd->device_index = xd - dm->devices;
651  xd->per_interface_next_index = ~0;
652 
653  /* assign interface to input thread */
654  int q;
655 
657  (dm->vnet_main, dpdk_device_class.index, xd->device_index,
658  /* ethernet address */ addr,
660  if (error)
661  return error;
662 
663  /*
664  * Ensure default mtu is not > the mtu read from the hardware.
665  * Otherwise rte_eth_dev_configure() will fail and the port will
666  * not be available.
667  * Calculate max_frame_size and mtu supported by NIC
668  */
669  if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen)
670  {
671  /*
672  * This device does not support the platforms's max frame
673  * size. Use it's advertised mru instead.
674  */
675  max_rx_frame = dev_info.max_rx_pktlen;
676  mtu = dev_info.max_rx_pktlen - sizeof (ethernet_header_t);
677  }
678  else
679  {
680  /* VPP treats MTU and max_rx_pktlen both equal to
681  * ETHERNET_MAX_PACKET_BYTES, if dev_info.max_rx_pktlen >=
682  * ETHERNET_MAX_PACKET_BYTES + sizeof(ethernet_header_t)
683  */
684  if (dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES +
685  sizeof (ethernet_header_t)))
686  {
688  max_rx_frame = ETHERNET_MAX_PACKET_BYTES;
689 
690  /*
691  * Some platforms do not account for Ethernet FCS (4 bytes) in
692  * MTU calculations. To interop with them increase mru but only
693  * if the device's settings can support it.
694  */
695  if (dpdk_port_crc_strip_enabled (xd) &&
696  (dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES +
697  sizeof (ethernet_header_t) +
698  4)))
699  {
700  max_rx_frame += 4;
701  }
702  }
703  else
704  {
705  max_rx_frame = ETHERNET_MAX_PACKET_BYTES;
707 
708  if (dpdk_port_crc_strip_enabled (xd) &&
709  (dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)))
710  {
711  max_rx_frame += 4;
712  }
713  }
714  }
715 
716  if (xd->pmd == VNET_DPDK_PMD_FAILSAFE)
717  {
718  /* failsafe device numerables are reported with active device only,
719  * need to query the mtu for current device setup to overwrite
720  * reported value.
721  */
722  uint16_t dev_mtu;
723  if (!rte_eth_dev_get_mtu (i, &dev_mtu))
724  {
725  mtu = dev_mtu;
726  max_rx_frame = mtu + sizeof (ethernet_header_t);
727 
729  {
730  max_rx_frame += 4;
731  }
732  }
733  }
734 
735  /*Set port rxmode config */
736  xd->port_conf.rxmode.max_rx_pkt_len = max_rx_frame;
737 
739  xd->sw_if_index = sw->sw_if_index;
741  dpdk_input_node.index);
742 
743  if (devconf->workers)
744  {
745  int i;
746  q = 0;
747  clib_bitmap_foreach (i, devconf->workers) {
750  dm->vnet_main, xd->hw_if_index, q++,
751  vdm->first_worker_thread_index + i);
752  }
753  }
754  else
755  for (q = 0; q < xd->rx_q_used; q++)
756  {
760  }
761 
763 
764  /*Get vnet hardware interface */
766 
767  /*Override default max_packet_bytes and max_supported_bytes set in
768  * ethernet_register_interface() above*/
769  if (hi)
770  {
771  hi->max_packet_bytes = mtu;
772  hi->max_supported_packet_bytes = max_rx_frame;
773  hi->numa_node = xd->cpu_socket;
774 
775  /* Indicate ability to support L3 DMAC filtering and
776  * initialize interface to L3 non-promisc mode */
780  }
781 
782  if (dm->conf->no_tx_checksum_offload == 0)
783  if (xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD && hi != NULL)
784  {
788  }
789  if (devconf->tso == DPDK_DEVICE_TSO_ON && hi != NULL)
790  {
791  /*tcp_udp checksum must be enabled*/
792  if ((dm->conf->enable_tcp_udp_checksum) &&
794  {
797  xd->port_conf.txmode.offloads |=
798  DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_UDP_TSO;
799  }
800  else
801  clib_warning ("%s: TCP/UDP checksum offload must be enabled",
802  hi->name);
803  }
804 
805  dpdk_device_setup (xd);
806 
807  /* rss queues should be configured after dpdk_device_setup() */
808  if ((hi != NULL) && (devconf->rss_queues != NULL))
809  {
811  (vnet_get_main (), hi, devconf->rss_queues))
812  {
813  clib_warning ("%s: Failed to set rss queues", hi->name);
814  }
815  }
816 
817  if (vec_len (xd->errors))
818  dpdk_log_err ("setup failed for device %U. Errors:\n %U",
821 
822  /*
823  * A note on Cisco VIC (PMD_ENIC) and VLAN:
824  *
825  * With Cisco VIC vNIC, every ingress packet is tagged. On a
826  * trunk vNIC (C series "standalone" server), packets on no VLAN
827  * are tagged with vlan 0. On an access vNIC (standalone or B
828  * series "blade" server), packets on the default/native VLAN
829  * are tagged with that vNIC's VLAN. VPP expects these packets
830  * to be untagged, and previously enabled VLAN strip on VIC by
831  * default. But it also broke vlan sub-interfaces.
832  *
833  * The VIC adapter has "untag default vlan" ingress VLAN rewrite
834  * mode, which removes tags from these packets. VPP now includes
835  * a local patch for the enic driver to use this untag mode, so
836  * enabling vlan stripping is no longer needed. In future, the
837  * driver + dpdk will have an API to set the mode after
838  * rte_eal_init. Then, this note and local patch will be
839  * removed.
840  */
841 
842  /*
843  * VLAN stripping: default to VLAN strip disabled, unless specified
844  * otherwise in the startup config.
845  */
846 
847  vlan_off = rte_eth_dev_get_vlan_offload (xd->port_id);
849  {
850  vlan_off |= ETH_VLAN_STRIP_OFFLOAD;
851  if (rte_eth_dev_set_vlan_offload (xd->port_id, vlan_off) >= 0)
852  dpdk_log_info ("VLAN strip enabled for interface\n");
853  else
854  dpdk_log_warn ("VLAN strip cannot be supported by interface\n");
855  xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
856  }
857  else
858  {
859  if (vlan_off & ETH_VLAN_STRIP_OFFLOAD)
860  {
861  vlan_off &= ~ETH_VLAN_STRIP_OFFLOAD;
862  if (rte_eth_dev_set_vlan_offload (xd->port_id, vlan_off) >= 0)
863  dpdk_log_warn ("set VLAN offload failed\n");
864  }
865  xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
866  }
867 
868  if (hi)
869  hi->max_packet_bytes = xd->port_conf.rxmode.max_rx_pkt_len
870  - sizeof (ethernet_header_t);
871  else
872  dpdk_log_warn ("hi NULL");
873 
874  if (dm->conf->no_multi_seg)
875  mtu = mtu > ETHER_MAX_LEN ? ETHER_MAX_LEN : mtu;
876 
877  rte_eth_dev_set_mtu (xd->port_id, mtu);
878 }
879 
880  /* *INDENT-ON* */
881 
882  return 0;
883 }
884 
/*
 * Walk every PCI address returned by vlib_pci_get_all_dev_addrs() and bind
 * the eligible devices to conf->uio_driver_name.
 *
 * A device is skipped when its info cannot be read, when a whitelist exists
 * and the device is not on it, when it matches the vendor/device blacklist
 * without being whitelisted, or when it matches none of the vendor/device
 * tests below. Devices that are blacklisted, and devices whose bind call
 * fails, get a dev_confs entry with is_blacklisted = 1.
 *
 * NOTE(review): this listing has lost several lines (the function name line,
 * the declarations of conf/vm/error, and a few conditions and calls), so the
 * text below is not compilable as shown.
 */
885 static void
887 {
890  u8 *pci_addr = 0;
891  int num_whitelisted = vec_len (conf->dev_confs);
892  vlib_pci_device_info_t *d = 0;
893  vlib_pci_addr_t *addr = 0, *addrs;
894  int i;
895 
896  addrs = vlib_pci_get_all_dev_addrs ();
897  /* *INDENT-OFF* */
898  vec_foreach (addr, addrs)
899  {
900  dpdk_device_config_t * devconf = 0;
/* Reuse one vector for the printable PCI address of every device. */
901  vec_reset_length (pci_addr);
902  pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, addr, 0);
903  if (d)
904  {
906  d = 0;
907  }
908  d = vlib_pci_get_device_info (vm, addr, &error);
909  if (error)
910  {
911  vlib_log_warn (dpdk_main.log_default, "%U", format_clib_error, error);
912  clib_error_free (error);
913  continue;
914  }
915 
917  continue;
918 
919  if (num_whitelisted)
920  {
921  uword * p = hash_get (conf->device_config_index_by_pci_addr, addr->as_u32);
922 
923  if (!p)
924  {
/* Shared "skip this device" exit; the blacklist loop below jumps here too. */
925  skipped_pci:
926  continue;
927  }
928 
929  devconf = pool_elt_at_index (conf->dev_confs, p[0]);
930  }
931 
932  /* Enforce Device blacklist by vendor and device */
933  for (i = 0; i < vec_len (conf->blacklist_by_pci_vendor_and_device); i++)
934  {
935  u16 vendor, device;
/* Each blacklist entry packs the vendor id in the upper 16 bits and the device id in the lower 16 bits. */
936  vendor = (u16)(conf->blacklist_by_pci_vendor_and_device[i] >> 16);
937  device = (u16)(conf->blacklist_by_pci_vendor_and_device[i] & 0xFFFF);
938  if (d->vendor_id == vendor && d->device_id == device)
939  {
940  /*
941  * Expected case: device isn't whitelisted,
942  * so blacklist it...
943  */
944  if (devconf == 0)
945  {
946  /* Device is blacklisted */
947  pool_get (conf->dev_confs, devconf);
948  hash_set (conf->device_config_index_by_pci_addr, addr->as_u32,
949  devconf - conf->dev_confs);
950  devconf->pci_addr.as_u32 = addr->as_u32;
951  devconf->dev_addr_type = VNET_DEV_ADDR_PCI;
952  devconf->is_blacklisted = 1;
953  goto skipped_pci;
954  }
955  else /* explicitly whitelisted, ignore the device blacklist */
956  break;
957  }
958  }
959 
/* The chain below uses an empty statement for every supported device; only those fall through to the bind call after it. */
960  /* virtio */
961  if (d->vendor_id == 0x1af4 &&
964  ;
965  /* vmxnet3 */
966  else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0)
967  {
968  /*
969  * For vmxnet3 PCI, unless it is explicitly specified in the whitelist,
970  * the default is to put it in the blacklist.
971  */
972  if (devconf == 0)
973  {
/* NOTE(review): unlike the other blacklist paths in this function, dev_addr_type is not set here. */
974  pool_get (conf->dev_confs, devconf);
975  hash_set (conf->device_config_index_by_pci_addr, addr->as_u32,
976  devconf - conf->dev_confs);
977  devconf->pci_addr.as_u32 = addr->as_u32;
978  devconf->is_blacklisted = 1;
979  }
980  }
981  /* all Intel network devices */
982  else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_NETWORK_ETHERNET)
983  ;
984  /* all Intel QAT devices VFs */
985  else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_PROCESSOR_CO &&
986  (d->device_id == 0x0443 || d->device_id == 0x18a1 || d->device_id == 0x19e3 ||
987  d->device_id == 0x37c9 || d->device_id == 0x6f55))
988  ;
989  /* Cisco VIC */
990  else if (d->vendor_id == 0x1137 &&
991  (d->device_id == 0x0043 || d->device_id == 0x0071))
992  ;
993  /* Chelsio T4/T5 */
994  else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000)
995  ;
996  /* Amazon Elastic Network Adapter */
997  else if (d->vendor_id == 0x1d0f && d->device_id >= 0xec20 && d->device_id <= 0xec21)
998  ;
999  /* Cavium Network Adapter */
1000  else if (d->vendor_id == 0x177d && d->device_id == 0x9712)
1001  ;
1002  /* Cavium FastlinQ QL41000 Series */
1003  else if (d->vendor_id == 0x1077 && d->device_id >= 0x8070 && d->device_id <= 0x8090)
1004  ;
/* The Mellanox ranges below are recognised but never bound to the uio driver: each branch continues to the next device. */
1005  /* Mellanox CX3, CX3VF */
1006  else if (d->vendor_id == 0x15b3 && d->device_id >= 0x1003 && d->device_id <= 0x1004)
1007  {
1008  continue;
1009  }
1010  /* Mellanox CX4, CX4VF, CX4LX, CX4LXVF, CX5, CX5VF, CX5EX, CX5EXVF */
1011  else if (d->vendor_id == 0x15b3 && d->device_id >= 0x1013 && d->device_id <= 0x101a)
1012  {
1013  continue;
1014  }
1015  /* Mellanox CX6, CX6VF, CX6DX, CX6DXVF */
1016  else if (d->vendor_id == 0x15b3 && d->device_id >= 0x101b && d->device_id <= 0x101e)
1017  {
1018  continue;
1019  }
/* NOTE(review): device id 0x1614 appears twice in the explicit list below; the duplicate is harmless but may hide an intended different id. */
1020  /* Broadcom NetXtreme S, and E series only */
1021  else if (d->vendor_id == 0x14e4 &&
1022  ((d->device_id >= 0x16c0 &&
1023  d->device_id != 0x16c6 && d->device_id != 0x16c7 &&
1024  d->device_id != 0x16dd && d->device_id != 0x16f7 &&
1025  d->device_id != 0x16fd && d->device_id != 0x16fe &&
1026  d->device_id != 0x170d && d->device_id != 0x170c &&
1027  d->device_id != 0x170e && d->device_id != 0x1712 &&
1028  d->device_id != 0x1713) ||
1029  (d->device_id == 0x1604 || d->device_id == 0x1605 ||
1030  d->device_id == 0x1614 || d->device_id == 0x1606 ||
1031  d->device_id == 0x1609 || d->device_id == 0x1614)))
1032  ;
1033  else
1034  {
1035  dpdk_log_warn ("Unsupported PCI device 0x%04x:0x%04x found "
1036  "at PCI address %s\n", (u16) d->vendor_id, (u16) d->device_id,
1037  pci_addr);
1038  continue;
1039  }
1040 
1041  error = vlib_pci_bind_to_uio (vm, addr, (char *) conf->uio_driver_name);
1042 
/* A failed bind blacklists the device so that later stages skip it. */
1043  if (error)
1044  {
1045  if (devconf == 0)
1046  {
1047  pool_get (conf->dev_confs, devconf);
1048  hash_set (conf->device_config_index_by_pci_addr, addr->as_u32,
1049  devconf - conf->dev_confs);
1050  devconf->pci_addr.as_u32 = addr->as_u32;
1051  }
1052  devconf->dev_addr_type = VNET_DEV_ADDR_PCI;
1053  devconf->is_blacklisted = 1;
1054  clib_error_report (error);
1055  }
1056  }
1057  /* *INDENT-ON* */
1058  vec_free (pci_addr);
1060 }
1061 
/*
 * Walk every VMBUS address returned by vlib_vmbus_get_all_dev_addrs() and
 * bind the eligible devices with vlib_vmbus_bind_to_uio().
 *
 * When a whitelist exists, devices that are not on it are blacklisted.
 * Devices listed in conf->blacklist_by_vmbus_addr are blacklisted unless
 * they were explicitly configured, and a device whose bind call fails is
 * blacklisted as well.
 *
 * NOTE(review): this listing has lost several lines (the function name line,
 * the declarations of conf/error, the hash lookup/insert calls and the
 * declaration of a1), so the text below is not compilable as shown.
 */
1062 static void
1064 {
1066  vlib_vmbus_addr_t *addrs, *addr = 0;
1067  int num_whitelisted = vec_len (conf->dev_confs);
1068  int i;
1069 
1070  addrs = vlib_vmbus_get_all_dev_addrs ();
1071 
1072  /* *INDENT-OFF* */
1073  vec_foreach (addr, addrs)
1074  {
1075  dpdk_device_config_t *devconf = 0;
1076  if (num_whitelisted)
1077  {
1078  uword *p =
1080  if (!p)
1081  {
1082  /* No devices blacklisted, but have whitelisted. blacklist all
1083  * non-whitelisted */
1084  pool_get (conf->dev_confs, devconf);
1086  devconf - conf->dev_confs, 0);
1087  devconf->vmbus_addr = *addr;
1089  devconf->is_blacklisted = 1;
/* Shared "skip this device" exit; the blacklist loop below jumps here too. */
1090  skipped_vmbus:
1091  continue;
1092  }
1093 
1094  devconf = pool_elt_at_index (conf->dev_confs, p[0]);
1095  }
1096 
1097  /* Enforce Device blacklist by vmbus_addr */
1098  for (i = 0; i < vec_len (conf->blacklist_by_vmbus_addr); i++)
1099  {
1101  vlib_vmbus_addr_t *a2 = addr;
1102  if (memcmp (a1, a2, sizeof (vlib_vmbus_addr_t)) == 0)
1103  {
1104  if (devconf == 0)
1105  {
1106  /* Device not whitelisted */
1107  pool_get (conf->dev_confs, devconf);
1109  devconf - conf->dev_confs, 0);
1110  devconf->vmbus_addr = *addr;
1112  devconf->is_blacklisted = 1;
1113  goto skipped_vmbus;
1114  }
1115  else
1116  {
/* Explicitly configured device: the address blacklist is ignored. */
1117  break;
1118  }
1119  }
1120  }
1121 
1122  error = vlib_vmbus_bind_to_uio (addr);
/* A failed bind blacklists the device so that later stages skip it. */
1123  if (error)
1124  {
1125  if (devconf == 0)
1126  {
1127  pool_get (conf->dev_confs, devconf);
1129  devconf - conf->dev_confs, 0);
1130  devconf->vmbus_addr = *addr;
1131  }
1133  devconf->is_blacklisted = 1;
1134  clib_error_report (error);
1135  }
1136  }
1137  /* *INDENT-ON* */
1138 }
1139 
/*
 * unformat helper that parses the maximum SIMD bit width.
 *
 * Reads unsigned values from `input` into the uword pointed to by the first
 * variadic argument until the input is exhausted. Every value must be
 * DPDK_MAX_SIMD_BITWIDTH_256 or DPDK_MAX_SIMD_BITWIDTH_512.
 *
 * Returns 1 on success and 0 when a token is not a number or not one of the
 * two accepted widths. Input that is already at its end also returns 1 and
 * leaves the destination untouched.
 *
 * NOTE(review): the signature line is absent from this listing.
 */
1140 uword
1142 {
1143  uword *max_simd_bitwidth = va_arg (*va, uword *);
1144 
1145  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1146  {
1147  if (!unformat (input, "%u", max_simd_bitwidth))
1148  goto error;
1149 
1150  if (*max_simd_bitwidth != DPDK_MAX_SIMD_BITWIDTH_256 &&
1151  *max_simd_bitwidth != DPDK_MAX_SIMD_BITWIDTH_512)
1152  goto error;
1153  }
1154  return 1;
1155 error:
1156  return 0;
1157 }
1158 
/*
 * Parse the configuration of one device (or of the default device) and
 * store it in a dpdk_device_config_t.
 *
 * With is_default set, conf->default_devconf is filled in. Otherwise a new
 * dev_confs entry is allocated for the PCI or VMBUS address in `addr`; a
 * second configuration for the same address is rejected with an error.
 * `input` may be 0, in which case only the entry is created.
 *
 * After parsing, a `workers` list without an explicit num-rx-queues sets
 * num_rx_queues to the number of listed workers; if both are given they
 * must agree.
 *
 * Returns 0 on success or a clib_error_t describing the problem.
 *
 * NOTE(review): this listing has lost several lines (the function name line
 * with the first parameters, the hash lookup/insert calls, and the
 * statements of the two vlan-strip-offload branches), so the text below is
 * not compilable as shown.
 * NOTE(review): when is_default is 0 and addr_type is neither PCI nor VMBUS,
 * devconf is still 0 when the parsing loop dereferences it - confirm that no
 * caller does this.
 */
1159 static clib_error_t *
1161  dpdk_device_addr_type_t addr_type, unformat_input_t *input,
1162  u8 is_default)
1163 {
1164  clib_error_t *error = 0;
1165  uword *p;
1166  dpdk_device_config_t *devconf = 0;
1167  unformat_input_t sub_input;
1168 
1169  if (is_default)
1170  {
1171  devconf = &conf->default_devconf;
1172  }
1173  else if (addr_type == VNET_DEV_ADDR_PCI)
1174  {
1176  ((vlib_pci_addr_t *) (addr))->as_u32);
1177 
1178  if (!p)
1179  {
1180  pool_get (conf->dev_confs, devconf);
1182  ((vlib_pci_addr_t *) (addr))->as_u32,
1183  devconf - conf->dev_confs);
1184  }
1185  else
1186  return clib_error_return (0,
1187  "duplicate configuration for PCI address %U",
1188  format_vlib_pci_addr, addr);
1189  }
1190  else if (addr_type == VNET_DEV_ADDR_VMBUS)
1191  {
1193  (vlib_vmbus_addr_t *) (addr));
1194 
1195  if (!p)
1196  {
1197  pool_get (conf->dev_confs, devconf);
1199  devconf - conf->dev_confs, 0);
1200  }
1201  else
1202  return clib_error_return (
1203  0, "duplicate configuration for VMBUS address %U",
1204  format_vlib_vmbus_addr, addr);
1205  }
1206 
/* Record the address and reset tso to its default for address-based entries. */
1207  if (addr_type == VNET_DEV_ADDR_PCI)
1208  {
1209  devconf->pci_addr.as_u32 = ((vlib_pci_addr_t *) (addr))->as_u32;
1210  devconf->tso = DPDK_DEVICE_TSO_DEFAULT;
1211  devconf->dev_addr_type = VNET_DEV_ADDR_PCI;
1212  }
1213  else if (addr_type == VNET_DEV_ADDR_VMBUS)
1214  {
1215  devconf->vmbus_addr = *((vlib_vmbus_addr_t *) (addr));
1216  devconf->tso = DPDK_DEVICE_TSO_DEFAULT;
1218  }
1219 
1220  if (!input)
1221  return 0;
1222 
1223  unformat_skip_white_space (input);
1224  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1225  {
1226  if (unformat (input, "num-rx-queues %u", &devconf->num_rx_queues))
1227  ;
1228  else if (unformat (input, "num-tx-queues %u", &devconf->num_tx_queues))
1229  ;
1230  else if (unformat (input, "num-rx-desc %u", &devconf->num_rx_desc))
1231  ;
1232  else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc))
1233  ;
1234  else if (unformat (input, "name %s", &devconf->name))
1235  ;
1236  else if (unformat (input, "workers %U", unformat_bitmap_list,
1237  &devconf->workers))
1238  ;
1239  else
1240  if (unformat
1241  (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input))
1242  {
1243  error = unformat_rss_fn (&sub_input, &devconf->rss_fn);
1244  if (error)
1245  break;
1246  }
1247  else if (unformat (input, "vlan-strip-offload off"))
1249  else if (unformat (input, "vlan-strip-offload on"))
1251  else if (unformat (input, "tso on"))
1252  {
1253  devconf->tso = DPDK_DEVICE_TSO_ON;
1254  }
1255  else if (unformat (input, "tso off"))
1256  {
1257  devconf->tso = DPDK_DEVICE_TSO_OFF;
1258  }
1259  else if (unformat (input, "devargs %s", &devconf->devargs))
1260  ;
1261  else if (unformat (input, "rss-queues %U",
1262  unformat_bitmap_list, &devconf->rss_queues))
1263  ;
1264  else
1265  {
1266  error = clib_error_return (0, "unknown input `%U'",
1267  format_unformat_error, input);
1268  break;
1269  }
1270  }
1271 
1272  if (error)
1273  return error;
1274 
/* NOTE(review): the message below always formats addr with format_vlib_pci_addr, even when addr_type is VMBUS or the default entry is being parsed - confirm. */
1275  if (devconf->workers && devconf->num_rx_queues == 0)
1276  devconf->num_rx_queues = clib_bitmap_count_set_bits (devconf->workers);
1277  else if (devconf->workers &&
1278  clib_bitmap_count_set_bits (devconf->workers) !=
1279  devconf->num_rx_queues)
1280  error = clib_error_return (0,
1281  "%U: number of worker threads must be "
1282  "equal to number of rx queues",
1283  format_vlib_pci_addr, addr);
1284 
1285  return error;
1286 }
1287 
1288 static clib_error_t *
1290 {
1291  unformat_input_t input;
1292  u8 *line, *s = 0;
1293  int n, n_try;
1294 
1295  n = n_try = 4096;
1296  while (n == n_try)
1297  {
1298  uword len = vec_len (s);
1299  vec_resize (s, len + n_try);
1300 
1301  n = read (uf->file_descriptor, s + len, n_try);
1302  if (n < 0 && errno != EAGAIN)
1303  return clib_error_return_unix (0, "read");
1304  _vec_len (s) = len + (n < 0 ? 0 : n);
1305  }
1306 
1307  unformat_init_vector (&input, s);
1308 
1309  while (unformat_user (&input, unformat_line, &line))
1310  {
1311  dpdk_log_notice ("%v", line);
1312  vec_free (line);
1313  }
1314 
1315  unformat_free (&input);
1316  return 0;
1317 }
1318 
/*
 * Handler for the "dpdk" startup configuration section.
 *
 * NOTE(review): the line carrying the function name is missing from this
 * listing; the cross-reference index gives the signature as
 * dpdk_config (vlib_main_t *vm, unformat_input_t *input).
 *
 * Visible steps:
 *  - parse the options, collecting EAL command-line arguments in
 *    conf->eal_init_args and per-device settings via dpdk_device_config();
 *  - unless "no-hugetlb" was given, add "--in-memory" and preallocate
 *    hugepages;
 *  - insert "-c <coremask>" and "-n <channels>" right after the first
 *    argument when they were not supplied manually;
 *  - when running as root, bind devices to uio;
 *  - apply default-device settings to every device config and add
 *    "-a" / "-b" EAL entries for PCI devices;
 *  - redirect the DPDK log stream into a pipe, call rte_eal_init() and
 *    create the buffer pools.
 *
 * Returns 0 on success or a clib_error_t describing the failure.
 */
1319 static clib_error_t *
1321 {
1322  clib_error_t *error = 0;
1325  dpdk_device_config_t *devconf;
1326  vlib_pci_addr_t pci_addr = { 0 };
1327  vlib_vmbus_addr_t vmbus_addr = { 0 };
1328  unformat_input_t sub_input;
1329  uword default_hugepage_sz, x;
1330  u8 *s, *tmp = 0;
1331  int ret, i;
1332  int num_whitelisted = 0;
1333  int eal_no_hugetlb = 0;
1334  u8 no_pci = 0;
1335  u8 no_vmbus = 0;
1336  u8 file_prefix = 0;
1337  u8 *socket_mem = 0;
1338  u8 *huge_dir_path = 0;
1339  u32 vendor, device, domain, bus, func;
1340 
/* NUL-terminated "<runtime dir>/hugepages" path; unmounted and removed after
   rte_eal_init() below. */
1341  huge_dir_path =
1342  format (0, "%s/hugepages%c", vlib_unix_get_runtime_dir (), 0);
1343 
1344  conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword));
1346  sizeof (vlib_vmbus_addr_t));
1347 
1348  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1349  {
1350  /* Prime the pump */
1351  if (unformat (input, "no-hugetlb"))
1352  {
1353  vec_add1 (conf->eal_init_args, (u8 *) "--no-huge");
1354  eal_no_hugetlb = 1;
1355  }
1356  else if (unformat (input, "telemetry"))
1357  conf->enable_telemetry = 1;
1358 
1359  else if (unformat (input, "enable-tcp-udp-checksum"))
1360  conf->enable_tcp_udp_checksum = 1;
1361 
1362  else if (unformat (input, "no-tx-checksum-offload"))
1363  conf->no_tx_checksum_offload = 1;
1364 
1365  else if (unformat (input, "decimal-interface-names"))
1367 
1368  else if (unformat (input, "no-multi-seg"))
1369  conf->no_multi_seg = 1;
1370  else if (unformat (input, "max-simd-bitwidth %U",
1372  ;
1373  else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input,
1374  &sub_input))
1375  {
1376  error =
1377  dpdk_device_config (conf, 0, VNET_DEV_ADDR_ANY, &sub_input, 1);
1378 
1379  if (error)
1380  return error;
1381  }
/* "dev <addr>" is tried four ways: PCI address with and without a
   sub-section, then VMBUS address with and without a sub-section. Each
   successful match counts as one whitelisted device. */
1382  else
1383  if (unformat
1384  (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr,
1385  unformat_vlib_cli_sub_input, &sub_input))
1386  {
1387  error = dpdk_device_config (conf, &pci_addr, VNET_DEV_ADDR_PCI,
1388  &sub_input, 0);
1389 
1390  if (error)
1391  return error;
1392 
1393  num_whitelisted++;
1394  }
1395  else if (unformat (input, "dev %U", unformat_vlib_pci_addr, &pci_addr))
1396  {
1397  error =
1398  dpdk_device_config (conf, &pci_addr, VNET_DEV_ADDR_PCI, 0, 0);
1399 
1400  if (error)
1401  return error;
1402 
1403  num_whitelisted++;
1404  }
1405  else if (unformat (input, "dev %U %U", unformat_vlib_vmbus_addr,
1406  &vmbus_addr, unformat_vlib_cli_sub_input, &sub_input))
1407  {
1408  error = dpdk_device_config (conf, &vmbus_addr, VNET_DEV_ADDR_VMBUS,
1409  &sub_input, 0);
1410 
1411  if (error)
1412  return error;
1413 
1414  num_whitelisted++;
1415  }
1416  else if (unformat (input, "dev %U", unformat_vlib_vmbus_addr,
1417  &vmbus_addr))
1418  {
1419  error =
1420  dpdk_device_config (conf, &vmbus_addr, VNET_DEV_ADDR_VMBUS, 0, 0);
1421 
1422  if (error)
1423  return error;
1424 
1425  num_whitelisted++;
1426  }
/* NOTE(review): the flag is set to 0 here, so the "-n" argument is still
   generated further down from the parsed conf->nchannels value. */
1427  else if (unformat (input, "num-mem-channels %d", &conf->nchannels))
1428  conf->nchannels_set_manually = 0;
1429  else if (unformat (input, "num-crypto-mbufs %d",
1430  &conf->num_crypto_mbufs))
1431  ;
1432  else if (unformat (input, "uio-driver %s", &conf->uio_driver_name))
1433  ;
1434  else if (unformat (input, "socket-mem %s", &socket_mem))
1435  ;
1436  else if (unformat (input, "no-pci"))
1437  {
1438  no_pci = 1;
1439  tmp = format (0, "--no-pci%c", 0);
1440  vec_add1 (conf->eal_init_args, tmp);
1441  }
1442  else if (unformat (input, "blacklist %U", unformat_vlib_vmbus_addr,
1443  &vmbus_addr))
1444  {
1445  vec_add1 (conf->blacklist_by_vmbus_addr, vmbus_addr);
1446  }
1447  else
1448  if (unformat
1449  (input, "blacklist %x:%x:%x.%x", &domain, &bus, &device, &func))
1450  {
1451  tmp = format (0, "-b%c", 0);
1452  vec_add1 (conf->eal_init_args, tmp);
1453  tmp =
1454  format (0, "%04x:%02x:%02x.%x%c", domain, bus, device, func, 0);
1455  vec_add1 (conf->eal_init_args, tmp);
1456  }
1457  else if (unformat (input, "blacklist %x:%x", &vendor, &device))
1458  {
1459  u32 blacklist_entry;
1460  if (vendor > 0xFFFF)
1461  return clib_error_return (0, "blacklist PCI vendor out of range");
1462  if (device > 0xFFFF)
1463  return clib_error_return (0, "blacklist PCI device out of range");
/* Pack vendor into the upper 16 bits and device into the lower 16 bits. */
1464  blacklist_entry = (vendor << 16) | (device & 0xffff);
1466  blacklist_entry);
1467  }
1468  else if (unformat (input, "no-vmbus"))
1469  {
1470  no_vmbus = 1;
1471  tmp = format (0, "--no-vmbus%c", 0);
1472  vec_add1 (conf->eal_init_args, tmp);
1473  }
1474 
/* The macro blocks below extend the else-if chain with EAL pass-through
   options; the lines that expand each macro are missing from this listing. */
1475 #define _(a) \
1476  else if (unformat(input, #a)) \
1477  { \
1478  tmp = format (0, "--%s%c", #a, 0); \
1479  vec_add1 (conf->eal_init_args, tmp); \
1480  }
1482 #undef _
1483 #define _(a) \
1484  else if (unformat(input, #a " %s", &s)) \
1485  { \
1486  if (!strncmp(#a, "file-prefix", 11)) \
1487  file_prefix = 1; \
1488  tmp = format (0, "--%s%c", #a, 0); \
1489  vec_add1 (conf->eal_init_args, tmp); \
1490  vec_add1 (s, 0); \
1491  if (!strncmp(#a, "vdev", 4)) \
1492  if (strstr((char*)s, "af_packet")) \
1493  clib_warning ("af_packet obsoleted. Use CLI 'create host-interface'."); \
1494  vec_add1 (conf->eal_init_args, s); \
1495  }
1497 #undef _
1498 #define _(a,b) \
1499  else if (unformat(input, #a " %s", &s)) \
1500  { \
1501  tmp = format (0, "-%s%c", #b, 0); \
1502  vec_add1 (conf->eal_init_args, tmp); \
1503  vec_add1 (s, 0); \
1504  vec_add1 (conf->eal_init_args, s); \
1505  }
1507 #undef _
1508 #define _(a,b) \
1509  else if (unformat(input, #a " %s", &s)) \
1510  { \
1511  tmp = format (0, "-%s%c", #b, 0); \
1512  vec_add1 (conf->eal_init_args, tmp); \
1513  vec_add1 (s, 0); \
1514  vec_add1 (conf->eal_init_args, s); \
1515  conf->a##_set_manually = 1; \
1516  }
1518 #undef _
1519  else if (unformat (input, "default"))
1520  ;
1521 
1522  else if (unformat_skip_white_space (input))
1523  ;
1524  else
1525  {
1526  error = clib_error_return (0, "unknown input `%U'",
1527  format_unformat_error, input);
1528  goto done;
1529  }
1530  }
1531 
1532  if (!conf->uio_driver_name)
1533  conf->uio_driver_name = format (0, "auto%c", 0);
1534 
1535  if (eal_no_hugetlb == 0)
1536  {
1537  vec_add1 (conf->eal_init_args, (u8 *) "--in-memory");
1538 
1539  default_hugepage_sz = clib_mem_get_default_hugepage_size ();
1540 
1541  /* *INDENT-OFF* */
1543  {
1544  clib_error_t *e;
1545  uword n_pages;
1546  /* preallocate at least 16MB of hugepages per socket,
1547  if more is needed it is up to consumer to preallocate more */
1548  n_pages = round_pow2 ((uword) 16 << 20, default_hugepage_sz);
1549  n_pages /= default_hugepage_sz;
1550 
/* A preallocation failure is only reported; configuration continues. */
1551  if ((e = clib_sysfs_prealloc_hugepages(x, 0, n_pages)))
1552  clib_error_report (e);
1553  }
1554  /* *INDENT-ON* */
1555  }
1556 
1557  /* on/off dpdk's telemetry thread */
1558  if (conf->enable_telemetry == 0)
1559  {
1560  vec_add1 (conf->eal_init_args, (u8 *) "--no-telemetry");
1561  }
1562 
/* Default to "--file-prefix vpp" when no file-prefix option was parsed. */
1563  if (!file_prefix)
1564  {
1565  tmp = format (0, "--file-prefix%c", 0);
1566  vec_add1 (conf->eal_init_args, tmp);
1567  tmp = format (0, "vpp%c", 0);
1568  vec_add1 (conf->eal_init_args, tmp);
1569  }
1570 
/* NOTE(review): every visible path above that sets error either returns or
   jumps to done, so this check looks unreachable with a non-zero error. */
1571  if (error)
1572  return error;
1573 
1574  /* I'll bet that -c and -n must be the first and second args... */
1575  if (!conf->coremask_set_manually)
1576  {
1578  uword *coremask = 0;
1579  int i;
1580 
1581  /* main thread core */
1582  coremask = clib_bitmap_set (coremask, tm->main_lcore, 1);
1583 
1584  for (i = 0; i < vec_len (tm->registrations); i++)
1585  {
1586  tr = tm->registrations[i];
1587  coremask = clib_bitmap_or (coremask, tr->coremask);
1588  }
1589 
/* Open two slots at index 1 (right after the first argument) for
   "-c <hex coremask>". */
1590  vec_insert (conf->eal_init_args, 2, 1);
1591  conf->eal_init_args[1] = (u8 *) "-c";
1592  tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0);
1593  conf->eal_init_args[2] = tmp;
1594  clib_bitmap_free (coremask);
1595  }
1596 
1597  if (!conf->nchannels_set_manually)
1598  {
/* Open two slots at index 3 for "-n <channels>". */
1599  vec_insert (conf->eal_init_args, 2, 3);
1600  conf->eal_init_args[3] = (u8 *) "-n";
1601  tmp = format (0, "%d", conf->nchannels);
1602  vec_terminate_c_string (tmp);
1603  conf->eal_init_args[4] = tmp;
1604  }
1605 
/* Device binding is attempted only when the effective uid is root. */
1606  if (no_pci == 0 && geteuid () == 0)
1607  dpdk_bind_devices_to_uio (conf);
1608 
1609  if (no_vmbus == 0 && geteuid () == 0)
1611 
/* Helper: inherit field x from the default device config when the device
   leaves it at 0 and the default is greater than 0. */
1612 #define _(x) \
1613  if (devconf->x == 0 && conf->default_devconf.x > 0) \
1614  devconf->x = conf->default_devconf.x ;
1615 
1616  /* *INDENT-OFF* */
1617  pool_foreach (devconf, conf->dev_confs) {
1618 
1619  /* default per-device config items */
1621 
1622  /* copy vlan_strip config from default device */
1623  _ (vlan_strip_offload)
1624 
1625  /* copy tso config from default device */
1626  _ (tso)
1627 
1628  /* copy devargs config from default device */
1629  _ (devargs)
1630 
1631  /* copy rss_queues config from default device */
1632  _ (rss_queues)
1633 
1634  /* add DPDK EAL whitelist/blacklist entry */
1635  if (num_whitelisted > 0 && devconf->is_blacklisted == 0 &&
1636  devconf->dev_addr_type == VNET_DEV_ADDR_PCI)
1637  {
1638  tmp = format (0, "-a%c", 0);
1639  vec_add1 (conf->eal_init_args, tmp);
1640  if (devconf->devargs)
1641  {
1642  tmp = format (0, "%U,%s%c", format_vlib_pci_addr,
1643  &devconf->pci_addr, devconf->devargs, 0);
1644  }
1645  else
1646  {
1647  tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
1648  }
1649  vec_add1 (conf->eal_init_args, tmp);
1650  }
1651  else if (num_whitelisted == 0 && devconf->is_blacklisted != 0 &&
1652  devconf->dev_addr_type == VNET_DEV_ADDR_PCI)
1653  {
1654  tmp = format (0, "-b%c", 0);
1655  vec_add1 (conf->eal_init_args, tmp);
1656  tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
1657  vec_add1 (conf->eal_init_args, tmp);
1658  }
1659  }
1660  /* *INDENT-ON* */
1661 
1662 #undef _
1663 
1664  /* set master-lcore */
1665  tmp = format (0, "--main-lcore%c", 0);
1666  vec_add1 (conf->eal_init_args, tmp);
1667  tmp = format (0, "%u%c", tm->main_lcore, 0);
1668  vec_add1 (conf->eal_init_args, tmp);
1669 
1670 
/* socket-mem is still parsed, but the value is only used to emit this
   warning. */
1671  if (socket_mem)
1672  clib_warning ("socket-mem argument is deprecated");
1673 
1674  /* NULL terminate the "argv" vector, in case of stupidity */
/* The terminator is appended and the length then reduced by one, so the
   NULL sits just past the counted elements. */
1675  vec_add1 (conf->eal_init_args, 0);
1676  _vec_len (conf->eal_init_args) -= 1;
1677 
1678  /* Set up DPDK eal and packet mbuf pool early. */
1679 
/* Route DPDK log output through a pipe: the non-blocking write end is handed
   to rte_openlog_stream() and the read end is registered with clib_file_add().
   NOTE(review): the descriptors are closed only when fcntl() fails; they stay
   open if fdopen() or rte_openlog_stream() fails. */
1680  int log_fds[2] = { 0 };
1681  if (pipe (log_fds) == 0)
1682  {
1683  if (fcntl (log_fds[1], F_SETFL, O_NONBLOCK) == 0)
1684  {
1685  FILE *f = fdopen (log_fds[1], "a");
1686  if (f && rte_openlog_stream (f) == 0)
1687  {
1688  clib_file_t t = { 0 };
1690  t.file_descriptor = log_fds[0];
1691  t.description = format (0, "DPDK logging pipe");
1692  clib_file_add (&file_main, &t);
1693  }
1694  }
1695  else
1696  {
1697  close (log_fds[0]);
1698  close (log_fds[1]);
1699  }
1700  }
1701 
1702  vm = vlib_get_main ();
1703 
1704  /* make copy of args as rte_eal_init tends to mess up with arg array */
/* The copy starts at index 1, so the first element is left out. */
1705  for (i = 1; i < vec_len (conf->eal_init_args); i++)
1706  conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ",
1707  conf->eal_init_args[i]);
1708 
1710 
1711  dpdk_log_notice ("EAL init args: %s", conf->eal_init_args_str);
1712  ret = rte_eal_init (vec_len (conf->eal_init_args),
1713  (char **) conf->eal_init_args);
1714 
1715  /* enable the AVX-512 vPMDs in DPDK */
1716  if (clib_cpu_supports_avx512_bitalg () &&
1718  rte_vect_set_max_simd_bitwidth (RTE_VECT_SIMD_512);
1720  rte_vect_set_max_simd_bitwidth (conf->max_simd_bitwidth ==
1722  RTE_VECT_SIMD_256 :
1723  RTE_VECT_SIMD_512);
1724 
1725  /* lazy umount hugepages */
1726  umount2 ((char *) huge_dir_path, MNT_DETACH);
1727  rmdir ((char *) huge_dir_path);
1728  vec_free (huge_dir_path);
1729 
/* The rte_eal_init() result is checked only after the cleanup above. */
1730  if (ret < 0)
1731  return clib_error_return (0, "rte_eal_init returned %d", ret);
1732 
1733  /* main thread 1st */
1734  if ((error = dpdk_buffer_pools_create (vm)))
1735  return error;
1736 
/* NOTE(review): the "goto done" path and the early "return error" paths in
   the parse loop skip the vec_free of huge_dir_path above. */
1737 done:
1738  return error;
1739 }
1740 
1742 
/*
 * Refresh the cached link state of one device and report changes to vnet.
 *
 * NOTE(review): the line carrying the function name is missing from this
 * listing; the cross-reference index gives the signature as
 * dpdk_update_link_state (dpdk_device_t *xd, f64 now).
 *
 * Devices without DPDK_DEVICE_FLAG_PMD are ignored. For the rest, the
 * previous link is saved, xd->link is re-read without waiting, and
 * differences in duplex, speed and status are acted on.
 */
1743 void
1745 {
1746  vnet_main_t *vnm = vnet_get_main ();
1747  struct rte_eth_link prev_link = xd->link;
1748  u32 hw_flags = 0;
1749  u8 hw_flags_chg = 0;
1750 
1751  /* only update link state for PMD interfaces */
1752  if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
1753  return;
1754 
/* Zero the cached link first, then query it without waiting. */
1756  clib_memset (&xd->link, 0, sizeof (xd->link));
1757  rte_eth_link_get_nowait (xd->port_id, &xd->link);
1758 
/* Optional event logging; LINK_STATE_ELOGS is defined as 0 at the top of the
   file, so this branch is compiled out by default. */
1759  if (LINK_STATE_ELOGS)
1760  {
1761  ELOG_TYPE_DECLARE (e) =
1762  {
1763  .format =
1764  "update-link-state: sw_if_index %d, admin_up %d,"
1765  "old link_state %d new link_state %d",.format_args = "i4i1i1i1",};
1766 
1767  struct
1768  {
1769  u32 sw_if_index;
1770  u8 admin_up;
1771  u8 old_link_state;
1772  u8 new_link_state;
1773  } *ed;
1775  ed->sw_if_index = xd->sw_if_index;
1776  ed->admin_up = (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) != 0;
1777  ed->old_link_state = (u8)
1779  ed->new_link_state = (u8) xd->link.link_status;
1780  }
1781 
/* A duplex change marks the hardware flags as changed; the statements inside
   the two duplex cases are missing from this listing. */
1782  if ((xd->link.link_duplex != prev_link.link_duplex))
1783  {
1784  hw_flags_chg = 1;
1785  switch (xd->link.link_duplex)
1786  {
1787  case ETH_LINK_HALF_DUPLEX:
1789  break;
1790  case ETH_LINK_FULL_DUPLEX:
1792  break;
1793  default:
1794  break;
1795  }
1796  }
1797  if (xd->link.link_speed != prev_link.link_speed)
1799  xd->link.link_speed * 1000);
1800 
/* NOTE(review): VNET_HW_INTERFACE_FLAG_LINK_UP is added only when the status
   itself changed. If only the duplex changed, hw_flags is passed on without
   the link-up bit -- confirm this cannot clear link-up on a live link. */
1801  if (xd->link.link_status != prev_link.link_status)
1802  {
1803  hw_flags_chg = 1;
1804 
1805  if (xd->link.link_status)
1806  hw_flags |= VNET_HW_INTERFACE_FLAG_LINK_UP;
1807  }
1808 
/* Push the new flags to vnet only when something changed. */
1809  if (hw_flags_chg)
1810  {
1811  if (LINK_STATE_ELOGS)
1812  {
1813  ELOG_TYPE_DECLARE (e) =
1814  {
1815  .format =
1816  "update-link-state: sw_if_index %d, new flags %d",.format_args
1817  = "i4i4",};
1818 
1819  struct
1820  {
1821  u32 sw_if_index;
1822  u32 flags;
1823  } *ed;
1825  ed->sw_if_index = xd->sw_if_index;
1826  ed->flags = hw_flags;
1827  }
1828  vnet_hw_interface_set_flags (vnm, xd->hw_if_index, hw_flags);
1829  }
1830 }
1831 
/*
 * Body of the "dpdk-process" process node.
 *
 * NOTE(review): the line carrying the function name is missing from this
 * listing; the cross-reference index gives the signature as
 * dpdk_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f).
 *
 * Runs one-time initialization (dpdk_lib_init, dpdk_cryptodev_init), sets
 * tm->worker_thread_release, refreshes the link state of every device once,
 * and then loops forever polling counters and link state.
 */
1832 static uword
1834 {
1836  dpdk_main_t *dm = &dpdk_main;
1837  dpdk_device_t *xd;
1839 
1840  error = dpdk_lib_init (dm);
1841 
/* A dpdk_lib_init failure is reported, and the process carries on. */
1842  if (error)
1843  clib_error_report (error);
1844 
/* A cryptodev init failure is logged as a warning and the error freed. */
1845  error = dpdk_cryptodev_init (vm);
1846  if (error)
1847  {
1848  vlib_log_warn (dpdk_main.log_cryptodev, "%U", format_clib_error, error);
1849  clib_error_free (error);
1850  }
1851 
1852  tm->worker_thread_release = 1;
1853 
/* Initial link-state refresh for every device before entering the loop. */
1854  f64 now = vlib_time_now (vm);
1855  vec_foreach (xd, dm->devices)
1856  {
1857  dpdk_update_link_state (xd, now);
1858  }
1859 
1860  while (1)
1861  {
1862  /*
1863  * check each time through the loop in case intervals are changed
1864  */
1865  f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ?
1867 
1868  vlib_process_wait_for_event_or_clock (vm, min_wait);
1869 
1870  if (dm->admin_up_down_in_progress)
1871  /* skip the poll if an admin up down is in progress (on any interface) */
1872  continue;
1873 
/* Each device is polled only when its own interval has elapsed. */
1874  vec_foreach (xd, dm->devices)
1875  {
1876  f64 now = vlib_time_now (vm);
1877  if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval)
1878  dpdk_update_counters (xd, now);
1879  if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval)
1880  dpdk_update_link_state (xd, now);
1881 
1882  }
1883  }
1884 
/* Not reached: the loop above has no break. */
1885  return 0;
1886 }
1887 
1888 /* *INDENT-OFF* */
/* Registration of the process node that runs dpdk_process. The opening
   macro line is missing from this listing; the cross-reference index shows
   it as VLIB_REGISTER_NODE (dpdk_process_node).
   NOTE(review): process_log2_n_stack_bytes = 17 presumably requests a
   2^17-byte stack -- confirm against the vlib node documentation. */
1890  .function = dpdk_process,
1891  .type = VLIB_NODE_TYPE_PROCESS,
1892  .name = "dpdk-process",
1893  .process_log2_n_stack_bytes = 17,
1894 };
1895 /* *INDENT-ON* */
1896 
/*
 * Plugin init function: sets up dpdk_main before configuration is parsed.
 *
 * NOTE(review): the line carrying the function name is missing from this
 * listing; the cross-reference index gives the signature as
 * dpdk_init (vlib_main_t *vm).
 *
 * Checks structure layout at compile time, stores the vlib/vnet/config
 * pointers in dpdk_main, sets the default memory-channel count, seeds the
 * EAL argument vector with its first element, sets the default buffer flags
 * and registers the log classes. Always returns 0 on the visible paths.
 */
1897 static clib_error_t *
1899 {
1900  dpdk_main_t *dm = &dpdk_main;
1901  clib_error_t *error = 0;
1902 
1903  /* verify that structs are cacheline aligned */
1904  STATIC_ASSERT (offsetof (dpdk_device_t, cacheline0) == 0,
1905  "Cache line marker must be 1st element in dpdk_device_t");
1906  STATIC_ASSERT (offsetof (dpdk_device_t, cacheline1) ==
1908  "Data in cache line 0 is bigger than cache line size");
1909  STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0,
1910  "Cache line marker must be 1st element in frame_queue_trace_t");
1911  STATIC_ASSERT (RTE_CACHE_LINE_SIZE == 1 << CLIB_LOG2_CACHE_LINE_BYTES,
1912  "DPDK RTE CACHE LINE SIZE does not match with 1<<CLIB_LOG2_CACHE_LINE_BYTES");
1913 
1914  dpdk_cli_reference ();
1915 
1916  dm->vlib_main = vm;
1917  dm->vnet_main = vnet_get_main ();
1918  dm->conf = &dpdk_config_main;
1919 
/* Default channel count; the "num-mem-channels" option in dpdk_config can
   overwrite it. */
1920  dm->conf->nchannels = 4;
/* First element of the EAL argument vector; dpdk_config appends the rest. */
1921  vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet");
1922 
1923  /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */
1924  dm->buffer_flags_template = (VLIB_BUFFER_TOTAL_LENGTH_VALID |
1925  VLIB_BUFFER_EXT_HDR_VALID |
1926  VNET_BUFFER_F_L4_CHECKSUM_COMPUTED |
1927  VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1928 
1931 
/* One default log class plus two "dpdk" subclasses. */
1932  dm->log_default = vlib_log_register_class ("dpdk", 0);
1933  dm->log_cryptodev = vlib_log_register_class ("dpdk", "cryptodev");
1934  dm->log_ipsec = vlib_log_register_class ("dpdk", "ipsec");
1935 
1936  return error;
1937 }
1938 
1940 
1941 /*
1942  * fd.io coding-style-patch-verification: ON
1943  *
1944  * Local Variables:
1945  * eval: (c-set-style "gnu")
1946  * End:
1947  */
vlib_log_class_t vlib_log_register_class(char *class, char *subclass)
Definition: log.c:339
u32 flags
buffer flags: VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index, VLIB_BUFFER_IS_TRACED: trace this buffer.
Definition: buffer.h:133
static void dpdk_bind_devices_to_uio(dpdk_config_main_t *conf)
Definition: init.c:886
f64 time_last_link_update
Definition: dpdk.h:232
#define hash_set(h, key, value)
Definition: hash.h:255
#define VIRTIO_PCI_MODERN_DEVICEID_NET
Definition: pci_config.h:169
vnet_interface_output_runtime_t * rt
static void dpdk_enable_l4_csum_offload(dpdk_device_t *xd)
Definition: init.c:196
#define clib_min(x, y)
Definition: clib.h:342
clib_error_t * clib_sysfs_prealloc_hugepages(int numa_node, int log2_page_size, int nr)
Definition: sysfs.c:240
u8 * eal_init_args_str
Definition: dpdk.h:300
static f64 vlib_process_wait_for_event_or_clock(vlib_main_t *vm, f64 dt)
Suspend a cooperative multi-tasking thread Waits for an event, or for the indicated number of seconds...
Definition: node_funcs.h:755
vl_api_wireguard_peer_flags_t flags
Definition: wireguard.api:105
uword first_worker_thread_index
Definition: devices.h:63
vlib_log_class_t log_ipsec
Definition: dpdk.h:384
u8 interface_name_format_decimal
Definition: dpdk.h:323
clib_error_t * vlib_vmbus_bind_to_uio(vlib_vmbus_addr_t *addr)
Definition: vmbus.c:213
#define pool_foreach(VAR, POOL)
Iterate through pool.
Definition: pool.h:534
Optimized string handling code, including c11-compliant "safe C library" variants.
vnet_device_class_t dpdk_device_class
unsigned long u64
Definition: types.h:89
u32 sw_if_index
Definition: dpdk.h:192
#define DPDK_DEVICE_VLAN_STRIP_OFF
Definition: dpdk.h:279
clib_error_t * vnet_hw_interface_set_rss_queues(vnet_main_t *vnm, vnet_hw_interface_t *hi, clib_bitmap_t *bitmap)
Definition: interface.c:1762
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
static f64 vlib_time_now(vlib_main_t *vm)
Definition: main.h:325
#define vec_add2_aligned(V, P, N, A)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:657
static uword * clib_bitmap_or(uword *ai, uword *bi)
Logical operator across two bitmaps.
static u32 dpdk_flag_change(vnet_main_t *vnm, vnet_hw_interface_t *hi, u32 flags)
Definition: init.c:114
vnet_hw_interface_capabilities_t caps
Definition: interface.h:645
u16 max_simd_bitwidth
Definition: dpdk.h:306
#define clib_bitmap_foreach(i, ai)
Macro to iterate across set bits in a bitmap.
Definition: bitmap.h:361
struct rte_pci_device * dpdk_get_pci_device(const struct rte_eth_dev_info *info)
Definition: common.c:335
void dpdk_update_link_state(dpdk_device_t *xd, f64 now)
Definition: init.c:1744
#define DPDK_MAX_SIMD_BITWIDTH_256
Definition: dpdk.h:309
u16 flags
Definition: dpdk.h:199
static vnet_hw_interface_t * vnet_get_hw_interface(vnet_main_t *vnm, u32 hw_if_index)
#define LINK_STATE_ELOGS
Definition: init.c:51
#define CLIB_LOG2_CACHE_LINE_BYTES
Definition: cache.h:50
u32 file_descriptor
Definition: file.h:54
#define vec_terminate_c_string(V)
(If necessary) NULL terminate a vector containing a c-string.
Definition: vec.h:1133
#define DPDK_MAX_SIMD_BITWIDTH_DEFAULT
Definition: dpdk.h:308
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:607
static void vlib_pci_free_device_info(vlib_pci_device_info_t *di)
Definition: pci.h:114
#define DPDK_NB_RX_DESC_VIRTIO
Definition: dpdk_priv.h:18
format_function_t format_vlib_vmbus_addr
Definition: vmbus.h:38
clib_error_t * errors
Definition: dpdk.h:244
u32 per_interface_next_index
Definition: dpdk.h:195
u8 enable_tcp_udp_checksum
Definition: dpdk.h:303
static uword * clib_bitmap_set(uword *ai, uword i, uword value)
Sets the ith bit of a bitmap to new_value Removes trailing zeros from the bitmap. ...
Definition: bitmap.h:167
uword unformat_user(unformat_input_t *input, unformat_function_t *func,...)
Definition: unformat.c:989
#define DPDK_DEVICE_VLAN_STRIP_ON
Definition: dpdk.h:280
void dpdk_cli_reference(void)
Definition: cli.c:389
#define DPDK_NB_TX_DESC_DEFAULT
Definition: dpdk_priv.h:17
u32 supported_flow_actions
Definition: dpdk.h:221
#define foreach_eal_double_hyphen_predicate_arg
Definition: dpdk_priv.h:29
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:535
#define ETHERNET_INTERFACE_FLAG_DEFAULT_L3
Definition: ethernet.h:160
clib_error_t * vnet_hw_interface_set_flags(vnet_main_t *vnm, u32 hw_if_index, vnet_hw_interface_flags_t flags)
Definition: interface.c:513
#define DPDK_DEVICE_TSO_OFF
Definition: dpdk.h:291
#define pool_get(P, E)
Allocate an object E from a pool P (unspecified alignment).
Definition: pool.h:255
vhost_vring_addr_t addr
Definition: vhost_user.h:130
u32 * blacklist_by_pci_vendor_and_device
Definition: dpdk.h:332
unsigned char u8
Definition: types.h:56
dpdk_config_main_t dpdk_config_main
Definition: init.c:49
struct rte_vmbus_device * dpdk_get_vmbus_device(const struct rte_eth_dev_info *info)
Definition: common.c:348
#define vec_reset_length(v)
Reset vector length to zero NULL-pointer tolerant.
clib_file_function_t * read_function
Definition: file.h:67
#define ETHER_MAX_LEN
Maximum frame len, including CRC.
Definition: init.c:46
double f64
Definition: types.h:142
static vnet_sw_interface_t * vnet_get_hw_sw_interface(vnet_main_t *vnm, u32 hw_if_index)
unsigned int u32
Definition: types.h:88
#define clib_memcpy(d, s, n)
Definition: string.h:197
vlib_frame_t * f
foreach_dpdk_device_config_item clib_bitmap_t * workers
Definition: dpdk.h:285
#define dpdk_log_warn(...)
Definition: dpdk.h:436
static clib_error_t * dpdk_log_read_ready(clib_file_t *uf)
Definition: init.c:1289
#define VLIB_INIT_FUNCTION(x)
Definition: init.h:172
dpdk_portid_t port_id
Definition: dpdk.h:202
vlib_node_registration_t dpdk_input_node
(constructor) VLIB_REGISTER_NODE (dpdk_input_node)
Definition: node.c:480
dpdk_device_config_t default_devconf
Definition: dpdk.h:326
f64 stat_poll_interval
Definition: dpdk.h:371
static char * vlib_unix_get_runtime_dir(void)
Definition: unix.h:151
static dpdk_port_type_t port_type_from_speed_capa(struct rte_eth_dev_info *dev_info)
Definition: init.c:56
description fragment has unexpected format
Definition: map.api:433
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
#define clib_error_return(e, args...)
Definition: error.h:99
u16 rx_q_used
Definition: dpdk.h:197
vnet_main_t * vnet_get_main(void)
vlib_pci_addr_t * vlib_pci_get_all_dev_addrs()
Definition: pci.c:1466
clib_file_main_t file_main
Definition: main.c:63
#define vec_resize(V, N)
Resize a vector (no header, unspecified alignment) Add N elements to end of given vector V...
Definition: vec.h:296
void dpdk_device_setup(dpdk_device_t *xd)
Definition: common.c:42
clib_error_t * unformat_rss_fn(unformat_input_t *input, uword *rss_fn)
Definition: format.c:939
#define DPDK_NB_TX_DESC_VIRTIO
Definition: dpdk_priv.h:19
u32 device_index
Definition: dpdk.h:189
struct rte_eth_conf port_conf
Definition: dpdk.h:217
static clib_error_t * dpdk_init(vlib_main_t *vm)
Definition: init.c:1898
u32 max_supported_packet_bytes
Definition: interface.h:694
f64 time_last_stats_update
Definition: dpdk.h:237
vlib_pci_device_info_t * vlib_pci_get_device_info(vlib_main_t *vm, vlib_pci_addr_t *addr, clib_error_t **error)
Definition: pci.c:202
struct rte_eth_txconf tx_conf
Definition: dpdk.h:218
Definition: cJSON.c:88
u8 * description
Definition: file.h:70
#define hash_get(h, key)
Definition: hash.h:249
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
Definition: pool.h:553
#define vec_insert(V, N, M)
Insert N vector elements starting at element M, initialize new elements to zero (no header...
Definition: vec.h:776
u32 vnet_hw_if_register_rx_queue(vnet_main_t *vnm, u32 hw_if_index, u32 queue_id, u32 thread_index)
Definition: rx_queue.c:64
vl_api_interface_index_t sw_if_index
Definition: wireguard.api:34
vlib_pci_addr_t pci_addr
Definition: dpdk.h:271
static clib_error_t * dpdk_config(vlib_main_t *vm, unformat_input_t *input)
Definition: init.c:1320
uword unformat_max_simd_bitwidth(unformat_input_t *input, va_list *va)
Definition: init.c:1141
#define foreach_eal_double_hyphen_arg
Definition: dpdk_priv.h:44
u8 ** eal_init_args
Definition: dpdk.h:299
unformat_function_t unformat_vlib_vmbus_addr
Definition: vmbus.h:39
dpdk_per_thread_data_t * per_thread_data
Definition: dpdk.h:358
clib_error_t * vlib_pci_bind_to_uio(vlib_main_t *vm, vlib_pci_addr_t *addr, char *uio_drv_name)
Definition: pci.c:397
#define foreach_eal_single_hyphen_mandatory_arg
Definition: dpdk_priv.h:35
static dpdk_port_type_t port_type_from_link_speed(u32 link_speed)
Definition: init.c:84
struct _unformat_input_t unformat_input_t
unsigned short u16
Definition: types.h:57
#define clib_error_return_unix(e, args...)
Definition: error.h:102
#define foreach_dpdk_pmd
Definition: dpdk.h:69
vlib_buffer_t buffer_template
Definition: dpdk.h:350
unformat_function_t unformat_vlib_pci_addr
Definition: pci.h:325
u32 * tmp
#define ELOG_DATA(em, f)
Definition: elog.h:484
#define VIRTIO_PCI_LEGACY_DEVICEID_NET
Definition: pci_config.h:168
dpdk_port_type_t port_type
Definition: dpdk.h:238
#define VLIB_CONFIG_FUNCTION(x, n,...)
Definition: init.h:181
signed char i8
Definition: types.h:45
vlib_main_t * vm
X-connect all packets from the HOST to the PHY.
Definition: nat44_ei.c:3047
__clib_export u8 * format_bitmap_hex(u8 *s, va_list *args)
Format a bitmap as a string of hex bytes.
Definition: bitmap.c:107
void vnet_hw_if_update_runtime_data(vnet_main_t *vnm, u32 hw_if_index)
Definition: runtime.c:58
u16 tx_q_used
Definition: dpdk.h:198
u16 nb_rx_desc
Definition: dpdk.h:208
uint16_t dpdk_portid_t
Definition: dpdk.h:129
static uword mhash_set(mhash_t *h, void *key, uword new_value, uword *old_value)
Definition: mhash.h:117
#define dpdk_log_info(...)
Definition: dpdk.h:440
__clib_export void mhash_init(mhash_t *h, uword n_value_bytes, uword n_key_bytes)
Definition: mhash.c:168
u32 hw_if_index
Definition: dpdk.h:191
u8 len
Definition: ip_types.api:103
clib_error_t * dpdk_cryptodev_init(vlib_main_t *vm)
Definition: cryptodev.c:1061
void unformat_init_vector(unformat_input_t *input, u8 *vector_string)
Definition: unformat.c:1037
u16 af_packet_instance_num
Definition: dpdk.h:229
vlib_log_class_t log_cryptodev
Definition: dpdk.h:383
#define foreach_eal_single_hyphen_arg
Definition: dpdk_priv.h:39
#define VNET_HW_INTERFACE_CAP_SUPPORTS_TX_CKSUM
Definition: interface.h:559
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:169
u32 num_crypto_mbufs
Definition: dpdk.h:317
#define DPDK_NB_RX_DESC_DEFAULT
Definition: dpdk_priv.h:16
static int check_l3cache()
Definition: init.c:159
#define UNFORMAT_END_OF_INPUT
Definition: format.h:137
dpdk_device_t * devices
Definition: dpdk.h:357
vlib_vmbus_addr_t vmbus_addr
Definition: dpdk.h:272
static void dpdk_update_counters(dpdk_device_t *xd, f64 now)
Definition: dpdk_priv.h:95
u8 nchannels_set_manually
Definition: dpdk.h:314
sll srl srl sll sra u16x4 i
Definition: vector_sse42.h:261
#define vec_free(V)
Free vector's memory (no header).
Definition: vec.h:395
#define dpdk_log_err(...)
Definition: dpdk.h:434
dpdk_device_config_t * dev_confs
Definition: dpdk.h:327
#define clib_warning(format, args...)
Definition: error.h:59
__clib_export uword unformat_bitmap_list(unformat_input_t *input, va_list *va)
unformat a list of bit ranges into a bitmap (eg "0-3,5-7,11" )
Definition: bitmap.c:55
dpdk_pmd_t pmd
Definition: dpdk.h:203
format_function_t format_dpdk_device_errors
Definition: dpdk.h:474
#define ETHERNET_INTERFACE_FLAG_MTU
Definition: ethernet.h:166
#define ETHERNET_INTERFACE_FLAG_ACCEPT_ALL
Definition: ethernet.h:163
vlib_vmbus_addr_t * blacklist_by_vmbus_addr
Definition: dpdk.h:334
u8 coremask_set_manually
Definition: dpdk.h:313
__clib_export uword clib_mem_get_default_hugepage_size(void)
Definition: mem.c:79
#define ELOG_TYPE_DECLARE(f)
Definition: elog.h:442
static uword round_pow2(uword x, uword pow2)
Definition: clib.h:279
format_function_t format_dpdk_rss_hf_name
Definition: dpdk.h:480
static void dpdk_bind_vmbus_devices_to_uio(dpdk_config_main_t *conf)
Definition: init.c:1063
u8 enable_telemetry
Definition: dpdk.h:305
u8 * interface_name_suffix
Definition: dpdk.h:211
uword unformat_vlib_cli_sub_input(unformat_input_t *i, va_list *args)
Definition: cli.c:163
#define hash_create(elts, value_bytes)
Definition: hash.h:696
#define ASSERT(truth)
format_function_t format_dpdk_device_name
Definition: dpdk.h:472
static uword * mhash_get(mhash_t *h, const void *key)
Definition: mhash.h:110
static uword clib_file_add(clib_file_main_t *um, clib_file_t *template)
Definition: file.h:96
vlib_log_class_t log_default
Definition: dpdk.h:382
static uword dpdk_process(vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
Definition: init.c:1833
Bitmaps built as vectors of machine words.
#define clib_error_report(e)
Definition: error.h:113
#define clib_bitmap_free(v)
Free a bitmap.
Definition: bitmap.h:92
#define DPDK_LINK_POLL_INTERVAL
Definition: dpdk.h:250
dpdk_main_t dpdk_main
Definition: init.c:48
struct rte_eth_link link
Definition: dpdk.h:231
#define DPDK_MAX_SIMD_BITWIDTH_512
Definition: dpdk.h:310
u8 * name
Definition: dpdk.h:210
static vlib_main_t * vlib_get_main(void)
Definition: global_funcs.h:38
vl_api_ip4_address_t hi
Definition: arp.api:37
dpdk_port_type_t
Definition: dpdk.h:107
static int dpdk_port_crc_strip_enabled(dpdk_device_t *xd)
Definition: init.c:149
static uword clib_bitmap_count_set_bits(uword *ai)
Return the number of set bits in a bitmap.
Definition: bitmap.h:468
dpdk_device_addr_type_t
Definition: dpdk.h:260
void vnet_hw_if_set_input_node(vnet_main_t *vnm, u32 hw_if_index, u32 node_index)
Definition: rx_queue.c:157
Definition: defs.h:47
#define DPDK_STATS_POLL_INTERVAL
Definition: dpdk.h:247
vlib_vmbus_addr_t * vlib_vmbus_get_all_dev_addrs()
Definition: vmbus.c:390
unformat_function_t unformat_line
Definition: format.h:272
static vlib_node_registration_t dpdk_process_node
(constructor) VLIB_REGISTER_NODE (dpdk_process_node)
Definition: init.c:1889
clib_error_t * dpdk_buffer_pools_create(vlib_main_t *vm)
Definition: buffer.c:438
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
u8 admin_up_down_in_progress
Definition: dpdk.h:367
clib_error_t * ethernet_register_interface(vnet_main_t *vnm, u32 dev_class_index, u32 dev_instance, const u8 *address, u32 *hw_if_index_return, ethernet_flag_change_function_t flag_change)
Definition: interface.c:348
u32 queue_index
Definition: dpdk.h:170
#define STATIC_ASSERT(truth,...)
u8 no_tx_checksum_offload
Definition: dpdk.h:304
VLIB buffer representation.
Definition: buffer.h:111
u64 uword
Definition: types.h:112
static void unformat_free(unformat_input_t *i)
Definition: format.h:155
static clib_error_t * dpdk_lib_init(dpdk_main_t *dm)
Definition: init.c:205
description no domain
Definition: map.api:415
#define clib_error_free(e)
Definition: error.h:86
#define DPDK_DEVICE_TSO_ON
Definition: dpdk.h:292
u32 buffer_flags_template
Definition: dpdk.h:361
#define vnet_buffer(b)
Definition: buffer.h:437
static u32 random_u32(u32 *seed)
32-bit random number generator
Definition: random.h:69
u8 * format_unformat_error(u8 *s, va_list *va)
Definition: unformat.c:91
static clib_error_t * dpdk_device_config(dpdk_config_main_t *conf, void *addr, dpdk_device_addr_type_t addr_type, unformat_input_t *input, u8 is_default)
Definition: init.c:1160
static vlib_thread_main_t * vlib_get_thread_main()
Definition: global_funcs.h:56
#define foreach_dpdk_device_config_item
Definition: dpdk.h:253
f64 now
#define ETHERNET_MAX_PACKET_BYTES
Definition: ethernet.h:133
#define vec_foreach(var, vec)
Vector iterator.
__clib_export clib_error_t * clib_sysfs_read(char *file_name, char *fmt,...)
Definition: sysfs.c:51
#define DPDK_DEVICE_TSO_DEFAULT
Definition: dpdk.h:290
i8 cpu_socket
Definition: dpdk.h:204
uword * cpu_socket_bitmap
Definition: threads.h:307
Definition: file.h:51
dpdk_rx_queue_t * rx_queues
Definition: dpdk.h:185
u8 * uio_driver_name
Definition: dpdk.h:301
vlib_thread_registration_t ** registrations
Definition: threads.h:270
dpdk_device_addr_type_t dev_addr_type
Definition: dpdk.h:274
elog_main_t elog_main
Definition: main.h:300
u8 bus
Definition: pci_types.api:21
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:59
vnet_main_t * vnet_main
Definition: dpdk.h:375
u16 nb_tx_desc
Definition: dpdk.h:207
mhash_t device_config_index_by_vmbus_addr
Definition: dpdk.h:329
u16 device_class
Definition: pci.h:72
#define VNET_HW_IF_RXQ_THREAD_ANY
Definition: interface.h:598
uword * device_config_index_by_pci_addr
Definition: dpdk.h:328
uword unformat_skip_white_space(unformat_input_t *input)
Definition: unformat.c:821
static uword vnet_hw_interface_is_link_up(vnet_main_t *vnm, u32 hw_if_index)
__clib_export u8 * format_clib_error(u8 *s, va_list *va)
Definition: error.c:191
volatile u32 worker_thread_release
Definition: threads.h:313
#define dpdk_log_notice(...)
Definition: dpdk.h:438
clib_bitmap_t * rss_queues
Definition: dpdk.h:288
static void vnet_hw_interface_set_link_speed(vnet_main_t *vnm, u32 hw_if_index, u32 link_speed)
#define vlib_log_warn(...)
Definition: log.h:134
vlib_global_main_t vlib_global_main
Definition: main.c:1786
vnet_device_main_t vnet_device_main
Definition: devices.c:22
format_function_t format_vlib_pci_addr
Definition: pci.h:326
uword unformat(unformat_input_t *i, const char *fmt,...)
Definition: unformat.c:978
f64 link_state_poll_interval
Definition: dpdk.h:370
CLIB vectors are ubiquitous dynamically resized arrays with user-defined "headers".
#define u8
Padding.
Definition: clib.h:121
static uword unformat_check_input(unformat_input_t *i)
Definition: format.h:163
dpdk_config_main_t * conf
Definition: dpdk.h:376
u32 ethernet_set_flags(vnet_main_t *vnm, u32 hw_if_index, u32 flags)
Definition: interface.c:441
vlib_main_t * vlib_main
Definition: dpdk.h:374