FD.io VPP  v16.12-rc0-308-g931be3a
Vector Packet Processing
node.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <vnet/vnet.h>
16 #include <vppinfra/vec.h>
17 #include <vppinfra/error.h>
18 #include <vppinfra/format.h>
19 #include <vppinfra/xxhash.h>
20 
21 #include <vnet/ethernet/ethernet.h>
22 #include <vnet/devices/dpdk/dpdk.h>
24 #include <vnet/mpls/packet.h>
25 #include <vnet/handoff.h>
26 #include <vnet/devices/devices.h>
27 #include <vnet/feature/feature.h>
28 
29 #include "dpdk_priv.h"
30 
/*
 * Fallback MAX/MIN helpers, defined only when no earlier header has
 * already provided a macro of the same name.
 * Both are plain function-like macros: the selected argument is expanded
 * twice, so do not pass expressions with side effects.
 */
31 #ifndef MAX
32 #define MAX(a,b) ((a) < (b) ? (b) : (a))
33 #endif
34 
35 #ifndef MIN
36 #define MIN(a,b) ((a) < (b) ? (a) : (b))
37 #endif
38 
39 /*
40  * At least in certain versions of ESXi, vmware e1000's don't honor the
41  * "strip rx CRC" bit. Set this flag to work around that bug FOR UNIT TEST ONLY.
42  *
43  * If wireshark complains like so:
44  *
45  * "Frame check sequence: 0x00000000 [incorrect, should be <hex-num>]"
46  * and you're using ESXi emulated e1000's, set this flag FOR UNIT TEST ONLY.
47  *
48  * Note: do NOT check in this file with this workaround enabled! You'll lose
49  * actual data from e.g. 10xGE interfaces. The extra 4 bytes annoy
50  * wireshark, but they're harmless...
51  */
52 #define VMWARE_LENGTH_BUG_WORKAROUND 0
53 
/*
 * Table of error strings for this node. The temporary _(n,s) helper keeps
 * only the string part of each (name, string) pair.
 * NOTE(review): the line that expands the error list between the #define
 * and the #undef (listing line 56) is missing from this view - presumably
 * foreach_dpdk_error; confirm against the real file.
 */
54 static char *dpdk_error_strings[] = {
55 #define _(n,s) s,
57 #undef _
58 };
59 
60 always_inline int
61 dpdk_mbuf_is_ip4 (struct rte_mbuf *mb)
62 {
63  return RTE_ETH_IS_IPV4_HDR (mb->packet_type) != 0;
64 }
65 
66 always_inline int
67 dpdk_mbuf_is_ip6 (struct rte_mbuf *mb)
68 {
69  return RTE_ETH_IS_IPV6_HDR (mb->packet_type) != 0;
70 }
71 
/*
 * Returns non-zero when the type field read through h equals
 * ETHERNET_TYPE_MPLS_UNICAST converted to network byte order.
 * NOTE(review): the name/parameter line and the declaration of h (listing
 * lines 73 and 75) are missing from this view. The symbol index at the end
 * of the page gives the signature as
 * "static int vlib_buffer_is_mpls(vlib_buffer_t *b)"; how h is derived
 * from b cannot be seen here.
 */
72 always_inline int
74 {
76  return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST));
77 }
78 
/*
 * Derive the next-node index and the DPDK error code for one received
 * packet from the mbuf offload flags.
 *
 * Outputs:
 *   *error0 - DPDK_ERROR_* value; DPDK_ERROR_NONE when no error flag is set
 *   *next0  - copy of the local n0 chosen below
 *
 * NOTE(review): this listing has lost several lines (the function-name
 * line, listing line 80, and most of the statements that assign n0 and
 * handoff.next_index). The symbol index at the end of the page gives the
 * full parameter list as (dpdk_device_t *xd, struct rte_mbuf *mb,
 * vlib_buffer_t *b0, u32 *next0, u8 *error0). Do not treat the visible
 * body as complete.
 */
79 always_inline void
81  struct rte_mbuf *mb,
82  vlib_buffer_t * b0, u32 * next0,
83  u8 * error0)
84 {
85  u8 n0;
/* NOTE(review): ol_flags is narrowed to 16 bits here - confirm that every
   flag tested below lives in the low 16 bits of the field. */
86  uint16_t mb_flags = mb->ol_flags;
87 
88  if (PREDICT_FALSE (mb_flags & (
89 #ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS
90  PKT_EXT_RX_PKT_ERROR | PKT_EXT_RX_BAD_FCS |
91 #endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */
92  PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD)))
93  {
94  /* some error was flagged. determine the drop reason */
/* The chain below is evaluated top to bottom, so when several flags are
   set the first matching one decides the reported error. */
96  *error0 =
97 #ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS
98  (mb_flags & PKT_EXT_RX_PKT_ERROR) ? DPDK_ERROR_RX_PACKET_ERROR :
99  (mb_flags & PKT_EXT_RX_BAD_FCS) ? DPDK_ERROR_RX_BAD_FCS :
100 #endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */
101  (mb_flags & PKT_RX_IP_CKSUM_BAD) ? DPDK_ERROR_IP_CHECKSUM_ERROR :
102  (mb_flags & PKT_RX_L4_CKSUM_BAD) ? DPDK_ERROR_L4_CHECKSUM_ERROR :
103  DPDK_ERROR_NONE;
104  }
105  else
106  {
107  *error0 = DPDK_ERROR_NONE;
/* A per_interface_next_index other than ~0 overrides the default next
   node; the packet class (ip4 / ip6 / mpls / other) is then recorded in
   the buffer's handoff.next_index instead. The assigned values are on
   lines missing from this view. */
108  if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
109  {
110  n0 = xd->per_interface_next_index;
112  if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb)))
113  vnet_buffer (b0)->handoff.next_index =
115  else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb)))
116  vnet_buffer (b0)->handoff.next_index =
118  else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
119  vnet_buffer (b0)->handoff.next_index =
121  else
122  vnet_buffer (b0)->handoff.next_index =
124  }
125  else
127  (mb_flags & PKT_RX_VLAN_PKT)))
129  else
130  {
/* Same ip4 / ip6 / mpls / other classification as above; the n0
   assignments for each case are on lines missing from this view. */
131  if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb)))
133  else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb)))
135  else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
137  else
139  }
140  }
141  *next0 = n0;
142 }
143 
/*
 * Add a trace record for each buffer index in buffers[0 .. n_buffers-1].
 *
 * For every buffer the next index and error are recomputed from the mbuf
 * flags, the buffer is marked for tracing via vlib_trace_buffer(), and a
 * record is filled with the queue id, device index, buffer index and
 * copies of the mbuf, the vlib buffer header and the packet data.
 *
 * NOTE(review): the function-name line (listing line 145) and the
 * declaration of t0 (listing line 161) are missing from this view. The
 * symbol index at the end of the page gives the signature as
 * void dpdk_rx_trace(dpdk_main_t *dm, vlib_node_runtime_t *node,
 * dpdk_device_t *xd, u16 queue_id, u32 *buffers, uword n_buffers).
 */
144 void
146  vlib_node_runtime_t * node,
147  dpdk_device_t * xd,
148  u16 queue_id, u32 * buffers, uword n_buffers)
149 {
150  vlib_main_t *vm = vlib_get_main ();
151  u32 *b, n_left;
152  u32 next0;
153 
154  n_left = n_buffers;
155  b = buffers;
156 
157  while (n_left >= 1)
158  {
159  u32 bi0;
160  vlib_buffer_t *b0;
162  struct rte_mbuf *mb;
163  u8 error0;
164 
165  bi0 = b[0];
166  n_left -= 1;
167 
168  b0 = vlib_get_buffer (vm, bi0);
169  mb = rte_mbuf_from_vlib_buffer (b0);
/* error0 is computed but not used in the visible code; only next0 feeds
   vlib_trace_buffer() below. */
170  dpdk_rx_next_and_error_from_mb_flags_x1 (xd, mb, b0, &next0, &error0);
171  vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
172  t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
173  t0->queue_index = queue_id;
174  t0->device_index = xd->device_index;
175  t0->buffer_index = bi0;
176 
/* Snapshot the mbuf, then the vlib buffer header without its pre_data
   area; the pre_data slot of the copy is filled from b0->data instead. */
177  clib_memcpy (&t0->mb, mb, sizeof (t0->mb));
178  clib_memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
179  clib_memcpy (t0->buffer.pre_data, b0->data,
180  sizeof (t0->buffer.pre_data));
/* NOTE(review): copies sizeof (t0->data) bytes from the mbuf data start
   regardless of the packet length - confirm the source buffer is always
   at least that large. */
181  clib_memcpy (&t0->data, mb->buf_addr + mb->data_off, sizeof (t0->data));
182 
183 #ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS
184  /*
185  * Clear overloaded TX offload flags when a DPDK driver
186  * is using them for RX flags (e.g. Cisco VIC Ethernet driver)
187  */
188  mb->ol_flags &= PKT_EXT_RX_CLR_TX_FLAGS_MASK;
189 #endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */
190 
191  b += 1;
192  }
193 }
194 
195 /*
196  * dpdk_efd_update_counters()
197  * Update EFD (early-fast-discard) counters
 *
 * xd        - device whose efd_agent statistics are updated
 * n_buffers - size of the burst that was just received
 * enabled   - EFD mode bits; only DPDK_EFD_MONITOR_ENABLED is tested here
 *
 * Always updates total_packet_cnt, last_burst_sz and max_burst_sz. In
 * monitor mode it also tracks the longest interval between two polls.
 *
 * NOTE(review): the bodies of the final if/else (listing lines 221-223 and
 * 227) are missing from this view, so what happens on a full versus a
 * partial frame cannot be seen here.
198  */
199 void
200 dpdk_efd_update_counters (dpdk_device_t * xd, u32 n_buffers, u16 enabled)
201 {
202  if (enabled & DPDK_EFD_MONITOR_ENABLED)
203  {
204  u64 now = clib_cpu_time_now ();
/* last_poll_time == 0 means there is no previous poll to measure from,
   so the first sample only records the timestamp. */
205  if (xd->efd_agent.last_poll_time > 0)
206  {
207  u64 elapsed_time = (now - xd->efd_agent.last_poll_time);
208  if (elapsed_time > xd->efd_agent.max_poll_delay)
209  xd->efd_agent.max_poll_delay = elapsed_time;
210  }
211  xd->efd_agent.last_poll_time = now;
212  }
213 
214  xd->efd_agent.total_packet_cnt += n_buffers;
215  xd->efd_agent.last_burst_sz = n_buffers;
216 
217  if (n_buffers > xd->efd_agent.max_burst_sz)
218  xd->efd_agent.max_burst_sz = n_buffers;
219 
/* A burst of exactly VLIB_FRAME_SIZE is treated separately from a
   shorter one; the statements of both branches are not visible. */
220  if (PREDICT_FALSE (n_buffers == VLIB_FRAME_SIZE))
221  {
224  }
225  else
226  {
228  }
229 }
230 
231 /* is_efd_discardable()
232  * returns non zero DPDK error if packet meets early-fast-discard criteria,
233  * zero otherwise
 *
 * The Ethernet type read through eh selects the check:
 *   IPv4         - top 3 bits of tos against tm->efd.ip_prec_bitmap
 *   IPv6         - traffic-class bits against tm->efd.ip_prec_bitmap
 *   MPLS unicast - 3 EXP bits against tm->efd.mpls_exp_bitmap
 *   VLAN/DOT1AD  - 3 priority bits against tm->efd.vlan_cos_bitmap
 * Any other type returns DPDK_ERROR_NONE. The mb parameter is not used in
 * the visible code.
 *
 * NOTE(review): the function-name line (listing line 236) and the
 * declaration of eh (listing line 239) are missing from this view. The
 * symbol index at the end of the page gives the signature as
 * u32 is_efd_discardable(vlib_thread_main_t *tm, vlib_buffer_t *b0,
 * struct rte_mbuf *mb).
234  */
235 u32
237  vlib_buffer_t * b0, struct rte_mbuf *mb)
238 {
240 
241  if (eh->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4))
242  {
243  ip4_header_t *ipv4 =
244  (ip4_header_t *) & (b0->data[sizeof (ethernet_header_t)]);
245  u8 pkt_prec = (ipv4->tos >> 5);
246 
247  return (tm->efd.ip_prec_bitmap & (1 << pkt_prec) ?
248  DPDK_ERROR_IPV4_EFD_DROP_PKTS : DPDK_ERROR_NONE);
249  }
/* The remaining comparisons use clib_net_to_host_u16 where the IPv4 case
   uses clib_host_to_net_u16 - presumably the same 16-bit swap; confirm. */
250  else if (eh->type == clib_net_to_host_u16 (ETHERNET_TYPE_IP6))
251  {
252  ip6_header_t *ipv6 =
253  (ip6_header_t *) & (b0->data[sizeof (ethernet_header_t)]);
/* NOTE(review): the 32-bit header word is shifted without a byte-order
   conversion, unlike eh->type above - confirm this extracts the intended
   bits on little-endian hosts. */
254  u8 pkt_tclass =
255  ((ipv6->ip_version_traffic_class_and_flow_label >> 20) & 0xff);
256 
/* NOTE(review): pkt_tclass can be as large as 255, so this shift count
   can exceed the width of int (undefined behavior), and only bit
   positions 0-7 can ever match the 8-bit ip_prec_bitmap. Looks like a
   3-bit precedence was intended - confirm. */
257  return (tm->efd.ip_prec_bitmap & (1 << pkt_tclass) ?
258  DPDK_ERROR_IPV6_EFD_DROP_PKTS : DPDK_ERROR_NONE);
259  }
260  else if (eh->type == clib_net_to_host_u16 (ETHERNET_TYPE_MPLS_UNICAST))
261  {
262  mpls_unicast_header_t *mpls =
263  (mpls_unicast_header_t *) & (b0->data[sizeof (ethernet_header_t)]);
/* NOTE(review): label_exp_s_ttl is used without a byte-order conversion -
   confirm. */
264  u8 pkt_exp = ((mpls->label_exp_s_ttl >> 9) & 0x07);
265 
266  return (tm->efd.mpls_exp_bitmap & (1 << pkt_exp) ?
267  DPDK_ERROR_MPLS_EFD_DROP_PKTS : DPDK_ERROR_NONE);
268  }
269  else if ((eh->type == clib_net_to_host_u16 (ETHERNET_TYPE_VLAN)) ||
270  (eh->type == clib_net_to_host_u16 (ETHERNET_TYPE_DOT1AD)))
271  {
272  ethernet_vlan_header_t *vlan =
273  (ethernet_vlan_header_t *) & (b0->data[sizeof (ethernet_header_t)]);
/* NOTE(review): priority_cfi_and_id is used without a byte-order
   conversion - confirm. */
274  u8 pkt_cos = ((vlan->priority_cfi_and_id >> 13) & 0x07);
275 
276  return (tm->efd.vlan_cos_bitmap & (1 << pkt_cos) ?
277  DPDK_ERROR_VLAN_EFD_DROP_PKTS : DPDK_ERROR_NONE);
278  }
279 
280  return DPDK_ERROR_NONE;
281 }
282 
/*
 * Pull up to VLIB_FRAME_SIZE mbufs from one device queue into
 * xd->rx_vectors[queue_id] and return how many were received.
 *
 * rte_eth_rx_burst() is called repeatedly, appending after the mbufs
 * already collected, until the frame is full or a call returns fewer than
 * 32 packets.
 *
 * NOTE(review): the function-name line (listing line 284) and the
 * condition of the outer if (listing line 293) are missing from this view.
 * The symbol index at the end of the page gives the signature as
 * static u32 dpdk_rx_burst(dpdk_main_t *dm, dpdk_device_t *xd,
 * u16 queue_id). The else branch only asserts, so the hidden condition is
 * expected to hold.
 */
283 static inline u32
285 {
286  u32 n_buffers;
287  u32 n_left;
288  u32 n_this_chunk;
289 
290  n_left = VLIB_FRAME_SIZE;
291  n_buffers = 0;
292 
294  {
295  while (n_left)
296  {
297  n_this_chunk = rte_eth_rx_burst (xd->device_index, queue_id,
298  xd->rx_vectors[queue_id] +
299  n_buffers, n_left);
300  n_buffers += n_this_chunk;
301  n_left -= n_this_chunk;
302 
303  /* Empirically, DPDK r1.8 produces vectors w/ 32 or fewer elts */
/* A short chunk is taken as the sign to stop polling this queue. */
304  if (n_this_chunk < 32)
305  break;
306  }
307  }
308  else
309  {
310  ASSERT (0);
311  }
312 
313  return n_buffers;
314 }
315 
316 /*
317  * This function is used when there are no worker threads.
318  * The main thread performs IO and forwards the packets.
 *
 * Receive one burst from (xd, queue_id) and enqueue the packets to the
 * next graph nodes.
 *
 * Steps visible in this listing:
 *   1. Return 0 at once if the device is not admin-up or the burst is
 *      empty (an empty burst also resets two EFD statistics when EFD is
 *      in use).
 *   2. If use_efd is set and dm->efd.enabled is non-zero, update the EFD
 *      counters, possibly discard the whole burst, and decide whether
 *      per-packet discard applies to this burst.
 *   3. For every mbuf: optional EFD discard, next/error selection, vlib
 *      buffer setup, chaining of extra segments, feature redirect,
 *      enqueue.
 *   4. Trace the collected buffers, bump the interface rx counter and the
 *      per-worker aggregate, and return the number of mbufs consumed.
 *
 * NOTE(review): the function-name line and a number of declarations and
 * statements are missing from this view (e.g. listing lines 321, 327, 333,
 * 356, 359, 369, 383-384, 432, 455, 463, 480-481, 492, 557, 559). The
 * symbol index at the end of the page gives the signature as
 * static u32 dpdk_device_input(dpdk_main_t *dm, dpdk_device_t *xd,
 * vlib_node_runtime_t *node, u32 cpu_index, u16 queue_id, int use_efd).
 * NOTE(review): the two original sentences above say "no worker threads",
 * yet every dpdk_input* variant in this file calls this function -
 * confirm whether that wording is still accurate.
319  */
320 static inline u32
322  dpdk_device_t * xd,
323  vlib_node_runtime_t * node,
324  u32 cpu_index, u16 queue_id, int use_efd)
325 {
326  u32 n_buffers;
328  u32 n_left_to_next, *to_next;
329  u32 mb_index;
330  vlib_main_t *vm = vlib_get_main ();
331  uword n_rx_bytes = 0;
332  u32 n_trace, trace_cnt __attribute__ ((unused));
334  u8 efd_discard_burst = 0;
335  u32 buffer_flags_template;
336 
337  if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0)
338  return 0;
339 
340  n_buffers = dpdk_rx_burst (dm, xd, queue_id);
341 
342  if (n_buffers == 0)
343  {
344  /* check if EFD (dpdk) is enabled */
345  if (PREDICT_FALSE (use_efd && dm->efd.enabled))
346  {
347  /* reset a few stats */
348  xd->efd_agent.last_poll_time = 0;
349  xd->efd_agent.last_burst_sz = 0;
350  }
351  return 0;
352  }
353 
354  buffer_flags_template = dm->buffer_flags_template;
355 
357  trace_cnt = n_trace = vlib_get_trace_count (vm, node);
358 
360 
361  /* Check for congestion if EFD (Early-Fast-Discard) is enabled
362  * in any mode (e.g. dpdk, monitor, or drop_all)
363  */
364  if (PREDICT_FALSE (use_efd && dm->efd.enabled))
365  {
366  /* update EFD counters */
367  dpdk_efd_update_counters (xd, n_buffers, dm->efd.enabled);
368 
/* The condition guarding this block is missing from this view. */
370  {
371  /* discard all received packets */
372  for (mb_index = 0; mb_index < n_buffers; mb_index++)
373  rte_pktmbuf_free (xd->rx_vectors[queue_id][mb_index]);
374 
375  xd->efd_agent.discard_cnt += n_buffers;
/* NOTE(review): the whole burst is accounted under the VLAN drop error
   whatever the packet types were - confirm this is intended. */
377  DPDK_ERROR_VLAN_EFD_DROP_PKTS,
378  n_buffers);
379 
380  return 0;
381  }
382 
385  {
386  u32 device_queue_sz = rte_eth_rx_queue_count (xd->device_index,
387  queue_id);
388  if (device_queue_sz >= dm->efd.queue_hi_thresh)
389  {
390  /* dpdk device queue has reached the critical threshold */
392 
393  /* apply EFD to packets from the burst */
394  efd_discard_burst = 1;
395  }
396  }
397  }
398 
/* mb_index walks rx_vectors[queue_id]; n_buffers counts what is left. */
399  mb_index = 0;
400 
401  while (n_buffers > 0)
402  {
403  u32 bi0, next0;
404  u8 error0;
405  u32 l3_offset0;
406  vlib_buffer_t *b0, *b_seg, *b_chain = 0;
407  u32 cntr_type;
408 
409  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
410 
411  while (n_buffers > 0 && n_left_to_next > 0)
412  {
413  u8 nb_seg = 1;
414  struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index];
/* NOTE(review): mb is dereferenced here, before the ASSERT (mb) below. */
415  struct rte_mbuf *mb_seg = mb->next;
416 
/* Prefetch the mbuf two slots ahead; n_buffers > 2 guarantees that slot
   is still part of this burst. */
417  if (PREDICT_TRUE (n_buffers > 2))
418  {
419  struct rte_mbuf *pfmb = xd->rx_vectors[queue_id][mb_index + 2];
421  CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, STORE);
423  }
424 
425  ASSERT (mb);
426 
427  b0 = vlib_buffer_from_rte_mbuf (mb);
428 
429  /* check whether EFD is looking for packets to discard */
430  if (PREDICT_FALSE (efd_discard_burst))
431  {
433 
/* A non-zero result is both the "discard" verdict and the counter to
   bump. Discarded mbufs still advance mb_index, so they are included in
   the packet count returned and accounted at the end of the function. */
434  if (PREDICT_TRUE (cntr_type = is_efd_discardable (tm, b0, mb)))
435  {
436  rte_pktmbuf_free (mb);
437  xd->efd_agent.discard_cnt++;
438  increment_efd_drop_counter (vm, cntr_type, 1);
439  n_buffers--;
440  mb_index++;
441  continue;
442  }
443  }
444 
445  /* Prefetch one next segment if it exists. */
446  if (PREDICT_FALSE (mb->nb_segs > 1))
447  {
448  struct rte_mbuf *pfmb = mb->next;
450  CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, LOAD);
452  b_chain = b0;
453  }
454 
456 
457  bi0 = vlib_get_buffer_index (vm, b0);
458 
459  to_next[0] = bi0;
460  to_next++;
461  n_left_to_next--;
462 
464  &next0, &error0);
465 #ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS
466  /*
467  * Clear overloaded TX offload flags when a DPDK driver
468  * is using them for RX flags (e.g. Cisco VIC Ethernet driver)
469  */
470 
/* While trace_cnt is non-zero the flags are left untouched here;
   dpdk_rx_trace() re-reads them for traced buffers and clears them
   itself. */
471  if (PREDICT_TRUE (trace_cnt == 0))
472  mb->ol_flags &= PKT_EXT_RX_CLR_TX_FLAGS_MASK;
473  else
474  trace_cnt--;
475 #endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */
476 
477  b0->error = node->errors[error0];
478 
/* l3_offset0 is sizeof (ethernet_header_t) for some next nodes and 0
   otherwise; part of the condition is missing from this view. */
479  l3_offset0 = ((next0 == VNET_DEVICE_INPUT_NEXT_IP4_INPUT ||
482  sizeof (ethernet_header_t) : 0);
483 
484  b0->current_data = l3_offset0;
485  /* Some drivers like fm10k receive frames with
486  mb->data_off > RTE_PKTMBUF_HEADROOM */
487  b0->current_data += mb->data_off - RTE_PKTMBUF_HEADROOM;
488  b0->current_length = mb->data_len - l3_offset0;
489 
490  b0->flags = buffer_flags_template;
491 
/* NOTE(review): the condition guarding the next statement (listing line
   492) is missing from this view - presumably the
   VMWARE_LENGTH_BUG_WORKAROUND check; confirm. */
493  b0->current_length -= 4;
494 
495  vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
496  vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
497  n_rx_bytes += mb->pkt_len;
498 
499  /* Process subsequent segments of multi-segment packets */
500  while ((mb->nb_segs > 1) && (nb_seg < mb->nb_segs))
501  {
502  ASSERT (mb_seg != 0);
503 
504  b_seg = vlib_buffer_from_rte_mbuf (mb_seg);
505  vlib_buffer_init_for_free_list (b_seg, fl);
506 
507  ASSERT ((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
508  ASSERT (b_seg->current_data == 0);
509 
510  /*
511  * The driver (e.g. virtio) may not put the packet data at the start
512  * of the segment, so don't assume b_seg->current_data == 0 is correct.
513  */
514  b_seg->current_data =
515  (mb_seg->buf_addr + mb_seg->data_off) - (void *) b_seg->data;
516 
517  b_seg->current_length = mb_seg->data_len;
518  b0->total_length_not_including_first_buffer += mb_seg->data_len;
519 
/* Link the previous buffer of the chain to this segment, then advance. */
520  b_chain->flags |= VLIB_BUFFER_NEXT_PRESENT;
521  b_chain->next_buffer = vlib_get_buffer_index (vm, b_seg);
522 
523  b_chain = b_seg;
524  mb_seg = mb_seg->next;
525  nb_seg++;
526  }
527 
528  /*
529  * Turn this on if you run into
530  * "bad monkey" contexts, and you want to know exactly
531  * which nodes they've visited... See main.c...
532  */
534 
535  /* Do we have any driver RX features configured on the interface? */
536  vnet_feature_device_input_redirect_x1 (node, xd->vlib_sw_if_index, &next0, b0, l3_offset0);
537 
538  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
539  to_next, n_left_to_next,
540  bi0, next0);
/* Remember the buffer for tracing while its position in the burst is
   below the trace count read at the top of the function. */
541  if (PREDICT_FALSE (n_trace > mb_index))
542  vec_add1 (xd->d_trace_buffers, bi0);
543  n_buffers--;
544  mb_index++;
545  }
546  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
547  }
548 
549  if (PREDICT_FALSE (vec_len (xd->d_trace_buffers) > 0))
550  {
551  dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers,
552  vec_len (xd->d_trace_buffers));
553  vlib_set_trace_count (vm, node,
554  n_trace - vec_len (xd->d_trace_buffers));
555  }
556 
558  (vnet_get_main ()->interface_main.combined_sw_if_counters
560  cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes);
561 
562  dpdk_worker_t *dw = vec_elt_at_index (dm->workers, cpu_index);
563  dw->aggregate_rx_packets += mb_index;
564 
565  return mb_index;
566 }
567 
568 static inline void
570 {
571  /* Limit the poll rate by sleeping for N msec between polls */
572  if (PREDICT_FALSE (dm->poll_sleep != 0))
573  {
574  struct timespec ts, tsrem;
575 
576  ts.tv_sec = 0;
577  ts.tv_nsec = 1000 * 1000 * dm->poll_sleep; /* 1ms */
578 
579  while (nanosleep (&ts, &tsrem) < 0)
580  {
581  ts = tsrem;
582  }
583  }
584 }
585 
586 /** \brief Main DPDK input node
587  @node dpdk-input
588 
589  This is the main DPDK input node: across each assigned interface,
590  call rte_eth_rx_burst(...) or similar to obtain a vector of
591  packets to process. Handle early packet discard. Derive @c
592  vlib_buffer_t metadata from <code>struct rte_mbuf</code> metadata,
593  Depending on the resulting metadata: adjust <code>b->current_data,
594  b->current_length </code> and dispatch directly to
595  ip4-input-no-checksum, or ip6-input. Trace the packet if required.
596 
597  @param vm vlib_main_t corresponding to the current thread
598  @param node vlib_node_runtime_t
599  @param f vlib_frame_t input-node, not used.
600 
601  @par Graph mechanics: buffer metadata, next index usage
602 
603  @em Uses:
604  - <code>struct rte_mbuf mb->ol_flags</code>
605  - PKT_EXT_RX_PKT_ERROR, PKT_EXT_RX_BAD_FCS
606  PKT_RX_IP_CKSUM_BAD, PKT_RX_L4_CKSUM_BAD
607  - <code> RTE_ETH_IS_xxx_HDR(mb->packet_type) </code>
608  - packet classification result
609 
610  @em Sets:
611  - <code>b->error</code> if the packet is to be dropped immediately
612  - <code>b->current_data, b->current_length</code>
613  - adjusted as needed to skip the L2 header in direct-dispatch cases
614  - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
615  - rx interface sw_if_index
616  - <code>vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0</code>
617  - required by ipX-lookup
618  - <code>b->flags</code>
619  - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc.
620 
621  <em>Next Nodes:</em>
622  - Static arcs to: error-drop, ethernet-input,
623  ip4-input-no-checksum, ip6-input, mpls-input
624  - per-interface redirection, controlled by
625  <code>xd->per_interface_next_index</code>
626 */
627 
/*
 * Single-queue variant: polls every device assigned to the current cpu on
 * queue 0 with EFD disabled (the last two arguments of dpdk_device_input
 * are both 0), then applies the optional poll-rate limit.
 * Returns the sum of the per-device packet counts.
 * NOTE(review): the name/parameter line and the declaration of dq (listing
 * lines 629 and 634) are missing from this view. The symbol index at the
 * end of the page gives the signature as
 * static uword dpdk_input(vlib_main_t *vm, vlib_node_runtime_t *node,
 * vlib_frame_t *f).
 */
628 static uword
630 {
631  dpdk_main_t *dm = &dpdk_main;
632  dpdk_device_t *xd;
633  uword n_rx_packets = 0;
635  u32 cpu_index = os_get_cpu_number ();
636 
637  /*
638  * Poll all devices on this cpu for input/interrupts.
639  */
640  /* *INDENT-OFF* */
641  vec_foreach (dq, dm->devices_by_cpu[cpu_index])
642  {
643  xd = vec_elt_at_index(dm->devices, dq->device);
/* This variant only supports queue 0, hence the literal 0 passed below. */
644  ASSERT(dq->queue_id == 0);
645  n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, 0, 0);
646  }
647  /* *INDENT-ON* */
648 
649  poll_rate_limit (dm);
650 
651  return n_rx_packets;
652 }
653 
/*
 * Multi-queue variant: polls every (device, queue) pair assigned to the
 * current cpu, passing dq->queue_id through and leaving EFD disabled
 * (use_efd argument is 0), then applies the optional poll-rate limit.
 * Returns the sum of the per-queue packet counts.
 * NOTE(review): the name line and the declaration of dq (listing lines 655
 * and 661) are missing from this view. The symbol index at the end of the
 * page gives the signature as
 * uword dpdk_input_rss(vlib_main_t *vm, vlib_node_runtime_t *node,
 * vlib_frame_t *f).
 */
654 uword
656  vlib_node_runtime_t * node, vlib_frame_t * f)
657 {
658  dpdk_main_t *dm = &dpdk_main;
659  dpdk_device_t *xd;
660  uword n_rx_packets = 0;
662  u32 cpu_index = os_get_cpu_number ();
663 
664  /*
665  * Poll all devices on this cpu for input/interrupts.
666  */
667  /* *INDENT-OFF* */
668  vec_foreach (dq, dm->devices_by_cpu[cpu_index])
669  {
670  xd = vec_elt_at_index(dm->devices, dq->device);
671  n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, 0);
672  }
673  /* *INDENT-ON* */
674 
675  poll_rate_limit (dm);
676 
677  return n_rx_packets;
678 }
679 
/*
 * EFD variant: polls every (device, queue) pair assigned to the current
 * cpu with the use_efd argument of dpdk_device_input set to 1, then
 * applies the optional poll-rate limit.
 * Returns the sum of the per-queue packet counts.
 * NOTE(review): the name line and the declaration of dq (listing lines 681
 * and 687) are missing from this view. The symbol index at the end of the
 * page gives the signature as
 * uword dpdk_input_efd(vlib_main_t *vm, vlib_node_runtime_t *node,
 * vlib_frame_t *f).
 */
680 uword
682  vlib_node_runtime_t * node, vlib_frame_t * f)
683 {
684  dpdk_main_t *dm = &dpdk_main;
685  dpdk_device_t *xd;
686  uword n_rx_packets = 0;
688  u32 cpu_index = os_get_cpu_number ();
689 
690  /*
691  * Poll all devices on this cpu for input/interrupts.
692  */
693  /* *INDENT-OFF* */
694  vec_foreach (dq, dm->devices_by_cpu[cpu_index])
695  {
696  xd = vec_elt_at_index(dm->devices, dq->device);
697  n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, 1);
698  }
699  /* *INDENT-ON* */
700 
701  poll_rate_limit (dm);
702 
703  return n_rx_packets;
704 }
705 
706 /* *INDENT-OFF* */
/*
 * Registration data for the "dpdk-input" graph node: an input-type node
 * whose default function is dpdk_input and which starts out disabled.
 * NOTE(review): the opening line of the initializer (listing line 707) is
 * missing from this view. The symbol index at the end of the page lists it
 * as VLIB_REGISTER_NODE (dpdk_input_node).
 */
708  .function = dpdk_input,
709  .type = VLIB_NODE_TYPE_INPUT,
710  .name = "dpdk-input",
711 
712  /* Will be enabled if/when hardware is detected. */
713  .state = VLIB_NODE_STATE_DISABLED,
714 
715  .format_buffer = format_ethernet_header_with_length,
716  .format_trace = format_dpdk_rx_dma_trace,
717 
718  .n_errors = DPDK_N_ERROR,
719  .error_strings = dpdk_error_strings,
720 
721  .n_next_nodes = VNET_DEVICE_INPUT_N_NEXT_NODES,
722  .next_nodes = VNET_DEVICE_INPUT_NEXT_NODES,
723 };
724 
725 
726 /* handle dpdk_input_rss alternative function */
730 
731 /* this macro defines dpdk_input_rss_multiarch_select() */
735 
736 /*
737  * set_efd_bitmap()
738  * Based on the operation type, set lower/upper bits for the given index value
739  */
740 void
741 set_efd_bitmap (u8 * bitmap, u32 value, u32 op)
742 {
743  int ix;
744 
745  *bitmap = 0;
746  for (ix = 0; ix < 8; ix++)
747  {
748  if (((op == EFD_OPERATION_LESS_THAN) && (ix < value)) ||
749  ((op == EFD_OPERATION_GREATER_OR_EQUAL) && (ix >= value)))
750  {
751  (*bitmap) |= (1 << ix);
752  }
753  }
754 }
755 
/*
 * Apply an EFD configuration.
 *
 * enabled           - selects between the two branches below
 * ip_prec, ip_op    - threshold and comparison for tm->efd.ip_prec_bitmap
 * mpls_exp, mpls_op - threshold and comparison for tm->efd.mpls_exp_bitmap
 * vlan_cos, vlan_op - threshold and comparison for tm->efd.vlan_cos_bitmap
 *
 * The three bitmaps are rebuilt with set_efd_bitmap() on every call,
 * whatever the value of enabled.
 *
 * NOTE(review): the declaration of tm (listing line 761) and the bodies of
 * both branches (listing lines 766-767 and 771-772) are missing from this
 * view, so what enabling or disabling actually changes cannot be seen
 * here.
 */
756 void
757 efd_config (u32 enabled,
758  u32 ip_prec, u32 ip_op,
759  u32 mpls_exp, u32 mpls_op, u32 vlan_cos, u32 vlan_op)
760 {
762  dpdk_main_t *dm = &dpdk_main;
763 
764  if (enabled)
765  {
768  }
769  else
770  {
773  }
774 
775  set_efd_bitmap (&tm->efd.ip_prec_bitmap, ip_prec, ip_op);
776  set_efd_bitmap (&tm->efd.mpls_exp_bitmap, mpls_exp, mpls_op);
777  set_efd_bitmap (&tm->efd.vlan_cos_bitmap, vlan_cos, vlan_op);
778 }
void vlib_put_next_frame(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, u32 n_vectors_left)
Release pointer to next frame vector data.
Definition: main.c:457
static void dpdk_rx_next_and_error_from_mb_flags_x1(dpdk_device_t *xd, struct rte_mbuf *mb, vlib_buffer_t *b0, u32 *next0, u8 *error0)
Definition: node.c:80
void dpdk_rx_trace(dpdk_main_t *dm, vlib_node_runtime_t *node, dpdk_device_t *xd, u16 queue_id, u32 *buffers, uword n_buffers)
Definition: node.c:145
u16 enabled
Definition: dpdk.h:299
uword dpdk_input_rss(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *f)
Definition: node.c:655
#define rte_mbuf_from_vlib_buffer(x)
Definition: buffer.h:385
static int vlib_buffer_is_mpls(vlib_buffer_t *b)
Definition: node.c:73
static u32 vlib_get_trace_count(vlib_main_t *vm, vlib_node_runtime_t *rt)
Definition: trace_funcs.h:143
u16 max_burst_sz
Definition: dpdk.h:138
void dpdk_efd_update_counters(dpdk_device_t *xd, u32 n_buffers, u16 enabled)
Definition: node.c:200
dpdk_main_t dpdk_main
Definition: dpdk.h:476
static vlib_main_t * vlib_get_main(void)
Definition: global_funcs.h:23
static u32 dpdk_rx_burst(dpdk_main_t *dm, dpdk_device_t *xd, u16 queue_id)
Definition: node.c:284
#define VLIB_EFD_DISCARD_ENABLED
Definition: threads.h:246
#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b)
Definition: buffer.h:402
#define PREDICT_TRUE(x)
Definition: clib.h:98
u64 last_poll_time
Definition: dpdk.h:142
#define foreach_dpdk_error
Definition: dpdk.h:519
u16 flags
Definition: dpdk.h:215
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:482
static u64 clib_cpu_time_now(void)
Definition: time.h:73
#define BUFFER_HANDOFF_NEXT_VALID
Definition: buffer.h:68
u32 per_interface_next_index
Definition: dpdk.h:203
static int dpdk_mbuf_is_ip6(struct rte_mbuf *mb)
Definition: node.c:67
u32 congestion_cnt
Definition: dpdk.h:141
static void poll_rate_limit(dpdk_main_t *dm)
Definition: node.c:569
static char * dpdk_error_strings[]
Definition: node.c:54
vlib_error_t * errors
Definition: node.h:419
#define vec_reset_length(v)
Reset vector length to zero NULL-pointer tolerant.
#define DPDK_DEVICE_FLAG_PMD
Definition: dpdk.h:218
static u32 dpdk_device_input(dpdk_main_t *dm, dpdk_device_t *xd, vlib_node_runtime_t *node, u32 cpu_index, u16 queue_id, int use_efd)
Definition: node.c:321
u8 mpls_exp_bitmap
Definition: threads.h:257
static void vlib_trace_buffer(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, vlib_buffer_t *b, int follow_chain)
Definition: trace_funcs.h:104
static void vlib_buffer_init_for_free_list(vlib_buffer_t *_dst, vlib_buffer_free_list_t *fl)
Definition: buffer_funcs.h:606
vnet_main_t * vnet_get_main(void)
Definition: misc.c:46
i16 current_data
signed offset in data[], pre_data[] that we are currently processing.
Definition: buffer.h:78
u8 data[256]
Definition: dpdk.h:495
static int dpdk_mbuf_is_ip4(struct rte_mbuf *mb)
Definition: node.c:61
#define always_inline
Definition: clib.h:84
void efd_config(u32 enabled, u32 ip_prec, u32 ip_op, u32 mpls_exp, u32 mpls_op, u32 vlan_cos, u32 vlan_op)
Definition: node.c:757
static_always_inline void vnet_feature_device_input_redirect_x1(vlib_node_runtime_t *node, u32 sw_if_index, u32 *next0, vlib_buffer_t *b0, u16 buffer_advanced0)
Definition: feature.h:158
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
u8 pre_data[VLIB_BUFFER_PRE_DATA_SIZE]
Space for inserting data before buffer start.
Definition: buffer.h:146
unsigned long u64
Definition: types.h:89
vlib_node_registration_t dpdk_input_node
(constructor) VLIB_REGISTER_NODE (dpdk_input_node)
Definition: node.c:707
#define VMWARE_LENGTH_BUG_WORKAROUND
Definition: node.c:52
u32 device_index
Definition: dpdk.h:197
dpdk_worker_t * workers
Definition: dpdk.h:423
static u32 vlib_get_buffer_index(vlib_main_t *vm, void *p)
Translate buffer pointer into buffer index.
Definition: buffer_funcs.h:82
#define VLIB_BUFFER_NEXT_PRESENT
Definition: buffer.h:97
struct rte_mbuf mb
Definition: dpdk.h:493
u16 consec_full_frames_hi_thresh
Definition: dpdk.h:301
u32 vlib_sw_if_index
Definition: dpdk.h:200
format_function_t format_dpdk_rx_dma_trace
Definition: dpdk.h:608
u16 current_length
Nbytes between current data and the end of this buffer.
Definition: buffer.h:82
dpdk_device_and_queue_t ** devices_by_cpu
Definition: dpdk.h:407
u64 max_poll_delay
Definition: dpdk.h:143
static_always_inline void increment_efd_drop_counter(vlib_main_t *vm, u32 counter_index, u32 count)
Definition: dpdk.h:545
u32 consec_full_frames_cnt
Definition: dpdk.h:140
#define DPDK_EFD_DISCARD_ENABLED
Definition: dpdk.h:290
uword os_get_cpu_number(void)
Definition: unix-misc.c:224
u16 last_burst_sz
Definition: dpdk.h:137
u32 * d_trace_buffers
Definition: dpdk.h:210
#define DPDK_EFD_MONITOR_ENABLED
Definition: dpdk.h:291
#define PREDICT_FALSE(x)
Definition: clib.h:97
u32 full_frames_cnt
Definition: dpdk.h:139
#define VNET_DEVICE_INPUT_NEXT_NODES
Definition: devices.h:32
#define VLIB_FRAME_SIZE
Definition: node.h:328
#define vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0)
Finish enqueueing one buffer forward in the graph.
Definition: buffer_node.h:216
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Get pointer to next frame vector data by (vlib_node_runtime_t, next_index).
Definition: node_funcs.h:350
vlib_error_t error
Error code for buffers to be enqueued to error handler.
Definition: buffer.h:121
u64 aggregate_rx_packets
Definition: dpdk.h:271
#define DPDK_DEVICE_FLAG_ADMIN_UP
Definition: dpdk.h:216
u8 * format_ethernet_header_with_length(u8 *s, va_list *args)
Definition: format.c:115
dpdk_device_t * devices
Definition: dpdk.h:406
#define CLIB_PREFETCH(addr, size, type)
Definition: cache.h:82
u8 ip_prec_bitmap
Definition: threads.h:256
static vlib_thread_main_t * vlib_get_thread_main()
Definition: global_funcs.h:32
struct rte_mbuf *** rx_vectors
Definition: dpdk.h:207
u16 queue_hi_thresh
Definition: dpdk.h:300
u8 vlan_cos_bitmap
Definition: threads.h:258
#define clib_memcpy(a, b, c)
Definition: string.h:64
#define VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn)
Definition: node.h:157
#define EFD_OPERATION_GREATER_OR_EQUAL
Definition: dpdk.h:581
#define VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX
Definition: buffer.h:306
u32 discard_cnt
Definition: dpdk.h:144
static void vlib_increment_combined_counter(vlib_combined_counter_main_t *cm, u32 cpu_index, u32 index, u32 packet_increment, u32 byte_increment)
Increment a combined counter.
Definition: counter.h:241
#define ASSERT(truth)
unsigned int u32
Definition: types.h:88
#define vnet_buffer(b)
Definition: buffer.h:333
u32 poll_sleep
Definition: dpdk.h:468
dpdk_efd_t efd
Definition: dpdk.h:445
u32 next_buffer
Next buffer for this linked-list of buffers.
Definition: buffer.h:117
mpls_label_t label_exp_s_ttl
Definition: packet.h:31
#define DPDK_DEVICE_FLAG_HAVE_SUBIF
Definition: dpdk.h:220
u64 uword
Definition: types.h:112
static void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
Definition: trace_funcs.h:55
u32 total_length_not_including_first_buffer
Only valid for first buffer in chain.
Definition: buffer.h:112
u32 ip_version_traffic_class_and_flow_label
Definition: ip6_packet.h:287
Definition: defs.h:47
unsigned short u16
Definition: types.h:57
vlib_buffer_t buffer
Definition: dpdk.h:494
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
unsigned char u8
Definition: types.h:56
dpdk_efd_agent_t efd_agent
Definition: dpdk.h:257
#define DPDK_EFD_DROPALL_ENABLED
Definition: dpdk.h:292
u32 buffer_flags_template
Definition: dpdk.h:414
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:143
#define vlib_buffer_from_rte_mbuf(x)
Definition: buffer.h:386
u8 data[0]
Packet data.
Definition: buffer.h:154
#define vec_foreach(var, vec)
Vector iterator.
#define EFD_OPERATION_LESS_THAN
Definition: dpdk.h:580
u32 total_packet_cnt
Definition: dpdk.h:145
static vlib_buffer_free_list_t * vlib_buffer_get_free_list(vlib_main_t *vm, u32 free_list_index)
Definition: buffer_funcs.h:337
static void vlib_set_trace_count(vlib_main_t *vm, vlib_node_runtime_t *rt, u32 count)
Definition: trace_funcs.h:159
u32 is_efd_discardable(vlib_thread_main_t *tm, vlib_buffer_t *b0, struct rte_mbuf *mb)
Definition: node.c:236
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:67
CLIB_MULTIARCH_SELECT_FN(dpdk_input)
u32 flags
buffer flags: VLIB_BUFFER_IS_TRACED: trace this buffer.
Definition: buffer.h:85
static uword dpdk_input(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *f)
Main DPDK input node.
Definition: node.c:629
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
Definition: buffer_funcs.h:69
u16 enabled
Definition: threads.h:254
uword dpdk_input_efd(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *f)
Definition: node.c:681
vlib_efd_t efd
Definition: threads.h:322
Definition: defs.h:46
CLIB vectors are ubiquitous dynamically resized arrays with user-defined "headers".
void set_efd_bitmap(u8 *bitmap, u32 value, u32 op)
Definition: node.c:741