/*
 * FD.io VPP v21.06-1-gbb7418cf9 — Vector Packet Processing
 * plugins/af_xdp/input.c — AF_XDP device input node
 * (recovered from generated documentation listing)
 */
/*
 *------------------------------------------------------------------
 * Copyright (c) 2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */
17 
#include <poll.h>
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vlib/pci/pci.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/devices.h>
#include <vnet/interface/rx_queue_funcs.h>

#include "af_xdp.h"
27 #define foreach_af_xdp_input_error \
28  _ (SYSCALL_REQUIRED, "syscall required") \
29  _ (SYSCALL_FAILURES, "syscall failures")
30 
31 typedef enum
32 {
33 #define _(f,s) AF_XDP_INPUT_ERROR_##f,
35 #undef _
38 
39 static __clib_unused char *af_xdp_input_error_strings[] = {
40 #define _(n,s) s,
42 #undef _
43 };
44 
47  u32 n_left, const u32 * bi, u32 next_index,
48  u32 hw_if_index)
49 {
50  u32 n_trace = vlib_get_trace_count (vm, node);
51 
52  if (PREDICT_TRUE (0 == n_trace))
53  return;
54 
55  while (n_trace && n_left)
56  {
57  vlib_buffer_t *b = vlib_get_buffer (vm, bi[0]);
58  if (PREDICT_TRUE
59  (vlib_trace_buffer (vm, node, next_index, b, /* follow_chain */ 0)))
60  {
62  vlib_add_trace (vm, node, b, sizeof (*tr));
63  tr->next_index = next_index;
64  tr->hw_if_index = hw_if_index;
65  n_trace--;
66  }
67  n_left--;
68  bi++;
69  }
70 
71  vlib_set_trace_count (vm, node, n_trace);
72 }
73 
76  const vlib_node_runtime_t * node,
77  af_xdp_device_t * ad, af_xdp_rxq_t * rxq,
78  const u32 n_alloc)
79 {
80  xsk_ring_prod__submit (&rxq->fq, n_alloc);
81 
82  if (AF_XDP_RXQ_MODE_INTERRUPT == rxq->mode ||
83  !xsk_ring_prod__needs_wakeup (&rxq->fq))
84  return;
85 
86  if (node)
87  vlib_error_count (vm, node->node_index,
88  AF_XDP_INPUT_ERROR_SYSCALL_REQUIRED, 1);
89 
91  {
92  struct pollfd fd = { .fd = rxq->xsk_fd, .events = POLLIN | POLLOUT };
93  int ret = poll (&fd, 1, 0);
95  if (PREDICT_FALSE (ret < 0))
96  {
97  /* something bad is happening */
98  if (node)
99  vlib_error_count (vm, node->node_index,
100  AF_XDP_INPUT_ERROR_SYSCALL_FAILURES, 1);
101  af_xdp_device_error (ad, "rx poll() failed");
102  }
103  }
104 }
105 
108  const vlib_node_runtime_t *node,
109  af_xdp_device_t *ad, af_xdp_rxq_t *rxq)
110 {
111  __u64 *fill;
112  const u32 size = rxq->fq.size;
113  const u32 mask = size - 1;
114  u32 bis[VLIB_FRAME_SIZE], *bi = bis;
115  u32 n_alloc, n, n_wrap;
116  u32 idx = 0;
117 
118  ASSERT (mask == rxq->fq.mask);
119 
120  /* do not enqueue more packet than ring space */
121  n_alloc = xsk_prod_nb_free (&rxq->fq, 16);
122  /* do not bother to allocate if too small */
123  if (n_alloc < 16)
124  return;
125 
126  n_alloc = clib_min (n_alloc, ARRAY_LEN (bis));
127  n_alloc = vlib_buffer_alloc_from_pool (vm, bis, n_alloc, ad->pool);
128  n = xsk_ring_prod__reserve (&rxq->fq, n_alloc, &idx);
129  ASSERT (n == n_alloc);
130 
131  fill = xsk_ring_prod__fill_addr (&rxq->fq, idx);
132  n = clib_min (n_alloc, size - (idx & mask));
133  n_wrap = n_alloc - n;
134 
135 #define bi2addr(bi) ((bi) << CLIB_LOG2_CACHE_LINE_BYTES)
136 
137 wrap_around:
138 
139  while (n >= 8)
140  {
141 #ifdef CLIB_HAVE_VEC256
142  u64x4 b0 = u64x4_from_u32x4 (*(u32x4u *) (bi + 0));
143  u64x4 b1 = u64x4_from_u32x4 (*(u32x4u *) (bi + 4));
144  *(u64x4u *) (fill + 0) = bi2addr (b0);
145  *(u64x4u *) (fill + 4) = bi2addr (b1);
146 #else
147  fill[0] = bi2addr (bi[0]);
148  fill[1] = bi2addr (bi[1]);
149  fill[2] = bi2addr (bi[2]);
150  fill[3] = bi2addr (bi[3]);
151  fill[4] = bi2addr (bi[4]);
152  fill[5] = bi2addr (bi[5]);
153  fill[6] = bi2addr (bi[6]);
154  fill[7] = bi2addr (bi[7]);
155 #endif
156  fill += 8;
157  bi += 8;
158  n -= 8;
159  }
160 
161  while (n >= 1)
162  {
163  fill[0] = bi2addr (bi[0]);
164  fill += 1;
165  bi += 1;
166  n -= 1;
167  }
168 
169  if (n_wrap)
170  {
171  fill = xsk_ring_prod__fill_addr (&rxq->fq, 0);
172  n = n_wrap;
173  n_wrap = 0;
174  goto wrap_around;
175  }
176 
177  af_xdp_device_input_refill_db (vm, node, ad, rxq, n_alloc);
178 }
179 
182  const u32 next_index, const u32 sw_if_index,
183  const u32 hw_if_index)
184 {
185  vlib_next_frame_t *nf;
186  vlib_frame_t *f;
188 
190  return;
191 
192  nf =
195  f = vlib_get_frame (vm, nf->frame);
197 
198  ef = vlib_frame_scalar_args (f);
199  ef->sw_if_index = sw_if_index;
200  ef->hw_if_index = hw_if_index;
201 }
202 
205  af_xdp_rxq_t *rxq, u32 *bis, const u32 n_rx,
206  vlib_buffer_t *bt, u32 idx)
207 {
209  u16 offs[VLIB_FRAME_SIZE], *off = offs;
210  u16 lens[VLIB_FRAME_SIZE], *len = lens;
211  const u32 mask = rxq->rx.mask;
212  u32 n = n_rx, *bi = bis, bytes = 0;
213 
214 #define addr2bi(addr) ((addr) >> CLIB_LOG2_CACHE_LINE_BYTES)
215 
216  while (n >= 1)
217  {
218  const struct xdp_desc *desc = xsk_ring_cons__rx_desc (&rxq->rx, idx);
219  const u64 addr = desc->addr;
220  bi[0] = addr2bi (xsk_umem__extract_addr (addr));
221  ASSERT (vlib_buffer_is_known (vm, bi[0]) ==
223  off[0] = xsk_umem__extract_offset (addr) - sizeof (vlib_buffer_t);
224  len[0] = desc->len;
225  idx = (idx + 1) & mask;
226  bi += 1;
227  off += 1;
228  len += 1;
229  n -= 1;
230  }
231 
232  vlib_get_buffers (vm, bis, bufs, n_rx);
233 
234  n = n_rx;
235  off = offs;
236  len = lens;
237 
238  while (n >= 8)
239  {
240  vlib_prefetch_buffer_header (b[4], LOAD);
241  vlib_buffer_copy_template (b[0], bt);
242  b[0]->current_data = off[0];
243  bytes += b[0]->current_length = len[0];
244 
245  vlib_prefetch_buffer_header (b[5], LOAD);
246  vlib_buffer_copy_template (b[1], bt);
247  b[1]->current_data = off[1];
248  bytes += b[1]->current_length = len[1];
249 
250  vlib_prefetch_buffer_header (b[6], LOAD);
251  vlib_buffer_copy_template (b[2], bt);
252  b[2]->current_data = off[2];
253  bytes += b[2]->current_length = len[2];
254 
255  vlib_prefetch_buffer_header (b[7], LOAD);
256  vlib_buffer_copy_template (b[3], bt);
257  b[3]->current_data = off[3];
258  bytes += b[3]->current_length = len[3];
259 
260  b += 4;
261  off += 4;
262  len += 4;
263  n -= 4;
264  }
265 
266  while (n >= 1)
267  {
268  vlib_buffer_copy_template (b[0], bt);
269  b[0]->current_data = off[0];
270  bytes += b[0]->current_length = len[0];
271  b += 1;
272  off += 1;
273  len += 1;
274  n -= 1;
275  }
276 
277  xsk_ring_cons__release (&rxq->rx, n_rx);
278  return bytes;
279 }
280 
284 {
285  vnet_main_t *vnm = vnet_get_main ();
286  af_xdp_rxq_t *rxq = vec_elt_at_index (ad->rxqs, qid);
287  vlib_buffer_t bt;
288  u32 next_index, *to_next, n_left_to_next;
289  u32 n_rx_packets, n_rx_bytes;
290  u32 idx;
291 
292  n_rx_packets = xsk_ring_cons__peek (&rxq->rx, VLIB_FRAME_SIZE, &idx);
293 
294  if (PREDICT_FALSE (0 == n_rx_packets))
295  goto refill;
296 
298  next_index = ad->per_interface_next_index;
300  vnet_feature_start_device_input_x1 (ad->sw_if_index, &next_index, &bt);
301 
302  vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
303 
304  n_rx_bytes =
305  af_xdp_device_input_bufs (vm, ad, rxq, to_next, n_rx_packets, &bt, idx);
306  af_xdp_device_input_ethernet (vm, node, next_index, ad->sw_if_index,
307  ad->hw_if_index);
308 
309  vlib_put_next_frame (vm, node, next_index, n_left_to_next - n_rx_packets);
310 
311  af_xdp_device_input_trace (vm, node, n_rx_packets, to_next, next_index,
312  ad->hw_if_index);
313 
317  ad->hw_if_index, n_rx_packets, n_rx_bytes);
318 
319 refill:
320  af_xdp_device_input_refill_inline (vm, node, ad, rxq);
321 
322  return n_rx_packets;
323 }
324 
328 {
329  u32 n_rx = 0;
333 
334  vec_foreach (p, pv)
335  {
337  if ((ad->flags & AF_XDP_DEVICE_F_ADMIN_UP) == 0)
338  continue;
339  n_rx += af_xdp_device_input_inline (vm, node, frame, ad, p->queue_id);
340  }
341 
342  return n_rx;
343 }
344 
345 #ifndef CLIB_MARCH_VARIANT
346 void
348 {
349  vlib_main_t *vm = vlib_get_main ();
350  af_xdp_rxq_t *rxq;
351  vec_foreach (rxq, ad->rxqs)
352  af_xdp_device_input_refill_inline (vm, 0, ad, rxq);
353 }
354 #endif /* CLIB_MARCH_VARIANT */
355 
356 /* *INDENT-OFF* */
358  .name = "af_xdp-input",
359  .sibling_of = "device-input",
360  .format_trace = format_af_xdp_input_trace,
361  .type = VLIB_NODE_TYPE_INPUT,
362  .state = VLIB_NODE_STATE_DISABLED,
363  .n_errors = AF_XDP_INPUT_N_ERROR,
364  .error_strings = af_xdp_input_error_strings,
366 };
367 /* *INDENT-ON* */
368 
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
static_always_inline int clib_spinlock_trylock_if_init(clib_spinlock_t *p)
Definition: lock.h:113
#define clib_min(x, y)
Definition: clib.h:342
static u32 vlib_get_trace_count(vlib_main_t *vm, vlib_node_runtime_t *rt)
Definition: trace_funcs.h:212
struct xsk_ring_cons rx
Definition: af_xdp.h:67
static_always_inline u32 af_xdp_device_input_bufs(vlib_main_t *vm, const af_xdp_device_t *ad, af_xdp_rxq_t *rxq, u32 *bis, const u32 n_rx, vlib_buffer_t *bt, u32 idx)
Definition: input.c:204
vlib_node_registration_t af_xdp_input_node
(constructor) VLIB_REGISTER_NODE (af_xdp_input_node)
Definition: input.c:357
vnet_interface_main_t interface_main
Definition: vnet.h:81
u32 per_interface_next_index
Definition: af_xdp.h:100
#define PREDICT_TRUE(x)
Definition: clib.h:125
i16 current_data
signed offset in data[], pre_data[] that we are currently processing.
Definition: buffer.h:119
unsigned long u64
Definition: types.h:89
static void vlib_error_count(vlib_main_t *vm, uword node_index, uword counter, uword increment)
Definition: error_funcs.h:57
vlib_increment_combined_counter(ccm, ti, sw_if_index, n_buffers, n_bytes)
#define VLIB_NODE_FLAG_TRACE_SUPPORTED
Definition: node.h:296
static_always_inline vnet_hw_if_rxq_poll_vector_t * vnet_hw_if_get_rxq_poll_vector(vlib_main_t *vm, vlib_node_runtime_t *node)
static_always_inline void clib_spinlock_unlock_if_init(clib_spinlock_t *p)
Definition: lock.h:129
u32 thread_index
Definition: main.h:213
u16 current_length
Nbytes between current data and the end of this buffer.
Definition: buffer.h:122
static vlib_frame_t * vlib_get_frame(vlib_main_t *vm, vlib_frame_t *f)
Definition: node_funcs.h:273
static_always_inline void af_xdp_device_input_trace(vlib_main_t *vm, vlib_node_runtime_t *node, u32 n_left, const u32 *bi, u32 next_index, u32 hw_if_index)
Definition: input.c:46
vlib_main_t vlib_node_runtime_t vlib_frame_t * frame
Definition: nat44_ei.c:3048
struct xsk_ring_prod fq
Definition: af_xdp.h:68
static_always_inline uword af_xdp_device_input_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, af_xdp_device_t *ad, u16 qid)
Definition: input.c:282
#define af_xdp_device_error(dev, fmt,...)
Definition: af_xdp.h:45
#define VLIB_NODE_FN(node)
Definition: node.h:202
static vlib_buffer_known_state_t vlib_buffer_is_known(vlib_main_t *vm, u32 buffer_index)
Definition: buffer_funcs.h:529
vhost_vring_addr_t addr
Definition: vhost_user.h:130
vlib_buffer_t ** b
af_xdp_device_t * devices
Definition: af_xdp.h:128
af_xdp_rxq_t * rxqs
Definition: af_xdp.h:97
unsigned int u32
Definition: types.h:88
vlib_frame_t * f
vlib_buffer_t * buffer_template
Definition: af_xdp.h:99
#define static_always_inline
Definition: clib.h:112
vlib_get_buffers(vm, from, b, n_left_from)
#define ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX
Definition: ethernet.h:52
af_xdp_main_t af_xdp_main
Definition: device.c:31
vlib_combined_counter_main_t * combined_sw_if_counters
Definition: interface.h:1023
static_always_inline int vnet_device_input_have_features(u32 sw_if_index)
Definition: feature.h:336
#define vlib_prefetch_buffer_header(b, type)
Prefetch buffer metadata.
Definition: buffer.h:231
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
void af_xdp_device_input_refill(af_xdp_device_t *ad)
Definition: input.c:347
#define vlib_get_new_next_frame(vm, node, next_index, vectors, n_vectors_left)
Definition: node_funcs.h:400
vnet_main_t * vnet_get_main(void)
static_always_inline void af_xdp_device_input_ethernet(vlib_main_t *vm, vlib_node_runtime_t *node, const u32 next_index, const u32 sw_if_index, const u32 hw_if_index)
Definition: input.c:181
#define VLIB_FRAME_SIZE
Definition: node.h:369
static vlib_next_frame_t * vlib_node_runtime_get_next_frame(vlib_main_t *vm, vlib_node_runtime_t *n, u32 next_index)
Definition: node_funcs.h:321
static __clib_unused char * af_xdp_input_error_strings[]
Definition: input.c:39
static_always_inline void af_xdp_device_input_refill_db(vlib_main_t *vm, const vlib_node_runtime_t *node, af_xdp_device_t *ad, af_xdp_rxq_t *rxq, const u32 n_alloc)
Definition: input.c:75
format_function_t format_af_xdp_input_trace
Definition: af_xdp.h:175
af_xdp_rxq_mode_t mode
Definition: af_xdp.h:75
vl_api_interface_index_t sw_if_index
Definition: wireguard.api:34
#define bi2addr(bi)
int xsk_fd
Definition: af_xdp.h:69
static_always_inline void vlib_buffer_copy_template(vlib_buffer_t *b, vlib_buffer_t *bt)
Definition: buffer_funcs.h:171
static __clib_warn_unused_result int vlib_trace_buffer(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, vlib_buffer_t *b, int follow_chain)
Definition: trace_funcs.h:153
unsigned short u16
Definition: types.h:57
u32 size
Definition: vhost_user.h:125
static_always_inline void af_xdp_device_input_refill_inline(vlib_main_t *vm, const vlib_node_runtime_t *node, af_xdp_device_t *ad, af_xdp_rxq_t *rxq)
Definition: input.c:107
#define PREDICT_FALSE(x)
Definition: clib.h:124
vlib_main_t * vm
X-connect all packets from the HOST to the PHY.
Definition: nat44_ei.c:3047
u32 node_index
Node index.
Definition: node.h:479
#define foreach_af_xdp_input_error
Definition: input.c:27
u32 n_left
u8 len
Definition: ip_types.api:103
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:169
vl_api_pnat_mask_t mask
Definition: pnat.api:45
#define addr2bi(addr)
static void * vlib_frame_scalar_args(vlib_frame_t *f)
Get pointer to frame scalar data.
Definition: node_funcs.h:315
#define ARRAY_LEN(x)
Definition: clib.h:70
af_xdp_input_error_t
Definition: input.c:31
#define ASSERT(truth)
vlib_frame_t * frame
Definition: node.h:397
u16 flags
Definition: node.h:379
vlib_put_next_frame(vm, node, next_index, 0)
nat44_ei_hairpin_src_next_t next_index
u32 hw_if_index
Definition: af_xdp.h:102
static vlib_main_t * vlib_get_main(void)
Definition: global_funcs.h:38
vlib_main_t vlib_node_runtime_t * node
Definition: nat44_ei.c:3047
VLIB buffer representation.
Definition: buffer.h:111
u64 uword
Definition: types.h:112
u32 sw_if_index
Definition: af_xdp.h:101
u32 off
static_always_inline void vnet_feature_start_device_input_x1(u32 sw_if_index, u32 *next0, vlib_buffer_t *b0)
Definition: feature.h:343
#define vec_foreach(var, vec)
Vector iterator.
void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
Definition: trace.c:628
static void vlib_set_trace_count(vlib_main_t *vm, vlib_node_runtime_t *rt, u32 count)
Definition: trace_funcs.h:226
app_main_t * am
Definition: application.c:489
vlib_buffer_t * bufs[VLIB_FRAME_SIZE]
u64x4
Definition: vector_avx2.h:142
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
Definition: buffer_funcs.h:111
clib_spinlock_t syscall_lock
Definition: af_xdp.h:66
static __clib_warn_unused_result u32 vlib_buffer_alloc_from_pool(vlib_main_t *vm, u32 *buffers, u32 n_buffers, u8 buffer_pool_index)
Allocate buffers from specific pool into supplied array.
Definition: buffer_funcs.h:597