FD.io VPP  v19.04-6-g6f05f72
Vector Packet Processing
input.c
Go to the documentation of this file.
1 /*
2  *------------------------------------------------------------------
3  * Copyright (c) 2018 Cisco and/or its affiliates.
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *------------------------------------------------------------------
16  */
17 
18 #include <vlib/vlib.h>
19 #include <vlib/unix/unix.h>
20 #include <vlib/pci/pci.h>
21 #include <vnet/ethernet/ethernet.h>
22 #include <vnet/devices/devices.h>
23 
24 #include <rdma/rdma.h>
25 
/* X-macro list of rdma-input error counters.  Each _(SYMBOL, "text") entry
   provides one enumerator suffix and one human-readable string; the list
   currently holds a single entry, BUFFER_ALLOC. */
26 #define foreach_rdma_input_error \
27  _(BUFFER_ALLOC, "buffer alloc error")
28 
/* Error codes of the rdma-input node: _(f,s) turns a list entry into an
   RDMA_INPUT_ERROR_<f> enumerator.
   NOTE(review): the embedded numbering skips lines 32, 34 and 35 --
   presumably the foreach_rdma_input_error expansion, the RDMA_INPUT_N_ERROR
   terminator (referenced by the node registration) and the closing
   "} rdma_input_error_t;" -- confirm against the original file. */
29 typedef enum
30 {
31 #define _(f,s) RDMA_INPUT_ERROR_##f,
33 #undef _
36 
/* Error strings, one per list entry: _(n,s) keeps only the string part.
   NOTE(review): line 39 (presumably the foreach_rdma_input_error expansion)
   is missing from this listing -- confirm. */
37 static __clib_unused char *rdma_input_error_strings[] = {
38 #define _(n,s) s,
40 #undef _
41 };
42 
45  rdma_rxq_t * rxq)
46 {
47  u32 n_alloc, n;
48  u32 buffers[VLIB_FRAME_SIZE], *bi = buffers;
49  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
50  struct ibv_recv_wr wr[VLIB_FRAME_SIZE], *w = wr;
51  struct ibv_sge sge[VLIB_FRAME_SIZE], *s = sge;
52 
53  if (PREDICT_FALSE (rxq->n_enq >= rxq->size))
54  return;
55 
56  n_alloc = clib_min (VLIB_FRAME_SIZE, rxq->size - rxq->n_enq);
57  n_alloc = n = vlib_buffer_alloc (vm, buffers, n_alloc);
58  vlib_get_buffers (vm, buffers, bufs, n_alloc);
59 
60  while (n >= 4)
61  {
62  if (PREDICT_TRUE (n >= 8))
63  {
64  CLIB_PREFETCH (&s[4 + 0], 4 * sizeof (s[0]), STORE);
65  CLIB_PREFETCH (&w[4 + 0], 4 * sizeof (w[0]), STORE);
66  }
67 
68  s[0].addr = vlib_buffer_get_va (b[0]);
69  s[0].length = vlib_buffer_get_default_data_size (vm);
70  s[0].lkey = rd->mr->lkey;
71 
72  s[1].addr = vlib_buffer_get_va (b[1]);
73  s[1].length = vlib_buffer_get_default_data_size (vm);
74  s[1].lkey = rd->mr->lkey;
75 
76  s[2].addr = vlib_buffer_get_va (b[2]);
77  s[2].length = vlib_buffer_get_default_data_size (vm);
78  s[2].lkey = rd->mr->lkey;
79 
80  s[3].addr = vlib_buffer_get_va (b[3]);
81  s[3].length = vlib_buffer_get_default_data_size (vm);
82  s[3].lkey = rd->mr->lkey;
83 
84  w[0].wr_id = bi[0];
85  w[0].next = &w[0] + 1;
86  w[0].sg_list = &s[0];
87  w[0].num_sge = 1;
88 
89  w[1].wr_id = bi[1];
90  w[1].next = &w[1] + 1;
91  w[1].sg_list = &s[1];
92  w[1].num_sge = 1;
93 
94  w[2].wr_id = bi[2];
95  w[2].next = &w[2] + 1;
96  w[2].sg_list = &s[2];
97  w[2].num_sge = 1;
98 
99  w[3].wr_id = bi[3];
100  w[3].next = &w[3] + 1;
101  w[3].sg_list = &s[3];
102  w[3].num_sge = 1;
103 
104  s += 4;
105  bi += 4;
106  w += 4;
107  b += 4;
108  n -= 4;
109  }
110 
111  while (n >= 1)
112  {
113  s[0].addr = vlib_buffer_get_va (b[0]);
114  s[0].length = vlib_buffer_get_default_data_size (vm);
115  s[0].lkey = rd->mr->lkey;
116 
117  w[0].wr_id = bi[0];
118  w[0].next = &w[0] + 1;
119  w[0].sg_list = &s[0];
120  w[0].num_sge = 1;
121 
122  s += 1;
123  bi += 1;
124  w += 1;
125  b += 1;
126  n -= 1;
127  }
128 
129  w[-1].next = 0; /* fix next pointer in WR linked-list last item */
130 
131  w = wr;
132  ibv_post_wq_recv (rxq->wq, wr, &w);
133  n = wr == w ? n_alloc : (uintptr_t) (w - wr);
134 
135  if (PREDICT_FALSE (n != n_alloc))
136  vlib_buffer_free (vm, buffers + n, n_alloc - n);
137 
138  rxq->n_enq += n;
139 }
140 
143  const rdma_device_t * rd, u32 n_left, const u32 * bi)
144 {
/* Record packet traces for received buffers: walks the buffer indices in bi
   while both the node's remaining trace budget and n_left are non-zero, adds
   one rdma_input_trace_t per buffer, then stores the remaining budget back
   with vlib_set_trace_count().
   NOTE(review): the first lines of the signature (141-142) are missing from
   this listing. */
145  u32 n_trace, i;
146 
/* common case: tracing not enabled for this node */
147  if (PREDICT_TRUE (0 == (n_trace = vlib_get_trace_count (vm, node))))
148  return;
149 
150  i = 0;
151  while (n_trace && n_left)
152  {
153  vlib_buffer_t *b;
154  rdma_input_trace_t *tr;
155  b = vlib_get_buffer (vm, bi[0]);
/* NOTE(review): line 156, the start of the call that ends below (presumably
   vlib_trace_buffer), is missing from this listing -- confirm. */
157  /* follow_chain */ 0);
158  tr = vlib_add_trace (vm, node, b, sizeof (*tr));
/* NOTE(review): line 159 is missing from this listing; only hw_if_index is
   visibly filled in. */
160  tr->hw_if_index = rd->hw_if_index;
161 
162  /* next */
163  n_trace--;
164  n_left--;
165  bi++;
/* i is incremented but never read in the visible code */
166  i++;
167  }
168  vlib_set_trace_count (vm, node, n_trace);
169 }
170 
173  const rdma_device_t * rd)
174 {
/* Fill in the scalar arguments of the pending next frame with the device's
   sw_if_index and hw_if_index.
   NOTE(review): this listing lacks lines 171-172 (start of the signature),
   177 (presumably the declaration of ef), 180 (the early-return condition),
   184 (the expression assigned to nf) and 186 -- confirm against the
   original file before relying on this description. */
175  vlib_next_frame_t *nf;
176  vlib_frame_t *f;
178 
179  if (PREDICT_FALSE
181  return;
182 
183  nf =
185  f = vlib_get_frame (vm, nf->frame_index);
187  /* FIXME: f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK; */
188 
/* scalar area of the frame carries the single RX interface for all packets */
189  ef = vlib_frame_scalar_args (f);
190  ef->sw_if_index = rd->sw_if_index;
191  ef->hw_if_index = rd->hw_if_index;
192 }
193 
195 rdma_device_input_load_wc (u32 n_left_from, struct ibv_wc * wc, u32 * to_next,
196  u32 * bufsz)
197 {
198  u32 n_rx_bytes[4] = { 0 };
199 
200  while (n_left_from >= 4)
201  {
202  if (PREDICT_TRUE (n_left_from >= 8))
203  {
204  CLIB_PREFETCH (&wc[4 + 0], CLIB_CACHE_LINE_BYTES, LOAD);
205  CLIB_PREFETCH (&wc[4 + 1], CLIB_CACHE_LINE_BYTES, LOAD);
206  CLIB_PREFETCH (&wc[4 + 2], CLIB_CACHE_LINE_BYTES, LOAD);
207  CLIB_PREFETCH (&wc[4 + 3], CLIB_CACHE_LINE_BYTES, LOAD);
208  CLIB_PREFETCH (&bufsz[4 + 0], 4 * sizeof (bufsz[0]), STORE);
209  CLIB_PREFETCH (&to_next[4 + 0], 4 * sizeof (to_next[0]), STORE);
210  }
211 
212  to_next[0] = wc[0].wr_id;
213  to_next[1] = wc[1].wr_id;
214  to_next[2] = wc[2].wr_id;
215  to_next[3] = wc[3].wr_id;
216 
217  bufsz[0] = wc[0].byte_len;
218  bufsz[1] = wc[1].byte_len;
219  bufsz[2] = wc[2].byte_len;
220  bufsz[3] = wc[3].byte_len;
221 
222  n_rx_bytes[0] += wc[0].byte_len;
223  n_rx_bytes[1] += wc[1].byte_len;
224  n_rx_bytes[2] += wc[2].byte_len;
225  n_rx_bytes[3] += wc[3].byte_len;
226 
227  wc += 4;
228  to_next += 4;
229  bufsz += 4;
230  n_left_from -= 4;
231  }
232 
233  while (n_left_from >= 1)
234  {
235  to_next[0] = wc[0].wr_id;
236  bufsz[0] = wc[0].byte_len;
237  n_rx_bytes[0] += wc[0].byte_len;
238 
239  wc += 1;
240  to_next += 1;
241  bufsz += 1;
242  n_left_from -= 1;
243  }
244 
245  return n_rx_bytes[0] + n_rx_bytes[1] + n_rx_bytes[2] + n_rx_bytes[3];
246 }
247 
250  u32 * bufsz, u32 sw_if_index)
251 {
252  while (n_left_from >= 4)
253  {
254  if (PREDICT_TRUE (n_left_from >= 8))
255  {
256  vlib_prefetch_buffer_header (bufs[4 + 0], STORE);
257  vlib_prefetch_buffer_header (bufs[4 + 1], STORE);
258  vlib_prefetch_buffer_header (bufs[4 + 2], STORE);
259  vlib_prefetch_buffer_header (bufs[4 + 3], STORE);
260  CLIB_PREFETCH (&bufsz[4 + 0], 4 * sizeof (bufsz[0]), LOAD);
261  }
262 
263  bufs[0]->current_length = bufsz[0];
264  bufs[1]->current_length = bufsz[1];
265  bufs[2]->current_length = bufsz[2];
266  bufs[3]->current_length = bufsz[3];
267 
268  vnet_buffer (bufs[0])->sw_if_index[VLIB_RX] = sw_if_index;
269  vnet_buffer (bufs[1])->sw_if_index[VLIB_RX] = sw_if_index;
270  vnet_buffer (bufs[2])->sw_if_index[VLIB_RX] = sw_if_index;
271  vnet_buffer (bufs[3])->sw_if_index[VLIB_RX] = sw_if_index;
272 
273  vnet_buffer (bufs[0])->sw_if_index[VLIB_TX] = ~0;
274  vnet_buffer (bufs[1])->sw_if_index[VLIB_TX] = ~0;
275  vnet_buffer (bufs[2])->sw_if_index[VLIB_TX] = ~0;
276  vnet_buffer (bufs[3])->sw_if_index[VLIB_TX] = ~0;
277 
278  bufs += 4;
279  bufsz += 4;
280  n_left_from -= 4;
281  }
282 
283  while (n_left_from >= 1)
284  {
285  bufs[0]->current_length = bufsz[0];
286  vnet_buffer (bufs[0])->sw_if_index[VLIB_RX] = sw_if_index;
287  vnet_buffer (bufs[0])->sw_if_index[VLIB_TX] = ~0;
288 
289  bufs += 1;
290  bufsz += 1;
291  n_left_from -= 1;
292  }
293 }
294 
297  vlib_frame_t * frame, rdma_device_t * rd, u16 qid)
298 {
299  vnet_main_t *vnm = vnet_get_main ();
300  rdma_rxq_t *rxq = vec_elt_at_index (rd->rxqs, qid);
301  struct ibv_wc wc[VLIB_FRAME_SIZE];
302  u32 bufsz[VLIB_FRAME_SIZE];
304  u32 *to_next, n_left_to_next;
305  u32 n_rx_packets, n_rx_bytes;
306 
307  n_rx_packets = ibv_poll_cq (rxq->cq, VLIB_FRAME_SIZE, wc);
308 
309  if (PREDICT_FALSE (n_rx_packets <= 0))
310  {
311  rdma_device_input_refill (vm, rd, rxq);
312  return 0;
313  }
314 
315  vlib_get_new_next_frame (vm, node, rd->per_interface_next_index, to_next,
316  n_left_to_next);
317  n_rx_bytes = rdma_device_input_load_wc (n_rx_packets, wc, to_next, bufsz);
318  vlib_get_buffers (vm, to_next, bufs, n_rx_packets);
319  rdma_device_input_bufs_init (n_rx_packets, bufs, bufsz, rd->sw_if_index);
320  rdma_device_input_trace (vm, node, rd, n_rx_packets, to_next);
321  rdma_device_input_ethernet (vm, node, rd);
322 
324  n_left_to_next - n_rx_packets);
325 
329  rd->hw_if_index, n_rx_packets, n_rx_bytes);
330 
331  rxq->n_enq -= n_rx_packets;
332 
333  rdma_device_input_refill (vm, rd, rxq);
334 
335  return n_rx_packets;
336 }
337 
339  vlib_node_runtime_t * node,
340  vlib_frame_t * frame)
341 {
/* Node function of rdma-input: calls rdma_device_input_inline() for each
   device/queue entry whose device has RDMA_DEVICE_F_ADMIN_UP set and returns
   the accumulated packet count.
   NOTE(review): this listing lacks line 338 (presumably the
   VLIB_NODE_FN (rdma_input_node) opening), line 345 (presumably the
   declaration of dq) and line 347 (presumably the foreach_device_and_queue
   loop header over rt->devices_and_queues) -- confirm against the original
   file. */
342  u32 n_rx = 0;
343  rdma_main_t *rm = &rdma_main;
344  vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
346 
348  {
349  rdma_device_t *rd;
350  rd = vec_elt_at_index (rm->devices, dq->dev_instance);
/* skip devices that are not administratively up */
351  if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_ADMIN_UP))
352  n_rx += rdma_device_input_inline (vm, node, frame, rd, dq->queue_id);
353  }
354  return n_rx;
355 }
356 
357 /* *INDENT-OFF* */
/* Registration data of the "rdma-input" node: an input-type node declared as
   sibling of "device-input", with its state field initialised to DISABLED,
   using format_rdma_input_trace for traces and the rdma_input_error_strings
   table for its RDMA_INPUT_N_ERROR error counters.
   NOTE(review): line 358, presumably the
   "VLIB_REGISTER_NODE (rdma_input_node) = {" opening, is missing from this
   listing -- confirm. */
359  .name = "rdma-input",
360  .sibling_of = "device-input",
361  .format_trace = format_rdma_input_trace,
362  .type = VLIB_NODE_TYPE_INPUT,
363  .state = VLIB_NODE_STATE_DISABLED,
364  .n_errors = RDMA_INPUT_N_ERROR,
365  .error_strings = rdma_input_error_strings,
366 };
367 
368 /* *INDENT-ON* */
369 
370 
371 /*
372  * fd.io coding-style-patch-verification: ON
373  *
374  * Local Variables:
375  * eval: (c-set-style "gnu")
376  * End:
377  */
u32 sw_if_index
Definition: ipsec_gre.api:37
struct ibv_mr * mr
Definition: rdma.h:80
vnet_device_and_queue_t * devices_and_queues
Definition: devices.h:69
#define clib_min(x, y)
Definition: clib.h:295
static u32 vlib_get_trace_count(vlib_main_t *vm, vlib_node_runtime_t *rt)
Definition: trace_funcs.h:156
static void vlib_increment_combined_counter(vlib_combined_counter_main_t *cm, u32 thread_index, u32 index, u64 n_packets, u64 n_bytes)
Increment a combined counter.
Definition: counter.h:220
static void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers Frees the entire buffer chain for each buffer.
Definition: buffer_funcs.h:865
vnet_main_t * vnet_get_main(void)
Definition: misc.c:47
static_always_inline void rdma_device_input_trace(vlib_main_t *vm, vlib_node_runtime_t *node, const rdma_device_t *rd, u32 n_left, const u32 *bi)
Definition: input.c:142
vnet_interface_main_t interface_main
Definition: vnet.h:56
#define PREDICT_TRUE(x)
Definition: clib.h:112
u32 size
Definition: rdma.h:43
u32 dev_instance
Definition: rdma.h:65
u32 thread_index
Definition: main.h:197
u16 current_length
Nbytes between current data and the end of this buffer.
Definition: buffer.h:113
struct ibv_wq * wq
Definition: rdma.h:46
u32 per_interface_next_index
Definition: rdma.h:63
format_function_t format_rdma_input_trace
Definition: rdma.h:121
int i
#define VLIB_NODE_FN(node)
Definition: node.h:201
static void vlib_trace_buffer(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, vlib_buffer_t *b, int follow_chain)
Definition: trace_funcs.h:114
#define static_always_inline
Definition: clib.h:99
#define ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX
Definition: ethernet.h:52
rdma_device_t * devices
Definition: rdma.h:92
vlib_combined_counter_main_t * combined_sw_if_counters
Definition: interface.h:824
#define vlib_prefetch_buffer_header(b, type)
Prefetch buffer metadata.
Definition: buffer.h:203
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
#define vlib_get_new_next_frame(vm, node, next_index, vectors, n_vectors_left)
Definition: node_funcs.h:373
unsigned int u32
Definition: types.h:88
#define VLIB_FRAME_SIZE
Definition: node.h:376
static vlib_next_frame_t * vlib_node_runtime_get_next_frame(vlib_main_t *vm, vlib_node_runtime_t *n, u32 next_index)
Definition: node_funcs.h:294
u32 flags
Definition: rdma.h:62
u32 frame_index
Definition: node.h:404
static_always_inline void rdma_device_input_ethernet(vlib_main_t *vm, vlib_node_runtime_t *node, const rdma_device_t *rd)
Definition: input.c:172
unsigned short u16
Definition: types.h:57
vlib_node_registration_t rdma_input_node
(constructor) VLIB_REGISTER_NODE (rdma_input_node)
Definition: input.c:358
struct ibv_cq * cq
Definition: rdma.h:45
#define PREDICT_FALSE(x)
Definition: clib.h:111
static vlib_frame_t * vlib_get_frame(vlib_main_t *vm, uword frame_index)
Definition: node_funcs.h:238
static_always_inline u32 rdma_device_input_load_wc(u32 n_left_from, struct ibv_wc *wc, u32 *to_next, u32 *bufsz)
Definition: input.c:195
static_always_inline u32 vlib_buffer_get_default_data_size(vlib_main_t *vm)
Definition: buffer_funcs.h:96
u32 hw_if_index
Definition: rdma.h:67
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:169
rdma_main_t rdma_main
Definition: device.c:46
#define CLIB_PREFETCH(addr, size, type)
Definition: cache.h:80
vlib_main_t * vm
Definition: buffer.c:312
static_always_inline uword rdma_device_input_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, rdma_device_t *rd, u16 qid)
Definition: input.c:296
static_always_inline void rdma_device_input_refill(vlib_main_t *vm, rdma_device_t *rd, rdma_rxq_t *rxq)
Definition: input.c:44
static void * vlib_frame_scalar_args(vlib_frame_t *f)
Get pointer to frame scalar data.
Definition: node_funcs.h:288
void vlib_put_next_frame(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, u32 n_vectors_left)
Release pointer to next frame vector data.
Definition: main.c:465
static __clib_unused char * rdma_input_error_strings[]
Definition: input.c:37
u32 sw_if_index
Definition: rdma.h:66
u16 flags
Definition: node.h:386
rdma_rxq_t * rxqs
Definition: rdma.h:71
#define foreach_rdma_input_error
Definition: input.c:26
static void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
Definition: trace_funcs.h:57
#define foreach_device_and_queue(var, vec)
Definition: devices.h:161
Definition: defs.h:47
VLIB buffer representation.
Definition: buffer.h:102
u64 uword
Definition: types.h:112
#define vnet_buffer(b)
Definition: buffer.h:369
static uword vlib_buffer_get_va(vlib_buffer_t *b)
Definition: buffer.h:217
static void vlib_set_trace_count(vlib_main_t *vm, vlib_node_runtime_t *rt, u32 count)
Definition: trace_funcs.h:172
static_always_inline void vlib_get_buffers(vlib_main_t *vm, u32 *bi, vlib_buffer_t **b, int count)
Translate array of buffer indices into buffer pointers.
Definition: buffer_funcs.h:244
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:59
static u32 vlib_buffer_alloc(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Allocate buffers into supplied array.
Definition: buffer_funcs.h:612
rdma_input_error_t
Definition: input.c:29
static_always_inline void rdma_device_input_bufs_init(u32 n_left_from, vlib_buffer_t **bufs, u32 *bufsz, u32 sw_if_index)
Definition: input.c:249
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
Definition: buffer_funcs.h:85
Definition: defs.h:46
u32 n_enq
Definition: rdma.h:44