FD.io VPP  v19.08.1-401-g8e4ed521a
Vector Packet Processing
tcp_output.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2019 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <vnet/tcp/tcp.h>
17 #include <math.h>
18 
/* Next-node dispositions for the tcp4/tcp6 output graph nodes.
 * NOTE(review): the enum members (doxygen lines 21-26) were dropped by the
 * page extraction; they are generated from the foreach_* macros below —
 * verify against upstream before compiling. */
19 typedef enum _tcp_output_next
20 {
27 
/* X-macro mapping each IPv4 next index to its graph-node name. */
28 #define foreach_tcp4_output_next \
29  _ (DROP, "error-drop") \
30  _ (IP_LOOKUP, "ip4-lookup") \
31  _ (IP_REWRITE, "ip4-rewrite") \
32  _ (IP_ARP, "ip4-arp")
33 
/* Same mapping for IPv6; ARP is replaced by neighbor discovery. */
34 #define foreach_tcp6_output_next \
35  _ (DROP, "error-drop") \
36  _ (IP_LOOKUP, "ip6-lookup") \
37  _ (IP_REWRITE, "ip6-rewrite") \
38  _ (IP_ARP, "ip6-discover-neighbor")
39 
/* Human-readable strings for each TCP error counter, generated by X-macro
 * expansion of tcp_error.def (one string per tcp_error entry). */
40 static char *tcp_error_strings[] = {
41 #define tcp_error(n,s) s,
42 #include <vnet/tcp/tcp_error.def>
43 #undef tcp_error
44 };
45 
/* Per-packet tx trace record consumed by format_tcp_tx_trace below.
 * NOTE(review): the struct members (doxygen lines 48-50; format_tcp_tx_trace
 * reads t->tcp_header and, presumably, the connection) were dropped by the
 * page extraction — restore from upstream before compiling. */
46 typedef struct
47 {
51 
/* Format a tcp tx trace record for 'show trace' output: the tcp header,
 * then (indented) additional per-connection info. */
52 static u8 *
53 format_tcp_tx_trace (u8 * s, va_list * args)
54 {
55  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
56  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
57  tcp_tx_trace_t *t = va_arg (*args, tcp_tx_trace_t *);
58  u32 indent = format_get_indent (s);
59 
/* NOTE(review): the argument line for the third %U (doxygen line 63) was
 * dropped by the page extraction. */
60  s = format (s, "%U\n%U%U",
61  format_tcp_header, &t->tcp_header, 128,
62  format_white_space, indent,
64 
65  return s;
66 }
67 
68 #ifndef CLIB_MARCH_VARIANT
69 static u8
71 {
72  u8 wnd_scale = 0;
73  while (wnd_scale < TCP_MAX_WND_SCALE && (window >> wnd_scale) > TCP_WND_MAX)
74  wnd_scale++;
75  return wnd_scale;
76 }
77 
78 /**
79  * TCP's initial window
80  */
83 {
84  /* RFC 6928 recommends the value lower. However at the time our connections
85  * are initialized, fifos may not be allocated. Therefore, advertise the
86  * smallest possible unscaled window size and update once fifos are
87  * assigned to the session.
88  */
89  /*
90  tcp_update_rcv_mss (tc);
91  TCP_IW_N_SEGMENTS * tc->mss;
92  */
93  return tcp_cfg.min_rx_fifo;
94 }
95 
96 /**
97  * Compute initial window and scale factor. As per RFC1323, window field in
98  * SYN and SYN-ACK segments is never scaled.
99  */
100 u32
102 {
103  /* Compute rcv wscale only if peer advertised support for it */
104  if (tc->state != TCP_STATE_SYN_RCVD || tcp_opts_wscale (&tc->rcv_opts))
105  tc->rcv_wscale = tcp_window_compute_scale (tcp_cfg.max_rx_fifo);
106 
107  tc->rcv_wnd = tcp_initial_wnd_unscaled (tc);
108 
109  return clib_min (tc->rcv_wnd, TCP_WND_MAX);
110 }
111 
112 static inline void
114 {
115  u32 available_space, wnd;
116  i32 observed_wnd;
117 
118  ASSERT (tc->rcv_opts.mss < transport_rx_fifo_size (&tc->connection));
119 
120  /*
121  * Figure out how much space we have available
122  */
123  available_space = transport_max_rx_enqueue (&tc->connection);
124  if (PREDICT_FALSE (available_space < tc->rcv_opts.mss))
125  {
126  tc->rcv_wnd = 0;
127  return;
128  }
129 
130  /*
131  * Use the above and what we know about what we've previously advertised
132  * to compute the new window
133  */
134  observed_wnd = (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
135 
136  /* Bad. Thou shalt not shrink */
137  if (PREDICT_FALSE ((i32) available_space < observed_wnd))
138  {
139  wnd = clib_max (observed_wnd, 0);
140  TCP_EVT (TCP_EVT_RCV_WND_SHRUNK, tc, observed_wnd, available_space);
141  }
142  else
143  {
144  wnd = available_space;
145  }
146 
147  /* Make sure we have a multiple of rcv_wscale */
148  if (wnd && tc->rcv_wscale)
149  {
150  wnd &= ~((1 << tc->rcv_wscale) - 1);
151  if (wnd == 0)
152  wnd = 1 << tc->rcv_wscale;
153  }
154 
155  tc->rcv_wnd = clib_min (wnd, TCP_WND_MAX << tc->rcv_wscale);
156 }
157 
158 /**
159  * Compute and return window to advertise, scaled as per RFC1323
160  */
161 static inline u32
163 {
164  if (state < TCP_STATE_ESTABLISHED)
166 
167  tcp_update_rcv_wnd (tc);
168  return tc->rcv_wnd >> tc->rcv_wscale;
169 }
170 
171 /**
172  * Write TCP options to segment.
173  */
174 static u32
176 {
177  u32 opts_len = 0;
178  u32 buf, seq_len = 4;
179 
180  if (tcp_opts_mss (opts))
181  {
182  *data++ = TCP_OPTION_MSS;
183  *data++ = TCP_OPTION_LEN_MSS;
184  buf = clib_host_to_net_u16 (opts->mss);
185  clib_memcpy_fast (data, &buf, sizeof (opts->mss));
186  data += sizeof (opts->mss);
187  opts_len += TCP_OPTION_LEN_MSS;
188  }
189 
190  if (tcp_opts_wscale (opts))
191  {
192  *data++ = TCP_OPTION_WINDOW_SCALE;
193  *data++ = TCP_OPTION_LEN_WINDOW_SCALE;
194  *data++ = opts->wscale;
195  opts_len += TCP_OPTION_LEN_WINDOW_SCALE;
196  }
197 
198  if (tcp_opts_sack_permitted (opts))
199  {
200  *data++ = TCP_OPTION_SACK_PERMITTED;
202  opts_len += TCP_OPTION_LEN_SACK_PERMITTED;
203  }
204 
205  if (tcp_opts_tstamp (opts))
206  {
207  *data++ = TCP_OPTION_TIMESTAMP;
208  *data++ = TCP_OPTION_LEN_TIMESTAMP;
209  buf = clib_host_to_net_u32 (opts->tsval);
210  clib_memcpy_fast (data, &buf, sizeof (opts->tsval));
211  data += sizeof (opts->tsval);
212  buf = clib_host_to_net_u32 (opts->tsecr);
213  clib_memcpy_fast (data, &buf, sizeof (opts->tsecr));
214  data += sizeof (opts->tsecr);
215  opts_len += TCP_OPTION_LEN_TIMESTAMP;
216  }
217 
218  if (tcp_opts_sack (opts))
219  {
220  int i;
221 
222  if (opts->n_sack_blocks != 0)
223  {
224  *data++ = TCP_OPTION_SACK_BLOCK;
225  *data++ = 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
226  for (i = 0; i < opts->n_sack_blocks; i++)
227  {
228  buf = clib_host_to_net_u32 (opts->sacks[i].start);
229  clib_memcpy_fast (data, &buf, seq_len);
230  data += seq_len;
231  buf = clib_host_to_net_u32 (opts->sacks[i].end);
232  clib_memcpy_fast (data, &buf, seq_len);
233  data += seq_len;
234  }
235  opts_len += 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
236  }
237  }
238 
239  /* Terminate TCP options */
240  if (opts_len % 4)
241  {
242  *data++ = TCP_OPTION_EOL;
243  opts_len += TCP_OPTION_LEN_EOL;
244  }
245 
246  /* Pad with zeroes to a u32 boundary */
247  while (opts_len % 4)
248  {
249  *data++ = TCP_OPTION_NOOP;
250  opts_len += TCP_OPTION_LEN_NOOP;
251  }
252  return opts_len;
253 }
254 
255 static int
257 {
258  u8 len = 0;
259 
260  opts->flags |= TCP_OPTS_FLAG_MSS;
261  opts->mss = tc->mss;
262  len += TCP_OPTION_LEN_MSS;
263 
264  opts->flags |= TCP_OPTS_FLAG_WSCALE;
265  opts->wscale = tc->rcv_wscale;
267 
268  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
269  opts->tsval = tcp_time_now ();
270  opts->tsecr = 0;
272 
273  if (TCP_USE_SACKS)
274  {
275  opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
277  }
278 
279  /* Align to needed boundary */
280  len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
281  return len;
282 }
283 
284 static int
286 {
287  u8 len = 0;
288 
289  opts->flags |= TCP_OPTS_FLAG_MSS;
290  opts->mss = tc->mss;
291  len += TCP_OPTION_LEN_MSS;
292 
293  if (tcp_opts_wscale (&tc->rcv_opts))
294  {
295  opts->flags |= TCP_OPTS_FLAG_WSCALE;
296  opts->wscale = tc->rcv_wscale;
298  }
299 
300  if (tcp_opts_tstamp (&tc->rcv_opts))
301  {
302  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
303  opts->tsval = tcp_time_now ();
304  opts->tsecr = tc->tsval_recent;
306  }
307 
308  if (tcp_opts_sack_permitted (&tc->rcv_opts))
309  {
310  opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
312  }
313 
314  /* Align to needed boundary */
315  len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
316  return len;
317 }
318 
319 static int
321 {
322  u8 len = 0;
323 
324  opts->flags = 0;
325 
326  if (tcp_opts_tstamp (&tc->rcv_opts))
327  {
328  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
329  opts->tsval = tcp_tstamp (tc);
330  opts->tsecr = tc->tsval_recent;
332  }
333  if (tcp_opts_sack_permitted (&tc->rcv_opts))
334  {
335  if (vec_len (tc->snd_sacks))
336  {
337  opts->flags |= TCP_OPTS_FLAG_SACK;
338  if (tc->snd_sack_pos >= vec_len (tc->snd_sacks))
339  tc->snd_sack_pos = 0;
340  opts->sacks = &tc->snd_sacks[tc->snd_sack_pos];
341  opts->n_sack_blocks = vec_len (tc->snd_sacks) - tc->snd_sack_pos;
342  opts->n_sack_blocks = clib_min (opts->n_sack_blocks,
344  tc->snd_sack_pos += opts->n_sack_blocks;
345  len += 2 + TCP_OPTION_LEN_SACK_BLOCK * opts->n_sack_blocks;
346  }
347  }
348 
349  /* Align to needed boundary */
350  len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
351  return len;
352 }
353 
354 always_inline int
357 {
358  switch (state)
359  {
360  case TCP_STATE_ESTABLISHED:
361  case TCP_STATE_CLOSE_WAIT:
362  case TCP_STATE_FIN_WAIT_1:
363  case TCP_STATE_LAST_ACK:
364  case TCP_STATE_CLOSING:
365  case TCP_STATE_FIN_WAIT_2:
366  case TCP_STATE_TIME_WAIT:
367  case TCP_STATE_CLOSED:
368  return tcp_make_established_options (tc, opts);
369  case TCP_STATE_SYN_RCVD:
370  return tcp_make_synack_options (tc, opts);
371  case TCP_STATE_SYN_SENT:
372  return tcp_make_syn_options (tc, opts);
373  default:
374  clib_warning ("State not handled! %d", state);
375  return 0;
376  }
377 }
378 
379 /**
380  * Update burst send vars
381  *
382  * - Updates snd_mss to reflect the effective segment size that we can send
383  * by taking into account all TCP options, including SACKs.
384  * - Cache 'on the wire' options for reuse
385  * - Updates receive window which can be reused for a burst.
386  *
387  * This should *only* be called when doing bursts
388  */
/* NOTE(review): the declarator line (doxygen 390, presumably
 * tcp_update_burst_snd_vars (tcp_connection_t *tc)) was dropped by the page
 * extraction. */
389 void
391 {
392  tcp_main_t *tm = &tcp_main;
393 
394  /* Compute options to be used for connection. These may be reused when
395  * sending data or to compute the effective mss (snd_mss) */
396  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts,
397  TCP_STATE_ESTABLISHED);
398 
399  /* XXX check if MTU has been updated */
400  tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len;
401  ASSERT (tc->snd_mss > 0);
402 
/* Cache wire-format options per worker for reuse by the burst path */
403  tcp_options_write (tm->wrk_ctx[tc->c_thread_index].cached_opts,
404  &tc->snd_opts);
405 
406  tcp_update_rcv_wnd (tc);
407 
/* NOTE(review): the rate-sample statement (doxygen 409) was dropped by the
 * page extraction. */
408  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
410 
/* NOTE(review): the body of this branch (doxygen 413-414) was dropped by
 * the page extraction. */
411  if (tc->snd_una == tc->snd_nxt)
412  {
415  }
416 }
417 
418 #endif /* CLIB_MARCH_VARIANT */
419 
420 static void *
422 {
423  if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
425  /* Zero all flags but free list index and trace flag */
426  b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1;
427  b->current_data = 0;
428  b->current_length = 0;
430  vnet_buffer (b)->tcp.flags = 0;
431 
432  /* Leave enough space for headers */
434 }
435 
436 #ifndef CLIB_MARCH_VARIANT
437 static void *
439 {
440  ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
441  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
443  b->current_data = 0;
444  vnet_buffer (b)->tcp.flags = 0;
446  /* Leave enough space for headers */
448 }
449 
450 
451 /* Compute TCP checksum in software when offloading is disabled for a connection */
452 u16
454  ip46_address_t * src, ip46_address_t * dst)
455 {
456  ip_csum_t sum0;
457  u16 payload_length_host_byte_order;
458  u32 i;
459 
460  /* Initialize checksum with ip header. */
461  sum0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0)) +
462  clib_host_to_net_u16 (IP_PROTOCOL_TCP);
463  payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
464 
465  for (i = 0; i < ARRAY_LEN (src->ip6.as_uword); i++)
466  {
467  sum0 = ip_csum_with_carry
468  (sum0, clib_mem_unaligned (&src->ip6.as_uword[i], uword));
469  sum0 = ip_csum_with_carry
470  (sum0, clib_mem_unaligned (&dst->ip6.as_uword[i], uword));
471  }
472 
473  return ip_calculate_l4_checksum (vm, p0, sum0,
474  payload_length_host_byte_order, NULL, 0,
475  NULL);
476 }
477 
478 u16
480  ip46_address_t * src, ip46_address_t * dst)
481 {
482  ip_csum_t sum0;
483  u32 payload_length_host_byte_order;
484 
485  payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
486  sum0 =
487  clib_host_to_net_u32 (payload_length_host_byte_order +
488  (IP_PROTOCOL_TCP << 16));
489 
490  sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&src->ip4, u32));
491  sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&dst->ip4, u32));
492 
493  return ip_calculate_l4_checksum (vm, p0, sum0,
494  payload_length_host_byte_order, NULL, 0,
495  NULL);
496 }
497 
498 static inline u16
500 {
501  u16 checksum = 0;
502  if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD))
503  {
504  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
505  vlib_main_t *vm = wrk->vm;
506 
507  if (tc->c_is_ip4)
509  (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
510  else
512  (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
513  }
514  else
515  {
516  b->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
517  }
518  return checksum;
519 }
520 
521 /**
522  * Prepare ACK
523  */
524 static inline void
526  u8 flags)
527 {
528  tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
529  u8 tcp_opts_len, tcp_hdr_opts_len;
530  tcp_header_t *th;
531  u16 wnd;
532 
533  wnd = tcp_window_to_advertise (tc, state);
534 
535  /* Make and write options */
536  tcp_opts_len = tcp_make_established_options (tc, snd_opts);
537  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
538 
539  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
540  tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);
541 
542  tcp_options_write ((u8 *) (th + 1), snd_opts);
543 
544  th->checksum = tcp_compute_checksum (tc, b);
545 
546  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
547 
548  if (wnd == 0)
550  else
552 }
553 
554 /**
555  * Convert buffer to ACK
556  */
557 static inline void
559 {
560  tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK);
561  TCP_EVT (TCP_EVT_ACK_SENT, tc);
562  tc->rcv_las = tc->rcv_nxt;
563 }
564 
565 /**
566  * Convert buffer to FIN-ACK
567  */
568 void
570 {
571  tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK);
572 }
573 
574 /**
575  * Convert buffer to SYN
576  */
577 void
579 {
580  u8 tcp_hdr_opts_len, tcp_opts_len;
581  tcp_header_t *th;
582  u16 initial_wnd;
583  tcp_options_t snd_opts;
584 
585  initial_wnd = tcp_initial_window_to_advertise (tc);
586 
587  /* Make and write options */
588  clib_memset (&snd_opts, 0, sizeof (snd_opts));
589  tcp_opts_len = tcp_make_syn_options (tc, &snd_opts);
590  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
591 
592  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
593  tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN,
594  initial_wnd);
595  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
596  tcp_options_write ((u8 *) (th + 1), &snd_opts);
597  th->checksum = tcp_compute_checksum (tc, b);
598 }
599 
600 /**
601  * Convert buffer to SYN-ACK
602  */
603 void
605 {
606  tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
607  u8 tcp_opts_len, tcp_hdr_opts_len;
608  tcp_header_t *th;
609  u16 initial_wnd;
610 
611  clib_memset (snd_opts, 0, sizeof (*snd_opts));
612  initial_wnd = tcp_initial_window_to_advertise (tc);
613  tcp_opts_len = tcp_make_synack_options (tc, snd_opts);
614  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
615 
616  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
617  tc->rcv_nxt, tcp_hdr_opts_len,
618  TCP_FLAG_SYN | TCP_FLAG_ACK, initial_wnd);
619  tcp_options_write ((u8 *) (th + 1), snd_opts);
620 
621  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
622  th->checksum = tcp_compute_checksum (tc, b);
623 }
624 
625 always_inline void
627  u8 is_ip4, u32 fib_index, u8 flush)
628 {
629  vlib_main_t *vm = wrk->vm;
630  u32 *to_next, next_index;
631  vlib_frame_t *f;
632 
633  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
634  b->error = 0;
635 
636  vnet_buffer (b)->sw_if_index[VLIB_TX] = fib_index;
637  vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
638 
639  /* Send to IP lookup */
640  next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
642 
643  f = wrk->ip_lookup_tx_frames[!is_ip4];
644  if (!f)
645  {
646  f = vlib_get_frame_to_node (vm, next_index);
647  ASSERT (f);
648  wrk->ip_lookup_tx_frames[!is_ip4] = f;
649  }
650 
651  to_next = vlib_frame_vector_args (f);
652  to_next[f->n_vectors] = bi;
653  f->n_vectors += 1;
654  if (flush || f->n_vectors == VLIB_FRAME_SIZE)
655  {
656  vlib_put_frame_to_node (vm, next_index, f);
657  wrk->ip_lookup_tx_frames[!is_ip4] = 0;
658  }
659 }
660 
661 static void
663  u32 bi, u8 is_ip4, u32 fib_index)
664 {
665  tcp_enqueue_to_ip_lookup_i (wrk, b, bi, is_ip4, fib_index, 1);
666 }
667 
668 static void
670  u8 is_ip4, u32 fib_index)
671 {
672  tcp_enqueue_to_ip_lookup_i (wrk, b, bi, is_ip4, fib_index, 0);
673  if (wrk->vm->thread_index == 0 && vlib_num_workers ())
675 }
676 
/* Hand a fully built segment to the session-layer tx path for the tcp
 * output node.
 * NOTE(review): the second declarator line (doxygen 678) and the final
 * enqueue statement (doxygen 687, which consumes 'st' and 'bi') were
 * dropped by the page extraction — restore from upstream. */
677 static void
679  u8 is_ip4)
680 {
681  session_type_t st;
682 
/* Mark as locally generated so downstream features treat it accordingly */
683  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
684  b->error = 0;
685 
686  st = session_type_from_proto_and_ip (TRANSPORT_PROTO_TCP, is_ip4);
688 }
689 
690 #endif /* CLIB_MARCH_VARIANT */
691 
692 static int
694  tcp_state_t state, u8 thread_index, u8 is_ip4)
695 {
696  ip4_header_t *ih4;
697  ip6_header_t *ih6;
698  tcp_header_t *th0;
699  ip4_address_t src_ip40, dst_ip40;
700  ip6_address_t src_ip60, dst_ip60;
702  u32 tmp;
703  u32 seq, ack;
704  u8 flags;
705 
706  /* Find IP and TCP headers */
707  th0 = tcp_buffer_hdr (b0);
708 
709  /* Save src and dst ip */
710  if (is_ip4)
711  {
712  ih4 = vlib_buffer_get_current (b0);
713  ASSERT ((ih4->ip_version_and_header_length & 0xF0) == 0x40);
714  src_ip40.as_u32 = ih4->src_address.as_u32;
715  dst_ip40.as_u32 = ih4->dst_address.as_u32;
716  }
717  else
718  {
719  ih6 = vlib_buffer_get_current (b0);
720  ASSERT ((ih6->ip_version_traffic_class_and_flow_label & 0xF0) == 0x60);
721  clib_memcpy_fast (&src_ip60, &ih6->src_address, sizeof (ip6_address_t));
722  clib_memcpy_fast (&dst_ip60, &ih6->dst_address, sizeof (ip6_address_t));
723  }
724 
725  src_port = th0->src_port;
726  dst_port = th0->dst_port;
727 
728  /* Try to determine what/why we're actually resetting */
729  if (state == TCP_STATE_CLOSED)
730  {
731  if (!tcp_syn (th0))
732  return -1;
733 
734  tmp = clib_net_to_host_u32 (th0->seq_number);
735 
736  /* Got a SYN for no listener. */
737  flags = TCP_FLAG_RST | TCP_FLAG_ACK;
738  ack = clib_host_to_net_u32 (tmp + 1);
739  seq = 0;
740  }
741  else
742  {
743  flags = TCP_FLAG_RST;
744  seq = th0->ack_number;
745  ack = 0;
746  }
747 
748  tcp_reuse_buffer (vm, b0);
749  tcp_trajectory_add_start (b0, 4);
750  th0 = vlib_buffer_push_tcp_net_order (b0, dst_port, src_port, seq, ack,
751  sizeof (tcp_header_t), flags, 0);
752 
753  if (is_ip4)
754  {
755  ih4 = vlib_buffer_push_ip4 (vm, b0, &dst_ip40, &src_ip40,
756  IP_PROTOCOL_TCP, 1);
757  th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih4);
758  }
759  else
760  {
761  int bogus = ~0;
762  ih6 = vlib_buffer_push_ip6 (vm, b0, &dst_ip60, &src_ip60,
763  IP_PROTOCOL_TCP);
764  th0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ih6, &bogus);
765  ASSERT (!bogus);
766  }
767 
768  return 0;
769 }
770 
771 #ifndef CLIB_MARCH_VARIANT
772 /**
773  * Send reset without reusing existing buffer
774  *
775  * It extracts connection info out of original packet
776  */
777 void
779  u32 thread_index, u8 is_ip4)
780 {
781  tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
782  vlib_main_t *vm = wrk->vm;
783  vlib_buffer_t *b;
784  u32 bi, sw_if_index, fib_index;
785  u8 tcp_hdr_len, flags = 0;
786  tcp_header_t *th, *pkt_th;
787  u32 seq, ack;
788  ip4_header_t *ih4, *pkt_ih4;
789  ip6_header_t *ih6, *pkt_ih6;
790  fib_protocol_t fib_proto;
791 
792  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
793  return;
794 
795  b = vlib_get_buffer (vm, bi);
796  sw_if_index = vnet_buffer (pkt)->sw_if_index[VLIB_RX];
797  fib_proto = is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
798  fib_index = fib_table_get_index_for_sw_if_index (fib_proto, sw_if_index);
799  tcp_init_buffer (vm, b);
800 
801  /* Make and write options */
802  tcp_hdr_len = sizeof (tcp_header_t);
803 
804  if (is_ip4)
805  {
806  pkt_ih4 = vlib_buffer_get_current (pkt);
807  pkt_th = ip4_next_header (pkt_ih4);
808  }
809  else
810  {
811  pkt_ih6 = vlib_buffer_get_current (pkt);
812  pkt_th = ip6_next_header (pkt_ih6);
813  }
814 
815  if (tcp_ack (pkt_th))
816  {
817  flags = TCP_FLAG_RST;
818  seq = pkt_th->ack_number;
819  ack = (tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;
820  }
821  else
822  {
823  flags = TCP_FLAG_RST | TCP_FLAG_ACK;
824  seq = 0;
825  ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end);
826  }
827 
828  th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port,
829  seq, ack, tcp_hdr_len, flags, 0);
830 
831  /* Swap src and dst ip */
832  if (is_ip4)
833  {
834  ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
835  ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
836  &pkt_ih4->src_address, IP_PROTOCOL_TCP,
837  tcp_csum_offload (tc));
838  th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
839  }
840  else
841  {
842  int bogus = ~0;
843  ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) ==
844  0x60);
845  ih6 = vlib_buffer_push_ip6 (vm, b, &pkt_ih6->dst_address,
846  &pkt_ih6->src_address, IP_PROTOCOL_TCP);
847  th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
848  ASSERT (!bogus);
849  }
850 
851  tcp_enqueue_to_ip_lookup_now (wrk, b, bi, is_ip4, fib_index);
852  TCP_EVT (TCP_EVT_RST_SENT, tc);
853  vlib_node_increment_counter (vm, tcp_node_index (output, tc->c_is_ip4),
854  TCP_ERROR_RST_SENT, 1);
855 }
856 
857 /**
858  * Build and set reset packet for connection
859  */
860 void
862 {
863  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
864  vlib_main_t *vm = wrk->vm;
865  vlib_buffer_t *b;
866  u32 bi;
867  tcp_header_t *th;
868  u16 tcp_hdr_opts_len, advertise_wnd, opts_write_len;
869  u8 flags;
870 
871  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
872  return;
873  b = vlib_get_buffer (vm, bi);
874  tcp_init_buffer (vm, b);
875 
876  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
877  tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
878  advertise_wnd = tcp_window_to_advertise (tc, TCP_STATE_ESTABLISHED);
879  flags = TCP_FLAG_RST;
880  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
881  tc->rcv_nxt, tcp_hdr_opts_len, flags,
882  advertise_wnd);
883  opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
884  th->checksum = tcp_compute_checksum (tc, b);
885  ASSERT (opts_write_len == tc->snd_opts_len);
886  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
887  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
888  TCP_EVT (TCP_EVT_RST_SENT, tc);
889  vlib_node_increment_counter (vm, tcp_node_index (output, tc->c_is_ip4),
890  TCP_ERROR_RST_SENT, 1);
891 }
892 
893 static void
895  vlib_buffer_t * b)
896 {
898  vlib_main_t *vm = wrk->vm;
899  if (tc->c_is_ip4)
900  {
901  ip4_header_t *ih;
902  ih = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip4,
903  &tc->c_rmt_ip4, IP_PROTOCOL_TCP,
904  tcp_csum_offload (tc));
905  th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih);
906  }
907  else
908  {
909  ip6_header_t *ih;
910  int bogus = ~0;
911 
912  ih = vlib_buffer_push_ip6 (vm, b, &tc->c_lcl_ip6,
913  &tc->c_rmt_ip6, IP_PROTOCOL_TCP);
914  th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih, &bogus);
915  ASSERT (!bogus);
916  }
917 }
918 
919 /**
920  * Send SYN
921  *
922  * Builds a SYN packet for a half-open connection and sends it to ipx_lookup.
923  * The packet is not forwarded through tcpx_output to avoid doing lookups
924  * in the half_open pool.
925  */
926 void
928 {
929  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
930  vlib_main_t *vm = wrk->vm;
931  vlib_buffer_t *b;
932  u32 bi;
933 
934  /*
935  * Setup retransmit and establish timers before requesting buffer
936  * such that we can return if we've ran out.
937  */
938  tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN,
939  tc->rto * TCP_TO_TIMER_TICK);
940 
941  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
942  {
943  tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN, 1);
944  return;
945  }
946 
947  b = vlib_get_buffer (vm, bi);
948  tcp_init_buffer (vm, b);
949  tcp_make_syn (tc, b);
950 
951  /* Measure RTT with this */
952  tc->rtt_ts = tcp_time_now_us (vlib_num_workers ()? 1 : 0);
953  tc->rtt_seq = tc->snd_nxt;
954  tc->rto_boff = 0;
955 
956  tcp_push_ip_hdr (wrk, tc, b);
957  tcp_enqueue_to_ip_lookup (wrk, b, bi, tc->c_is_ip4, tc->c_fib_index);
958  TCP_EVT (TCP_EVT_SYN_SENT, tc);
959 }
960 
961 void
963 {
964  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
965  vlib_main_t *vm = wrk->vm;
966  vlib_buffer_t *b;
967  u32 bi;
968 
970 
971  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
972  {
973  tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, 1);
974  return;
975  }
976 
977  tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
978  b = vlib_get_buffer (vm, bi);
979  tcp_init_buffer (vm, b);
980  tcp_make_synack (tc, b);
981  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
982  TCP_EVT (TCP_EVT_SYNACK_SENT, tc);
983 }
984 
985 /**
986  * Flush ip lookup tx frames populated by timer pops
987  */
988 static void
990 {
991  if (wrk->ip_lookup_tx_frames[!is_ip4])
992  {
993  u32 next_index;
994  next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
995  vlib_put_frame_to_node (wrk->vm, next_index,
996  wrk->ip_lookup_tx_frames[!is_ip4]);
997  wrk->ip_lookup_tx_frames[!is_ip4] = 0;
998  }
999 }
1000 
1001 /**
1002  * Flush v4 and v6 tcp and ip-lookup tx frames for thread index
1003  */
1004 void
1006 {
1009 }
1010 
1011 /**
1012  * Send FIN
1013  */
/* NOTE(review): the declarator line (doxygen 1015, presumably
 * tcp_send_fin (tcp_connection_t *tc)) and one statement (doxygen 1043,
 * between the SNDACK clearing and vlib_get_buffer) were dropped by the page
 * extraction — restore from upstream. */
1014 void
1016 {
1017  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1018  vlib_main_t *vm = wrk->vm;
1019  vlib_buffer_t *b;
1020  u32 bi;
1021  u8 fin_snt = 0;
1022 
/* If a FIN was already sent we are retransmitting it: rewind snd_nxt over
 * the previously accounted FIN sequence number */
1023  fin_snt = tc->flags & TCP_CONN_FINSNT;
1024  if (fin_snt)
1025  tc->snd_nxt -= 1;
1026 
1027  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1028  {
1029  /* Out of buffers so program fin retransmit ASAP */
1030  tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, 1);
1031  if (fin_snt)
1032  tc->snd_nxt += 1;
1033  else
1034  /* Make sure retransmit retries a fin not data */
1035  tc->flags |= TCP_CONN_FINSNT;
1036  return;
1037  }
1038 
1039  /* If we have non-dupacks programmed, no need to send them */
1040  if ((tc->flags & TCP_CONN_SNDACK) && !tc->pending_dupacks)
1041  tc->flags &= ~TCP_CONN_SNDACK;
1042 
1044  b = vlib_get_buffer (vm, bi);
1045  tcp_init_buffer (vm, b);
1046  tcp_make_fin (tc, b);
1047  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1048  TCP_EVT (TCP_EVT_FIN_SENT, tc);
1049  /* Account for the FIN */
1050  tc->snd_nxt += 1;
1051  if (!fin_snt)
1052  {
1053  tc->flags |= TCP_CONN_FINSNT;
1054  tc->flags &= ~TCP_CONN_FINPNDG;
1055  tc->snd_una_max = seq_max (tc->snd_una_max, tc->snd_nxt);
1056  }
1057 }
1058 
1059 /**
1060  * Push TCP header and update connection variables. Should only be called
1061  * for segments with data, not for 'control' packets.
1062  */
1063 always_inline void
1065  u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)
1066 {
1067  u8 tcp_hdr_opts_len, flags = TCP_FLAG_ACK;
1068  u32 advertise_wnd, data_len;
1069  tcp_main_t *tm = &tcp_main;
1070  tcp_header_t *th;
1071 
1072  data_len = b->current_length;
1073  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
1075 
1076  vnet_buffer (b)->tcp.flags = 0;
1077  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
1078 
1079  if (compute_opts)
1080  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
1081 
1082  tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
1083 
1084  if (maybe_burst)
1085  advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
1086  else
1087  advertise_wnd = tcp_window_to_advertise (tc, TCP_STATE_ESTABLISHED);
1088 
1089  if (PREDICT_FALSE (tc->flags & TCP_CONN_PSH_PENDING))
1090  {
1091  if (seq_geq (tc->psh_seq, snd_nxt)
1092  && seq_lt (tc->psh_seq, snd_nxt + data_len))
1093  flags |= TCP_FLAG_PSH;
1094  }
1095  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, snd_nxt,
1096  tc->rcv_nxt, tcp_hdr_opts_len, flags,
1097  advertise_wnd);
1098 
1099  if (maybe_burst)
1100  {
1101  clib_memcpy_fast ((u8 *) (th + 1),
1102  tm->wrk_ctx[tc->c_thread_index].cached_opts,
1103  tc->snd_opts_len);
1104  }
1105  else
1106  {
1107  u8 len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
1108  ASSERT (len == tc->snd_opts_len);
1109  }
1110 
1111  /*
1112  * Update connection variables
1113  */
1114 
1115  if (update_snd_nxt)
1116  tc->snd_nxt += data_len;
1117  tc->rcv_las = tc->rcv_nxt;
1118 
1119  tc->bytes_out += data_len;
1120  tc->data_segs_out += 1;
1121 
1122  th->checksum = tcp_compute_checksum (tc, b);
1123 
1124  TCP_EVT (TCP_EVT_PKTIZE, tc);
1125 }
1126 
1129 {
1130  u32 data_len = b->current_length;
1131  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
1133  return data_len;
1134 }
1135 
1136 u32
1138 {
1139  tcp_connection_t *tc = (tcp_connection_t *) tconn;
1140 
1141  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1142  tcp_bt_track_tx (tc, tcp_buffer_len (b));
1143 
1144  tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0, /* burst */ 1,
1145  /* update_snd_nxt */ 1);
1146 
1147  tc->snd_una_max = seq_max (tc->snd_nxt, tc->snd_una_max);
1148  tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
1149  /* If not tracking an ACK, start tracking */
1150  if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))
1151  {
1152  tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
1153  tc->rtt_seq = tc->snd_nxt;
1154  }
1155  if (PREDICT_FALSE (!tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)))
1156  {
1158  tc->rto_boff = 0;
1159  }
1160  tcp_trajectory_add_start (b, 3);
1161  return 0;
1162 }
1163 
1164 void
1166 {
1167  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1168  vlib_main_t *vm = wrk->vm;
1169  vlib_buffer_t *b;
1170  u32 bi;
1171 
1172  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1173  {
1174  tcp_update_rcv_wnd (tc);
1175  return;
1176  }
1177  b = vlib_get_buffer (vm, bi);
1178  tcp_init_buffer (vm, b);
1179  tcp_make_ack (tc, b);
1180  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1181 }
1182 
1183 void
1185 {
1186  if (!(tc->flags & TCP_CONN_SNDACK))
1187  {
1188  session_add_self_custom_tx_evt (&tc->connection, 1);
1189  tc->flags |= TCP_CONN_SNDACK;
1190  }
1191 }
1192 
1193 void
1195 {
1196  if (!(tc->flags & TCP_CONN_SNDACK))
1197  {
1198  session_add_self_custom_tx_evt (&tc->connection, 1);
1199  tc->flags |= TCP_CONN_SNDACK;
1200  }
1201  if (tc->pending_dupacks < 255)
1202  tc->pending_dupacks += 1;
1203 }
1204 
1205 void
1207 {
1208  if (!(tc->flags & TCP_CONN_RXT_PENDING))
1209  {
1210  session_add_self_custom_tx_evt (&tc->connection, 0);
1211  tc->flags |= TCP_CONN_RXT_PENDING;
1212  }
1213 }
1214 
1215 /**
1216  * Delayed ack timer handler
1217  *
1218  * Sends delayed ACK when timer expires
1219  */
1220 void
1222 {
1223  u32 thread_index = vlib_get_thread_index ();
1224  tcp_connection_t *tc;
1225 
1226  tc = tcp_connection_get (index, thread_index);
1227  tc->timers[TCP_TIMER_DELACK] = TCP_TIMER_HANDLE_INVALID;
1228  tcp_send_ack (tc);
1229 }
1230 
1231 /**
1232  * Send window update ack
1233  *
1234  * Ensures that it will be sent only once, after a zero rwnd has been
1235  * advertised in a previous ack, and only if rwnd has grown beyond a
1236  * configurable value.
1237  */
1238 void
1240 {
1241  if (tcp_zero_rwnd_sent (tc))
1242  {
1243  tcp_update_rcv_wnd (tc);
1244  if (tc->rcv_wnd >= tcp_cfg.rwnd_min_update_ack * tc->snd_mss)
1245  {
1247  tcp_program_ack (tc);
1248  }
1249  }
1250 }
1251 
1252 /**
1253  * Allocate a new buffer and build a new tcp segment
1254  *
1255  * @param wrk tcp worker
1256  * @param tc connection for which the segment will be allocated
1257  * @param offset offset of the first byte in the tx fifo
1258  * @param max_deq_byte segment size
1259  * @param[out] b pointer to buffer allocated
1260  *
1261  * @return the number of bytes in the segment or 0 if buffer cannot be
1262  * allocated or no data available
1263  */
1264 static int
1266  u32 offset, u32 max_deq_bytes, vlib_buffer_t ** b)
1267 {
1268  u32 bytes_per_buffer = vnet_get_tcp_main ()->bytes_per_buffer;
1269  vlib_main_t *vm = wrk->vm;
1270  u32 bi, seg_size;
1271  int n_bytes = 0;
1272  u8 *data;
1273 
1274  seg_size = max_deq_bytes + TRANSPORT_MAX_HDRS_LEN;
1275 
1276  /*
1277  * Prepare options
1278  */
1279  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
1280 
1281  /*
1282  * Allocate and fill in buffer(s)
1283  */
1284 
1285  /* Easy case, buffer size greater than mss */
1286  if (PREDICT_TRUE (seg_size <= bytes_per_buffer))
1287  {
1288  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1289  return 0;
1290  *b = vlib_get_buffer (vm, bi);
1291  data = tcp_init_buffer (vm, *b);
1292  n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
1293  max_deq_bytes);
1294  ASSERT (n_bytes == max_deq_bytes);
1295  b[0]->current_length = n_bytes;
1296  tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
1297  /* burst */ 0, /* update_snd_nxt */ 0);
1298  }
1299  /* Split mss into multiple buffers */
1300  else
1301  {
1302  u32 chain_bi = ~0, n_bufs_per_seg, n_bufs;
1303  u16 n_peeked, len_to_deq;
1304  vlib_buffer_t *chain_b, *prev_b;
1305  int i;
1306 
1307  /* Make sure we have enough buffers */
1308  n_bufs_per_seg = ceil ((double) seg_size / bytes_per_buffer);
1309  vec_validate_aligned (wrk->tx_buffers, n_bufs_per_seg - 1,
1311  n_bufs = vlib_buffer_alloc (vm, wrk->tx_buffers, n_bufs_per_seg);
1312  if (PREDICT_FALSE (n_bufs != n_bufs_per_seg))
1313  {
1314  if (n_bufs)
1315  vlib_buffer_free (vm, wrk->tx_buffers, n_bufs);
1316  return 0;
1317  }
1318 
1319  *b = vlib_get_buffer (vm, wrk->tx_buffers[--n_bufs]);
1320  data = tcp_init_buffer (vm, *b);
1321  n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
1322  bytes_per_buffer -
1323  TRANSPORT_MAX_HDRS_LEN);
1324  b[0]->current_length = n_bytes;
1325  b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
1327  max_deq_bytes -= n_bytes;
1328 
1329  chain_b = *b;
1330  for (i = 1; i < n_bufs_per_seg; i++)
1331  {
1332  prev_b = chain_b;
1333  len_to_deq = clib_min (max_deq_bytes, bytes_per_buffer);
1334  chain_bi = wrk->tx_buffers[--n_bufs];
1335  chain_b = vlib_get_buffer (vm, chain_bi);
1336  chain_b->current_data = 0;
1337  data = vlib_buffer_get_current (chain_b);
1338  n_peeked = session_tx_fifo_peek_bytes (&tc->connection, data,
1339  offset + n_bytes,
1340  len_to_deq);
1341  ASSERT (n_peeked == len_to_deq);
1342  n_bytes += n_peeked;
1343  chain_b->current_length = n_peeked;
1344  chain_b->next_buffer = 0;
1345 
1346  /* update previous buffer */
1347  prev_b->next_buffer = chain_bi;
1348  prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
1349 
1350  max_deq_bytes -= n_peeked;
1351  b[0]->total_length_not_including_first_buffer += n_peeked;
1352  }
1353 
1354  tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
1355  /* burst */ 0, /* update_snd_nxt */ 0);
1356 
1357  if (PREDICT_FALSE (n_bufs))
1358  {
1359  clib_warning ("not all buffers consumed");
1360  vlib_buffer_free (vm, wrk->tx_buffers, n_bufs);
1361  }
1362  }
1363 
1364  ASSERT (n_bytes > 0);
1365  ASSERT (((*b)->current_data + (*b)->current_length) <= bytes_per_buffer);
1366 
1367  return n_bytes;
1368 }
1369 
1370 /**
1371  * Build a retransmit segment
1372  *
1373  * @return the number of bytes in the segment or 0 if there's nothing to
1374  * retransmit
1375  */
1376 static u32
1378  tcp_connection_t * tc, u32 offset,
1379  u32 max_deq_bytes, vlib_buffer_t ** b)
1380 {
1381  u32 start, available_bytes;
1382  int n_bytes = 0;
1383 
1384  ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
1385  ASSERT (max_deq_bytes != 0);
1386 
1387  /*
1388  * Make sure we can retransmit something
1389  */
1390  available_bytes = transport_max_tx_dequeue (&tc->connection);
1391  ASSERT (available_bytes >= offset);
1392  available_bytes -= offset;
1393  if (!available_bytes)
1394  return 0;
1395 
1396  max_deq_bytes = clib_min (tc->snd_mss, max_deq_bytes);
1397  max_deq_bytes = clib_min (available_bytes, max_deq_bytes);
1398 
1399  start = tc->snd_una + offset;
1400  ASSERT (seq_leq (start + max_deq_bytes, tc->snd_nxt));
1401 
1402  n_bytes = tcp_prepare_segment (wrk, tc, offset, max_deq_bytes, b);
1403  if (!n_bytes)
1404  return 0;
1405 
1406  tc->snd_rxt_bytes += n_bytes;
1407 
1408  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1409  tcp_bt_track_rxt (tc, start, start + n_bytes);
1410 
1411  tc->bytes_retrans += n_bytes;
1412  tc->segs_retrans += 1;
1413  TCP_EVT (TCP_EVT_CC_RTX, tc, offset, n_bytes);
1414 
1415  return n_bytes;
1416 }
1417 
1418 static void
1420 {
1421  sack_scoreboard_t *sb = &tc->sack_sb;
1422  sack_scoreboard_hole_t *hole;
1423 
1424  hole = scoreboard_first_hole (sb);
1425  if (!sb->is_reneging && (!hole || hole->start == tc->snd_una))
1426  return;
1427 
1428  scoreboard_clear_reneging (sb, tc->snd_una, tc->snd_nxt);
1429 }
1430 
1431 /**
1432  * Reset congestion control, switch cwnd to loss window and try again.
1433  */
1434 static void
1436 {
1437  TCP_EVT (TCP_EVT_CC_EVT, tc, 6);
1438 
1439  tc->prev_ssthresh = tc->ssthresh;
1440  tc->prev_cwnd = tc->cwnd;
1441 
1442  /* If we entrered loss without fast recovery, notify cc algo of the
1443  * congestion event such that it can update ssthresh and its state */
1444  if (!tcp_in_fastrecovery (tc))
1445  tcp_cc_congestion (tc);
1446 
1447  /* Let cc algo decide loss cwnd and ssthresh post unrecovered loss */
1448  tcp_cc_loss (tc);
1449 
1450  tc->rtt_ts = 0;
1451  tc->cwnd_acc_bytes = 0;
1452  tc->tr_occurences += 1;
1453  tcp_recovery_on (tc);
1454 }
1455 
1456 void
1458 {
1459  u32 thread_index = vlib_get_thread_index ();
1460  tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
1461  vlib_main_t *vm = wrk->vm;
1462  tcp_connection_t *tc;
1463  vlib_buffer_t *b = 0;
1464  u32 bi, n_bytes;
1465 
1466  tc = tcp_connection_get (tc_index, thread_index);
1467 
1468  /* Note: the connection may have been closed and pool_put */
1469  if (PREDICT_FALSE (tc == 0 || tc->state == TCP_STATE_SYN_SENT))
1470  return;
1471 
1472  tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID;
1473 
1474  /* Wait-close and retransmit could pop at the same time */
1475  if (tc->state == TCP_STATE_CLOSED)
1476  return;
1477 
1478  if (tc->state >= TCP_STATE_ESTABLISHED)
1479  {
1480  TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1481 
1482  /* Lost FIN, retransmit and return */
1483  if (tc->flags & TCP_CONN_FINSNT)
1484  {
1485  tcp_send_fin (tc);
1486  tc->rto_boff += 1;
1487  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1488  return;
1489  }
1490 
1491  /* Shouldn't be here. This condition is tricky because it has to take
1492  * into account boff > 0 due to persist timeout. */
1493  if ((tc->rto_boff == 0 && tc->snd_una == tc->snd_nxt)
1494  || (tc->rto_boff > 0 && seq_geq (tc->snd_una, tc->snd_congestion)
1495  && !tcp_flight_size (tc)))
1496  {
1497  ASSERT (!tcp_in_recovery (tc));
1498  tc->rto_boff = 0;
1499  return;
1500  }
1501 
1502  /* We're not in recovery so make sure rto_boff is 0. Can be non 0 due
1503  * to persist timer timeout */
1504  if (!tcp_in_recovery (tc) && tc->rto_boff > 0)
1505  {
1506  tc->rto_boff = 0;
1507  tcp_update_rto (tc);
1508  }
1509 
1510  /* Peer is dead or network connectivity is lost. Close connection.
1511  * RFC 1122 section 4.2.3.5 recommends a value of at least 100s. For
1512  * a min rto of 0.2s we need to retry about 8 times. */
1513  if (tc->rto_boff >= TCP_RTO_BOFF_MAX)
1514  {
1515  tcp_send_reset (tc);
1516  tcp_connection_set_state (tc, TCP_STATE_CLOSED);
1517  session_transport_closing_notify (&tc->connection);
1518  session_transport_closed_notify (&tc->connection);
1520  tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
1521  return;
1522  }
1523 
1524  if (tcp_opts_sack_permitted (&tc->rcv_opts))
1526 
1527  /* Update send congestion to make sure that rxt has data to send */
1528  tc->snd_congestion = tc->snd_nxt;
1529 
1530  /* Send the first unacked segment. If we're short on buffers, return
1531  * as soon as possible */
1532  n_bytes = clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una);
1533  n_bytes = tcp_prepare_retransmit_segment (wrk, tc, 0, n_bytes, &b);
1534  if (!n_bytes)
1535  {
1536  tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, 1);
1537  return;
1538  }
1539 
1540  bi = vlib_get_buffer_index (vm, b);
1541  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1542 
1543  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1545 
1546  tc->rto_boff += 1;
1547  if (tc->rto_boff == 1)
1548  {
1550  /* Record timestamp. Eifel detection algorithm RFC3522 */
1551  tc->snd_rxt_ts = tcp_tstamp (tc);
1552  }
1553 
1554  if (tcp_opts_sack_permitted (&tc->rcv_opts))
1555  scoreboard_init_rxt (&tc->sack_sb, tc->snd_una + n_bytes);
1556 
1558  }
1559  /* Retransmit SYN-ACK */
1560  else if (tc->state == TCP_STATE_SYN_RCVD)
1561  {
1562  TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1563 
1564  tc->rtt_ts = 0;
1565 
1566  /* Passive open establish timeout */
1567  if (tc->rto > TCP_ESTABLISH_TIME >> 1)
1568  {
1569  tcp_connection_set_state (tc, TCP_STATE_CLOSED);
1571  tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
1572  return;
1573  }
1574 
1575  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1576  {
1577  tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, 1);
1578  return;
1579  }
1580 
1581  tc->rto_boff += 1;
1582  if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
1583  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1584 
1586 
1587  b = vlib_get_buffer (vm, bi);
1588  tcp_init_buffer (vm, b);
1589  tcp_make_synack (tc, b);
1590  TCP_EVT (TCP_EVT_SYN_RXT, tc, 1);
1591 
1592  /* Retransmit timer already updated, just enqueue to output */
1593  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1594  }
1595  else
1596  {
1597  ASSERT (tc->state == TCP_STATE_CLOSED);
1598  return;
1599  }
1600 }
1601 
1602 /**
1603  * SYN retransmit timer handler. Active open only.
1604  */
1605 void
1607 {
1608  u32 thread_index = vlib_get_thread_index ();
1609  tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
1610  vlib_main_t *vm = wrk->vm;
1611  tcp_connection_t *tc;
1612  vlib_buffer_t *b = 0;
1613  u32 bi;
1614 
1615  tc = tcp_half_open_connection_get (tc_index);
1616 
1617  /* Note: the connection may have transitioned to ESTABLISHED... */
1618  if (PREDICT_FALSE (tc == 0 || tc->state != TCP_STATE_SYN_SENT))
1619  return;
1620 
1621  tc->timers[TCP_TIMER_RETRANSMIT_SYN] = TCP_TIMER_HANDLE_INVALID;
1622 
1623  /* Half-open connection actually moved to established but we were
1624  * waiting for syn retransmit to pop to call cleanup from the right
1625  * thread. */
1626  if (tc->flags & TCP_CONN_HALF_OPEN_DONE)
1627  {
1629  TCP_DBG ("could not remove half-open connection");
1630  return;
1631  }
1632 
1633  TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1634  tc->rtt_ts = 0;
1635 
1636  /* Active open establish timeout */
1637  if (tc->rto >= TCP_ESTABLISH_TIME >> 1)
1638  {
1639  session_stream_connect_notify (&tc->connection, 1 /* fail */ );
1641  return;
1642  }
1643 
1644  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1645  {
1646  tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN, 1);
1647  return;
1648  }
1649 
1650  /* Try without increasing RTO a number of times. If this fails,
1651  * start growing RTO exponentially */
1652  tc->rto_boff += 1;
1653  if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
1654  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1655 
1656  b = vlib_get_buffer (vm, bi);
1657  tcp_init_buffer (vm, b);
1658  tcp_make_syn (tc, b);
1659 
1660  TCP_EVT (TCP_EVT_SYN_RXT, tc, 0);
1661 
1662  /* This goes straight to ipx_lookup */
1663  tcp_push_ip_hdr (wrk, tc, b);
1664  tcp_enqueue_to_ip_lookup (wrk, b, bi, tc->c_is_ip4, tc->c_fib_index);
1665 
1666  tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN,
1667  tc->rto * TCP_TO_TIMER_TICK);
1668 }
1669 
1670 /**
1671  * Got 0 snd_wnd from peer, try to do something about it.
1672  *
1673  */
1674 void
1676 {
1677  u32 thread_index = vlib_get_thread_index ();
1678  tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
1679  u32 bi, max_snd_bytes, available_bytes, offset;
1680  tcp_main_t *tm = vnet_get_tcp_main ();
1681  vlib_main_t *vm = wrk->vm;
1682  tcp_connection_t *tc;
1683  vlib_buffer_t *b;
1684  int n_bytes = 0;
1685  u8 *data;
1686 
1687  tc = tcp_connection_get_if_valid (index, thread_index);
1688  if (!tc)
1689  return;
1690 
1691  /* Make sure timer handle is set to invalid */
1692  tc->timers[TCP_TIMER_PERSIST] = TCP_TIMER_HANDLE_INVALID;
1693 
1694  /* Problem already solved or worse */
1695  if (tc->state == TCP_STATE_CLOSED || tc->snd_wnd > tc->snd_mss
1696  || (tc->flags & TCP_CONN_FINSNT))
1697  return;
1698 
1699  available_bytes = transport_max_tx_dequeue (&tc->connection);
1700  offset = tc->snd_nxt - tc->snd_una;
1701 
1702  /* Reprogram persist if no new bytes available to send. We may have data
1703  * next time */
1704  if (!available_bytes)
1705  {
1706  tcp_persist_timer_set (tc);
1707  return;
1708  }
1709 
1710  if (available_bytes <= offset)
1711  {
1712  ASSERT (tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT));
1713  return;
1714  }
1715 
1716  /* Increment RTO backoff */
1717  tc->rto_boff += 1;
1718  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1719 
1720  /*
1721  * Try to force the first unsent segment (or buffer)
1722  */
1723  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1724  {
1725  tcp_persist_timer_set (tc);
1726  return;
1727  }
1728  b = vlib_get_buffer (vm, bi);
1729  data = tcp_init_buffer (vm, b);
1730 
1731  tcp_validate_txf_size (tc, offset);
1732  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
1733  max_snd_bytes = clib_min (tc->snd_mss,
1734  tm->bytes_per_buffer - TRANSPORT_MAX_HDRS_LEN);
1735  n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
1736  max_snd_bytes);
1737  b->current_length = n_bytes;
1738  ASSERT (n_bytes != 0 && (tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)
1739  || tc->snd_nxt == tc->snd_una_max
1740  || tc->rto_boff > 1));
1741 
1742  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1743  {
1745  tcp_bt_track_tx (tc, n_bytes);
1746  }
1747 
1748  tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0,
1749  /* burst */ 0, /* update_snd_nxt */ 1);
1750  tc->snd_una_max = seq_max (tc->snd_nxt, tc->snd_una_max);
1751  tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
1752  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1753 
1754  /* Just sent new data, enable retransmit */
1756 }
1757 
1758 /**
1759  * Retransmit first unacked segment
1760  */
1761 int
1763 {
1764  vlib_main_t *vm = wrk->vm;
1765  vlib_buffer_t *b;
1766  u32 bi, n_bytes;
1767 
1768  TCP_EVT (TCP_EVT_CC_EVT, tc, 1);
1769 
1770  n_bytes = tcp_prepare_retransmit_segment (wrk, tc, 0, tc->snd_mss, &b);
1771  if (!n_bytes)
1772  return -1;
1773 
1774  bi = vlib_get_buffer_index (vm, b);
1775  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1776 
1777  return 0;
1778 }
1779 
1780 static int
1782  u32 burst_size)
1783 {
1784  u32 offset, n_segs = 0, n_written, bi, available_wnd;
1785  vlib_main_t *vm = wrk->vm;
1786  vlib_buffer_t *b = 0;
1787 
1788  offset = tc->snd_nxt - tc->snd_una;
1789  available_wnd = tc->snd_wnd - offset;
1790  burst_size = clib_min (burst_size, available_wnd / tc->snd_mss);
1791 
1792  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1794 
1795  while (n_segs < burst_size)
1796  {
1797  n_written = tcp_prepare_segment (wrk, tc, offset, tc->snd_mss, &b);
1798  if (!n_written)
1799  goto done;
1800 
1801  bi = vlib_get_buffer_index (vm, b);
1802  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1803  offset += n_written;
1804  n_segs += 1;
1805 
1806  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1807  tcp_bt_track_tx (tc, n_written);
1808 
1809  tc->snd_nxt += n_written;
1810  tc->snd_una_max = seq_max (tc->snd_nxt, tc->snd_una_max);
1811  }
1812 
1813 done:
1814  return n_segs;
1815 }
1816 
1817 /**
1818  * Estimate send space using proportional rate reduction (RFC6937)
1819  */
1820 int
1822 {
1823  u32 pipe, prr_out;
1824  int space;
1825 
1826  pipe = tcp_flight_size (tc);
1827  prr_out = tc->snd_rxt_bytes + (tc->snd_nxt - tc->snd_congestion);
1828 
1829  if (pipe > tc->ssthresh)
1830  {
1831  space = ((int) tc->prr_delivered * ((f64) tc->ssthresh / tc->prev_cwnd))
1832  - prr_out;
1833  }
1834  else
1835  {
1836  int limit;
1837  limit = clib_max ((int) (tc->prr_delivered - prr_out), 0) + tc->snd_mss;
1838  space = clib_min (tc->ssthresh - pipe, limit);
1839  }
1840  space = clib_max (space, prr_out ? 0 : tc->snd_mss);
1841  return space;
1842 }
1843 
1844 static inline u8
1846  sack_scoreboard_t * sb)
1847 {
1848  u32 tx_adv_sack = sb->high_sacked - tc->snd_congestion;
1849  f64 rr = (f64) tc->ssthresh / tc->prev_cwnd;
1850 
1851  if (tcp_fastrecovery_first (tc))
1852  return 1;
1853 
1854  return (tx_adv_sack > (tc->snd_una - tc->prr_start) * rr);
1855 }
1856 
1857 static inline u8
1859 {
1860  return (transport_max_tx_dequeue (&tc->connection)
1861  - (tc->snd_nxt - tc->snd_una));
1862 }
1863 
1864 #define scoreboard_rescue_rxt_valid(_sb, _tc) \
1865  (seq_geq (_sb->rescue_rxt, _tc->snd_una) \
1866  && seq_leq (_sb->rescue_rxt, _tc->snd_congestion))
1867 
1868 /**
1869  * Do retransmit with SACKs
1870  */
1871 static int
1873  u32 burst_size)
1874 {
1875  u8 snd_limited = 0, can_rescue = 0, reset_pacer = 0;
1876  u32 n_written = 0, offset, max_bytes, n_segs = 0;
1877  u32 bi, max_deq, burst_bytes, sent_bytes;
1878  sack_scoreboard_hole_t *hole;
1879  vlib_main_t *vm = wrk->vm;
1880  vlib_buffer_t *b = 0;
1881  sack_scoreboard_t *sb;
1882  int snd_space;
1883 
1885 
1886  burst_bytes = transport_connection_tx_pacer_burst (&tc->connection);
1887  burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);
1888  if (!burst_size)
1889  {
1891  return 0;
1892  }
1893 
1894  if (tcp_in_recovery (tc))
1895  snd_space = tcp_available_cc_snd_space (tc);
1896  else
1897  snd_space = tcp_fastrecovery_prr_snd_space (tc);
1898 
1899  if (snd_space < tc->snd_mss)
1900  {
1901  reset_pacer = burst_bytes > tc->snd_mss;
1902  goto done;
1903  }
1904 
1905  reset_pacer = snd_space < burst_bytes;
1906 
1907  sb = &tc->sack_sb;
1908 
1909  /* Check if snd_una is a lost retransmit */
1910  if (pool_elts (sb->holes)
1911  && seq_gt (sb->high_sacked, tc->snd_congestion)
1912  && tc->rxt_head != tc->snd_una
1914  {
1915  max_bytes = clib_min (tc->snd_mss, tc->snd_congestion - tc->snd_una);
1916  n_written = tcp_prepare_retransmit_segment (wrk, tc, 0, max_bytes, &b);
1917  if (!n_written)
1918  {
1920  goto done;
1921  }
1922  bi = vlib_get_buffer_index (vm, b);
1923  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1924  n_segs = 1;
1925 
1926  tc->rxt_head = tc->snd_una;
1927  tc->rxt_delivered += n_written;
1928  tc->prr_delivered += n_written;
1929  ASSERT (tc->rxt_delivered <= tc->snd_rxt_bytes);
1930  }
1931 
1933 
1934  TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
1935  hole = scoreboard_get_hole (sb, sb->cur_rxt_hole);
1936 
1937  max_deq = transport_max_tx_dequeue (&tc->connection);
1938  max_deq -= tc->snd_nxt - tc->snd_una;
1939 
1940  while (snd_space > 0 && n_segs < burst_size)
1941  {
1942  hole = scoreboard_next_rxt_hole (sb, hole, max_deq != 0, &can_rescue,
1943  &snd_limited);
1944  if (!hole)
1945  {
1946  /* We are out of lost holes to retransmit so send some new data. */
1947  if (max_deq > tc->snd_mss)
1948  {
1949  u32 n_segs_new;
1950  int av_wnd;
1951 
1952  av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una);
1953  av_wnd = clib_max (av_wnd, 0);
1954  snd_space = clib_min (snd_space, av_wnd);
1955  snd_space = clib_min (max_deq, snd_space);
1956  burst_size = clib_min (burst_size - n_segs,
1957  snd_space / tc->snd_mss);
1958  burst_size = clib_min (burst_size, TCP_RXT_MAX_BURST);
1959  n_segs_new = tcp_transmit_unsent (wrk, tc, burst_size);
1960  if (max_deq > n_segs_new * tc->snd_mss)
1962 
1963  n_segs += n_segs_new;
1964  goto done;
1965  }
1966 
1967  if (tcp_in_recovery (tc) || !can_rescue
1968  || scoreboard_rescue_rxt_valid (sb, tc))
1969  break;
1970 
1971  /* If rescue rxt undefined or less than snd_una then one segment of
1972  * up to SMSS octets that MUST include the highest outstanding
1973  * unSACKed sequence number SHOULD be returned, and RescueRxt set to
1974  * RecoveryPoint. HighRxt MUST NOT be updated.
1975  */
1976  max_bytes = clib_min (tc->snd_mss,
1977  tc->snd_congestion - tc->snd_una);
1978  max_bytes = clib_min (max_bytes, snd_space);
1979  offset = tc->snd_congestion - tc->snd_una - max_bytes;
1980  sb->rescue_rxt = tc->snd_congestion;
1981  n_written = tcp_prepare_retransmit_segment (wrk, tc, offset,
1982  max_bytes, &b);
1983  if (!n_written)
1984  goto done;
1985 
1986  bi = vlib_get_buffer_index (vm, b);
1987  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1988  n_segs += 1;
1989  break;
1990  }
1991 
1992  max_bytes = clib_min (hole->end - sb->high_rxt, snd_space);
1993  max_bytes = snd_limited ? clib_min (max_bytes, tc->snd_mss) : max_bytes;
1994  if (max_bytes == 0)
1995  break;
1996 
1997  offset = sb->high_rxt - tc->snd_una;
1998  n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, max_bytes,
1999  &b);
2000  ASSERT (n_written <= snd_space);
2001 
2002  /* Nothing left to retransmit */
2003  if (n_written == 0)
2004  break;
2005 
2006  bi = vlib_get_buffer_index (vm, b);
2007  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
2008 
2009  sb->high_rxt += n_written;
2010  ASSERT (seq_leq (sb->high_rxt, tc->snd_nxt));
2011 
2012  snd_space -= n_written;
2013  n_segs += 1;
2014  }
2015 
2016  if (hole)
2018 
2019 done:
2020 
2021  if (reset_pacer)
2022  {
2024  }
2025  else
2026  {
2027  sent_bytes = clib_min (n_segs * tc->snd_mss, burst_bytes);
2029  sent_bytes);
2030  }
2031 
2032  return n_segs;
2033 }
2034 
2035 /**
2036  * Fast retransmit without SACK info
2037  */
2038 static int
2040  u32 burst_size)
2041 {
2042  u32 n_written = 0, offset = 0, bi, max_deq, n_segs_now, max_bytes;
2043  u32 burst_bytes, sent_bytes;
2044  vlib_main_t *vm = wrk->vm;
2045  int snd_space, n_segs = 0;
2046  u8 cc_limited = 0;
2047  vlib_buffer_t *b;
2048 
2050  TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
2051 
2052  burst_bytes = transport_connection_tx_pacer_burst (&tc->connection);
2053  burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);
2054  if (!burst_size)
2055  {
2057  return 0;
2058  }
2059 
2060  snd_space = tcp_available_cc_snd_space (tc);
2061  cc_limited = snd_space < burst_bytes;
2062 
2063  if (!tcp_fastrecovery_first (tc))
2064  goto send_unsent;
2065 
2066  /* RFC 6582: [If a partial ack], retransmit the first unacknowledged
2067  * segment. */
2068  while (snd_space > 0 && n_segs < burst_size)
2069  {
2070  max_bytes = clib_min (tc->snd_mss,
2071  tc->snd_congestion - tc->snd_una - offset);
2072  if (!max_bytes)
2073  break;
2074  n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, max_bytes,
2075  &b);
2076 
2077  /* Nothing left to retransmit */
2078  if (n_written == 0)
2079  break;
2080 
2081  bi = vlib_get_buffer_index (vm, b);
2082  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
2083  snd_space -= n_written;
2084  offset += n_written;
2085  n_segs += 1;
2086  }
2087 
2088  if (n_segs == burst_size)
2089  goto done;
2090 
2091 send_unsent:
2092 
2093  /* RFC 6582: Send a new segment if permitted by the new value of cwnd. */
2094  if (snd_space < tc->snd_mss || tc->snd_mss == 0)
2095  goto done;
2096 
2097  max_deq = transport_max_tx_dequeue (&tc->connection);
2098  max_deq -= tc->snd_nxt - tc->snd_una;
2099  if (max_deq)
2100  {
2101  snd_space = clib_min (max_deq, snd_space);
2102  burst_size = clib_min (burst_size - n_segs, snd_space / tc->snd_mss);
2103  n_segs_now = tcp_transmit_unsent (wrk, tc, burst_size);
2104  if (n_segs_now && max_deq > n_segs_now * tc->snd_mss)
2106  n_segs += n_segs_now;
2107  }
2108 
2109 done:
2111 
2112  sent_bytes = clib_min (n_segs * tc->snd_mss, burst_bytes);
2113  sent_bytes = cc_limited ? burst_bytes : sent_bytes;
2114  transport_connection_tx_pacer_update_bytes (&tc->connection, sent_bytes);
2115 
2116  return n_segs;
2117 }
2118 
2119 static int
2120 tcp_send_acks (tcp_connection_t * tc, u32 max_burst_size)
2121 {
2122  int j, n_acks;
2123 
2124  if (!tc->pending_dupacks)
2125  {
2126  if (tcp_in_cong_recovery (tc) || !tcp_max_tx_deq (tc)
2127  || tc->state != TCP_STATE_ESTABLISHED)
2128  {
2129  tcp_send_ack (tc);
2130  return 1;
2131  }
2132  return 0;
2133  }
2134 
2135  /* If we're supposed to send dupacks but have no ooo data
2136  * send only one ack */
2137  if (!vec_len (tc->snd_sacks))
2138  {
2139  tcp_send_ack (tc);
2140  tc->pending_dupacks = 0;
2141  return 1;
2142  }
2143 
2144  /* Start with first sack block */
2145  tc->snd_sack_pos = 0;
2146 
2147  /* Generate enough dupacks to cover all sack blocks. Do not generate
2148  * more sacks than the number of packets received. But do generate at
2149  * least 3, i.e., the number needed to signal congestion, if needed. */
2150  n_acks = vec_len (tc->snd_sacks) / TCP_OPTS_MAX_SACK_BLOCKS;
2151  n_acks = clib_min (n_acks, tc->pending_dupacks);
2152  n_acks = clib_max (n_acks, clib_min (tc->pending_dupacks, 3));
2153  for (j = 0; j < clib_min (n_acks, max_burst_size); j++)
2154  tcp_send_ack (tc);
2155 
2156  if (n_acks < max_burst_size)
2157  {
2158  tc->pending_dupacks = 0;
2159  tc->snd_sack_pos = 0;
2160  tc->dupacks_out += n_acks;
2161  return n_acks;
2162  }
2163  else
2164  {
2165  TCP_DBG ("constrained by burst size");
2166  tc->pending_dupacks = n_acks - max_burst_size;
2167  tc->dupacks_out += max_burst_size;
2168  tcp_program_dupack (tc);
2169  return max_burst_size;
2170  }
2171 }
2172 
2173 static int
2175 {
2176  tcp_worker_ctx_t *wrk;
2177  u32 n_segs;
2178 
2179  if (PREDICT_FALSE (tc->state == TCP_STATE_CLOSED))
2180  return 0;
2181 
2182  wrk = tcp_get_worker (tc->c_thread_index);
2183 
2184  if (tcp_opts_sack_permitted (&tc->rcv_opts))
2185  n_segs = tcp_retransmit_sack (wrk, tc, max_burst_size);
2186  else
2187  n_segs = tcp_retransmit_no_sack (wrk, tc, max_burst_size);
2188 
2189  return n_segs;
2190 }
2191 
2192 int
2193 tcp_session_custom_tx (void *conn, u32 max_burst_size)
2194 {
2195  tcp_connection_t *tc = (tcp_connection_t *) conn;
2196  u32 n_segs = 0;
2197 
2198  if (tcp_in_cong_recovery (tc) && (tc->flags & TCP_CONN_RXT_PENDING))
2199  {
2200  tc->flags &= ~TCP_CONN_RXT_PENDING;
2201  n_segs = tcp_do_retransmit (tc, max_burst_size);
2202  max_burst_size -= n_segs;
2203  }
2204 
2205  if (!(tc->flags & TCP_CONN_SNDACK))
2206  return n_segs;
2207 
2208  tc->flags &= ~TCP_CONN_SNDACK;
2209 
2210  /* We have retransmitted packets and no dupack */
2211  if (n_segs && !tc->pending_dupacks)
2212  return n_segs;
2213 
2214  if (!max_burst_size)
2215  {
2216  tcp_program_ack (tc);
2217  return max_burst_size;
2218  }
2219 
2220  n_segs += tcp_send_acks (tc, max_burst_size);
2221 
2222  return n_segs;
2223 }
2224 #endif /* CLIB_MARCH_VARIANT */
2225 
2226 static void
2228  u16 * next0, u32 * error0)
2229 {
2230  ip_adjacency_t *adj;
2231  adj_index_t ai;
2232 
2233  /* Not thread safe but as long as the connection exists the adj should
2234  * not be removed */
2235  ai = adj_nbr_find (FIB_PROTOCOL_IP6, VNET_LINK_IP6, &tc0->c_rmt_ip,
2236  tc0->sw_if_index);
2237  if (ai == ADJ_INDEX_INVALID)
2238  {
2239  vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
2240  *next0 = TCP_OUTPUT_NEXT_DROP;
2241  *error0 = TCP_ERROR_LINK_LOCAL_RW;
2242  return;
2243  }
2244 
2245  adj = adj_get (ai);
2247  *next0 = TCP_OUTPUT_NEXT_IP_REWRITE;
2248  else if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP)
2249  *next0 = TCP_OUTPUT_NEXT_IP_ARP;
2250  else
2251  {
2252  *next0 = TCP_OUTPUT_NEXT_DROP;
2253  *error0 = TCP_ERROR_LINK_LOCAL_RW;
2254  }
2255  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai;
2256 }
2257 
2258 static void
2260  u32 * to_next, u32 n_bufs)
2261 {
2262  u32 n_trace = vlib_get_trace_count (vm, node);
2263  tcp_connection_t *tc;
2264  tcp_tx_trace_t *t;
2265  vlib_buffer_t *b;
2266  tcp_header_t *th;
2267  int i;
2268 
2269  for (i = 0; i < clib_min (n_trace, n_bufs); i++)
2270  {
2271  b = vlib_get_buffer (vm, to_next[i]);
2272  th = vlib_buffer_get_current (b);
2273  tc = tcp_connection_get (vnet_buffer (b)->tcp.connection_index,
2274  vm->thread_index);
2275  t = vlib_add_trace (vm, node, b, sizeof (*t));
2276  clib_memcpy_fast (&t->tcp_header, th, sizeof (t->tcp_header));
2277  clib_memcpy_fast (&t->tcp_connection, tc, sizeof (t->tcp_connection));
2278  }
2279 }
2280 
2281 always_inline void
2283  tcp_connection_t * tc0, u8 is_ip4)
2284 {
2285  u8 __clib_unused *ih0;
2286  tcp_header_t __clib_unused *th0 = vlib_buffer_get_current (b0);
2287 
2288  TCP_EVT (TCP_EVT_OUTPUT, tc0, th0->flags, b0->current_length);
2289 
2290  if (is_ip4)
2291  ih0 = vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4,
2292  IP_PROTOCOL_TCP, tcp_csum_offload (tc0));
2293  else
2294  ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6, &tc0->c_rmt_ip6,
2295  IP_PROTOCOL_TCP);
2296 
2297 }
2298 
2299 always_inline void
2301 {
2302  if (PREDICT_TRUE (!(tc->cfg_flags & TCP_CFG_F_TSO)))
2303  return;
2304 
2305  u16 data_len = b->current_length - sizeof (tcp_header_t) - tc->snd_opts_len;
2306 
2307  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID))
2309 
2310  if (PREDICT_TRUE (data_len <= tc->snd_mss))
2311  return;
2312  else
2313  {
2314  ASSERT ((b->flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID) != 0);
2315  ASSERT ((b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID) != 0);
2316  b->flags |= VNET_BUFFER_F_GSO;
2317  vnet_buffer2 (b)->gso_l4_hdr_sz =
2318  sizeof (tcp_header_t) + tc->snd_opts_len;
2319  vnet_buffer2 (b)->gso_size = tc->snd_mss;
2320  }
2321 }
2322 
2323 always_inline void
2325  vlib_node_runtime_t * error_node, u16 * next0,
2326  u8 is_ip4)
2327 {
2328  /* If next_index is not drop use it */
2329  if (tc0->next_node_index)
2330  {
2331  *next0 = tc0->next_node_index;
2332  vnet_buffer (b0)->tcp.next_node_opaque = tc0->next_node_opaque;
2333  }
2334  else
2335  {
2336  *next0 = TCP_OUTPUT_NEXT_IP_LOOKUP;
2337  }
2338 
2339  vnet_buffer (b0)->sw_if_index[VLIB_TX] = tc0->c_fib_index;
2340  vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
2341 
2342  if (!is_ip4)
2343  {
2344  u32 error0 = 0;
2345 
2346  if (PREDICT_FALSE (ip6_address_is_link_local_unicast (&tc0->c_rmt_ip6)))
2347  tcp_output_handle_link_local (tc0, b0, next0, &error0);
2348 
2349  if (PREDICT_FALSE (error0))
2350  {
2351  b0->error = error_node->errors[error0];
2352  return;
2353  }
2354  }
2355 
2356  if (!TCP_ALWAYS_ACK)
2357  tcp_timer_reset (tc0, TCP_TIMER_DELACK);
2358 
2359  tc0->segs_out += 1;
2360 }
2361 
2364  vlib_frame_t * frame, int is_ip4)
2365 {
2366  u32 n_left_from, *from, thread_index = vm->thread_index;
2367  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2368  u16 nexts[VLIB_FRAME_SIZE], *next;
2369  vlib_node_runtime_t *error_node;
2370 
2371  error_node = vlib_node_get_runtime (vm, tcp_node_index (output, is_ip4));
2372 
2373  from = vlib_frame_vector_args (frame);
2374  n_left_from = frame->n_vectors;
2375  tcp_set_time_now (tcp_get_worker (thread_index));
2376 
2378  tcp46_output_trace_frame (vm, node, from, n_left_from);
2379 
2380  vlib_get_buffers (vm, from, bufs, n_left_from);
2381  b = bufs;
2382  next = nexts;
2383 
2384  while (n_left_from >= 4)
2385  {
2386  tcp_connection_t *tc0, *tc1;
2387 
2388  {
2389  vlib_prefetch_buffer_header (b[2], STORE);
2390  CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
2391 
2392  vlib_prefetch_buffer_header (b[3], STORE);
2393  CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
2394  }
2395 
2396  tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
2397  thread_index);
2398  tc1 = tcp_connection_get (vnet_buffer (b[1])->tcp.connection_index,
2399  thread_index);
2400 
2401  if (PREDICT_TRUE (!tc0 + !tc1 == 0))
2402  {
2403  tcp_output_push_ip (vm, b[0], tc0, is_ip4);
2404  tcp_output_push_ip (vm, b[1], tc1, is_ip4);
2405 
2406  tcp_check_if_gso (tc0, b[0]);
2407  tcp_check_if_gso (tc1, b[1]);
2408 
2409  tcp_output_handle_packet (tc0, b[0], error_node, &next[0], is_ip4);
2410  tcp_output_handle_packet (tc1, b[1], error_node, &next[1], is_ip4);
2411  }
2412  else
2413  {
2414  if (tc0 != 0)
2415  {
2416  tcp_output_push_ip (vm, b[0], tc0, is_ip4);
2417  tcp_check_if_gso (tc0, b[0]);
2418  tcp_output_handle_packet (tc0, b[0], error_node, &next[0],
2419  is_ip4);
2420  }
2421  else
2422  {
2423  b[0]->error = error_node->errors[TCP_ERROR_INVALID_CONNECTION];
2424  next[0] = TCP_OUTPUT_NEXT_DROP;
2425  }
2426  if (tc1 != 0)
2427  {
2428  tcp_output_push_ip (vm, b[1], tc1, is_ip4);
2429  tcp_check_if_gso (tc1, b[1]);
2430  tcp_output_handle_packet (tc1, b[1], error_node, &next[1],
2431  is_ip4);
2432  }
2433  else
2434  {
2435  b[1]->error = error_node->errors[TCP_ERROR_INVALID_CONNECTION];
2436  next[1] = TCP_OUTPUT_NEXT_DROP;
2437  }
2438  }
2439 
2440  b += 2;
2441  next += 2;
2442  n_left_from -= 2;
2443  }
2444  while (n_left_from > 0)
2445  {
2446  tcp_connection_t *tc0;
2447 
2448  if (n_left_from > 1)
2449  {
2450  vlib_prefetch_buffer_header (b[1], STORE);
2451  CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
2452  }
2453 
2454  tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
2455  thread_index);
2456 
2457  if (PREDICT_TRUE (tc0 != 0))
2458  {
2459  tcp_output_push_ip (vm, b[0], tc0, is_ip4);
2460  tcp_check_if_gso (tc0, b[0]);
2461  tcp_output_handle_packet (tc0, b[0], error_node, &next[0], is_ip4);
2462  }
2463  else
2464  {
2465  b[0]->error = error_node->errors[TCP_ERROR_INVALID_CONNECTION];
2466  next[0] = TCP_OUTPUT_NEXT_DROP;
2467  }
2468 
2469  b += 1;
2470  next += 1;
2471  n_left_from -= 1;
2472  }
2473 
2474  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2475  vlib_node_increment_counter (vm, tcp_node_index (output, is_ip4),
2476  TCP_ERROR_PKTS_SENT, frame->n_vectors);
2477  return frame->n_vectors;
2478 }
2479 
2481  vlib_frame_t * from_frame)
2482 {
2483  return tcp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ );
2484 }
2485 
2487  vlib_frame_t * from_frame)
2488 {
2489  return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ );
2490 }
2491 
2492 /* *INDENT-OFF* */
2494 {
2495  .name = "tcp4-output",
2496  /* Takes a vector of packets. */
2497  .vector_size = sizeof (u32),
2498  .n_errors = TCP_N_ERROR,
2499  .protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
2500  .error_strings = tcp_error_strings,
2501  .n_next_nodes = TCP_OUTPUT_N_NEXT,
2502  .next_nodes = {
2503 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
2505 #undef _
2506  },
2507  .format_buffer = format_tcp_header,
2508  .format_trace = format_tcp_tx_trace,
2509 };
2510 /* *INDENT-ON* */
2511 
2512 /* *INDENT-OFF* */
2514 {
2515  .name = "tcp6-output",
2516  /* Takes a vector of packets. */
2517  .vector_size = sizeof (u32),
2518  .n_errors = TCP_N_ERROR,
2519  .protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
2520  .error_strings = tcp_error_strings,
2521  .n_next_nodes = TCP_OUTPUT_N_NEXT,
2522  .next_nodes = {
2523 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
2525 #undef _
2526  },
2527  .format_buffer = format_tcp_header,
2528  .format_trace = format_tcp_tx_trace,
2529 };
2530 /* *INDENT-ON* */
2531 
/* Next-node indices for the tcp reset nodes.
 * NOTE(review): enum members were dropped by the doc extractor;
 * reconstructed from the foreach_tcp4/6_reset_next macros below. */
typedef enum _tcp_reset_next
{
  TCP_RESET_NEXT_DROP,
  TCP_RESET_NEXT_IP_LOOKUP,
  TCP_RESET_N_NEXT
} tcp_reset_next_t;
2539 #define foreach_tcp4_reset_next \
2540  _(DROP, "error-drop") \
2541  _(IP_LOOKUP, "ip4-lookup")
2542 
2543 #define foreach_tcp6_reset_next \
2544  _(DROP, "error-drop") \
2545  _(IP_LOOKUP, "ip6-lookup")
2546 
2547 static uword
2549  vlib_frame_t * from_frame, u8 is_ip4)
2550 {
2551  u32 n_left_from, next_index, *from, *to_next;
2552  u32 my_thread_index = vm->thread_index;
2553 
2554  from = vlib_frame_vector_args (from_frame);
2555  n_left_from = from_frame->n_vectors;
2556 
2557  next_index = node->cached_next_index;
2558 
2559  while (n_left_from > 0)
2560  {
2561  u32 n_left_to_next;
2562 
2563  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2564 
2565  while (n_left_from > 0 && n_left_to_next > 0)
2566  {
2567  u32 bi0;
2568  vlib_buffer_t *b0;
2569  tcp_tx_trace_t *t0;
2570  tcp_header_t *th0;
2571  u32 error0 = TCP_ERROR_RST_SENT, next0 = TCP_RESET_NEXT_IP_LOOKUP;
2572 
2573  bi0 = from[0];
2574  to_next[0] = bi0;
2575  from += 1;
2576  to_next += 1;
2577  n_left_from -= 1;
2578  n_left_to_next -= 1;
2579 
2580  b0 = vlib_get_buffer (vm, bi0);
2581 
2582  if (tcp_make_reset_in_place (vm, b0, vnet_buffer (b0)->tcp.flags,
2583  my_thread_index, is_ip4))
2584  {
2585  error0 = TCP_ERROR_LOOKUP_DROPS;
2586  next0 = TCP_RESET_NEXT_DROP;
2587  goto done;
2588  }
2589 
2590  /* Prepare to send to IP lookup */
2591  vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
2592  next0 = TCP_RESET_NEXT_IP_LOOKUP;
2593 
2594  done:
2595  b0->error = node->errors[error0];
2596  b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
2597  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2598  {
2599  th0 = vlib_buffer_get_current (b0);
2600  if (is_ip4)
2601  th0 = ip4_next_header ((ip4_header_t *) th0);
2602  else
2603  th0 = ip6_next_header ((ip6_header_t *) th0);
2604  t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
2605  clib_memcpy_fast (&t0->tcp_header, th0,
2606  sizeof (t0->tcp_header));
2607  }
2608 
2609  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
2610  n_left_to_next, bi0, next0);
2611  }
2612  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2613  }
2614  return from_frame->n_vectors;
2615 }
2616 
2618  vlib_frame_t * from_frame)
2619 {
2620  return tcp46_send_reset_inline (vm, node, from_frame, 1);
2621 }
2622 
2624  vlib_frame_t * from_frame)
2625 {
2626  return tcp46_send_reset_inline (vm, node, from_frame, 0);
2627 }
2628 
2629 /* *INDENT-OFF* */
2631  .name = "tcp4-reset",
2632  .vector_size = sizeof (u32),
2633  .n_errors = TCP_N_ERROR,
2634  .error_strings = tcp_error_strings,
2635  .n_next_nodes = TCP_RESET_N_NEXT,
2636  .next_nodes = {
2637 #define _(s,n) [TCP_RESET_NEXT_##s] = n,
2639 #undef _
2640  },
2641  .format_trace = format_tcp_tx_trace,
2642 };
2643 /* *INDENT-ON* */
2644 
2645 /* *INDENT-OFF* */
2647  .name = "tcp6-reset",
2648  .vector_size = sizeof (u32),
2649  .n_errors = TCP_N_ERROR,
2650  .error_strings = tcp_error_strings,
2651  .n_next_nodes = TCP_RESET_N_NEXT,
2652  .next_nodes = {
2653 #define _(s,n) [TCP_RESET_NEXT_##s] = n,
2655 #undef _
2656  },
2657  .format_trace = format_tcp_tx_trace,
2658 };
2659 /* *INDENT-ON* */
2660 
2661 /*
2662  * fd.io coding-style-patch-verification: ON
2663  *
2664  * Local Variables:
2665  * eval: (c-set-style "gnu")
2666  * End:
2667  */
void tcp_make_fin(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to FIN-ACK.
Definition: tcp_output.c:569
#define tcp_in_cong_recovery(tc)
Definition: tcp.h:474
u32 flags
buffer flags: VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index, VLIB_BUFFER_IS_TRACED: trace this buffer.
Definition: buffer.h:124
static void tcp_check_if_gso(tcp_connection_t *tc, vlib_buffer_t *b)
Definition: tcp_output.c:2300
static void tcp_check_sack_reneging(tcp_connection_t *tc)
Definition: tcp_output.c:1419
void session_flush_frames_main_thread(vlib_main_t *vm)
Definition: session.c:1489
End of options.
Definition: tcp_packet.h:104
static u32 tcp_options_write(u8 *data, tcp_options_t *opts)
Write TCP options to segment.
Definition: tcp_output.c:175
u32 flags
Definition: vhost_user.h:141
#define clib_min(x, y)
Definition: clib.h:295
static int tcp_send_acks(tcp_connection_t *tc, u32 max_burst_size)
Definition: tcp_output.c:2120
#define TCP_OPTION_LEN_EOL
Definition: tcp_packet.h:162
u16 ip4_tcp_compute_checksum_custom(vlib_main_t *vm, vlib_buffer_t *p0, ip46_address_t *src, ip46_address_t *dst)
Definition: tcp_output.c:479
#define CLIB_UNUSED(x)
Definition: clib.h:82
#define tcp_in_recovery(tc)
Definition: tcp.h:465
static f64 tcp_time_now_us(u32 thread_index)
Definition: tcp.h:1021
static void tcp_retransmit_timer_set(tcp_connection_t *tc)
Definition: tcp.h:1144
static u32 transport_rx_fifo_size(transport_connection_t *tc)
Definition: session.h:490
#define TCP_OPTION_LEN_SACK_PERMITTED
Definition: tcp_packet.h:166
#define seq_leq(_s1, _s2)
Definition: tcp.h:867
static u32 vlib_get_trace_count(vlib_main_t *vm, vlib_node_runtime_t *rt)
Definition: trace_funcs.h:187
static void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers Frees the entire buffer chain for each buffer.
Definition: buffer_funcs.h:865
ip4_address_t src_address
Definition: ip4_packet.h:170
#define tcp_node_index(node_id, is_ip4)
Definition: tcp.h:677
static void session_add_pending_tx_buffer(session_type_t st, u32 thread_index, u32 bi)
Definition: session.h:627
int session_tx_fifo_peek_bytes(transport_connection_t *tc, u8 *buffer, u32 offset, u32 max_bytes)
Definition: session.c:502
#define TCP_TO_TIMER_TICK
Factor for converting ticks to timer ticks.
Definition: tcp.h:95
#define vnet_buffer2(b)
Definition: buffer.h:424
Selective Ack permitted.
Definition: tcp_packet.h:108
#define TCP_FLAG_SYN
Definition: fa_node.h:13
#define tcp_opts_tstamp(_to)
Definition: tcp_packet.h:156
void tcp_make_synack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN-ACK.
Definition: tcp_output.c:604
#define PREDICT_TRUE(x)
Definition: clib.h:112
i16 current_data
signed offset in data[], pre_data[] that we are currently processing.
Definition: buffer.h:110
static void tcp_flush_frame_to_ip_lookup(tcp_worker_ctx_t *wrk, u8 is_ip4)
Flush ip lookup tx frames populated by timer pops.
Definition: tcp_output.c:989
static tcp_connection_t * tcp_connection_get_if_valid(u32 conn_index, u32 thread_index)
Definition: tcp.h:723
#define clib_memcpy_fast(a, b, c)
Definition: string.h:81
#define NULL
Definition: clib.h:58
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
struct _sack_scoreboard sack_scoreboard_t
static int tcp_do_retransmit(tcp_connection_t *tc, u32 max_burst_size)
Definition: tcp_output.c:2174
IP unicast adjacency.
Definition: adj.h:221
u32 fib_table_get_index_for_sw_if_index(fib_protocol_t proto, u32 sw_if_index)
Get the index of the FIB bound to the interface.
Definition: fib_table.c:972
static tcp_connection_t * tcp_half_open_connection_get(u32 conn_index)
Definition: tcp.h:770
#define tcp_zero_rwnd_sent_off(tc)
Definition: tcp.h:488
void session_add_self_custom_tx_evt(transport_connection_t *tc, u8 has_prio)
Definition: session.c:122
void tcp_timer_retransmit_handler(u32 tc_index)
Definition: tcp_output.c:1457
struct _tcp_main tcp_main_t
u32 thread_index
Definition: main.h:218
void tcp_connection_timers_reset(tcp_connection_t *tc)
Stop all connection timers.
Definition: tcp.c:520
This packet is to be rewritten and forwarded to the next processing node.
Definition: adj.h:73
u16 current_length
Nbytes between current data and the end of this buffer.
Definition: buffer.h:113
static int tcp_transmit_unsent(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Definition: tcp_output.c:1781
#define TCP_OPTS_ALIGN
Definition: tcp_packet.h:173
static u32 tcp_initial_wnd_unscaled(tcp_connection_t *tc)
TCP's initial window.
Definition: tcp_output.c:82
enum _tcp_output_next tcp_output_next_t
vl_api_address_t src
Definition: gre.api:51
int i
static u32 format_get_indent(u8 *s)
Definition: format.h:72
uword ip_csum_t
Definition: ip_packet.h:219
static ip_csum_t ip_csum_with_carry(ip_csum_t sum, ip_csum_t x)
Definition: ip_packet.h:222
struct _tcp_connection tcp_connection_t
u8 * format(u8 *s, const char *fmt,...)
Definition: format.c:424
static u32 tcp_available_cc_snd_space(const tcp_connection_t *tc)
Estimate of how many bytes we can still push into the network.
Definition: tcp.h:970
#define tcp_opts_sack(_to)
Definition: tcp_packet.h:158
u8 data[128]
Definition: ipsec.api:251
#define VLIB_NODE_FN(node)
Definition: node.h:202
static void tcp_push_ip_hdr(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, vlib_buffer_t *b)
Definition: tcp_output.c:894
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:450
static uword tcp46_send_reset_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame, u8 is_ip4)
Definition: tcp_output.c:2548
vlib_error_t * errors
Vector of errors for this node.
Definition: node.h:470
No operation.
Definition: tcp_packet.h:105
static uword vlib_buffer_length_in_chain(vlib_main_t *vm, vlib_buffer_t *b)
Get length in bytes of the buffer chain.
Definition: buffer_funcs.h:366
u8 n_sack_blocks
Number of SACKs blocks.
Definition: tcp_packet.h:151
struct _tcp_header tcp_header_t
int tcp_half_open_connection_cleanup(tcp_connection_t *tc)
Try to cleanup half-open connection.
Definition: tcp.c:211
#define scoreboard_rescue_rxt_valid(_sb, _tc)
Definition: tcp_output.c:1864
ip6_address_t src_address
Definition: ip6_packet.h:383
unsigned char u8
Definition: types.h:56
struct _sack_scoreboard_hole sack_scoreboard_hole_t
u8 wscale
Option flags, see above.
Definition: tcp_packet.h:146
enum fib_protocol_t_ fib_protocol_t
Protocol Type.
#define TCP_OPTS_MAX_SACK_BLOCKS
Definition: tcp_packet.h:174
double f64
Definition: types.h:142
vlib_node_registration_t ip4_lookup_node
(constructor) VLIB_REGISTER_NODE (ip4_lookup_node)
Definition: ip4_forward.c:101
#define tcp_csum_offload(tc)
Definition: tcp.h:477
#define foreach_tcp4_reset_next
Definition: tcp_output.c:2539
static u32 tcp_prepare_retransmit_segment(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t **b)
Build a retransmit segment.
Definition: tcp_output.c:1377
u16 src_port
Definition: udp.api:41
u8 session_type_t
Limit MSS.
Definition: tcp_packet.h:106
#define tcp_zero_rwnd_sent_on(tc)
Definition: tcp.h:487
static u16 ip_calculate_l4_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip_csum_t sum0, u32 payload_length, u8 *iph, u32 ip_header_size, u8 *l4h)
Definition: ip.h:184
void session_transport_closing_notify(transport_connection_t *tc)
Notification from transport that connection is being closed.
Definition: session.c:856
static uword tcp46_output_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, int is_ip4)
Definition: tcp_output.c:2363
static void * tcp_init_buffer(vlib_main_t *vm, vlib_buffer_t *b)
Definition: tcp_output.c:438
static ip_adjacency_t * adj_get(adj_index_t adj_index)
Get a pointer to an adjacency object from its index.
Definition: adj.h:433
void tcp_make_syn(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN.
Definition: tcp_output.c:578
void tcp_timer_retransmit_syn_handler(u32 tc_index)
SYN retransmit timer handler.
Definition: tcp_output.c:1606
static int tcp_prepare_segment(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t **b)
Allocate a new buffer and build a new tcp segment.
Definition: tcp_output.c:1265
#define seq_gt(_s1, _s2)
Definition: tcp.h:868
static void tcp_connection_set_state(tcp_connection_t *tc, tcp_state_t state)
Definition: tcp.h:739
#define tcp_cfg
Definition: tcp.h:676
vl_api_interface_index_t sw_if_index
Definition: gre.api:50
sack_scoreboard_hole_t * scoreboard_get_hole(sack_scoreboard_t *sb, u32 index)
Definition: tcp_input.c:671
#define always_inline
Definition: clib.h:98
#define TCP_OPTION_LEN_SACK_BLOCK
Definition: tcp_packet.h:168
ip4_address_t dst_address
Definition: ip4_packet.h:170
#define TCP_FLAG_ACK
Definition: fa_node.h:16
u8 * format_white_space(u8 *s, va_list *va)
Definition: std-formats.c:129
static void tcp_cc_loss(tcp_connection_t *tc)
Definition: tcp.h:1066
tcp_main_t tcp_main
Definition: tcp.c:30
static tcp_header_t * tcp_buffer_hdr(vlib_buffer_t *b)
Definition: tcp.h:693
#define vlib_prefetch_buffer_header(b, type)
Prefetch buffer metadata.
Definition: buffer.h:203
vlib_frame_t * vlib_get_frame_to_node(vlib_main_t *vm, u32 to_node_index)
Definition: main.c:185
enum _tcp_state tcp_state_t
#define TCP_ALWAYS_ACK
On/off delayed acks.
Definition: tcp.h:39
#define TCP_RTO_MAX
Definition: tcp.h:99
vhost_vring_state_t state
Definition: vhost_user.h:146
static void * ip4_next_header(ip4_header_t *i)
Definition: ip4_packet.h:241
static u32 tcp_time_now(void)
Definition: tcp.h:999
sack_block_t * sacks
SACK blocks.
Definition: tcp_packet.h:150
unsigned int u32
Definition: types.h:88
static void tcp46_output_trace_frame(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *to_next, u32 n_bufs)
Definition: tcp_output.c:2259
#define TCP_ESTABLISH_TIME
Definition: tcp.h:105
sack_scoreboard_hole_t * scoreboard_next_rxt_hole(sack_scoreboard_t *sb, sack_scoreboard_hole_t *start, u8 have_sent_1_smss, u8 *can_rescue, u8 *snd_limited)
Figure out the next hole to retransmit.
Definition: tcp_input.c:865
#define tcp_validate_txf_size(_tc, _a)
Definition: tcp.h:1211
#define VLIB_FRAME_SIZE
Definition: node.h:378
static void tcp_enqueue_to_ip_lookup_now(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4, u32 fib_index)
Definition: tcp_output.c:662
static void tcp_push_hdr_i(tcp_connection_t *tc, vlib_buffer_t *b, u32 snd_nxt, u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)
Push TCP header and update connection variables.
Definition: tcp_output.c:1064
static u32 vlib_get_buffer_index(vlib_main_t *vm, void *p)
Translate buffer pointer into buffer index.
Definition: buffer_funcs.h:257
u32 tcp_session_push_header(transport_connection_t *tconn, vlib_buffer_t *b)
Definition: tcp_output.c:1137
#define TCP_OPTION_LEN_WINDOW_SCALE
Definition: tcp_packet.h:165
vlib_node_registration_t tcp6_reset_node
(constructor) VLIB_REGISTER_NODE (tcp6_reset_node)
Definition: tcp_output.c:2646
#define TCP_RTO_SYN_RETRIES
Definition: tcp.h:102
#define tcp_zero_rwnd_sent(tc)
Definition: tcp.h:486
vlib_error_t error
Error code for buffers to be enqueued to error handler.
Definition: buffer.h:136
#define tcp_trajectory_add_start(b, start)
Definition: tcp.h:706
#define TRANSPORT_MAX_HDRS_LEN
#define TRANSPORT_PACER_MIN_MSS
Definition: transport.h:22
static session_type_t session_type_from_proto_and_ip(transport_proto_t proto, u8 is_ip4)
vlib_main_t * vm
convenience pointer to this thread&#39;s vlib main
Definition: tcp.h:525
void tcp_send_reset(tcp_connection_t *tc)
Build and set reset packet for connection.
Definition: tcp_output.c:861
void tcp_send_synack(tcp_connection_t *tc)
Definition: tcp_output.c:962
#define ADJ_INDEX_INVALID
Invalid ADJ index - used when no adj is known likewise blazoned capitals INVALID speak volumes where ...
Definition: adj_types.h:36
static int tcp_make_synack_options(tcp_connection_t *tc, tcp_options_t *opts)
Definition: tcp_output.c:285
static int tcp_make_syn_options(tcp_connection_t *tc, tcp_options_t *opts)
Definition: tcp_output.c:256
static void * vlib_buffer_make_headroom(vlib_buffer_t *b, u8 size)
Make head room, typically for packet headers.
Definition: buffer.h:350
static int tcp_retransmit_sack(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Do retransmit with SACKs.
Definition: tcp_output.c:1872
#define tcp_in_fastrecovery(tc)
Definition: tcp.h:464
void tcp_connection_tx_pacer_reset(tcp_connection_t *tc, u32 window, u32 start_bucket)
Definition: tcp.c:1402
static void * vlib_buffer_push_tcp_net_order(vlib_buffer_t *b, u16 sp, u16 dp, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
Definition: tcp.h:1255
#define tcp_opts_mss(_to)
Definition: tcp_packet.h:155
unsigned short u16
Definition: types.h:57
void tcp_flush_frames_to_output(tcp_worker_ctx_t *wrk)
Flush v4 and v6 tcp and ip-lookup tx frames for thread index.
Definition: tcp_output.c:1005
void vlib_put_frame_to_node(vlib_main_t *vm, u32 to_node_index, vlib_frame_t *f)
Definition: main.c:194
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
Definition: buffer.h:229
#define TCP_TIMER_HANDLE_INVALID
Definition: tcp.h:92
static void tcp_output_handle_link_local(tcp_connection_t *tc0, vlib_buffer_t *b0, u16 *next0, u32 *error0)
Definition: tcp_output.c:2227
#define foreach_tcp6_output_next
Definition: tcp_output.c:34
static u32 tcp_flight_size(const tcp_connection_t *tc)
Our estimate of the number of bytes in flight (pipe size)
Definition: tcp.h:893
#define PREDICT_FALSE(x)
Definition: clib.h:111
void tcp_program_dupack(tcp_connection_t *tc)
Definition: tcp_output.c:1194
static int tcp_make_reset_in_place(vlib_main_t *vm, vlib_buffer_t *b0, tcp_state_t state, u8 thread_index, u8 is_ip4)
Definition: tcp_output.c:693
#define TCP_FLAG_FIN
Definition: fa_node.h:12
int tcp_fastrecovery_prr_snd_space(tcp_connection_t *tc)
Estimate send space using proportional rate reduction (RFC6937)
Definition: tcp_output.c:1821
static u8 tcp_window_compute_scale(u32 window)
Definition: tcp_output.c:70
int tcp_session_custom_tx(void *conn, u32 max_burst_size)
Definition: tcp_output.c:2193
#define vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0)
Finish enqueueing one buffer forward in the graph.
Definition: buffer_node.h:218
vl_api_address_t dst
Definition: gre.api:52
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Get pointer to next frame vector data by (vlib_node_runtime_t, next_index).
Definition: node_funcs.h:338
#define TCP_OPTION_LEN_TIMESTAMP
Definition: tcp_packet.h:167
#define foreach_tcp4_output_next
Definition: tcp_output.c:28
#define TCP_RXT_MAX_BURST
Definition: tcp.h:35
#define TCP_WND_MAX
Definition: tcp_packet.h:171
static void tcp_enqueue_to_ip_lookup(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4, u32 fib_index)
Definition: tcp_output.c:669
Selective Ack block.
Definition: tcp_packet.h:109
static void vlib_node_increment_counter(vlib_main_t *vm, u32 node_index, u32 counter_index, u64 increment)
Definition: node_funcs.h:1150
#define TCP_FLAG_RST
Definition: fa_node.h:14
#define TCP_DBG(_fmt, _args...)
Definition: tcp_debug.h:146
u8 len
Definition: ip_types.api:90
#define TCP_MAX_WND_SCALE
Definition: tcp_packet.h:172
static void tcp_timer_reset(tcp_connection_t *tc, u8 timer_id)
Definition: tcp.h:1116
static void tcp_output_handle_packet(tcp_connection_t *tc0, vlib_buffer_t *b0, vlib_node_runtime_t *error_node, u16 *next0, u8 is_ip4)
Definition: tcp_output.c:2324
void scoreboard_init_rxt(sack_scoreboard_t *sb, u32 snd_una)
Definition: tcp_input.c:925
This packet matches an "incomplete adjacency" and packets need to be passed to ARP to find rewrite st...
Definition: adj.h:63
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:169
static void * vlib_buffer_push_tcp(vlib_buffer_t *b, u16 sp_net, u16 dp_net, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
Definition: tcp.h:1292
tcp_header_t tcp_header
Definition: tcp_output.c:48
u16 n_vectors
Definition: node.h:397
void scoreboard_clear_reneging(sack_scoreboard_t *sb, u32 start, u32 end)
Definition: tcp_input.c:966
static_always_inline uword vlib_get_thread_index(void)
Definition: threads.h:213
#define CLIB_PREFETCH(addr, size, type)
Definition: cache.h:80
vlib_main_t * vm
Definition: buffer.c:323
static_always_inline void vlib_buffer_enqueue_to_next(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 *nexts, uword count)
Definition: buffer_node.h:332
void tcp_send_window_update_ack(tcp_connection_t *tc)
Send window update ack.
Definition: tcp_output.c:1239
void tcp_program_retransmit(tcp_connection_t *tc)
Definition: tcp_output.c:1206
static u32 tcp_tstamp(tcp_connection_t *tc)
Generate timestamp for tcp connection.
Definition: tcp.h:1014
void tcp_send_reset_w_pkt(tcp_connection_t *tc, vlib_buffer_t *pkt, u32 thread_index, u8 is_ip4)
Send reset without reusing existing buffer.
Definition: tcp_output.c:778
#define clib_warning(format, args...)
Definition: error.h:59
static vlib_node_runtime_t * vlib_node_get_runtime(vlib_main_t *vm, u32 node_index)
Get node runtime by node index.
Definition: node_funcs.h:89
void tcp_bt_track_tx(tcp_connection_t *tc, u32 len)
Track a tcp tx burst.
Definition: tcp_bt.c:297
format_function_t format_tcp_header
Definition: format.h:101
struct _transport_connection transport_connection_t
#define TCP_USE_SACKS
Disable only for testing.
Definition: tcp.h:40
#define tcp_recovery_on(tc)
Definition: tcp.h:462
static u32 tcp_window_to_advertise(tcp_connection_t *tc, tcp_state_t state)
Compute and return window to advertise, scaled as per RFC1323.
Definition: tcp_output.c:162
#define tcp_fastrecovery_first(tc)
Definition: tcp.h:470
u32 adj_index_t
An index for adjacencies.
Definition: adj_types.h:30
#define ARRAY_LEN(x)
Definition: clib.h:62
void vlib_put_next_frame(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, u32 n_vectors_left)
Release pointer to next frame vector data.
Definition: main.c:456
u16 mss
Maximum segment size advertised.
Definition: tcp_packet.h:147
static void * ip6_next_header(ip6_header_t *i)
Definition: ip6_packet.h:410
static int tcp_retransmit_no_sack(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Fast retransmit without SACK info.
Definition: tcp_output.c:2039
static void tcp_make_ack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to ACK.
Definition: tcp_output.c:558
static u32 transport_max_tx_dequeue(transport_connection_t *tc)
Definition: session.h:476
static void tcp_timer_update(tcp_connection_t *tc, u8 timer_id, u32 interval)
Definition: tcp.h:1129
u16 ip6_tcp_udp_icmp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip6_header_t *ip0, int *bogus_lengthp)
Definition: ip6_forward.c:1010
signed int i32
Definition: types.h:77
vlib_node_registration_t ip6_lookup_node
(constructor) VLIB_REGISTER_NODE (ip6_lookup_node)
Definition: ip6_forward.c:656
static int tcp_make_established_options(tcp_connection_t *tc, tcp_options_t *opts)
Definition: tcp_output.c:320
u16 cached_next_index
Next frame index that vector arguments were last enqueued to last time this node ran.
Definition: node.h:515
#define ASSERT(truth)
static void tcp_cc_init_rxt_timeout(tcp_connection_t *tc)
Reset congestion control, switch cwnd to loss window and try again.
Definition: tcp_output.c:1435
static void tcp_output_push_ip(vlib_main_t *vm, vlib_buffer_t *b0, tcp_connection_t *tc0, u8 is_ip4)
Definition: tcp_output.c:2282
#define tcp_syn(_th)
Definition: tcp_packet.h:80
static u8 * format_tcp_tx_trace(u8 *s, va_list *args)
Definition: tcp_output.c:53
u16 ip4_tcp_udp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip4_header_t *ip0)
Definition: ip4_forward.c:1299
void tcp_update_burst_snd_vars(tcp_connection_t *tc)
Update burst send vars.
Definition: tcp_output.c:390
#define seq_geq(_s1, _s2)
Definition: tcp.h:869
static uword ip6_address_is_link_local_unicast(const ip6_address_t *a)
Definition: ip6_packet.h:326
#define clib_mem_unaligned(pointer, type)
Definition: types.h:155
#define tcp_fastrecovery_first_off(tc)
Definition: tcp.h:472
static void tcp_update_rcv_wnd(tcp_connection_t *tc)
Definition: tcp_output.c:113
void tcp_send_fin(tcp_connection_t *tc)
Send FIN.
Definition: tcp_output.c:1015
#define clib_max(x, y)
Definition: clib.h:288
void tcp_send_ack(tcp_connection_t *tc)
Definition: tcp_output.c:1165
static void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
Definition: trace_funcs.h:55
void transport_connection_tx_pacer_update_bytes(transport_connection_t *tc, u32 bytes)
Definition: transport.c:701
#define seq_lt(_s1, _s2)
Definition: tcp.h:866
int tcp_retransmit_first_unacked(tcp_worker_ctx_t *wrk, tcp_connection_t *tc)
Retransmit first unacked segment.
Definition: tcp_output.c:1762
template key/value backing page structure
Definition: bihash_doc.h:44
u32 ip_version_traffic_class_and_flow_label
Definition: ip6_packet.h:370
#define tcp_opts_wscale(_to)
Definition: tcp_packet.h:157
Definition: defs.h:47
void tcp_bt_check_app_limited(tcp_connection_t *tc)
Check if sample to be generated is app limited.
Definition: tcp_bt.c:282
u32 tsval
Timestamp value.
Definition: tcp_packet.h:148
u32 tsecr
Echoed/reflected time stamp.
Definition: tcp_packet.h:149
static void * vlib_buffer_push_ip6(vlib_main_t *vm, vlib_buffer_t *b, ip6_address_t *src, ip6_address_t *dst, int proto)
Push IPv6 header to buffer.
Definition: ip6.h:662
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
static u8 tcp_max_tx_deq(tcp_connection_t *tc)
Definition: tcp_output.c:1858
ip_lookup_next_t lookup_next_index
Next hop after ip4-lookup.
Definition: adj.h:236
u32 next_buffer
Next buffer for this linked-list of buffers.
Definition: buffer.h:140
#define foreach_tcp6_reset_next
Definition: tcp_output.c:2543
sack_scoreboard_hole_t * scoreboard_first_hole(sack_scoreboard_t *sb)
Definition: tcp_input.c:695
#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b)
Definition: buffer.h:489
static tcp_worker_ctx_t * tcp_get_worker(u32 thread_index)
Definition: tcp.h:687
void session_transport_closed_notify(transport_connection_t *tc)
Notification from transport that it is closed.
Definition: session.c:944
static void tcp_retransmit_timer_update(tcp_connection_t *tc)
Definition: tcp.h:1192
VLIB buffer representation.
Definition: buffer.h:102
u64 uword
Definition: types.h:112
#define seq_max(_s1, _s2)
Definition: tcp.h:870
static void tcp_enqueue_to_ip_lookup_i(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4, u32 fib_index, u8 flush)
Definition: tcp_output.c:626
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
Definition: node_funcs.h:244
static void tcp_make_ack_i(tcp_connection_t *tc, vlib_buffer_t *b, tcp_state_t state, u8 flags)
Prepare ACK.
Definition: tcp_output.c:525
void tcp_timer_delack_handler(u32 index)
Delayed ack timer handler.
Definition: tcp_output.c:1221
#define TCP_OPTION_LEN_MSS
Definition: tcp_packet.h:164
void transport_connection_tx_pacer_reset_bucket(transport_connection_t *tc)
Reset tx pacer bucket.
Definition: transport.c:640
u16 ip6_tcp_compute_checksum_custom(vlib_main_t *vm, vlib_buffer_t *p0, ip46_address_t *src, ip46_address_t *dst)
Definition: tcp_output.c:453
struct clib_bihash_value offset
template key/value backing page structure
static void tcp_retransmit_timer_force_update(tcp_connection_t *tc)
Definition: tcp.h:1158
u8 * format_tcp_connection(u8 *s, va_list *args)
Definition: tcp.c:1060
u32 tcp_initial_window_to_advertise(tcp_connection_t *tc)
Compute initial window and scale factor.
Definition: tcp_output.c:101
#define vnet_buffer(b)
Definition: buffer.h:365
static tcp_connection_t * tcp_connection_get(u32 conn_index, u32 thread_index)
Definition: tcp.h:714
static void tcp_cc_event(tcp_connection_t *tc, tcp_cc_event_t evt)
Definition: tcp.h:1085
void tcp_update_rto(tcp_connection_t *tc)
Definition: tcp_input.c:478
int session_stream_connect_notify(transport_connection_t *tc, u8 is_fail)
Definition: session.c:756
static u32 vlib_num_workers()
Definition: threads.h:367
void tcp_connection_cleanup(tcp_connection_t *tc)
Cleans up connection state.
Definition: tcp.c:239
static u32 tcp_buffer_len(vlib_buffer_t *b)
Definition: tcp_output.c:1128
static u8 tcp_retransmit_should_retry_head(tcp_connection_t *tc, sack_scoreboard_t *sb)
Definition: tcp_output.c:1845
#define TCP_OPTION_LEN_NOOP
Definition: tcp_packet.h:163
void tcp_send_syn(tcp_connection_t *tc)
Send SYN.
Definition: tcp_output.c:927
vlib_node_registration_t tcp6_output_node
(constructor) VLIB_REGISTER_NODE (tcp6_output_node)
Definition: tcp_output.c:2513
u16 flags
Copy of main node flags.
Definition: node.h:509
Window scale.
Definition: tcp_packet.h:107
static u16 tcp_compute_checksum(tcp_connection_t *tc, vlib_buffer_t *b)
Definition: tcp_output.c:499
enum _tcp_reset_next tcp_reset_next_t
static u32 transport_max_rx_enqueue(transport_connection_t *tc)
Definition: session.h:469
#define tcp_opts_sack_permitted(_to)
Definition: tcp_packet.h:159
static void vlib_buffer_free_one(vlib_main_t *vm, u32 buffer_index)
Free one buffer Shorthand to free a single buffer chain.
Definition: buffer_funcs.h:898
tcp_connection_t tcp_connection
Definition: tcp_output.c:49
void tcp_program_ack(tcp_connection_t *tc)
Definition: tcp_output.c:1184
u16 dst_port
Definition: udp.api:42
vlib_frame_t * ip_lookup_tx_frames[2]
tx frames for ip 4/6 lookup nodes
Definition: tcp.h:516
static void * tcp_reuse_buffer(vlib_main_t *vm, vlib_buffer_t *b)
Definition: tcp_output.c:421
u8 ip_version_and_header_length
Definition: ip4_packet.h:138
Timestamps.
Definition: tcp_packet.h:110
static_always_inline void vlib_get_buffers(vlib_main_t *vm, u32 *bi, vlib_buffer_t **b, int count)
Translate array of buffer indices into buffer pointers.
Definition: buffer_funcs.h:244
vlib_node_registration_t tcp4_reset_node
(constructor) VLIB_REGISTER_NODE (tcp4_reset_node)
Definition: tcp_output.c:2630
#define VLIB_NODE_FLAG_TRACE
Definition: node.h:302
vlib_node_registration_t tcp4_output_node
(constructor) VLIB_REGISTER_NODE (tcp4_output_node)
Definition: tcp_output.c:2493
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:59
u32 total_length_not_including_first_buffer
Only valid for first buffer in chain.
Definition: buffer.h:167
static void tcp_enqueue_to_output(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4)
Definition: tcp_output.c:678
static u32 vlib_buffer_alloc(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Allocate buffers into supplied array.
Definition: buffer_funcs.h:612
static void tcp_persist_timer_set(tcp_connection_t *tc)
Definition: tcp.h:1165
static tcp_main_t * vnet_get_tcp_main()
Definition: tcp.h:681
#define TCP_RTO_BOFF_MAX
Definition: tcp.h:104
static char * tcp_error_strings[]
Definition: tcp_output.c:40
static void * vlib_buffer_push_ip4(vlib_main_t *vm, vlib_buffer_t *b, ip4_address_t *src, ip4_address_t *dst, int proto, u8 csum_offload)
Push IPv4 header to buffer.
Definition: ip4.h:384
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
Definition: buffer_funcs.h:85
static u32 tcp_set_time_now(tcp_worker_ctx_t *wrk)
Definition: tcp.h:1027
void tcp_timer_persist_handler(u32 index)
Got 0 snd_wnd from peer, try to do something about it.
Definition: tcp_output.c:1675
#define tcp_ack(_th)
Definition: tcp_packet.h:83
void tcp_bt_track_rxt(tcp_connection_t *tc, u32 start, u32 end)
Track a tcp retransmission.
Definition: tcp_bt.c:333
u32 transport_connection_tx_pacer_burst(transport_connection_t *tc)
Get tx pacer max burst.
Definition: transport.c:663
static u8 tcp_timer_is_active(tcp_connection_t *tc, tcp_timers_e timer)
Definition: tcp.h:1206
Definition: defs.h:46
static void tcp_cc_congestion(tcp_connection_t *tc)
Definition: tcp.h:1060
ip6_address_t dst_address
Definition: ip6_packet.h:383
u32 * tx_buffers
tx buffer free list
Definition: tcp.h:513
adj_index_t adj_nbr_find(fib_protocol_t nh_proto, vnet_link_t link_type, const ip46_address_t *nh_addr, u32 sw_if_index)
Lookup neighbor adjancency.
Definition: adj_nbr.c:99
#define TCP_EVT(_evt, _args...)
Definition: tcp_debug.h:145
static int tcp_make_options(tcp_connection_t *tc, tcp_options_t *opts, tcp_state_t state)
Definition: tcp_output.c:355
static uword pool_elts(void *v)
Number of active elements in a pool.
Definition: pool.h:128