FD.io VPP  v20.01-48-g3e0dafb74
Vector Packet Processing
tcp_output.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2019 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <vnet/tcp/tcp.h>
17 #include <math.h>
18 
19 typedef enum _tcp_output_next
20 {
27 
/* X-macro list of next nodes reachable from the ip4 TCP output node. */
28 #define foreach_tcp4_output_next \
29  _ (DROP, "error-drop") \
30  _ (IP_LOOKUP, "ip4-lookup") \
31  _ (IP_REWRITE, "ip4-rewrite") \
32  _ (IP_ARP, "ip4-arp")
33 
/* X-macro list of next nodes reachable from the ip6 TCP output node. */
34 #define foreach_tcp6_output_next \
35  _ (DROP, "error-drop") \
36  _ (IP_LOOKUP, "ip6-lookup") \
37  _ (IP_REWRITE, "ip6-rewrite") \
38  _ (IP_ARP, "ip6-discover-neighbor")
39 
/* Error counter strings, one per entry in tcp_error.def, via X-macro
 * expansion of the included definitions file. */
40 static char *tcp_error_strings[] = {
41 #define tcp_error(n,s) s,
42 #include <vnet/tcp/tcp_error.def>
43 #undef tcp_error
44 };
45 
/* Packet tx trace record. NOTE(review): the field list (original lines
 * 48-50) is missing from this extraction -- judging by the uses below it
 * presumably holds a tcp_header_t and a tcp_connection_t; confirm against
 * the upstream file. */
46 typedef struct
47 {
51 
/* Format a tx trace record: connection id, state, then the TCP header
 * indented to match the current output column. */
52 static u8 *
53 format_tcp_tx_trace (u8 * s, va_list * args)
54 {
55  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
56  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
57  tcp_tx_trace_t *t = va_arg (*args, tcp_tx_trace_t *);
/* NOTE(review): the declaration of 'tc' (original line 58) is missing
 * from this extraction; it is used below. */
59  u32 indent = format_get_indent (s);
60 
61  s = format (s, "%U state %U\n%U%U", format_tcp_connection_id, tc,
62  format_tcp_state, tc->state, format_white_space, indent,
63  format_tcp_header, &t->tcp_header, 128);
64 
65  return s;
66 }
67 
68 #ifndef CLIB_MARCH_VARIANT
/* tcp_window_compute_scale (name per the caller below): return the
 * smallest shift such that (window >> scale) fits in TCP_WND_MAX, capped
 * at TCP_MAX_WND_SCALE. NOTE(review): the signature line (original line
 * 70) is missing from this extraction. */
69 static u8
71 {
72  u8 wnd_scale = 0;
73  while (wnd_scale < TCP_MAX_WND_SCALE && (window >> wnd_scale) > TCP_WND_MAX)
74  wnd_scale++;
75  return wnd_scale;
76 }
77 
78 /**
79  * TCP's initial window
80  */
/* NOTE(review): signature lines (original 81-82) missing from this
 * extraction; callers below pass a tcp_connection_t *. */
83 {
84  /* RFC 6928 recommends the value lower. However at the time our connections
85  * are initialized, fifos may not be allocated. Therefore, advertise the
86  * smallest possible unscaled window size and update once fifos are
87  * assigned to the session.
88  */
89  /*
90  tcp_update_rcv_mss (tc);
91  TCP_IW_N_SEGMENTS * tc->mss;
92  */
/* Advertise the configured minimum rx fifo size until fifos exist. */
93  return tcp_cfg.min_rx_fifo;
94 }
95 
96 /**
97  * Compute initial window and scale factor. As per RFC1323, window field in
98  * SYN and SYN-ACK segments is never scaled.
99  */
100 u32
/* NOTE(review): signature line (original 101) missing from this
 * extraction; named tcp_initial_window_to_advertise per callers below. */
102 {
103  /* Compute rcv wscale only if peer advertised support for it */
104  if (tc->state != TCP_STATE_SYN_RCVD || tcp_opts_wscale (&tc->rcv_opts))
105  tc->rcv_wscale = tcp_window_compute_scale (tcp_cfg.max_rx_fifo);
106 
107  tc->rcv_wnd = tcp_initial_wnd_unscaled (tc);
108 
/* SYN/SYN-ACK carry an unscaled window field, so clamp to 16-bit max. */
109  return clib_min (tc->rcv_wnd, TCP_WND_MAX);
110 }
111 
/* Recompute tc->rcv_wnd from available rx fifo space, never shrinking the
 * previously advertised window (RFC 793 forbids window shrinkage). */
112 static inline void
/* NOTE(review): signature line (original 113) missing from this
 * extraction; named tcp_update_rcv_wnd per callers below. */
114 {
115  u32 available_space, wnd;
116  i32 observed_wnd;
117 
118  ASSERT (tc->rcv_opts.mss < transport_rx_fifo_size (&tc->connection));
119 
120  /*
121  * Figure out how much space we have available
122  */
123  available_space = transport_max_rx_enqueue (&tc->connection);
/* Advertise zero window if we cannot take even one full mss segment. */
124  if (PREDICT_FALSE (available_space < tc->rcv_opts.mss))
125  {
126  tc->rcv_wnd = 0;
127  return;
128  }
129 
130  /*
131  * Use the above and what we know about what we've previously advertised
132  * to compute the new window
133  */
/* Window the peer still believes is open: last advert minus bytes
 * received since that advert (rcv_nxt - rcv_las). */
134  observed_wnd = (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
135 
136  /* Bad. Thou shalt not shrink */
137  if (PREDICT_FALSE ((i32) available_space < observed_wnd))
138  {
139  wnd = clib_max (observed_wnd, 0);
140  TCP_EVT (TCP_EVT_RCV_WND_SHRUNK, tc, observed_wnd, available_space);
141  }
142  else
143  {
144  wnd = available_space;
145  }
146 
147  /* Make sure we have a multiple of rcv_wscale */
148  if (wnd && tc->rcv_wscale)
149  {
150  wnd &= ~((1 << tc->rcv_wscale) - 1);
/* Rounding down to the scale multiple must not turn a non-zero window
 * into an advertised zero window; bump to one scale unit instead. */
151  if (wnd == 0)
152  wnd = 1 << tc->rcv_wscale;
153  }
154 
155  tc->rcv_wnd = clib_min (wnd, TCP_WND_MAX << tc->rcv_wscale);
156 }
157 
158 /**
159  * Compute and return window to advertise, scaled as per RFC1323
160  */
161 static inline u32
/* NOTE(review): signature line (original 162) missing from this
 * extraction; named tcp_window_to_advertise per callers below. */
163 {
/* Pre-established states advertise the initial unscaled window;
 * NOTE(review): the branch body (original line 165) is missing here,
 * presumably a return of tcp_initial_window_to_advertise (tc). */
164  if (state < TCP_STATE_ESTABLISHED)
166 
167  tcp_update_rcv_wnd (tc);
168  return tc->rcv_wnd >> tc->rcv_wscale;
169 }
170 
171 /**
172  * Write TCP options to segment.
173  */
174 static u32
/* NOTE(review): signature line (original 175) missing from this
 * extraction; tcp_options_write (u8 *data, tcp_options_t *opts) per the
 * callers below. Returns total option bytes written (4-byte aligned). */
176 {
177  u32 opts_len = 0;
178  u32 buf, seq_len = 4;
179 
/* MSS option: kind, length, then 16-bit mss in network byte order. */
180  if (tcp_opts_mss (opts))
181  {
182  *data++ = TCP_OPTION_MSS;
183  *data++ = TCP_OPTION_LEN_MSS;
184  buf = clib_host_to_net_u16 (opts->mss);
185  clib_memcpy_fast (data, &buf, sizeof (opts->mss));
186  data += sizeof (opts->mss);
187  opts_len += TCP_OPTION_LEN_MSS;
188  }
189 
190  if (tcp_opts_wscale (opts))
191  {
192  *data++ = TCP_OPTION_WINDOW_SCALE;
193  *data++ = TCP_OPTION_LEN_WINDOW_SCALE;
194  *data++ = opts->wscale;
195  opts_len += TCP_OPTION_LEN_WINDOW_SCALE;
196  }
197 
198  if (tcp_opts_sack_permitted (opts))
199  {
200  *data++ = TCP_OPTION_SACK_PERMITTED;
/* NOTE(review): the length-byte write (original line 201) is missing
 * from this extraction. */
202  opts_len += TCP_OPTION_LEN_SACK_PERMITTED;
203  }
204 
/* Timestamp option: tsval then tsecr, each 32-bit network order. */
205  if (tcp_opts_tstamp (opts))
206  {
207  *data++ = TCP_OPTION_TIMESTAMP;
208  *data++ = TCP_OPTION_LEN_TIMESTAMP;
209  buf = clib_host_to_net_u32 (opts->tsval);
210  clib_memcpy_fast (data, &buf, sizeof (opts->tsval));
211  data += sizeof (opts->tsval);
212  buf = clib_host_to_net_u32 (opts->tsecr);
213  clib_memcpy_fast (data, &buf, sizeof (opts->tsecr));
214  data += sizeof (opts->tsecr);
215  opts_len += TCP_OPTION_LEN_TIMESTAMP;
216  }
217 
/* SACK blocks: kind, total length, then start/end sequence pairs. */
218  if (tcp_opts_sack (opts))
219  {
220  int i;
221 
222  if (opts->n_sack_blocks != 0)
223  {
224  *data++ = TCP_OPTION_SACK_BLOCK;
225  *data++ = 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
226  for (i = 0; i < opts->n_sack_blocks; i++)
227  {
228  buf = clib_host_to_net_u32 (opts->sacks[i].start);
229  clib_memcpy_fast (data, &buf, seq_len);
230  data += seq_len;
231  buf = clib_host_to_net_u32 (opts->sacks[i].end);
232  clib_memcpy_fast (data, &buf, seq_len);
233  data += seq_len;
234  }
235  opts_len += 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
236  }
237  }
238 
239  /* Terminate TCP options */
240  if (opts_len % 4)
241  {
242  *data++ = TCP_OPTION_EOL;
243  opts_len += TCP_OPTION_LEN_EOL;
244  }
245 
246  /* Pad with zeroes to a u32 boundary */
247  while (opts_len % 4)
248  {
249  *data++ = TCP_OPTION_NOOP;
250  opts_len += TCP_OPTION_LEN_NOOP;
251  }
252  return opts_len;
253 }
254 
/* Build options for an outgoing SYN: always mss, wscale and timestamp,
 * plus sack-permitted when SACKs are enabled. Returns aligned length.
 * NOTE(review): the signature line (original 256) and several
 * "len += ..." accumulation lines (266, 271/272, 276/277) are missing
 * from this extraction. */
255 static int
257 {
258  u8 len = 0;
259 
260  opts->flags |= TCP_OPTS_FLAG_MSS;
261  opts->mss = tc->mss;
262  len += TCP_OPTION_LEN_MSS;
263 
264  opts->flags |= TCP_OPTS_FLAG_WSCALE;
265  opts->wscale = tc->rcv_wscale;
267 
268  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
269  opts->tsval = tcp_time_now ();
270  opts->tsecr = 0;
272 
273  if (TCP_USE_SACKS)
274  {
275  opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
277  }
278 
279  /* Align to needed boundary */
280  len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
281  return len;
282 }
283 
/* Build options for a SYN-ACK: mss always; wscale, timestamp and
 * sack-permitted only if the peer advertised them in its SYN.
 * NOTE(review): signature line (original 285) and some "len += ..."
 * lines (297, 305, 311) are missing from this extraction. */
284 static int
286 {
287  u8 len = 0;
288 
289  opts->flags |= TCP_OPTS_FLAG_MSS;
290  opts->mss = tc->mss;
291  len += TCP_OPTION_LEN_MSS;
292 
293  if (tcp_opts_wscale (&tc->rcv_opts))
294  {
295  opts->flags |= TCP_OPTS_FLAG_WSCALE;
296  opts->wscale = tc->rcv_wscale;
298  }
299 
300  if (tcp_opts_tstamp (&tc->rcv_opts))
301  {
302  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
303  opts->tsval = tcp_time_now ();
304  opts->tsecr = tc->tsval_recent;
306  }
307 
308  if (tcp_opts_sack_permitted (&tc->rcv_opts))
309  {
310  opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
312  }
313 
314  /* Align to needed boundary */
315  len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
316  return len;
317 }
318 
/* Build options for established-state segments: timestamp when
 * negotiated, plus any pending SACK blocks, rotating through the list via
 * snd_sack_pos. NOTE(review): the signature line (original 320), a
 * timestamp "len +=" line (331) and the clib_min upper bound (line 343,
 * presumably the max SACK block count) are missing from this extraction. */
319 static int
321 {
322  u8 len = 0;
323 
324  opts->flags = 0;
325 
326  if (tcp_opts_tstamp (&tc->rcv_opts))
327  {
328  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
329  opts->tsval = tcp_tstamp (tc);
330  opts->tsecr = tc->tsval_recent;
332  }
333  if (tcp_opts_sack_permitted (&tc->rcv_opts))
334  {
335  if (vec_len (tc->snd_sacks))
336  {
337  opts->flags |= TCP_OPTS_FLAG_SACK;
/* Wrap the rotating SACK cursor when it runs off the end. */
338  if (tc->snd_sack_pos >= vec_len (tc->snd_sacks))
339  tc->snd_sack_pos = 0;
340  opts->sacks = &tc->snd_sacks[tc->snd_sack_pos];
341  opts->n_sack_blocks = vec_len (tc->snd_sacks) - tc->snd_sack_pos;
342  opts->n_sack_blocks = clib_min (opts->n_sack_blocks,
344  tc->snd_sack_pos += opts->n_sack_blocks;
345  len += 2 + TCP_OPTION_LEN_SACK_BLOCK * opts->n_sack_blocks;
346  }
347  }
348 
349  /* Align to needed boundary */
350  len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
351  return len;
352 }
353 
/* Dispatch option construction on connection state: established-style
 * options for all synchronized states, SYN-ACK options in SYN_RCVD, SYN
 * options in SYN_SENT. NOTE(review): signature lines (original 355-356)
 * are missing from this extraction. */
354 always_inline int
357 {
358  switch (state)
359  {
360  case TCP_STATE_ESTABLISHED:
361  case TCP_STATE_CLOSE_WAIT:
362  case TCP_STATE_FIN_WAIT_1:
363  case TCP_STATE_LAST_ACK:
364  case TCP_STATE_CLOSING:
365  case TCP_STATE_FIN_WAIT_2:
366  case TCP_STATE_TIME_WAIT:
367  case TCP_STATE_CLOSED:
368  return tcp_make_established_options (tc, opts);
369  case TCP_STATE_SYN_RCVD:
370  return tcp_make_synack_options (tc, opts);
371  case TCP_STATE_SYN_SENT:
372  return tcp_make_syn_options (tc, opts);
373  default:
374  clib_warning ("State not handled! %d", state);
375  return 0;
376  }
377 }
378 
379 /**
380  * Update burst send vars
381  *
382  * - Updates snd_mss to reflect the effective segment size that we can send
383  * by taking into account all TCP options, including SACKs.
384  * - Cache 'on the wire' options for reuse
385  * - Updates receive window which can be reused for a burst.
386  *
387  * This should *only* be called when doing bursts
388  */
389 void
/* NOTE(review): signature line (original 390) missing; also lines 409 and
 * 413-414 (rate-sample and snd_una==snd_nxt branch bodies) are absent
 * from this extraction. */
391 {
392  tcp_main_t *tm = &tcp_main;
393 
394  /* Compute options to be used for connection. These may be reused when
395  * sending data or to compute the effective mss (snd_mss) */
396  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts,
397  TCP_STATE_ESTABLISHED);
398 
399  /* XXX check if MTU has been updated */
400  tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len;
401  ASSERT (tc->snd_mss > 0);
402 
/* Pre-serialize options into the per-thread cache for reuse in burst. */
403  tcp_options_write (tm->wrk_ctx[tc->c_thread_index].cached_opts,
404  &tc->snd_opts);
405 
406  tcp_update_rcv_wnd (tc);
407 
408  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
410 
411  if (tc->snd_una == tc->snd_nxt)
412  {
415  }
416 }
417 
418 #endif /* CLIB_MARCH_VARIANT */
419 
/* Reset an existing buffer for reuse as a fresh tx packet: clears flags
 * (keeping next-present handling), data offsets and tcp metadata.
 * NOTE(review): the signature line (original 421) and lines 424, 429 and
 * 433 (chain-free branch body, total-length reset and the return of the
 * data pointer) are missing from this extraction. */
420 static void *
422 {
423  if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
425  /* Zero all flags but free list index and trace flag */
426  b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1;
427  b->current_data = 0;
428  b->current_length = 0;
430  vnet_buffer (b)->tcp.flags = 0;
431 
432  /* Leave enough space for headers */
434 }
435 
436 #ifndef CLIB_MARCH_VARIANT
/* Initialize a freshly allocated buffer for tx: must not be chained, is
 * marked locally originated, and tcp metadata is cleared.
 * NOTE(review): the signature line (original 439) and lines 442, 445 and
 * 447 (total-length reset and header-room return) are missing from this
 * extraction. */
437 static void *
439 {
440  ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
441  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
443  b->current_data = 0;
444  vnet_buffer (b)->tcp.flags = 0;
446  /* Leave enough space for headers */
448 }
449 
450 
451 /* Compute TCP checksum in software when offloading is disabled for a connection */
452 u16
454  ip46_address_t * src, ip46_address_t * dst)
455 {
456  ip_csum_t sum0;
457  u16 payload_length_host_byte_order;
458  u32 i;
459 
460  /* Initialize checksum with ip header. */
461  sum0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0)) +
462  clib_host_to_net_u16 (IP_PROTOCOL_TCP);
463  payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
464 
465  for (i = 0; i < ARRAY_LEN (src->ip6.as_uword); i++)
466  {
467  sum0 = ip_csum_with_carry
468  (sum0, clib_mem_unaligned (&src->ip6.as_uword[i], uword));
469  sum0 = ip_csum_with_carry
470  (sum0, clib_mem_unaligned (&dst->ip6.as_uword[i], uword));
471  }
472 
473  return ip_calculate_l4_checksum (vm, p0, sum0,
474  payload_length_host_byte_order, NULL, 0,
475  NULL);
476 }
477 
/* ip4 variant of the software TCP checksum: pseudo-header sum over
 * payload length, protocol and the two 32-bit addresses.
 * NOTE(review): the signature line (original 479) is missing from this
 * extraction. */
478 u16
480  ip46_address_t * src, ip46_address_t * dst)
481 {
482  ip_csum_t sum0;
483  u32 payload_length_host_byte_order;
484 
485  payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
486  sum0 =
487  clib_host_to_net_u32 (payload_length_host_byte_order +
488  (IP_PROTOCOL_TCP << 16));
489 
490  sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&src->ip4, u32));
491  sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&dst->ip4, u32));
492 
493  return ip_calculate_l4_checksum (vm, p0, sum0,
494  payload_length_host_byte_order, NULL, 0,
495  NULL);
496 }
497 
/* Compute the TCP checksum in software when checksum offload is disabled
 * for the connection; otherwise mark the buffer for hardware offload and
 * return 0. NOTE(review): the signature line (original 499) and the
 * checksum assignment lines in both branches (508, 511) are missing from
 * this extraction. */
498 static inline u16
500 {
501  u16 checksum = 0;
502  if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD))
503  {
504  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
505  vlib_main_t *vm = wrk->vm;
506 
507  if (tc->c_is_ip4)
509  (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip)
510  else
512  (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
513  }
514  else
515  {
516  b->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
517  }
518  return checksum;
519 }
520 
521 /**
522  * Prepare ACK
523  */
/* Fill a buffer with a TCP header carrying the given flags, current
 * snd_nxt/rcv_nxt, freshly built established-state options and the
 * window to advertise. NOTE(review): the signature line (original 525)
 * and the zero/非zero-window branch bodies (549, 551, presumably setting
 * the zero-rwnd-sent flag) are missing from this extraction. */
524 static inline void
526  u8 flags)
527 {
528  tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
529  u8 tcp_opts_len, tcp_hdr_opts_len;
530  tcp_header_t *th;
531  u16 wnd;
532 
533  wnd = tcp_window_to_advertise (tc, state);
534 
535  /* Make and write options */
536  tcp_opts_len = tcp_make_established_options (tc, snd_opts);
537  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
538 
539  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
540  tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);
541 
542  tcp_options_write ((u8 *) (th + 1), snd_opts);
543 
544  th->checksum = tcp_compute_checksum (tc, b);
545 
546  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
547 
548  if (wnd == 0)
550  else
552 }
553 
554 /**
555  * Convert buffer to ACK
556  */
557 static inline void
/* NOTE(review): signature line (original 558) missing; named tcp_make_ack
 * per callers below. Updates rcv_las to record the advertised rcv_nxt. */
559 {
560  tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK);
561  TCP_EVT (TCP_EVT_ACK_SENT, tc);
562  tc->rcv_las = tc->rcv_nxt;
563 }
564 
565 /**
566  * Convert buffer to FIN-ACK
567  */
568 void
/* NOTE(review): signature line (original 569) missing; named tcp_make_fin
 * per the caller in tcp_send_fin below. */
570 {
571  tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK);
572 }
573 
574 /**
575  * Convert buffer to SYN
576  */
577 void
/* NOTE(review): signature line (original 578) missing; named tcp_make_syn
 * per the caller in tcp_send_syn below. Sequence number is tc->iss and
 * the window is the unscaled initial window. */
579 {
580  u8 tcp_hdr_opts_len, tcp_opts_len;
581  tcp_header_t *th;
582  u16 initial_wnd;
583  tcp_options_t snd_opts;
584 
585  initial_wnd = tcp_initial_window_to_advertise (tc);
586 
587  /* Make and write options */
588  clib_memset (&snd_opts, 0, sizeof (snd_opts));
589  tcp_opts_len = tcp_make_syn_options (tc, &snd_opts);
590  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
591 
592  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
593  tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN,
594  initial_wnd);
595  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
596  tcp_options_write ((u8 *) (th + 1), &snd_opts);
597  th->checksum = tcp_compute_checksum (tc, b);
598 }
599 
600 /**
601  * Convert buffer to SYN-ACK
602  */
603 void
/* NOTE(review): signature line (original 604) missing; named
 * tcp_make_synack per the caller in tcp_send_synack below. */
605 {
606  tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
607  u8 tcp_opts_len, tcp_hdr_opts_len;
608  tcp_header_t *th;
609  u16 initial_wnd;
610 
611  clib_memset (snd_opts, 0, sizeof (*snd_opts));
612  initial_wnd = tcp_initial_window_to_advertise (tc);
613  tcp_opts_len = tcp_make_synack_options (tc, snd_opts);
614  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
615 
616  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
617  tc->rcv_nxt, tcp_hdr_opts_len,
618  TCP_FLAG_SYN | TCP_FLAG_ACK, initial_wnd);
619  tcp_options_write ((u8 *) (th + 1), snd_opts);
620 
621  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
622  th->checksum = tcp_compute_checksum (tc, b);
623 }
624 
/* Append buffer bi to the per-worker frame destined to the ip4/ip6
 * lookup node, creating the frame on demand; the frame is handed to the
 * node when full or when flush is requested. NOTE(review): the signature
 * line (original 626) and line 641 are missing from this extraction. */
625 always_inline void
627  u8 is_ip4, u32 fib_index, u8 flush)
628 {
629  vlib_main_t *vm = wrk->vm;
630  u32 *to_next, next_index;
631  vlib_frame_t *f;
632 
633  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
634  b->error = 0;
635 
/* For ip-lookup the TX sw_if_index slot carries the fib index. */
636  vnet_buffer (b)->sw_if_index[VLIB_TX] = fib_index;
637  vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
638 
639  /* Send to IP lookup */
640  next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
642 
643  f = wrk->ip_lookup_tx_frames[!is_ip4];
644  if (!f)
645  {
646  f = vlib_get_frame_to_node (vm, next_index);
647  ASSERT (f);
648  wrk->ip_lookup_tx_frames[!is_ip4] = f;
649  }
650 
651  to_next = vlib_frame_vector_args (f);
652  to_next[f->n_vectors] = bi;
653  f->n_vectors += 1;
654  if (flush || f->n_vectors == VLIB_FRAME_SIZE)
655  {
656  vlib_put_frame_to_node (vm, next_index, f);
657  wrk->ip_lookup_tx_frames[!is_ip4] = 0;
658  }
659 }
660 
/* Enqueue to ip-lookup and flush the frame immediately (flush = 1).
 * NOTE(review): signature line (original 662) missing from this
 * extraction. */
661 static void
663  u32 bi, u8 is_ip4, u32 fib_index)
664 {
665  tcp_enqueue_to_ip_lookup_i (wrk, b, bi, is_ip4, fib_index, 1);
666 }
667 
/* Enqueue to ip-lookup without flushing (flush = 0). NOTE(review): the
 * signature line (original 669) and the main-thread branch body (674)
 * are missing from this extraction. */
668 static void
670  u8 is_ip4, u32 fib_index)
671 {
672  tcp_enqueue_to_ip_lookup_i (wrk, b, bi, is_ip4, fib_index, 0);
673  if (wrk->vm->thread_index == 0 && vlib_num_workers ())
675 }
676 
/* Hand a locally originated buffer to the session layer for tx through
 * the tcp output node. NOTE(review): the signature line (original 678)
 * and the session enqueue call (687) are missing from this extraction. */
677 static void
679  u8 is_ip4)
680 {
681  session_type_t st;
682 
683  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
684  b->error = 0;
685 
686  st = session_type_from_proto_and_ip (TRANSPORT_PROTO_TCP, is_ip4);
688 }
689 
690 #endif /* CLIB_MARCH_VARIANT */
691 
/* Rewrite the received buffer in place into a RST (or RST|ACK for a SYN
 * to a closed port) back to the sender, swapping addresses and ports and
 * recomputing the L3/L4 headers. Returns 0 on success, -1 when no reset
 * should be sent. NOTE(review): the signature line (original 693) and
 * the src/dst port declarations (701) are missing from this extraction. */
692 static int
694  tcp_state_t state, u8 thread_index, u8 is_ip4)
695 {
696  ip4_header_t *ih4;
697  ip6_header_t *ih6;
698  tcp_header_t *th0;
699  ip4_address_t src_ip40, dst_ip40;
700  ip6_address_t src_ip60, dst_ip60;
702  u32 tmp;
703  u32 seq, ack;
704  u8 flags;
705 
706  /* Find IP and TCP headers */
707  th0 = tcp_buffer_hdr (b0);
708 
709  /* Save src and dst ip */
710  if (is_ip4)
711  {
712  ih4 = vlib_buffer_get_current (b0);
713  ASSERT ((ih4->ip_version_and_header_length & 0xF0) == 0x40);
714  src_ip40.as_u32 = ih4->src_address.as_u32;
715  dst_ip40.as_u32 = ih4->dst_address.as_u32;
716  }
717  else
718  {
719  ih6 = vlib_buffer_get_current (b0);
720  ASSERT ((ih6->ip_version_traffic_class_and_flow_label & 0xF0) == 0x60);
721  clib_memcpy_fast (&src_ip60, &ih6->src_address, sizeof (ip6_address_t));
722  clib_memcpy_fast (&dst_ip60, &ih6->dst_address, sizeof (ip6_address_t));
723  }
724 
725  src_port = th0->src_port;
726  dst_port = th0->dst_port;
727 
728  /* Try to determine what/why we're actually resetting */
729  if (state == TCP_STATE_CLOSED)
730  {
/* Only a SYN to a closed port elicits a reset from this path. */
731  if (!tcp_syn (th0))
732  return -1;
733 
734  tmp = clib_net_to_host_u32 (th0->seq_number);
735 
736  /* Got a SYN for no listener. */
737  flags = TCP_FLAG_RST | TCP_FLAG_ACK;
738  ack = clib_host_to_net_u32 (tmp + 1);
739  seq = 0;
740  }
741  else
742  {
743  flags = TCP_FLAG_RST;
744  seq = th0->ack_number;
745  ack = 0;
746  }
747 
748  tcp_reuse_buffer (vm, b0);
749  tcp_trajectory_add_start (b0, 4);
/* Ports are swapped: reply goes back to the original sender. */
750  th0 = vlib_buffer_push_tcp_net_order (b0, dst_port, src_port, seq, ack,
751  sizeof (tcp_header_t), flags, 0);
752 
753  if (is_ip4)
754  {
755  ih4 = vlib_buffer_push_ip4 (vm, b0, &dst_ip40, &src_ip40,
756  IP_PROTOCOL_TCP, 1);
757  th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih4);
758  }
759  else
760  {
761  int bogus = ~0;
762  ih6 = vlib_buffer_push_ip6 (vm, b0, &dst_ip60, &src_ip60,
763  IP_PROTOCOL_TCP);
764  th0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ih6, &bogus);
765  ASSERT (!bogus);
766  }
767 
768  return 0;
769 }
770 
771 #ifndef CLIB_MARCH_VARIANT
772 /**
773  * Send reset without reusing existing buffer
774  *
775  * It extracts connection info out of original packet
776  */
777 void
/* NOTE(review): signature line (original 778) missing from this
 * extraction; per the body it takes (tc, pkt, thread_index, is_ip4).
 * Allocates a fresh buffer, derives seq/ack and addressing from the
 * offending packet and sends the RST straight through ip-lookup. */
779  u32 thread_index, u8 is_ip4)
780 {
781  tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
782  vlib_main_t *vm = wrk->vm;
783  vlib_buffer_t *b;
784  u32 bi, sw_if_index, fib_index;
785  u8 tcp_hdr_len, flags = 0;
786  tcp_header_t *th, *pkt_th;
787  u32 seq, ack;
788  ip4_header_t *ih4, *pkt_ih4;
789  ip6_header_t *ih6, *pkt_ih6;
790  fib_protocol_t fib_proto;
791 
/* Best effort: silently give up when no buffer is available. */
792  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
793  return;
794 
795  b = vlib_get_buffer (vm, bi);
796  sw_if_index = vnet_buffer (pkt)->sw_if_index[VLIB_RX];
797  fib_proto = is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
798  fib_index = fib_table_get_index_for_sw_if_index (fib_proto, sw_if_index);
799  tcp_init_buffer (vm, b);
800 
801  /* Make and write options */
802  tcp_hdr_len = sizeof (tcp_header_t);
803 
804  if (is_ip4)
805  {
806  pkt_ih4 = vlib_buffer_get_current (pkt);
807  pkt_th = ip4_next_header (pkt_ih4);
808  }
809  else
810  {
811  pkt_ih6 = vlib_buffer_get_current (pkt);
812  pkt_th = ip6_next_header (pkt_ih6);
813  }
814 
/* RFC 793 reset generation: echo the peer's ACK as our seq when ACK is
 * set, otherwise seq 0 and ack just past the offending segment. */
815  if (tcp_ack (pkt_th))
816  {
817  flags = TCP_FLAG_RST;
818  seq = pkt_th->ack_number;
819  ack = (tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;
820  }
821  else
822  {
823  flags = TCP_FLAG_RST | TCP_FLAG_ACK;
824  seq = 0;
825  ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end);
826  }
827 
828  th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port,
829  seq, ack, tcp_hdr_len, flags, 0);
830 
831  /* Swap src and dst ip */
832  if (is_ip4)
833  {
834  ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
835  ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
836  &pkt_ih4->src_address, IP_PROTOCOL_TCP,
837  tcp_csum_offload (tc));
838  th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
839  }
840  else
841  {
842  int bogus = ~0;
843  ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) ==
844  0x60);
845  ih6 = vlib_buffer_push_ip6_custom (vm, b, &pkt_ih6->dst_address,
846  &pkt_ih6->src_address,
847  IP_PROTOCOL_TCP,
848  tc->ipv6_flow_label);
849  th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
850  ASSERT (!bogus);
851  }
852 
853  tcp_enqueue_to_ip_lookup_now (wrk, b, bi, is_ip4, fib_index);
854  TCP_EVT (TCP_EVT_RST_SENT, tc);
855  vlib_node_increment_counter (vm, tcp_node_index (output, tc->c_is_ip4),
856  TCP_ERROR_RST_SENT, 1);
857 }
858 
859 /**
860  * Build and set reset packet for connection
861  */
862 void
/* NOTE(review): signature line (original 863) missing from this
 * extraction; takes the connection whose state supplies seq/ack/options.
 * Unlike the w_pkt variant above, this RST goes via tcp_enqueue_to_output. */
864 {
865  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
866  vlib_main_t *vm = wrk->vm;
867  vlib_buffer_t *b;
868  u32 bi;
869  tcp_header_t *th;
870  u16 tcp_hdr_opts_len, advertise_wnd, opts_write_len;
871  u8 flags;
872 
873  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
874  return;
875  b = vlib_get_buffer (vm, bi);
876  tcp_init_buffer (vm, b);
877 
878  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
879  tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
880  advertise_wnd = tcp_window_to_advertise (tc, TCP_STATE_ESTABLISHED);
881  flags = TCP_FLAG_RST;
882  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
883  tc->rcv_nxt, tcp_hdr_opts_len, flags,
884  advertise_wnd);
885  opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
886  th->checksum = tcp_compute_checksum (tc, b);
887  ASSERT (opts_write_len == tc->snd_opts_len);
888  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
889  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
890  TCP_EVT (TCP_EVT_RST_SENT, tc);
891  vlib_node_increment_counter (vm, tcp_node_index (output, tc->c_is_ip4),
892  TCP_ERROR_RST_SENT, 1);
893 }
894 
/* Push the appropriate IP header (v4 or v6) onto a tx buffer using the
 * connection's local/remote addresses. NOTE(review): the signature line
 * (original 896) is missing from this extraction. */
895 static void
897  vlib_buffer_t * b)
898 {
899  if (tc->c_is_ip4)
900  {
901  vlib_buffer_push_ip4 (wrk->vm, b, &tc->c_lcl_ip4, &tc->c_rmt_ip4,
902  IP_PROTOCOL_TCP, tcp_csum_offload (tc));
903  }
904  else
905  {
906  vlib_buffer_push_ip6_custom (wrk->vm, b, &tc->c_lcl_ip6, &tc->c_rmt_ip6,
907  IP_PROTOCOL_TCP, tc->ipv6_flow_label);
908  }
909 }
910 
911 /**
912  * Send SYN
913  *
914  * Builds a SYN packet for a half-open connection and sends it to ipx_lookup.
915  * The packet is not forwarded through tcpx_output to avoid doing lookups
916  * in the half_open pool.
917  */
918 void
/* NOTE(review): signature line (original 919) missing from this
 * extraction. */
920 {
921  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
922  vlib_main_t *vm = wrk->vm;
923  vlib_buffer_t *b;
924  u32 bi;
925 
926  /*
927  * Setup retransmit and establish timers before requesting buffer
928  * such that we can return if we've ran out.
929  */
930  tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN,
931  tc->rto * TCP_TO_TIMER_TICK);
932 
/* Out of buffers: retry almost immediately via the SYN rxt timer. */
933  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
934  {
935  tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN, 1);
936  return;
937  }
938 
939  b = vlib_get_buffer (vm, bi);
940  tcp_init_buffer (vm, b);
941  tcp_make_syn (tc, b);
942 
943  /* Measure RTT with this */
944  tc->rtt_ts = tcp_time_now_us (vlib_num_workers ()? 1 : 0);
945  tc->rtt_seq = tc->snd_nxt;
946  tc->rto_boff = 0;
947 
948  tcp_push_ip_hdr (wrk, tc, b);
949  tcp_enqueue_to_ip_lookup (wrk, b, bi, tc->c_is_ip4, tc->c_fib_index);
950  TCP_EVT (TCP_EVT_SYN_SENT, tc);
951 }
952 
/* Build and send a SYN-ACK, starting an RTT measurement. NOTE(review):
 * the signature line (original 954) and line 961 (presumably the
 * retransmit timer arm before allocation) are missing from this
 * extraction. */
953 void
955 {
956  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
957  vlib_main_t *vm = wrk->vm;
958  vlib_buffer_t *b;
959  u32 bi;
960 
962 
963  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
964  {
965  tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, 1);
966  return;
967  }
968 
969  tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
970  b = vlib_get_buffer (vm, bi);
971  tcp_init_buffer (vm, b);
972  tcp_make_synack (tc, b);
973  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
974  TCP_EVT (TCP_EVT_SYNACK_SENT, tc);
975 }
976 
977 /**
978  * Flush ip lookup tx frames populated by timer pops
979  */
980 static void
/* NOTE(review): signature line (original 981) missing; takes the worker
 * context and an is_ip4 selector per the body below. */
982 {
983  if (wrk->ip_lookup_tx_frames[!is_ip4])
984  {
985  u32 next_index;
986  next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
987  vlib_put_frame_to_node (wrk->vm, next_index,
988  wrk->ip_lookup_tx_frames[!is_ip4]);
989  wrk->ip_lookup_tx_frames[!is_ip4] = 0;
990  }
991 }
992 
993 /**
994  * Flush v4 and v6 tcp and ip-lookup tx frames for thread index
995  */
996 void
/* NOTE(review): the signature line (original 997) and the entire body
 * (999-1000, presumably two tcp_flush_frame_to_ip_lookup calls for v4
 * and v6) are missing from this extraction. */
998 {
1001 }
1002 
1003 /**
1004  * Send FIN
1005  */
1006 void
/* NOTE(review): signature line (original 1007) missing, as is line 1035
 * (presumably the retransmit timer arm). On retransmit (FINSNT already
 * set), snd_nxt is stepped back one so the FIN reuses the same sequence
 * number. */
1008 {
1009  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1010  vlib_main_t *vm = wrk->vm;
1011  vlib_buffer_t *b;
1012  u32 bi;
1013  u8 fin_snt = 0;
1014 
1015  fin_snt = tc->flags & TCP_CONN_FINSNT;
1016  if (fin_snt)
1017  tc->snd_nxt -= 1;
1018 
1019  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1020  {
1021  /* Out of buffers so program fin retransmit ASAP */
1022  tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, 1);
/* Undo the snd_nxt rollback done above for the retransmit case. */
1023  if (fin_snt)
1024  tc->snd_nxt += 1;
1025  else
1026  /* Make sure retransmit retries a fin not data */
1027  tc->flags |= TCP_CONN_FINSNT;
1028  return;
1029  }
1030 
1031  /* If we have non-dupacks programmed, no need to send them */
1032  if ((tc->flags & TCP_CONN_SNDACK) && !tc->pending_dupacks)
1033  tc->flags &= ~TCP_CONN_SNDACK;
1034 
1036  b = vlib_get_buffer (vm, bi);
1037  tcp_init_buffer (vm, b);
1038  tcp_make_fin (tc, b);
1039  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1040  TCP_EVT (TCP_EVT_FIN_SENT, tc);
1041  /* Account for the FIN */
1042  tc->snd_nxt += 1;
1043  if (!fin_snt)
1044  {
1045  tc->flags |= TCP_CONN_FINSNT;
1046  tc->flags &= ~TCP_CONN_FINPNDG;
1047  tc->snd_una_max = seq_max (tc->snd_una_max, tc->snd_nxt);
1048  }
1049 }
1050 
1051 /**
1052  * Push TCP header and update connection variables. Should only be called
1053  * for segments with data, not for 'control' packets.
1054  */
1055 always_inline void
/* NOTE(review): the signature line (original 1056) and line 1066 (the
 * chained-buffer total-length computation) are missing from this
 * extraction; named tcp_push_hdr_i per callers below. */
1057  u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)
1058 {
1059  u8 tcp_hdr_opts_len, flags = TCP_FLAG_ACK;
1060  u32 advertise_wnd, data_len;
1061  tcp_main_t *tm = &tcp_main;
1062  tcp_header_t *th;
1063 
1064  data_len = b->current_length;
1065  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
1067 
1068  vnet_buffer (b)->tcp.flags = 0;
1069  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
1070 
1071  if (compute_opts)
1072  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
1073 
1074  tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
1075 
/* In a burst the window was precomputed by tcp_update_burst_snd_vars;
 * otherwise compute it fresh. */
1076  if (maybe_burst)
1077  advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
1078  else
1079  advertise_wnd = tcp_window_to_advertise (tc, TCP_STATE_ESTABLISHED);
1080 
/* Set PSH when this segment covers the recorded push sequence. */
1081  if (PREDICT_FALSE (tc->flags & TCP_CONN_PSH_PENDING))
1082  {
1083  if (seq_geq (tc->psh_seq, snd_nxt)
1084  && seq_lt (tc->psh_seq, snd_nxt + data_len))
1085  flags |= TCP_FLAG_PSH;
1086  }
1087  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, snd_nxt,
1088  tc->rcv_nxt, tcp_hdr_opts_len, flags,
1089  advertise_wnd);
1090 
/* Burst path reuses the pre-serialized per-thread option bytes. */
1091  if (maybe_burst)
1092  {
1093  clib_memcpy_fast ((u8 *) (th + 1),
1094  tm->wrk_ctx[tc->c_thread_index].cached_opts,
1095  tc->snd_opts_len);
1096  }
1097  else
1098  {
1099  u8 len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
1100  ASSERT (len == tc->snd_opts_len);
1101  }
1102 
1103  /*
1104  * Update connection variables
1105  */
1106 
1107  if (update_snd_nxt)
1108  tc->snd_nxt += data_len;
1109  tc->rcv_las = tc->rcv_nxt;
1110 
1111  tc->bytes_out += data_len;
1112  tc->data_segs_out += 1;
1113 
1114  th->checksum = tcp_compute_checksum (tc, b);
1115 
1116  TCP_EVT (TCP_EVT_PKTIZE, tc);
1117 }
1118 
/* tcp_buffer_len (name per the caller below): data length of a buffer.
 * NOTE(review): the signature lines (original 1119-1120) and the
 * chained-buffer length adjustment (1124) are missing from this
 * extraction. */
1121 {
1122  u32 data_len = b->current_length;
1123  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
1125  return data_len;
1126 }
1127 
/* Session-layer push-header hook: pushes the TCP header for a data
 * segment during a tx burst and updates rtt tracking and the retransmit
 * timer. NOTE(review): the signature line (original 1129) and line 1149
 * (presumably the retransmit timer arm) are missing from this
 * extraction. */
1128 u32
1130 {
1131  tcp_connection_t *tc = (tcp_connection_t *) tconn;
1132 
1133  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1134  tcp_bt_track_tx (tc, tcp_buffer_len (b));
1135 
1136  tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0, /* burst */ 1,
1137  /* update_snd_nxt */ 1);
1138 
1139  tc->snd_una_max = seq_max (tc->snd_nxt, tc->snd_una_max);
1140  tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
1141  /* If not tracking an ACK, start tracking */
1142  if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))
1143  {
1144  tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
1145  tc->rtt_seq = tc->snd_nxt;
1146  }
1147  if (PREDICT_FALSE (!tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)))
1148  {
1150  tc->rto_boff = 0;
1151  }
1152  tcp_trajectory_add_start (b, 3);
1153  return 0;
1154 }
1155 
/* Allocate a buffer, build an ACK and enqueue it for output; on buffer
 * exhaustion just refresh the receive window and drop the attempt.
 * NOTE(review): the signature line (original 1157) is missing from this
 * extraction; named tcp_send_ack per the delack handler below. */
1156 void
1158 {
1159  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1160  vlib_main_t *vm = wrk->vm;
1161  vlib_buffer_t *b;
1162  u32 bi;
1163 
1164  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1165  {
1166  tcp_update_rcv_wnd (tc);
1167  return;
1168  }
1169  b = vlib_get_buffer (vm, bi);
1170  tcp_init_buffer (vm, b);
1171  tcp_make_ack (tc, b);
1172  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1173 }
1174 
/* Schedule an ACK via a self custom tx event, at most once (guarded by
 * the SNDACK flag). NOTE(review): the signature line (original 1176) is
 * missing from this extraction. */
1175 void
1177 {
1178  if (!(tc->flags & TCP_CONN_SNDACK))
1179  {
1180  session_add_self_custom_tx_evt (&tc->connection, 1);
1181  tc->flags |= TCP_CONN_SNDACK;
1182  }
1183 }
1184 
/* Schedule a duplicate ACK: same self tx event as tcp_program_ack, plus
 * a saturating (max 255) pending dupack counter. NOTE(review): the
 * signature line (original 1186) is missing from this extraction. */
1185 void
1187 {
1188  if (!(tc->flags & TCP_CONN_SNDACK))
1189  {
1190  session_add_self_custom_tx_evt (&tc->connection, 1);
1191  tc->flags |= TCP_CONN_SNDACK;
1192  }
1193  if (tc->pending_dupacks < 255)
1194  tc->pending_dupacks += 1;
1195 }
1196 
/* Schedule a retransmit via a self custom tx event, at most once
 * (guarded by the RXT_PENDING flag). NOTE(review): the signature line
 * (original 1198) is missing from this extraction. */
1197 void
1199 {
1200  if (!(tc->flags & TCP_CONN_RXT_PENDING))
1201  {
1202  session_add_self_custom_tx_evt (&tc->connection, 0);
1203  tc->flags |= TCP_CONN_RXT_PENDING;
1204  }
1205 }
1206 
1207 /**
1208  * Delayed ack timer handler
1209  *
1210  * Sends delayed ACK when timer expires
1211  */
1212 void
1213 tcp_timer_delack_handler (u32 index, u32 thread_index)
1214 {
1215  tcp_connection_t *tc;
1216 
1217  tc = tcp_connection_get (index, thread_index);
1218  tcp_send_ack (tc);
1219 }
1220 
1221 /**
1222  * Send window update ack
1223  *
1224  * Ensures that it will be sent only once, after a zero rwnd has been
1225  * advertised in a previous ack, and only if rwnd has grown beyond a
1226  * configurable value.
1227  */
1228 void
/* NOTE(review): the signature line (original 1229) and line 1236
 * (presumably clearing the zero-rwnd-sent flag) are missing from this
 * extraction. */
1230 {
1231  if (tcp_zero_rwnd_sent (tc))
1232  {
1233  tcp_update_rcv_wnd (tc);
/* Only wake the peer once the window is usefully large (a configured
 * multiple of snd_mss). */
1234  if (tc->rcv_wnd >= tcp_cfg.rwnd_min_update_ack * tc->snd_mss)
1235  {
1237  tcp_program_ack (tc);
1238  }
1239  }
1240 }
1241 
/**
 * Allocate a new buffer and build a new tcp segment
 *
 * @param wrk		tcp worker
 * @param tc		connection for which the segment will be allocated
 * @param offset	offset of the first byte in the tx fifo
 * @param max_deq_bytes	segment size
 * @param[out] b	pointer to buffer allocated
 *
 * @return 	the number of bytes in the segment or 0 if buffer cannot be
 * 		allocated or no data available
 *
 * NOTE(review): the first line of the signature was dropped during
 * extraction; presumably "tcp_prepare_segment (tcp_worker_ctx_t * wrk,
 * tcp_connection_t * tc," -- confirm against upstream.
 */
static int
			 u32 offset, u32 max_deq_bytes, vlib_buffer_t ** b)
{
  u32 bytes_per_buffer = vnet_get_tcp_main ()->bytes_per_buffer;
  vlib_main_t *vm = wrk->vm;
  u32 bi, seg_size;
  int n_bytes = 0;
  u8 *data;

  /* Reserve room in front of the payload for all protocol headers */
  seg_size = max_deq_bytes + TRANSPORT_MAX_HDRS_LEN;

  /*
   * Prepare options
   */
  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);

  /*
   * Allocate and fill in buffer(s)
   */

  /* Easy case, buffer size greater than mss */
  if (PREDICT_TRUE (seg_size <= bytes_per_buffer))
    {
      if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
	return 0;
      *b = vlib_get_buffer (vm, bi);
      data = tcp_init_buffer (vm, *b);
      /* Peek, do not dequeue: bytes stay in the fifo until acked */
      n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
					    max_deq_bytes);
      ASSERT (n_bytes == max_deq_bytes);
      b[0]->current_length = n_bytes;
      tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
		      /* burst */ 0, /* update_snd_nxt */ 0);
    }
  /* Split mss into multiple buffers */
  else
    {
      u32 chain_bi = ~0, n_bufs_per_seg, n_bufs;
      u16 n_peeked, len_to_deq;
      vlib_buffer_t *chain_b, *prev_b;
      int i;

      /* Make sure we have enough buffers */
      n_bufs_per_seg = ceil ((double) seg_size / bytes_per_buffer);
      /* NOTE(review): the alignment argument line of this call (presumably
       * "CLIB_CACHE_LINE_BYTES);") was dropped during extraction */
      vec_validate_aligned (wrk->tx_buffers, n_bufs_per_seg - 1,
      n_bufs = vlib_buffer_alloc (vm, wrk->tx_buffers, n_bufs_per_seg);
      if (PREDICT_FALSE (n_bufs != n_bufs_per_seg))
	{
	  /* Partial allocation: release what we got and report failure */
	  if (n_bufs)
	    vlib_buffer_free (vm, wrk->tx_buffers, n_bufs);
	  return 0;
	}

      /* First buffer of the chain carries the tcp header */
      *b = vlib_get_buffer (vm, wrk->tx_buffers[--n_bufs]);
      data = tcp_init_buffer (vm, *b);
      n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
					    bytes_per_buffer -
					    TRANSPORT_MAX_HDRS_LEN);
      b[0]->current_length = n_bytes;
      b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
      /* NOTE(review): a line was dropped here; presumably it zeroed
       * b[0]->total_length_not_including_first_buffer before the
       * accumulation in the loop below */
      max_deq_bytes -= n_bytes;

      chain_b = *b;
      for (i = 1; i < n_bufs_per_seg; i++)
	{
	  prev_b = chain_b;
	  len_to_deq = clib_min (max_deq_bytes, bytes_per_buffer);
	  chain_bi = wrk->tx_buffers[--n_bufs];
	  chain_b = vlib_get_buffer (vm, chain_bi);
	  chain_b->current_data = 0;
	  data = vlib_buffer_get_current (chain_b);
	  n_peeked = session_tx_fifo_peek_bytes (&tc->connection, data,
						 offset + n_bytes,
						 len_to_deq);
	  ASSERT (n_peeked == len_to_deq);
	  n_bytes += n_peeked;
	  chain_b->current_length = n_peeked;
	  chain_b->next_buffer = 0;

	  /* update previous buffer */
	  prev_b->next_buffer = chain_bi;
	  prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;

	  max_deq_bytes -= n_peeked;
	  b[0]->total_length_not_including_first_buffer += n_peeked;
	}

      tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
		      /* burst */ 0, /* update_snd_nxt */ 0);

      /* Every allocated buffer should have been consumed by the chain */
      if (PREDICT_FALSE (n_bufs))
	{
	  clib_warning ("not all buffers consumed");
	  vlib_buffer_free (vm, wrk->tx_buffers, n_bufs);
	}
    }

  ASSERT (n_bytes > 0);
  ASSERT (((*b)->current_data + (*b)->current_length) <= bytes_per_buffer);

  return n_bytes;
}
1359 
/**
 * Build a retransmit segment
 *
 * @return the number of bytes in the segment or 0 if there's nothing to
 * retransmit
 *
 * NOTE(review): the first line of the signature was dropped during
 * extraction; presumably "tcp_prepare_retransmit_segment
 * (tcp_worker_ctx_t * wrk," -- confirm against upstream.
 */
static u32
				tcp_connection_t * tc, u32 offset,
				u32 max_deq_bytes, vlib_buffer_t ** b)
{
  u32 start, available_bytes;
  int n_bytes = 0;

  ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
  ASSERT (max_deq_bytes != 0);

  /*
   * Make sure we can retransmit something
   */
  available_bytes = transport_max_tx_dequeue (&tc->connection);
  ASSERT (available_bytes >= offset);
  available_bytes -= offset;
  if (!available_bytes)
    return 0;

  /* Clamp request to one mss and to what the fifo actually holds */
  max_deq_bytes = clib_min (tc->snd_mss, max_deq_bytes);
  max_deq_bytes = clib_min (available_bytes, max_deq_bytes);

  start = tc->snd_una + offset;
  /* Retransmits must stay within what was already sent */
  ASSERT (seq_leq (start + max_deq_bytes, tc->snd_nxt));

  n_bytes = tcp_prepare_segment (wrk, tc, offset, max_deq_bytes, b);
  if (!n_bytes)
    return 0;

  tc->snd_rxt_bytes += n_bytes;

  /* Inform delivery-rate sampling that this range is a retransmit */
  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
    tcp_bt_track_rxt (tc, start, start + n_bytes);

  /* Stats */
  tc->bytes_retrans += n_bytes;
  tc->segs_retrans += 1;
  TCP_EVT (TCP_EVT_CC_RTX, tc, offset, n_bytes);

  return n_bytes;
}
1407 
/* Clear scoreboard reneging state if it is inconsistent with snd_una.
 * NOTE(review): the function name line was dropped during extraction;
 * presumably "tcp_check_sack_reneging (tcp_connection_t * tc)". */
static void

{
  sack_scoreboard_t *sb = &tc->sack_sb;
  sack_scoreboard_hole_t *hole;

  hole = scoreboard_first_hole (sb);
  /* Nothing to do if the peer is not reneging and the first hole, if
   * any, still starts at snd_una */
  if (!sb->is_reneging && (!hole || hole->start == tc->snd_una))
    return;

  scoreboard_clear_reneging (sb, tc->snd_una, tc->snd_nxt);
}
1420 
/**
 * Reset congestion control, switch cwnd to loss window and try again.
 *
 * NOTE(review): the function name line was dropped during extraction;
 * presumably "tcp_cc_init_rxt_timeout (tcp_connection_t * tc)" -- confirm
 * against upstream.
 */
static void

{
  TCP_EVT (TCP_EVT_CC_EVT, tc, 6);

  /* Save pre-loss state so it can be restored on spurious timeout */
  tc->prev_ssthresh = tc->ssthresh;
  tc->prev_cwnd = tc->cwnd;

  /* If we entered loss without fast recovery, notify cc algo of the
   * congestion event such that it can update ssthresh and its state */
  if (!tcp_in_fastrecovery (tc))
    tcp_cc_congestion (tc);

  /* Let cc algo decide loss cwnd and ssthresh post unrecovered loss */
  tcp_cc_loss (tc);

  /* Invalidate any rtt sample in progress and restart accounting */
  tc->rtt_ts = 0;
  tc->cwnd_acc_bytes = 0;
  tc->tr_occurences += 1;
  tcp_recovery_on (tc);
}
1445 
/* Retransmit timer handler: retransmits the first unacked segment for
 * established connections, or the SYN-ACK for passive opens.
 *
 * NOTE(review): several single-statement lines were dropped during
 * extraction (visible as gaps below); each gap is marked inline. Confirm
 * the missing statements against upstream. */
void
tcp_timer_retransmit_handler (u32 tc_index, u32 thread_index)
{
  tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
  vlib_main_t *vm = wrk->vm;
  tcp_connection_t *tc;
  vlib_buffer_t *b = 0;
  u32 bi, n_bytes;

  tc = tcp_connection_get (tc_index, thread_index);

  /* Note: the connection may have been closed and pool_put */
  if (PREDICT_FALSE (tc == 0 || tc->state == TCP_STATE_SYN_SENT))
    return;

  /* Wait-close and retransmit could pop at the same time */
  if (tc->state == TCP_STATE_CLOSED)
    return;

  if (tc->state >= TCP_STATE_ESTABLISHED)
    {
      TCP_EVT (TCP_EVT_CC_EVT, tc, 2);

      /* Lost FIN, retransmit and return */
      if (tc->flags & TCP_CONN_FINSNT)
	{
	  tcp_send_fin (tc);
	  tc->rto_boff += 1;
	  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
	  return;
	}

      /* Shouldn't be here. This condition is tricky because it has to take
       * into account boff > 0 due to persist timeout. */
      if ((tc->rto_boff == 0 && tc->snd_una == tc->snd_nxt)
	  || (tc->rto_boff > 0 && seq_geq (tc->snd_una, tc->snd_congestion)
	      && !tcp_flight_size (tc)))
	{
	  ASSERT (!tcp_in_recovery (tc));
	  tc->rto_boff = 0;
	  return;
	}

      /* We're not in recovery so make sure rto_boff is 0. Can be non 0 due
       * to persist timer timeout */
      if (!tcp_in_recovery (tc) && tc->rto_boff > 0)
	{
	  tc->rto_boff = 0;
	  tcp_update_rto (tc);
	}

      /* Peer is dead or network connectivity is lost. Close connection.
       * RFC 1122 section 4.2.3.5 recommends a value of at least 100s. For
       * a min rto of 0.2s we need to retry about 8 times. */
      if (tc->rto_boff >= TCP_RTO_BOFF_MAX)
	{
	  tcp_send_reset (tc);
	  tcp_connection_set_state (tc, TCP_STATE_CLOSED);
	  session_transport_closing_notify (&tc->connection);
	  session_transport_closed_notify (&tc->connection);
	  /* NOTE(review): a statement was dropped here (presumably
	   * resetting the connection timers) */
	  tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
	  return;
	}

      /* NOTE(review): the body of this if was dropped during extraction
       * (presumably a call checking for sack reneging) */
      if (tcp_opts_sack_permitted (&tc->rcv_opts))

      /* Update send congestion to make sure that rxt has data to send */
      tc->snd_congestion = tc->snd_nxt;

      /* Send the first unacked segment. If we're short on buffers, return
       * as soon as possible */
      n_bytes = clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una);
      n_bytes = tcp_prepare_retransmit_segment (wrk, tc, 0, n_bytes, &b);
      if (!n_bytes)
	{
	  tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, 1);
	  return;
	}

      bi = vlib_get_buffer_index (vm, b);
      tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);

      /* Exponential backoff, capped at TCP_RTO_MAX */
      tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
      /* NOTE(review): a statement was dropped here (presumably rearming
       * the retransmit timer) */

      tc->rto_boff += 1;
      if (tc->rto_boff == 1)
	{
	  /* NOTE(review): a statement was dropped here (presumably the
	   * loss-window cc initialization) */
	  /* Record timestamp. Eifel detection algorithm RFC3522 */
	  tc->snd_rxt_ts = tcp_tstamp (tc);
	}

      if (tcp_opts_sack_permitted (&tc->rcv_opts))
	scoreboard_init_rxt (&tc->sack_sb, tc->snd_una + n_bytes);

      /* NOTE(review): a statement was dropped here (presumably programming
       * further retransmit via the custom tx path) */
    }
  /* Retransmit SYN-ACK */
  else if (tc->state == TCP_STATE_SYN_RCVD)
    {
      TCP_EVT (TCP_EVT_CC_EVT, tc, 2);

      tc->rtt_ts = 0;

      /* Passive open establish timeout */
      if (tc->rto > TCP_ESTABLISH_TIME >> 1)
	{
	  tcp_connection_set_state (tc, TCP_STATE_CLOSED);
	  /* NOTE(review): a statement was dropped here (presumably
	   * resetting the connection timers) */
	  tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
	  return;
	}

      if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
	{
	  tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, 1);
	  return;
	}

      tc->rto_boff += 1;
      if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
	tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);

      /* NOTE(review): a statement was dropped here (presumably rearming
       * the retransmit timer) */

      b = vlib_get_buffer (vm, bi);
      tcp_init_buffer (vm, b);
      tcp_make_synack (tc, b);
      TCP_EVT (TCP_EVT_SYN_RXT, tc, 1);

      /* Retransmit timer already updated, just enqueue to output */
      tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
    }
  else
    {
      ASSERT (tc->state == TCP_STATE_CLOSED);
      return;
    }
}
1588 
/**
 * SYN retransmit timer handler. Active open only.
 *
 * NOTE(review): two single-statement lines were dropped during extraction
 * (marked inline below); confirm against upstream.
 */
void
tcp_timer_retransmit_syn_handler (u32 tc_index, u32 thread_index)
{
  tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
  vlib_main_t *vm = wrk->vm;
  tcp_connection_t *tc;
  vlib_buffer_t *b = 0;
  u32 bi;

  tc = tcp_half_open_connection_get (tc_index);

  /* Note: the connection may have transitioned to ESTABLISHED... */
  if (PREDICT_FALSE (tc == 0 || tc->state != TCP_STATE_SYN_SENT))
    return;

  /* Half-open connection actually moved to established but we were
   * waiting for syn retransmit to pop to call cleanup from the right
   * thread. */
  if (tc->flags & TCP_CONN_HALF_OPEN_DONE)
    {
      /* NOTE(review): a line was dropped here; presumably the half-open
       * cleanup call whose failure the TCP_DBG below reports */
      TCP_DBG ("could not remove half-open connection");
      return;
    }

  TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
  /* Invalidate any rtt sample in progress */
  tc->rtt_ts = 0;

  /* Active open establish timeout */
  if (tc->rto >= TCP_ESTABLISH_TIME >> 1)
    {
      session_stream_connect_notify (&tc->connection, 1 /* fail */ );
      /* NOTE(review): a statement was dropped here (presumably the
       * connection cleanup call) */
      return;
    }

  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
    {
      tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN, 1);
      return;
    }

  /* Try without increasing RTO a number of times. If this fails,
   * start growing RTO exponentially */
  tc->rto_boff += 1;
  if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
    tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);

  b = vlib_get_buffer (vm, bi);
  tcp_init_buffer (vm, b);
  tcp_make_syn (tc, b);

  TCP_EVT (TCP_EVT_SYN_RXT, tc, 0);

  /* This goes straight to ipx_lookup */
  tcp_push_ip_hdr (wrk, tc, b);
  tcp_enqueue_to_ip_lookup (wrk, b, bi, tc->c_is_ip4, tc->c_fib_index);

  tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN,
		    tc->rto * TCP_TO_TIMER_TICK);
}
1653 
/**
 * Got 0 snd_wnd from peer, try to do something about it.
 *
 * Forces out one segment (a zero-window probe) so the peer eventually
 * reopens the window.
 *
 * NOTE(review): two single-statement lines were dropped during extraction
 * (marked inline below); confirm against upstream.
 */
void
tcp_timer_persist_handler (u32 index, u32 thread_index)
{
  tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
  u32 bi, max_snd_bytes, available_bytes, offset;
  tcp_main_t *tm = vnet_get_tcp_main ();
  vlib_main_t *vm = wrk->vm;
  tcp_connection_t *tc;
  vlib_buffer_t *b;
  int n_bytes = 0;
  u8 *data;

  tc = tcp_connection_get_if_valid (index, thread_index);
  if (!tc)
    return;

  /* Problem already solved or worse */
  if (tc->state == TCP_STATE_CLOSED || tc->snd_wnd > tc->snd_mss
      || (tc->flags & TCP_CONN_FINSNT))
    return;

  available_bytes = transport_max_tx_dequeue (&tc->connection);
  offset = tc->snd_nxt - tc->snd_una;

  /* Reprogram persist if no new bytes available to send. We may have data
   * next time */
  if (!available_bytes)
    {
      tcp_persist_timer_set (tc);
      return;
    }

  /* Everything in the fifo has already been sent */
  if (available_bytes <= offset)
    return;

  /* Increment RTO backoff */
  tc->rto_boff += 1;
  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);

  /*
   * Try to force the first unsent segment (or buffer)
   */
  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
    {
      tcp_persist_timer_set (tc);
      return;
    }
  b = vlib_get_buffer (vm, bi);
  data = tcp_init_buffer (vm, b);

  tcp_validate_txf_size (tc, offset);
  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
  /* One buffer worth of payload at most */
  max_snd_bytes = clib_min (tc->snd_mss,
			    tm->bytes_per_buffer - TRANSPORT_MAX_HDRS_LEN);
  n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
					max_snd_bytes);
  b->current_length = n_bytes;
  ASSERT (n_bytes != 0 && (tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)
			   || tc->snd_nxt == tc->snd_una_max
			   || tc->rto_boff > 1));

  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
    {
      /* NOTE(review): a statement was dropped here (presumably the
       * app-limited check for delivery-rate sampling) */
      tcp_bt_track_tx (tc, n_bytes);
    }

  tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0,
		  /* burst */ 0, /* update_snd_nxt */ 1);
  tc->snd_una_max = seq_max (tc->snd_nxt, tc->snd_una_max);
  tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);

  /* Just sent new data, enable retransmit */
  /* NOTE(review): a statement was dropped here (presumably updating the
   * retransmit timer, per the comment above) */
}
1734 
/**
 * Retransmit first unacked segment
 *
 * @return 0 on success, -1 if no segment could be prepared
 *
 * NOTE(review): the signature line was dropped during extraction;
 * presumably "tcp_retransmit_first_unacked (tcp_worker_ctx_t * wrk,
 * tcp_connection_t * tc)" -- confirm against upstream.
 */
int

{
  vlib_main_t *vm = wrk->vm;
  vlib_buffer_t *b;
  u32 bi, n_bytes;

  TCP_EVT (TCP_EVT_CC_EVT, tc, 1);

  /* Build at most one mss starting at snd_una (offset 0) */
  n_bytes = tcp_prepare_retransmit_segment (wrk, tc, 0, tc->snd_mss, &b);
  if (!n_bytes)
    return -1;

  bi = vlib_get_buffer_index (vm, b);
  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);

  return 0;
}
1756 
/* Transmit up to burst_size previously-unsent segments, constrained by the
 * peer's send window. Returns the number of segments sent.
 *
 * NOTE(review): the signature line was dropped during extraction
 * (presumably "tcp_transmit_unsent (tcp_worker_ctx_t * wrk,
 * tcp_connection_t * tc,"), as was the statement under the first
 * rate-sample check (presumably an app-limited check). Confirm against
 * upstream. */
static int
		     u32 burst_size)
{
  u32 offset, n_segs = 0, n_written, bi, available_wnd;
  vlib_main_t *vm = wrk->vm;
  vlib_buffer_t *b = 0;

  /* Start from the first unsent byte and never exceed the send window */
  offset = tc->snd_nxt - tc->snd_una;
  available_wnd = tc->snd_wnd - offset;
  burst_size = clib_min (burst_size, available_wnd / tc->snd_mss);

  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)

  while (n_segs < burst_size)
    {
      n_written = tcp_prepare_segment (wrk, tc, offset, tc->snd_mss, &b);
      if (!n_written)
	goto done;

      bi = vlib_get_buffer_index (vm, b);
      tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
      offset += n_written;
      n_segs += 1;

      if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
	tcp_bt_track_tx (tc, n_written);

      /* Advance snd_nxt past the newly sent data */
      tc->snd_nxt += n_written;
      tc->snd_una_max = seq_max (tc->snd_nxt, tc->snd_una_max);
    }

done:
  return n_segs;
}
1793 
/**
 * Estimate send space using proportional rate reduction (RFC6937)
 *
 * NOTE(review): the signature line was dropped during extraction;
 * presumably "tcp_fastrecovery_prr_snd_space (tcp_connection_t * tc)".
 */
int

{
  u32 pipe, prr_out;
  int space;

  pipe = tcp_flight_size (tc);
  /* Bytes sent since recovery started: retransmits plus new data */
  prr_out = tc->snd_rxt_bytes + (tc->snd_nxt - tc->snd_congestion);

  if (pipe > tc->ssthresh)
    {
      /* PRR: pace sending proportionally to delivered data, scaled by
       * ssthresh over the cwnd at congestion time */
      space = ((int) tc->prr_delivered * ((f64) tc->ssthresh / tc->prev_cwnd))
	- prr_out;
    }
  else
    {
      /* PRR slow-start reduction bound */
      int limit;
      limit = clib_max ((int) (tc->prr_delivered - prr_out), 0) + tc->snd_mss;
      space = clib_min (tc->ssthresh - pipe, limit);
    }
  /* Allow at least one mss if nothing has been sent in recovery yet */
  space = clib_max (space, prr_out ? 0 : tc->snd_mss);
  return space;
}
1820 
/* Heuristic deciding whether the segment at snd_una should be retried as a
 * lost retransmit: either we are on the first fast-recovery pass, or the
 * sack scoreboard advanced proportionally faster than snd_una.
 *
 * NOTE(review): the first signature line was dropped during extraction;
 * presumably "tcp_retransmit_should_retry_head (tcp_connection_t * tc,". */
static inline u8
				  sack_scoreboard_t * sb)
{
  u32 tx_adv_sack = sb->high_sacked - tc->snd_congestion;
  f64 rr = (f64) tc->ssthresh / tc->prev_cwnd;

  if (tcp_fastrecovery_first (tc))
    return 1;

  return (tx_adv_sack > (tc->snd_una - tc->prr_start) * rr);
}
1833 
/* Bytes in the tx fifo that have not yet been sent.
 * NOTE(review): the name line was dropped during extraction; presumably
 * "tcp_max_tx_deq (tcp_connection_t * tc)". The u8 return truncates the
 * byte count; callers appear to use it only as a boolean, but a count
 * that is a multiple of 256 would read as 0 -- worth confirming. */
static inline u8

{
  return (transport_max_tx_dequeue (&tc->connection)
	  - (tc->snd_nxt - tc->snd_una));
}
1840 
/* A rescue retransmit marker is valid only while it falls inside the
 * window being recovered, i.e., between snd_una and snd_congestion */
#define scoreboard_rescue_rxt_valid(_sb, _tc) \
 (seq_geq (_sb->rescue_rxt, _tc->snd_una) \
 && seq_leq (_sb->rescue_rxt, _tc->snd_congestion))
1844 
/**
 * Do retransmit with SACKs
 *
 * NOTE(review): several single-statement lines were dropped during
 * extraction (marked inline below), including the first signature line
 * (presumably "tcp_retransmit_sack (tcp_worker_ctx_t * wrk,
 * tcp_connection_t * tc,"). Confirm against upstream.
 */
static int
		     u32 burst_size)
{
  u32 n_written = 0, offset, max_bytes, n_segs = 0;
  u8 snd_limited = 0, can_rescue = 0;
  u32 bi, max_deq, burst_bytes;
  sack_scoreboard_hole_t *hole;
  vlib_main_t *vm = wrk->vm;
  vlib_buffer_t *b = 0;
  sack_scoreboard_t *sb;
  int snd_space;

  /* NOTE(review): a line was dropped here (presumably an assertion on
   * the congestion-recovery state) */

  /* Pacer limits how many segments this burst may carry */
  burst_bytes = transport_connection_tx_pacer_burst (&tc->connection);
  burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);
  if (!burst_size)
    {
      /* NOTE(review): a statement was dropped here (presumably
       * re-programming the retransmit for a later burst) */
      return 0;
    }

  if (tcp_in_recovery (tc))
    snd_space = tcp_available_cc_snd_space (tc);
  else
    snd_space = tcp_fastrecovery_prr_snd_space (tc);

  if (snd_space < tc->snd_mss)
    goto done;

  sb = &tc->sack_sb;

  /* Check if snd_una is a lost retransmit */
  /* NOTE(review): the final condition line of this if was dropped
   * (presumably the retry-head heuristic call) */
  if (pool_elts (sb->holes)
      && seq_gt (sb->high_sacked, tc->snd_congestion)
      && tc->rxt_head != tc->snd_una
    {
      max_bytes = clib_min (tc->snd_mss, tc->snd_congestion - tc->snd_una);
      n_written = tcp_prepare_retransmit_segment (wrk, tc, 0, max_bytes, &b);
      if (!n_written)
	{
	  /* NOTE(review): a statement was dropped here (presumably
	   * re-programming the retransmit) */
	  goto done;
	}
      bi = vlib_get_buffer_index (vm, b);
      tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
      n_segs = 1;

      tc->rxt_head = tc->snd_una;
      tc->rxt_delivered += n_written;
      tc->prr_delivered += n_written;
      ASSERT (tc->rxt_delivered <= tc->snd_rxt_bytes);
    }

  /* NOTE(review): a statement was dropped here (presumably clearing the
   * fastrecovery-first flag) */

  TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
  hole = scoreboard_get_hole (sb, sb->cur_rxt_hole);

  /* Unsent bytes still available in the tx fifo */
  max_deq = transport_max_tx_dequeue (&tc->connection);
  max_deq -= tc->snd_nxt - tc->snd_una;

  while (snd_space > 0 && n_segs < burst_size)
    {
      hole = scoreboard_next_rxt_hole (sb, hole, max_deq != 0, &can_rescue,
				       &snd_limited);
      if (!hole)
	{
	  /* We are out of lost holes to retransmit so send some new data. */
	  if (max_deq > tc->snd_mss)
	    {
	      u32 n_segs_new;
	      int av_wnd;

	      /* Make sure we don't exceed available window and leave space
	       * for one more packet, to avoid zero window acks */
	      av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una);
	      av_wnd = clib_max (av_wnd - tc->snd_mss, 0);
	      snd_space = clib_min (snd_space, av_wnd);
	      snd_space = clib_min (max_deq, snd_space);
	      burst_size = clib_min (burst_size - n_segs,
				     snd_space / tc->snd_mss);
	      burst_size = clib_min (burst_size, TCP_RXT_MAX_BURST);
	      n_segs_new = tcp_transmit_unsent (wrk, tc, burst_size);
	      /* NOTE(review): the body of this if was dropped (presumably
	       * re-programming the retransmit for leftover data) */
	      if (max_deq > n_segs_new * tc->snd_mss)

	      n_segs += n_segs_new;
	      goto done;
	    }

	  if (tcp_in_recovery (tc) || !can_rescue
	      || scoreboard_rescue_rxt_valid (sb, tc))
	    break;

	  /* If rescue rxt undefined or less than snd_una then one segment of
	   * up to SMSS octets that MUST include the highest outstanding
	   * unSACKed sequence number SHOULD be returned, and RescueRxt set to
	   * RecoveryPoint. HighRxt MUST NOT be updated.
	   */
	  hole = scoreboard_last_hole (sb);
	  max_bytes = clib_min (tc->snd_mss, hole->end - hole->start);
	  max_bytes = clib_min (max_bytes, snd_space);
	  offset = hole->end - tc->snd_una - max_bytes;
	  n_written = tcp_prepare_retransmit_segment (wrk, tc, offset,
						      max_bytes, &b);
	  if (!n_written)
	    goto done;

	  sb->rescue_rxt = tc->snd_congestion;
	  bi = vlib_get_buffer_index (vm, b);
	  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
	  n_segs += 1;
	  break;
	}

      max_bytes = clib_min (hole->end - sb->high_rxt, snd_space);
      max_bytes = snd_limited ? clib_min (max_bytes, tc->snd_mss) : max_bytes;
      if (max_bytes == 0)
	break;

      offset = sb->high_rxt - tc->snd_una;
      n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, max_bytes,
						  &b);
      ASSERT (n_written <= snd_space);

      /* Nothing left to retransmit */
      if (n_written == 0)
	break;

      bi = vlib_get_buffer_index (vm, b);
      tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);

      sb->high_rxt += n_written;
      ASSERT (seq_leq (sb->high_rxt, tc->snd_nxt));

      snd_space -= n_written;
      n_segs += 1;
    }

  /* NOTE(review): the body of this if was dropped (presumably
   * re-programming the retransmit while holes remain) */
  if (hole)

done:

  transport_connection_tx_pacer_reset_bucket (&tc->connection, 0);
  return n_segs;
}
1998 
/**
 * Fast retransmit without SACK info
 *
 * NOTE(review): several single-statement lines were dropped during
 * extraction (marked inline below), including the first signature line
 * (presumably "tcp_retransmit_no_sack (tcp_worker_ctx_t * wrk,
 * tcp_connection_t * tc,"). Confirm against upstream.
 */
static int
			u32 burst_size)
{
  u32 n_written = 0, offset = 0, bi, max_deq, n_segs_now, max_bytes;
  u32 burst_bytes, sent_bytes;
  vlib_main_t *vm = wrk->vm;
  int snd_space, n_segs = 0;
  u8 cc_limited = 0;
  vlib_buffer_t *b;

  /* NOTE(review): a line was dropped here (presumably a state assertion) */
  TCP_EVT (TCP_EVT_CC_EVT, tc, 0);

  /* Pacer limits how many segments this burst may carry */
  burst_bytes = transport_connection_tx_pacer_burst (&tc->connection);
  burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);
  if (!burst_size)
    {
      /* NOTE(review): a statement was dropped here (presumably
       * re-programming the retransmit) */
      return 0;
    }

  snd_space = tcp_available_cc_snd_space (tc);
  cc_limited = snd_space < burst_bytes;

  if (!tcp_fastrecovery_first (tc))
    goto send_unsent;

  /* RFC 6582: [If a partial ack], retransmit the first unacknowledged
   * segment. */
  while (snd_space > 0 && n_segs < burst_size)
    {
      max_bytes = clib_min (tc->snd_mss,
			    tc->snd_congestion - tc->snd_una - offset);
      if (!max_bytes)
	break;
      n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, max_bytes,
						  &b);

      /* Nothing left to retransmit */
      if (n_written == 0)
	break;

      bi = vlib_get_buffer_index (vm, b);
      tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
      snd_space -= n_written;
      offset += n_written;
      n_segs += 1;
    }

  if (n_segs == burst_size)
    goto done;

send_unsent:

  /* RFC 6582: Send a new segment if permitted by the new value of cwnd. */
  if (snd_space < tc->snd_mss || tc->snd_mss == 0)
    goto done;

  max_deq = transport_max_tx_dequeue (&tc->connection);
  max_deq -= tc->snd_nxt - tc->snd_una;
  if (max_deq)
    {
      snd_space = clib_min (max_deq, snd_space);
      burst_size = clib_min (burst_size - n_segs, snd_space / tc->snd_mss);
      n_segs_now = tcp_transmit_unsent (wrk, tc, burst_size);
      /* NOTE(review): the body of this if was dropped (presumably
       * re-programming the retransmit for leftover data) */
      if (n_segs_now && max_deq > n_segs_now * tc->snd_mss)
      n_segs += n_segs_now;
    }

done:
  /* NOTE(review): a statement was dropped here (presumably clearing the
   * fastrecovery-first flag) */

  /* Credit the pacer with what was actually sent; if cc limited us, keep
   * the full burst so pacing does not stall */
  sent_bytes = clib_min (n_segs * tc->snd_mss, burst_bytes);
  sent_bytes = cc_limited ? burst_bytes : sent_bytes;
  transport_connection_tx_pacer_update_bytes (&tc->connection, sent_bytes);

  return n_segs;
}
2082 
2083 static int
2084 tcp_send_acks (tcp_connection_t * tc, u32 max_burst_size)
2085 {
2086  int j, n_acks;
2087 
2088  if (!tc->pending_dupacks)
2089  {
2090  if (tcp_in_cong_recovery (tc) || !tcp_max_tx_deq (tc)
2091  || tc->state != TCP_STATE_ESTABLISHED)
2092  {
2093  tcp_send_ack (tc);
2094  return 1;
2095  }
2096  return 0;
2097  }
2098 
2099  /* If we're supposed to send dupacks but have no ooo data
2100  * send only one ack */
2101  if (!vec_len (tc->snd_sacks))
2102  {
2103  tcp_send_ack (tc);
2104  tc->pending_dupacks = 0;
2105  return 1;
2106  }
2107 
2108  /* Start with first sack block */
2109  tc->snd_sack_pos = 0;
2110 
2111  /* Generate enough dupacks to cover all sack blocks. Do not generate
2112  * more sacks than the number of packets received. But do generate at
2113  * least 3, i.e., the number needed to signal congestion, if needed. */
2114  n_acks = vec_len (tc->snd_sacks) / TCP_OPTS_MAX_SACK_BLOCKS;
2115  n_acks = clib_min (n_acks, tc->pending_dupacks);
2116  n_acks = clib_max (n_acks, clib_min (tc->pending_dupacks, 3));
2117  for (j = 0; j < clib_min (n_acks, max_burst_size); j++)
2118  tcp_send_ack (tc);
2119 
2120  if (n_acks < max_burst_size)
2121  {
2122  tc->pending_dupacks = 0;
2123  tc->snd_sack_pos = 0;
2124  tc->dupacks_out += n_acks;
2125  return n_acks;
2126  }
2127  else
2128  {
2129  TCP_DBG ("constrained by burst size");
2130  tc->pending_dupacks = n_acks - max_burst_size;
2131  tc->dupacks_out += max_burst_size;
2132  tcp_program_dupack (tc);
2133  return max_burst_size;
2134  }
2135 }
2136 
/* Dispatch retransmit to the sack or no-sack implementation; returns the
 * number of segments sent.
 * NOTE(review): the signature line was dropped during extraction;
 * presumably "tcp_do_retransmit (tcp_connection_t * tc, u32
 * max_burst_size)". */
static int

{
  tcp_worker_ctx_t *wrk;
  u32 n_segs;

  /* Connection may have been closed before the custom tx event popped */
  if (PREDICT_FALSE (tc->state == TCP_STATE_CLOSED))
    return 0;

  wrk = tcp_get_worker (tc->c_thread_index);

  if (tcp_opts_sack_permitted (&tc->rcv_opts))
    n_segs = tcp_retransmit_sack (wrk, tc, max_burst_size);
  else
    n_segs = tcp_retransmit_no_sack (wrk, tc, max_burst_size);

  return n_segs;
}
2155 
2156 int
2157 tcp_session_custom_tx (void *conn, u32 max_burst_size)
2158 {
2159  tcp_connection_t *tc = (tcp_connection_t *) conn;
2160  u32 n_segs = 0;
2161 
2162  if (tcp_in_cong_recovery (tc) && (tc->flags & TCP_CONN_RXT_PENDING))
2163  {
2164  tc->flags &= ~TCP_CONN_RXT_PENDING;
2165  n_segs = tcp_do_retransmit (tc, max_burst_size);
2166  max_burst_size -= n_segs;
2167  }
2168 
2169  if (!(tc->flags & TCP_CONN_SNDACK))
2170  return n_segs;
2171 
2172  tc->flags &= ~TCP_CONN_SNDACK;
2173 
2174  /* We have retransmitted packets and no dupack */
2175  if (n_segs && !tc->pending_dupacks)
2176  return n_segs;
2177 
2178  if (!max_burst_size)
2179  {
2180  tcp_program_ack (tc);
2181  return max_burst_size;
2182  }
2183 
2184  n_segs += tcp_send_acks (tc, max_burst_size);
2185 
2186  return n_segs;
2187 }
2188 #endif /* CLIB_MARCH_VARIANT */
2189 
/* Resolve the adjacency for an ip6 link-local peer and pick the output
 * next node accordingly.
 * NOTE(review): two lines were dropped during extraction: the first
 * signature line (presumably "tcp_output_handle_link_local
 * (tcp_connection_t * tc0, vlib_buffer_t * b0,") and the condition
 * guarding the IP_REWRITE assignment (presumably a check of
 * adj->lookup_next_index against IP_LOOKUP_NEXT_REWRITE). Confirm against
 * upstream. */
static void
			      u16 * next0, u32 * error0)
{
  ip_adjacency_t *adj;
  adj_index_t ai;

  /* Not thread safe but as long as the connection exists the adj should
   * not be removed */
  ai = adj_nbr_find (FIB_PROTOCOL_IP6, VNET_LINK_IP6, &tc0->c_rmt_ip,
		     tc0->sw_if_index);
  if (ai == ADJ_INDEX_INVALID)
    {
      /* No adjacency: drop and flag the link-local rewrite error */
      vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
      *next0 = TCP_OUTPUT_NEXT_DROP;
      *error0 = TCP_ERROR_LINK_LOCAL_RW;
      return;
    }

  adj = adj_get (ai);
  *next0 = TCP_OUTPUT_NEXT_IP_REWRITE;
  else if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP)
    *next0 = TCP_OUTPUT_NEXT_IP_ARP;
  else
    {
      *next0 = TCP_OUTPUT_NEXT_DROP;
      *error0 = TCP_ERROR_LINK_LOCAL_RW;
    }
  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai;
}
2221 
/* Add packet traces for all traced buffers in an output frame.
 * NOTE(review): the first signature line was dropped during extraction;
 * presumably "tcp46_output_trace_frame (vlib_main_t * vm,
 * vlib_node_runtime_t * node,". */
static void
			  u32 * to_next, u32 n_bufs)
{
  tcp_connection_t *tc;
  tcp_tx_trace_t *t;
  vlib_buffer_t *b;
  tcp_header_t *th;
  int i;

  for (i = 0; i < n_bufs; i++)
    {
      b = vlib_get_buffer (vm, to_next[i]);
      /* Only buffers explicitly marked for tracing */
      if (!(b->flags & VLIB_BUFFER_IS_TRACED))
	continue;
      th = vlib_buffer_get_current (b);
      tc = tcp_connection_get (vnet_buffer (b)->tcp.connection_index,
			       vm->thread_index);
      t = vlib_add_trace (vm, node, b, sizeof (*t));
      /* Snapshot header and connection so the trace survives both */
      clib_memcpy_fast (&t->tcp_header, th, sizeof (t->tcp_header));
      clib_memcpy_fast (&t->tcp_connection, tc, sizeof (t->tcp_connection));
    }
}
2245 
/* Push the ip4 or ip6 header in front of the tcp header.
 * NOTE(review): two lines were dropped during extraction: the first
 * signature line (presumably "tcp_output_push_ip (vlib_main_t * vm,
 * vlib_buffer_t * b0,") and one argument line of the TCP_EVT call. */
always_inline void
		    tcp_connection_t * tc0, u8 is_ip4)
{
  TCP_EVT (TCP_EVT_OUTPUT, tc0,
	   b0->current_length);

  if (is_ip4)
    vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4,
			  IP_PROTOCOL_TCP, tcp_csum_offload (tc0));
  else
    /* ip6 path also carries the connection's flow label */
    vlib_buffer_push_ip6_custom (vm, b0, &tc0->c_lcl_ip6, &tc0->c_rmt_ip6,
				 IP_PROTOCOL_TCP, tc0->ipv6_flow_label);
}
2261 
/* Mark the buffer for GSO when TSO is enabled and the payload exceeds one
 * mss.
 * NOTE(review): two lines were dropped during extraction: the signature
 * line (presumably "tcp_check_if_gso (tcp_connection_t * tc,
 * vlib_buffer_t * b)") and the statement under the chained-buffer check
 * (presumably adding total_length_not_including_first_buffer to
 * data_len). Confirm against upstream. */
always_inline void

{
  if (PREDICT_TRUE (!(tc->cfg_flags & TCP_CFG_F_TSO)))
    return;

  /* Payload size excluding the tcp header and options */
  u16 data_len = b->current_length - sizeof (tcp_header_t) - tc->snd_opts_len;

  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID))

  if (PREDICT_TRUE (data_len <= tc->snd_mss))
    return;
  else
    {
      ASSERT ((b->flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID) != 0);
      ASSERT ((b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID) != 0);
      /* Hand segmentation off to the GSO infrastructure */
      b->flags |= VNET_BUFFER_F_GSO;
      vnet_buffer2 (b)->gso_l4_hdr_sz =
	sizeof (tcp_header_t) + tc->snd_opts_len;
      vnet_buffer2 (b)->gso_size = tc->snd_mss;
    }
}
2285 
/* Finish an outgoing packet: select the next node, set tx fib/interface
 * metadata, handle ip6 link-local peers and update stats.
 * NOTE(review): the first signature line was dropped during extraction;
 * presumably "tcp_output_handle_packet (tcp_connection_t * tc0,
 * vlib_buffer_t * b0,". */
always_inline void
			  vlib_node_runtime_t * error_node, u16 * next0,
			  u8 is_ip4)
{
  /* If next_index is not drop use it */
  if (tc0->next_node_index)
    {
      /* Feature/app requested a custom next node; pass its opaque along */
      *next0 = tc0->next_node_index;
      vnet_buffer (b0)->tcp.next_node_opaque = tc0->next_node_opaque;
    }
  else
    {
      *next0 = TCP_OUTPUT_NEXT_IP_LOOKUP;
    }

  vnet_buffer (b0)->sw_if_index[VLIB_TX] = tc0->c_fib_index;
  vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;

  if (!is_ip4)
    {
      u32 error0 = 0;

      /* Link-local peers bypass ip lookup and need adjacency resolution */
      if (PREDICT_FALSE (ip6_address_is_link_local_unicast (&tc0->c_rmt_ip6)))
	tcp_output_handle_link_local (tc0, b0, next0, &error0);

      if (PREDICT_FALSE (error0))
	{
	  b0->error = error_node->errors[error0];
	  return;
	}
    }

  /* Any outgoing segment acks, so the delayed-ack timer can be stopped */
  if (!TCP_ALWAYS_ACK)
    tcp_timer_reset (tc0, TCP_TIMER_DELACK);

  tc0->segs_out += 1;
}
2324 
/* Shared ip4/ip6 tcp output node function: pushes ip headers, handles
 * GSO marking and next-node selection for a frame, two packets at a time.
 * NOTE(review): the leading signature lines were dropped during
 * extraction (presumably "always_inline uword tcp46_output_inline
 * (vlib_main_t * vm, vlib_node_runtime_t * node,"), as was the
 * trace-enabled check guarding tcp46_output_trace_frame. Confirm against
 * upstream. */
			  vlib_frame_t * frame, int is_ip4)
{
  u32 n_left_from, *from, thread_index = vm->thread_index;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u16 nexts[VLIB_FRAME_SIZE], *next;
  vlib_node_runtime_t *error_node;

  error_node = vlib_node_get_runtime (vm, tcp_node_index (output, is_ip4));

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  /* Refresh the worker's notion of time once per frame */
  tcp_set_time_now (tcp_get_worker (thread_index));

  tcp46_output_trace_frame (vm, node, from, n_left_from);

  vlib_get_buffers (vm, from, bufs, n_left_from);
  b = bufs;
  next = nexts;

  /* Dual-loop: process two packets while prefetching the next two */
  while (n_left_from >= 4)
    {
      tcp_connection_t *tc0, *tc1;

      {
	vlib_prefetch_buffer_header (b[2], STORE);
	CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);

	vlib_prefetch_buffer_header (b[3], STORE);
	CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
      }

      tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
				thread_index);
      tc1 = tcp_connection_get (vnet_buffer (b[1])->tcp.connection_index,
				thread_index);

      /* Fast path: both connections valid */
      if (PREDICT_TRUE (!tc0 + !tc1 == 0))
	{
	  tcp_output_push_ip (vm, b[0], tc0, is_ip4);
	  tcp_output_push_ip (vm, b[1], tc1, is_ip4);

	  tcp_check_if_gso (tc0, b[0]);
	  tcp_check_if_gso (tc1, b[1]);

	  tcp_output_handle_packet (tc0, b[0], error_node, &next[0], is_ip4);
	  tcp_output_handle_packet (tc1, b[1], error_node, &next[1], is_ip4);
	}
      else
	{
	  /* At least one connection gone: handle each independently */
	  if (tc0 != 0)
	    {
	      tcp_output_push_ip (vm, b[0], tc0, is_ip4);
	      tcp_check_if_gso (tc0, b[0]);
	      tcp_output_handle_packet (tc0, b[0], error_node, &next[0],
					is_ip4);
	    }
	  else
	    {
	      b[0]->error = error_node->errors[TCP_ERROR_INVALID_CONNECTION];
	      next[0] = TCP_OUTPUT_NEXT_DROP;
	    }
	  if (tc1 != 0)
	    {
	      tcp_output_push_ip (vm, b[1], tc1, is_ip4);
	      tcp_check_if_gso (tc1, b[1]);
	      tcp_output_handle_packet (tc1, b[1], error_node, &next[1],
					is_ip4);
	    }
	  else
	    {
	      b[1]->error = error_node->errors[TCP_ERROR_INVALID_CONNECTION];
	      next[1] = TCP_OUTPUT_NEXT_DROP;
	    }
	}

      b += 2;
      next += 2;
      n_left_from -= 2;
    }
  /* Single-loop for the frame remainder */
  while (n_left_from > 0)
    {
      tcp_connection_t *tc0;

      if (n_left_from > 1)
	{
	  vlib_prefetch_buffer_header (b[1], STORE);
	  CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
	}

      tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
				thread_index);

      if (PREDICT_TRUE (tc0 != 0))
	{
	  tcp_output_push_ip (vm, b[0], tc0, is_ip4);
	  tcp_check_if_gso (tc0, b[0]);
	  tcp_output_handle_packet (tc0, b[0], error_node, &next[0], is_ip4);
	}
      else
	{
	  b[0]->error = error_node->errors[TCP_ERROR_INVALID_CONNECTION];
	  next[0] = TCP_OUTPUT_NEXT_DROP;
	}

      b += 1;
      next += 1;
      n_left_from -= 1;
    }

  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
  vlib_node_increment_counter (vm, tcp_node_index (output, is_ip4),
			       TCP_ERROR_PKTS_SENT, frame->n_vectors);
  return frame->n_vectors;
}
2442 
2444  vlib_frame_t * from_frame)
2445 {
2446  return tcp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ );
2447 }
2448 
2450  vlib_frame_t * from_frame)
2451 {
2452  return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ );
2453 }
2454 
2455 /* *INDENT-OFF* */
2457 {
2458  .name = "tcp4-output",
2459  /* Takes a vector of packets. */
2460  .vector_size = sizeof (u32),
2461  .n_errors = TCP_N_ERROR,
2462  .protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
2463  .error_strings = tcp_error_strings,
2464  .n_next_nodes = TCP_OUTPUT_N_NEXT,
2465  .next_nodes = {
2466 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
2468 #undef _
2469  },
2470  .format_buffer = format_tcp_header,
2471  .format_trace = format_tcp_tx_trace,
2472 };
2473 /* *INDENT-ON* */
2474 
2475 /* *INDENT-OFF* */
2477 {
2478  .name = "tcp6-output",
2479  /* Takes a vector of packets. */
2480  .vector_size = sizeof (u32),
2481  .n_errors = TCP_N_ERROR,
2482  .protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
2483  .error_strings = tcp_error_strings,
2484  .n_next_nodes = TCP_OUTPUT_N_NEXT,
2485  .next_nodes = {
2486 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
2488 #undef _
2489  },
2490  .format_buffer = format_tcp_header,
2491  .format_trace = format_tcp_tx_trace,
2492 };
2493 /* *INDENT-ON* */
2494 
/** Next nodes reachable from the tcp4/6-reset nodes. */
typedef enum _tcp_reset_next
{
  TCP_RESET_NEXT_DROP,
  TCP_RESET_NEXT_IP_LOOKUP,
  TCP_RESET_N_NEXT
} tcp_reset_next_t;

#define foreach_tcp4_reset_next        	\
  _(DROP, "error-drop")                 \
  _(IP_LOOKUP, "ip4-lookup")

#define foreach_tcp6_reset_next        	\
  _(DROP, "error-drop")                 \
  _(IP_LOOKUP, "ip6-lookup")
2509 
2510 static uword
2512  vlib_frame_t * from_frame, u8 is_ip4)
2513 {
2514  u32 n_left_from, next_index, *from, *to_next;
2515  u32 my_thread_index = vm->thread_index;
2516 
2517  from = vlib_frame_vector_args (from_frame);
2518  n_left_from = from_frame->n_vectors;
2519 
2520  next_index = node->cached_next_index;
2521 
2522  while (n_left_from > 0)
2523  {
2524  u32 n_left_to_next;
2525 
2526  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2527 
2528  while (n_left_from > 0 && n_left_to_next > 0)
2529  {
2530  u32 bi0;
2531  vlib_buffer_t *b0;
2532  tcp_tx_trace_t *t0;
2533  tcp_header_t *th0;
2534  u32 error0 = TCP_ERROR_RST_SENT, next0 = TCP_RESET_NEXT_IP_LOOKUP;
2535 
2536  bi0 = from[0];
2537  to_next[0] = bi0;
2538  from += 1;
2539  to_next += 1;
2540  n_left_from -= 1;
2541  n_left_to_next -= 1;
2542 
2543  b0 = vlib_get_buffer (vm, bi0);
2544 
2545  if (tcp_make_reset_in_place (vm, b0, vnet_buffer (b0)->tcp.flags,
2546  my_thread_index, is_ip4))
2547  {
2548  error0 = TCP_ERROR_LOOKUP_DROPS;
2549  next0 = TCP_RESET_NEXT_DROP;
2550  goto done;
2551  }
2552 
2553  /* Prepare to send to IP lookup */
2554  vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
2555  next0 = TCP_RESET_NEXT_IP_LOOKUP;
2556 
2557  done:
2558  b0->error = node->errors[error0];
2559  b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
2560  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2561  {
2562  th0 = vlib_buffer_get_current (b0);
2563  if (is_ip4)
2564  th0 = ip4_next_header ((ip4_header_t *) th0);
2565  else
2566  th0 = ip6_next_header ((ip6_header_t *) th0);
2567  t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
2568  clib_memcpy_fast (&t0->tcp_header, th0,
2569  sizeof (t0->tcp_header));
2570  }
2571 
2572  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
2573  n_left_to_next, bi0, next0);
2574  }
2575  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2576  }
2577  return from_frame->n_vectors;
2578 }
2579 
2581  vlib_frame_t * from_frame)
2582 {
2583  return tcp46_send_reset_inline (vm, node, from_frame, 1);
2584 }
2585 
2587  vlib_frame_t * from_frame)
2588 {
2589  return tcp46_send_reset_inline (vm, node, from_frame, 0);
2590 }
2591 
2592 /* *INDENT-OFF* */
2594  .name = "tcp4-reset",
2595  .vector_size = sizeof (u32),
2596  .n_errors = TCP_N_ERROR,
2597  .error_strings = tcp_error_strings,
2598  .n_next_nodes = TCP_RESET_N_NEXT,
2599  .next_nodes = {
2600 #define _(s,n) [TCP_RESET_NEXT_##s] = n,
2602 #undef _
2603  },
2604  .format_trace = format_tcp_tx_trace,
2605 };
2606 /* *INDENT-ON* */
2607 
2608 /* *INDENT-OFF* */
2610  .name = "tcp6-reset",
2611  .vector_size = sizeof (u32),
2612  .n_errors = TCP_N_ERROR,
2613  .error_strings = tcp_error_strings,
2614  .n_next_nodes = TCP_RESET_N_NEXT,
2615  .next_nodes = {
2616 #define _(s,n) [TCP_RESET_NEXT_##s] = n,
2618 #undef _
2619  },
2620  .format_trace = format_tcp_tx_trace,
2621 };
2622 /* *INDENT-ON* */
2623 
2624 /*
2625  * fd.io coding-style-patch-verification: ON
2626  *
2627  * Local Variables:
2628  * eval: (c-set-style "gnu")
2629  * End:
2630  */
void tcp_make_fin(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to FIN-ACK.
Definition: tcp_output.c:569
#define tcp_in_cong_recovery(tc)
Definition: tcp.h:474
u32 flags
buffer flags: VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index, VLIB_BUFFER_IS_TRACED: trace this buffer.
Definition: buffer.h:124
static void tcp_check_if_gso(tcp_connection_t *tc, vlib_buffer_t *b)
Definition: tcp_output.c:2263
static void tcp_check_sack_reneging(tcp_connection_t *tc)
Definition: tcp_output.c:1409
void session_flush_frames_main_thread(vlib_main_t *vm)
Definition: session.c:1491
End of options.
Definition: tcp_packet.h:104
static u32 tcp_options_write(u8 *data, tcp_options_t *opts)
Write TCP options to segment.
Definition: tcp_output.c:175
#define clib_min(x, y)
Definition: clib.h:295
static int tcp_send_acks(tcp_connection_t *tc, u32 max_burst_size)
Definition: tcp_output.c:2084
#define TCP_OPTION_LEN_EOL
Definition: tcp_packet.h:162
u16 ip4_tcp_compute_checksum_custom(vlib_main_t *vm, vlib_buffer_t *p0, ip46_address_t *src, ip46_address_t *dst)
Definition: tcp_output.c:479
#define CLIB_UNUSED(x)
Definition: clib.h:82
#define tcp_in_recovery(tc)
Definition: tcp.h:465
static f64 tcp_time_now_us(u32 thread_index)
Definition: tcp.h:1028
static void tcp_retransmit_timer_set(tcp_connection_t *tc)
Definition: tcp.h:1151
static u32 transport_rx_fifo_size(transport_connection_t *tc)
Definition: session.h:492
#define TCP_OPTION_LEN_SACK_PERMITTED
Definition: tcp_packet.h:166
#define seq_leq(_s1, _s2)
Definition: tcp.h:874
static void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers Frees the entire buffer chain for each buffer.
Definition: buffer_funcs.h:890
ip4_address_t src_address
Definition: ip4_packet.h:170
#define tcp_node_index(node_id, is_ip4)
Definition: tcp.h:680
static void session_add_pending_tx_buffer(session_type_t st, u32 thread_index, u32 bi)
Definition: session.h:629
int session_tx_fifo_peek_bytes(transport_connection_t *tc, u8 *buffer, u32 offset, u32 max_bytes)
Definition: session.c:503
#define TCP_TO_TIMER_TICK
Factor for converting ticks to timer ticks.
Definition: tcp.h:95
#define vnet_buffer2(b)
Definition: buffer.h:467
Selective Ack permitted.
Definition: tcp_packet.h:108
#define TCP_FLAG_SYN
Definition: fa_node.h:13
#define tcp_opts_tstamp(_to)
Definition: tcp_packet.h:156
void tcp_make_synack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN-ACK.
Definition: tcp_output.c:604
#define PREDICT_TRUE(x)
Definition: clib.h:112
i16 current_data
signed offset in data[], pre_data[] that we are currently processing.
Definition: buffer.h:110
static void tcp_flush_frame_to_ip_lookup(tcp_worker_ctx_t *wrk, u8 is_ip4)
Flush ip lookup tx frames populated by timer pops.
Definition: tcp_output.c:981
static tcp_connection_t * tcp_connection_get_if_valid(u32 conn_index, u32 thread_index)
Definition: tcp.h:726
#define clib_memcpy_fast(a, b, c)
Definition: string.h:81
#define NULL
Definition: clib.h:58
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
struct _sack_scoreboard sack_scoreboard_t
static int tcp_do_retransmit(tcp_connection_t *tc, u32 max_burst_size)
Definition: tcp_output.c:2138
IP unicast adjacency.
Definition: adj.h:221
u32 fib_table_get_index_for_sw_if_index(fib_protocol_t proto, u32 sw_if_index)
Get the index of the FIB bound to the interface.
Definition: fib_table.c:989
static tcp_connection_t * tcp_half_open_connection_get(u32 conn_index)
Definition: tcp.h:777
sack_scoreboard_hole_t * scoreboard_last_hole(sack_scoreboard_t *sb)
Definition: tcp_input.c:681
#define tcp_zero_rwnd_sent_off(tc)
Definition: tcp.h:488
void session_add_self_custom_tx_evt(transport_connection_t *tc, u8 has_prio)
Definition: session.c:123
struct _tcp_main tcp_main_t
u32 thread_index
Definition: main.h:218
void tcp_connection_timers_reset(tcp_connection_t *tc)
Stop all connection timers.
Definition: tcp.c:519
This packet is to be rewritten and forwarded to the next processing node.
Definition: adj.h:73
u16 current_length
Nbytes between current data and the end of this buffer.
Definition: buffer.h:113
static int tcp_transmit_unsent(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Definition: tcp_output.c:1758
#define TCP_OPTS_ALIGN
Definition: tcp_packet.h:173
static u32 tcp_initial_wnd_unscaled(tcp_connection_t *tc)
TCP's initial window.
Definition: tcp_output.c:82
enum _tcp_output_next tcp_output_next_t
vl_api_address_t src
Definition: gre.api:60
int i
void tcp_timer_delack_handler(u32 index, u32 thread_index)
Delayed ack timer handler.
Definition: tcp_output.c:1213
static u32 format_get_indent(u8 *s)
Definition: format.h:72
uword ip_csum_t
Definition: ip_packet.h:244
static ip_csum_t ip_csum_with_carry(ip_csum_t sum, ip_csum_t x)
Definition: ip_packet.h:247
struct _tcp_connection tcp_connection_t
u8 * format(u8 *s, const char *fmt,...)
Definition: format.c:424
static u32 tcp_available_cc_snd_space(const tcp_connection_t *tc)
Estimate of how many bytes we can still push into the network.
Definition: tcp.h:977
#define tcp_opts_sack(_to)
Definition: tcp_packet.h:158
#define VLIB_NODE_FN(node)
Definition: node.h:202
static void tcp_push_ip_hdr(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, vlib_buffer_t *b)
Definition: tcp_output.c:896
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:451
static uword tcp46_send_reset_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame, u8 is_ip4)
Definition: tcp_output.c:2511
vlib_error_t * errors
Vector of errors for this node.
Definition: node.h:470
No operation.
Definition: tcp_packet.h:105
static uword vlib_buffer_length_in_chain(vlib_main_t *vm, vlib_buffer_t *b)
Get length in bytes of the buffer chain.
Definition: buffer_funcs.h:366
u8 n_sack_blocks
Number of SACKs blocks.
Definition: tcp_packet.h:151
struct _tcp_header tcp_header_t
int tcp_half_open_connection_cleanup(tcp_connection_t *tc)
Try to cleanup half-open connection.
Definition: tcp.c:210
#define scoreboard_rescue_rxt_valid(_sb, _tc)
Definition: tcp_output.c:1841
ip6_address_t src_address
Definition: ip6_packet.h:307
unsigned char u8
Definition: types.h:56
struct _sack_scoreboard_hole sack_scoreboard_hole_t
u8 wscale
Option flags, see above.
Definition: tcp_packet.h:146
enum fib_protocol_t_ fib_protocol_t
Protocol Type.
#define TCP_OPTS_MAX_SACK_BLOCKS
Definition: tcp_packet.h:174
double f64
Definition: types.h:142
vlib_node_registration_t ip4_lookup_node
(constructor) VLIB_REGISTER_NODE (ip4_lookup_node)
Definition: ip4_forward.c:102
#define tcp_csum_offload(tc)
Definition: tcp.h:477
#define foreach_tcp4_reset_next
Definition: tcp_output.c:2502
static u32 tcp_prepare_retransmit_segment(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t **b)
Build a retransmit segment.
Definition: tcp_output.c:1367
u16 src_port
Definition: udp.api:41
u8 session_type_t
Limit MSS.
Definition: tcp_packet.h:106
#define tcp_zero_rwnd_sent_on(tc)
Definition: tcp.h:487
static u16 ip_calculate_l4_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip_csum_t sum0, u32 payload_length, u8 *iph, u32 ip_header_size, u8 *l4h)
Definition: ip.h:183
void session_transport_closing_notify(transport_connection_t *tc)
Notification from transport that connection is being closed.
Definition: session.c:858
static uword tcp46_output_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, int is_ip4)
Definition: tcp_output.c:2326
static void * tcp_init_buffer(vlib_main_t *vm, vlib_buffer_t *b)
Definition: tcp_output.c:438
static ip_adjacency_t * adj_get(adj_index_t adj_index)
Get a pointer to an adjacency object from its index.
Definition: adj.h:431
void tcp_make_syn(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN.
Definition: tcp_output.c:578
static int tcp_prepare_segment(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t **b)
Allocate a new buffer and build a new tcp segment.
Definition: tcp_output.c:1255
#define seq_gt(_s1, _s2)
Definition: tcp.h:875
static void tcp_connection_set_state(tcp_connection_t *tc, tcp_state_t state)
Definition: tcp.h:742
#define tcp_cfg
Definition: tcp.h:679
vl_api_interface_index_t sw_if_index
Definition: gre.api:59
u8 * format_tcp_connection_id(u8 *s, va_list *args)
Definition: tcp.c:1040
sack_scoreboard_hole_t * scoreboard_get_hole(sack_scoreboard_t *sb, u32 index)
Definition: tcp_input.c:649
#define TCP_OPTION_LEN_SACK_BLOCK
Definition: tcp_packet.h:168
ip4_address_t dst_address
Definition: ip4_packet.h:170
#define TCP_FLAG_ACK
Definition: fa_node.h:16
u8 * format_white_space(u8 *s, va_list *va)
Definition: std-formats.c:129
static void tcp_cc_loss(tcp_connection_t *tc)
Definition: tcp.h:1073
tcp_main_t tcp_main
Definition: tcp.c:29
static tcp_header_t * tcp_buffer_hdr(vlib_buffer_t *b)
Definition: tcp.h:696
#define vlib_prefetch_buffer_header(b, type)
Prefetch buffer metadata.
Definition: buffer.h:203
vlib_frame_t * vlib_get_frame_to_node(vlib_main_t *vm, u32 to_node_index)
Definition: main.c:185
enum _tcp_state tcp_state_t
#define TCP_ALWAYS_ACK
On/off delayed acks.
Definition: tcp.h:39
#define TCP_RTO_MAX
Definition: tcp.h:99
static void * ip4_next_header(ip4_header_t *i)
Definition: ip4_packet.h:241
static u32 tcp_time_now(void)
Definition: tcp.h:1006
sack_block_t * sacks
SACK blocks.
Definition: tcp_packet.h:150
unsigned int u32
Definition: types.h:88
static void tcp46_output_trace_frame(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *to_next, u32 n_bufs)
Definition: tcp_output.c:2223
#define TCP_ESTABLISH_TIME
Definition: tcp.h:105
sack_scoreboard_hole_t * scoreboard_next_rxt_hole(sack_scoreboard_t *sb, sack_scoreboard_hole_t *start, u8 have_sent_1_smss, u8 *can_rescue, u8 *snd_limited)
Figure out the next hole to retransmit.
Definition: tcp_input.c:845
#define tcp_validate_txf_size(_tc, _a)
Definition: tcp.h:1218
#define VLIB_FRAME_SIZE
Definition: node.h:378
static void tcp_enqueue_to_ip_lookup_now(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4, u32 fib_index)
Definition: tcp_output.c:662
static void tcp_push_hdr_i(tcp_connection_t *tc, vlib_buffer_t *b, u32 snd_nxt, u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)
Push TCP header and update connection variables.
Definition: tcp_output.c:1056
static u32 vlib_get_buffer_index(vlib_main_t *vm, void *p)
Translate buffer pointer into buffer index.
Definition: buffer_funcs.h:257
u32 tcp_session_push_header(transport_connection_t *tconn, vlib_buffer_t *b)
Definition: tcp_output.c:1129
#define TCP_OPTION_LEN_WINDOW_SCALE
Definition: tcp_packet.h:165
vlib_node_registration_t tcp6_reset_node
(constructor) VLIB_REGISTER_NODE (tcp6_reset_node)
Definition: tcp_output.c:2609
#define TCP_RTO_SYN_RETRIES
Definition: tcp.h:102
#define tcp_zero_rwnd_sent(tc)
Definition: tcp.h:486
vlib_error_t error
Error code for buffers to be enqueued to error handler.
Definition: buffer.h:136
#define tcp_trajectory_add_start(b, start)
Definition: tcp.h:709
#define TRANSPORT_MAX_HDRS_LEN
static session_type_t session_type_from_proto_and_ip(transport_proto_t proto, u8 is_ip4)
vlib_main_t * vm
convenience pointer to this thread's vlib main
Definition: tcp.h:525
void tcp_send_reset(tcp_connection_t *tc)
Build and set reset packet for connection.
Definition: tcp_output.c:863
void tcp_send_synack(tcp_connection_t *tc)
Definition: tcp_output.c:954
#define ADJ_INDEX_INVALID
Invalid ADJ index - used when no adj is known likewise blazoned capitals INVALID speak volumes where ...
Definition: adj_types.h:36
static int tcp_make_synack_options(tcp_connection_t *tc, tcp_options_t *opts)
Definition: tcp_output.c:285
static int tcp_make_syn_options(tcp_connection_t *tc, tcp_options_t *opts)
Definition: tcp_output.c:256
static void * vlib_buffer_make_headroom(vlib_buffer_t *b, u8 size)
Make head room, typically for packet headers.
Definition: buffer.h:350
static int tcp_retransmit_sack(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Do retransmit with SACKs.
Definition: tcp_output.c:1849
#define tcp_in_fastrecovery(tc)
Definition: tcp.h:464
void tcp_connection_tx_pacer_reset(tcp_connection_t *tc, u32 window, u32 start_bucket)
Definition: tcp.c:1409
static void * vlib_buffer_push_tcp_net_order(vlib_buffer_t *b, u16 sp, u16 dp, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
Definition: tcp.h:1262
#define tcp_opts_mss(_to)
Definition: tcp_packet.h:155
unsigned short u16
Definition: types.h:57
void tcp_flush_frames_to_output(tcp_worker_ctx_t *wrk)
Flush v4 and v6 tcp and ip-lookup tx frames for thread index.
Definition: tcp_output.c:997
void vlib_put_frame_to_node(vlib_main_t *vm, u32 to_node_index, vlib_frame_t *f)
Definition: main.c:194
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
Definition: buffer.h:229
static void tcp_output_handle_link_local(tcp_connection_t *tc0, vlib_buffer_t *b0, u16 *next0, u32 *error0)
Definition: tcp_output.c:2191
#define foreach_tcp6_output_next
Definition: tcp_output.c:34
static u32 tcp_flight_size(const tcp_connection_t *tc)
Our estimate of the number of bytes in flight (pipe size)
Definition: tcp.h:900
#define PREDICT_FALSE(x)
Definition: clib.h:111
void tcp_timer_persist_handler(u32 index, u32 thread_index)
Got 0 snd_wnd from peer, try to do something about it.
Definition: tcp_output.c:1659
#define always_inline
Definition: ipsec.h:28
void tcp_program_dupack(tcp_connection_t *tc)
Definition: tcp_output.c:1186
static int tcp_make_reset_in_place(vlib_main_t *vm, vlib_buffer_t *b0, tcp_state_t state, u8 thread_index, u8 is_ip4)
Definition: tcp_output.c:693
#define TCP_FLAG_FIN
Definition: fa_node.h:12
int tcp_fastrecovery_prr_snd_space(tcp_connection_t *tc)
Estimate send space using proportional rate reduction (RFC6937)
Definition: tcp_output.c:1798
static u8 tcp_window_compute_scale(u32 window)
Definition: tcp_output.c:70
int tcp_session_custom_tx(void *conn, u32 max_burst_size)
Definition: tcp_output.c:2157
#define vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0)
Finish enqueueing one buffer forward in the graph.
Definition: buffer_node.h:218
vl_api_address_t dst
Definition: gre.api:61
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Get pointer to next frame vector data by (vlib_node_runtime_t, next_index).
Definition: node_funcs.h:338
#define TCP_OPTION_LEN_TIMESTAMP
Definition: tcp_packet.h:167
vlib_main_t * vm
Definition: in2out_ed.c:1810
#define foreach_tcp4_output_next
Definition: tcp_output.c:28
#define TCP_RXT_MAX_BURST
Definition: tcp.h:35
#define TCP_WND_MAX
Definition: tcp_packet.h:171
static void tcp_enqueue_to_ip_lookup(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4, u32 fib_index)
Definition: tcp_output.c:669
Selective Ack block.
Definition: tcp_packet.h:109
static void vlib_node_increment_counter(vlib_main_t *vm, u32 node_index, u32 counter_index, u64 increment)
Definition: node_funcs.h:1150
#define TCP_FLAG_RST
Definition: fa_node.h:14
#define TCP_DBG(_fmt, _args...)
Definition: tcp_debug.h:146
u8 len
Definition: ip_types.api:91
#define TCP_MAX_WND_SCALE
Definition: tcp_packet.h:172
static void tcp_timer_reset(tcp_connection_t *tc, u8 timer_id)
Definition: tcp.h:1123
static void tcp_output_handle_packet(tcp_connection_t *tc0, vlib_buffer_t *b0, vlib_node_runtime_t *error_node, u16 *next0, u8 is_ip4)
Definition: tcp_output.c:2287
void scoreboard_init_rxt(sack_scoreboard_t *sb, u32 snd_una)
Definition: tcp_input.c:905
u8 is_ip4
Definition: lisp_gpe.api:232
This packet matches an "incomplete adjacency" and packets need to be passed to ARP to find rewrite st...
Definition: adj.h:63
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:169
static void * vlib_buffer_push_tcp(vlib_buffer_t *b, u16 sp_net, u16 dp_net, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
Definition: tcp.h:1299
tcp_header_t tcp_header
Definition: tcp_output.c:48
u32 flags
Definition: vhost_user.h:141
u16 n_vectors
Definition: node.h:397
void scoreboard_clear_reneging(sack_scoreboard_t *sb, u32 start, u32 end)
Definition: tcp_input.c:946
#define CLIB_PREFETCH(addr, size, type)
Definition: cache.h:80
static_always_inline void vlib_buffer_enqueue_to_next(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 *nexts, uword count)
Definition: buffer_node.h:332
void tcp_send_window_update_ack(tcp_connection_t *tc)
Send window update ack.
Definition: tcp_output.c:1229
void tcp_program_retransmit(tcp_connection_t *tc)
Definition: tcp_output.c:1198
static u32 tcp_tstamp(tcp_connection_t *tc)
Generate timestamp for tcp connection.
Definition: tcp.h:1021
void tcp_send_reset_w_pkt(tcp_connection_t *tc, vlib_buffer_t *pkt, u32 thread_index, u8 is_ip4)
Send reset without reusing existing buffer.
Definition: tcp_output.c:778
format_function_t format_tcp_state
Definition: tcp.h:64
#define clib_warning(format, args...)
Definition: error.h:59
static vlib_node_runtime_t * vlib_node_get_runtime(vlib_main_t *vm, u32 node_index)
Get node runtime by node index.
Definition: node_funcs.h:89
void tcp_bt_track_tx(tcp_connection_t *tc, u32 len)
Track a tcp tx burst.
Definition: tcp_bt.c:297
format_function_t format_tcp_header
Definition: format.h:100
struct _transport_connection transport_connection_t
#define TCP_USE_SACKS
Disable only for testing.
Definition: tcp.h:40
#define tcp_recovery_on(tc)
Definition: tcp.h:462
static u32 tcp_window_to_advertise(tcp_connection_t *tc, tcp_state_t state)
Compute and return window to advertise, scaled as per RFC1323.
Definition: tcp_output.c:162
#define tcp_fastrecovery_first(tc)
Definition: tcp.h:470
u32 adj_index_t
An index for adjacencies.
Definition: adj_types.h:30
#define ARRAY_LEN(x)
Definition: clib.h:62
void vlib_put_next_frame(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, u32 n_vectors_left)
Release pointer to next frame vector data.
Definition: main.c:456
u16 mss
Maximum segment size advertised.
Definition: tcp_packet.h:147
vlib_main_t vlib_node_runtime_t * node
Definition: in2out_ed.c:1810
void tcp_timer_retransmit_syn_handler(u32 tc_index, u32 thread_index)
SYN retransmit timer handler.
Definition: tcp_output.c:1593
static void * ip6_next_header(ip6_header_t *i)
Definition: ip6_packet.h:368
static int tcp_retransmit_no_sack(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Fast retransmit without SACK info.
Definition: tcp_output.c:2003
static void tcp_make_ack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to ACK.
Definition: tcp_output.c:558
static u32 transport_max_tx_dequeue(transport_connection_t *tc)
Definition: session.h:478
static void tcp_timer_update(tcp_connection_t *tc, u8 timer_id, u32 interval)
Definition: tcp.h:1136
u16 ip6_tcp_udp_icmp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip6_header_t *ip0, int *bogus_lengthp)
Definition: ip6_forward.c:1021
signed int i32
Definition: types.h:77
vlib_node_registration_t ip6_lookup_node
(constructor) VLIB_REGISTER_NODE (ip6_lookup_node)
Definition: ip6_forward.c:667
static int tcp_make_established_options(tcp_connection_t *tc, tcp_options_t *opts)
Definition: tcp_output.c:320
u16 cached_next_index
Next frame index that vector arguments were last enqueued to last time this node ran.
Definition: node.h:515
#define ASSERT(truth)
static void tcp_cc_init_rxt_timeout(tcp_connection_t *tc)
Reset congestion control, switch cwnd to loss window and try again.
Definition: tcp_output.c:1425
static void tcp_output_push_ip(vlib_main_t *vm, vlib_buffer_t *b0, tcp_connection_t *tc0, u8 is_ip4)
Definition: tcp_output.c:2247
#define tcp_syn(_th)
Definition: tcp_packet.h:80
static u8 * format_tcp_tx_trace(u8 *s, va_list *args)
Definition: tcp_output.c:53
u16 ip4_tcp_udp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip4_header_t *ip0)
Definition: ip4_forward.c:1302
void transport_connection_tx_pacer_reset_bucket(transport_connection_t *tc, u32 bucket)
Reset tx pacer bucket.
Definition: transport.c:672
u8 data[128]
Definition: ipsec_types.api:87
void tcp_update_burst_snd_vars(tcp_connection_t *tc)
Update burst send vars.
Definition: tcp_output.c:390
#define seq_geq(_s1, _s2)
Definition: tcp.h:876
#define TRANSPORT_PACER_MIN_BURST
Definition: transport.h:23
static uword ip6_address_is_link_local_unicast(const ip6_address_t *a)
Definition: ip6_packet.h:250
#define clib_mem_unaligned(pointer, type)
Definition: types.h:155
#define tcp_fastrecovery_first_off(tc)
Definition: tcp.h:472
static void tcp_update_rcv_wnd(tcp_connection_t *tc)
Definition: tcp_output.c:113
void tcp_send_fin(tcp_connection_t *tc)
Send FIN.
Definition: tcp_output.c:1007
#define clib_max(x, y)
Definition: clib.h:288
void tcp_send_ack(tcp_connection_t *tc)
Definition: tcp_output.c:1157
static void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
Definition: trace_funcs.h:55
void transport_connection_tx_pacer_update_bytes(transport_connection_t *tc, u32 bytes)
Definition: transport.c:716
#define seq_lt(_s1, _s2)
Definition: tcp.h:873
int tcp_retransmit_first_unacked(tcp_worker_ctx_t *wrk, tcp_connection_t *tc)
Retransmit first unacked segment.
Definition: tcp_output.c:1739
template key/value backing page structure
Definition: bihash_doc.h:44
u32 ip_version_traffic_class_and_flow_label
Definition: ip6_packet.h:294
#define tcp_opts_wscale(_to)
Definition: tcp_packet.h:157
Definition: defs.h:47
void tcp_timer_retransmit_handler(u32 tc_index, u32 thread_index)
Definition: tcp_output.c:1447
void tcp_bt_check_app_limited(tcp_connection_t *tc)
Check if sample to be generated is app limited.
Definition: tcp_bt.c:282
u32 tsval
Timestamp value.
Definition: tcp_packet.h:148
u32 tsecr
Echoed/reflected time stamp.
Definition: tcp_packet.h:149
static void * vlib_buffer_push_ip6(vlib_main_t *vm, vlib_buffer_t *b, ip6_address_t *src, ip6_address_t *dst, int proto)
Push IPv6 header to buffer.
Definition: ip6.h:604
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
static u8 tcp_max_tx_deq(tcp_connection_t *tc)
Definition: tcp_output.c:1835
ip_lookup_next_t lookup_next_index
Next hop after ip4-lookup.
Definition: adj.h:236
u32 next_buffer
Next buffer for this linked-list of buffers.
Definition: buffer.h:140
#define foreach_tcp6_reset_next
Definition: tcp_output.c:2506
sack_scoreboard_hole_t * scoreboard_first_hole(sack_scoreboard_t *sb)
Definition: tcp_input.c:673
#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b)
Definition: buffer.h:492
static tcp_worker_ctx_t * tcp_get_worker(u32 thread_index)
Definition: tcp.h:690
void session_transport_closed_notify(transport_connection_t *tc)
Notification from transport that it is closed.
Definition: session.c:946
static void tcp_retransmit_timer_update(tcp_connection_t *tc)
Definition: tcp.h:1199
VLIB buffer representation.
Definition: buffer.h:102
u64 uword
Definition: types.h:112
#define seq_max(_s1, _s2)
Definition: tcp.h:877
static void tcp_enqueue_to_ip_lookup_i(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4, u32 fib_index, u8 flush)
Definition: tcp_output.c:626
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
Definition: node_funcs.h:244
static void tcp_make_ack_i(tcp_connection_t *tc, vlib_buffer_t *b, tcp_state_t state, u8 flags)
Prepare ACK.
Definition: tcp_output.c:525
#define TCP_OPTION_LEN_MSS
Definition: tcp_packet.h:164
u16 ip6_tcp_compute_checksum_custom(vlib_main_t *vm, vlib_buffer_t *p0, ip46_address_t *src, ip46_address_t *dst)
Definition: tcp_output.c:453
struct clib_bihash_value offset
template key/value backing page structure
static void tcp_retransmit_timer_force_update(tcp_connection_t *tc)
Definition: tcp.h:1165
u32 tcp_initial_window_to_advertise(tcp_connection_t *tc)
Compute initial window and scale factor.
Definition: tcp_output.c:101
#define vnet_buffer(b)
Definition: buffer.h:408
static tcp_connection_t * tcp_connection_get(u32 conn_index, u32 thread_index)
Definition: tcp.h:717
static void tcp_cc_event(tcp_connection_t *tc, tcp_cc_event_t evt)
Definition: tcp.h:1092
void tcp_update_rto(tcp_connection_t *tc)
Definition: tcp_input.c:478
int session_stream_connect_notify(transport_connection_t *tc, u8 is_fail)
Definition: session.c:757
vl_api_dhcp_client_state_t state
Definition: dhcp.api:201
static u32 vlib_num_workers()
Definition: threads.h:372
void tcp_connection_cleanup(tcp_connection_t *tc)
Cleans up connection state.
Definition: tcp.c:238
static u32 tcp_buffer_len(vlib_buffer_t *b)
Definition: tcp_output.c:1120
static u8 tcp_retransmit_should_retry_head(tcp_connection_t *tc, sack_scoreboard_t *sb)
Definition: tcp_output.c:1822
#define TCP_OPTION_LEN_NOOP
Definition: tcp_packet.h:163
void tcp_send_syn(tcp_connection_t *tc)
Send SYN.
Definition: tcp_output.c:919
vlib_node_registration_t tcp6_output_node
(constructor) VLIB_REGISTER_NODE (tcp6_output_node)
Definition: tcp_output.c:2476
vlib_main_t vlib_node_runtime_t vlib_frame_t * frame
Definition: in2out_ed.c:1811
u16 flags
Copy of main node flags.
Definition: node.h:509
Window scale.
Definition: tcp_packet.h:107
static u16 tcp_compute_checksum(tcp_connection_t *tc, vlib_buffer_t *b)
Definition: tcp_output.c:499
enum _tcp_reset_next tcp_reset_next_t
static u32 transport_max_rx_enqueue(transport_connection_t *tc)
Definition: session.h:471
#define tcp_opts_sack_permitted(_to)
Definition: tcp_packet.h:159
static void vlib_buffer_free_one(vlib_main_t *vm, u32 buffer_index)
Free one buffer Shorthand to free a single buffer chain.
Definition: buffer_funcs.h:923
tcp_connection_t tcp_connection
Definition: tcp_output.c:49
void tcp_program_ack(tcp_connection_t *tc)
Definition: tcp_output.c:1176
static void * vlib_buffer_push_ip6_custom(vlib_main_t *vm, vlib_buffer_t *b, ip6_address_t *src, ip6_address_t *dst, int proto, u32 flow_label)
Push IPv6 header to buffer.
Definition: ip6.h:561
u16 dst_port
Definition: udp.api:42
vlib_frame_t * ip_lookup_tx_frames[2]
tx frames for ip 4/6 lookup nodes
Definition: tcp.h:516
static void * tcp_reuse_buffer(vlib_main_t *vm, vlib_buffer_t *b)
Definition: tcp_output.c:421
u8 ip_version_and_header_length
Definition: ip4_packet.h:138
Timestamps.
Definition: tcp_packet.h:110
static_always_inline void vlib_get_buffers(vlib_main_t *vm, u32 *bi, vlib_buffer_t **b, int count)
Translate array of buffer indices into buffer pointers.
Definition: buffer_funcs.h:244
vlib_node_registration_t tcp4_reset_node
(constructor) VLIB_REGISTER_NODE (tcp4_reset_node)
Definition: tcp_output.c:2593
#define VLIB_NODE_FLAG_TRACE
Definition: node.h:302
vlib_node_registration_t tcp4_output_node
(constructor) VLIB_REGISTER_NODE (tcp4_output_node)
Definition: tcp_output.c:2456
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:59
u32 total_length_not_including_first_buffer
Only valid for first buffer in chain.
Definition: buffer.h:167
static void tcp_enqueue_to_output(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4)
Definition: tcp_output.c:678
static u32 vlib_buffer_alloc(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Allocate buffers into supplied array.
Definition: buffer_funcs.h:630
static void tcp_persist_timer_set(tcp_connection_t *tc)
Definition: tcp.h:1172
static tcp_main_t * vnet_get_tcp_main()
Definition: tcp.h:684
#define TCP_RTO_BOFF_MAX
Definition: tcp.h:104
static char * tcp_error_strings[]
Definition: tcp_output.c:40
static void * vlib_buffer_push_ip4(vlib_main_t *vm, vlib_buffer_t *b, ip4_address_t *src, ip4_address_t *dst, int proto, u8 csum_offload)
Push IPv4 header to buffer.
Definition: ip4.h:378
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
Definition: buffer_funcs.h:85
static u32 tcp_set_time_now(tcp_worker_ctx_t *wrk)
Definition: tcp.h:1034
#define tcp_ack(_th)
Definition: tcp_packet.h:83
void tcp_bt_track_rxt(tcp_connection_t *tc, u32 start, u32 end)
Track a tcp retransmission.
Definition: tcp_bt.c:333
u32 transport_connection_tx_pacer_burst(transport_connection_t *tc)
Get tx pacer max burst.
Definition: transport.c:696
static u8 tcp_timer_is_active(tcp_connection_t *tc, tcp_timers_e timer)
Definition: tcp.h:1213
Definition: defs.h:46
static void tcp_cc_congestion(tcp_connection_t *tc)
Definition: tcp.h:1067
ip6_address_t dst_address
Definition: ip6_packet.h:307
u32 * tx_buffers
tx buffer free list
Definition: tcp.h:513
adj_index_t adj_nbr_find(fib_protocol_t nh_proto, vnet_link_t link_type, const ip46_address_t *nh_addr, u32 sw_if_index)
Lookup neighbor adjancency.
Definition: adj_nbr.c:99
#define TCP_EVT(_evt, _args...)
Definition: tcp_debug.h:145
static int tcp_make_options(tcp_connection_t *tc, tcp_options_t *opts, tcp_state_t state)
Definition: tcp_output.c:355
static uword pool_elts(void *v)
Number of active elements in a pool.
Definition: pool.h:128