22 typedef enum _tcp_output_next
31 #define foreach_tcp4_output_next \ 32 _ (DROP, "error-drop") \ 33 _ (IP_LOOKUP, "ip4-lookup") \ 34 _ (IP_REWRITE, "ip4-rewrite") \ 37 #define foreach_tcp6_output_next \ 38 _ (DROP, "error-drop") \ 39 _ (IP_LOOKUP, "ip6-lookup") \ 40 _ (IP_REWRITE, "ip6-rewrite") \ 41 _ (IP_ARP, "ip6-discover-neighbor") 44 #define tcp_error(n,s) s, 71 #ifndef CLIB_MARCH_VARIANT 107 if (tc->state != TCP_STATE_SYN_RCVD ||
tcp_opts_wscale (&tc->rcv_opts))
118 u32 available_space, wnd;
130 observed_wnd = (
i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
137 TCP_EVT (TCP_EVT_RCV_WND_SHRUNK, tc, observed_wnd, available_space);
158 if (state < TCP_STATE_ESTABLISHED)
162 return tc->rcv_wnd >> tc->rcv_wscale;
170 opts->
flags |= TCP_OPTS_FLAG_MSS;
174 opts->
flags |= TCP_OPTS_FLAG_WSCALE;
175 opts->
wscale = tc->rcv_wscale;
178 opts->
flags |= TCP_OPTS_FLAG_TSTAMP;
185 opts->
flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
199 opts->
flags |= TCP_OPTS_FLAG_MSS;
205 opts->
flags |= TCP_OPTS_FLAG_WSCALE;
206 opts->
wscale = tc->rcv_wscale;
212 opts->
flags |= TCP_OPTS_FLAG_TSTAMP;
214 opts->
tsecr = tc->tsval_recent;
220 opts->
flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
238 opts->
flags |= TCP_OPTS_FLAG_TSTAMP;
240 opts->
tsecr = tc->tsval_recent;
247 opts->
flags |= TCP_OPTS_FLAG_SACK;
248 if (tc->snd_sack_pos >=
vec_len (tc->snd_sacks))
249 tc->snd_sack_pos = 0;
250 opts->
sacks = &tc->snd_sacks[tc->snd_sack_pos];
270 case TCP_STATE_ESTABLISHED:
271 case TCP_STATE_CLOSE_WAIT:
272 case TCP_STATE_FIN_WAIT_1:
273 case TCP_STATE_LAST_ACK:
274 case TCP_STATE_CLOSING:
275 case TCP_STATE_FIN_WAIT_2:
276 case TCP_STATE_TIME_WAIT:
277 case TCP_STATE_CLOSED:
279 case TCP_STATE_SYN_RCVD:
281 case TCP_STATE_SYN_SENT:
307 TCP_STATE_ESTABLISHED);
310 tc->snd_mss =
clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len;
318 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
321 if (tc->snd_una == tc->snd_nxt)
327 if (tc->flags & TCP_CONN_PSH_PENDING)
331 tc->psh_seq = tc->snd_una + max_deq - 1;
340 if (b->
flags & VLIB_BUFFER_NEXT_PRESENT)
343 b->
flags &= VLIB_BUFFER_NEXT_PRESENT - 1;
353 #ifndef CLIB_MARCH_VARIANT 357 ASSERT ((b->
flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
358 b->
flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
370 ip46_address_t *
src, ip46_address_t *
dst)
373 u16 payload_length_host_byte_order;
378 clib_host_to_net_u16 (IP_PROTOCOL_TCP);
381 for (i = 0; i <
ARRAY_LEN (src->ip6.as_uword); i++)
390 payload_length_host_byte_order, NULL, 0,
396 ip46_address_t *
src, ip46_address_t *
dst)
399 u32 payload_length_host_byte_order;
403 clib_host_to_net_u32 (payload_length_host_byte_order +
404 (IP_PROTOCOL_TCP << 16));
410 payload_length_host_byte_order, NULL, 0,
418 if (
PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD))
425 (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
428 (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
445 u8 tcp_opts_len, tcp_hdr_opts_len;
453 tcp_hdr_opts_len = tcp_opts_len +
sizeof (
tcp_header_t);
456 tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);
462 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
480 TCP_EVT (TCP_EVT_ACK_SENT, tc);
481 tc->rcv_las = tc->rcv_nxt;
499 u8 tcp_hdr_opts_len, tcp_opts_len;
509 tcp_hdr_opts_len = tcp_opts_len +
sizeof (
tcp_header_t);
514 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
526 u8 tcp_opts_len, tcp_hdr_opts_len;
533 tcp_hdr_opts_len = tcp_opts_len +
sizeof (
tcp_header_t);
536 tc->rcv_nxt, tcp_hdr_opts_len,
540 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
546 u8 is_ip4,
u32 fib_index)
551 b->
flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
558 tm->ipl_next_node[!is_ip4]);
568 b->
flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
584 ip6_address_t src_ip6, dst_ip6;
608 src_port = th->src_port;
609 dst_port = th->dst_port;
620 seq = th->ack_number;
626 tmp = clib_net_to_host_u32 (th->seq_number);
628 ack = clib_host_to_net_u32 (tmp + len);
653 #ifndef CLIB_MARCH_VARIANT 703 seq = pkt_th->ack_number;
704 ack = (tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;
710 ack = clib_host_to_net_u32 (
vnet_buffer (pkt)->tcp.seq_end);
714 seq, ack, tcp_hdr_len, flags, 0);
733 tc->ipv6_flow_label);
739 TCP_EVT (TCP_EVT_RST_SENT, tc);
741 TCP_ERROR_RST_SENT, 1);
755 u16 tcp_hdr_opts_len, advertise_wnd, opts_write_len;
767 tcp_hdr_opts_len = tc->snd_opts_len +
sizeof (
tcp_header_t);
768 advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
771 tc->rcv_nxt, tcp_hdr_opts_len, flags,
775 ASSERT (opts_write_len == tc->snd_opts_len);
776 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
778 TCP_EVT (TCP_EVT_RST_SENT, tc);
780 TCP_ERROR_RST_SENT, 1);
795 IP_PROTOCOL_TCP, tc->ipv6_flow_label);
834 tc->rtt_seq = tc->snd_nxt;
839 TCP_EVT (TCP_EVT_SYN_SENT, tc);
850 ASSERT (tc->snd_una != tc->snd_nxt);
865 TCP_EVT (TCP_EVT_SYNACK_SENT, tc);
880 fin_snt = tc->
flags & TCP_CONN_FINSNT;
892 tc->flags |= TCP_CONN_FINSNT;
898 if ((tc->flags & TCP_CONN_SNDACK) && !tc->pending_dupacks)
899 tc->flags &= ~TCP_CONN_SNDACK;
905 TCP_EVT (TCP_EVT_FIN_SENT, tc);
911 tc->flags |= TCP_CONN_FINSNT;
912 tc->flags &= ~TCP_CONN_FINPNDG;
922 u8 compute_opts,
u8 maybe_burst,
u8 update_snd_nxt)
934 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
939 tcp_hdr_opts_len = tc->snd_opts_len +
sizeof (
tcp_header_t);
942 advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
948 if (
seq_geq (tc->psh_seq, snd_nxt)
949 &&
seq_lt (tc->psh_seq, snd_nxt + data_len))
950 flags |= TCP_FLAG_PSH;
953 tc->rcv_nxt, tcp_hdr_opts_len, flags,
959 tm->wrk_ctx[tc->c_thread_index].cached_opts,
965 ASSERT (len == tc->snd_opts_len);
974 tc->rcv_las = tc->rcv_nxt;
977 tc->data_segs_out += 1;
998 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1009 tc->rtt_seq = tc->snd_nxt;
1043 if (!(tc->flags & TCP_CONN_SNDACK))
1046 tc->flags |= TCP_CONN_SNDACK;
1053 if (!(tc->flags & TCP_CONN_SNDACK))
1056 tc->flags |= TCP_CONN_SNDACK;
1058 if (tc->pending_dupacks < 255)
1059 tc->pending_dupacks += 1;
1065 if (!(tc->flags & TCP_CONN_RXT_PENDING))
1068 tc->flags |= TCP_CONN_RXT_PENDING;
1085 if (tc->rcv_wnd >=
tcp_cfg.rwnd_min_update_ack * tc->snd_mss)
1138 ASSERT (n_bytes == max_deq_bytes);
1146 u32 chain_bi = ~0, n_bufs_per_seg, n_bufs;
1147 u16 n_peeked, len_to_deq;
1152 n_bufs_per_seg = ceil ((
double) seg_size / bytes_per_buffer);
1168 TRANSPORT_MAX_HDRS_LEN);
1170 b[0]->
flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
1175 for (i = 1; i < n_bufs_per_seg; i++)
1178 len_to_deq =
clib_min (max_deq_bytes, bytes_per_buffer);
1186 ASSERT (n_peeked == len_to_deq);
1187 n_bytes += n_peeked;
1193 prev_b->
flags |= VLIB_BUFFER_NEXT_PRESENT;
1195 max_deq_bytes -= n_peeked;
1210 ASSERT (((*b)->current_data + (*b)->current_length) <= bytes_per_buffer);
1226 u32 start, available_bytes;
1229 ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
1230 ASSERT (max_deq_bytes != 0);
1236 ASSERT (available_bytes >= offset);
1237 available_bytes -=
offset;
1238 if (!available_bytes)
1241 max_deq_bytes =
clib_min (tc->snd_mss, max_deq_bytes);
1242 max_deq_bytes =
clib_min (available_bytes, max_deq_bytes);
1244 start = tc->snd_una +
offset;
1253 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1257 tc->segs_retrans += 1;
1259 TCP_EVT (TCP_EVT_CC_RTX, tc, offset, n_bytes);
1271 if (!sb->is_reneging && (!hole || hole->start == tc->snd_una))
1283 TCP_EVT (TCP_EVT_CC_EVT, tc, 6);
1285 tc->prev_ssthresh = tc->ssthresh;
1286 tc->prev_cwnd = tc->cwnd;
1297 tc->cwnd_acc_bytes = 0;
1298 tc->tr_occurences += 1;
1318 if (tc->state == TCP_STATE_CLOSED)
1321 if (tc->state >= TCP_STATE_ESTABLISHED)
1323 TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1326 if (tc->flags & TCP_CONN_FINSNT)
1335 if (tc->snd_una == tc->snd_nxt)
1369 tc->snd_congestion = tc->snd_nxt;
1373 n_bytes =
clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una);
1388 if (tc->rto_boff == 1)
1401 else if (tc->state == TCP_STATE_SYN_RCVD)
1403 TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1428 ASSERT (tc->snd_una != tc->snd_nxt);
1434 TCP_EVT (TCP_EVT_SYN_RXT, tc, 1);
1441 ASSERT (tc->state == TCP_STATE_CLOSED);
1464 if (tc->flags & TCP_CONN_HALF_OPEN_DONE)
1467 TCP_DBG (
"could not remove half-open connection");
1471 TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1499 TCP_EVT (TCP_EVT_SYN_RXT, tc, 0);
1517 u32 bi, max_snd_bytes, available_bytes,
offset;
1525 if (tc->state == TCP_STATE_CLOSED || tc->snd_wnd > tc->snd_mss
1526 || (tc->flags & TCP_CONN_FINSNT))
1527 goto update_scheduler;
1530 offset = tc->snd_nxt - tc->snd_una;
1534 if (!available_bytes)
1540 if (available_bytes <= offset)
1541 goto update_scheduler;
1562 max_snd_bytes =
clib_min (tc->snd_mss,
1568 || tc->snd_una == tc->snd_nxt
1569 || tc->rto_boff > 1));
1571 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1603 TCP_EVT (TCP_EVT_CC_EVT, tc, 1);
1619 u32 offset, n_segs = 0, n_written, bi, available_wnd;
1623 offset = tc->snd_nxt - tc->snd_una;
1624 available_wnd = tc->snd_wnd -
offset;
1625 burst_size =
clib_min (burst_size, available_wnd / tc->snd_mss);
1627 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1630 while (n_segs < burst_size)
1638 offset += n_written;
1641 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1644 tc->snd_nxt += n_written;
1661 prr_out = tc->snd_rxt_bytes + (tc->snd_nxt - tc->snd_congestion);
1663 if (pipe > tc->ssthresh)
1665 space = ((int) tc->prr_delivered * ((
f64) tc->ssthresh / tc->prev_cwnd))
1671 limit =
clib_max ((
int) (tc->prr_delivered - prr_out), 0) + tc->snd_mss;
1672 space =
clib_min (tc->ssthresh - pipe, limit);
1674 space =
clib_max (space, prr_out ? 0 : tc->snd_mss);
1682 u32 tx_adv_sack = sb->high_sacked - tc->snd_congestion;
1683 f64 rr = (
f64) tc->ssthresh / tc->prev_cwnd;
1688 return (tx_adv_sack > (tc->snd_una - tc->prr_start) * rr);
1695 - (tc->snd_nxt - tc->snd_una));
1698 #define scoreboard_rescue_rxt_valid(_sb, _tc) \ 1699 (seq_geq (_sb->rescue_rxt, _tc->snd_una) \ 1700 && seq_leq (_sb->rescue_rxt, _tc->snd_congestion)) 1709 u32 n_written = 0,
offset, max_bytes, n_segs = 0;
1710 u8 snd_limited = 0, can_rescue = 0;
1711 u32 bi, max_deq, burst_bytes;
1721 burst_size =
clib_min (burst_size, burst_bytes / tc->snd_mss);
1733 if (snd_space < tc->snd_mss)
1740 &&
seq_gt (sb->high_sacked, tc->snd_congestion)
1741 && tc->rxt_head != tc->snd_una
1744 max_bytes =
clib_min (tc->snd_mss, tc->snd_congestion - tc->snd_una);
1755 tc->rxt_head = tc->snd_una;
1756 tc->rxt_delivered += n_written;
1757 tc->prr_delivered += n_written;
1758 ASSERT (tc->rxt_delivered <= tc->snd_rxt_bytes);
1763 TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
1767 max_deq -= tc->snd_nxt - tc->snd_una;
1769 while (snd_space > 0 && n_segs < burst_size)
1776 if (max_deq > tc->snd_mss)
1783 av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una);
1784 av_wnd =
clib_max (av_wnd - tc->snd_mss, 0);
1785 snd_space =
clib_min (snd_space, av_wnd);
1786 snd_space =
clib_min (max_deq, snd_space);
1787 burst_size =
clib_min (burst_size - n_segs,
1788 snd_space / tc->snd_mss);
1791 if (max_deq > n_segs_new * tc->snd_mss)
1794 n_segs += n_segs_new;
1808 max_bytes =
clib_min (tc->snd_mss, hole->end - hole->start);
1809 max_bytes =
clib_min (max_bytes, snd_space);
1810 offset = hole->end - tc->snd_una - max_bytes;
1816 sb->rescue_rxt = tc->snd_congestion;
1823 max_bytes =
clib_min (hole->end - sb->high_rxt, snd_space);
1824 max_bytes = snd_limited ?
clib_min (max_bytes, tc->snd_mss) : max_bytes;
1828 offset = sb->high_rxt - tc->snd_una;
1831 ASSERT (n_written <= snd_space);
1840 sb->high_rxt += n_written;
1843 snd_space -= n_written;
1863 u32 n_written = 0,
offset = 0, bi, max_deq, n_segs_now, max_bytes;
1864 u32 burst_bytes, sent_bytes;
1866 int snd_space, n_segs = 0;
1871 TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
1874 burst_size =
clib_min (burst_size, burst_bytes / tc->snd_mss);
1882 cc_limited = snd_space < burst_bytes;
1889 while (snd_space > 0 && n_segs < burst_size)
1892 tc->snd_congestion - tc->snd_una -
offset);
1904 snd_space -= n_written;
1909 if (n_segs == burst_size)
1915 if (snd_space < tc->snd_mss || tc->snd_mss == 0)
1919 max_deq -= tc->snd_nxt - tc->snd_una;
1922 snd_space =
clib_min (max_deq, snd_space);
1923 burst_size =
clib_min (burst_size - n_segs, snd_space / tc->snd_mss);
1925 if (n_segs_now && max_deq > n_segs_now * tc->snd_mss)
1927 n_segs += n_segs_now;
1933 sent_bytes =
clib_min (n_segs * tc->snd_mss, burst_bytes);
1934 sent_bytes = cc_limited ? burst_bytes : sent_bytes;
1945 if (!tc->pending_dupacks)
1948 || tc->state != TCP_STATE_ESTABLISHED)
1961 tc->dupacks_out += 1;
1962 tc->pending_dupacks = 0;
1967 tc->snd_sack_pos = 0;
1973 n_acks =
clib_min (n_acks, tc->pending_dupacks);
1975 for (j = 0; j <
clib_min (n_acks, max_burst_size); j++)
1978 if (n_acks < max_burst_size)
1980 tc->pending_dupacks = 0;
1981 tc->snd_sack_pos = 0;
1982 tc->dupacks_out += n_acks;
1987 TCP_DBG (
"constrained by burst size");
1988 tc->pending_dupacks = n_acks - max_burst_size;
1989 tc->dupacks_out += max_burst_size;
1991 return max_burst_size;
2022 tc->flags &= ~TCP_CONN_RXT_PENDING;
2026 if (!(tc->flags & TCP_CONN_SNDACK))
2029 tc->flags &= ~TCP_CONN_SNDACK;
2032 if (n_segs && !tc->pending_dupacks)
2049 u16 * next0,
u32 * error0)
2062 *error0 = TCP_ERROR_LINK_LOCAL_RW;
2074 *error0 = TCP_ERROR_LINK_LOCAL_RW;
2081 u32 * to_next,
u32 n_bufs)
2089 for (i = 0; i < n_bufs; i++)
2092 if (!(b->
flags & VLIB_BUFFER_IS_TRACED))
2116 IP_PROTOCOL_TCP, tc0->ipv6_flow_label);
2134 ASSERT ((b->
flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID) != 0);
2135 ASSERT ((b->
flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID) != 0);
2136 b->
flags |= VNET_BUFFER_F_GSO;
2149 if (tc0->next_node_index)
2151 *next0 = tc0->next_node_index;
2152 vnet_buffer (b0)->tcp.next_node_opaque = tc0->next_node_opaque;
2198 while (n_left_from >= 4)
2236 b[0]->
error = node->
errors[TCP_ERROR_INVALID_CONNECTION];
2247 b[1]->
error = node->
errors[TCP_ERROR_INVALID_CONNECTION];
2256 while (n_left_from > 0)
2260 if (n_left_from > 1)
2277 b[0]->
error = node->
errors[TCP_ERROR_INVALID_CONNECTION];
2307 .name =
"tcp4-output",
2309 .vector_size =
sizeof (
u32),
2315 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n, 2327 .name =
"tcp6-output",
2329 .vector_size =
sizeof (
u32),
2335 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n, 2344 typedef enum _tcp_reset_next
2351 #define foreach_tcp4_reset_next \ 2352 _(DROP, "error-drop") \ 2353 _(IP_LOOKUP, "ip4-lookup") 2355 #define foreach_tcp6_reset_next \ 2356 _(DROP, "error-drop") \ 2357 _(IP_LOOKUP, "ip6-lookup") 2371 while (n_left_from > 0)
2377 while (n_left_from > 0 && n_left_to_next > 0)
2389 n_left_to_next -= 1;
2398 b0->
flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
2412 n_left_to_next, bi0, next0);
2433 .name =
"tcp4-reset",
2434 .vector_size =
sizeof (
u32),
2439 #define _(s,n) [TCP_RESET_NEXT_##s] = n, 2449 .name =
"tcp6-reset",
2450 .vector_size =
sizeof (
u32),
2455 #define _(s,n) [TCP_RESET_NEXT_##s] = n,
u32 flags
buffer flags: VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index, VLIB_BUFFER_IS_TRACED: trace this buffer.
static void tcp_check_if_gso(tcp_connection_t *tc, vlib_buffer_t *b)
static void tcp_check_sack_reneging(tcp_connection_t *tc)
#define TCP_RXT_MAX_BURST
static int tcp_send_acks(tcp_connection_t *tc, u32 max_burst_size)
u16 ip4_tcp_compute_checksum_custom(vlib_main_t *vm, vlib_buffer_t *p0, ip46_address_t *src, ip46_address_t *dst)
static void tcp_persist_timer_set(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
#define TCP_OPTION_LEN_SACK_PERMITTED
void tcp_timer_persist_handler(tcp_connection_t *tc)
Got 0 snd_wnd from peer, try to do something about it.
vl_api_wireguard_peer_flags_t flags
static void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers. Frees the entire buffer chain for each buffer.
static tcp_connection_t * tcp_connection_get(u32 conn_index, u32 thread_index)
#define tcp_node_index(node_id, is_ip4)
int session_tx_fifo_peek_bytes(transport_connection_t *tc, u8 *buffer, u32 offset, u32 max_bytes)
void session_queue_run_on_main_thread(vlib_main_t *vm)
vlib_main_t vlib_node_runtime_t vlib_frame_t * from_frame
static void tcp_make_synack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN-ACK.
#define tcp_opts_tstamp(_to)
i16 current_data
signed offset in data[], pre_data[] that we are currently processing.
vl_api_ip_port_and_mask_t dst_port
void tcp_timer_retransmit_syn_handler(tcp_connection_t *tc)
SYN retransmit timer handler.
u16 nexts[VLIB_FRAME_SIZE]
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
static int tcp_do_retransmit(tcp_connection_t *tc, u32 max_burst_size)
u32 fib_table_get_index_for_sw_if_index(fib_protocol_t proto, u32 sw_if_index)
Get the index of the FIB bound to the interface.
#define tcp_fastrecovery_first_off(tc)
static u32 tcp_options_write(u8 *data, tcp_options_t *opts)
Write TCP options to segment.
void session_add_self_custom_tx_evt(transport_connection_t *tc, u8 has_prio)
void tcp_timer_retransmit_handler(tcp_connection_t *tc)
struct _tcp_main tcp_main_t
static void * vlib_buffer_push_tcp_net_order(vlib_buffer_t *b, u16 sp, u16 dp, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
static void transport_rx_fifo_req_deq_ntf(transport_connection_t *tc)
static sack_scoreboard_hole_t * scoreboard_get_hole(sack_scoreboard_t *sb, u32 index)
void tcp_connection_timers_reset(tcp_connection_t *tc)
Stop all connection timers.
This packet is to be rewritten and forwarded to the next processing node.
u16 current_length
Number of bytes between current data and the end of this buffer.
static int tcp_transmit_unsent(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
struct _tcp_connection tcp_connection_t
vlib_main_t vlib_node_runtime_t vlib_frame_t * frame
static u32 tcp_initial_wnd_unscaled(tcp_connection_t *tc)
TCP's initial window.
enum _tcp_output_next tcp_output_next_t
u16 ip4_tcp_udp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip4_header_t *ip0)
static void tcp_cc_congestion(tcp_connection_t *tc)
static ip_csum_t ip_csum_with_carry(ip_csum_t sum, ip_csum_t x)
#define TCP_RTO_SYN_RETRIES
#define VLIB_NODE_FN(node)
static void tcp_push_ip_hdr(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, vlib_buffer_t *b)
static void * vlib_buffer_push_ip6_custom(vlib_main_t *vm, vlib_buffer_t *b, ip6_address_t *src, ip6_address_t *dst, int proto, u32 flow_label)
Push IPv6 header to buffer.
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
static uword tcp46_send_reset_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame, u8 is_ip4)
vlib_error_t * errors
Vector of errors for this node.
static uword vlib_buffer_length_in_chain(vlib_main_t *vm, vlib_buffer_t *b)
Get length in bytes of the buffer chain.
static u8 tcp_is_descheduled(tcp_connection_t *tc)
u8 n_sack_blocks
Number of SACK blocks.
struct _sack_scoreboard sack_scoreboard_t
struct _tcp_header tcp_header_t
int tcp_half_open_connection_cleanup(tcp_connection_t *tc)
Try to cleanup half-open connection.
#define scoreboard_rescue_rxt_valid(_sb, _tc)
#define tcp_in_cong_recovery(tc)
u8 wscale
Window scale advertised.
enum fib_protocol_t_ fib_protocol_t
Protocol Type.
#define TCP_OPTS_MAX_SACK_BLOCKS
#define foreach_tcp4_reset_next
static u32 tcp_prepare_retransmit_segment(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t **b)
Build a retransmit segment.
static u16 ip_calculate_l4_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip_csum_t sum0, u32 payload_length, u8 *iph, u32 ip_header_size, u8 *l4h)
int tcp_session_custom_tx(void *conn, transport_send_params_t *sp)
void session_transport_closing_notify(transport_connection_t *tc)
Notification from transport that connection is being closed.
static uword tcp46_output_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, int is_ip4)
static void * tcp_init_buffer(vlib_main_t *vm, vlib_buffer_t *b)
static ip_adjacency_t * adj_get(adj_index_t adj_index)
Get a pointer to an adjacency object from its index.
void tcp_make_syn(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN.
if(node->flags &VLIB_NODE_FLAG_TRACE) vnet_interface_output_trace(vm
static int tcp_prepare_segment(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t **b)
Allocate a new buffer and build a new tcp segment.
static void tcp_make_fin(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to FIN-ACK.
vlib_get_buffers(vm, from, b, n_left_from)
#define TCP_OPTION_LEN_SACK_BLOCK
#define seq_leq(_s1, _s2)
description fragment has unexpected format
#define vlib_prefetch_buffer_header(b, type)
Prefetch buffer metadata.
static void * ip4_next_header(ip4_header_t *i)
#define tcp_zero_rwnd_sent(tc)
sack_block_t * sacks
SACK blocks.
static sack_scoreboard_hole_t * scoreboard_first_hole(sack_scoreboard_t *sb)
vlib_buffer_enqueue_to_next(vm, node, from,(u16 *) nexts, frame->n_vectors)
static tcp_header_t * tcp_buffer_hdr(vlib_buffer_t *b)
static void tcp46_output_trace_frame(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *to_next, u32 n_bufs)
#define tcp_validate_txf_size(_tc, _a)
static void tcp_push_hdr_i(tcp_connection_t *tc, vlib_buffer_t *b, u32 snd_nxt, u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)
Push TCP header and update connection variables.
#define tcp_in_fastrecovery(tc)
#define tcp_csum_offload(tc)
static u32 vlib_get_buffer_index(vlib_main_t *vm, void *p)
Translate buffer pointer into buffer index.
static void tcp_retransmit_timer_update(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
u32 tcp_session_push_header(transport_connection_t *tconn, vlib_buffer_t *b)
static void * vlib_buffer_push_ip6(vlib_main_t *vm, vlib_buffer_t *b, ip6_address_t *src, ip6_address_t *dst, int proto)
Push IPv6 header to buffer.
#define TCP_OPTION_LEN_WINDOW_SCALE
vlib_node_registration_t tcp6_reset_node
(constructor) VLIB_REGISTER_NODE (tcp6_reset_node)
void scoreboard_clear_reneging(sack_scoreboard_t *sb, u32 start, u32 end)
vlib_error_t error
Error code for buffers to be enqueued to error handler.
#define TRANSPORT_MAX_HDRS_LEN
static void tcp_retransmit_timer_set(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
vlib_main_t * vm
convenience pointer to this thread's vlib main
void tcp_send_reset(tcp_connection_t *tc)
Build and set reset packet for connection.
void tcp_send_synack(tcp_connection_t *tc)
#define ADJ_INDEX_INVALID
Invalid ADJ index - used when no adj is known likewise blazoned capitals INVALID speak volumes where ...
vl_api_interface_index_t sw_if_index
static int tcp_make_synack_options(tcp_connection_t *tc, tcp_options_t *opts)
static int tcp_make_syn_options(tcp_connection_t *tc, tcp_options_t *opts)
static void * vlib_buffer_make_headroom(vlib_buffer_t *b, u8 size)
Make head room, typically for packet headers.
static int tcp_retransmit_sack(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Do retransmit with SACKs.
void tcp_connection_tx_pacer_reset(tcp_connection_t *tc, u32 window, u32 start_bucket)
static void tcp_cc_loss(tcp_connection_t *tc)
format_function_t format_tcp_connection_id
static __clib_warn_unused_result u32 vlib_buffer_alloc(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Allocate buffers into supplied array.
#define TCP_DUPACK_THRESHOLD
description No buffer space
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
#define TCP_ESTABLISH_TIME
static void tcp_output_handle_link_local(tcp_connection_t *tc0, vlib_buffer_t *b0, u16 *next0, u32 *error0)
tcp_timer_wheel_t timer_wheel
worker timer wheel
#define foreach_tcp6_output_next
static u32 tcp_time_tstamp(u32 thread_index)
Time used to generate timestamps, not the timestamp.
void tcp_program_dupack(tcp_connection_t *tc)
int tcp_fastrecovery_prr_snd_space(tcp_connection_t *tc)
Estimate send space using proportional rate reduction (RFC6937)
static u8 tcp_window_compute_scale(u32 window)
vlib_main_t * vm
X-connect all packets from the HOST to the PHY.
#define vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0)
Finish enqueueing one buffer forward in the graph.
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Get pointer to next frame vector data by (vlib_node_runtime_t, next_index).
#define TCP_OPTION_LEN_TIMESTAMP
#define foreach_tcp4_output_next
static void tcp_update_time_now(tcp_worker_ctx_t *wrk)
static void tcp_enqueue_to_ip_lookup(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4, u32 fib_index)
static void vlib_node_increment_counter(vlib_main_t *vm, u32 node_index, u32 counter_index, u64 increment)
#define TCP_DBG(_fmt, _args...)
#define TCP_MAX_WND_SCALE
void tcp_program_cleanup(tcp_worker_ctx_t *wrk, tcp_connection_t *tc)
static void tcp_output_handle_packet(tcp_connection_t *tc0, vlib_buffer_t *b0, vlib_node_runtime_t *error_node, u16 *next0, u8 is_ip4)
vl_api_ip_port_and_mask_t src_port
static void tcp_cc_event(tcp_connection_t *tc, tcp_cc_event_t evt)
void transport_connection_reschedule(transport_connection_t *tc)
static u32 tcp_flight_size(const tcp_connection_t *tc)
Our estimate of the number of bytes in flight (pipe size)
This packet matches an "incomplete adjacency" and packets need to be passed to ARP to find rewrite st...
void tcp_bt_track_tx(tcp_connection_t *tc, u32 len)
Track a tcp tx burst.
#define VLIB_REGISTER_NODE(x,...)
static sack_scoreboard_hole_t * scoreboard_last_hole(sack_scoreboard_t *sb)
#define CLIB_PREFETCH(addr, size, type)
void tcp_send_window_update_ack(tcp_connection_t *tc)
Send window update ack.
sll srl srl sll sra u16x4 i
void tcp_program_retransmit(tcp_connection_t *tc)
void tcp_send_reset_w_pkt(tcp_connection_t *tc, vlib_buffer_t *pkt, u32 thread_index, u8 is_ip4)
Send reset without reusing existing buffer.
format_function_t format_tcp_state
static void tcp_update_rto(tcp_connection_t *tc)
#define clib_warning(format, args...)
#define tcp_in_recovery(tc)
struct _transport_connection transport_connection_t
#define TCP_TO_TIMER_TICK
Factor for converting ticks to timer ticks.
static u32 tcp_window_to_advertise(tcp_connection_t *tc, tcp_state_t state)
Compute and return window to advertise, scaled as per RFC1323.
u32 adj_index_t
An index for adjacencies.
#define tcp_zero_rwnd_sent_on(tc)
u16 mss
Maximum segment size advertised.
static u32 tcp_available_cc_snd_space(const tcp_connection_t *tc)
Estimate of how many bytes we can still push into the network.
static void * ip6_next_header(ip6_header_t *i)
static int tcp_retransmit_no_sack(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Fast retransmit without SACK info.
static void * vlib_buffer_push_tcp(vlib_buffer_t *b, u16 sp_net, u16 dp_net, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
static void tcp_make_ack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to ACK.
static u32 transport_max_tx_dequeue(transport_connection_t *tc)
#define seq_geq(_s1, _s2)
u16 ip6_tcp_udp_icmp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip6_header_t *ip0, int *bogus_lengthp)
static int tcp_make_established_options(tcp_connection_t *tc, tcp_options_t *opts)
void tcp_bt_check_app_limited(tcp_connection_t *tc)
Check if sample to be generated is app limited.
u16 cached_next_index
Next frame index that vector arguments were last enqueued to last time this node ran.
static void tcp_cc_init_rxt_timeout(tcp_connection_t *tc)
Reset congestion control, switch cwnd to loss window and try again.
static void tcp_output_push_ip(vlib_main_t *vm, vlib_buffer_t *b0, tcp_connection_t *tc0, u8 is_ip4)
#define tcp_recovery_on(tc)
static u8 * format_tcp_tx_trace(u8 *s, va_list *args)
void transport_connection_tx_pacer_reset_bucket(transport_connection_t *tc, u32 bucket)
Reset tx pacer bucket.
void tcp_bt_track_rxt(tcp_connection_t *tc, u32 start, u32 end)
Track a tcp retransmission.
void tcp_update_burst_snd_vars(tcp_connection_t *tc)
Update burst send vars.
u8 flags
Option flags, see above.
#define TRANSPORT_PACER_MIN_BURST
vlib_put_next_frame(vm, node, next_index, 0)
static uword ip6_address_is_link_local_unicast(const ip6_address_t *a)
static_always_inline void * clib_memcpy_fast(void *restrict dst, const void *restrict src, size_t n)
#define clib_mem_unaligned(pointer, type)
nat44_ei_hairpin_src_next_t next_index
static void tcp_update_rcv_wnd(tcp_connection_t *tc)
struct _sack_scoreboard_hole sack_scoreboard_hole_t
void tcp_send_fin(tcp_connection_t *tc)
Send FIN.
void tcp_send_ack(tcp_connection_t *tc)
void transport_connection_tx_pacer_update_bytes(transport_connection_t *tc, u32 bytes)
int tcp_retransmit_first_unacked(tcp_worker_ctx_t *wrk, tcp_connection_t *tc)
Retransmit first unacked segment.
template key/value backing page structure
#define tcp_opts_wscale(_to)
u32 tsval
Timestamp value.
static void tcp_timer_update(tcp_timer_wheel_t *tw, tcp_connection_t *tc, u8 timer_id, u32 interval)
u32 tsecr
Echoed/reflected time stamp.
#define tcp_fastrecovery_first(tc)
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
vlib_main_t vlib_node_runtime_t * node
static u8 tcp_max_tx_deq(tcp_connection_t *tc)
ip_lookup_next_t lookup_next_index
Next hop after ip4-lookup.
u32 next_buffer
Next buffer for this linked-list of buffers.
#define foreach_tcp6_reset_next
#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b)
static tcp_worker_ctx_t * tcp_get_worker(u32 thread_index)
void session_transport_closed_notify(transport_connection_t *tc)
Notification from transport that it is closed.
VLIB buffer representation.
int session_stream_connect_notify(transport_connection_t *tc, session_error_t err)
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
static void tcp_make_ack_i(tcp_connection_t *tc, vlib_buffer_t *b, tcp_state_t state, u8 flags)
Prepare ACK.
static int tcp_make_reset_in_place(vlib_main_t *vm, vlib_buffer_t *b, u8 is_ip4)
#define TCP_OPTION_LEN_MSS
sack_scoreboard_hole_t * scoreboard_next_rxt_hole(sack_scoreboard_t *sb, sack_scoreboard_hole_t *start, u8 have_unsent, u8 *can_rescue, u8 *snd_limited)
Figure out the next hole to retransmit.
u16 ip6_tcp_compute_checksum_custom(vlib_main_t *vm, vlib_buffer_t *p0, ip46_address_t *src, ip46_address_t *dst)
static f64 tcp_time_now_us(u32 thread_index)
void scoreboard_init_rxt(sack_scoreboard_t *sb, u32 snd_una)
static void tcp_connection_set_state(tcp_connection_t *tc, tcp_state_t state)
struct clib_bihash_value offset
template key/value backing page structure
u32 tcp_initial_window_to_advertise(tcp_connection_t *tc)
Compute initial window and scale factor.
#define TCP_USE_SACKS
Disable only for testing.
vl_api_dhcp_client_state_t state
static u32 vlib_num_workers()
void tcp_connection_cleanup(tcp_connection_t *tc)
Cleans up connection state.
static u32 tcp_buffer_len(vlib_buffer_t *b)
static u8 tcp_retransmit_should_retry_head(tcp_connection_t *tc, sack_scoreboard_t *sb)
void tcp_send_syn(tcp_connection_t *tc)
Send SYN.
vlib_node_registration_t tcp6_output_node
(constructor) VLIB_REGISTER_NODE (tcp6_output_node)
static_always_inline void vnet_buffer_offload_flags_set(vlib_buffer_t *b, vnet_buffer_oflags_t oflags)
u16 flags
Copy of main node flags.
void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
static u8 tcp_timer_is_active(tcp_connection_t *tc, tcp_timers_e timer)
static u16 tcp_compute_checksum(tcp_connection_t *tc, vlib_buffer_t *b)
enum _tcp_reset_next tcp_reset_next_t
static u32 transport_max_rx_enqueue(transport_connection_t *tc)
#define tcp_opts_sack_permitted(_to)
static void vlib_buffer_free_one(vlib_main_t *vm, u32 buffer_index)
Free one buffer. Shorthand to free a single buffer chain.
tcp_connection_t tcp_connection
static u32 tcp_tstamp(tcp_connection_t *tc)
Generate timestamp for tcp connection.
void tcp_program_ack(tcp_connection_t *tc)
static void * tcp_reuse_buffer(vlib_main_t *vm, vlib_buffer_t *b)
vlib_node_registration_t tcp4_reset_node
(constructor) VLIB_REGISTER_NODE (tcp4_reset_node)
#define VLIB_NODE_FLAG_TRACE
static uword round_down_pow2(uword x, uword pow2)
vlib_node_registration_t tcp4_output_node
(constructor) VLIB_REGISTER_NODE (tcp4_output_node)
#define CLIB_CACHE_LINE_BYTES
u32 total_length_not_including_first_buffer
Only valid for first buffer in chain.
static void * vlib_buffer_push_ip4(vlib_main_t *vm, vlib_buffer_t *b, ip4_address_t *src, ip4_address_t *dst, int proto, u8 csum_offload)
Push IPv4 header to buffer.
static void tcp_enqueue_to_output(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4)
static tcp_main_t * vnet_get_tcp_main()
u32 tco_next_node[2]
Session layer edge indices to tcp output.
vlib_buffer_t * bufs[VLIB_FRAME_SIZE]
static char * tcp_error_strings[]
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
enum _tcp_state tcp_state_t
u32 transport_connection_tx_pacer_burst(transport_connection_t *tc)
Get tx pacer max burst.
#define tcp_worker_stats_inc(_wrk, _stat, _val)
#define tcp_zero_rwnd_sent_off(tc)
u32 * tx_buffers
tx buffer free list
static void session_add_pending_tx_buffer(u32 thread_index, u32 bi, u32 next_node)
Add session node pending buffer with custom node.
adj_index_t adj_nbr_find(fib_protocol_t nh_proto, vnet_link_t link_type, const ip46_address_t *nh_addr, u32 sw_if_index)
Look up neighbor adjacency.
#define TCP_EVT(_evt, _args...)
static int tcp_make_options(tcp_connection_t *tc, tcp_options_t *opts, tcp_state_t state)
static uword pool_elts(void *v)
Number of active elements in a pool.