22 typedef enum _tcp_output_nect
29 #define foreach_tcp4_output_next \ 30 _ (DROP, "error-drop") \ 31 _ (IP_LOOKUP, "ip4-lookup") 33 #define foreach_tcp6_output_next \ 34 _ (DROP, "error-drop") \ 35 _ (IP_LOOKUP, "ip6-lookup") 38 #define tcp_error(n,s) s, 72 snd_mss = tc->opt.mss < snd_mss ? tc->opt.mss : snd_mss;
74 tc->snd_mss = snd_mss;
128 u32 available_space, max_fifo, observed_wnd;
130 if (state < TCP_STATE_ESTABLISHED)
139 ASSERT (tc->opt.mss < max_fifo);
141 if (available_space < tc->opt.mss && available_space < max_fifo / 8)
148 observed_wnd = tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
151 if (available_space < observed_wnd)
153 if (available_space == 0)
154 clib_warning (
"Didn't shrink rcv window despite not having space");
159 if (tc->rcv_wnd == 0)
161 tc->flags |= TCP_CONN_SENT_RCV_WND0;
164 return tc->rcv_wnd >> tc->rcv_wscale;
174 u32 buf, seq_len = 4;
180 buf = clib_host_to_net_u16 (opts->
mss);
182 data +=
sizeof (opts->
mss);
205 buf = clib_host_to_net_u32 (opts->
tsval);
207 data +=
sizeof (opts->
tsval);
208 buf = clib_host_to_net_u32 (opts->
tsecr);
210 data +=
sizeof (opts->
tsecr);
220 if (n_sack_blocks != 0)
224 for (i = 0; i < n_sack_blocks; i++)
226 buf = clib_host_to_net_u32 (opts->
sacks[i].start);
229 buf = clib_host_to_net_u32 (opts->
sacks[i].end);
258 opts->
flags |= TCP_OPTS_FLAG_MSS;
262 opts->
flags |= TCP_OPTS_FLAG_WSCALE;
266 opts->
flags |= TCP_OPTS_FLAG_TSTAMP;
271 opts->
flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
284 opts->
flags |= TCP_OPTS_FLAG_MSS;
290 opts->
flags |= TCP_OPTS_FLAG_WSCALE;
291 opts->
wscale = tc->rcv_wscale;
297 opts->
flags |= TCP_OPTS_FLAG_TSTAMP;
299 opts->
tsecr = tc->tsval_recent;
305 opts->
flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
323 opts->
flags |= TCP_OPTS_FLAG_TSTAMP;
325 opts->
tsecr = tc->tsval_recent;
332 opts->
flags |= TCP_OPTS_FLAG_SACK;
333 opts->
sacks = tc->snd_sacks;
350 case TCP_STATE_ESTABLISHED:
351 case TCP_STATE_FIN_WAIT_1:
353 case TCP_STATE_SYN_RCVD:
355 case TCP_STATE_SYN_SENT:
363 #define tcp_get_free_buffer_index(tm, bidx) \ 365 u32 *my_tx_buffers, n_free_buffers; \ 366 u32 cpu_index = tm->vlib_main->cpu_index; \ 367 my_tx_buffers = tm->tx_buffers[cpu_index]; \ 368 if (PREDICT_FALSE(vec_len (my_tx_buffers) == 0)) \ 370 n_free_buffers = 32; \ 371 vec_validate (my_tx_buffers, n_free_buffers - 1); \ 372 _vec_len(my_tx_buffers) = vlib_buffer_alloc_from_free_list ( \ 373 tm->vlib_main, my_tx_buffers, n_free_buffers, \ 374 VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); \ 375 tm->tx_buffers[cpu_index] = my_tx_buffers; \ 378 if (PREDICT_FALSE (vec_len (my_tx_buffers) == 0)) \ 380 *bidx = my_tx_buffers[_vec_len (my_tx_buffers)-1]; \ 381 _vec_len (my_tx_buffers) -= 1; \ 410 u8 tcp_opts_len, tcp_hdr_opts_len;
418 tcp_hdr_opts_len = tcp_opts_len +
sizeof (
tcp_header_t);
421 tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);
426 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
459 vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK;
473 u8 tcp_opts_len, tcp_hdr_opts_len;
478 memset (snd_opts, 0,
sizeof (*snd_opts));
486 tc->snd_una = tc->iss;
487 tc->snd_nxt = tc->iss + 1;
488 tc->snd_una_max = tc->snd_nxt;
494 tcp_hdr_opts_len = tcp_opts_len +
sizeof (
tcp_header_t);
497 tc->rcv_nxt, tcp_hdr_opts_len,
502 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
513 u32 *to_next, next_index;
583 if (state == TCP_STATE_CLOSED)
588 tmp = clib_net_to_host_u32 (th0->seq_number);
592 th0->ack_number = clib_host_to_net_u32 (tmp + 1);
596 else if (state >= TCP_STATE_SYN_SENT)
599 th0->seq_number = th0->ack_number;
603 src_port0 = th0->src_port;
604 th0->src_port = th0->dst_port;
605 th0->dst_port = src_port0;
607 th0->data_offset_and_reserved = (tcp_hdr_len >> 2) << 4;
608 th0->urgent_pointer = 0;
664 seq = pkt_th->ack_number;
671 ack = clib_host_to_net_u32 (
vnet_buffer (pkt)->tcp.seq_end);
675 seq, ack, tcp_hdr_len, flags, 0);
710 &tc->c_rmt_ip4, IP_PROTOCOL_TCP);
719 &tc->c_rmt_ip6, IP_PROTOCOL_TCP);
740 u8 tcp_hdr_opts_len, tcp_opts_len;
756 tc->snd_una = tc->iss;
757 tc->snd_una_max = tc->snd_nxt = tc->iss + 1;
762 memset (&snd_opts, 0,
sizeof (snd_opts));
764 tcp_hdr_opts_len = tcp_opts_len +
sizeof (
tcp_header_t);
774 tc->rtt_seq = tc->snd_nxt;
790 u32 *to_next, next_index;
826 tc->flags |= TCP_CONN_FINSNT;
835 case TCP_STATE_ESTABLISHED:
837 case TCP_STATE_SYN_RCVD:
839 case TCP_STATE_SYN_SENT:
841 case TCP_STATE_LAST_ACK:
842 case TCP_STATE_FIN_WAIT_1:
857 u32 advertise_wnd, data_len;
858 u8 tcp_opts_len, tcp_hdr_opts_len, opts_write_len,
flags;
866 memset (snd_opts, 0,
sizeof (*snd_opts));
868 tcp_hdr_opts_len = tcp_opts_len +
sizeof (
tcp_header_t);
876 tc->rcv_nxt, tcp_hdr_opts_len, flags,
881 ASSERT (opts_write_len == tcp_opts_len);
884 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
886 tc->snd_nxt += data_len;
911 tc->flags &= ~TCP_CONN_DELACK;
933 ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
943 offset = hole->start - tc->snd_una;
944 hole_size = hole->end - hole->start;
948 if (hole_size < max_bytes)
949 max_bytes = hole_size;
953 if (
seq_geq (tc->snd_nxt, tc->snd_una_max))
975 u32 bi, max_bytes, snd_space;
993 tc->snd_nxt = tc->snd_una;
999 if (tc->state >= TCP_STATE_ESTABLISHED)
1008 max_bytes =
clib_min (tc->snd_mss, snd_space);
1017 tc->rtx_bytes += max_bytes;
1025 ASSERT (tc->state == TCP_STATE_SYN_RCVD
1026 || tc->state == TCP_STATE_SYN_SENT);
1050 ASSERT (tc->state == TCP_STATE_SYN_SENT);
1080 u32 snd_nxt = tc->snd_nxt;
1084 tc->snd_nxt = tc->snd_una;
1093 tc->snd_nxt = snd_nxt;
1094 tc->rtx_bytes += tc->snd_mss;
1101 u32 snd_space, max_bytes, n_bytes, bi;
1109 tc->snd_nxt = tc->snd_una;
1118 max_bytes =
clib_min (tc->snd_mss, snd_space);
1127 snd_space -= n_bytes;
1131 tc->snd_nxt = tc->snd_una_max;
1147 u32 n_left_from, next_index, *from, *to_next;
1155 while (n_left_from > 0)
1161 while (n_left_from > 0 && n_left_to_next > 0)
1174 n_left_to_next -= 1;
1179 if (
PREDICT_FALSE (tc0 == 0 || tc0->state == TCP_STATE_CLOSED))
1181 error0 = TCP_ERROR_INVALID_CONNECTION;
1193 &tc0->c_rmt_ip4, IP_PROTOCOL_TCP);
1202 &tc0->c_rmt_ip6, IP_PROTOCOL_TCP);
1210 (
vnet_buffer (b0)->tcp.flags & TCP_BUF_FLAG_DUPACK))
1212 ASSERT (tc0->snt_dupacks > 0);
1216 error0 = TCP_ERROR_FILTERED_DUPACKS;
1223 tc0->rcv_las = tc0->rcv_nxt;
1227 ~(TCP_CONN_SNDACK | TCP_CONN_DELACK | TCP_CONN_BURSTACK);
1236 if (
seq_lt (tc0->snd_una_max, tc0->snd_nxt))
1238 tc0->snd_una_max = tc0->snd_nxt;
1239 if (tc0->rtt_ts == 0)
1242 tc0->rtt_seq = tc0->snd_nxt;
1249 && tc0->snd_nxt != tc0->snd_una)
1269 n_left_to_next, bi0, next0);
1297 .vector_size =
sizeof (
u32),
1302 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n, 1317 .name =
"tcp6-output",
1319 .vector_size =
sizeof (
u32),
1324 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n, 1345 typedef enum _tcp_reset_next
1352 #define foreach_tcp4_reset_next \ 1353 _(DROP, "error-drop") \ 1354 _(IP_LOOKUP, "ip4-lookup") 1356 #define foreach_tcp6_reset_next \ 1357 _(DROP, "error-drop") \ 1358 _(IP_LOOKUP, "ip6-lookup") 1364 u32 n_left_from, next_index, *from, *to_next;
1372 while (n_left_from > 0)
1378 while (n_left_from > 0 && n_left_to_next > 0)
1389 n_left_to_next -= 1;
1394 my_thread_index, is_ip4))
1396 error0 = TCP_ERROR_LOOKUP_DROPS;
1414 n_left_to_next, bi0, next0);
1438 .name =
"tcp4-reset",
1439 .vector_size =
sizeof (
u32),
1444 #define _(s,n) [TCP_RESET_NEXT_##s] = n, 1456 .name =
"tcp6-reset",
1457 .vector_size =
sizeof (
u32),
1462 #define _(s,n) [TCP_RESET_NEXT_##s] = n, void tcp_make_fin(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to FIN-ACK.
#define TCP_IW_N_SEGMENTS
vlib_frame_t * vlib_get_frame_to_node(vlib_main_t *vm, u32 to_node_index)
sll srl srl sll sra u16x4 i
u32 stream_session_peek_bytes(transport_connection_t *tc, u8 *buffer, u32 offset, u32 max_bytes)
#define TCP_OPTION_LEN_EOL
static void tcp_enqueue_to_ip_lookup(vlib_main_t *vm, vlib_buffer_t *b, u32 bi, u8 is_ip4)
static void tcp_retransmit_timer_set(tcp_connection_t *tc)
#define TCP_OPTION_LEN_SACK_PERMITTED
void tcp_timer_retransmit_handler(u32 index)
static void tcp_push_hdr_i(tcp_connection_t *tc, vlib_buffer_t *b, tcp_state_t next_state)
Push TCP header and update connection variables.
struct _transport_connection transport_connection_t
#define TCP_TO_TIMER_TICK
static u8 svm_fifo_has_ooo_data(svm_fifo_t *f)
#define tcp_opts_tstamp(_to)
static uword tcp4_send_reset(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
void tcp_make_synack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN-ACK.
static void tcp_enqueue_to_output(vlib_main_t *vm, vlib_buffer_t *b, u32 bi, u8 is_ip4)
static int tcp_make_syn_options(tcp_options_t *opts, u8 wnd_scale)
static int ip4_header_bytes(ip4_header_t *i)
static tcp_connection_t * tcp_half_open_connection_get(u32 conn_index)
void vlib_put_next_frame(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, u32 n_vectors_left)
Release pointer to next frame vector data.
struct _tcp_main tcp_main_t
static u32 tcp_initial_wnd_unscaled(tcp_connection_t *tc)
TCP's IW as recommended by RFC6928.
struct _vlib_node_registration vlib_node_registration_t
static u32 tcp_session_has_ooo_data(tcp_connection_t *tc)
struct _tcp_connection tcp_connection_t
#define tcp_get_free_buffer_index(tm, bidx)
#define tcp_opts_sack(_to)
void tcp_make_ack_i(tcp_connection_t *tc, vlib_buffer_t *b, tcp_state_t state, u8 flags)
Prepare ACK.
static void scoreboard_clear(sack_scoreboard_t *sb)
static uword tcp46_send_reset_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame, u8 is_ip4)
vlib_error_t * errors
Vector of errors for this node.
u8 n_sack_blocks
Number of SACK blocks.
struct _tcp_header tcp_header_t
static u32 tcp_available_snd_space(const tcp_connection_t *tc)
struct _sack_scoreboard_hole sack_scoreboard_hole_t
u8 wscale
Window scale advertised by peer.
static void tcp_reuse_buffer(vlib_main_t *vm, vlib_buffer_t *b)
#define TCP_OPTS_MAX_SACK_BLOCKS
#define TCP_MAX_RX_FIFO_SIZE
vlib_node_registration_t ip4_lookup_node
(constructor) VLIB_REGISTER_NODE (ip4_lookup_node)
struct _stream_session_t stream_session_t
static void tcp_timer_retransmit_handler_i(u32 index, u8 is_syn)
#define foreach_tcp4_reset_next
static u32 stream_session_fifo_size(transport_connection_t *tc)
static stream_session_t * stream_session_get(u64 si, u32 thread_index)
#define VLIB_BUFFER_NEXT_PRESENT
i16 current_data
signed offset in data[], pre_data[] that we are currently processing.
void tcp_push_ip_hdr(tcp_main_t *tm, tcp_connection_t *tc, vlib_buffer_t *b)
#define TCP_OPTION_LEN_SACK_BLOCK
static u32 stream_session_max_enqueue(transport_connection_t *tc)
u8 * format_tcp_tx_trace(u8 *s, va_list *args)
enum _tcp_state tcp_state_t
static void * ip4_next_header(ip4_header_t *i)
static u32 tcp_time_now(void)
sack_block_t * sacks
SACK blocks received.
#define TCP_ESTABLISH_TIME
#define TCP_EVT_DBG(_evt, _args...)
static void tcp_timer_set(tcp_connection_t *tc, u8 timer_id, u32 interval)
#define TCP_OPTION_LEN_WINDOW_SCALE
vlib_node_registration_t tcp6_reset_node
(constructor) VLIB_REGISTER_NODE (tcp6_reset_node)
int tcp_make_reset_in_place(vlib_main_t *vm, vlib_buffer_t *b0, tcp_state_t state, u32 my_thread_index, u8 is_ip4)
#define TCP_RTO_SYN_RETRIES
static int tcp_make_synack_options(tcp_connection_t *tc, tcp_options_t *opts)
u16 current_length
Nbytes between current data and the end of this buffer.
static void * vlib_buffer_make_headroom(vlib_buffer_t *b, u8 size)
Make head room, typically for packet headers.
#define tcp_in_fastrecovery(tc)
static void * vlib_buffer_push_tcp_net_order(vlib_buffer_t *b, u16 sp, u16 dp, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
#define tcp_opts_mss(_to)
uword os_get_cpu_number(void)
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
#define TCP_TIMER_HANDLE_INVALID
#define foreach_tcp6_output_next
static void * vlib_buffer_push_ip4(vlib_main_t *vm, vlib_buffer_t *b, ip4_address_t *src, ip4_address_t *dst, int proto)
Push IPv4 header to buffer.
void tcp_fast_retransmit(tcp_connection_t *tc)
void tcp_timer_retransmit_syn_handler(u32 index)
#define vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0)
Finish enqueueing one buffer forward in the graph.
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Get pointer to next frame vector data by (vlib_node_runtime_t, next_index).
#define TCP_OPTION_LEN_TIMESTAMP
#define foreach_tcp4_output_next
vlib_error_t error
Error code for buffers to be enqueued to error handler.
void tcp_set_snd_mss(tcp_connection_t *tc)
void tcp_make_ack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to ACK.
#define TCP_MAX_WND_SCALE
static void tcp_timer_reset(tcp_connection_t *tc, u8 timer_id)
static sack_scoreboard_hole_t * scoreboard_first_hole(sack_scoreboard_t *sb)
static void * vlib_buffer_push_tcp(vlib_buffer_t *b, u16 sp_net, u16 dp_net, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
#define VNET_BUFFER_LOCALLY_ORIGINATED
enum _tcp_output_nect tcp_output_next_t
#define clib_warning(format, args...)
#define VLIB_BUFFER_IS_TRACED
#define clib_memcpy(a, b, c)
u16 mss
Option flags, see above.
static void * ip6_next_header(ip6_header_t *i)
u16 ip6_tcp_udp_icmp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip6_header_t *ip0, int *bogus_lengthp)
vlib_node_registration_t ip6_lookup_node
(constructor) VLIB_REGISTER_NODE (ip6_lookup_node)
static int tcp_make_established_options(tcp_connection_t *tc, tcp_options_t *opts)
u16 cached_next_index
Next frame index that vector arguments were enqueued to the last time this node ran.
static uword tcp6_output(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
u16 ip4_tcp_udp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip4_header_t *ip0)
u32 tcp_push_header(transport_connection_t *tconn, vlib_buffer_t *b)
#define seq_geq(_s1, _s2)
u32 tcp_window_to_advertise(tcp_connection_t *tc, tcp_state_t state)
Compute and return window to advertise, scaled as per RFC1323.
vhost_vring_state_t state
void tcp_retransmit_first_unacked(tcp_connection_t *tc)
Retransmit first unacked segment.
u32 next_buffer
Next buffer for this linked-list of buffers.
u32 tcp_prepare_retransmit_segment(tcp_connection_t *tc, vlib_buffer_t *b, u32 max_bytes)
Build a retransmit segment.
static uword tcp6_send_reset(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
static uword tcp46_output_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame, int is_ip4)
static u8 tcp_make_state_flags(tcp_state_t next_state)
void tcp_send_fin(tcp_connection_t *tc)
Send FIN.
static u8 tcp_window_compute_scale(u32 available_space)
VLIB_NODE_FUNCTION_MULTIARCH(tcp4_output_node, tcp4_output)
u32 total_length_not_including_first_buffer
Only valid for first buffer in chain.
template key/value backing page structure
#define tcp_opts_wscale(_to)
u32 tsval
Peer's timestamp value.
u32 tsecr
Echoed/reflected time stamp.
static void * vlib_buffer_push_ip6(vlib_main_t *vm, vlib_buffer_t *b, ip6_address_t *src, ip6_address_t *dst, int proto)
Push IPv6 header to buffer.
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
#define foreach_tcp6_reset_next
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
void tcp_timer_delack_handler(u32 index)
#define TCP_OPTION_LEN_MSS
u32 tcp_initial_window_to_advertise(tcp_connection_t *tc)
Compute initial window and scale factor.
static tcp_connection_t * tcp_connection_get(u32 conn_index, u32 thread_index)
static u32 random_u32(u32 *seed)
32-bit random number generator
#define VLIB_REGISTER_NODE(x,...)
#define TCP_OPTION_LEN_NOOP
void tcp_send_syn(tcp_connection_t *tc)
Send SYN.
vlib_node_registration_t tcp6_output_node
(constructor) VLIB_REGISTER_NODE (tcp6_output_node)
enum _tcp_reset_next tcp_reset_next_t
#define tcp_opts_sack_permitted(_to)
void vlib_put_frame_to_node(vlib_main_t *vm, u32 to_node_index, vlib_frame_t *f)
vlib_node_registration_t tcp4_reset_node
(constructor) VLIB_REGISTER_NODE (tcp4_reset_node)
vlib_node_registration_t tcp4_output_node
(constructor) VLIB_REGISTER_NODE (tcp4_output_node)
u32 flags
buffer flags: VLIB_BUFFER_IS_TRACED: trace this buffer.
static tcp_main_t * vnet_get_tcp_main()
#define tcp_fastrecovery_off(tc)
static char * tcp_error_strings[]
static uword tcp4_output(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
static u8 tcp_timer_is_active(tcp_connection_t *tc, tcp_timers_e timer)
u32 tcp_options_write(u8 *data, tcp_options_t *opts)
Write TCP options to segment.
void tcp_send_reset(vlib_buffer_t *pkt, u8 is_ip4)
Send reset without reusing existing buffer.
static int tcp_make_options(tcp_connection_t *tc, tcp_options_t *opts, tcp_state_t state)