FD.io VPP  v18.07-34-g55fbdb9
Vector Packet Processing
input.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * input.c: Unix file input
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39 
40 #include <vlib/vlib.h>
41 #include <vlib/unix/unix.h>
42 #include <signal.h>
43 #include <unistd.h>
45 
46 /* FIXME autoconf */
47 #define HAVE_LINUX_EPOLL
48 
49 #ifdef HAVE_LINUX_EPOLL
50 
51 #include <sys/epoll.h>
52 
53 typedef struct
54 {
55  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
56  int epoll_fd;
57  struct epoll_event *epoll_events;
59 
60  /* Statistics. */
64 
65 static linux_epoll_main_t *linux_epoll_mains = 0;
66 
67 static void
69 {
71  linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains,
73  struct epoll_event e = { 0 };
74  int op, add_del = 0;
75 
76  e.events = EPOLLIN;
78  e.events |= EPOLLOUT;
80  e.events |= EPOLLET;
81  e.data.u32 = f - fm->file_pool;
82 
83  op = -1;
84 
85  switch (update_type)
86  {
88  op = EPOLL_CTL_ADD;
89  add_del = 1;
90  break;
91 
93  op = EPOLL_CTL_MOD;
94  break;
95 
97  op = EPOLL_CTL_DEL;
98  add_del = -1;
99  break;
100 
101  default:
102  clib_warning ("unknown update_type %d", update_type);
103  return;
104  }
105 
106  /* worker threads open epoll fd only if needed */
107  if (update_type == UNIX_FILE_UPDATE_ADD && em->epoll_fd == -1)
108  {
109  em->epoll_fd = epoll_create (1);
110  if (em->epoll_fd < 0)
111  {
112  clib_unix_warning ("epoll_create");
113  return;
114  }
115  em->n_epoll_fds = 0;
116  }
117 
118  if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0)
119  {
120  clib_unix_warning ("epoll_ctl");
121  return;
122  }
123 
124  em->n_epoll_fds += add_del;
125 
126  if (em->n_epoll_fds == 0)
127  {
128  close (em->epoll_fd);
129  em->epoll_fd = -1;
130  }
131 }
132 
135  vlib_frame_t * frame, u32 thread_index)
136 {
137  unix_main_t *um = &unix_main;
139  linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains, thread_index);
140  struct epoll_event *e;
141  int n_fds_ready;
142  int is_main = (thread_index == 0);
143 
144  {
145  vlib_node_main_t *nm = &vm->node_main;
146  u32 ticks_until_expiration;
147  f64 timeout;
148  int timeout_ms = 0, max_timeout_ms = 10;
149  f64 vector_rate = vlib_last_vectors_per_main_loop (vm);
150 
151  /*
152  * If we've been asked for a fixed-sleep between main loop polls,
153  * do so right away.
154  */
155  if (PREDICT_FALSE (is_main && um->poll_sleep_usec))
156  {
157  struct timespec ts, tsrem;
158  timeout = 0;
159  timeout_ms = 0;
160  node->input_main_loops_per_call = 0;
161  ts.tv_sec = 0;
162  ts.tv_nsec = 1000 * um->poll_sleep_usec;
163 
164  while (nanosleep (&ts, &tsrem) < 0)
165  {
166  ts = tsrem;
167  }
168  }
169  /* If we're not working very hard, decide how long to sleep */
170  else if (is_main && vector_rate < 2 && vm->api_queue_nonempty == 0
171  && nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0)
172  {
173  ticks_until_expiration = TW (tw_timer_first_expires_in_ticks)
174  ((TWT (tw_timer_wheel) *) nm->timing_wheel);
175 
176  /* Nothing on the fast wheel, sleep 10ms */
177  if (ticks_until_expiration == TW_SLOTS_PER_RING)
178  {
179  timeout = 10e-3;
180  timeout_ms = max_timeout_ms;
181  }
182  else
183  {
184  timeout = (f64) ticks_until_expiration *1e-5;
185  if (timeout < 1e-3)
186  timeout_ms = 0;
187  else
188  {
189  timeout_ms = timeout * 1e3;
190  /* Must be between 1 and 10 ms. */
191  timeout_ms = clib_max (1, timeout_ms);
192  timeout_ms = clib_min (max_timeout_ms, timeout_ms);
193  }
194  }
195  node->input_main_loops_per_call = 0;
196  }
197  else if (is_main == 0 && vector_rate < 2 &&
198  nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0)
199  {
200  timeout = 10e-3;
201  timeout_ms = max_timeout_ms;
202  node->input_main_loops_per_call = 0;
203  }
204  else /* busy */
205  {
206  /* Don't come back for a respectable number of dispatch cycles */
207  node->input_main_loops_per_call = 1024;
208  }
209 
210  /* Allow any signal to wakeup our sleep. */
211  if (is_main || em->epoll_fd != -1)
212  {
213  static sigset_t unblock_all_signals;
214  n_fds_ready = epoll_pwait (em->epoll_fd,
215  em->epoll_events,
216  vec_len (em->epoll_events),
217  timeout_ms, &unblock_all_signals);
218 
219  /* This kludge is necessary to run over absurdly old kernels */
220  if (n_fds_ready < 0 && errno == ENOSYS)
221  {
222  n_fds_ready = epoll_wait (em->epoll_fd,
223  em->epoll_events,
224  vec_len (em->epoll_events), timeout_ms);
225  }
226  }
227  else
228  {
229  if (timeout_ms)
230  usleep (timeout_ms * 1000);
231  return 0;
232  }
233  }
234 
235  if (n_fds_ready < 0)
236  {
237  if (unix_error_is_fatal (errno))
238  vlib_panic_with_error (vm, clib_error_return_unix (0, "epoll_wait"));
239 
240  /* non fatal error (e.g. EINTR). */
241  return 0;
242  }
243 
244  em->epoll_waits += 1;
245  em->epoll_files_ready += n_fds_ready;
246 
247  for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++)
248  {
249  u32 i = e->data.u32;
250  clib_file_t *f = fm->file_pool + i;
251  clib_error_t *errors[4];
252  int n_errors = 0;
253 
254  if (PREDICT_FALSE (pool_is_free (fm->file_pool, f)))
255  {
256  /*
257  * Under rare scenerop, epoll may still post us events for the
258  * deleted file descriptor. We just deal with it and throw away the
259  * events for the corresponding file descriptor.
260  */
261  if (e->events & EPOLLIN)
262  {
263  errors[n_errors] =
264  clib_error_return (0, "epoll event EPOLLIN dropped due "
265  "to free index %u", i);
266  n_errors++;
267  }
268  if (e->events & EPOLLOUT)
269  {
270  errors[n_errors] =
271  clib_error_return (0, "epoll event EPOLLOUT dropped due "
272  "to free index %u", i);
273  n_errors++;
274  }
275  if (e->events & EPOLLERR)
276  {
277  errors[n_errors] =
278  clib_error_return (0, "epoll event EPOLLERR dropped due "
279  "to free index %u", i);
280  n_errors++;
281  }
282  }
283  else if (PREDICT_TRUE (!(e->events & EPOLLERR)))
284  {
285  if (e->events & EPOLLIN)
286  {
287  errors[n_errors] = f->read_function (f);
288  f->read_events++;
289  n_errors += errors[n_errors] != 0;
290  }
291  if (e->events & EPOLLOUT)
292  {
293  errors[n_errors] = f->write_function (f);
294  f->write_events++;
295  n_errors += errors[n_errors] != 0;
296  }
297  }
298  else
299  {
300  if (f->error_function)
301  {
302  errors[n_errors] = f->error_function (f);
303  f->error_events++;
304  n_errors += errors[n_errors] != 0;
305  }
306  else
307  close (f->file_descriptor);
308  }
309 
310  ASSERT (n_errors < ARRAY_LEN (errors));
311  for (i = 0; i < n_errors; i++)
312  {
313  unix_save_error (um, errors[i]);
314  }
315  }
316 
317  return 0;
318 }
319 
320 static uword
322  vlib_node_runtime_t * node, vlib_frame_t * frame)
323 {
324  u32 thread_index = vlib_get_thread_index ();
325 
326  if (thread_index == 0)
327  return linux_epoll_input_inline (vm, node, frame, 0);
328  else
329  return linux_epoll_input_inline (vm, node, frame, thread_index);
330 }
331 
332 /* *INDENT-OFF* */
334  .function = linux_epoll_input,
335  .type = VLIB_NODE_TYPE_PRE_INPUT,
336  .name = "unix-epoll-input",
337 };
338 /* *INDENT-ON* */
339 
340 clib_error_t *
342 {
343  linux_epoll_main_t *em;
346 
347 
348  vec_validate_aligned (linux_epoll_mains, tm->n_vlib_mains,
350 
351  vec_foreach (em, linux_epoll_mains)
352  {
353  /* Allocate some events. */
355 
356  if (linux_epoll_mains == em)
357  {
358  em->epoll_fd = epoll_create (1);
359  if (em->epoll_fd < 0)
360  return clib_error_return_unix (0, "epoll_create");
361  }
362  else
363  em->epoll_fd = -1;
364  }
365 
367 
368  return 0;
369 }
370 
372 
373 #endif /* HAVE_LINUX_EPOLL */
374 
375 static clib_error_t *
377 {
379 }
380 
382 
383 /*
384  * fd.io coding-style-patch-verification: ON
385  *
386  * Local Variables:
387  * eval: (c-set-style "gnu")
388  * End:
389  */
#define UNIX_FILE_EVENT_EDGE_TRIGGERED
Definition: file.h:58
unix_main_t unix_main
Definition: main.c:62
#define CLIB_CACHE_LINE_ALIGN_MARK(mark)
Definition: cache.h:63
#define clib_min(x, y)
Definition: clib.h:289
static void vlib_panic_with_error(vlib_main_t *vm, clib_error_t *error)
Definition: main.h:270
static clib_error_t * unix_input_init(vlib_main_t *vm)
Definition: input.c:376
static vlib_node_registration_t linux_epoll_input_node
(constructor) VLIB_REGISTER_NODE (linux_epoll_input_node)
Definition: input.c:333
#define PREDICT_TRUE(x)
Definition: clib.h:106
unsigned long u64
Definition: types.h:89
u32 poll_sleep_usec
Definition: unix.h:106
u32 file_descriptor
Definition: file.h:54
int i
u32 polling_thread_index
Definition: file.h:61
#define pool_is_free(P, E)
Use free bitmap to query whether given element is free.
Definition: pool.h:263
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:448
static u32 vlib_last_vectors_per_main_loop(vlib_main_t *vm)
Definition: main.h:298
clib_file_function_t * read_function
Definition: file.h:67
double f64
Definition: types.h:142
u32 input_main_loops_per_call
For input nodes: decremented on each main loop iteration until it reaches zero and function is called...
Definition: node.h:475
clib_file_t * file_pool
Definition: file.h:88
#define static_always_inline
Definition: clib.h:93
#define VLIB_INIT_FUNCTION(x)
Definition: init.h:156
clib_file_update_type_t
Definition: file.h:78
static uword linux_epoll_input(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
Definition: input.c:321
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
#define clib_error_return(e, args...)
Definition: error.h:99
clib_file_main_t file_main
Definition: main.c:63
#define vec_resize(V, N)
Resize a vector (no header, unspecified alignment) Add N elements to end of given vector V...
Definition: vec.h:240
unsigned int u32
Definition: types.h:88
#define vlib_call_init_function(vm, x)
Definition: init.h:227
#define VLIB_FRAME_SIZE
Definition: node.h:364
#define clib_error_return_unix(e, args...)
Definition: error.h:102
#define TW_SLOTS_PER_RING
#define TWT(a)
u32 flags
Definition: file.h:56
#define PREDICT_FALSE(x)
Definition: clib.h:105
void(* file_update)(clib_file_t *file, clib_file_update_type_t update_type)
Definition: file.h:90
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:153
static_always_inline uword vlib_get_thread_index(void)
Definition: threads.h:221
vlib_main_t * vm
Definition: buffer.c:294
static void linux_epoll_file_update(clib_file_t *f, clib_file_update_type_t update_type)
Definition: input.c:68
#define clib_warning(format, args...)
Definition: error.h:59
#define ARRAY_LEN(x)
Definition: clib.h:59
#define ASSERT(truth)
static word unix_error_is_fatal(word error)
Definition: error.h:118
clib_error_t * linux_epoll_input_init(vlib_main_t *vm)
Definition: input.c:341
static_always_inline uword linux_epoll_input_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, u32 thread_index)
Definition: input.c:134
#define UNIX_FILE_DATA_AVAILABLE_TO_WRITE
Definition: file.h:57
#define clib_max(x, y)
Definition: clib.h:282
struct epoll_event * epoll_events
Definition: input.c:57
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
u32 input_node_counts_by_state[VLIB_N_NODE_STATE]
Definition: node.h:729
u64 read_events
Definition: file.h:73
vlib_node_main_t node_main
Definition: main.h:132
u64 uword
Definition: types.h:112
#define clib_unix_warning(format, args...)
Definition: error.h:68
u64 epoll_files_ready
Definition: input.c:61
clib_file_function_t * error_function
Definition: file.h:67
u64 write_events
Definition: file.h:74
static vlib_thread_main_t * vlib_get_thread_main()
Definition: global_funcs.h:32
static void unix_save_error(unix_main_t *um, clib_error_t *error)
Definition: unix.h:115
u64 error_events
Definition: file.h:75
#define vec_foreach(var, vec)
Vector iterator.
Definition: file.h:51
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:62
#define TW(a)
clib_file_function_t * write_function
Definition: file.h:67
void * timing_wheel
Definition: node.h:705