/*	$NetBSD: wqinput.c,v 1.5.4.1 2019/09/24 03:10:35 martin Exp $	*/

/*-
 * Copyright (c) 2017 Internet Initiative Japan Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef _KERNEL_OPT
#include "opt_net_mpsafe.h"
#endif

/* Header list reconstructed; inferred from the APIs used in this file. */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/mbuf.h>
#include <sys/pool.h>
#include <sys/percpu.h>
#include <sys/protosw.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/workqueue.h>

#include <net/if.h>
#include <netinet/wqinput.h>

#define WQINPUT_LIST_MAXLEN	IFQ_MAXLEN

struct wqinput_work {
	struct mbuf	*ww_mbuf;
	int		 ww_off;
	int		 ww_proto;
	struct wqinput_work *ww_next;
};
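/*
 * Pending works are chained through ww_next into a per-CPU singly-linked
 * list with head and tail pointers (struct wqinput_worklist below):
 *
 *	wwl_head -> work -> work -> ... -> work <- wwl_tail
 */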
struct wqinput_worklist {
	/*
	 * XXX: TAILQ cannot be used because TAILQ_INIT stores the address
	 * of the list head within the percpu data, while percpu(9) may
	 * move percpu data during bootup, leaving that pointer dangling.
	 */
	struct wqinput_work *wwl_head;
	struct wqinput_work *wwl_tail;
	unsigned int	 wwl_len;
	uint64_t	 wwl_dropped;
	struct work	 wwl_work;
	bool		 wwl_wq_is_active;
};

struct wqinput {
	struct workqueue *wqi_wq;
	struct pool	 wqi_work_pool;
	struct percpu	*wqi_worklists;	/* struct wqinput_worklist */
	void		(*wqi_input)(struct mbuf *, int, int);
};

static void wqinput_work(struct work *, void *);
static void wqinput_sysctl_setup(const char *, struct wqinput *);

/* percpu_foreach(9) callback: accumulate per-CPU drop counters. */
static void
wqinput_drops(void *p, void *arg, struct cpu_info *ci __unused)
{
	struct wqinput_worklist **const wwlp = p;
	struct wqinput_worklist *const wwl = *wwlp;
	uint64_t *sum = arg;

	*sum += wwl->wwl_dropped;
}

static int
wqinput_sysctl_drops_handler(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	struct wqinput *wqi;
	uint64_t sum = 0;
	int error;

	node = *rnode;
	wqi = node.sysctl_data;

	percpu_foreach(wqi->wqi_worklists, wqinput_drops, &sum);
	node.sysctl_data = &sum;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return error;

	return 0;
}

/* Create the net.wqinput.<name>.inputq.drops sysctl node. */
static void
wqinput_sysctl_setup(const char *name, struct wqinput *wqi)
{
	const struct sysctlnode *cnode, *rnode;
	int error;

	error = sysctl_createv(NULL, 0, NULL, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "wqinput",
	    SYSCTL_DESCR("workqueue-based pr_input controls"),
	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
	if (error != 0)
		goto bad;

	error = sysctl_createv(NULL, 0, &rnode, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, name,
	    SYSCTL_DESCR("Protocol controls for workqueue-based pr_input"),
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
	if (error != 0)
		goto bad;

	error = sysctl_createv(NULL, 0, &rnode, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "inputq",
	    SYSCTL_DESCR("wqinput input queue controls"),
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
	if (error != 0)
		goto bad;

	error = sysctl_createv(NULL, 0, &rnode, &cnode,
	    CTLFLAG_PERMANENT, CTLTYPE_QUAD, "drops",
	    SYSCTL_DESCR("Total packets dropped due to full input queue"),
	    wqinput_sysctl_drops_handler, 0, (void *)wqi, 0,
	    CTL_CREATE, CTL_EOL);
	if (error != 0)
		goto bad;

	return;
bad:
	log(LOG_ERR, "%s: could not create a sysctl node for %s\n",
	    __func__, name);
	return;
}

static struct wqinput_worklist *
wqinput_percpu_getref(percpu_t *pc)
{

	return *(struct wqinput_worklist **)percpu_getref(pc);
}

static void
wqinput_percpu_putref(percpu_t *pc)
{

	percpu_putref(pc);
}

static void
wqinput_percpu_init_cpu(void *p, void *arg __unused, struct cpu_info *ci __unused)
{
	struct wqinput_worklist **wwlp = p;

	*wwlp = kmem_zalloc(sizeof(**wwlp), KM_SLEEP);
}

/*
 * Create a workqueue-based pr_input processor.  "func" is the real input
 * routine; it will be called from a per-CPU workqueue at IPL_SOFTNET.
 */
struct wqinput *
wqinput_create(const char *name, void (*func)(struct mbuf *, int, int))
{
	struct wqinput *wqi;
	int error;
	char namebuf[32];

	snprintf(namebuf, sizeof(namebuf), "%s_wqinput", name);

	wqi = kmem_alloc(sizeof(*wqi), KM_SLEEP);

	error = workqueue_create(&wqi->wqi_wq, namebuf, wqinput_work, wqi,
	    PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE|WQ_PERCPU);
	if (error != 0)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);
	pool_init(&wqi->wqi_work_pool, sizeof(struct wqinput_work), 0, 0, 0,
	    name, NULL, IPL_SOFTNET);

	wqi->wqi_worklists = percpu_alloc(sizeof(struct wqinput_worklist *));
	percpu_foreach(wqi->wqi_worklists, wqinput_percpu_init_cpu, NULL);
	wqi->wqi_input = func;

	wqinput_sysctl_setup(name, wqi);

	return wqi;
}
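/*
 * Example usage (a minimal sketch, not part of this file; the frob_*
 * names are hypothetical): a protocol wraps its real input routine with
 * a wqinput so that the interrupt path only enqueues and returns.
 *
 *	static struct wqinput *frob_wqi;
 *
 *	static void
 *	frob_input_real(struct mbuf *m, int off, int proto)
 *	{
 *		// actual protocol processing; runs in the per-CPU
 *		// workqueue at IPL_SOFTNET
 *	}
 *
 *	void
 *	frob_init(void)
 *	{
 *		frob_wqi = wqinput_create("frob", frob_input_real);
 *	}
 *
 *	void
 *	frob_input(struct mbuf *m, int off, int proto)
 *	{
 *		// interrupt path: defer to the workqueue
 *		wqinput_input(frob_wqi, m, off, proto);
 *	}
 */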
/* Dequeue one work from the per-CPU list. */
static struct wqinput_work *
wqinput_work_get(struct wqinput_worklist *wwl)
{
	struct wqinput_work *work;

	/* Must be called at IPL_SOFTNET */

	work = wwl->wwl_head;
	if (work != NULL) {
		KASSERTMSG(wwl->wwl_len > 0, "wwl->wwl_len=%u", wwl->wwl_len);
		wwl->wwl_len--;
		wwl->wwl_head = work->ww_next;
		work->ww_next = NULL;

		if (wwl->wwl_head == NULL)
			wwl->wwl_tail = NULL;
	} else {
		KASSERT(wwl->wwl_len == 0);
	}

	return work;
}

/*
 * Workqueue handler: drain the per-CPU worklist, feeding each queued
 * packet to the real input routine.
 */
static void
wqinput_work(struct work *wk, void *arg)
{
	struct wqinput *wqi = arg;
	struct wqinput_work *work;
	struct wqinput_worklist *wwl;
	int s;

	/* Users expect to run at IPL_SOFTNET */
	s = splsoftnet();
	/* This also prevents LWP migrations between CPUs */
	wwl = wqinput_percpu_getref(wqi->wqi_worklists);

	/* We can allow enqueuing another work at this point */
	wwl->wwl_wq_is_active = false;

	while ((work = wqinput_work_get(wwl)) != NULL) {
		mutex_enter(softnet_lock);
		KERNEL_LOCK_UNLESS_NET_MPSAFE();
		wqi->wqi_input(work->ww_mbuf, work->ww_off, work->ww_proto);
		KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
		mutex_exit(softnet_lock);

		pool_put(&wqi->wqi_work_pool, work);
	}

	wqinput_percpu_putref(wqi->wqi_worklists);
	splx(s);
}

/* Append a work to the tail of the per-CPU list. */
static void
wqinput_work_put(struct wqinput_worklist *wwl, struct wqinput_work *work)
{

	if (wwl->wwl_tail != NULL) {
		wwl->wwl_tail->ww_next = work;
	} else {
		wwl->wwl_head = work;
	}
	wwl->wwl_tail = work;
	wwl->wwl_len++;
}

/*
 * Defer input processing of a packet to the per-CPU workqueue.  The packet
 * is dropped if the per-CPU queue is full or a work item cannot be
 * allocated.
 */
void
wqinput_input(struct wqinput *wqi, struct mbuf *m, int off, int proto)
{
	struct wqinput_work *work;
	struct wqinput_worklist *wwl;

	wwl = wqinput_percpu_getref(wqi->wqi_worklists);

	/* Prevent too many works and mbufs from being queued */
	if (wwl->wwl_len >= WQINPUT_LIST_MAXLEN) {
		wwl->wwl_dropped++;
		m_freem(m);
		goto out;
	}

	work = pool_get(&wqi->wqi_work_pool, PR_NOWAIT);
	if (work == NULL) {
		wwl->wwl_dropped++;
		m_freem(m);
		goto out;
	}
	work->ww_mbuf = m;
	work->ww_off = off;
	work->ww_proto = proto;
	work->ww_next = NULL;

	wqinput_work_put(wwl, work);

	/* Avoid enqueuing another work when one is already enqueued */
	if (wwl->wwl_wq_is_active)
		goto out;

	wwl->wwl_wq_is_active = true;
	workqueue_enqueue(wqi->wqi_wq, &wwl->wwl_work, NULL);
out:
	wqinput_percpu_putref(wqi->wqi_worklists);
}
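/*
 * The drop counter registered by wqinput_sysctl_setup() surfaces as
 * net.wqinput.<name>.inputq.drops.  For example, for a wqinput created
 * with the name "ip" (the name here is only illustrative):
 *
 *	$ sysctl net.wqinput.ip.inputq.drops
 *	net.wqinput.ip.inputq.drops = 0
 */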