This is revision 2 of this patch (compared to the previous patch, it removes a noisy log message that was a side effect of the privsep we added to our version of BIND). Apply by doing: cd /usr/src patch -p0 < 013_bind.patch Then rebuild and install bind: cd usr.sbin/bind make -f Makefile.bsd-wrapper obj make -f Makefile.bsd-wrapper make -f Makefile.bsd-wrapper install Index: usr.sbin/bind/README.OpenBSD =================================================================== RCS file: /cvs/src/usr.sbin/bind/README.OpenBSD,v retrieving revision 1.8 retrieving revision 1.8.12.1 diff -u -p -r1.8 -r1.8.12.1 --- usr.sbin/bind/README.OpenBSD 28 Sep 2004 17:14:01 -0000 1.8 +++ usr.sbin/bind/README.OpenBSD 23 Jul 2008 17:59:55 -0000 1.8.12.1 @@ -1,11 +1,11 @@ -$OpenBSD: README.OpenBSD,v 1.8 2004/09/28 17:14:01 jakob Exp $ +$OpenBSD: README.OpenBSD,v 1.8.12.1 2008/07/23 17:59:55 brad Exp $ additional features - write pid-file before chroot - privilege separation for binding to privileged ports from within chroot -- add LCG (Linear Congruential Generator) implementation to libisc -- use LCG instead of LFSR for ID generation until LFSR is proven reliable +- add 64K entry shuffle (somewhat like Fisher-Yates) implementation to libisc +- use shuffle instead of LFSR for ID generation - strlcpy/strlcat/snprintf fixes default parameter changes Index: usr.sbin/bind/bin/named/server.c =================================================================== RCS file: /cvs/src/usr.sbin/bind/bin/named/server.c,v retrieving revision 1.14 retrieving revision 1.14.4.1 diff -u -p -r1.14 -r1.14.4.1 --- usr.sbin/bind/bin/named/server.c 10 Jan 2007 19:07:58 -0000 1.14 +++ usr.sbin/bind/bin/named/server.c 23 Jul 2008 17:59:55 -0000 1.14.4.1 @@ -477,6 +477,14 @@ get_view_querysource_dispatch(const cfg_ attrs |= DNS_DISPATCHATTR_IPV6; break; } + + if (isc_sockaddr_getport(&sa) != 0) { + INSIST(obj != NULL); + cfg_obj_log(obj, ns_g_lctx, ISC_LOG_INFO, + "using specific query-source port suppresses port " + 
"randomization and can be insecure."); + } + attrmask = 0; attrmask |= DNS_DISPATCHATTR_UDP; attrmask |= DNS_DISPATCHATTR_TCP; @@ -486,7 +494,7 @@ get_view_querysource_dispatch(const cfg_ disp = NULL; result = dns_dispatch_getudp(ns_g_dispatchmgr, ns_g_socketmgr, ns_g_taskmgr, &sa, 4096, - 1000, 32768, 16411, 16433, + 1024, 32768, 16411, 16433, attrs, attrmask, &disp); if (result != ISC_R_SUCCESS) { isc_sockaddr_t any; @@ -1858,7 +1866,9 @@ scan_interfaces(ns_server_t *server, isc } static isc_result_t -add_listenelt(isc_mem_t *mctx, ns_listenlist_t *list, isc_sockaddr_t *addr) { +add_listenelt(isc_mem_t *mctx, ns_listenlist_t *list, isc_sockaddr_t *addr, + isc_boolean_t wcardport_ok) +{ ns_listenelt_t *lelt = NULL; dns_acl_t *src_acl = NULL; dns_aclelement_t aelt; @@ -1868,7 +1878,8 @@ add_listenelt(isc_mem_t *mctx, ns_listen REQUIRE(isc_sockaddr_pf(addr) == AF_INET6); isc_sockaddr_any6(&any_sa6); - if (!isc_sockaddr_equal(&any_sa6, addr)) { + if (!isc_sockaddr_equal(&any_sa6, addr) && + (wcardport_ok || isc_sockaddr_getport(addr) != 0)) { aelt.type = dns_aclelementtype_ipprefix; aelt.negative = ISC_FALSE; aelt.u.ip_prefix.prefixlen = 128; @@ -1927,7 +1938,16 @@ adjust_interfaces(ns_server_t *server, i result = dns_dispatch_getlocaladdress(dispatch6, &addr); if (result != ISC_R_SUCCESS) goto fail; - result = add_listenelt(mctx, list, &addr); + + /* + * We always add non-wildcard address regardless of whether + * the port is 'any' (the fourth arg is TRUE): if the port is + * specific, we need to add it since it may conflict with a + * listening interface; if it's zero, we'll dynamically open + * query ports, and some of them may override an existing + * wildcard IPv6 port. 
+ */ + result = add_listenelt(mctx, list, &addr, ISC_TRUE); if (result != ISC_R_SUCCESS) goto fail; } @@ -1957,12 +1977,12 @@ adjust_interfaces(ns_server_t *server, i continue; addrp = dns_zone_getnotifysrc6(zone); - result = add_listenelt(mctx, list, addrp); + result = add_listenelt(mctx, list, addrp, ISC_FALSE); if (result != ISC_R_SUCCESS) goto fail; addrp = dns_zone_getxfrsource6(zone); - result = add_listenelt(mctx, list, addrp); + result = add_listenelt(mctx, list, addrp, ISC_FALSE); if (result != ISC_R_SUCCESS) goto fail; } Index: usr.sbin/bind/lib/dns/dispatch.c =================================================================== RCS file: /cvs/src/usr.sbin/bind/lib/dns/dispatch.c,v retrieving revision 1.6 retrieving revision 1.6.4.1 diff -u -p -r1.6 -r1.6.4.1 --- usr.sbin/bind/lib/dns/dispatch.c 10 Jan 2007 19:07:59 -0000 1.6 +++ usr.sbin/bind/lib/dns/dispatch.c 23 Jul 2008 17:59:55 -0000 1.6.4.1 @@ -22,10 +22,11 @@ #include #include -#include #include #include #include +#include +#include #include #include #include @@ -46,10 +47,18 @@ typedef struct dns_qid { unsigned int qid_nbuckets; /* hash table size */ unsigned int qid_increment; /* id increment on collision */ isc_mutex_t lock; - isc_lcg_t qid_lcg; /* state generator info */ dns_displist_t *qid_table; /* the table itself */ + isc_shuffle_t qid_shuffle; /*%< state generator info */ } dns_qid_t; +/* ARC4 Random generator state */ +typedef struct arc4ctx { + isc_uint8_t i; + isc_uint8_t j; + isc_uint8_t s[256]; + int count; +} arc4ctx_t; + struct dns_dispatchmgr { /* Unlocked. */ unsigned int magic; @@ -62,6 +71,10 @@ struct dns_dispatchmgr { unsigned int state; ISC_LIST(dns_dispatch_t) list; + /* Locked by arc4_lock. 
*/ + isc_mutex_t arc4_lock; + arc4ctx_t arc4ctx; /*%< ARC4 context for QID */ + /* locked by buffer lock */ dns_qid_t *qid; isc_mutex_t buffer_lock; @@ -88,6 +101,7 @@ struct dns_dispentry { unsigned int magic; dns_dispatch_t *disp; dns_messageid_t id; + in_port_t port; unsigned int bucket; isc_sockaddr_t host; isc_task_t *task; @@ -107,6 +121,7 @@ struct dns_dispatch { isc_task_t *task; /* internal task */ isc_socket_t *socket; /* isc socket attached to */ isc_sockaddr_t local; /* local address */ + in_port_t localport; /* local UDP port */ unsigned int maxrequests; /* max requests */ isc_event_t *ctlevent; @@ -149,14 +164,14 @@ struct dns_dispatch { * Statics. */ static dns_dispentry_t *bucket_search(dns_qid_t *, isc_sockaddr_t *, - dns_messageid_t, unsigned int); + dns_messageid_t, in_port_t, unsigned int); static isc_boolean_t destroy_disp_ok(dns_dispatch_t *); static void destroy_disp(isc_task_t *task, isc_event_t *event); static void udp_recv(isc_task_t *, isc_event_t *); static void tcp_recv(isc_task_t *, isc_event_t *); static void startrecv(dns_dispatch_t *); -static dns_messageid_t dns_randomid(dns_qid_t *); -static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t); +static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t, + in_port_t); static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len); static void *allocate_udp_buffer(dns_dispatch_t *disp); static inline void free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev); @@ -258,26 +273,152 @@ request_log(dns_dispatch_t *disp, dns_di } /* - * Return an unpredictable message ID. 
+ * ARC4 random number generator obtained from OpenBSD */ -static dns_messageid_t -dns_randomid(dns_qid_t *qid) { - isc_uint16_t id; +static void +dispatch_arc4init(arc4ctx_t *actx) { + int n; + for (n = 0; n < 256; n++) + actx->s[n] = n; + actx->i = 0; + actx->j = 0; + actx->count = 0; +} + +static void +dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) { + int n; + isc_uint8_t si; + + actx->i--; + for (n = 0; n < 256; n++) { + actx->i = (actx->i + 1); + si = actx->s[actx->i]; + actx->j = (actx->j + si + dat[n % datlen]); + actx->s[actx->i] = actx->s[actx->j]; + actx->s[actx->j] = si; + } + actx->j = actx->i; +} + +static inline isc_uint8_t +dispatch_arc4get8(arc4ctx_t *actx) { + isc_uint8_t si, sj; + + actx->i = (actx->i + 1); + si = actx->s[actx->i]; + actx->j = (actx->j + si); + sj = actx->s[actx->j]; + actx->s[actx->i] = sj; + actx->s[actx->j] = si; + + return (actx->s[(si + sj) & 0xff]); +} + +static inline isc_uint16_t +dispatch_arc4get16(arc4ctx_t *actx) { + isc_uint16_t val; + + val = dispatch_arc4get8(actx) << 8; + val |= dispatch_arc4get8(actx); + + return (val); +} - id = isc_lcg_generate16(&qid->qid_lcg); +static void +dispatch_arc4stir(dns_dispatchmgr_t *mgr) { + int i; + union { + unsigned char rnd[128]; + isc_uint32_t rnd32[32]; + } rnd; + isc_result_t result; - return (dns_messageid_t)(id & 0xFFFF); + if (mgr->entropy != NULL) { + /* + * We accept any quality of random data to avoid blocking. + */ + result = isc_entropy_getdata(mgr->entropy, rnd.rnd, + sizeof(rnd), NULL, 0); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + } else { + for (i = 0; i < 32; i++) + isc_random_get(&rnd.rnd32[i]); + } + dispatch_arc4addrandom(&mgr->arc4ctx, rnd.rnd, sizeof(rnd.rnd)); + + /* + * Discard early keystream, as per recommendations in: + * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps + */ + for (i = 0; i < 256; i++) + (void)dispatch_arc4get8(&mgr->arc4ctx); + + /* + * Derived from OpenBSD's implementation. 
The rationale is not clear, + * but should be conservative enough in safety, and reasonably large + * for efficiency. + */ + mgr->arc4ctx.count = 1600000; +} + +static isc_uint16_t +dispatch_arc4random(dns_dispatchmgr_t *mgr) { + isc_uint16_t result; + + LOCK(&mgr->arc4_lock); + mgr->arc4ctx.count -= sizeof(isc_uint16_t); + if (mgr->arc4ctx.count <= 0) + dispatch_arc4stir(mgr); + result = dispatch_arc4get16(&mgr->arc4ctx); + UNLOCK(&mgr->arc4_lock); + return (result); +} + +static isc_uint16_t +dispatch_arc4uniformrandom(dns_dispatchmgr_t *mgr, isc_uint16_t upper_bound) { + isc_uint16_t min, r; + /* The caller must hold the manager lock. */ + + if (upper_bound < 2) + return (0); + + /* + * Ensure the range of random numbers [min, 0xffff] be a multiple of + * upper_bound and contain at least a half of the 16 bit range. + */ + + if (upper_bound > 0x8000) + min = 1 + ~upper_bound; /* 0x8000 - upper_bound */ + else + min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound); + + /* + * This could theoretically loop forever but each retry has + * p > 0.5 (worst case, usually far better) of selecting a + * number inside the range we need, so it should rarely need + * to re-roll. + */ + for (;;) { + r = dispatch_arc4random(mgr); + if (r >= min) + break; + } + + return (r % upper_bound); } /* * Return a hash of the destination and message id. 
*/ static isc_uint32_t -dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id) { +dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id, + in_port_t port) +{ unsigned int ret; ret = isc_sockaddr_hash(dest, ISC_TRUE); - ret ^= id; + ret ^= (id << 16) | port; ret %= qid->qid_nbuckets; INSIST(ret < qid->qid_nbuckets); @@ -394,7 +535,7 @@ destroy_disp(isc_task_t *task, isc_event */ static dns_dispentry_t * bucket_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id, - unsigned int bucket) + in_port_t port, unsigned int bucket) { dns_dispentry_t *res; @@ -403,8 +544,10 @@ bucket_search(dns_qid_t *qid, isc_sockad res = ISC_LIST_HEAD(qid->qid_table[bucket]); while (res != NULL) { - if ((res->id == id) && isc_sockaddr_equal(dest, &res->host)) + if ((res->id == id) && isc_sockaddr_equal(dest, &res->host) && + res->port == port) { return (res); + } res = ISC_LIST_NEXT(res, link); } @@ -607,9 +750,9 @@ udp_recv(isc_task_t *task, isc_event_t * } /* response */ - bucket = dns_hash(qid, &ev->address, id); + bucket = dns_hash(qid, &ev->address, id, disp->localport); LOCK(&qid->lock); - resp = bucket_search(qid, &ev->address, id, bucket); + resp = bucket_search(qid, &ev->address, id, disp->localport, bucket); dispatch_log(disp, LVL(90), "search for response in bucket %d: %s", bucket, (resp == NULL ? "not found" : "found")); @@ -843,9 +986,10 @@ tcp_recv(isc_task_t *task, isc_event_t * /* * Response. */ - bucket = dns_hash(qid, &tcpmsg->address, id); + bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport); LOCK(&qid->lock); - resp = bucket_search(qid, &tcpmsg->address, id, bucket); + resp = bucket_search(qid, &tcpmsg->address, id, disp->localport, + bucket); dispatch_log(disp, LVL(90), "search for response in bucket %d: %s", bucket, (resp == NULL ? 
"not found" : "found")); @@ -994,6 +1138,8 @@ destroy_mgr(dns_dispatchmgr_t **mgrp) { DESTROYLOCK(&mgr->lock); mgr->state = 0; + DESTROYLOCK(&mgr->arc4_lock); + isc_mempool_destroy(&mgr->epool); isc_mempool_destroy(&mgr->rpool); isc_mempool_destroy(&mgr->dpool); @@ -1072,10 +1218,14 @@ dns_dispatchmgr_create(isc_mem_t *mctx, if (result != ISC_R_SUCCESS) goto deallocate; - result = isc_mutex_init(&mgr->buffer_lock); + result = isc_mutex_init(&mgr->arc4_lock); if (result != ISC_R_SUCCESS) goto kill_lock; + result = isc_mutex_init(&mgr->buffer_lock); + if (result != ISC_R_SUCCESS) + goto kill_arc4_lock; + result = isc_mutex_init(&mgr->pool_lock); if (result != ISC_R_SUCCESS) goto kill_buffer_lock; @@ -1126,6 +1276,8 @@ dns_dispatchmgr_create(isc_mem_t *mctx, if (entropy != NULL) isc_entropy_attach(entropy, &mgr->entropy); + dispatch_arc4init(&mgr->arc4ctx); + *mgrp = mgr; return (ISC_R_SUCCESS); @@ -1137,6 +1289,8 @@ dns_dispatchmgr_create(isc_mem_t *mctx, DESTROYLOCK(&mgr->pool_lock); kill_buffer_lock: DESTROYLOCK(&mgr->buffer_lock); + kill_arc4_lock: + DESTROYLOCK(&mgr->arc4_lock); kill_lock: DESTROYLOCK(&mgr->lock); deallocate: @@ -1262,20 +1416,27 @@ dns_dispatchmgr_destroy(dns_dispatchmgr_ } static isc_boolean_t -blacklisted(dns_dispatchmgr_t *mgr, isc_socket_t *sock) { +blacklisted(dns_dispatchmgr_t *mgr, isc_socket_t *sock, + isc_sockaddr_t *sockaddrp) +{ isc_sockaddr_t sockaddr; isc_result_t result; + REQUIRE(sock != NULL || sockaddrp != NULL); + if (mgr->portlist == NULL) return (ISC_FALSE); - result = isc_socket_getsockname(sock, &sockaddr); - if (result != ISC_R_SUCCESS) - return (ISC_FALSE); + if (sock != NULL) { + sockaddrp = &sockaddr; + result = isc_socket_getsockname(sock, sockaddrp); + if (result != ISC_R_SUCCESS) + return (ISC_FALSE); + } if (mgr->portlist != NULL && - dns_portlist_match(mgr->portlist, isc_sockaddr_pf(&sockaddr), - isc_sockaddr_getport(&sockaddr))) + dns_portlist_match(mgr->portlist, isc_sockaddr_pf(sockaddrp), + 
isc_sockaddr_getport(sockaddrp))) return (ISC_TRUE); return (ISC_FALSE); } @@ -1296,7 +1457,7 @@ local_addr_match(dns_dispatch_t *disp, i if (disp->mgr->portlist != NULL && isc_sockaddr_getport(addr) == 0 && isc_sockaddr_getport(&disp->local) == 0 && - blacklisted(disp->mgr, disp->socket)) + blacklisted(disp->mgr, disp->socket, NULL)) return (ISC_FALSE); /* @@ -1404,8 +1565,8 @@ qid_allocate(dns_dispatchmgr_t *mgr, uns qid->qid_nbuckets = buckets; qid->qid_increment = increment; qid->magic = QID_MAGIC; + isc_shuffle_init(&qid->qid_shuffle); - isc_lcg_init(&qid->qid_lcg); *qidp = qid; return (ISC_R_SUCCESS); } @@ -1457,6 +1618,7 @@ dispatch_allocate(dns_dispatchmgr_t *mgr disp->refcount = 1; disp->recv_pending = 0; memset(&disp->local, 0, sizeof(disp->local)); + disp->localport = 0; disp->shutting_down = 0; disp->shutdown_out = 0; disp->connected = 0; @@ -1629,7 +1791,7 @@ dns_dispatch_getudp(dns_dispatchmgr_t *m dns_dispatch_t **dispp) { isc_result_t result; - dns_dispatch_t *disp; + dns_dispatch_t *disp = NULL; REQUIRE(VALID_DISPATCHMGR(mgr)); REQUIRE(sockmgr != NULL); @@ -1649,6 +1811,11 @@ dns_dispatch_getudp(dns_dispatchmgr_t *m LOCK(&mgr->lock); + if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) != 0) { + REQUIRE(isc_sockaddr_getport(localaddr) == 0); + goto createudp; + } + /* * First, see if we have a dispatcher that matches. */ @@ -1677,6 +1844,7 @@ dns_dispatch_getudp(dns_dispatchmgr_t *m return (ISC_R_SUCCESS); } + createudp: /* * Nope, create one. */ @@ -1712,7 +1880,9 @@ dispatch_createudp(dns_dispatchmgr_t *mg dns_dispatch_t *disp; isc_socket_t *sock = NULL; isc_socket_t *held[DNS_DISPATCH_HELD]; - unsigned int i = 0, j = 0; + unsigned int i = 0, j = 0, k = 0; + isc_sockaddr_t localaddr_bound; + in_port_t localport = 0; /* * dispatch_allocate() checks mgr for us. @@ -1728,11 +1898,34 @@ dispatch_createudp(dns_dispatchmgr_t *mg * from returning the same port to us too quickly. 
*/ memset(held, 0, sizeof(held)); + localaddr_bound = *localaddr; getsocket: - result = create_socket(sockmgr, localaddr, &sock); + if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) != 0) { + in_port_t prt; + + /* XXX: should the range be configurable? */ + prt = 1024 + dispatch_arc4uniformrandom(mgr, 65535 - 1023); + isc_sockaddr_setport(&localaddr_bound, prt); + if (blacklisted(mgr, NULL, &localaddr_bound)) { + if (++k == 1024) + attributes &= ~DNS_DISPATCHATTR_RANDOMPORT; + goto getsocket; + } + result = create_socket(sockmgr, &localaddr_bound, &sock); + if (result == ISC_R_ADDRINUSE) { + if (++k == 1024) + attributes &= ~DNS_DISPATCHATTR_RANDOMPORT; + goto getsocket; + } + localport = prt; + } else + result = create_socket(sockmgr, localaddr, &sock); if (result != ISC_R_SUCCESS) goto deallocate_dispatch; - if (isc_sockaddr_getport(localaddr) == 0 && blacklisted(mgr, sock)) { + if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) == 0 && + isc_sockaddr_getport(localaddr) == 0 && + blacklisted(mgr, sock, NULL)) + { if (held[i] != NULL) isc_socket_detach(&held[i]); held[i++] = sock; @@ -1753,6 +1946,7 @@ dispatch_createudp(dns_dispatchmgr_t *mg disp->socktype = isc_sockettype_udp; disp->socket = sock; disp->local = *localaddr; + disp->localport = localport; disp->task = NULL; result = isc_task_create(taskmgr, 0, &disp->task); @@ -1884,18 +2078,19 @@ dns_dispatch_addresponse(dns_dispatch_t * Try somewhat hard to find an unique ID. 
*/ qid = DNS_QID(disp); + id = (dns_messageid_t)isc_shuffle_generate16(&qid->qid_shuffle); LOCK(&qid->lock); - id = dns_randomid(qid); - bucket = dns_hash(qid, dest, id); + bucket = dns_hash(qid, dest, id, disp->localport); ok = ISC_FALSE; for (i = 0; i < 64; i++) { - if (bucket_search(qid, dest, id, bucket) == NULL) { + if (bucket_search(qid, dest, id, disp->localport, bucket) == + NULL) { ok = ISC_TRUE; break; } id += qid->qid_increment; id &= 0x0000ffff; - bucket = dns_hash(qid, dest, id); + bucket = dns_hash(qid, dest, id, disp->localport); } if (!ok) { @@ -1917,6 +2112,7 @@ dns_dispatch_addresponse(dns_dispatch_t isc_task_attach(task, &res->task); res->disp = disp; res->id = id; + res->port = disp->localport; res->bucket = bucket; res->host = *dest; res->action = action; Index: usr.sbin/bind/lib/dns/resolver.c =================================================================== RCS file: /cvs/src/usr.sbin/bind/lib/dns/resolver.c,v retrieving revision 1.13 retrieving revision 1.13.4.1 diff -u -p -r1.13 -r1.13.4.1 --- usr.sbin/bind/lib/dns/resolver.c 25 Jan 2007 07:31:25 -0000 1.13 +++ usr.sbin/bind/lib/dns/resolver.c 23 Jul 2008 17:59:55 -0000 1.13.4.1 @@ -1054,17 +1054,50 @@ fctx_query(fetchctx_t *fctx, dns_adbaddr * A dispatch will be created once the connect succeeds. 
*/ } else { + isc_sockaddr_t localaddr; + unsigned int attrs, attrmask; + dns_dispatch_t *disp_base; + + attrs = 0; + attrs |= DNS_DISPATCHATTR_UDP; + attrs |= DNS_DISPATCHATTR_RANDOMPORT; + + attrmask = 0; + attrmask |= DNS_DISPATCHATTR_UDP; + attrmask |= DNS_DISPATCHATTR_TCP; + attrmask |= DNS_DISPATCHATTR_IPV4; + attrmask |= DNS_DISPATCHATTR_IPV6; + switch (isc_sockaddr_pf(&addrinfo->sockaddr)) { - case PF_INET: - dns_dispatch_attach(res->dispatchv4, &query->dispatch); + case AF_INET: + disp_base = res->dispatchv4; + attrs |= DNS_DISPATCHATTR_IPV4; break; - case PF_INET6: - dns_dispatch_attach(res->dispatchv6, &query->dispatch); + case AF_INET6: + disp_base = res->dispatchv6; + attrs |= DNS_DISPATCHATTR_IPV6; break; default: result = ISC_R_NOTIMPLEMENTED; goto cleanup_query; } + + result = dns_dispatch_getlocaladdress(disp_base, &localaddr); + if (result != ISC_R_SUCCESS) + goto cleanup_query; + if (isc_sockaddr_getport(&localaddr) == 0) { + result = dns_dispatch_getudp(res->dispatchmgr, + res->socketmgr, + res->taskmgr, + &localaddr, + 4096, 1000, 32768, + 16411, 16433, + attrs, attrmask, + &query->dispatch); + if (result != ISC_R_SUCCESS) + goto cleanup_query; + } else + dns_dispatch_attach(disp_base, &query->dispatch); /* * We should always have a valid dispatcher here. If we * don't support a protocol family, then its dispatcher Index: usr.sbin/bind/lib/dns/rootns.c =================================================================== RCS file: /cvs/src/usr.sbin/bind/lib/dns/rootns.c,v retrieving revision 1.3 retrieving revision 1.3.12.1 diff -u -p -r1.3 -r1.3.12.1 --- usr.sbin/bind/lib/dns/rootns.c 28 Sep 2004 17:14:06 -0000 1.3 +++ usr.sbin/bind/lib/dns/rootns.c 19 Nov 2007 11:08:13 -0000 1.3.12.1 @@ -67,7 +67,7 @@ static char root_ns[] = "I.ROOT-SERVERS.NET. 3600000 IN A 192.36.148.17\n" "J.ROOT-SERVERS.NET. 3600000 IN A 192.58.128.30\n" "K.ROOT-SERVERS.NET. 3600000 IN A 193.0.14.129\n" -"L.ROOT-SERVERS.NET. 
3600000 IN A 198.32.64.12\n" +"L.ROOT-SERVERS.NET. 3600000 IN A 199.7.83.42\n" "M.ROOT-SERVERS.NET. 3600000 IN A 202.12.27.33\n"; static isc_result_t Index: usr.sbin/bind/lib/dns/include/dns/dispatch.h =================================================================== RCS file: /cvs/src/usr.sbin/bind/lib/dns/include/dns/dispatch.h,v retrieving revision 1.4 retrieving revision 1.4.12.1 diff -u -p -r1.4 -r1.4.12.1 --- usr.sbin/bind/lib/dns/include/dns/dispatch.h 28 Sep 2004 17:14:06 -0000 1.4 +++ usr.sbin/bind/lib/dns/include/dns/dispatch.h 23 Jul 2008 17:59:55 -0000 1.4.12.1 @@ -112,6 +112,9 @@ struct dns_dispatchevent { * _MAKEQUERY * The dispatcher can be used to issue queries to other servers, and * accept replies from them. + * + * _RANDOMPORT + * Allocate UDP port randomly. */ #define DNS_DISPATCHATTR_PRIVATE 0x00000001U #define DNS_DISPATCHATTR_TCP 0x00000002U @@ -121,6 +124,7 @@ struct dns_dispatchevent { #define DNS_DISPATCHATTR_NOLISTEN 0x00000020U #define DNS_DISPATCHATTR_MAKEQUERY 0x00000040U #define DNS_DISPATCHATTR_CONNECTED 0x00000080U +#define DNS_DISPATCHATTR_RANDOMPORT 0x00000100U isc_result_t dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy, Index: usr.sbin/bind/lib/isc/Makefile.in =================================================================== RCS file: /cvs/src/usr.sbin/bind/lib/isc/Makefile.in,v retrieving revision 1.5 retrieving revision 1.5.12.1 diff -u -p -r1.5 -r1.5.12.1 --- usr.sbin/bind/lib/isc/Makefile.in 28 Sep 2004 17:14:07 -0000 1.5 +++ usr.sbin/bind/lib/isc/Makefile.in 23 Jul 2008 17:59:55 -0000 1.5.12.1 @@ -60,10 +60,12 @@ OBJS = @ISC_EXTRA_OBJS@ \ ratelimiter.@O@ region.@O@ result.@O@ rwlock.@O@ \ serial.@O@ sha1.@O@ sockaddr.@O@ string.@O@ strtoul.@O@ \ symtab.@O@ task.@O@ taskpool.@O@ timer.@O@ version.@O@ \ + shuffle.@O@ \ ${UNIXOBJS} ${NLSOBJS} ${THREADOBJS} # Alphabetically SRCS = @ISC_EXTRA_SRCS@ \ + shuffle.c \ assertions.c base64.c bitstring.c buffer.c \ bufferlist.c commandline.c error.c event.c \ heap.c 
hex.c hmacmd5.c \ Index: usr.sbin/bind/lib/isc/random.c =================================================================== RCS file: /cvs/src/usr.sbin/bind/lib/isc/random.c,v retrieving revision 1.4 retrieving revision 1.4.12.1 diff -u -p -r1.4 -r1.4.12.1 --- usr.sbin/bind/lib/isc/random.c 28 Sep 2004 17:14:07 -0000 1.4 +++ usr.sbin/bind/lib/isc/random.c 23 Jul 2008 17:59:55 -0000 1.4.12.1 @@ -1,6 +1,7 @@ /* - * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2003 Internet Software Consortium. + * Copyright (C) 2008 Damien Miller * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -17,6 +18,8 @@ /* $ISC: random.c,v 1.15.74.5 2004/03/08 09:04:49 marka Exp $ */ +/*! \file */ + #include #include @@ -89,14 +92,54 @@ isc_random_get(isc_uint32_t *val) } isc_uint32_t +isc_random_uniform(isc_uint32_t upper_bound) +{ + isc_uint32_t r, min; + + /* + * Uniformity is achieved by generating new random numbers until + * the one returned is outside the range [0, 2**32 % upper_bound). + * This guarantees the selected random number will be inside + * [2**32 % upper_bound, 2**32) which maps back to [0, upper_bound) + * after reduction modulo upper_bound. + */ + + if (upper_bound < 2) + return 0; + +#if (ULONG_MAX > 0xffffffffUL) + min = 0x100000000UL % upper_bound; +#else + /* Calculate (2**32 % upper_bound) avoiding 64-bit math */ + if (upper_bound > 0x80000000) + min = 1 + ~upper_bound; /* 2**32 - upper_bound */ + else { + /* (2**32 - x) % x == 2**32 % x when x <= 2**31 */ + min = ((0xffffffff - upper_bound) + 1) % upper_bound; + } +#endif + + /* + * This could theoretically loop forever doing this, but each retry + * has p > 0.5 (worst case, usually far better) of selecting a + * number inside the range we need, so it should rarely need to + * re-roll. 
+ */ + for (;;) { + isc_random_get(&r); + if (r >= min) + break; + } + + return r % upper_bound; +} + +isc_uint32_t isc_random_jitter(isc_uint32_t max, isc_uint32_t jitter) { REQUIRE(jitter < max); if (jitter == 0) return (max); else -#ifndef HAVE_ARC4RANDOM - return (max - rand() % jitter); -#else - return (max - arc4random() % jitter); -#endif + return max - isc_random_uniform(jitter); } + Index: usr.sbin/bind/lib/isc/shuffle.c =================================================================== RCS file: usr.sbin/bind/lib/isc/shuffle.c diff -N usr.sbin/bind/lib/isc/shuffle.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.sbin/bind/lib/isc/shuffle.c 23 Jul 2008 17:59:55 -0000 1.4.2.1 @@ -0,0 +1,67 @@ +/* + * Portions Copyright (C) 2008 Theo de Raadt + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM + * DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL + * INTERNET SOFTWARE CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING + * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +/* $OpenBSD: shuffle.c,v 1.4.2.1 2008/07/23 17:59:55 brad Exp $ */ + +#include + +#include + +#include +#include +#include +#include + +#define VALID_SHUFFLE(x) (x != NULL) + +void +isc_shuffle_init(isc_shuffle_t *shuffle) +{ + int i, i2; + + REQUIRE(VALID_SHUFFLE(shuffle)); + + shuffle->isindex = 0; + /* Initialize using a Knuth shuffle */ + for (i = 0; i < 65536; ++i) { + i2 = isc_random_uniform(i + 1); + shuffle->id_shuffle[i] = shuffle->id_shuffle[i2]; + shuffle->id_shuffle[i2] = i; + } +} + +isc_uint16_t +isc_shuffle_generate16(isc_shuffle_t *shuffle) +{ + isc_uint32_t si; + isc_uint16_t r; + int i, i2; + + REQUIRE(VALID_SHUFFLE(shuffle)); + + do { + isc_random_get(&si); + i = shuffle->isindex & 0xFFFF; + i2 = (shuffle->isindex - (si & 0x7FFF)) & 0xFFFF; + r = shuffle->id_shuffle[i]; + shuffle->id_shuffle[i] = shuffle->id_shuffle[i2]; + shuffle->id_shuffle[i2] = r; + shuffle->isindex++; + } while (r == 0); + + return (r); +} Index: usr.sbin/bind/lib/isc/include/isc/random.h =================================================================== RCS file: /cvs/src/usr.sbin/bind/lib/isc/include/isc/random.h,v retrieving revision 1.1.1.2 retrieving revision 1.1.1.2.12.1 diff -u -p -r1.1.1.2 -r1.1.1.2.12.1 --- usr.sbin/bind/lib/isc/include/isc/random.h 28 Sep 2004 16:35:42 -0000 1.1.1.2 +++ usr.sbin/bind/lib/isc/include/isc/random.h 23 Jul 2008 17:59:55 -0000 1.1.1.2.12.1 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2001 Internet Software Consortium. * * Permission to use, copy, modify, and distribute this software for any @@ -23,9 +23,11 @@ #include #include -/* - * Implements a random state pool which will let the caller return a - * series of possibly non-reproducable random values. Note that the +/*! 
\file + * \brief Implements a random state pool which will let the caller return a + * series of possibly non-reproducable random values. + * + * Note that the * strength of these numbers is not all that high, and should not be * used in cryptography functions. It is useful for jittering values * a bit here and there, such as timeouts, etc. @@ -35,13 +37,13 @@ ISC_LANG_BEGINDECLS void isc_random_seed(isc_uint32_t seed); -/* +/*%< * Set the initial seed of the random state. */ void isc_random_get(isc_uint32_t *val); -/* +/*%< * Get a random value. * * Requires: @@ -50,9 +52,16 @@ isc_random_get(isc_uint32_t *val); isc_uint32_t isc_random_jitter(isc_uint32_t max, isc_uint32_t jitter); -/* +/*%< * Get a random value between (max - jitter) and (max). * This is useful for jittering timer values. + */ + +isc_uint32_t +isc_random_uniform(isc_uint32_t upper_bound); +/*%< + * Get a uniformly distributed random value < upper_bound. + * Avoid bias when upper_bound is not a power of two. */ ISC_LANG_ENDDECLS Index: usr.sbin/bind/lib/isc/include/isc/shuffle.h =================================================================== RCS file: usr.sbin/bind/lib/isc/include/isc/shuffle.h diff -N usr.sbin/bind/lib/isc/include/isc/shuffle.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.sbin/bind/lib/isc/include/isc/shuffle.h 23 Jul 2008 17:59:55 -0000 1.1.4.1 @@ -0,0 +1,59 @@ +/* + * Portions Copyright (C) 2002 Internet Software Consortium. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM + * DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL + * INTERNET SOFTWARE CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING + * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* $OpenBSD: shuffle.h,v 1.1.4.1 2008/07/23 17:59:55 brad Exp $ */ + +#ifndef ISC_SHUFFLE_H +#define ISC_SHUFFLE_H 1 + +#include +#include + +typedef struct isc_shuffle isc_shuffle_t; + +struct isc_shuffle { + isc_uint16_t id_shuffle[65536]; + int isindex; +}; + +ISC_LANG_BEGINDECLS + +void +isc_shuffle_init(isc_shuffle_t *shuffle); +/* + * Initialize a Shuffle generator + * + * Requires: + * + * shuffle != NULL + */ + +isc_uint16_t +isc_shuffle_generate16(isc_shuffle_t *shuffle); +/* + * Get a random number from a Shuffle generator + * + * Requires: + * + * shuffle be valid. + * + * data != NULL. + */ + +ISC_LANG_ENDDECLS + +#endif /* ISC_SHUFFLE_H */ Index: usr.sbin/bind/lib/isc/unix/app.c =================================================================== RCS file: /cvs/src/usr.sbin/bind/lib/isc/unix/app.c,v retrieving revision 1.1.1.2 retrieving revision 1.1.1.2.12.1 diff -u -p -r1.1.1.2 -r1.1.1.2.12.1 --- usr.sbin/bind/lib/isc/unix/app.c 28 Sep 2004 16:35:46 -0000 1.1.1.2 +++ usr.sbin/bind/lib/isc/unix/app.c 23 Jul 2008 17:59:55 -0000 1.1.1.2.12.1 @@ -301,7 +301,7 @@ evloop() { int n; isc_time_t when, now; struct timeval tv, *tvp; - fd_set readfds, writefds; + fd_set *readfds, *writefds; int maxfd; isc_boolean_t readytasks; isc_boolean_t call_timer_dispatch = ISC_FALSE; @@ -330,7 +330,7 @@ evloop() { } isc__socketmgr_getfdsets(&readfds, &writefds, &maxfd); - n = select(maxfd, &readfds, &writefds, NULL, tvp); + n = select(maxfd, readfds, writefds, NULL, tvp); if (n == 0 || call_timer_dispatch) { /* @@ -350,7 +350,7 @@ evloop() { isc__timermgr_dispatch(); } if (n > 0) - 
(void)isc__socketmgr_dispatch(&readfds, &writefds, + (void)isc__socketmgr_dispatch(readfds, writefds, maxfd); (void)isc__taskmgr_dispatch(); Index: usr.sbin/bind/lib/isc/unix/privsep.c =================================================================== RCS file: /cvs/src/usr.sbin/bind/lib/isc/unix/privsep.c,v retrieving revision 1.6 retrieving revision 1.6.10.1 diff -u -p -r1.6 -r1.6.10.1 --- usr.sbin/bind/lib/isc/unix/privsep.c 4 May 2005 08:29:07 -0000 1.6 +++ usr.sbin/bind/lib/isc/unix/privsep.c 24 Jul 2008 05:28:41 -0000 1.6.10.1 @@ -189,7 +189,7 @@ check_bind(const struct sockaddr *sa, so if (port != NAMED_PORT_DEFAULT && port != RNDC_PORT_DEFAULT && port != LWRES_PORT_DEFAULT) { - if (port || child_pid) + if ((port && port < 1024) || child_pid) logmsg(LOG_ERR, "%s: disallowed port %u", pname, port); return (1); } Index: usr.sbin/bind/lib/isc/unix/socket.c =================================================================== RCS file: /cvs/src/usr.sbin/bind/lib/isc/unix/socket.c,v retrieving revision 1.8 retrieving revision 1.8.4.1 diff -u -p -r1.8 -r1.8.4.1 --- usr.sbin/bind/lib/isc/unix/socket.c 10 Jan 2007 19:07:59 -0000 1.8 +++ usr.sbin/bind/lib/isc/unix/socket.c 23 Jul 2008 17:59:55 -0000 1.8.4.1 @@ -188,11 +188,12 @@ struct isc_socketmgr { isc_mutex_t lock; /* Locked by manager lock. 
*/ ISC_LIST(isc_socket_t) socklist; - fd_set read_fds; - fd_set write_fds; - isc_socket_t *fds[FD_SETSIZE]; - int fdstate[FD_SETSIZE]; + fd_set *read_fds, *read_fds_copy; + fd_set *write_fds, *write_fds_copy; + isc_socket_t **fds; + int *fdstate; int maxfd; + int fdsize; #ifdef ISC_PLATFORM_USETHREADS isc_thread_t watcher; isc_condition_t shutdown_ok; @@ -237,6 +238,7 @@ static void build_msghdr_send(isc_socket struct msghdr *, struct iovec *, size_t *); static void build_msghdr_recv(isc_socket_t *, isc_socketevent_t *, struct msghdr *, struct iovec *, size_t *); +static void expand_fdsets(isc_socketmgr_t *, int, isc_mem_t *); #define SELECT_POKE_SHUTDOWN (-1) #define SELECT_POKE_NOTHING (-2) @@ -315,12 +317,12 @@ wakeup_socket(isc_socketmgr_t *manager, * or writes. */ - INSIST(fd >= 0 && fd < (int)FD_SETSIZE); + INSIST(fd >= 0 && fd < manager->fdsize); if (manager->fdstate[fd] == CLOSE_PENDING) { manager->fdstate[fd] = CLOSED; - FD_CLR(fd, &manager->read_fds); - FD_CLR(fd, &manager->write_fds); + FD_CLR(fd, manager->read_fds); + FD_CLR(fd, manager->write_fds); (void)close(fd); return; } @@ -333,9 +335,9 @@ wakeup_socket(isc_socketmgr_t *manager, * Set requested bit. 
*/ if (msg == SELECT_POKE_READ) - FD_SET(sock->fd, &manager->read_fds); + FD_SET(sock->fd, manager->read_fds); if (msg == SELECT_POKE_WRITE) - FD_SET(sock->fd, &manager->write_fds); + FD_SET(sock->fd, manager->write_fds); } #ifdef ISC_PLATFORM_USETHREADS @@ -1196,7 +1198,7 @@ destroy(isc_socket_t **sockp) { INSIST(ISC_LIST_EMPTY(sock->recv_list)); INSIST(ISC_LIST_EMPTY(sock->send_list)); INSIST(sock->connect_ev == NULL); - REQUIRE(sock->fd >= 0 && sock->fd < (int)FD_SETSIZE); + REQUIRE(sock->fd >= 0 && sock->fd < manager->fdsize); LOCK(&manager->lock); @@ -1371,6 +1373,78 @@ free_socket(isc_socket_t **socketp) { *socketp = NULL; } +static void +expand_fdsets(isc_socketmgr_t *manager, int maxfd, isc_mem_t *mctx) { + void *tmp; + int newsize = manager->fdsize; + + if (mctx == NULL) + mctx = manager->mctx; + + do { + newsize += FD_SETSIZE; + } while (newsize <= maxfd); + + tmp = isc_mem_get(mctx, sizeof(manager->fds[0]) * newsize); + memset(tmp, 0, sizeof(manager->fds[0]) * newsize); + if (manager->fdsize) { + memcpy(tmp, manager->fds, + sizeof(manager->fds[0]) * manager->fdsize); + isc_mem_put(mctx, manager->fds, + sizeof(manager->fds[0]) * manager->fdsize); + } + manager->fds = tmp; + + tmp = isc_mem_get(mctx, sizeof(manager->fdstate[0]) * newsize); + memset(tmp, 0, sizeof(manager->fdstate[0]) * newsize); + if (manager->fdsize) { + memcpy(tmp, manager->fdstate, + sizeof(manager->fdstate[0]) * manager->fdsize); + isc_mem_put(mctx, manager->fdstate, + sizeof(manager->fdstate[0]) * manager->fdsize); + } + manager->fdstate = tmp; + + tmp = isc_mem_get(mctx, howmany(newsize, NFDBITS) * sizeof(fd_mask)); + memset(tmp, 0, howmany(newsize, NFDBITS) * sizeof(fd_mask)); + if (manager->fdsize) { + memcpy(tmp, manager->read_fds, + howmany(manager->fdsize, NFDBITS) * sizeof(fd_mask)); + isc_mem_put(mctx, manager->read_fds, + howmany(manager->fdsize, NFDBITS) * sizeof(fd_mask)); + } + manager->read_fds = tmp; + + tmp = isc_mem_get(mctx, howmany(newsize, NFDBITS) * 
sizeof(fd_mask)); + memset(tmp, 0, howmany(newsize, NFDBITS) * sizeof(fd_mask)); + if (manager->fdsize) { + memcpy(tmp, manager->write_fds, + howmany(manager->fdsize, NFDBITS) * sizeof(fd_mask)); + isc_mem_put(mctx, manager->write_fds, + howmany(manager->fdsize, NFDBITS) * sizeof(fd_mask)); + } + manager->write_fds = tmp; + + /* Don't bother copying these, they are copied before use */ + tmp = isc_mem_get(mctx, howmany(newsize, NFDBITS) * sizeof(fd_mask)); + memset(tmp, 0, howmany(newsize, NFDBITS) * sizeof(fd_mask)); + if (manager->fdsize) { + isc_mem_put(mctx, manager->read_fds_copy, + howmany(manager->fdsize, NFDBITS) * sizeof(fd_mask)); + } + manager->read_fds_copy = tmp; + + tmp = isc_mem_get(mctx, howmany(newsize, NFDBITS) * sizeof(fd_mask)); + memset(tmp, 0, howmany(newsize, NFDBITS) * sizeof(fd_mask)); + if (manager->fdsize) { + isc_mem_put(mctx, manager->write_fds_copy, + howmany(manager->fdsize, NFDBITS) * sizeof(fd_mask)); + } + manager->write_fds_copy = tmp; + + manager->fdsize = newsize; +} + /* * Create a new 'type' socket managed by 'manager'. 
Events * will be posted to 'task' and when dispatched 'action' will be @@ -1421,16 +1495,8 @@ isc_socket_create(isc_socketmgr_t *manag } #endif - if (sock->fd >= (int)FD_SETSIZE) { - (void)close(sock->fd); - isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - isc_msgcat, ISC_MSGSET_SOCKET, - ISC_MSG_TOOMANYFDS, - "%s: too many open file descriptors", "socket"); - free_socket(&sock); - return (ISC_R_NORESOURCES); - } + if (sock->fd >= sock->manager->fdsize) + expand_fdsets(sock->manager, sock->fd, NULL); if (sock->fd < 0) { free_socket(&sock); @@ -1928,15 +1994,8 @@ internal_accept(isc_task_t *me, isc_even sock->pf); (void)close(fd); goto soft_error; - } else if (fd >= (int)FD_SETSIZE) { - isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - isc_msgcat, ISC_MSGSET_SOCKET, - ISC_MSG_TOOMANYFDS, - "%s: too many open file descriptors", - "accept"); - (void)close(fd); - goto soft_error; + } else if (fd >= sock->manager->fdsize) { + expand_fdsets(sock->manager, fd, NULL); } } @@ -1980,10 +2039,13 @@ internal_accept(isc_task_t *me, isc_even */ dev->address = dev->newsocket->address; - manager->fds[fd] = dev->newsocket; - manager->fdstate[fd] = MANAGED; if (manager->maxfd < fd) manager->maxfd = fd; + if (manager->maxfd >= manager->fdsize) + expand_fdsets(manager, manager->maxfd, NULL); + + manager->fds[fd] = dev->newsocket; + manager->fdstate[fd] = MANAGED; socket_log(sock, &dev->newsocket->address, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN, @@ -2140,7 +2202,7 @@ process_fds(isc_socketmgr_t *manager, in isc_socket_t *sock; isc_boolean_t unlock_sock; - REQUIRE(maxfd <= (int)FD_SETSIZE); + REQUIRE(maxfd <= manager->fdsize); /* * Process read/writes on other fds here. 
Avoid locking @@ -2154,8 +2216,8 @@ process_fds(isc_socketmgr_t *manager, in if (manager->fdstate[i] == CLOSE_PENDING) { manager->fdstate[i] = CLOSED; - FD_CLR(i, &manager->read_fds); - FD_CLR(i, &manager->write_fds); + FD_CLR(i, manager->read_fds); + FD_CLR(i, manager->write_fds); (void)close(i); @@ -2166,7 +2228,7 @@ process_fds(isc_socketmgr_t *manager, in unlock_sock = ISC_FALSE; if (FD_ISSET(i, readfds)) { if (sock == NULL) { - FD_CLR(i, &manager->read_fds); + FD_CLR(i, manager->read_fds); goto check_write; } unlock_sock = ISC_TRUE; @@ -2177,12 +2239,12 @@ process_fds(isc_socketmgr_t *manager, in else dispatch_recv(sock); } - FD_CLR(i, &manager->read_fds); + FD_CLR(i, manager->read_fds); } check_write: if (FD_ISSET(i, writefds)) { if (sock == NULL) { - FD_CLR(i, &manager->write_fds); + FD_CLR(i, manager->write_fds); continue; } if (!unlock_sock) { @@ -2195,7 +2257,7 @@ process_fds(isc_socketmgr_t *manager, in else dispatch_send(sock); } - FD_CLR(i, &manager->write_fds); + FD_CLR(i, manager->write_fds); } if (unlock_sock) UNLOCK(&sock->lock); @@ -2336,7 +2398,11 @@ isc_socketmgr_create(isc_mem_t *mctx, is manager->magic = SOCKET_MANAGER_MAGIC; manager->mctx = NULL; - memset(manager->fds, 0, sizeof(manager->fds)); + manager->read_fds = NULL; + manager->write_fds = NULL; + manager->fds = NULL; + manager->fdstate = NULL; + manager->fdsize = 0; ISC_LIST_INIT(manager->socklist); if (isc_mutex_init(&manager->lock) != ISC_R_SUCCESS) { isc_mem_put(mctx, manager, sizeof(*manager)); @@ -2385,15 +2451,13 @@ isc_socketmgr_create(isc_mem_t *mctx, is /* * Set up initial state for the select loop */ - FD_ZERO(&manager->read_fds); - FD_ZERO(&manager->write_fds); #ifdef ISC_PLATFORM_USETHREADS - FD_SET(manager->pipe_fds[0], &manager->read_fds); + FD_SET(manager->pipe_fds[0], manager->read_fds); manager->maxfd = manager->pipe_fds[0]; #else /* ISC_PLATFORM_USETHREADS */ manager->maxfd = 0; #endif /* ISC_PLATFORM_USETHREADS */ - memset(manager->fdstate, 0, 
sizeof(manager->fdstate)); + expand_fdsets(manager, manager->maxfd, mctx); #ifdef ISC_PLATFORM_USETHREADS /* @@ -2499,9 +2563,23 @@ isc_socketmgr_destroy(isc_socketmgr_t ** (void)isc_condition_destroy(&manager->shutdown_ok); #endif /* ISC_PLATFORM_USETHREADS */ - for (i = 0; i < (int)FD_SETSIZE; i++) - if (manager->fdstate[i] == CLOSE_PENDING) - (void)close(i); + if (manager->fdsize) { + for (i = 0; i < manager->fdsize; i++) + if (manager->fdstate[i] == CLOSE_PENDING) + (void)close(i); + isc_mem_put(manager->mctx, manager->fds, + sizeof(manager->fds[0]) * manager->fdsize); + isc_mem_put(manager->mctx, manager->fdstate, + sizeof(manager->fdstate[0]) * manager->fdsize); + isc_mem_put(manager->mctx, manager->read_fds, + howmany(manager->fdsize, NFDBITS) * sizeof(fd_mask)); + isc_mem_put(manager->mctx, manager->write_fds, + howmany(manager->fdsize, NFDBITS) * sizeof(fd_mask)); + isc_mem_put(manager->mctx, manager->read_fds_copy, + howmany(manager->fdsize, NFDBITS) * sizeof(fd_mask)); + isc_mem_put(manager->mctx, manager->write_fds_copy, + howmany(manager->fdsize, NFDBITS) * sizeof(fd_mask)); + } DESTROYLOCK(&manager->lock); manager->magic = 0; @@ -3536,12 +3614,22 @@ isc_socket_ipv6only(isc_socket_t *sock, #ifndef ISC_PLATFORM_USETHREADS void -isc__socketmgr_getfdsets(fd_set *readset, fd_set *writeset, int *maxfd) { +isc__socketmgr_getfdsets(fd_set **readset, fd_set **writeset, int *maxfd) { if (socketmgr == NULL) *maxfd = 0; else { - *readset = socketmgr->read_fds; - *writeset = socketmgr->write_fds; + + /* Prepare duplicates of fd_sets, as select() will modify */ + if (socketmgr->fdsize) { + memcpy(socketmgr->read_fds_copy, socketmgr->read_fds, + howmany(socketmgr->fdsize, NFDBITS) * + sizeof(fd_mask)); + memcpy(socketmgr->write_fds_copy, socketmgr->write_fds, + howmany(socketmgr->fdsize, NFDBITS) * + sizeof(fd_mask)); + } + *readset = socketmgr->read_fds_copy; + *writeset = socketmgr->write_fds_copy; *maxfd = socketmgr->maxfd + 1; } } Index: 
usr.sbin/bind/lib/isc/unix/socket_p.h =================================================================== RCS file: /cvs/src/usr.sbin/bind/lib/isc/unix/socket_p.h,v retrieving revision 1.1.1.2 retrieving revision 1.1.1.2.12.1 diff -u -p -r1.1.1.2 -r1.1.1.2.12.1 --- usr.sbin/bind/lib/isc/unix/socket_p.h 28 Sep 2004 16:35:49 -0000 1.1.1.2 +++ usr.sbin/bind/lib/isc/unix/socket_p.h 23 Jul 2008 17:59:56 -0000 1.1.1.2.12.1 @@ -25,7 +25,7 @@ #endif void -isc__socketmgr_getfdsets(fd_set *readset, fd_set *writeset, int *maxfd); +isc__socketmgr_getfdsets(fd_set **readset, fd_set **writeset, int *maxfd); isc_result_t isc__socketmgr_dispatch(fd_set *readset, fd_set *writeset, int maxfd);