Index: external/cddl/osnet/dist/uts/common/fs/zfs/arc.c =================================================================== RCS file: /cvsroot/src/external/cddl/osnet/dist/uts/common/fs/zfs/arc.c,v retrieving revision 1.22 diff -p -u -r1.22 arc.c --- external/cddl/osnet/dist/uts/common/fs/zfs/arc.c 3 Aug 2022 01:53:06 -0000 1.22 +++ external/cddl/osnet/dist/uts/common/fs/zfs/arc.c 25 Apr 2026 07:35:55 -0000 @@ -275,7 +275,7 @@ int arc_procfd; #endif #endif /* illumos */ -#ifdef __NetBSD__ +#if defined(__NetBSD__) && defined(_KERNEL) #include #ifndef btop #define btop(x) ((x) / PAGE_SIZE) @@ -288,8 +288,9 @@ int arc_procfd; #define freemem uvm_availmem(false) #define minfree uvmexp.freemin #define desfree uvmexp.freetarg -#define zfs_arc_free_target desfree +//#define zfs_arc_free_target desfree #define lotsfree (desfree * 2) +#define maxfree uvmexp.npages #define availrmem desfree #define swapfs_minfree 0 #define swapfs_reserve 0 @@ -297,13 +298,21 @@ int arc_procfd; #define curproc curlwp #define proc_pageout uvm.pagedaemon_lwp +u_int zfs_arc_free_target; +static void +arc_free_target_init(void) +{ + + zfs_arc_free_target = desfree; +} + static void *zio_arena; #include /* Structures used for memory and kva space reclaim. 
*/ static struct callback_entry arc_kva_reclaim_entry; -#endif /* __NetBSD__ */ +#endif /* __NetBSD__ && _KERNEL */ static kmutex_t arc_reclaim_lock; static kcondvar_t arc_reclaim_thread_cv; @@ -468,6 +477,68 @@ SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_meta "ARC metadata limit"); #endif +#if defined(__NetBSD__) && defined(_KERNEL) + +static int sysctl_vfs_zfs_arc_meta_limit(SYSCTLFN_PROTO); +static int sysctl_vfs_zfs_arc_max(SYSCTLFN_PROTO); +static int sysctl_vfs_zfs_arc_min(SYSCTLFN_PROTO); +static int sysctl_vfs_zfs_arc_free_target(SYSCTLFN_PROTO); + +SYSCTL_SETUP(sysctl_vfs_zfs_arc_setup, "sysctl vfs.zfs_arc subtree setup") +{ + const struct sysctlnode *rnode = NULL; + + /* vfs.zfs is created in zfs_ioctl.c */ + sysctl_createv(clog, 0, NULL, &rnode, + CTLFLAG_PERMANENT, + CTLTYPE_NODE, "zfs_arc", + SYSCTL_DESCR("zfs"), + NULL, 0, NULL, 0, + CTL_VFS, CTL_CREATE, CTL_EOL); + + sysctl_createv(clog, 0, &rnode, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_QUAD, + "meta_limit", SYSCTL_DESCR("ARC metadata limit"), + sysctl_vfs_zfs_arc_meta_limit, 0, + &zfs_arc_meta_limit, sizeof(zfs_arc_meta_limit), + CTL_CREATE, CTL_EOL); + sysctl_createv(clog, 0, &rnode, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_QUAD, + "meta_min", SYSCTL_DESCR("ARC metadata minimum"), + NULL, 0, &zfs_arc_meta_min, sizeof(zfs_arc_meta_min), + CTL_CREATE, CTL_EOL); + sysctl_createv(clog, 0, &rnode, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, + "shrink_shift", SYSCTL_DESCR("ARC shrink shift"), + NULL, 0, &zfs_arc_shrink_shift, sizeof(zfs_arc_shrink_shift), + CTL_CREATE, CTL_EOL); + sysctl_createv(clog, 0, &rnode, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_QUAD, + "max", SYSCTL_DESCR("Maximum ARC size"), + sysctl_vfs_zfs_arc_max, 0, + &zfs_arc_max, sizeof(zfs_arc_max), + CTL_CREATE, CTL_EOL); + sysctl_createv(clog, 0, &rnode, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_QUAD, + "min", SYSCTL_DESCR("Minimum ARC size"), + sysctl_vfs_zfs_arc_min, 0, + 
&zfs_arc_min, sizeof(zfs_arc_min), + CTL_CREATE, CTL_EOL); + sysctl_createv(clog, 0, &rnode, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, + "compressed", SYSCTL_DESCR("ARC compression"), + NULL, 0, &zfs_compressed_arc_enabled, sizeof(zfs_compressed_arc_enabled), + CTL_CREATE, CTL_EOL); + sysctl_createv(clog, 0, &rnode, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, + "free_target", SYSCTL_DESCR("Desired number of free pages below which ARC triggers reclaim"), + sysctl_vfs_zfs_arc_free_target, 0, + &zfs_arc_free_target, sizeof(zfs_arc_free_target), + CTL_CREATE, CTL_EOL); +} + +#endif + /* * Note that buffers can be in one of 6 states: * ARC_anon - anonymous (discussed below) @@ -1169,6 +1240,151 @@ sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_AR } #endif +#if defined(__NetBSD__) && defined(_KERNEL) +static int +sysctl_vfs_zfs_arc_meta_limit(SYSCTLFN_ARGS) +{ + struct sysctlnode node; + uint64_t val; + int error; + + val = *(uint64_t *)rnode->sysctl_data; + + node = *rnode; + node.sysctl_data = &val; + + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if (error != 0 || newp == NULL) + return (error); + + if (val <= 0 || val > arc_c_max) + return (EINVAL); + + arc_meta_limit = val; + + *(uint64_t *)rnode->sysctl_data = val; + return (0); +} + +static int +sysctl_vfs_zfs_arc_max(SYSCTLFN_ARGS) +{ + struct sysctlnode node; + uint64_t val; + int error; + + val = *(uint64_t *)rnode->sysctl_data; + + node = *rnode; + node.sysctl_data = &val; + + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if (error != 0 || newp == NULL) + return (error); + +#if 0 + if (zfs_arc_max == 0) { + /* Loader tunable so blindly set */ + zfs_arc_max = val; + return (0); + } +#endif + + if (val < arc_abs_min || val > kmem_size()) + return (EINVAL); + if (val < arc_c_min) + return (EINVAL); + if (zfs_arc_meta_limit > 0 && val < zfs_arc_meta_limit) + return (EINVAL); + + arc_c_max = val; + + arc_c = arc_c_max; + arc_p = (arc_c >> 1); + + if (zfs_arc_meta_limit == 0) { + /* limit 
meta-data to 1/4 of the arc capacity */ + arc_meta_limit = arc_c_max / 4; + } + +#if 0 + /* if kmem_flags are set, lets try to use less memory */ + if (kmem_debugging()) + arc_c = arc_c / 2; +#endif + + *(uint64_t *)rnode->sysctl_data = arc_c; + return (0); + + return (0); +} + +static int +sysctl_vfs_zfs_arc_min(SYSCTLFN_ARGS) +{ + struct sysctlnode node; + uint64_t val; + int error; + + val = *(uint64_t *)rnode->sysctl_data; + + node = *rnode; + node.sysctl_data = &val; + + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if (error != 0 || newp == NULL) + return (error); + +#if 0 + if (zfs_arc_min == 0) { + /* Loader tunable so blindly set */ + zfs_arc_min = val; + return (0); + } +#endif + + if (val < arc_abs_min || val > arc_c_max) + return (EINVAL); + + arc_c_min = val; + + if (zfs_arc_meta_min == 0) + arc_meta_min = arc_c_min / 2; + + if (arc_c < arc_c_min) + arc_c = arc_c_min; + + *(uint64_t *)rnode->sysctl_data = arc_c_min; + return (0); + +} + +static int +sysctl_vfs_zfs_arc_free_target(SYSCTLFN_ARGS) +{ + struct sysctlnode node; + u_int val; + int error; + + val = *(u_int *)rnode->sysctl_data; + + node = *rnode; + node.sysctl_data = &val; + + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if (error != 0 || newp == NULL) + return (error); + + if (val < minfree) + return (EINVAL); + if (val > maxfree) + return (EINVAL); + + *(u_int *)rnode->sysctl_data = val; + return (0); +} +#endif + #define GHOST_STATE(state) \ ((state) == arc_mru_ghost || (state) == arc_mfu_ghost || \ (state) == arc_l2c_only) @@ -3903,6 +4119,34 @@ arc_available_memory(void) free_memory_reason_t r = FMR_UNKNOWN; #ifdef _KERNEL +#ifdef __NetBSD__ + vmem_size_t totalpercent; + vmem_size_t free; + + /* + * PR kern/57558: + * + * do not let pdaemon get stuck in the uvm_km_va_starved_p() + * state. it starts a tight loop when in uvm_km_va_starved state + * and ZFS is not freeing any pool pages as it started freeing + * only when falling below uvmexp.freetarg. 
+ * now we start freeing when falling below 10% kva free or + * uvmexp.freetarg. + * the 10% magic is shamelessly copied from uvm_km_va_starved_p() + * The interface to the pagedaemon has room for improvement. + */ + + totalpercent = vmem_size(heap_arena, VMEM_ALLOC|VMEM_FREE) / 10; + free = vmem_size(heap_arena, VMEM_FREE); + + if (free < totalpercent) { + needfree = btop(totalpercent - free); + } + if (free < uvmexp.freetarg && needfree < uvmexp.freetarg) { + needfree = uvmexp.freetarg; + } +#endif + if (needfree > 0) { n = PAGESIZE * (-needfree); if (n < lowest) { @@ -5925,8 +6169,8 @@ arc_state_multilist_index_func(multilist multilist_get_num_sublists(ml)); } -#ifdef _KERNEL #ifdef __FreeBSD__ +#ifdef _KERNEL static eventhandler_tag arc_event_lowmem = NULL; #endif @@ -6075,6 +6319,9 @@ arc_init(void) mutex_init(&arc_dnlc_evicts_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&arc_dnlc_evicts_cv, NULL, CV_DEFAULT, NULL); #endif +#if defined(__NetBSD__) && defined(_KERNEL) + arc_free_target_init(); +#endif /* Convert seconds to clock ticks */ arc_min_prefetch_lifespan = 1 * hz; @@ -6094,11 +6341,17 @@ arc_init(void) #endif /* illumos */ /* set min cache to 1/32 of all memory, or arc_abs_min, whichever is more */ arc_c_min = MAX(arc_c / 4, arc_abs_min); +#if 0 /* set max to 1/2 of all memory, or all but 1GB, whichever is more */ if (arc_c * 8 >= 1 << 30) arc_c_max = (arc_c * 8) - (1 << 30); else arc_c_max = arc_c_min; +#else + /* set max to 1/2 of all memory, but at least the minimum */ + arc_c_max = MAX(arc_c_min, arc_c * 4); +#endif + /* increase max to at least 5 times the default size */ arc_c_max = MAX(arc_c * 5, arc_c_max); /*