/* $NetBSD: cpufunc.c,v 1.36 2024/02/07 04:20:26 msaitoh Exp $ */

/*
 * Copyright (c) 2017 Ryo Shimizu
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_cpuoptions.h"
#include "opt_multiprocessor.h"

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cpufunc.c,v 1.36 2024/02/07 04:20:26 msaitoh Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpu.h>
#include <sys/device.h>

#include <uvm/uvm.h>

#include <aarch64/armreg.h>
#include <aarch64/cpufunc.h>

u_int cputype;			/* compat arm */
u_int arm_dcache_align;		/* compat arm */
u_int arm_dcache_align_mask;	/* compat arm */
u_int arm_dcache_maxline;

u_int aarch64_cache_vindexsize;
u_int aarch64_cache_prefer_mask;

int aarch64_hafdbs_enabled __read_mostly;
int aarch64_pan_enabled __read_mostly;
int aarch64_pac_enabled __read_mostly;

static void __noasan
extract_cacheunit(int level, bool insn, int cachetype,
    struct aarch64_cache_info *cacheinfo)
{
        struct aarch64_cache_unit *cunit;
        uint64_t ccsidr, mmfr2;

        /* select and extract the level N instruction or data cache */
        reg_csselr_el1_write(__SHIFTIN(level, CSSELR_LEVEL) |
            __SHIFTIN(insn ? 1 : 0, CSSELR_IND));
        isb();

        ccsidr = reg_ccsidr_el1_read();
        mmfr2 = reg_id_aa64mmfr2_el1_read();

        if (insn)
                cunit = &cacheinfo[level].icache;
        else
                cunit = &cacheinfo[level].dcache;

        cunit->cache_type = cachetype;

        switch (__SHIFTOUT(mmfr2, ID_AA64MMFR2_EL1_CCIDX)) {
        case ID_AA64MMFR2_EL1_CCIDX_32BIT:
                cunit->cache_line_size =
                    1 << (__SHIFTOUT(ccsidr, CCSIDR_LINESIZE) + 4);
                cunit->cache_ways = __SHIFTOUT(ccsidr, CCSIDR_ASSOC) + 1;
                cunit->cache_sets = __SHIFTOUT(ccsidr, CCSIDR_NUMSET) + 1;
                break;
        case ID_AA64MMFR2_EL1_CCIDX_64BIT:
                cunit->cache_line_size =
                    1 << (__SHIFTOUT(ccsidr, CCSIDR64_LINESIZE) + 4);
                cunit->cache_ways = __SHIFTOUT(ccsidr, CCSIDR64_ASSOC) + 1;
                cunit->cache_sets = __SHIFTOUT(ccsidr, CCSIDR64_NUMSET) + 1;
                break;
        }

        /* calc waysize and whole size */
        cunit->cache_way_size = cunit->cache_line_size * cunit->cache_sets;
        cunit->cache_size = cunit->cache_way_size * cunit->cache_ways;
}
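/*
 * Worked example of the CCSIDR decode above (the numbers are illustrative,
 * not taken from any particular core): with the 32-bit CCIDX layout and
 * LineSize=2, Associativity=3, NumSets=255, the fields decode to
 *
 *      cache_line_size = 1 << (2 + 4) = 64 bytes
 *      cache_ways      = 3 + 1        = 4 ways
 *      cache_sets      = 255 + 1      = 256 sets
 *      cache_way_size  = 64 * 256     = 16 KB
 *      cache_size      = 16 KB * 4    = 64 KB
 *
 * i.e. a 64KB, 4-way set-associative cache with 64-byte lines.
 */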
/* Must be called on each processor */
void __noasan
aarch64_getcacheinfo(struct cpu_info *ci)
{
        struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
        uint32_t clidr, ctr;
        int level, cachetype;

        /*
         * CTR - Cache Type Register
         */
        ctr = reg_ctr_el0_read();
        switch (__SHIFTOUT(ctr, CTR_EL0_L1IP_MASK)) {
        case CTR_EL0_L1IP_VPIPT:
                cachetype = CACHE_TYPE_VPIPT;
                break;
        case CTR_EL0_L1IP_AIVIVT:
                cachetype = CACHE_TYPE_VIVT;
                break;
        case CTR_EL0_L1IP_VIPT:
                cachetype = CACHE_TYPE_VIPT;
                break;
        case CTR_EL0_L1IP_PIPT:
                cachetype = CACHE_TYPE_PIPT;
                break;
        }

        /*
         * CLIDR  - Cache Level ID Register
         * CSSELR - Cache Size Selection Register
         * CCSIDR - Current Cache Size ID Register (selected by CSSELR)
         */

        /* L1, L2, L3, ..., L8 cache */
        for (level = 0, clidr = reg_clidr_el1_read();
            level < MAX_CACHE_LEVEL; level++, clidr >>= 3) {
                int cacheable;

                switch (clidr & 7) {
                case CLIDR_TYPE_NOCACHE:
                        cacheable = CACHE_CACHEABLE_NONE;
                        break;
                case CLIDR_TYPE_ICACHE:
                        cacheable = CACHE_CACHEABLE_ICACHE;
                        extract_cacheunit(level, true, cachetype, cinfo);
                        break;
                case CLIDR_TYPE_DCACHE:
                        cacheable = CACHE_CACHEABLE_DCACHE;
                        extract_cacheunit(level, false, CACHE_TYPE_PIPT,
                            cinfo);
                        break;
                case CLIDR_TYPE_IDCACHE:
                        cacheable = CACHE_CACHEABLE_IDCACHE;
                        extract_cacheunit(level, true, cachetype, cinfo);
                        extract_cacheunit(level, false, CACHE_TYPE_PIPT,
                            cinfo);
                        break;
                case CLIDR_TYPE_UNIFIEDCACHE:
                        cacheable = CACHE_CACHEABLE_UNIFIED;
                        extract_cacheunit(level, false, CACHE_TYPE_PIPT,
                            cinfo);
                        break;
                default:
                        cacheable = CACHE_CACHEABLE_NONE;
                        break;
                }

                cinfo[level].cacheable = cacheable;
                if (cacheable == CACHE_CACHEABLE_NONE) {
                        /* no more levels */
                        break;
                }

                /*
                 * The L1 insn cachetype comes from CTR_EL0.L1IP;
                 * all other cachetypes are PIPT.
                 */
                cachetype = CACHE_TYPE_PIPT;
        }
}
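/*
 * aarch64_parsecacheinfo() below records the largest D-cache line size seen
 * so far (the arm_dcache_align/arm_dcache_align_mask compat variables) and,
 * for a VIVT/VIPT L1 I-cache, derives the virtual index size (way size) that
 * drives page coloring.
 *
 * Illustrative numbers (not from any particular core): CTR_EL0.DminLine = 4
 * (log2 words) gives arm_dcache_align = sizeof(int) << 4 = 64 bytes, and a
 * 32KB, 2-way VIPT L1 I-cache gives vindexsize = 32KB / 2 = 16KB, so
 * aarch64_cache_prefer_mask = 0x3fff and, with 4KB pages, uvm_page_recolor()
 * is asked for 16KB / 4KB = 4 colors.
 */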
void
aarch64_parsecacheinfo(struct cpu_info *ci)
{
        struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
        struct aarch64_sysctl_cpu_id *id = &ci->ci_id;
        const uint32_t ctr = id->ac_ctr;
        u_int vindexsize;

        /* remember maximum alignment */
        if (arm_dcache_maxline < __SHIFTOUT(ctr, CTR_EL0_DMIN_LINE)) {
                arm_dcache_maxline = __SHIFTOUT(ctr, CTR_EL0_DMIN_LINE);
                arm_dcache_align = sizeof(int) << arm_dcache_maxline;
                arm_dcache_align_mask = arm_dcache_align - 1;
        }

#ifdef MULTIPROCESSOR
        if (coherency_unit < arm_dcache_align)
                panic("coherency_unit %ld < %d; increase COHERENCY_UNIT",
                    coherency_unit, arm_dcache_align);
#endif

        /* calculate L1 icache virtual index size */
        if ((cinfo[0].icache.cache_type == CACHE_TYPE_VIVT ||
             cinfo[0].icache.cache_type == CACHE_TYPE_VIPT) &&
            (cinfo[0].cacheable == CACHE_CACHEABLE_ICACHE ||
             cinfo[0].cacheable == CACHE_CACHEABLE_IDCACHE)) {
                vindexsize = cinfo[0].icache.cache_size /
                    cinfo[0].icache.cache_ways;
                KASSERT(vindexsize != 0);
        } else {
                vindexsize = 0;
        }

        if (vindexsize > aarch64_cache_vindexsize) {
                aarch64_cache_vindexsize = vindexsize;
                aarch64_cache_prefer_mask = vindexsize - 1;
                if (uvm.page_init_done)
                        uvm_page_recolor(vindexsize / PAGE_SIZE);
        }
}

static int
prt_cache(device_t self, struct aarch64_cache_info *cinfo, int level)
{
        struct aarch64_cache_unit *cunit;
        int i;
        const char *cacheable, *cachetype;

        if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE)
                return -1;

        for (i = 0; i < 2; i++) {
                switch (cinfo[level].cacheable) {
                case CACHE_CACHEABLE_ICACHE:
                        cunit = &cinfo[level].icache;
                        cacheable = "Instruction";
                        break;
                case CACHE_CACHEABLE_DCACHE:
                        cunit = &cinfo[level].dcache;
                        cacheable = "Data";
                        break;
                case CACHE_CACHEABLE_IDCACHE:
                        if (i == 0) {
                                cunit = &cinfo[level].icache;
                                cacheable = "Instruction";
                        } else {
                                cunit = &cinfo[level].dcache;
                                cacheable = "Data";
                        }
                        break;
                case CACHE_CACHEABLE_UNIFIED:
                        cunit = &cinfo[level].dcache;
                        cacheable = "Unified";
                        break;
                default:
                        cunit = &cinfo[level].dcache;
                        cacheable = "*UNK*";
                        break;
                }

                switch (cunit->cache_type) {
                case CACHE_TYPE_VPIPT:
                        cachetype = "VPIPT";
                        break;
                case CACHE_TYPE_VIVT:
                        cachetype = "VIVT";
                        break;
                case CACHE_TYPE_VIPT:
                        cachetype = "VIPT";
                        break;
                case CACHE_TYPE_PIPT:
                        cachetype = "PIPT";
                        break;
                default:
                        cachetype = "*UNK*";
                        break;
                }

                aprint_verbose_dev(self,
                    "L%d %uKB/%uB %u-way (%u set) %s %s cache\n",
                    level + 1,
                    cunit->cache_size / 1024,
                    cunit->cache_line_size,
                    cunit->cache_ways,
                    cunit->cache_sets,
                    cachetype, cacheable);

                if (cinfo[level].cacheable != CACHE_CACHEABLE_IDCACHE)
                        break;
        }

        return 0;
}

void
aarch64_printcacheinfo(device_t dev, struct cpu_info *ci)
{
        struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
        int level;

        for (level = 0; level < MAX_CACHE_LEVEL; level++)
                if (prt_cache(dev, cinfo, level) < 0)
                        break;
}
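/*
 * The ln_dcache_*_all() helpers below visit every set/way of a single cache
 * level with the DC CSW/CISW/ISW instructions.  The register operand uses
 * the architectural "by set/way" layout:
 *
 *      bits [31:32-A]  way number      A = log2(cache_ways)
 *      bits [L+S-1:L]  set number      L = log2(cache_line_size),
 *                                      S = log2(cache_sets)
 *      bits [3:1]      cache level     (1-based level minus 1; the local
 *                                      `level' is already 0-based, hence
 *                                      the plain `level << 1')
 *
 * setshift and wayshift are derived with ffs() from the line size and way
 * count recorded by extract_cacheunit(), which effectively assumes a
 * power-of-two cache geometry.
 */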
static inline void
ln_dcache_wb_all(int level, struct aarch64_cache_unit *cunit)
{
        uint64_t x;
        unsigned int set, way, setshift, wayshift;

        setshift = ffs(cunit->cache_line_size) - 1;
        wayshift = 32 - (ffs(cunit->cache_ways) - 1);

        for (way = 0; way < cunit->cache_ways; way++) {
                for (set = 0; set < cunit->cache_sets; set++) {
                        x = (way << wayshift) | (set << setshift) |
                            (level << 1);
                        __asm __volatile ("dc csw, %0; dsb sy" :: "r"(x));
                }
        }
}

static inline void
ln_dcache_wbinv_all(int level, struct aarch64_cache_unit *cunit)
{
        uint64_t x;
        unsigned int set, way, setshift, wayshift;

        setshift = ffs(cunit->cache_line_size) - 1;
        wayshift = 32 - (ffs(cunit->cache_ways) - 1);

        for (way = 0; way < cunit->cache_ways; way++) {
                for (set = 0; set < cunit->cache_sets; set++) {
                        x = (way << wayshift) | (set << setshift) |
                            (level << 1);
                        __asm __volatile ("dc cisw, %0; dsb sy" :: "r"(x));
                }
        }
}

static inline void
ln_dcache_inv_all(int level, struct aarch64_cache_unit *cunit)
{
        uint64_t x;
        unsigned int set, way, setshift, wayshift;

        setshift = ffs(cunit->cache_line_size) - 1;
        wayshift = 32 - (ffs(cunit->cache_ways) - 1);

        for (way = 0; way < cunit->cache_ways; way++) {
                for (set = 0; set < cunit->cache_sets; set++) {
                        x = (way << wayshift) | (set << setshift) |
                            (level << 1);
                        __asm __volatile ("dc isw, %0; dsb sy" :: "r"(x));
                }
        }
}

void
aarch64_dcache_wbinv_all(void)
{
        KASSERT(kpreempt_disabled());

        struct cpu_info * const ci = curcpu();
        struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
        int level;

        for (level = 0; level < MAX_CACHE_LEVEL; level++) {
                if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE)
                        break;

                dsb(ish);
                ln_dcache_wbinv_all(level, &cinfo[level].dcache);
        }
        dsb(ish);
}

void
aarch64_dcache_inv_all(void)
{
        KASSERT(kpreempt_disabled());

        struct cpu_info * const ci = curcpu();
        struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
        int level;

        for (level = 0; level < MAX_CACHE_LEVEL; level++) {
                if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE)
                        break;

                dsb(ish);
                ln_dcache_inv_all(level, &cinfo[level].dcache);
        }
        dsb(ish);
}

void
aarch64_dcache_wb_all(void)
{
        KASSERT(kpreempt_disabled());

        struct cpu_info * const ci = curcpu();
        struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
        int level;

        for (level = 0; level < MAX_CACHE_LEVEL; level++) {
                if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE)
                        break;

                dsb(ish);
                ln_dcache_wb_all(level, &cinfo[level].dcache);
        }
        dsb(ish);
}

int
set_cpufuncs(void)
{
        // This is only called from the BP
        return aarch64_setcpufuncs(&cpu_info_store[0]);
}

int
aarch64_setcpufuncs(struct cpu_info *ci)
{
        const uint64_t ctr = reg_ctr_el0_read();
        const uint64_t clidr = reg_clidr_el1_read();

        /* install default functions */
        ci->ci_cpufuncs.cf_set_ttbr0 = aarch64_set_ttbr0;
        ci->ci_cpufuncs.cf_icache_sync_range = aarch64_icache_sync_range;

        /*
         * install core/cluster specific functions
         */

        /* Icache sync op */
        if (__SHIFTOUT(ctr, CTR_EL0_DIC) == 1) {
                /* Icache invalidation to the PoU is not required */
                ci->ci_cpufuncs.cf_icache_sync_range =
                    aarch64_icache_barrier_range;
        } else if (__SHIFTOUT(ctr, CTR_EL0_IDC) == 1 ||
            __SHIFTOUT(clidr, CLIDR_LOC) == 0 ||
            (__SHIFTOUT(clidr, CLIDR_LOUIS) == 0 &&
             __SHIFTOUT(clidr, CLIDR_LOUU) == 0)) {
                /* Dcache clean to the PoU is not required for Icache */
                ci->ci_cpufuncs.cf_icache_sync_range =
                    aarch64_icache_inv_range;
        }

#ifdef CPU_THUNDERX
        const uint32_t midr = reg_midr_el1_read();

        /* Cavium erratum 27456 */
        if ((midr == CPU_ID_THUNDERXP1d0) ||
            (midr == CPU_ID_THUNDERXP1d1) ||
            (midr == CPU_ID_THUNDERXP2d1) ||
            (midr == CPU_ID_THUNDERX81XXRX)) {
                ci->ci_cpufuncs.cf_set_ttbr0 = aarch64_set_ttbr0_thunderx;
        }
#endif

        return 0;
}
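/*
 * Summary of the I-cache sync strategy chosen in aarch64_setcpufuncs()
 * above, based on CTR_EL0 and CLIDR_EL1:
 *
 *      CTR_EL0.DIC=1                   -> aarch64_icache_barrier_range:
 *                                         no cache maintenance needed,
 *                                         a barrier is enough.
 *      CTR_EL0.IDC=1, or LoC==0, or
 *      (LoUIS==0 && LoUU==0)           -> aarch64_icache_inv_range:
 *                                         I-cache invalidate only, no
 *                                         D-cache clean to PoU required.
 *      otherwise                       -> aarch64_icache_sync_range (the
 *                                         default installed above), which
 *                                         is expected to clean the D-cache
 *                                         to the PoU and then invalidate
 *                                         the I-cache.
 */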
void
aarch64_hafdbs_init(int primary)
{
#ifdef ARMV81_HAFDBS
        uint64_t tcr;
        int hafdbs;

        hafdbs = __SHIFTOUT(reg_id_aa64mmfr1_el1_read(),
            ID_AA64MMFR1_EL1_HAFDBS);

        /*
         * hafdbs
         * 0: HAFDBS_NONE - no support for any hardware flags
         * 1: HAFDBS_A    - only the hardware Access flag is supported
         * 2: HAFDBS_AD   - hardware Access and Dirty flags are supported
         */
        if (primary) {
                /* CPU0 does the detection. */
                switch (hafdbs) {
                case ID_AA64MMFR1_EL1_HAFDBS_NONE:
                default:
                        aarch64_hafdbs_enabled = 0;
                        break;
                case ID_AA64MMFR1_EL1_HAFDBS_A:
                case ID_AA64MMFR1_EL1_HAFDBS_AD:
                        aarch64_hafdbs_enabled = hafdbs;
                        break;
                }
        } else {
                /*
                 * The HAFDBS support level on this secondary CPU differs
                 * from what was detected on the primary CPU.
                 *
                 * XXX:
                 * The correct way to handle this would be to disable
                 * HAFDBS on all cores, or to call pmap_fault_fixup() only
                 * on the unsupported cores; for now, just panic().
                 */
                if (aarch64_hafdbs_enabled != hafdbs)
                        panic("HAFDBS is supported (%d) on primary cpu, "
                            "but isn't equal (%d) on secondary cpu",
                            aarch64_hafdbs_enabled, hafdbs);
        }

        /* enable Hardware updates to Access flag and Dirty state */
        tcr = reg_tcr_el1_read();
        switch (hafdbs) {
        case ID_AA64MMFR1_EL1_HAFDBS_NONE:
        default:
                break;
        case ID_AA64MMFR1_EL1_HAFDBS_A:
                /* enable only access */
                reg_tcr_el1_write(tcr | TCR_HA);
                isb();
                break;
        case ID_AA64MMFR1_EL1_HAFDBS_AD:
                /* enable both access and dirty */
                reg_tcr_el1_write(tcr | TCR_HD | TCR_HA);
                isb();
                break;
        }
#endif
}

void
aarch64_pan_init(int primary)
{
#ifdef ARMV81_PAN
        uint64_t reg, sctlr;

        /* CPU0 does the detection. */
        if (primary) {
                reg = reg_id_aa64mmfr1_el1_read();
                if (__SHIFTOUT(reg, ID_AA64MMFR1_EL1_PAN) !=
                    ID_AA64MMFR1_EL1_PAN_NONE)
                        aarch64_pan_enabled = 1;
        }

        if (!aarch64_pan_enabled)
                return;

        /*
         * On an exception to EL1, have the CPU set the PAN bit
         * automatically.  This ensures PAN is enabled each time the
         * kernel is entered.
         */
        sctlr = reg_sctlr_el1_read();
        sctlr &= ~SCTLR_SPAN;
        reg_sctlr_el1_write(sctlr);

        /* Set the PAN bit right now. */
        reg_pan_write(1);
#endif
}

/*
 * To avoid pointer-authentication inconsistencies within this function
 * itself, the caller must enable PAC according to the return value.
 */
int
aarch64_pac_init(int primary)
{
#ifdef ARMV83_PAC
        uint64_t reg;

        /* CPU0 does the detection. */
        if (primary) {
                reg = reg_id_aa64isar1_el1_read();
                if (__SHIFTOUT(reg, ID_AA64ISAR1_EL1_APA) !=
                    ID_AA64ISAR1_EL1_APA_NONE)
                        aarch64_pac_enabled = 1;
                if (__SHIFTOUT(reg, ID_AA64ISAR1_EL1_API) !=
                    ID_AA64ISAR1_EL1_API_NONE)
                        aarch64_pac_enabled = 1;
                if (__SHIFTOUT(reg, ID_AA64ISAR1_EL1_GPA) !=
                    ID_AA64ISAR1_EL1_GPA_NONE)
                        aarch64_pac_enabled = 1;
                if (__SHIFTOUT(reg, ID_AA64ISAR1_EL1_GPI) !=
                    ID_AA64ISAR1_EL1_GPI_NONE)
                        aarch64_pac_enabled = 1;
        }

        if (!aarch64_pac_enabled)
                return -1;

        /* Set the key. Curlwp here is the CPU's idlelwp. */
        reg_APIAKeyLo_EL1_write(curlwp->l_md.md_ia_kern[0]);
        reg_APIAKeyHi_EL1_write(curlwp->l_md.md_ia_kern[1]);

        return 0;
#else
        return -1;
#endif
}
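/*
 * Caller-side sketch for aarch64_pac_init() (illustrative only; the real
 * call sites are in the per-CPU startup path, and SCTLR_EnIA here stands
 * for the SCTLR_EL1.EnIA enable bit as defined in armreg.h):
 *
 *      if (aarch64_pac_init(primary) == 0) {
 *              // keys are programmed; enable instruction-address
 *              // authentication with the APIA key at EL1
 *              reg_sctlr_el1_write(reg_sctlr_el1_read() | SCTLR_EnIA);
 *              isb();
 *      }
 */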