/* $NetBSD: ecc_plb.c,v 1.15 2014/02/25 14:09:13 martin Exp $ */ /* * Copyright 2001 Wasabi Systems, Inc. * All rights reserved. * * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __KERNEL_RCSID(0, "$NetBSD: ecc_plb.c,v 1.15 2014/02/25 14:09:13 martin Exp $"); #include "locators.h" #include #include #include #include #include #include #include #include struct ecc_plb_softc { device_t sc_dev; uint64_t sc_ecc_tb; uint64_t sc_ecc_iv; /* Interval */ uint32_t sc_ecc_cnt; u_int sc_memsize; int sc_irq; }; static int ecc_plbmatch(device_t, cfdata_t, void *); static void ecc_plbattach(device_t, device_t, void *); static void ecc_plb_deferred(device_t); static int ecc_plb_intr(void *); CFATTACH_DECL_NEW(ecc_plb, sizeof(struct ecc_plb_softc), ecc_plbmatch, ecc_plbattach, NULL, NULL); static int ecc_plb_found; static int ecc_plbmatch(device_t parent, cfdata_t cf, void *aux) { struct plb_attach_args *paa = aux; if (strcmp(paa->plb_name, cf->cf_name) != 0) return (0); if (cf->cf_loc[PLBCF_IRQ] == PLBCF_IRQ_DEFAULT) panic("ecc_plbmatch: wildcard IRQ not allowed"); paa->plb_irq = cf->cf_loc[PLBCF_IRQ]; return (!ecc_plb_found); } static void ecc_plbattach(device_t parent, device_t self, void *aux) { struct ecc_plb_softc *sc = device_private(self); struct plb_attach_args *paa = aux; unsigned int processor_freq; unsigned int memsiz; prop_number_t pn; ecc_plb_found++; pn = prop_dictionary_get(board_properties, "processor-frequency"); KASSERT(pn != NULL); processor_freq = (unsigned int) prop_number_integer_value(pn); pn = prop_dictionary_get(board_properties, "mem-size"); KASSERT(pn != NULL); memsiz = (unsigned int) prop_number_integer_value(pn); aprint_normal(": ECC controller\n"); sc->sc_dev = self; sc->sc_ecc_tb = 0; sc->sc_ecc_cnt = 0; sc->sc_ecc_iv = processor_freq; /* Set interval */ sc->sc_memsize = memsiz; sc->sc_irq = paa->plb_irq; /* * Defer hooking the interrupt until all PLB devices have attached * since the interrupt controller may well be one of those devices... */ config_defer(self, ecc_plb_deferred); } static void ecc_plb_deferred(device_t self) { struct ecc_plb_softc *sc = device_private(self); intr_establish(sc->sc_irq, IST_LEVEL, IPL_SERIAL, ecc_plb_intr, sc); } /* * ECC fault handler. */ static int ecc_plb_intr(void *arg) { struct ecc_plb_softc *sc = arg; u_int32_t esr, ear; int ue; u_quad_t tb; u_long tmp, msr, dat; /* This code needs to be improved to handle double-bit errors */ /* in some intelligent fashion. */ mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); esr = mfdcr(DCR_SDRAM0_CFGDATA); mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_BEAR); ear = mfdcr(DCR_SDRAM0_CFGDATA); /* Always clear the error to stop the intr ASAP. */ mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff); if (esr == 0x00) { /* No current error. Could happen due to intr. nesting */ return(1); } /* * Only report errors every once per second max. Do this using the TB, * because the system time (via microtime) may be adjusted when the * date is set and can't reliably be used to measure intervals. */ __asm ("1: mftbu %0; mftb %0+1; mftbu %1; cmpw %0,%1; bne 1b" : "=r"(tb), "=r"(tmp)); sc->sc_ecc_cnt++; if ((tb - sc->sc_ecc_tb) < sc->sc_ecc_iv) return(1); ue = (esr & SDRAM0_ECCESR_UE) != 0x00; printf("ECC: Error CNT=%d ESR=%x EAR=%x %s BKNE=%d%d%d%d " "BLCE=%d%d%d%d CBE=%d%d.\n", sc->sc_ecc_cnt, esr, ear, (ue) ? "Uncorrectable" : "Correctable", ((esr & SDRAM0_ECCESR_BKEN(0)) != 0x00), ((esr & SDRAM0_ECCESR_BKEN(1)) != 0x00), ((esr & SDRAM0_ECCESR_BKEN(2)) != 0x00), ((esr & SDRAM0_ECCESR_BKEN(3)) != 0x00), ((esr & SDRAM0_ECCESR_BLCEN(0)) != 0x00), ((esr & SDRAM0_ECCESR_BLCEN(1)) != 0x00), ((esr & SDRAM0_ECCESR_BLCEN(2)) != 0x00), ((esr & SDRAM0_ECCESR_BLCEN(3)) != 0x00), ((esr & SDRAM0_ECCESR_CBEN(0)) != 0x00), ((esr & SDRAM0_ECCESR_CBEN(1)) != 0x00)); /* Should check for uncorrectable errors and panic... */ if (sc->sc_ecc_cnt > 1000) { printf("ECC: Too many errors, recycling entire " "SDRAM (size = %d).\n", sc->sc_memsize); /* * Can this code be changed to run without disabling data MMU * and disabling intrs? * Does kernel always map all of physical RAM VA=PA? If so, * just loop over lowmem. */ __asm volatile( "mfmsr %0;" "li %1, 0x00;" "ori %1, %1, 0x8010;" "andc %1, %0, %1;" "mtmsr %1;" "sync;isync;" "li %1, 0x00;" "1:" "dcbt 0, %1;" "sync;isync;" "lwz %2, 0(%1);" "stw %2, 0(%1);" "sync;isync;" "dcbf 0, %1;" "sync;isync;" "addi %1, %1, 0x20;" "addic. %3, %3, -0x20;" "bge 1b;" "mtmsr %0;" "sync;isync;" : "=&r" (msr), "=&r" (tmp), "=&r" (dat) : "r" (sc->sc_memsize) : "0" ); mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); esr = mfdcr(DCR_SDRAM0_CFGDATA); mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff); /* * Correctable errors here are OK, mem should be clean now. * * Should check for uncorrectable errors and panic... */ printf("ECC: Recycling complete, ESR=%x. " "Checking for persistent errors.\n", esr); __asm volatile( "mfmsr %0;" "li %1, 0x00;" "ori %1, %1, 0x8010;" "andc %1, %0, %1;" "mtmsr %1;" "sync;isync;" "li %1, 0x00;" "1:" "dcbt 0, %1;" "sync;isync;" "lwz %2, 0(%1);" "stw %2, 0(%1);" "sync;isync;" "dcbf 0, %1;" "sync;isync;" "addi %1, %1, 0x20;" "addic. %3, %3, -0x20;" "bge 1b;" "mtmsr %0;" "sync;isync;" : "=&r" (msr), "=&r" (tmp), "=&r" (dat) : "r" (sc->sc_memsize) : "0" ); mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); esr = mfdcr(DCR_SDRAM0_CFGDATA); /* * If esr is non zero here, we're screwed. * Should check this and panic. */ printf("ECC: Persistent error check complete, " "final ESR=%x.\n", esr); } sc->sc_ecc_tb = tb; sc->sc_ecc_cnt = 0; return(1); }