/*	$NetBSD: memset.S,v 1.2 2017/08/29 15:00:23 ryo Exp $	*/

/*-
 * Copyright (c) 2014 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

ENTRY(memset)
	cbz	x2, .Lret
	mov	x15, x0			/* working data pointer */
	cbz	x1, .Lzerofill
	/*
	 * Non-zero fill; replicate the fill byte to all 64 bits of x1.
	 */
	and	x1, x1, #0xff
	orr	x1, x1, x1, lsl #8
	orr	x1, x1, x1, lsl #16
	orr	x1, x1, x1, lsl #32
.Lfilled:
	cmp	x2, #15			/* if it's small, ignore alignment */
	b.ls	.Llast_subqword

	mov	x6, x1
	tst	x15, #15
	b.eq	.Lqword_loop

	/*
	 * We have at least 15 bytes to write, which means we can reach qword
	 * alignment without having to check the amount left.
	 */
	tbz	x15, #0, .Lhword_aligned
	strb	w1, [x15], #1
.Lhword_aligned:
	tbz	x15, #1, .Lword_aligned
	strh	w1, [x15], #2
.Lword_aligned:
	tbz	x15, #2, .Ldword_aligned
	str	w1, [x15], #4
.Ldword_aligned:
	tbz	x15, #3, .Lqword_aligned
	str	x1, [x15], #8

	/*
	 * Now we are qword aligned.  Figure out how much we had to write to
	 * get here, then subtract that from the length.  If we get 0, we're
	 * done.
	 */
.Lqword_aligned:
	sub	x5, x15, x0
	subs	x2, x2, x5
	b.eq	.Lret

	/*
	 * Write 16 bytes at a time.  If we don't have 16 bytes to write, bail.
	 * Keep looping while there is data to set.
	 */
.Lqword_loop:
	subs	x2, x2, #16
	b.mi	.Llast_subqword
	stp	x1, x6, [x15], #16
	b.ne	.Lqword_loop
	ret

	/*
	 * We have less than a qword to write.  We hope we are aligned, but
	 * since unaligned access works, we don't have to be.
	 */
.Llast_subqword:
	tbz	x2, #3, .Llast_subdword
	str	x1, [x15], #8
.Llast_subdword:
	tbz	x2, #2, .Llast_subword
	str	w1, [x15], #4
.Llast_subword:
	tbz	x2, #1, .Llast_subhword
	strh	w1, [x15], #2
.Llast_subhword:
	tbz	x2, #0, .Lret
	strb	w1, [x15]
.Lret:
	ret

	/*
	 * If we are filling with zeros, see if we can use the dc zva
	 * instruction to speed things up.
	 */
.Lzerofill:
	mrs	x9, dczid_el0
	/*
	 * Make sure the instruction isn't prohibited.
	 */
	tbnz	x9, #4, .Lfilled
	/*
	 * Now find out the block size.
	 */
	ubfx	x9, x9, #0, #4		/* extract low 4 bits */
	add	x9, x9, #2		/* add log2(word) */
	mov	x10, #1			/* the value is log2(words) */
	lsl	x10, x10, x9		/* shift to get the block size */
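	/*
	 * Note: DCZID_EL0.BS is the log2 of the zeroing block size in 4-byte
	 * words, so adding log2(4) = 2 above gives log2 of the block size in
	 * bytes.  For example, a BS value of 4 yields 1 << (4 + 2) = 64-byte
	 * blocks.
	 */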
	cmp	x2, x10			/* are we even copying a block? */
	b.lt	.Lfilled		/* no, do it 16 bytes at a time */

	/*
	 * Now we figure out how many aligned blocks we have.
	 */
	sub	x11, x10, #1		/* make block size a mask */
	add	x12, x15, x11		/* round start to a block boundary */
	asr	x12, x12, x9		/* "starting" block number */
	add	x13, x15, x2		/* get ending address */
	asr	x13, x13, x9		/* "ending" block number */
	cmp	x13, x12		/* how many blocks? */
	b.ls	.Lfilled		/* none, do it 16 bytes at a time */

	/*
	 * Now we have one or more blocks to deal with.  First we need to get
	 * block aligned.
	 */
	and	x7, x15, x11		/* are we already aligned on a block boundary? */
	cbz	x7, .Lblock_aligned
	sub	x7, x10, x7		/* subtract offset from block length */
	sub	x2, x2, x7		/* subtract that from length */
	asr	x7, x7, #4		/* length -> N*16 */

	tbz	x15, #0, .Lzero_hword_aligned
	strb	wzr, [x15], #1
.Lzero_hword_aligned:
	tbz	x15, #1, .Lzero_word_aligned
	strh	wzr, [x15], #2
.Lzero_word_aligned:
	tbz	x15, #2, .Lzero_dword_aligned
	str	wzr, [x15], #4
.Lzero_dword_aligned:
	tbz	x15, #3, .Lzero_qword_aligned
	str	xzr, [x15], #8
.Lzero_qword_aligned:
	cbz	x7, .Lblock_aligned	/* less than 16 bytes? just branch */
	adr	x6, .Lunrolled_end
	sub	x6, x6, x7, lsl #2	/* back up to write only the last N insns */
	br	x6			/* and do it */

	/*
	 * The maximum size of DCZID_EL0:BS supported is 2048 bytes.
	 */
	.rept (2048 / 16) - 1
	stp	xzr, xzr, [x15], #16
	.endr
.Lunrolled_end:

	/*
	 * Now we are block aligned.
	 */
.Lblock_aligned:
	subs	x2, x2, x10
	b.mi	.Lblock_done
	dc	zva, x15
	add	x15, x15, x10
	b.ne	.Lblock_aligned
	ret

.Lblock_done:
	and	x2, x2, x11		/* make positive again */
	mov	x6, xzr			/* fill 2nd xword */
	b	.Lqword_loop		/* and finish filling */

END(memset)
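
/*
 * Illustrative note, not part of the build: a rough C sketch of the
 * non-zero fill path above (fill-byte replication, 16-bytes-at-a-time
 * stores, and the sub-qword tail).  The helper name memset_sketch is
 * hypothetical, and the sketch deliberately ignores the alignment code
 * and the dc zva zero-fill fast path.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static void *
 *	memset_sketch(void *dst, int c, size_t len)
 *	{
 *		uint8_t *p = dst;
 *		uint64_t v = (uint8_t)c;	// like "and x1, x1, #0xff"
 *
 *		v |= v << 8;			// replicate to 16 bits
 *		v |= v << 16;			// replicate to 32 bits
 *		v |= v << 32;			// replicate to 64 bits
 *		while (len >= 16) {		// like the stp in .Lqword_loop
 *			memcpy(p, &v, 8);
 *			memcpy(p + 8, &v, 8);
 *			p += 16;
 *			len -= 16;
 *		}
 *		while (len-- > 0)		// like the .Llast_sub* tail
 *			*p++ = (uint8_t)c;
 *		return dst;
 *	}
 */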