/*
 * Written by J.T. Conklin
 * Public domain.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: strcat.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
#endif

/*
 * char *strcat(char *dst, const char *src)
 *
 * i386, AT&T syntax, arguments passed on the stack.  After the initial
 * pushl the frame is:
 *	 0(%esp)	saved %ebx
 *	 4(%esp)	return address
 *	 8(%esp)	dst
 *	12(%esp)	src
 *
 * Register roles:
 *	%ecx	write cursor in dst (first used to find dst's NUL)
 *	%eax	read cursor in src; reloaded with dst for the return value
 *	%ebx	byte/word currently being examined or copied (saved/restored)
 *	%edx	scratch for the zero-byte test
 *
 * Returns the original dst pointer in %eax.
 */
ENTRY(strcat)
	pushl	%ebx
	movl	8(%esp),%ecx		/* %ecx = dst */
	movl	12(%esp),%eax		/* %eax = src */

	/*
	 * Phase 1: find the terminating NUL of dst.
	 *
	 * Align destination to word boundary.
	 * Consider unrolling loop?
	 */
.Lscan:
.Lscan_align:
	testb	$3,%cl			/* low two address bits clear? */
	je	.Lscan_aligned
	cmpb	$0,(%ecx)
	je	.Lcopy			/* NUL found; %ecx points at it */
	incl	%ecx
	jmp	.Lscan_align

	_ALIGN_TEXT
.Lscan_aligned:
.Lscan_loop:
	movl	(%ecx),%ebx		/* fetch four bytes of dst */
	addl	$4,%ecx			/* %ecx now points past that word */
	/*
	 * (word - 0x01010101) & 0x80808080 is non-zero whenever some
	 * byte of the word is zero.  It can also be non-zero for bytes
	 * that have their top bit set, so a hit must be confirmed below.
	 */
	leal	-0x01010101(%ebx),%edx
	testl	$0x80808080,%edx
	je	.Lscan_loop

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 */

	/*
	 * The optimal code for determining whether each byte is zero
	 * differs by processor.  This space-optimized code should be
	 * acceptable on all, especially since we don't expect it to
	 * be run frequently.
	 *
	 * %ecx was already advanced by 4, so each case backs it up to
	 * the address of the zero byte it found.
	 */

	testb	%bl,%bl		/* 1st byte == 0? */
	jne	1f
	subl	$4,%ecx
	jmp	.Lcopy

1:	testb	%bh,%bh		/* 2nd byte == 0? */
	jne	1f
	subl	$3,%ecx
	jmp	.Lcopy

1:	shrl	$16,%ebx	/* bring bytes 3 and 4 into %bl/%bh */
	testb	%bl,%bl		/* 3rd byte == 0? */
	jne	1f
	subl	$2,%ecx
	jmp	.Lcopy

1:	testb	%bh,%bh		/* 4th byte == 0? */
	jne	.Lscan_loop	/* false positive: keep scanning */
	subl	$1,%ecx

	/*
	 * Phase 2: copy src, including its NUL, to dst's old NUL position.
	 *
	 * Align source to a word boundary.
	 * Consider unrolling loop?
	 */
.Lcopy:
.Lcopy_align:
	testl	$3,%eax
	je	.Lcopy_aligned
	movb	(%eax),%bl
	incl	%eax
	movb	%bl,(%ecx)
	incl	%ecx
	testb	%bl,%bl
	jne	.Lcopy_align
	jmp	.Ldone		/* the byte just stored was the NUL */

	_ALIGN_TEXT
.Lcopy_loop:
	movl	%ebx,(%ecx)	/* word held no zero byte: store it whole */
	addl	$4,%ecx
.Lcopy_aligned:
	movl	(%eax),%ebx	/* fetch four bytes of src */
	addl	$4,%eax
	/* Same zero-byte test as in the scan loop above. */
	leal	-0x01010101(%ebx),%edx
	testl	$0x80808080,%edx
	je	.Lcopy_loop

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * The word is stored one byte at a time here, stopping right
	 * after the NUL so that nothing beyond it is written to dst.
	 */

	movb	%bl,(%ecx)	/* 1st byte */
	incl	%ecx
	testb	%bl,%bl
	je	.Ldone

	movb	%bh,(%ecx)	/* 2nd byte */
	incl	%ecx
	testb	%bh,%bh
	je	.Ldone

	shrl	$16,%ebx	/* bring bytes 3 and 4 into %bl/%bh */
	movb	%bl,(%ecx)	/* 3rd byte */
	incl	%ecx
	testb	%bl,%bl
	je	.Ldone

	movb	%bh,(%ecx)	/* 4th byte */
	incl	%ecx
	testb	%bh,%bh
	jne	.Lcopy_aligned	/* false positive: resume word copy */

.Ldone:
	movl	8(%esp),%eax	/* return value = original dst */
	popl	%ebx
	ret
END(strcat)