/* strcpy with SSE2 and unaligned load
   Copyright (C) 2011-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>. */

#if IS_IN (libc)

# include <sysdep.h>

/* CFI bookkeeping for one 4-byte push: the CFA moves by 4 and REG is
   recorded as saved at offset 0 from the new stack pointer.  */
# define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* CFI bookkeeping for one 4-byte pop: the inverse of CFI_PUSH.  */
# define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Push/pop a 32-bit register and emit the matching CFI directives.  */
# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
# define POP(REG)	popl REG; CFI_POP (REG)

/* Default symbol name; a file that includes this one may predefine
   STRCPY to build the routine under another name.  */
# ifndef STRCPY
#  define STRCPY  __strcpy_sse2
# endif

/* Offsets of the three stacked arguments relative to %esp after ENTRANCE.
   The entry code loads STR1 into %edi (store pointer), STR2 into %esi
   (load pointer) and LEN into %ebx (byte count).  */
# define STR1  PARMS
# define STR2  STR1+4
# define LEN  STR2+4

# ifdef USE_AS_STRNCPY
/* ENTRANCE pushes three 4-byte registers; together with the 4-byte return
   address the first argument therefore sits at 16(%esp).  */
#  define PARMS  16
#  define ENTRANCE PUSH(%ebx); PUSH(%esi); PUSH(%edi)
/* Restore the saved registers and return.  The CFI_PUSH directives after
   `ret' emit no instructions; they put the unwind state back to "three
   registers pushed" for whatever code follows this RETURN in the file.  */
#  define RETURN  POP(%edi); POP(%esi); POP(%ebx); ret; \
	CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi);

# ifdef PIC
/* PIC: table entries are offsets relative to the table base B.  */
#  define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into ECX and branch to it.  TABLE is a
   jump table with relative offsets.
   INDEX is a register that contains the index into the jump table.
   SCALE is the scale of INDEX.
   Clobbers ECX.  */

#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
	/* We first load PC into ECX.  */			\
	SETUP_PIC_REG(cx);					\
	/* Get the address of the jump table.  */		\
	addl	$(TABLE - .), %ecx;				\
	/* Get the entry and convert the relative offset to the	\
	   absolute address.  */				\
	addl	(%ecx,INDEX,SCALE), %ecx;			\
	/* We loaded the jump table and adjusted ECX.  Go.  */	\
	_CET_NOTRACK jmp *%ecx
# else
/* Non-PIC: table entries are absolute addresses.  */
#  define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  INDEX is a register that contains the index into
   the jump table.  SCALE is the scale of INDEX.  */

#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
	_CET_NOTRACK jmp *TABLE(,INDEX,SCALE)
# endif

/* Function entry.  Loads the stacked arguments (%edi = STR1, %esi = STR2,
   %ebx = LEN), leaves through L(ExitZero) when LEN is 0, then examines
   the first two 16-byte source blocks.  %ecx holds the source offset
   inside its 16-byte block; %edx holds the pmovmskb mask of zero bytes.
   Falls through into L(Unalign16Both).  */
	.text
ENTRY (STRCPY)
	ENTRANCE
	mov	STR1(%esp), %edi
	mov	STR2(%esp), %esi
	movl	LEN(%esp), %ebx
	test	%ebx, %ebx
	jz	L(ExitZero)

	mov	%esi, %ecx
# ifndef USE_AS_STPCPY
	mov	%edi, %eax      /* save result */
# endif
	and	$15, %ecx
	jz	L(SourceStringAlignmentZero)

	/* Source not 16-byte aligned: round %esi down so the aligned load
	   below covers the start of the string.  */
	and	$-16, %esi
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1

	pcmpeqb	(%esi), %xmm1
	add	%ecx, %ebx		/* count is now relative to the aligned base */
	pmovmskb %xmm1, %edx
	shr	%cl, %edx		/* drop mask bits for bytes before the string */
# ifdef USE_AS_STPCPY
	cmp	$16, %ebx
	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
# else
	cmp	$17, %ebx
	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
# endif
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesTail)

	/* No zero byte in the first block; test the next aligned block.  */
	pcmpeqb	16(%esi), %xmm0
	pmovmskb %xmm0, %edx
# ifdef USE_AS_STPCPY
	cmp	$32, %ebx
	jbe	L(CopyFrom1To32BytesCase2OrCase3)
# else
	cmp	$33, %ebx
	jbe	L(CopyFrom1To32BytesCase2OrCase3)
# endif
	test	%edx, %edx
	jnz	L(CopyFrom1To32Bytes)

	movdqu	(%esi, %ecx), %xmm1   /* copy 16 bytes */
	movdqu	%xmm1, (%edi)

	/* Bias %edi by the source offset so that (%esi, N) and (%edi, N)
	   address corresponding bytes from here on.  */
	sub	%ecx, %edi

/* If source address alignment != destination address alignment */
/* Unrolled chain of 16-byte steps: aligned loads from (%esi, %ecx),
   unaligned stores to (%edi, %ecx).  Each step stores the block loaded by
   the previous step and tests the newly loaded block for a zero byte.
   %xmm0 stays all-zero on the fall-through path because a pcmpeqb result
   is only kept when its mask in %edx was zero.  %ebx is decremented per
   step; `jbe' leaves when the count runs out.  Ends by rounding %esi
   down to a 64-byte boundary and falls through into
   L(Unaligned64Loop).  */
	.p2align 4
L(Unalign16Both):
	mov	$16, %ecx
	movdqa	(%esi, %ecx), %xmm1
	movaps	16(%esi, %ecx), %xmm2
	movdqu	%xmm1, (%edi, %ecx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
	sub	$48, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm2)

	movaps	16(%esi, %ecx), %xmm3
	movdqu	%xmm2, (%edi, %ecx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm3)

	movaps	16(%esi, %ecx), %xmm4
	movdqu	%xmm3, (%edi, %ecx)
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm4)

	movaps	16(%esi, %ecx), %xmm1
	movdqu	%xmm4, (%edi, %ecx)
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm1)

	movaps	16(%esi, %ecx), %xmm2
	movdqu	%xmm1, (%edi, %ecx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm2)

	movaps	16(%esi, %ecx), %xmm3
	movdqu	%xmm2, (%edi, %ecx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm3)

	/* Switch to 64-byte steps: %esi is rounded down to a multiple of 64,
	   and %edi and %ebx are corrected by the same distance (%edx = old
	   %esi - new %esi).  */
	movdqu	%xmm3, (%edi, %ecx)
	mov	%esi, %edx
	lea	16(%esi, %ecx), %esi
	and	$-0x40, %esi
	sub	%esi, %edx
	sub	%edx, %edi
	lea	128(%ebx, %edx), %ebx

/* Main loop, 64 source bytes per iteration.  The four 16-byte blocks are
   kept in %xmm4..%xmm7; pminub folds them into one vector whose bytes are
   zero wherever any of the four had a zero byte, so a single
   pcmpeqb/pmovmskb against the all-zero %xmm0 tests all 64 bytes.
   Stores are deferred: an iteration writes the blocks loaded by the
   previous one.  Leaves through L(UnalignedLeaveCase2OrCase3) when the
   count runs out, or falls/jumps into L(Unaligned64Leave) when a zero
   byte is seen.  */
L(Unaligned64Loop):
	movaps	(%esi), %xmm2
	movaps	%xmm2, %xmm4
	movaps	16(%esi), %xmm5
	movaps	32(%esi), %xmm3
	movaps	%xmm3, %xmm6
	movaps	48(%esi), %xmm7
	pminub	%xmm5, %xmm2
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm0, %xmm3
	pmovmskb %xmm3, %edx
	sub	$64, %ebx
	jbe	L(UnalignedLeaveCase2OrCase3)
	test	%edx, %edx
	jnz	L(Unaligned64Leave)
L(Unaligned64Loop_start):
	add	$64, %edi
	add	$64, %esi
	movdqu	%xmm4, -64(%edi)
	movaps	(%esi), %xmm2
	movdqa	%xmm2, %xmm4
	movdqu	%xmm5, -48(%edi)
	movaps	16(%esi), %xmm5
	pminub	%xmm5, %xmm2
	movaps	32(%esi), %xmm3
	movdqu	%xmm6, -32(%edi)
	movaps	%xmm3, %xmm6
	movdqu	%xmm7, -16(%edi)
	movaps	48(%esi), %xmm7
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm0, %xmm3
	pmovmskb %xmm3, %edx
	sub	$64, %ebx
	jbe	L(UnalignedLeaveCase2OrCase3)
	test	%edx, %edx
	jz	L(Unaligned64Loop_start)
/* A zero byte lies somewhere in the 64 bytes held in %xmm4..%xmm7 and the
   count has not run out.  Find which 16-byte block contains it; the first
   three cases branch away, the fourth (%xmm7) is handled inline.  All
   paths continue in L(StrncpyFillTailWithZero) with %edi just past the
   copied terminator.  */
L(Unaligned64Leave):
	pxor	%xmm1, %xmm1

	pcmpeqb	%xmm4, %xmm0
	pcmpeqb	%xmm5, %xmm1
	pmovmskb %xmm0, %edx
	pmovmskb %xmm1, %ecx
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnaligned_0)
	test	%ecx, %ecx
	jnz	L(CopyFrom1To16BytesUnaligned_16)

	/* Both masks were zero, so %xmm0 and %xmm1 are all-zero again.  */
	pcmpeqb	%xmm6, %xmm0
	pcmpeqb	%xmm7, %xmm1
	pmovmskb %xmm0, %edx
	pmovmskb %xmm1, %ecx
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnaligned_32)

	/* Zero byte is in the fourth block: store all 64 bytes.  */
	bsf	%ecx, %edx
	movdqu	%xmm4, (%edi)
	movdqu	%xmm5, 16(%edi)
	movdqu	%xmm6, 32(%edi)
# ifdef USE_AS_STPCPY
	lea	48(%edi, %edx), %eax
# endif
	movdqu	%xmm7, 48(%edi)
	add	$15, %ebx
	sub	%edx, %ebx
	lea	49(%edi, %edx), %edi
	jmp	L(StrncpyFillTailWithZero)

/* If source address alignment == destination address alignment */
/* Reached from the entry code when %esi is already 16-byte aligned
   (%ecx == 0).  Same two-block look-ahead as the entry code, but with no
   mask shift and with the first block stored straight from %xmm1.  */

L(SourceStringAlignmentZero):
	pxor	%xmm0, %xmm0
	movdqa	(%esi), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %edx
# ifdef USE_AS_STPCPY
	cmp	$16, %ebx
	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
# else
	cmp	$17, %ebx
	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
# endif
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesTail1)

	pcmpeqb	16(%esi), %xmm0
	movdqu	%xmm1, (%edi)
	pmovmskb %xmm0, %edx
# ifdef USE_AS_STPCPY
	cmp	$32, %ebx
	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
# else
	cmp	$33, %ebx
	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
# endif
	test	%edx, %edx
	jnz	L(CopyFrom1To32Bytes1)

	jmp	L(Unalign16Both)

/*-----------------End of main part---------------------------*/

/* Case1 */
/* Zero byte found in the first (unaligned) source block.  Undo the entry
   code's adjustments (%ebx -= %ecx, %esi += %ecx) and dispatch on the
   index of the zero byte.  */
	.p2align 4
L(CopyFrom1To16BytesTail):
	sub	%ecx, %ebx
	add	%ecx, %esi
	bsf	%edx, %edx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

/* Aligned-source variants.  L(CopyFrom1To32Bytes1): zero byte is in the
   second block, so step all three of %esi/%edi/%ebx past the first block
   (already stored) and fall into the shared dispatch.  */
	.p2align 4
L(CopyFrom1To32Bytes1):
	add	$16, %esi
	add	$16, %edi
	sub	$16, %ebx
L(CopyFrom1To16BytesTail1):
	bsf	%edx, %edx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

/* Unaligned source, zero byte found in the second aligned block.  The
   index handed to the table is measured from the real string start:
   bsf result + 16 - %ecx.  */
	.p2align 4
L(CopyFrom1To32Bytes):
	sub	%ecx, %ebx
	bsf	%edx, %edx
	add	%ecx, %esi
	add	$16, %edx
	sub	%ecx, %edx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

/* From L(Unaligned64Leave): the zero byte is in the first block (%xmm4).
   Store that block, adjust the remaining count in %ebx, leave %edi one
   past the terminator and continue with the zero fill.  */
	.p2align 4
L(CopyFrom1To16BytesUnaligned_0):
	bsf	%edx, %edx
# ifdef USE_AS_STPCPY
	lea	(%edi, %edx), %eax
# endif
	movdqu	%xmm4, (%edi)
	add	$63, %ebx
	sub	%edx, %ebx
	lea	1(%edi, %edx), %edi
	jmp	L(StrncpyFillTailWithZero)

/* From L(Unaligned64Leave): the zero byte is in the second block (%xmm5,
   mask in %ecx).  Store the first two blocks and continue with the zero
   fill.  */
	.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
	bsf	%ecx, %edx
	movdqu	%xmm4, (%edi)
# ifdef USE_AS_STPCPY
	lea	16(%edi, %edx), %eax
# endif
	movdqu	%xmm5, 16(%edi)
	add	$47, %ebx
	sub	%edx, %ebx
	lea	17(%edi, %edx), %edi
	jmp	L(StrncpyFillTailWithZero)

/* From L(Unaligned64Leave): the zero byte is in the third block (%xmm6).
   Store the first three blocks and continue with the zero fill.  */
	.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
	bsf	%edx, %edx
	movdqu	%xmm4, (%edi)
	movdqu	%xmm5, 16(%edi)
# ifdef USE_AS_STPCPY
	lea	32(%edi, %edx), %eax
# endif
	movdqu	%xmm6, 32(%edi)
	add	$31, %ebx
	sub	%edx, %ebx
	lea	33(%edi, %edx), %edi
	jmp	L(StrncpyFillTailWithZero)

/* Per-register trampolines: store the 16-byte block that contains the
   zero byte at (%edi, %ecx), then join L(CopyFrom1To16BytesXmmExit).
   (The %xmm2 variant lives directly in front of that label and falls
   through instead of jumping.)  */
	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm6):
	movdqu	%xmm6, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm5):
	movdqu	%xmm5, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm4):
	movdqu	%xmm4, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm3):
	movdqu	%xmm3, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm1):
	movdqu	%xmm1, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

/* Shared dispatch used by the Case2 handlers once they know the zero byte
   comes before the length limit: %edx already holds its index.  */
	.p2align 4
L(CopyFrom1To16BytesExit):
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

/* Case2 */
/* The count ran out in a block that also contains a zero byte.  Advance
   both pointers by %ecx, add 16 back to %ebx, then compare the index of
   the zero byte with %ebx: if the zero byte comes first, copy up to and
   including it (L(ExitTable)); otherwise copy exactly %ebx bytes
   (L(ExitStrncpyTable)).  */

	.p2align 4
L(CopyFrom1To16BytesCase2):
	add	$16, %ebx
	add	%ecx, %edi
	add	%ecx, %esi
	bsf	%edx, %edx
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

/* Case2 for the second look-ahead block of an unaligned source: rebase
   the count and the zero-byte index onto the real string start before
   comparing them.  */
	.p2align 4
L(CopyFrom1To32BytesCase2):
	sub	%ecx, %ebx
	add	%ecx, %esi
	bsf	%edx, %edx
	add	$16, %edx
	sub	%ecx, %edx
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

/* Case2 for the first block of an unaligned source: undo the entry
   code's %ebx/%esi adjustment, then pick the shorter of "up to the zero
   byte" and "exactly %ebx bytes".  */
L(CopyFrom1To16BytesTailCase2):
	sub	%ecx, %ebx
	add	%ecx, %esi
	bsf	%edx, %edx
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

/* Case2 for an aligned source: no pointer fix-up needed, just pick the
   shorter of "up to the zero byte" and "exactly %ebx bytes".  */
L(CopyFrom1To16BytesTail1Case2):
	bsf	%edx, %edx
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

/* Case2 or Case3, Case3 */
/* Reached when the count ran out inside the current block.  A non-zero
   mask in %edx means the block also holds a zero byte (Case2); otherwise
   (Case3) exactly %ebx bytes are copied with no terminator handling.  */

	.p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesCase2)
L(CopyFrom1To16BytesCase3):
	add	$16, %ebx
	add	%ecx, %edi
	add	%ecx, %esi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

/* Count ran out within the first 32 bytes of an unaligned source.  With
   no zero byte in the mask, rebase %ebx/%esi onto the string start and
   copy exactly %ebx bytes.  */
	.p2align 4
L(CopyFrom1To32BytesCase2OrCase3):
	test	%edx, %edx
	jnz	L(CopyFrom1To32BytesCase2)
	sub	%ecx, %ebx
	add	%ecx, %esi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

/* Count ran out within the first block of an unaligned source.  With no
   zero byte in the mask, rebase %ebx/%esi onto the string start and copy
   exactly %ebx bytes.  */
	.p2align 4
L(CopyFrom1To16BytesTailCase2OrCase3):
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesTailCase2)
	sub	%ecx, %ebx
	add	%ecx, %esi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

/* Aligned-source counterparts.  The 32-byte entry first steps past the
   block that was already stored, then shares the 16-byte logic.  */
	.p2align 4
L(CopyFrom1To32Bytes1Case2OrCase3):
	add	$16, %edi
	add	$16, %esi
	sub	$16, %ebx
L(CopyFrom1To16BytesTail1Case2OrCase3):
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesTail1Case2)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

/* L(Exit0) is entry 0 of L(ExitStrncpyTable): nothing left to copy.
   L(ExitN), N = 1..8, are the first entries of L(ExitTable).  L(ExitN) is
   entered with %edx = N-1, the index of the zero byte, so %dh is 0 and
   can be stored as the terminator.  Each handler copies N bytes from
   (%esi) to (%edi) using overlapping loads where N is not a power of two,
   sets %eax to the terminator's address for stpncpy, subtracts N from the
   count and, if any count remains, zero-fills the rest.  `lea' does not
   modify flags, so `jnz' tests the result of the preceding `sub'.  */
	.p2align 4
L(Exit0):
# ifdef USE_AS_STPCPY
	mov	%edi, %eax
# endif
	RETURN

	.p2align 4
L(Exit1):
	movb	%dh, (%edi)
# ifdef USE_AS_STPCPY
	lea	(%edi), %eax
# endif
	sub	$1, %ebx
	lea	1(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit2):
	movw	(%esi), %dx
	movw	%dx, (%edi)
# ifdef USE_AS_STPCPY
	lea	1(%edi), %eax
# endif
	sub	$2, %ebx
	lea	2(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit3):
	movw	(%esi), %cx
	movw	%cx, (%edi)
	movb	%dh, 2(%edi)
# ifdef USE_AS_STPCPY
	lea	2(%edi), %eax
# endif
	sub	$3, %ebx
	lea	3(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit4):
	movl	(%esi), %edx
	movl	%edx, (%edi)
# ifdef USE_AS_STPCPY
	lea	3(%edi), %eax
# endif
	sub	$4, %ebx
	lea	4(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit5):
	movl	(%esi), %ecx
	movb	%dh, 4(%edi)
	movl	%ecx, (%edi)
# ifdef USE_AS_STPCPY
	lea	4(%edi), %eax
# endif
	sub	$5, %ebx
	lea	5(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit6):
	movl	(%esi), %ecx
	movw	4(%esi), %dx
	movl	%ecx, (%edi)
	movw	%dx, 4(%edi)
# ifdef USE_AS_STPCPY
	lea	5(%edi), %eax
# endif
	sub	$6, %ebx
	lea	6(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit7):
	movl	(%esi), %ecx
	movl	3(%esi), %edx
	movl	%ecx, (%edi)
	movl	%edx, 3(%edi)
# ifdef USE_AS_STPCPY
	lea	6(%edi), %eax
# endif
	sub	$7, %ebx
	lea	7(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit8):
	movlpd	(%esi), %xmm0
	movlpd	%xmm0, (%edi)
# ifdef USE_AS_STPCPY
	lea	7(%edi), %eax
# endif
	sub	$8, %ebx
	lea	8(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

/* L(Exit9)..L(Exit16): L(ExitTable) targets for a string of 9..16 bytes
   including its terminator.  L(ExitN) is entered with %edx = N-1 (so %dh
   is 0).  Copies N bytes with an 8-byte movlpd plus an overlapping tail
   (or one movdqu for 16), sets %eax to the terminator's address for
   stpncpy, and zero-fills if count remains after subtracting N.  */
	.p2align 4
L(Exit9):
	movlpd	(%esi), %xmm0
	movb	%dh, 8(%edi)
	movlpd	%xmm0, (%edi)
# ifdef USE_AS_STPCPY
	lea	8(%edi), %eax
# endif
	sub	$9, %ebx
	lea	9(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit10):
	movlpd	(%esi), %xmm0
	movw	8(%esi), %dx
	movlpd	%xmm0, (%edi)
	movw	%dx, 8(%edi)
# ifdef USE_AS_STPCPY
	lea	9(%edi), %eax
# endif
	sub	$10, %ebx
	lea	10(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit11):
	movlpd	(%esi), %xmm0
	movl	7(%esi), %edx
	movlpd	%xmm0, (%edi)
	movl	%edx, 7(%edi)
# ifdef USE_AS_STPCPY
	lea	10(%edi), %eax
# endif
	sub	$11, %ebx
	lea	11(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit12):
	movlpd	(%esi), %xmm0
	movl	8(%esi), %edx
	movlpd	%xmm0, (%edi)
	movl	%edx, 8(%edi)
# ifdef USE_AS_STPCPY
	lea	11(%edi), %eax
# endif
	sub	$12, %ebx
	lea	12(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit13):
	movlpd	(%esi), %xmm0
	movlpd	5(%esi), %xmm1
	movlpd	%xmm0, (%edi)
	movlpd	%xmm1, 5(%edi)
# ifdef USE_AS_STPCPY
	lea	12(%edi), %eax
# endif
	sub	$13, %ebx
	lea	13(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit14):
	movlpd	(%esi), %xmm0
	movlpd	6(%esi), %xmm1
	movlpd	%xmm0, (%edi)
	movlpd	%xmm1, 6(%edi)
# ifdef USE_AS_STPCPY
	lea	13(%edi), %eax
# endif
	sub	$14, %ebx
	lea	14(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit15):
	movlpd	(%esi), %xmm0
	movlpd	7(%esi), %xmm1
	movlpd	%xmm0, (%edi)
	movlpd	%xmm1, 7(%edi)
# ifdef USE_AS_STPCPY
	lea	14(%edi), %eax
# endif
	sub	$15, %ebx
	lea	15(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit16):
	movdqu	(%esi), %xmm0
	movdqu	%xmm0, (%edi)
# ifdef USE_AS_STPCPY
	lea	15(%edi), %eax
# endif
	sub	$16, %ebx
	lea	16(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

/* L(Exit17)..L(Exit24): L(ExitTable) targets for a string of 17..24 bytes
   including its terminator.  L(ExitN) is entered with %edx = N-1 (so %dh
   is 0).  Copies the first 16 bytes with movdqu and the remainder with a
   narrower, possibly overlapping, move; sets %eax to the terminator's
   address for stpncpy; zero-fills if count remains after subtracting
   N.  */
	.p2align 4
L(Exit17):
	movdqu	(%esi), %xmm0
	movdqu	%xmm0, (%edi)
	movb	%dh, 16(%edi)
# ifdef USE_AS_STPCPY
	lea	16(%edi), %eax
# endif
	sub	$17, %ebx
	lea	17(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit18):
	movdqu	(%esi), %xmm0
	movw	16(%esi), %cx
	movdqu	%xmm0, (%edi)
	movw	%cx, 16(%edi)
# ifdef USE_AS_STPCPY
	lea	17(%edi), %eax
# endif
	sub	$18, %ebx
	lea	18(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit19):
	movdqu	(%esi), %xmm0
	movl	15(%esi), %ecx
	movdqu	%xmm0, (%edi)
	movl	%ecx, 15(%edi)
# ifdef USE_AS_STPCPY
	lea	18(%edi), %eax
# endif
	sub	$19, %ebx
	lea	19(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit20):
	movdqu	(%esi), %xmm0
	movl	16(%esi), %ecx
	movdqu	%xmm0, (%edi)
	movl	%ecx, 16(%edi)
# ifdef USE_AS_STPCPY
	lea	19(%edi), %eax
# endif
	sub	$20, %ebx
	lea	20(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit21):
	movdqu	(%esi), %xmm0
	movl	16(%esi), %ecx
	movdqu	%xmm0, (%edi)
	movl	%ecx, 16(%edi)
	movb	%dh, 20(%edi)
# ifdef USE_AS_STPCPY
	lea	20(%edi), %eax
# endif
	sub	$21, %ebx
	lea	21(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit22):
	movdqu	(%esi), %xmm0
	movlpd	14(%esi), %xmm3
	movdqu	%xmm0, (%edi)
	movlpd	%xmm3, 14(%edi)
# ifdef USE_AS_STPCPY
	lea	21(%edi), %eax
# endif
	sub	$22, %ebx
	lea	22(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit23):
	movdqu	(%esi), %xmm0
	movlpd	15(%esi), %xmm3
	movdqu	%xmm0, (%edi)
	movlpd	%xmm3, 15(%edi)
# ifdef USE_AS_STPCPY
	lea	22(%edi), %eax
# endif
	sub	$23, %ebx
	lea	23(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit24):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movdqu	%xmm0, (%edi)
	movlpd	%xmm2, 16(%edi)
# ifdef USE_AS_STPCPY
	lea	23(%edi), %eax
# endif
	sub	$24, %ebx
	lea	24(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

/* L(Exit25)..L(Exit32): L(ExitTable) targets for a string of 25..32 bytes
   including its terminator.  L(ExitN) is entered with %edx = N-1 (so %dh
   is 0).  Copies N bytes as 16 + 8 + tail, or as two (overlapping)
   16-byte moves from 29 up; sets %eax to the terminator's address for
   stpncpy; zero-fills if count remains after subtracting N.  */
	.p2align 4
L(Exit25):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movdqu	%xmm0, (%edi)
	movlpd	%xmm2, 16(%edi)
	movb	%dh, 24(%edi)
# ifdef USE_AS_STPCPY
	lea	24(%edi), %eax
# endif
	sub	$25, %ebx
	lea	25(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit26):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movw	24(%esi), %cx
	movdqu	%xmm0, (%edi)
	movlpd	%xmm2, 16(%edi)
	movw	%cx, 24(%edi)
# ifdef USE_AS_STPCPY
	lea	25(%edi), %eax
# endif
	sub	$26, %ebx
	lea	26(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit27):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movl	23(%esi), %ecx
	movdqu	%xmm0, (%edi)
	movlpd	%xmm2, 16(%edi)
	movl	%ecx, 23(%edi)
# ifdef USE_AS_STPCPY
	lea	26(%edi), %eax
# endif
	sub	$27, %ebx
	lea	27(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit28):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movl	24(%esi), %ecx
	movdqu	%xmm0, (%edi)
	movlpd	%xmm2, 16(%edi)
	movl	%ecx, 24(%edi)
# ifdef USE_AS_STPCPY
	lea	27(%edi), %eax
# endif
	sub	$28, %ebx
	lea	28(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit29):
	movdqu	(%esi), %xmm0
	movdqu	13(%esi), %xmm2
	movdqu	%xmm0, (%edi)
	movdqu	%xmm2, 13(%edi)
# ifdef USE_AS_STPCPY
	lea	28(%edi), %eax
# endif
	sub	$29, %ebx
	lea	29(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit30):
	movdqu	(%esi), %xmm0
	movdqu	14(%esi), %xmm2
	movdqu	%xmm0, (%edi)
	movdqu	%xmm2, 14(%edi)
# ifdef USE_AS_STPCPY
	lea	29(%edi), %eax
# endif
	sub	$30, %ebx
	lea	30(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN


	.p2align 4
L(Exit31):
	movdqu	(%esi), %xmm0
	movdqu	15(%esi), %xmm2
	movdqu	%xmm0, (%edi)
	movdqu	%xmm2, 15(%edi)
# ifdef USE_AS_STPCPY
	lea	30(%edi), %eax
# endif
	sub	$31, %ebx
	lea	31(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

	.p2align 4
L(Exit32):
	movdqu	(%esi), %xmm0
	movdqu	16(%esi), %xmm2
	movdqu	%xmm0, (%edi)
	movdqu	%xmm2, 16(%edi)
# ifdef USE_AS_STPCPY
	lea	31(%edi), %eax
# endif
	sub	$32, %ebx
	lea	32(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
	RETURN

/* L(StrncpyExit1)..L(StrncpyExit8): the branches into
   L(ExitStrncpyTable) index it with %ebx, the number of bytes still to
   copy.  L(StrncpyExitN) copies exactly N bytes from (%esi) to (%edi);
   no terminator is written and no zero fill follows.  For stpncpy %eax
   is set to %edi + N.  */
	.p2align 4
L(StrncpyExit1):
	movb	(%esi), %dl
	movb	%dl, (%edi)
# ifdef USE_AS_STPCPY
	lea	1(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit2):
	movw	(%esi), %dx
	movw	%dx, (%edi)
# ifdef USE_AS_STPCPY
	lea	2(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit3):
	movw	(%esi), %cx
	movb	2(%esi), %dl
	movw	%cx, (%edi)
	movb	%dl, 2(%edi)
# ifdef USE_AS_STPCPY
	lea	3(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit4):
	movl	(%esi), %edx
	movl	%edx, (%edi)
# ifdef USE_AS_STPCPY
	lea	4(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit5):
	movl	(%esi), %ecx
	movb	4(%esi), %dl
	movl	%ecx, (%edi)
	movb	%dl, 4(%edi)
# ifdef USE_AS_STPCPY
	lea	5(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit6):
	movl	(%esi), %ecx
	movw	4(%esi), %dx
	movl	%ecx, (%edi)
	movw	%dx, 4(%edi)
# ifdef USE_AS_STPCPY
	lea	6(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit7):
	movl	(%esi), %ecx
	movl	3(%esi), %edx
	movl	%ecx, (%edi)
	movl	%edx, 3(%edi)
# ifdef USE_AS_STPCPY
	lea	7(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit8):
	movlpd	(%esi), %xmm0
	movlpd	%xmm0, (%edi)
# ifdef USE_AS_STPCPY
	lea	8(%edi), %eax
# endif
	RETURN

/* L(StrncpyExit9)..L(StrncpyExit16): copy exactly N = 9..16 bytes from
   (%esi) to (%edi) (8-byte movlpd plus a possibly overlapping tail, or
   one movdqu for 16).  No terminator is written.  For stpncpy %eax is
   set to %edi + N.  */
	.p2align 4
L(StrncpyExit9):
	movlpd	(%esi), %xmm0
	movb	8(%esi), %dl
	movlpd	%xmm0, (%edi)
	movb	%dl, 8(%edi)
# ifdef USE_AS_STPCPY
	lea	9(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit10):
	movlpd	(%esi), %xmm0
	movw	8(%esi), %dx
	movlpd	%xmm0, (%edi)
	movw	%dx, 8(%edi)
# ifdef USE_AS_STPCPY
	lea	10(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit11):
	movlpd	(%esi), %xmm0
	movl	7(%esi), %edx
	movlpd	%xmm0, (%edi)
	movl	%edx, 7(%edi)
# ifdef USE_AS_STPCPY
	lea	11(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit12):
	movlpd	(%esi), %xmm0
	movl	8(%esi), %edx
	movlpd	%xmm0, (%edi)
	movl	%edx, 8(%edi)
# ifdef USE_AS_STPCPY
	lea	12(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit13):
	movlpd	(%esi), %xmm0
	movlpd	5(%esi), %xmm1
	movlpd	%xmm0, (%edi)
	movlpd	%xmm1, 5(%edi)
# ifdef USE_AS_STPCPY
	lea	13(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit14):
	movlpd	(%esi), %xmm0
	movlpd	6(%esi), %xmm1
	movlpd	%xmm0, (%edi)
	movlpd	%xmm1, 6(%edi)
# ifdef USE_AS_STPCPY
	lea	14(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit15):
	movlpd	(%esi), %xmm0
	movlpd	7(%esi), %xmm1
	movlpd	%xmm0, (%edi)
	movlpd	%xmm1, 7(%edi)
# ifdef USE_AS_STPCPY
	lea	15(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit16):
	movdqu	(%esi), %xmm0
	movdqu	%xmm0, (%edi)
# ifdef USE_AS_STPCPY
	lea	16(%edi), %eax
# endif
	RETURN

/* L(StrncpyExit17)..L(StrncpyExit24): copy exactly N = 17..24 bytes from
   (%esi) to (%edi): one movdqu for the first 16 bytes plus a narrower,
   possibly overlapping, move for the rest.  No terminator is written.
   For stpncpy %eax is set to %edi + N.  */
	.p2align 4
L(StrncpyExit17):
	movdqu	(%esi), %xmm0
	movb	16(%esi), %cl
	movdqu	%xmm0, (%edi)
	movb	%cl, 16(%edi)
# ifdef USE_AS_STPCPY
	lea	17(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit18):
	movdqu	(%esi), %xmm0
	movw	16(%esi), %cx
	movdqu	%xmm0, (%edi)
	movw	%cx, 16(%edi)
# ifdef USE_AS_STPCPY
	lea	18(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit19):
	movdqu	(%esi), %xmm0
	movl	15(%esi), %ecx
	movdqu	%xmm0, (%edi)
	movl	%ecx, 15(%edi)
# ifdef USE_AS_STPCPY
	lea	19(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit20):
	movdqu	(%esi), %xmm0
	movl	16(%esi), %ecx
	movdqu	%xmm0, (%edi)
	movl	%ecx, 16(%edi)
# ifdef USE_AS_STPCPY
	lea	20(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit21):
	movdqu	(%esi), %xmm0
	movl	16(%esi), %ecx
	movb	20(%esi), %dl
	movdqu	%xmm0, (%edi)
	movl	%ecx, 16(%edi)
	movb	%dl, 20(%edi)
# ifdef USE_AS_STPCPY
	lea	21(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit22):
	movdqu	(%esi), %xmm0
	movlpd	14(%esi), %xmm3
	movdqu	%xmm0, (%edi)
	movlpd	%xmm3, 14(%edi)
# ifdef USE_AS_STPCPY
	lea	22(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit23):
	movdqu	(%esi), %xmm0
	movlpd	15(%esi), %xmm3
	movdqu	%xmm0, (%edi)
	movlpd	%xmm3, 15(%edi)
# ifdef USE_AS_STPCPY
	lea	23(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit24):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movdqu	%xmm0, (%edi)
	movlpd	%xmm2, 16(%edi)
# ifdef USE_AS_STPCPY
	lea	24(%edi), %eax
# endif
	RETURN

/* L(StrncpyExit25)..L(StrncpyExit33): copy exactly N = 25..33 bytes from
   (%esi) to (%edi).  No terminator is written.  For stpncpy %eax is set
   to %edi + N, except in L(StrncpyExit33), which has no stpncpy
   adjustment in this file.  */
	.p2align 4
L(StrncpyExit25):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movb	24(%esi), %cl
	movdqu	%xmm0, (%edi)
	movlpd	%xmm2, 16(%edi)
	movb	%cl, 24(%edi)
# ifdef USE_AS_STPCPY
	lea	25(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit26):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movw	24(%esi), %cx
	movdqu	%xmm0, (%edi)
	movlpd	%xmm2, 16(%edi)
	movw	%cx, 24(%edi)
# ifdef USE_AS_STPCPY
	lea	26(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit27):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movl	23(%esi), %ecx
	movdqu	%xmm0, (%edi)
	movlpd	%xmm2, 16(%edi)
	movl	%ecx, 23(%edi)
# ifdef USE_AS_STPCPY
	lea	27(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit28):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movl	24(%esi), %ecx
	movdqu	%xmm0, (%edi)
	movlpd	%xmm2, 16(%edi)
	movl	%ecx, 24(%edi)
# ifdef USE_AS_STPCPY
	lea	28(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit29):
	movdqu	(%esi), %xmm0
	movdqu	13(%esi), %xmm2
	movdqu	%xmm0, (%edi)
	movdqu	%xmm2, 13(%edi)
# ifdef USE_AS_STPCPY
	lea	29(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit30):
	movdqu	(%esi), %xmm0
	movdqu	14(%esi), %xmm2
	movdqu	%xmm0, (%edi)
	movdqu	%xmm2, 14(%edi)
# ifdef USE_AS_STPCPY
	lea	30(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit31):
	movdqu	(%esi), %xmm0
	movdqu	15(%esi), %xmm2
	movdqu	%xmm0, (%edi)
	movdqu	%xmm2, 15(%edi)
# ifdef USE_AS_STPCPY
	lea	31(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit32):
	movdqu	(%esi), %xmm0
	movdqu	16(%esi), %xmm2
	movdqu	%xmm0, (%edi)
	movdqu	%xmm2, 16(%edi)
# ifdef USE_AS_STPCPY
	lea	32(%edi), %eax
# endif
	RETURN

	.p2align 4
L(StrncpyExit33):
	movdqu	(%esi), %xmm0
	movdqu	16(%esi), %xmm2
	movb	32(%esi), %cl
	movdqu	%xmm0, (%edi)
	movdqu	%xmm2, 16(%edi)
	movb	%cl, 32(%edi)
	RETURN

/* L(Fill0)..L(Fill8): final step of the zero fill.  The code that
   branches through L(FillTable) has %edx == 0 and %xmm0 == 0 (both are
   cleared at L(StrncpyFillTailWithZero)).  L(FillTable) itself is
   outside this view; presumably entry N selects L(FillN), which stores N
   zero bytes at (%edi).
   NOTE(review): L(Fill3) and L(Fill7) start their store at -1(%edi) and
   so rewrite the byte just before %edi with zero; this looks intentional
   (that byte should already be zero on the fill path) -- confirm.  */
	.p2align 4
L(Fill0):
	RETURN

	.p2align 4
L(Fill1):
	movb	%dl, (%edi)
	RETURN

	.p2align 4
L(Fill2):
	movw	%dx, (%edi)
	RETURN

	.p2align 4
L(Fill3):
	movl	%edx, -1(%edi)
	RETURN

	.p2align 4
L(Fill4):
	movl	%edx, (%edi)
	RETURN

	.p2align 4
L(Fill5):
	movl	%edx, (%edi)
	movb	%dl, 4(%edi)
	RETURN

	.p2align 4
L(Fill6):
	movl	%edx, (%edi)
	movw	%dx, 4(%edi)
	RETURN

	.p2align 4
L(Fill7):
	movlpd	%xmm0, -1(%edi)
	RETURN

	.p2align 4
L(Fill8):
	movlpd	%xmm0, (%edi)
	RETURN

/* L(Fill9)..L(Fill16): final step of the zero fill for 9..16 remaining
   bytes.  %edx and %xmm0 are zero here (cleared at
   L(StrncpyFillTailWithZero)).  Stores may overlap each other.
   NOTE(review): L(Fill15) starts its 16-byte store at -1(%edi), rewriting
   the byte just before %edi with zero -- presumably already zero on the
   fill path; confirm.  */
	.p2align 4
L(Fill9):
	movlpd	%xmm0, (%edi)
	movb	%dl, 8(%edi)
	RETURN

	.p2align 4
L(Fill10):
	movlpd	%xmm0, (%edi)
	movw	%dx, 8(%edi)
	RETURN

	.p2align 4
L(Fill11):
	movlpd	%xmm0, (%edi)
	movl	%edx, 7(%edi)
	RETURN

	.p2align 4
L(Fill12):
	movlpd	%xmm0, (%edi)
	movl	%edx, 8(%edi)
	RETURN

	.p2align 4
L(Fill13):
	movlpd	%xmm0, (%edi)
	movlpd	%xmm0, 5(%edi)
	RETURN

	.p2align 4
L(Fill14):
	movlpd	%xmm0, (%edi)
	movlpd	%xmm0, 6(%edi)
	RETURN

	.p2align 4
L(Fill15):
	movdqu	%xmm0, -1(%edi)
	RETURN

	.p2align 4
L(Fill16):
	movdqu	%xmm0, (%edi)
	RETURN

/* %xmm2 trampoline plus the common tail of all UnalignedXmmN trampolines.
   After the block containing the zero byte has been stored, locate the
   zero byte (bsf), advance %edi past it, adjust the remaining count in
   %ebx, and fall through into L(StrncpyFillTailWithZero).  For stpncpy
   %eax is set to the terminator's address.  */
	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm2):
	movdqu	%xmm2, (%edi, %ecx)

	.p2align 4
L(CopyFrom1To16BytesXmmExit):
	bsf	%edx, %edx
	add	$15, %ebx
	add	%ecx, %edi
# ifdef USE_AS_STPCPY
	lea	(%edi, %edx), %eax
# endif
	sub	%edx, %ebx
	lea	1(%edi, %edx), %edi

/* Zero-fill %ebx bytes starting at %edi.  Clears %xmm0 and %edx as the
   zero sources.  For more than 16 bytes: one unaligned 16-byte store,
   then %edi is rounded down to a 16-byte boundary (with %ebx increased
   by the same amount) so the 64-byte loop can use aligned stores; the
   remainder (0..16 bytes) is finished through L(FillTable).  Clobbers
   %esi.  */
	.p2align 4
L(StrncpyFillTailWithZero):
	pxor	%xmm0, %xmm0
	xor	%edx, %edx
	sub	$16, %ebx
	jbe	L(StrncpyFillExit)

	movdqu	%xmm0, (%edi)
	add	$16, %edi

	mov	%edi, %esi
	and	$0xf, %esi
	sub	%esi, %edi
	add	%esi, %ebx
	sub	$64, %ebx
	jb	L(StrncpyFillLess64)

L(StrncpyFillLoopMovdqa):
	movdqa	%xmm0, (%edi)
	movdqa	%xmm0, 16(%edi)
	movdqa	%xmm0, 32(%edi)
	movdqa	%xmm0, 48(%edi)
	add	$64, %edi
	sub	$64, %ebx
	jae	L(StrncpyFillLoopMovdqa)

	/* %ebx is negative from here on; each `add' undoes part of the
	   last subtraction and the signed `jl' tests whether that many
	   bytes are still left.  */
L(StrncpyFillLess64):
	add	$32, %ebx
	jl	L(StrncpyFillLess32)
	movdqa	%xmm0, (%edi)
	movdqa	%xmm0, 16(%edi)
	add	$32, %edi
	sub	$16, %ebx
	jl	L(StrncpyFillExit)
	movdqa	%xmm0, (%edi)
	add	$16, %edi
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)

L(StrncpyFillLess32):
	add	$16, %ebx
	jl	L(StrncpyFillExit)
	movdqa	%xmm0, (%edi)
	add	$16, %edi
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)

L(StrncpyFillExit):
	add	$16, %ebx
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)

/* The count ran out inside the 64-byte block held in %xmm4..%xmm7.  A
   non-zero mask means a zero byte is also present (Case2).  Otherwise
   (Case3) store whole 16-byte blocks while the count allows and hand the
   remainder to L(CopyFrom1To16BytesCase3); %ecx is preset to the byte
   offset, a multiple of 16, at which that remainder starts.  If all 64
   bytes fit, return directly.  */
	.p2align 4
L(UnalignedLeaveCase2OrCase3):
	test	%edx, %edx
	jnz	L(Unaligned64LeaveCase2)
L(Unaligned64LeaveCase3):
	lea	64(%ebx), %ecx
	and	$-16, %ecx
	add	$48, %ebx
	jl	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm4, (%edi)
	sub	$16, %ebx
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm5, 16(%edi)
	sub	$16, %ebx
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm6, 32(%edi)
	sub	$16, %ebx
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm7, 48(%edi)
# ifdef USE_AS_STPCPY
	lea	64(%edi), %eax
# endif
	RETURN

/* Count ran out in a 64-byte block that also contains a zero byte.  Walk
   the four saved blocks in order with %ecx as the running byte offset.
   For each block: if the count ends there, go to
   L(CopyFrom1To16BytesCase2OrCase3); if the zero byte is there, go to the
   matching UnalignedXmmN trampoline; otherwise store the block and move
   on.  The fourth block is resolved inline by comparing the zero-byte
   index with the remaining count.  */
	.p2align 4
L(Unaligned64LeaveCase2):
	xor	%ecx, %ecx
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %edx
	add	$48, %ebx
	jle	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm4)

	pcmpeqb	%xmm5, %xmm0
	pmovmskb %xmm0, %edx
	movdqu	%xmm4, (%edi)
	add	$16, %ecx
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm5)

	pcmpeqb	%xmm6, %xmm0
	pmovmskb %xmm0, %edx
	movdqu	%xmm5, 16(%edi)
	add	$16, %ecx
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm6)

	pcmpeqb	%xmm7, %xmm0
	pmovmskb %xmm0, %edx
	movdqu	%xmm6, 32(%edi)
	lea	16(%edi, %ecx), %edi
	lea	16(%esi, %ecx), %esi
	bsf	%edx, %edx
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

/* Early exit that copies nothing: returns the value of %edi in %eax.
   NOTE(review): presumably taken for a zero length with %edi holding the
   destination pointer -- the branch here is outside this chunk.  */
.p2align 4
|
|
L(ExitZero):
|
|
movl %edi, %eax
|
|
RETURN
|
|
|
|
END (STRCPY)
|
|
|
|
/* Jump table for the NUL-terminated exits: entry i (0-based) targets
   L(Exit<i+1>).  In the PIC build JMPTBL(I, B) expands to I - B, i.e.
   each entry is the target offset relative to the table base.
   NOTE(review): the .p2align below precedes the .section directive, so it
   pads the section that is current before the switch rather than
   aligning this table -- confirm whether that is intended.  */
.p2align 4
|
|
.section .rodata
|
|
L(ExitTable):
|
|
.int JMPTBL(L(Exit1), L(ExitTable))
|
|
.int JMPTBL(L(Exit2), L(ExitTable))
|
|
.int JMPTBL(L(Exit3), L(ExitTable))
|
|
.int JMPTBL(L(Exit4), L(ExitTable))
|
|
.int JMPTBL(L(Exit5), L(ExitTable))
|
|
.int JMPTBL(L(Exit6), L(ExitTable))
|
|
.int JMPTBL(L(Exit7), L(ExitTable))
|
|
.int JMPTBL(L(Exit8), L(ExitTable))
|
|
.int JMPTBL(L(Exit9), L(ExitTable))
|
|
.int JMPTBL(L(Exit10), L(ExitTable))
|
|
.int JMPTBL(L(Exit11), L(ExitTable))
|
|
.int JMPTBL(L(Exit12), L(ExitTable))
|
|
.int JMPTBL(L(Exit13), L(ExitTable))
|
|
.int JMPTBL(L(Exit14), L(ExitTable))
|
|
.int JMPTBL(L(Exit15), L(ExitTable))
|
|
.int JMPTBL(L(Exit16), L(ExitTable))
|
|
.int JMPTBL(L(Exit17), L(ExitTable))
|
|
.int JMPTBL(L(Exit18), L(ExitTable))
|
|
.int JMPTBL(L(Exit19), L(ExitTable))
|
|
.int JMPTBL(L(Exit20), L(ExitTable))
|
|
.int JMPTBL(L(Exit21), L(ExitTable))
|
|
.int JMPTBL(L(Exit22), L(ExitTable))
|
|
.int JMPTBL(L(Exit23), L(ExitTable))
|
|
.int JMPTBL(L(Exit24), L(ExitTable))
|
|
.int JMPTBL(L(Exit25), L(ExitTable))
|
|
.int JMPTBL(L(Exit26), L(ExitTable))
|
|
.int JMPTBL(L(Exit27), L(ExitTable))
|
|
.int JMPTBL(L(Exit28), L(ExitTable))
|
|
.int JMPTBL(L(Exit29), L(ExitTable))
|
|
.int JMPTBL(L(Exit30), L(ExitTable))
|
|
.int JMPTBL(L(Exit31), L(ExitTable))
|
|
.int JMPTBL(L(Exit32), L(ExitTable))
|
|
|
|
/* Jump table for the length-limited exits: entry 0 targets L(Exit0) and
   entry i (1..33) targets L(StrncpyExit<i>).  It is indexed with %ebx by
   BRANCH_TO_JMPTBL_ENTRY, e.g. at the end of L(Unaligned64LeaveCase2).
   In the PIC build each entry is the target offset from the table base.  */
L(ExitStrncpyTable):
|
|
.int JMPTBL(L(Exit0), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
|
|
|
|
/* Jump table for the fill tails: entry i (0..16) targets L(Fill<i>).
   It is indexed with %ebx by the L(StrncpyFill*) paths.  The table is
   16-byte aligned; in the PIC build each entry is the target offset from
   the table base.  */
.p2align 4
|
|
L(FillTable):
|
|
.int JMPTBL(L(Fill0), L(FillTable))
|
|
.int JMPTBL(L(Fill1), L(FillTable))
|
|
.int JMPTBL(L(Fill2), L(FillTable))
|
|
.int JMPTBL(L(Fill3), L(FillTable))
|
|
.int JMPTBL(L(Fill4), L(FillTable))
|
|
.int JMPTBL(L(Fill5), L(FillTable))
|
|
.int JMPTBL(L(Fill6), L(FillTable))
|
|
.int JMPTBL(L(Fill7), L(FillTable))
|
|
.int JMPTBL(L(Fill8), L(FillTable))
|
|
.int JMPTBL(L(Fill9), L(FillTable))
|
|
.int JMPTBL(L(Fill10), L(FillTable))
|
|
.int JMPTBL(L(Fill11), L(FillTable))
|
|
.int JMPTBL(L(Fill12), L(FillTable))
|
|
.int JMPTBL(L(Fill13), L(FillTable))
|
|
.int JMPTBL(L(Fill14), L(FillTable))
|
|
.int JMPTBL(L(Fill15), L(FillTable))
|
|
.int JMPTBL(L(Fill16), L(FillTable))
|
|
# else
|
|
/* Configuration for the build without USE_AS_STRNCPY.
   PARMS is the %esp-relative offset of the first stack argument at entry
   (STR1 = PARMS, STR2 = STR1+4).  */
# define PARMS 4
|
|
/* Nothing is pushed at entry; %edi and %ebx are pushed later, only when
   the string is longer than 16 bytes.  */
# define ENTRANCE
|
|
/* Return from a path on which %edi is still pushed.  The trailing
   CFI_PUSH re-establishes the unwind description for the code that
   follows the ret, which is assembled in the "%edi pushed" state.  */
# define RETURN POP (%edi); ret; CFI_PUSH (%edi)
|
|
/* Return from a path on which nothing has been pushed.  */
# define RETURN1 ret
|
|
|
|
/* Entry point of the variant built without USE_AS_STRNCPY.
   In:   STR1(%esp) = destination, STR2(%esp) = source.
   Regs: %edx = destination cursor, %ecx = source cursor.
   The first 16 source bytes are probed one at a time; a NUL at index k
   (0..15) branches to L(ExitTail<k+1>) before any register is pushed.  */
.text
|
|
ENTRY (STRCPY)
|
|
ENTRANCE
|
|
mov STR1(%esp), %edx
|
|
mov STR2(%esp), %ecx
|
|
|
|
|
|
cmpb $0, (%ecx)
|
|
jz L(ExitTail1)
|
|
cmpb $0, 1(%ecx)
|
|
jz L(ExitTail2)
|
|
cmpb $0, 2(%ecx)
|
|
jz L(ExitTail3)
|
|
cmpb $0, 3(%ecx)
|
|
jz L(ExitTail4)
|
|
cmpb $0, 4(%ecx)
|
|
jz L(ExitTail5)
|
|
cmpb $0, 5(%ecx)
|
|
jz L(ExitTail6)
|
|
cmpb $0, 6(%ecx)
|
|
jz L(ExitTail7)
|
|
cmpb $0, 7(%ecx)
|
|
jz L(ExitTail8)
|
|
cmpb $0, 8(%ecx)
|
|
jz L(ExitTail9)
|
|
cmpb $0, 9(%ecx)
|
|
jz L(ExitTail10)
|
|
cmpb $0, 10(%ecx)
|
|
jz L(ExitTail11)
|
|
cmpb $0, 11(%ecx)
|
|
jz L(ExitTail12)
|
|
cmpb $0, 12(%ecx)
|
|
jz L(ExitTail13)
|
|
cmpb $0, 13(%ecx)
|
|
jz L(ExitTail14)
|
|
cmpb $0, 14(%ecx)
|
|
jz L(ExitTail15)
|
|
cmpb $0, 15(%ecx)
|
|
jz L(ExitTail16)
|
|
|
|
/* The string is longer than 16 bytes.  Save %edi and %ebx, keep the
   original destination in %edi for the return value, copy the first 16
   bytes (already known to contain no NUL) and test the first 16-byte
   aligned source block that follows the source pointer.  */
PUSH (%edi)
|
|
PUSH (%ebx)
|
|
|
|
|
|
mov %edx, %edi
|
|
/* %ebx = (source + 16) rounded down to a multiple of 16.  */
lea 16(%ecx), %ebx
|
|
and $-16, %ebx
|
|
pxor %xmm0, %xmm0
|
|
movdqu (%ecx), %xmm1
|
|
movdqu %xmm1, (%edx)
|
|
/* %eax = bit mask of the NUL bytes in the aligned block at %ebx.  */
pcmpeqb (%ebx), %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
/* %ebx becomes the offset of that block from the source (1..16).  */
sub %ecx, %ebx
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
|
|
/* No NUL yet: round the source up to the aligned block and advance the
   destination by the same distance; %ebx restarts at 0 as the running
   offset for the unrolled steps that follow.  */
mov %ecx, %eax
|
|
lea 16(%ecx), %ecx
|
|
and $-16, %ecx
|
|
sub %ecx, %eax
|
|
sub %eax, %edx
|
|
xor %ebx, %ebx
|
|
|
|
/* Six unrolled 16-byte steps over the now 16-byte aligned source.  Each
   step loads the next aligned block, stores the previous block (already
   known to be NUL-free) and tests the new block for a NUL.  %ebx is the
   running offset and is advanced before each branch, so at
   L(CopyFrom1To16Bytes) it is the offset of the block holding the NUL.
   %xmm0 stays all-zero as long as no compare matched, which is why it is
   reused as the compare destination without being cleared.  */
.p2align 4
|
|
movdqa (%ecx), %xmm1
|
|
movaps 16(%ecx), %xmm2
|
|
movdqu %xmm1, (%edx)
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
add $16, %ebx
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
|
|
movaps 16(%ecx, %ebx), %xmm3
|
|
movdqu %xmm2, (%edx, %ebx)
|
|
pcmpeqb %xmm3, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
add $16, %ebx
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
|
|
movaps 16(%ecx, %ebx), %xmm4
|
|
movdqu %xmm3, (%edx, %ebx)
|
|
pcmpeqb %xmm4, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
add $16, %ebx
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
|
|
movaps 16(%ecx, %ebx), %xmm1
|
|
movdqu %xmm4, (%edx, %ebx)
|
|
pcmpeqb %xmm1, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
add $16, %ebx
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
|
|
movaps 16(%ecx, %ebx), %xmm2
|
|
movdqu %xmm1, (%edx, %ebx)
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
add $16, %ebx
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
|
|
movaps 16(%ecx, %ebx), %xmm3
|
|
movdqu %xmm2, (%edx, %ebx)
|
|
pcmpeqb %xmm3, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
add $16, %ebx
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
|
|
/* Store the last tested block, then round the source position down to a
   64-byte boundary for the main loop and move the destination by the
   same distance.  Rounding down may step back over bytes that were
   already copied; the loop simply copies them again.  %ebx is left at
   0x60 (six steps of 16).  */
movdqu %xmm3, (%edx, %ebx)
|
|
mov %ecx, %eax
|
|
lea 16(%ecx, %ebx), %ecx
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
sub %eax, %edx
|
|
|
|
/* Main loop: 64 source bytes per iteration, source 64-byte aligned.
   %xmm4..%xmm7 keep the four loaded 16-byte blocks; the byte-wise minimum
   of all four is compared against zero, so a single mask test detects a
   NUL anywhere in the 64 bytes.  Both cursors are advanced past the
   blocks before the test, hence the negative displacements on the
   stores.  The stores of one group are interleaved with the loads of the
   next group.  */
L(Aligned64Loop):
|
|
movaps (%ecx), %xmm2
|
|
movaps %xmm2, %xmm4
|
|
movaps 16(%ecx), %xmm5
|
|
movaps 32(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 48(%ecx), %xmm7
|
|
pminub %xmm5, %xmm2
|
|
add $64, %ecx
|
|
pminub %xmm7, %xmm3
|
|
add $64, %edx
|
|
pminub %xmm2, %xmm3
|
|
/* First pass compares into %xmm3 so that %xmm0 stays all-zero.  */
pcmpeqb %xmm0, %xmm3
|
|
pmovmskb %xmm3, %eax
|
|
test %eax, %eax
|
|
jnz L(Aligned64Leave)
|
|
/* Steady state: store the previous NUL-free group while loading and
   testing the next one.  */
L(Aligned64Loop_start):
|
|
movdqu %xmm4, -64(%edx)
|
|
movaps (%ecx), %xmm2
|
|
movdqa %xmm2, %xmm4
|
|
movdqu %xmm5, -48(%edx)
|
|
movaps 16(%ecx), %xmm5
|
|
pminub %xmm5, %xmm2
|
|
movaps 32(%ecx), %xmm3
|
|
movdqu %xmm6, -32(%edx)
|
|
movaps %xmm3, %xmm6
|
|
movdqu %xmm7, -16(%edx)
|
|
movaps 48(%ecx), %xmm7
|
|
pminub %xmm7, %xmm3
|
|
pminub %xmm2, %xmm3
|
|
/* Compares into %xmm0; it stays zero as long as the loop continues.  */
pcmpeqb %xmm3, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
add $64, %edx
|
|
add $64, %ecx
|
|
test %eax, %eax
|
|
jz L(Aligned64Loop_start)
|
|
/* A NUL lies somewhere in the 64 bytes held in %xmm4..%xmm7.  Find the
   block that contains it, storing the NUL-free blocks before it.
   %ebx enters as 0x60 (from the unrolled steps), so subtracting 0xa0
   gives -64: the offset of the first block relative to the cursors,
   which already point past the group.  It is stepped by 16 per block
   with lea so that the flags of the preceding test survive.  */
L(Aligned64Leave):
|
|
sub $0xa0, %ebx
|
|
/* %xmm0 may hold a non-zero compare result from the loop; clear it.  */
pxor %xmm0, %xmm0
|
|
pcmpeqb %xmm4, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
|
|
pcmpeqb %xmm5, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movdqu %xmm4, -64(%edx)
|
|
test %eax, %eax
|
|
lea 16(%ebx), %ebx
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
|
|
pcmpeqb %xmm6, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movdqu %xmm5, -48(%edx)
|
|
test %eax, %eax
|
|
lea 16(%ebx), %ebx
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
|
|
/* The NUL must be in the fourth block: compute its mask and fall through
   into L(CopyFrom1To16Bytes).  */
movdqu %xmm6, -32(%edx)
|
|
pcmpeqb %xmm7, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ebx), %ebx
|
|
|
|
/*-----------------End of main part---------------------------*/
|
|
|
|
/* Copy the final 1..16 bytes, up to and including the NUL.
   In:  %eax = NUL mask of the final 16-byte block (bit k set when byte k
        is zero), %ebx = offset of that block from %ecx / %edx,
        %edi = original destination.
   Both cursors are moved to the block, the saved %ebx is restored, and
   the lowest set mask bit selects L(Exit<k+1>).  A zero low byte means
   the NUL is in bytes 8..15, handled by L(ExitHigh).  */
.p2align 4
|
|
L(CopyFrom1To16Bytes):
|
|
add %ebx, %edx
|
|
add %ebx, %ecx
|
|
|
|
|
|
POP (%ebx)
|
|
test %al, %al
|
|
jz L(ExitHigh)
|
|
test $0x01, %al
|
|
jnz L(Exit1)
|
|
test $0x02, %al
|
|
jnz L(Exit2)
|
|
test $0x04, %al
|
|
jnz L(Exit3)
|
|
test $0x08, %al
|
|
jnz L(Exit4)
|
|
test $0x10, %al
|
|
jnz L(Exit5)
|
|
test $0x20, %al
|
|
jnz L(Exit6)
|
|
test $0x40, %al
|
|
jnz L(Exit7)
|
|
/* Only bit 7 is left: the NUL is byte 7, copy 8 bytes.  */
/* Exit 8 */
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movl 4(%ecx), %eax
|
|
movl %eax, 4(%edx)
|
|
/* stpcpy returns the address of the copied NUL, strcpy the original
   destination.  */
# ifdef USE_AS_STPCPY
|
|
lea 7(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
/* Second half of the mask dispatch: the low mask byte was zero, so the
   NUL is in bytes 8..15 of the block.  Bits of %ah select
   L(Exit9)..L(Exit15); if none of bits 0..6 is set the NUL is byte 15 and
   the 16-byte copy below is used.  */
.p2align 4
|
|
L(ExitHigh):
|
|
test $0x01, %ah
|
|
jnz L(Exit9)
|
|
test $0x02, %ah
|
|
jnz L(Exit10)
|
|
test $0x04, %ah
|
|
jnz L(Exit11)
|
|
test $0x08, %ah
|
|
jnz L(Exit12)
|
|
test $0x10, %ah
|
|
jnz L(Exit13)
|
|
test $0x20, %ah
|
|
jnz L(Exit14)
|
|
test $0x40, %ah
|
|
jnz L(Exit15)
|
|
/* Copy 16 bytes as two 8-byte moves through %xmm0.  */
/* Exit 16 */
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 8(%ecx), %xmm0
|
|
movlpd %xmm0, 8(%edx)
|
|
/* stpcpy returns the address of the copied NUL, strcpy the original
   destination.  */
# ifdef USE_AS_STPCPY
|
|
lea 15(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
/* The NUL is byte 0 of the final block: copy 1 byte from %ecx to %edx.
   Returns the address of the NUL for stpcpy, otherwise the original
   destination kept in %edi; RETURN restores the saved %edi.  */
.p2align 4
|
|
L(Exit1):
|
|
movb (%ecx), %al
|
|
movb %al, (%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea (%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
/* The NUL is byte 1 of the final block: copy 2 bytes from %ecx to %edx.
   Returns the address of the NUL for stpcpy, otherwise the original
   destination kept in %edi; RETURN restores the saved %edi.  */
.p2align 4
|
|
L(Exit2):
|
|
movw (%ecx), %ax
|
|
movw %ax, (%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 1(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
/* The NUL is byte 2 of the final block: copy 3 bytes (2 + 1) from %ecx
   to %edx.  Returns the address of the NUL for stpcpy, otherwise the
   original destination kept in %edi; RETURN restores the saved %edi.  */
.p2align 4
|
|
L(Exit3):
|
|
movw (%ecx), %ax
|
|
movw %ax, (%edx)
|
|
movb 2(%ecx), %al
|
|
movb %al, 2(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 2(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
/* The NUL is byte 3 of the final block: copy 4 bytes from %ecx to %edx.
   Returns the address of the NUL for stpcpy, otherwise the original
   destination kept in %edi; RETURN restores the saved %edi.  */
.p2align 4
|
|
L(Exit4):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 3(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
/* The NUL is byte 4 of the final block: copy 5 bytes (4 + 1) from %ecx
   to %edx.  Returns the address of the NUL for stpcpy, otherwise the
   original destination kept in %edi; RETURN restores the saved %edi.  */
.p2align 4
|
|
L(Exit5):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movb 4(%ecx), %al
|
|
movb %al, 4(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 4(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
/* The NUL is byte 5 of the final block: copy 6 bytes (4 + 2) from %ecx
   to %edx.  Returns the address of the NUL for stpcpy, otherwise the
   original destination kept in %edi; RETURN restores the saved %edi.  */
.p2align 4
|
|
L(Exit6):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movw 4(%ecx), %ax
|
|
movw %ax, 4(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 5(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
/* The NUL is byte 6 of the final block: copy 7 bytes from %ecx to %edx
   as two overlapping 4-byte moves (bytes 0..3 and 3..6).  Returns the
   address of the NUL for stpcpy, otherwise the original destination
   kept in %edi; RETURN restores the saved %edi.  */
.p2align 4
|
|
L(Exit7):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movl 3(%ecx), %eax
|
|
movl %eax, 3(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 6(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
/* The NUL is byte 8 of the final block: copy 9 bytes (4 + 4 + 1) from
   %ecx to %edx.  Returns the address of the NUL for stpcpy, otherwise
   the original destination kept in %edi; RETURN restores the saved
   %edi.  */
.p2align 4
|
|
L(Exit9):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movl 4(%ecx), %eax
|
|
movl %eax, 4(%edx)
|
|
movb 8(%ecx), %al
|
|
movb %al, 8(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 8(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
/* The NUL is byte 9 of the final block: copy 10 bytes (4 + 4 + 2) from
   %ecx to %edx.  Returns the address of the NUL for stpcpy, otherwise
   the original destination kept in %edi; RETURN restores the saved
   %edi.  */
.p2align 4
|
|
L(Exit10):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movl 4(%ecx), %eax
|
|
movl %eax, 4(%edx)
|
|
movw 8(%ecx), %ax
|
|
movw %ax, 8(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 9(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
/* The NUL is byte 10 of the final block: copy 11 bytes from %ecx to
   %edx as three 4-byte moves, the last one overlapping (bytes 7..10).
   Returns the address of the NUL for stpcpy, otherwise the original
   destination kept in %edi; RETURN restores the saved %edi.  */
.p2align 4
|
|
L(Exit11):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movl 4(%ecx), %eax
|
|
movl %eax, 4(%edx)
|
|
movl 7(%ecx), %eax
|
|
movl %eax, 7(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 10(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
/* The NUL is byte 11 of the final block: copy 12 bytes (4 + 4 + 4) from
   %ecx to %edx.  Returns the address of the NUL for stpcpy, otherwise
   the original destination kept in %edi; RETURN restores the saved
   %edi.  */
.p2align 4
|
|
L(Exit12):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movl 4(%ecx), %eax
|
|
movl %eax, 4(%edx)
|
|
movl 8(%ecx), %eax
|
|
movl %eax, 8(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 11(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
/* The NUL is byte 12 of the final block: copy 13 bytes from %ecx to
   %edx as two overlapping 8-byte moves through %xmm0 (bytes 0..7 and
   5..12).  Returns the address of the NUL for stpcpy, otherwise the
   original destination kept in %edi; RETURN restores the saved %edi.  */
.p2align 4
|
|
L(Exit13):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 5(%ecx), %xmm0
|
|
movlpd %xmm0, 5(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 12(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
/* The NUL is byte 13 of the final block: copy 14 bytes from %ecx to
   %edx as two overlapping 8-byte moves through %xmm0 (bytes 0..7 and
   6..13).  Returns the address of the NUL for stpcpy, otherwise the
   original destination kept in %edi; RETURN restores the saved %edi.  */
.p2align 4
|
|
L(Exit14):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 6(%ecx), %xmm0
|
|
movlpd %xmm0, 6(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 13(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
/* The NUL is byte 14 of the final block: copy 15 bytes from %ecx to
   %edx as two overlapping 8-byte moves through %xmm0 (bytes 0..7 and
   7..14).  Returns the address of the NUL for stpcpy, otherwise the
   original destination kept in %edi; RETURN restores the saved %edi.  */
.p2align 4
|
|
L(Exit15):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 7(%ecx), %xmm0
|
|
movlpd %xmm0, 7(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 14(%edx), %eax
|
|
# else
|
|
movl %edi, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
/* The preceding RETURN ends with CFI_PUSH (%edi).  The L(ExitTail*) paths
   below are reached from the entry probes, before anything is pushed, so
   the unwind description is switched back to "nothing pushed" here.  */
CFI_POP (%edi)
|
|
|
|
|
|
/* Source byte 0 is the NUL: copy that single byte and return the
   destination in %edx.  No stpcpy special case is needed because the
   NUL lands at the destination address itself.  */
.p2align 4
|
|
L(ExitTail1):
|
|
movb (%ecx), %al
|
|
movb %al, (%edx)
|
|
movl %edx, %eax
|
|
RETURN1
|
|
|
|
/* Source byte 1 is the NUL: copy 2 bytes from %ecx to %edx.  Nothing is
   on the stack here, so plain ret (RETURN1) is used.  Returns the
   address of the NUL for stpcpy, otherwise the destination.  */
.p2align 4
|
|
L(ExitTail2):
|
|
movw (%ecx), %ax
|
|
movw %ax, (%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 1(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN1
|
|
|
|
/* Source byte 2 is the NUL: copy 3 bytes (2 + 1) from %ecx to %edx.
   Nothing is on the stack here, so plain ret (RETURN1) is used.  Returns
   the address of the NUL for stpcpy, otherwise the destination.  */
.p2align 4
|
|
L(ExitTail3):
|
|
movw (%ecx), %ax
|
|
movw %ax, (%edx)
|
|
movb 2(%ecx), %al
|
|
movb %al, 2(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 2(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN1
|
|
|
|
/* Source byte 3 is the NUL: copy 4 bytes from %ecx to %edx.  Nothing is
   on the stack here, so plain ret (RETURN1) is used.  Returns the
   address of the NUL for stpcpy, otherwise the destination.  */
.p2align 4
|
|
L(ExitTail4):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 3(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN1
|
|
|
|
/* Source byte 4 is the NUL: copy 5 bytes (4 + 1) from %ecx to %edx.
   Nothing is on the stack here, so plain ret (RETURN1) is used.  Returns
   the address of the NUL for stpcpy, otherwise the destination.  */
.p2align 4
|
|
L(ExitTail5):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movb 4(%ecx), %al
|
|
movb %al, 4(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 4(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN1
|
|
|
|
/* Source byte 5 is the NUL: copy 6 bytes (4 + 2) from %ecx to %edx.
   Nothing is on the stack here, so plain ret (RETURN1) is used.  Returns
   the address of the NUL for stpcpy, otherwise the destination.  */
.p2align 4
|
|
L(ExitTail6):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movw 4(%ecx), %ax
|
|
movw %ax, 4(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 5(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN1
|
|
|
|
/* Source byte 6 is the NUL: copy 7 bytes from %ecx to %edx as two
   overlapping 4-byte moves (bytes 0..3 and 3..6).  Nothing is on the
   stack here, so plain ret (RETURN1) is used.  Returns the address of
   the NUL for stpcpy, otherwise the destination.  */
.p2align 4
|
|
L(ExitTail7):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movl 3(%ecx), %eax
|
|
movl %eax, 3(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 6(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN1
|
|
|
|
/* Source byte 7 is the NUL: copy 8 bytes (4 + 4) from %ecx to %edx.
   Nothing is on the stack here, so plain ret (RETURN1) is used.  Returns
   the address of the NUL for stpcpy, otherwise the destination.  */
.p2align 4
|
|
L(ExitTail8):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movl 4(%ecx), %eax
|
|
movl %eax, 4(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 7(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN1
|
|
|
|
/* Source byte 8 is the NUL: copy 9 bytes (4 + 4 + 1) from %ecx to %edx.
   Nothing is on the stack here, so plain ret (RETURN1) is used.  Returns
   the address of the NUL for stpcpy, otherwise the destination.  */
.p2align 4
|
|
L(ExitTail9):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movl 4(%ecx), %eax
|
|
movl %eax, 4(%edx)
|
|
movb 8(%ecx), %al
|
|
movb %al, 8(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 8(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN1
|
|
|
|
/* Source byte 9 is the NUL: copy 10 bytes (4 + 4 + 2) from %ecx to %edx.
   Nothing is on the stack here, so plain ret (RETURN1) is used.  Returns
   the address of the NUL for stpcpy, otherwise the destination.  */
.p2align 4
|
|
L(ExitTail10):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movl 4(%ecx), %eax
|
|
movl %eax, 4(%edx)
|
|
movw 8(%ecx), %ax
|
|
movw %ax, 8(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 9(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN1
|
|
|
|
/* Source byte 10 is the NUL: copy 11 bytes from %ecx to %edx as three
   4-byte moves, the last one overlapping (bytes 7..10).  Nothing is on
   the stack here, so plain ret (RETURN1) is used.  Returns the address
   of the NUL for stpcpy, otherwise the destination.  */
.p2align 4
|
|
L(ExitTail11):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movl 4(%ecx), %eax
|
|
movl %eax, 4(%edx)
|
|
movl 7(%ecx), %eax
|
|
movl %eax, 7(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 10(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN1
|
|
|
|
/* Source byte 11 is the NUL: copy 12 bytes (4 + 4 + 4) from %ecx to
   %edx.  Nothing is on the stack here, so plain ret (RETURN1) is used.
   Returns the address of the NUL for stpcpy, otherwise the
   destination.  */
.p2align 4
|
|
L(ExitTail12):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movl 4(%ecx), %eax
|
|
movl %eax, 4(%edx)
|
|
movl 8(%ecx), %eax
|
|
movl %eax, 8(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 11(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN1
|
|
|
|
/* Source byte 12 is the NUL: copy 13 bytes from %ecx to %edx as two
   overlapping 8-byte moves through %xmm0 (bytes 0..7 and 5..12).
   Nothing is on the stack here, so plain ret (RETURN1) is used.  Returns
   the address of the NUL for stpcpy, otherwise the destination.  */
.p2align 4
|
|
L(ExitTail13):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 5(%ecx), %xmm0
|
|
movlpd %xmm0, 5(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 12(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN1
|
|
|
|
/* Source byte 13 is the NUL: copy 14 bytes from %ecx to %edx as two
   overlapping 8-byte moves through %xmm0 (bytes 0..7 and 6..13).
   Nothing is on the stack here, so plain ret (RETURN1) is used.  Returns
   the address of the NUL for stpcpy, otherwise the destination.  */
.p2align 4
|
|
L(ExitTail14):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 6(%ecx), %xmm0
|
|
movlpd %xmm0, 6(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 13(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN1
|
|
|
|
/* Source byte 14 is the NUL: copy 15 bytes from %ecx to %edx as two
   overlapping 8-byte moves through %xmm0 (bytes 0..7 and 7..14).
   Nothing is on the stack here, so plain ret (RETURN1) is used.  Returns
   the address of the NUL for stpcpy, otherwise the destination.  */
.p2align 4
|
|
L(ExitTail15):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 7(%ecx), %xmm0
|
|
movlpd %xmm0, 7(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 14(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN1
|
|
|
|
/* Source byte 15 is the NUL: copy 16 bytes from %ecx to %edx as two
   8-byte moves through %xmm0.  Nothing is on the stack here, so plain
   ret (RETURN1) is used.  Returns the address of the NUL for stpcpy,
   otherwise the destination.  */
.p2align 4
|
|
L(ExitTail16):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 8(%ecx), %xmm0
|
|
movlpd %xmm0, 8(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
lea 15(%edx), %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN1
|
|
|
|
END (STRCPY)
|
|
# endif
|
|
|
|
#endif
|