73 lines
1.5 KiB
ArmAsm
73 lines
1.5 KiB
ArmAsm
|
#include "textflag.h"
|
||
|
|
||
|
// func bytesN(dst, a, b *byte, n int)
//
// bytesN stores a[i] ^ b[i] into dst[i] for every i in [0, n).
// A non-positive n returns immediately without reading or writing memory.
//
// Strategy: the tail is peeled off backwards from the end of the buffers --
// single bytes until the remaining length is a multiple of 8, then at most
// one 8-byte word -- until the remaining length is a multiple of 16. That
// prefix is then processed forwards, 16 bytes per iteration, using unaligned
// SSE2 loads and stores (MOVOU), so no pointer alignment is required.
//
// Register use:
//   BX = dst, SI = a, CX = b
//   DX = bytes still to process (equivalently: offset just past them)
//   AX = forward offset in the 16-byte loop; scratch in the tail code
//   DI = scratch in the tail code
//   X0, X1 = 16-byte operands
//
// The FP argument names follow the prototype documented above, which is what
// go vet's asmdecl check compares against the Go declaration.
TEXT ·bytesN(SB), NOSPLIT, $0
	MOVQ  dst+0(FP), BX
	MOVQ  a+8(FP), SI
	MOVQ  b+16(FP), CX
	MOVQ  n+24(FP), DX
	TESTQ DX, DX
	JLE   done                // n <= 0: nothing to do (the 16-byte loop below would never terminate for n == 0)
	TESTQ $15, DX
	JNZ   tail                // n is not a multiple of 16: peel the tail first

blocks:
	// Invariant here: DX is a non-zero multiple of 16.
	XORQ  AX, AX              // AX = 0: forward offset into the buffers

loop16:
	MOVOU (SI)(AX*1), X0      // X0 = a[AX : AX+16]
	MOVOU (CX)(AX*1), X1      // X1 = b[AX : AX+16]
	PXOR  X1, X0
	MOVOU X0, (BX)(AX*1)      // dst[AX : AX+16] = X0
	ADDQ  $16, AX
	CMPQ  DX, AX
	JNE   loop16              // stop once the offset reaches the remaining length
	RET

tail1:
	// XOR single bytes backwards until DX is a multiple of 8.
	SUBQ  $1, DX
	MOVB  (SI)(DX*1), DI
	MOVB  (CX)(DX*1), AX
	XORB  AX, DI
	MOVB  DI, (BX)(DX*1)
	TESTQ $7, DX
	JNZ   tail1
	TESTQ DX, DX
	JZ    done                // everything was consumed byte by byte
	TESTQ $15, DX
	JZ    blocks              // remaining length is a multiple of 16
	// Otherwise DX % 16 == 8: fall through and peel one 8-byte word.

tail:
	TESTQ $7, DX
	JNZ   tail1               // not a multiple of 8 yet: peel bytes first
	SUBQ  $8, DX              // XOR one 8-byte word backwards
	MOVQ  (SI)(DX*1), DI
	MOVQ  (CX)(DX*1), AX
	XORQ  AX, DI
	MOVQ  DI, (BX)(DX*1)
	CMPQ  DX, $16             // DX is now a multiple of 16; zero means finished
	JGE   blocks

done:
	RET
|
||
|
|
||
|
// func bytes8(dst, a, b *byte)
//
// bytes8 XORs the 8 bytes at a with the 8 bytes at b and stores the 8-byte
// result at dst. Exactly 8 bytes are read from each input and 8 bytes are
// written; the plain MOVQ/XORQ accesses have no alignment requirement.
//
// Register use: DI = dst, SI = a, DX = b, AX = the 8-byte working value.
//
// The FP argument names follow the prototype documented above, which is what
// go vet's asmdecl check compares against the Go declaration.
TEXT ·bytes8(SB), NOSPLIT, $0
	MOVQ dst+0(FP), DI
	MOVQ a+8(FP), SI
	MOVQ b+16(FP), DX
	MOVQ (SI), AX             // AX = a[0:8]
	XORQ (DX), AX             // AX ^= b[0:8]
	MOVQ AX, (DI)             // dst[0:8] = AX
	RET
|
||
|
|
||
|
// func bytes16(dst, a, b *byte)
//
// bytes16 XORs the 16 bytes at a with the 16 bytes at b and stores the
// 16-byte result at dst. Exactly 16 bytes are read from each input and 16
// bytes are written.
//
// Register use: DI = dst, SI = a, DX = b, X0/X1 = the 16-byte operands.
//
// The FP argument names follow the prototype documented above, which is what
// go vet's asmdecl check compares against the Go declaration.
TEXT ·bytes16(SB), NOSPLIT, $0
	MOVQ  dst+0(FP), DI
	MOVQ  a+8(FP), SI
	MOVQ  b+16(FP), DX
	MOVOU (SI), X0            // unaligned load: a need not be 16-byte aligned
	MOVOU (DX), X1            // b is loaded explicitly; PXOR with a memory operand would require alignment
	PXOR  X1, X0
	MOVOU X0, (DI)            // unaligned store of a ^ b
	RET
|