Metropoli BBS
VIEWER: memcpy.s MODE: TEXT (ASCII)
! Fast memmove/memcpy/bcopy
! Copyright Australian National University, 1995
! This file may be used under the terms of the GNU Public License
! Author: Paul Mackerras, September 95
! Minor beautifications David S. Miller

#include <asm/cprefix.h>

	.globl	C_LABEL(bcopy)
C_LABEL(bcopy):
	! bcopy entry point: exchange the first two arguments (%o0 and %o1,
	! with %o3 as scratch), leave %o2 alone, and fall through into the
	! memmove/memcpy body that follows.
	! NOTE(review): presumably bcopy takes (source, dest, count) while
	! memcpy takes (dest, source, count) - confirm against the callers.
	mov	%o0,%o3
	mov	%o1,%o0
	mov	%o3,%o1

	.globl	C_LABEL(amemmove)
C_LABEL(amemmove):
	.globl	C_LABEL(memmove)
	.globl	C_LABEL(memcpy)
C_LABEL(memmove):
C_LABEL(memcpy):
	! Shared body for amemmove, memmove and memcpy (bcopy falls in from
	! above after exchanging its first two arguments).
	! After the save the arguments are addressed as:
	!   %i0 = dest pointer, %i1 = source pointer, %i2 = byte count
	! %l7 keeps the original dest, which Lout hands back in %o0.
	save	%sp,-96,%sp
	mov	%i0,%l7

	cmp	%i0,%i1		! check for dest within source area
	! dest <= source (unsigned): copy forwards.  The delay slot runs
	! only when the branch is taken and performs the same alignment
	! test as the andcc just before the 1: label below.
	bleu,a	1f
	andcc	%i0,3,%l1
	! dest > source: copy backwards only if dest < source + count.
	! The delay slot (taken path only) leaves %i1 = source + count,
	! which is where Lback starts from.
	add	%i1,%i2,%l0
	cmp	%i0,%l0
	blu,a	Lback
	mov	%l0,%i1

	! copying forwards
	! first get dest to be word-aligned
	andcc	%i0,3,%l1
1:
	! The cmp in the delay slot (taken path only) sets the flags that
	! the first branch at Lwalign tests.
	be,a	Lwalign		! if dest already word-aligned
	cmp	%i2,4
	mov	4,%l2
	sub	%l2,%l1,%l2	! #bytes until word-aligned
	subcc	%i2,%l2,%i2
	! If that leaves count <= 0 (signed compare), the delay slot
	! restores the count and sets the flags that Lend tests.
	ble,a	Lend		! not copying enough to get past word bdry
	addcc	%i2,%l2,%i2

1:
	! Copies %l2 bytes.  bgt tests the subcc on %l2; the add in its
	! delay slot executes on every pass, including the last one.
	ldub	[%i1],%o0	! copy single bytes until word-aligned
	add	%i1,1,%i1
	subcc	%l2,1,%l2
	stb	%o0,[%i0]
	bgt	1b
	add	%i0,1,%i0
	! flags for the first branch at Lwalign (fall-through path)
	cmp	%i2,4

Lwalign:			! dest now word aligned
	! Flags on entry come from cmp %i2,4.  With fewer than 4 bytes
	! left, finish in the byte loop at Lend; the orcc in the delay
	! slot (taken path only) sets the flags from %i2 for Lend.
	blt,a	Lend
	orcc	%i2,%g0,%g0

	! %l0 = source & 3.  Zero means source and dest are both word
	! aligned; the delay slot then tests bit 2 of dest for Ldoword.
	andcc	%i1,3,%l0
	be,a	Ldoword		! if dest word aligned wrt src
	andcc	%i0,4,%g0

	! yucky cases where we have to shift
	! Register use from here to Lendn:
	!   %l0 = left shift count in bits, 8 * (source & 3)
	!   %l1 = right shift count in bits, 8 * %l2
	!   %l2 = 4 - (source & 3)
	!   %i1 = address of the next aligned source word to load
	!   %l6 = scratch for the right-shifted half
	! Each stored word is (earlier word << %l0) | (later word >> %l1).

	mov	4,%l2
	sub	%l2,%l0,%l2	! address adjustment, used at Lendn
	sll	%l0,3,%l0	! bit offset = shift left count
	sll	%l2,3,%l1	! shift right count
	add	%i1,%l2,%i1	! round up to next word
	ld	[%i1-4],%o0	! get first word

	andcc	%i0,4,%g0	! get destination double-word aligned
	! Already double-word aligned: skip the single word below.  The
	! delay slot (taken path only) tests bit 2 of the source address.
	be,a	1f
	andcc	%i1,4,%g0
	ld	[%i1],%o1	! by constructing and storing one word
	add	%i0,4,%i0
	add	%i1,4,%i1
	sub	%i2,4,%i2
	sll	%o0,%l0,%o0
	srl	%o1,%l1,%l6
	or	%o0,%l6,%o0
	st	%o0,[%i0-4]
	mov	%o1,%o0		! newest word becomes the carried word

	andcc	%i1,4,%g0	! now construct & store pairs of double-words
1:
	! Bit 2 of the source address set: use the variant at 3:, with 4
	! subtracted from the count in the delay slot (taken path only).
	bne,a	3f		! if source now not double-word aligned
	subcc	%i2,4,%i2
	subcc	%i2,16,%i2
	! The mov in the delay slot always runs: the carried word moves
	! to %o1 for the loops at 4: and 5:.
	blt	2f
	mov	%o0,%o1
4:
	! 16 bytes per pass.  Four new source words arrive in %o2, %o3,
	! %o0, %o1 (in address order); %o1 from the previous pass is the
	! carried word.  bge tests the subcc result; the final std sits in
	! its delay slot and always executes.
	ldd	[%i1],%o2
	sll	%o1,%l0,%o4
	ldd	[%i1+8],%o0
	add	%i0,16,%i0
	add	%i1,16,%i1
	subcc	%i2,16,%i2
	srl	%o2,%l1,%l6
	or	%l6,%o4,%o4
	sll	%o2,%l0,%o5
	srl	%o3,%l1,%l6
	or	%l6,%o5,%o5
	std	%o4,[%i0-16]
	sll	%o3,%l0,%o4
	srl	%o0,%l1,%l6
	or	%l6,%o4,%o4
	sll	%o0,%l0,%o5
	srl	%o1,%l1,%l6
	or	%l6,%o5,%o5
	bge	4b
	std	%o4,[%i0-8]
2:
	! %i2 is 16 less than the bytes left.  Adding 12 leaves it 4 less,
	! so a negative result means no whole word remains; the delay slot
	! (taken path only) adds the last 4 back and sets flags for Lend.
	addcc	%i2,12,%i2
	blt,a	Lendn
	addcc	%i2,4,%i2
5:
	! One word per pass: merge the carried %o1 with the new word %o2,
	! which becomes the carried word in the delay slot of bge.
	ld	[%i1],%o2
	add	%i0,4,%i0
	add	%i1,4,%i1
	subcc	%i2,4,%i2
	sll	%o1,%l0,%o0
	srl	%o2,%l1,%o1
	or	%o1,%o0,%o0
	st	%o0,[%i0-4]
	bge	5b
	mov	%o2,%o1
	! delay slot restores the true byte count and sets flags for Lend
	ba	Lendn
	addcc	%i2,4,%i2

3:
	! Flags come from the subcc in the delay slot of the bne,a above.
	! Fewer than 4 bytes left: restore the count and go to Lendn.
	blt,a	Lendn
	addcc	%i2,4,%i2
	! Load one more word so that %i1 becomes double-word aligned.
	! %o0 and %o1 now hold a pair whose merged word is still to be
	! stored; the 4 already subtracted from %i2 accounts for it.
	ld	[%i1],%o1
	add	%i1,4,%i1
	subcc	%i2,16,%i2
	! fewer than 16 more bytes: undo the subtraction and skip to 8:
	blt,a	8f
	addcc	%i2,16,%i2
7:
	! 16 bytes per pass.  Stores the pending %o0/%o1 word plus three
	! more, and leaves the last two words loaded in %o0/%o1 as the
	! new pending pair.
	ldd	[%i1],%o2
	sll	%o0,%l0,%o4
	srl	%o1,%l1,%l6
	or	%l6,%o4,%o4
	sll	%o1,%l0,%o5
	ldd	[%i1+8],%o0
	add	%i0,16,%i0
	add	%i1,16,%i1
	subcc	%i2,16,%i2
	srl	%o2,%l1,%l6
	or	%l6,%o5,%o5
	std	%o4,[%i0-16]
	sll	%o2,%l0,%o4
	srl	%o3,%l1,%l6
	or	%l6,%o4,%o4
	sll	%o3,%l0,%o5
	srl	%o0,%l1,%l6
	or	%l6,%o5,%o5
	bge	7b
	std	%o4,[%i0-8]
	! undo the pre-subtraction made for the loop test
	addcc	%i2,16,%i2
8:
	! Store the pending word, then continue one word at a time while
	! at least 4 more bytes remain.
	sll	%o0,%l0,%o4
	srl	%o1,%l1,%l6
	or	%l6,%o4,%o4
	st	%o4,[%i0]
	add	%i0,4,%i0
	subcc	%i2,4,%i2
	blt,a	Lendn
	addcc	%i2,4,%i2
	mov	%o1,%o0
	ld	[%i1],%o1
	ba	8b
	add	%i1,4,%i1


Ldoword:
	! here both dest and src are word-aligned
	! make dest double-word aligned
	! Flags on entry come from andcc %i0,4,%g0 in the delay slot of the
	! branch that jumps here.  If bit 2 of dest is clear, skip the
	! single-word copy; the delay slot (taken path only) tests bit 2
	! of the source address for the branch at 1:.
	be,a	1f
	andcc	%i1,4,%g0
	ld	[%i1],%o0
	add	%i0,4,%i0
	add	%i1,4,%i1
	sub	%i2,4,%i2
	st	%o0,[%i0-4]
	! fewer than 4 bytes left: byte tail, with flags set from %i2
	cmp	%i2,4
	blt,a	Lend
	orcc	%i2,%g0,%g0
	andcc	%i1,4,%g0

1:
	! Bit 2 of the source address clear: both pointers are double-word
	! aligned.  The delay slot (taken path only) pre-subtracts 32 for
	! the loop test at Ldodble.
	be,a	Ldodble		! if source double-word aligned now
	subcc	%i2,32,%i2
	! Source is one word off.  Hold one word in %o5 so that %i1
	! becomes double-word aligned; 36 = 4 for the held word plus 32
	! for one pass of the loop at 2:.
	ld	[%i1],%o5
	add	%i1,4,%i1
	subcc	%i2,36,%i2
	! Not enough for a full pass: the delay slot (taken path only)
	! leaves %i2 = bytes left after the held word.
	blt,a	3f
	add	%i2,32,%i2
2:
	! 32 bytes per pass.  The mov instructions rearrange the loaded
	! words so that each std writes two consecutive source words,
	! starting with the held word.  %o5 ends each pass holding the
	! last word loaded, not yet stored.  bge tests the subcc result;
	! the add in its delay slot always executes.
	ldd	[%i1],%o2
	add	%i1,32,%i1
	subcc	%i2,32,%i2
	mov	%o5,%o0
	ldd	[%i1-24],%o4
	mov	%o2,%o1
	std	%o0,[%i0]
	mov	%o3,%o2
	ldd	[%i1-16],%o0
	mov	%o4,%o3
	std	%o2,[%i0+8]
	mov	%o5,%o2
	ldd	[%i1-8],%o4
	mov	%o0,%o3
	std	%o2,[%i0+16]
	mov	%o1,%o0
	mov	%o4,%o1
	std	%o0,[%i0+24]
	bge	2b
	add	%i0,32,%i0
	! undo the pre-subtraction made for the loop test
	add	%i2,32,%i2
3:
	! Store the held word; while at least 4 more bytes remain, load
	! the next word and repeat.  Otherwise restore the count (which
	! also sets the flags) and finish at Lend.
	st	%o5,[%i0]
	add	%i0,4,%i0
	subcc	%i2,4,%i2
	blt,a	Lend
	addcc	%i2,4,%i2
	ld	[%i1],%o5
	ba	3b
	add	%i1,4,%i1

Ldodble:
	! dest and source are both double-word aligned
	! Flags on entry come from the subcc %i2,32,%i2 in the delay slot
	! of the branch that jumps here.  Fewer than 32 bytes left: skip
	! the loop; the delay slot (taken path only) leaves %i2 = bytes
	! left minus 4 and sets the flags for the test at 2:.
	blt,a	2f
	addcc	%i2,28,%i2
1:
	! 32 bytes per pass; the last std sits in the delay slot of bge
	! and always executes.
	ldd	[%i1],%o0	! copy sets of 4 double-words
	subcc	%i2,32,%i2
	ldd	[%i1+8],%o2
	add	%i1,32,%i1
	ldd	[%i1-16],%o4
	add	%i0,32,%i0
	std	%o0,[%i0-32]
	ldd	[%i1-8],%o0
	std	%o2,[%i0-24]
	std	%o4,[%i0-16]
	bge	1b
	std	%o0,[%i0-8]
	! %i2 = bytes left minus 4, flags set for the test at 2:
	addcc	%i2,28,%i2
2:
	! no whole word left: restore the count and go to the byte tail
	blt,a	Lend
	addcc	%i2,4,%i2
3:
	! One word per pass; the st in the delay slot always executes.
	ld	[%i1],%o0	! copy words
	add	%i1,4,%i1
	add	%i0,4,%i0
	subcc	%i2,4,%i2
	bge	3b
	st	%o0,[%i0-4]
	! delay slot restores the true byte count and sets flags for Lend
	ba	Lend
	addcc	%i2,4,%i2

Lendn:
	! Entered from the shifting code, where %i1 was rounded up by %l2
	! to a word boundary.  Subtract %l2 again before the byte loop.
	! sub does not touch the flags set by the branch that came here.
	sub	%i1,%l2,%i1
Lend:
	! Forward byte tail.  Every branch to Lend or Lendn sets the flags
	! from %i2 in its delay slot; leave if the count is <= 0.
	ble	Lout
	nop
1:
	! Copy the remaining %i2 bytes one at a time; the add in the delay
	! slot of bgt executes on every pass.
	ldub	[%i1],%o0
	add	%i1,1,%i1
	subcc	%i2,1,%i2
	stb	%o0,[%i0]
	bgt	1b
	add	%i0,1,%i0

	ba	Lout
	nop

Lback:	! Here we have to copy backwards
	! On entry %i1 is already source + count (set in the delay slot of
	! the branch that jumps here).  After the add below, %i0 is
	! dest + count, so both pointers sit one past the last byte and
	! every access uses a negative offset or a pre-decrement.
	add	%i0,%i2,%i0
	! first get dest to be word-aligned
	andcc	%i0,3,%l2	! #bytes until word-aligned
	! The cmp in the delay slot (taken path only) sets the flags that
	! the first branch at Lbwal tests.
	be,a	Lbwal		! if dest already word-aligned
	cmp	%i2,4
	subcc	%i2,%l2,%i2
	! If that leaves count <= 0 (signed compare), the delay slot
	! restores the count and sets the flags that Lbend tests.
	ble,a	Lbend		! not copying enough to get past word bdry
	addcc	%i2,%l2,%i2

1:
	! Copies %l2 bytes downwards; the sub in the delay slot of bgt
	! executes on every pass.
	ldub	[%i1-1],%o0	! copy single bytes until word-aligned
	sub	%i1,1,%i1
	subcc	%l2,1,%l2
	stb	%o0,[%i0-1]
	bgt	1b
	sub	%i0,1,%i0
	! flags for the first branch at Lbwal (fall-through path)
	cmp	%i2,4

Lbwal:				! dest now word aligned
	! Flags on entry come from cmp %i2,4.  With fewer than 4 bytes
	! left, finish in the byte loop at Lbend; the orcc in the delay
	! slot (taken path only) sets the flags from %i2 for Lbend.
	blt,a	Lbend
	orcc	%i2,%g0,%g0

	! %l2 = source end & 3.  Zero means source and dest are both word
	! aligned; the delay slot then tests bit 2 of dest for Lbword.
	andcc	%i1,3,%l2
	be,a	Lbword		! if dest word aligned wrt src
	andcc	%i0,4,%g0

	! yucky cases where we have to shift
	! note %l2 used below at Lbendn
	! Register use from here to Lbendn:
	!   %l0 = right shift count in bits, 8 * (4 - %l2)
	!   %l1 = left shift count in bits, 8 * %l2
	!   %i1 = address of the lowest aligned source word loaded so far
	!   %l6 = scratch for the left-shifted half
	! Each stored word is
	!   (higher-address word >> %l0) | (lower-address word << %l1).

	mov	4,%l0
	sub	%l0,%l2,%l0	! # bytes to right of src in word
	sll	%l0,3,%l0	! bit offset = shift right count
	sll	%l2,3,%l1	! shift left count
	sub	%i1,%l2,%i1	! round down to word boundary
	ld	[%i1],%o1	! get first word

	andcc	%i0,4,%g0	! get destination double-word aligned
	! Already double-word aligned: skip the single word below.  The
	! delay slot (taken path only) tests bit 2 of the source address.
	be,a	1f
	andcc	%i1,4,%g0
	ld	[%i1-4],%o0	! by constructing and storing one word
	sub	%i0,4,%i0
	sub	%i1,4,%i1
	sub	%i2,4,%i2
	srl	%o1,%l0,%o1
	sll	%o0,%l1,%l6
	or	%o1,%l6,%o1
	st	%o1,[%i0]
	mov	%o0,%o1		! newest word becomes the carried word

	andcc	%i1,4,%g0	! now construct & store pairs of double-words
1:
	! Bit 2 of the source address set: use the variant at 3:, with 4
	! subtracted from the count in the delay slot (taken path only).
	bne,a	3f		! if source now not double-word aligned
	subcc	%i2,4,%i2
	subcc	%i2,16,%i2
	! The mov in the delay slot always runs: the carried word moves
	! to %o0 for the loops at 4: and 5:.
	blt	2f
	mov	%o1,%o0
4:
	! 16 bytes per pass, working downwards.  %o0 from the previous
	! pass is the carried word and is consumed by the first srl
	! before the second ldd overwrites it.  Four new source words
	! arrive in %o0, %o1, %o2, %o3 (ascending address order).  bge
	! tests the subcc result; the final std sits in its delay slot
	! and always executes.
	ldd	[%i1-8],%o2
	srl	%o0,%l0,%o5
	ldd	[%i1-16],%o0
	sub	%i0,16,%i0
	sub	%i1,16,%i1
	subcc	%i2,16,%i2
	sll	%o3,%l1,%l6
	or	%l6,%o5,%o5
	srl	%o3,%l0,%o4
	sll	%o2,%l1,%l6
	or	%l6,%o4,%o4
	std	%o4,[%i0+8]
	srl	%o2,%l0,%o5
	sll	%o1,%l1,%l6
	or	%l6,%o5,%o5
	srl	%o1,%l0,%o4
	sll	%o0,%l1,%l6
	or	%l6,%o4,%o4
	bge	4b
	std	%o4,[%i0]
2:
	! %i2 is 16 less than the bytes left.  Adding 12 leaves it 4 less,
	! so a negative result means no whole word remains; the delay slot
	! (taken path only) adds the last 4 back and sets flags for Lbend.
	addcc	%i2,12,%i2
	blt,a	Lbendn
	addcc	%i2,4,%i2
5:
	! One word per pass: merge the carried %o0 with the new word %o2,
	! which becomes the carried word in the delay slot of bge.
	ld	[%i1-4],%o2
	sub	%i0,4,%i0
	sub	%i1,4,%i1
	subcc	%i2,4,%i2
	srl	%o0,%l0,%o0
	sll	%o2,%l1,%o1
	or	%o1,%o0,%o0
	st	%o0,[%i0]
	bge	5b
	mov	%o2,%o0
	! delay slot restores the true byte count and sets flags for Lbend
	ba	Lbendn
	addcc	%i2,4,%i2

3:
	! Flags come from the subcc in the delay slot of the bne,a above.
	! Fewer than 4 bytes left: restore the count and go to Lbendn.
	blt,a	Lbendn
	addcc	%i2,4,%i2
	! Load one more word so that %i1 becomes double-word aligned.
	! %o1 (higher address) and %o0 (lower address) now hold a pair
	! whose merged word is still to be stored; the 4 already
	! subtracted from %i2 accounts for it.
	ld	[%i1-4],%o0
	sub	%i1,4,%i1
	subcc	%i2,16,%i2
	! fewer than 16 more bytes: undo the subtraction and skip to 8:
	blt,a	8f
	addcc	%i2,16,%i2
7:
	! 16 bytes per pass.  Stores the pending %o1/%o0 word plus three
	! more, and leaves the two lowest words loaded in %o0/%o1 as the
	! new pending pair.
	ldd	[%i1-8],%o2
	srl	%o1,%l0,%o5
	sll	%o0,%l1,%l6
	or	%l6,%o5,%o5
	srl	%o0,%l0,%o4
	ldd	[%i1-16],%o0
	sub	%i0,16,%i0
	sub	%i1,16,%i1
	subcc	%i2,16,%i2
	sll	%o3,%l1,%l6
	or	%l6,%o4,%o4
	std	%o4,[%i0+8]
	srl	%o3,%l0,%o5
	sll	%o2,%l1,%l6
	or	%l6,%o5,%o5
	srl	%o2,%l0,%o4
	sll	%o1,%l1,%l6
	or	%l6,%o4,%o4
	bge	7b
	std	%o4,[%i0]
	! undo the pre-subtraction made for the loop test
	addcc	%i2,16,%i2
8:
	! Store the pending word, then continue one word at a time while
	! at least 4 more bytes remain.
	srl	%o1,%l0,%o5
	sll	%o0,%l1,%l6
	or	%l6,%o5,%o5
	st	%o5,[%i0-4]
	sub	%i0,4,%i0
	subcc	%i2,4,%i2
	blt,a	Lbendn
	addcc	%i2,4,%i2
	mov	%o0,%o1
	ld	[%i1-4],%o0
	ba	8b
	sub	%i1,4,%i1


Lbword:
	! here both dest and src are word-aligned
	! make dest double-word aligned
	! Flags on entry come from andcc %i0,4,%g0 in the delay slot of the
	! branch that jumps here.  If bit 2 of dest is clear, skip the
	! single-word copy; the delay slot (taken path only) tests bit 2
	! of the source address for the branch at 1:.
	be,a	1f
	andcc	%i1,4,%g0
	ld	[%i1-4],%o0
	sub	%i0,4,%i0
	sub	%i1,4,%i1
	sub	%i2,4,%i2
	st	%o0,[%i0]
	! fewer than 4 bytes left: byte tail, with flags set from %i2
	cmp	%i2,4
	blt,a	Lbend
	orcc	%i2,%g0,%g0
	andcc	%i1,4,%g0

1:
	! Bit 2 of the source address clear: both pointers are double-word
	! aligned.  The delay slot (taken path only) pre-subtracts 32 for
	! the loop test at Lbdble.
	be,a	Lbdble		! if source double-word aligned now
	subcc	%i2,32,%i2
	! Source is one word off.  Hold one word in %o4 so that %i1
	! becomes double-word aligned; 36 = 4 for the held word plus 32
	! for one pass of the loop at 2:.
	ld	[%i1-4],%o4
	sub	%i1,4,%i1
	subcc	%i2,36,%i2
	! Not enough for a full pass: the delay slot (taken path only)
	! leaves %i2 = bytes left after the held word.
	blt,a	3f
	add	%i2,32,%i2
2:
	! 32 bytes per pass, working downwards.  The mov instructions
	! rearrange the loaded words so that each std writes two
	! consecutive source words, the topmost pair ending with the held
	! word.  %o4 ends each pass holding the lowest word loaded, not
	! yet stored.  bge tests the subcc result; the sub in its delay
	! slot always executes.
	ldd	[%i1-8],%o2
	sub	%i1,32,%i1
	subcc	%i2,32,%i2
	mov	%o4,%o1
	ldd	[%i1+16],%o4
	mov	%o3,%o0
	std	%o0,[%i0-8]
	mov	%o2,%o3
	ldd	[%i1+8],%o0
	mov	%o5,%o2
	std	%o2,[%i0-16]
	mov	%o4,%o3
	ldd	[%i1],%o4
	mov	%o1,%o2
	std	%o2,[%i0-24]
	mov	%o0,%o1
	mov	%o5,%o0
	std	%o0,[%i0-32]
	bge	2b
	sub	%i0,32,%i0
	! undo the pre-subtraction made for the loop test
	add	%i2,32,%i2
3:
	! Store the held word; while at least 4 more bytes remain, load
	! the next lower word and repeat.  Otherwise restore the count
	! (which also sets the flags) and finish at Lbend.
	st	%o4,[%i0-4]
	sub	%i0,4,%i0
	subcc	%i2,4,%i2
	blt,a	Lbend
	addcc	%i2,4,%i2
	ld	[%i1-4],%o4
	ba	3b
	sub	%i1,4,%i1

Lbdble:
	! dest and source are both double-word aligned
	! Flags on entry come from the subcc %i2,32,%i2 in the delay slot
	! of the branch that jumps here.  Fewer than 32 bytes left: skip
	! the loop; the delay slot (taken path only) leaves %i2 = bytes
	! left minus 4 and sets the flags for the test at 2:.
	blt,a	2f
	addcc	%i2,28,%i2
1:
	! 32 bytes per pass, working downwards; the last std sits in the
	! delay slot of bge and always executes.
	ldd	[%i1-8],%o0	! copy sets of 4 double-words
	subcc	%i2,32,%i2
	ldd	[%i1-16],%o2
	sub	%i1,32,%i1
	ldd	[%i1+8],%o4
	sub	%i0,32,%i0
	std	%o0,[%i0+24]
	ldd	[%i1],%o0
	std	%o2,[%i0+16]
	std	%o4,[%i0+8]
	bge	1b
	std	%o0,[%i0]
	! %i2 = bytes left minus 4, flags set for the test at 2:
	addcc	%i2,28,%i2
2:
	! no whole word left: restore the count and go to the byte tail
	blt,a	Lbend
	addcc	%i2,4,%i2
3:
	! One word per pass; the st in the delay slot always executes.
	ld	[%i1-4],%o0	! copy words
	sub	%i1,4,%i1
	sub	%i0,4,%i0
	subcc	%i2,4,%i2
	bge	3b
	st	%o0,[%i0]
	! delay slot restores the true byte count and sets flags for Lbend
	ba	Lbend
	addcc	%i2,4,%i2

Lbendn:
	! Entered from the backward shifting code, where %i1 was rounded
	! down by %l2 to a word boundary.  Add %l2 back before the byte
	! loop.  add does not touch the flags set by the branch that came
	! here.
	add	%i1,%l2,%i1
Lbend:
	! Backward byte tail.  Every branch to Lbend or Lbendn sets the
	! flags from %i2 in its delay slot; leave if the count is <= 0.
	ble	Lout
	nop
1:
	! Copy the remaining %i2 bytes one at a time, downwards; the sub
	! in the delay slot of bgt executes on every pass.
	ldub	[%i1-1],%o0
	sub	%i1,1,%i1
	subcc	%i2,1,%i2
	stb	%o0,[%i0-1]
	bgt	1b
	sub	%i0,1,%i0

Lout:
	! Common exit for both copy directions.  The restore in the delay
	! slot of ret computes %l7 + 0, the dest pointer saved at entry,
	! and delivers it as %o0 in the caller's register window.
	ret
	restore	%l7,0,%o0


[ RETURN TO DIRECTORY ]