 b6cbf7203b
			
		
	
	
		b6cbf7203b
		
	
	
	
	
		
			
			This patch imports the unmodified current version of NetBSD libc. The NetBSD includes are in /nbsd_include, while the libc code itself is split between lib/nbsd_libc and common/lib/libc.
		
			
				
	
	
		
			289 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			289 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| /* $NetBSD: bcopy.S,v 1.1 2005/12/20 19:28:49 christos Exp $ */
 | |
| 
 | |
| /*
 | |
|  * Copyright (c) 1995 Carnegie-Mellon University.
 | |
|  * All rights reserved.
 | |
|  *
 | |
|  * Author: Trevor Blackwell.  Support for use as memcpy() and memmove()
 | |
|  *	   added by Chris Demetriou.
 | |
|  *
 | |
|  * Permission to use, copy, modify and distribute this software and
 | |
|  * its documentation is hereby granted, provided that both the copyright
 | |
|  * notice and this permission notice appear in all copies of the
 | |
|  * software, derivative works or modified versions, and any portions
 | |
|  * thereof, and that both notices appear in supporting documentation.
 | |
|  *
 | |
|  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 | |
|  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 | |
|  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 | |
|  *
 | |
|  * Carnegie Mellon requests users of this software to return to
 | |
|  *
 | |
|  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 | |
|  *  School of Computer Science
 | |
|  *  Carnegie Mellon University
 | |
|  *  Pittsburgh PA 15213-3890
 | |
|  *
 | |
|  * any improvements or extensions that they make and grant Carnegie the
 | |
|  * rights to redistribute these changes.
 | |
|  */
 | |
| 
 | |
| #include <machine/asm.h>
 | |
| 
 | |
| #if defined(MEMCOPY) || defined(MEMMOVE)
 | |
| #ifdef MEMCOPY
 | |
| #define	FUNCTION	memcpy
 | |
| #else /* !MEMCOPY, i.e. MEMMOVE */
 | |
| #define FUNCTION	memmove
 | |
| #endif /* MEMCOPY */
 | |
| #define	SRCREG		a1	/* (to, from, len) order: source is the second argument */
 | |
| #define	DSTREG		a0	/* destination is the first argument */
 | |
| #else /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
 | |
| #define	FUNCTION	bcopy
 | |
| #define	SRCREG		a0	/* bcopy(from, to, len): source is the first argument */
 | |
| #define	DSTREG		a1	/* destination is the second argument */
 | |
| #endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
 | |
| /* The length is the third argument in every variant. */
 | |
| #define	SIZEREG		a2
 | |
| 
 | |
| /*
 | |
|  * Copy bytes.
 | |
|  *
 | |
|  * void bcopy(char *from, char *to, size_t len);
 | |
|  * char *memcpy(void *to, const void *from, size_t len);
 | |
|  * char *memmove(void *to, const void *from, size_t len);
 | |
|  *
 | |
|  * No matter how invoked, the source and destination registers are used
 | |
|  * directly for calculation.  There's no point in copying them to "working"
 | |
|  * registers, since the code uses their values "in place," and
 | |
|  * copying them would be slower.
 | |
|  */
 | |
| 
 | |
| LEAF(FUNCTION,3)
 | |
| /* On entry: SRCREG = source, DSTREG = destination, SIZEREG = byte count. */
 | |
| #if defined(MEMCOPY) || defined(MEMMOVE)
 | |
| 	/* set up return value, while we still can (DSTREG is advanced below) */
 | |
| 	mov	DSTREG,v0
 | |
| #endif
 | |
| 
 | |
| 	/* Nothing to copy if the length is zero or, taken as signed, negative */
 | |
| 	ble	SIZEREG,bcopy_done
 | |
| 
 | |
| 	/* Check for overlap: copy backwards if (dst - src) < len, unsigned */
 | |
| 	subq	DSTREG,SRCREG,t5
 | |
| 	cmpult	t5,SIZEREG,t5
 | |
| 	bne	t5,bcopy_overlap
 | |
| 
 | |
| 	/* a3 = end address of the source (src + len) */
 | |
| 	addq	SRCREG,SIZEREG,a3
 | |
| 
 | |
| 	/* Get the first source quadword (ldq_u ignores the low 3 address bits) */
 | |
| 	ldq_u	t2,0(SRCREG)
 | |
| 
 | |
| 	/* Do they have the same alignment?  t0 = (src ^ dst) & 7, t1 = dst & 7 */
 | |
| 	xor	SRCREG,DSTREG,t0
 | |
| 	and	t0,7,t0
 | |
| 	and	DSTREG,7,t1
 | |
| 	bne	t0,bcopy_different_alignment
 | |
| 
 | |
| 	/* src & dst have same alignment; if that offset is 0, no head merge is needed */
 | |
| 	beq	t1,bcopy_all_aligned
 | |
| /* Head merge: build the first quadword from source data plus the dst bytes that precede it. */
 | |
| 	ldq_u	t3,0(DSTREG)	/* t3 = quadword containing the first dst byte */
 | |
| 	addq	SIZEREG,t1,SIZEREG	/* count the length from the start of that quadword */
 | |
| 	mskqh	t2,SRCREG,t2	/* keep source bytes at and above the offset */
 | |
| 	mskql	t3,SRCREG,t3	/* keep destination bytes below the offset */
 | |
| 	or	t2,t3,t2	/* t2 = merged first quadword, stored by the code below */
 | |
| 
 | |
| 	/* Dst is 8-byte aligned, or its leading bytes have been merged into t2 */
 | |
| 
 | |
| bcopy_all_aligned:
 | |
| 	/* If 8 bytes or fewer remain, skip the loop (t2 already holds the only quadword) */
 | |
| 	subq	SIZEREG,1,t0
 | |
| 	and	SIZEREG,7,SIZEREG	/* SIZEREG = bytes used in the last quadword (0 means all 8) */
 | |
| 	bic	t0,7,t0	/* t0 = (len - 1) & ~7 = bytes in the quadwords before the last one */
 | |
| 	beq	t0,bcopy_samealign_lp_end
 | |
| 
 | |
| bcopy_samealign_lp:
 | |
| 	stq_u	t2,0(DSTREG)	/* store the current quadword */
 | |
| 	addq	DSTREG,8,DSTREG
 | |
| 	ldq_u	t2,8(SRCREG)	/* fetch the next source quadword */
 | |
| 	subq	t0,8,t0
 | |
| 	addq	SRCREG,8,SRCREG
 | |
| 	bne	t0,bcopy_samealign_lp
 | |
| 
 | |
| bcopy_samealign_lp_end:
 | |
| 	/* t2 = last quadword of data; if all 8 bytes are wanted, store it and exit */
 | |
| 	bne	SIZEREG,bcopy_small_left
 | |
| 	stq_u	t2,0(DSTREG)
 | |
| 	RET
 | |
| 
 | |
| bcopy_small_left:
 | |
| 	mskql	t2,SIZEREG,t4	/* keep the low SIZEREG bytes of the data */
 | |
| 	ldq_u	t3,0(DSTREG)	/* existing destination quadword */
 | |
| 	mskqh	t3,SIZEREG,t3	/* keep the destination bytes past the end of the copy */
 | |
| 	or	t4,t3,t4
 | |
| 	stq_u	t4,0(DSTREG)
 | |
| 	RET
 | |
| 
 | |
| bcopy_different_alignment:
 | |
| 	/*
 | |
| 	 * this is the fun part: src and dst differ in alignment, so each destination quadword is assembled from two source quadwords
 | |
| 	 */
 | |
| 	addq	SRCREG,SIZEREG,a3	/* a3 = end of source */
 | |
| 	cmpule	SIZEREG,8,t0
 | |
| 	bne	t0,bcopy_da_finish	/* 8 bytes or fewer: one masked copy handles it */
 | |
| 
 | |
| 	beq	t1,bcopy_da_noentry	/* t1 = dst & 7; zero means dst is already aligned */
 | |
| 
 | |
| 	/* Do the initial partial word */
 | |
| 	subq	zero,DSTREG,t0
 | |
| 	and	t0,7,t0	/* t0 = bytes up to the next 8-byte dst boundary */
 | |
| 	ldq_u	t3,7(SRCREG)
 | |
| 	extql	t2,SRCREG,t2
 | |
| 	extqh	t3,SRCREG,t3
 | |
| 	or	t2,t3,t5	/* t5 = 8 source bytes starting at src */
 | |
| 	insql	t5,DSTREG,t5	/* shift them up to dst's byte offset */
 | |
| 	ldq_u	t6,0(DSTREG)
 | |
| 	mskql	t6,DSTREG,t6	/* keep destination bytes below the offset */
 | |
| 	or	t5,t6,t5
 | |
| 	stq_u	t5,0(DSTREG)
 | |
| 	addq	SRCREG,t0,SRCREG
 | |
| 	addq	DSTREG,t0,DSTREG	/* dst is now 8-byte aligned */
 | |
| 	subq	SIZEREG,t0,SIZEREG
 | |
| 	ldq_u	t2,0(SRCREG)	/* reload the current source quadword */
 | |
| 
 | |
| bcopy_da_noentry:
 | |
| 	subq	SIZEREG,1,t0
 | |
| 	bic	t0,7,t0	/* t0 = bytes in the quadwords before the last one */
 | |
| 	and	SIZEREG,7,SIZEREG	/* bytes used in the last quadword (0 means all 8) */
 | |
| 	beq	t0,bcopy_da_finish2
 | |
| 
 | |
| bcopy_da_lp:	/* unrolled twice; t2 and t3 alternate as older/newer source quadword */
 | |
| 	ldq_u	t3,7(SRCREG)
 | |
| 	addq	SRCREG,8,SRCREG
 | |
| 	extql	t2,SRCREG,t4
 | |
| 	extqh	t3,SRCREG,t5
 | |
| 	subq	t0,8,t0
 | |
| 	or	t4,t5,t5
 | |
| 	stq	t5,0(DSTREG)	/* aligned store: dst is 8-byte aligned here */
 | |
| 	addq	DSTREG,8,DSTREG
 | |
| 	beq	t0,bcopy_da_finish1	/* newest source quadword is already in t3 */
 | |
| 	ldq_u	t2,7(SRCREG)
 | |
| 	addq	SRCREG,8,SRCREG
 | |
| 	extql	t3,SRCREG,t4
 | |
| 	extqh	t2,SRCREG,t5
 | |
| 	subq	t0,8,t0
 | |
| 	or	t4,t5,t5
 | |
| 	stq	t5,0(DSTREG)
 | |
| 	addq	DSTREG,8,DSTREG
 | |
| 	bne	t0,bcopy_da_lp
 | |
| 
 | |
| bcopy_da_finish2:
 | |
| 	/* Do the last new word: bcopy_da_finish1 expects the newest source quadword in t3 */
 | |
| 	mov	t2,t3
 | |
| 
 | |
| bcopy_da_finish1:
 | |
| 	/* Do the last partial word */
 | |
| 	ldq_u	t2,-1(a3)	/* quadword holding the last source byte */
 | |
| 	extql	t3,SRCREG,t3
 | |
| 	extqh	t2,SRCREG,t2
 | |
| 	or	t2,t3,t2	/* t2 = final quadword of data */
 | |
| 	br	zero,bcopy_samealign_lp_end	/* shared tail stores all of t2 or only SIZEREG bytes */
 | |
| 
 | |
| bcopy_da_finish:
 | |
| 	/* Copy of 8 bytes or fewer; t2 = first source quadword.  Do the last word in the next source word */
 | |
| 	ldq_u	t3,-1(a3)	/* quadword holding the last source byte */
 | |
| 	extql	t2,SRCREG,t2
 | |
| 	extqh	t3,SRCREG,t3
 | |
| 	or	t2,t3,t2	/* t2 = 8 source bytes starting at src */
 | |
| 	insqh	t2,DSTREG,t3	/* t3 = data positioned for the second dst quadword */
 | |
| 	insql	t2,DSTREG,t2	/* t2 = data positioned for the first dst quadword */
 | |
| 	lda	t4,-1(zero)	/* t4 = all ones */
 | |
| 	mskql	t4,SIZEREG,t5	/* ones in the low (len & 7) bytes */
 | |
| 	cmovne	t5,t5,t4	/* t4 = byte mask for len bytes; len == 8 leaves all ones */
 | |
| 	insqh	t4,DSTREG,t5	/* t5 = mask positioned for the second dst quadword */
 | |
| 	insql	t4,DSTREG,t4	/* t4 = mask positioned for the first dst quadword */
 | |
| 	addq	DSTREG,SIZEREG,a4	/* a4 = end of destination */
 | |
| 	ldq_u	t6,0(DSTREG)
 | |
| 	ldq_u	t7,-1(a4)	/* quadword holding the last dst byte */
 | |
| 	bic	t6,t4,t6	/* clear the dst bytes that will be replaced */
 | |
| 	bic	t7,t5,t7
 | |
| 	and	t2,t4,t2	/* keep only the data bytes that will be written */
 | |
| 	and	t3,t5,t3
 | |
| 	or	t2,t6,t2
 | |
| 	or	t3,t7,t3
 | |
| 	stq_u	t3,-1(a4)
 | |
| 	stq_u	t2,0(DSTREG)	/* stored last, so it wins when both stores hit the same quadword */
 | |
| 	RET
 | |
| 
 | |
| bcopy_overlap:
 | |
| 	/*
 | |
| 	 * Basically equivalent to previous case, only backwards.
 | |
| 	 * Not quite as highly optimized
 | |
| 	 */
 | |
| 	addq	SRCREG,SIZEREG,a3	/* a3 = end of source */
 | |
| 	addq	DSTREG,SIZEREG,a4	/* a4 = end of destination */
 | |
| 
 | |
| 	/* 8 bytes or fewer - don't worry about overlap */
 | |
| 	cmpule	SIZEREG,8,t0
 | |
| 	bne	t0,bcopy_ov_short
 | |
| 
 | |
| 	/* Possibly do a partial first word (the last dst quadword, since this path works backwards from the end) */
 | |
| 	and	a4,7,t4	/* t4 = bytes past the last 8-byte dst boundary */
 | |
| 	beq	t4,bcopy_ov_nostart2
 | |
| 	subq	a3,t4,a3
 | |
| 	subq	a4,t4,a4	/* a4 is now 8-byte aligned */
 | |
| 	ldq_u	t1,0(a3)
 | |
| 	subq	SIZEREG,t4,SIZEREG
 | |
| 	ldq_u	t2,7(a3)
 | |
| 	ldq	t3,0(a4)	/* existing destination quadword */
 | |
| 	extql	t1,a3,t1
 | |
| 	extqh	t2,a3,t2
 | |
| 	or	t1,t2,t1	/* t1 = 8 source bytes starting at a3 */
 | |
| 	mskqh	t3,t4,t3	/* keep destination bytes beyond the end of the copy */
 | |
| 	mskql	t1,t4,t1	/* keep the low t4 source bytes */
 | |
| 	or	t1,t3,t1
 | |
| 	stq	t1,0(a4)
 | |
| 
 | |
| bcopy_ov_nostart2:
 | |
| 	bic	SIZEREG,7,t4	/* t4 = bytes to copy as whole quadwords */
 | |
| 	and	SIZEREG,7,SIZEREG	/* bytes left over at the start of the buffers */
 | |
| 	beq	t4,bcopy_ov_lp_end
 | |
| 
 | |
| bcopy_ov_lp:
 | |
| 	/* This could be more pipelined, but it doesn't seem worth it */
 | |
| 	ldq_u	t0,-8(a3)
 | |
| 	subq	a4,8,a4
 | |
| 	ldq_u	t1,-1(a3)
 | |
| 	subq	a3,8,a3
 | |
| 	extql	t0,a3,t0
 | |
| 	extqh	t1,a3,t1
 | |
| 	subq	t4,8,t4
 | |
| 	or	t0,t1,t0	/* t0 = 8 source bytes starting at the updated a3 */
 | |
| 	stq	t0,0(a4)	/* aligned store: a4 is 8-byte aligned in this loop */
 | |
| 	bne	t4,bcopy_ov_lp
 | |
| 
 | |
| bcopy_ov_lp_end:
 | |
| 	beq	SIZEREG,bcopy_done
 | |
| /* Leading partial quadword: the SIZEREG bytes still uncopied at the start of dst. */
 | |
| 	ldq_u	t0,0(SRCREG)
 | |
| 	ldq_u	t1,7(SRCREG)
 | |
| 	ldq_u	t2,0(DSTREG)
 | |
| 	extql	t0,SRCREG,t0
 | |
| 	extqh	t1,SRCREG,t1
 | |
| 	or	t0,t1,t0	/* t0 = 8 source bytes starting at src */
 | |
| 	insql	t0,DSTREG,t0	/* shift them up to dst's byte offset */
 | |
| 	mskql	t2,DSTREG,t2	/* keep destination bytes below the offset */
 | |
| 	or	t2,t0,t2
 | |
| 	stq_u	t2,0(DSTREG)
 | |
| 
 | |
| bcopy_done:
 | |
| 	RET
 | |
| 
 | |
| bcopy_ov_short:	/* overlapping copy of 8 bytes or fewer */
 | |
| 	ldq_u	t2,0(SRCREG)	/* bcopy_da_finish expects the first source quadword in t2 */
 | |
| 	br	zero,bcopy_da_finish	/* it loads all source data before either store */
 | |
| 
 | |
| 	END(FUNCTION)
 |