Welcome to mirror list, hosted at ThFree Co, Russian Federation.

memcpy.S « m68k « machine « libc « newlib - cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 3badd58bfd5293c35577c95a0bffd1632cd6cb9b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/* a-memcpy.s -- memcpy, optimised for m68k asm
 *
 * Copyright (c) 2007 mocom software GmbH & Co KG)
 *
 * The authors hereby grant permission to use, copy, modify, distribute,
 * and license this software and its documentation for any purpose, provided
 * that existing copyright notices are retained in all copies and that this
 * notice is included verbatim in any distributions. No written agreement,
 * license, or royalty fee is required for any of the authorized uses.
 * Modifications to this software may be copyrighted by their authors
 * and need not follow the licensing terms described here, provided that
 * the new terms are clearly indicated on the first page of each file where
 * they apply.
 */

#include "m68kasm.h"

	.text
	.align	4

	.globl	SYM(memcpy)
	.type	SYM(memcpy), @function

/*   memcpy, optimised
 *
 *   C equivalent:  void *memcpy (void *dest, const void *src, size_t len)
 *
 *   In:    4(sp)  = dest
 *          8(sp)  = src
 *          12(sp) = len
 *   Out:   d0 = dest (reloaded from the stack just before returning)
 *   Uses:  d0, d1, a0, a1 and the condition codes; nothing is saved
 *          or restored.
 *
 *   strategy:
 *       - no argument testing (the original memcpy from the GNU lib does
 *         no checking either)
 *       - copies of fewer than 8 bytes go straight to the byte loop.
 *       - on CPUs where a word or long access at an odd address raises an
 *         address error, a copy whose dest and src differ in parity can
 *         never have both pointers even at the same time, so it is done
 *         one byte at a time.
 *       - otherwise make sure the destination pointer (the write pointer)
 *         is long word aligned. This is the best you can do, because
 *         writing to unaligned addresses can be the most costly thing
 *         you could do.
 *       - Once you have figured that out, we do a little loop unrolling
 *         to further improve speed: 16 bytes per iteration, with a 4 or
 *         8 byte remainder handled by one extra long move and by entering
 *         the loop half way. The last 0-3 bytes use the byte loop.
 */

/* MISALIGNED_OK is 1 when the target CPU is known to accept word and long
 * operands at odd addresses. Anything else takes the conservative path. */
#if defined (__mcoldfire__) || defined (__mc68020__) \
    || defined (__mc68030__) || defined (__mc68040__) \
    || defined (__mc68060__)
# define MISALIGNED_OK 1
#else
# define MISALIGNED_OK 0
#endif

SYM(memcpy):
	move.l	4(sp),a0	| dest ptr
	move.l	8(sp),a1	| src ptr
	move.l	12(sp),d1	| len
	cmp.l	#8,d1		| if fewer than 8 bytes to transfer,
	blo	.Lresidue	| do not optimise

#if !MISALIGNED_OK
	/* Aligning dest below advances src by the same amount, so src ends
	 * up even only when both pointers start with the same parity. If
	 * they differ, the word and long moves would hit an odd address. */
	move.l	a0,d0
	sub.l	a1,d0		| bit 0 = parity of dest XOR parity of src
	btst	#0,d0
	bne	.Lbytecopy	| parities differ: bytes only
#endif

	/* align dest */
	move.l	a0,d0		| copy of dest
	neg.l	d0
	and.l	#3,d0		| d0 = bytes needed to reach a long boundary
	beq	2f		| is aligned?
	sub.l	d0,d1		| take the alignment bytes off len
	lsr.l	#1,d0		| word align needed? (bit 0 into carry)
	bcc	1f
	move.b	(a1)+,(a0)+
1:
	lsr.l	#1,d0		| long align needed? (bit 1 into carry)
	bcc	2f
	move.w	(a1)+,(a0)+
2:

	/* long word transfers */
	move.l	d1,d0
	and.l	#3,d1		| byte residue
	lsr.l	#3,d0		| d0 = len / 8, bit 2 of len into carry
	bcc	1f		| carry set for 4-byte residue
	move.l	(a1)+,(a0)+
1:
	lsr.l	#1,d0		| number of 16-byte transfers
	bcc	.Lcopy 		| carry set for 8-byte residue
	bra	.Lcopy8		| enter the loop half way: 8 bytes first

1:
	move.l	(a1)+,(a0)+
	move.l	(a1)+,(a0)+
.Lcopy8:
	move.l	(a1)+,(a0)+
	move.l	(a1)+,(a0)+
.Lcopy:
#if !defined (__mcoldfire__)
	dbra	d0,1b		| dbra only counts the low word of d0 ...
	sub.l	#0x10000,d0	| ... so step the high word by hand
#else
	subq.l	#1,d0
#endif
	bpl	1b		| more 16-byte blocks to go
	bra	.Lresidue

1:
	move.b	(a1)+,(a0)+	| move residue bytes

.Lresidue:
	/* d1 is below 8 on every path that gets here, so a word-sized
	 * dbra count is enough. */
#if !defined (__mcoldfire__)
	dbra	d1,1b		| loop until done
#else
	subq.l	#1,d1
	bpl	1b
#endif
.Ldone:
	move.l	4(sp),d0	| return value
	rts

#if !MISALIGNED_OK
	/* Bytewise copy of the full length in d1. The count may exceed 16
	 * bits, so the high word is stepped by hand exactly as in .Lcopy. */
.Lbyteloop:
	move.b	(a1)+,(a0)+
.Lbytecopy:
	dbra	d1,.Lbyteloop
	sub.l	#0x10000,d1
	bpl	.Lbyteloop
	bra	.Ldone
#endif