Welcome to mirror list, hosted at ThFree Co, Russian Federation.

memset.S « sh « machine « libc « newlib - cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 3d37e33e3efe812c19b982d9c0a7fb133c3115c0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
!
! Fast SH memset
!
! by Toshiyasu Morita (tm@netcom.com)
!
! SH5 code by J"orn Rennecke (joern.rennecke@superh.com)
! Copyright 2002 SuperH Ltd.
!

#include "asm.h"

ENTRY(memset)
#if __SHMEDIA__
	// SHmedia implementation of memset.
	// Register use, as read from the code below:
	//   r2  = destination pointer (used as the store address)
	//   r3  = fill value, replicated into all eight bytes before any store
	//   r4  = byte count (added to r2 to form the end address)
	//   r18 = source of the exit target tr2; every path leaves via blink tr2
	//         (presumably the return address - confirm against the SH-5 ABI)
	// Derived values:
	//   r22 = r2 & 7       byte offset of the destination inside its quadword
	//   r23 = r4 + r22     offset + count, measured from the aligned base
	//   r24 = r23 >> 3     number of quadword boundaries the fill crosses
	//   r25 = r2 - r22     destination rounded down to an 8-byte boundary
	//   r5  = r2 + r4      end address (one past the last byte)
	//   r20 = r5 & -8      end address rounded down to an 8-byte boundary
	// The /l and /u suffixes are presumably branch likelihood hints.
	pta/l multiquad, tr0
	andi r2, 7, r22
	ptabs r18, tr2
	mshflo.b r3,r3,r3
	add r4, r22, r23
	mperm.w r3, r63, r3	// Fill pattern now in every byte of r3

	// r23 is compared against 8: the branch goes to multiquad when the fill
	// reaches beyond the first quadword, otherwise the single-quadword path
	// below is used.
	movi 8, r9
	bgtu/u r23, r9, tr0 // multiquad

	// Single-quadword case: read the current memory contents, build a mask
	// by shifting an all-ones value, merge old data and fill pattern, and
	// write the result back.
	// The shift amount is count * 4 and is applied twice, i.e. count * 8 bits
	// in total (presumably split in two so that a count of 8 still shifts
	// out all 64 bits - confirm).
	// SHHI is not defined in this file; it presumably comes from asm.h and
	// selects the shift direction by endianness - TODO confirm.
	ldlo.q r2, 0, r7
	shlli r4, 2, r4
	movi -1, r8
	SHHI r8, r4, r8
	SHHI r8, r4, r8
	mcmv r7, r8, r3
	stlo.q r2, 0, r3
	blink tr2, r63

multiquad:
	// Leading (possibly partial) quadword is written first with stlo.q.
	// If r24 == 1 only the trailing piece is left, handled at lastquad.
	pta/l lastquad, tr0
	stlo.q r2, 0, r3
	shlri r23, 3, r24
	add r2, r4, r5
	beqi/u r24, 1, tr0 // lastquad
	pta/l loop, tr1
	sub r2, r22, r25
	andi r5, -8, r20   // calculate end address and
	addi r20, -7*8, r8 // loop end address; This might overflow, so we need
	                   // to use a different test before we start the loop
	// r9 still holds 8: fills with r24 >= 8 go to the unrolled loop.
	bge/u r24, r9, tr1 // loop
	// Medium sizes (r24 in 2..7): store whole quadwords in pairs, one counted
	// up from the aligned start (r25) and one counted down from the aligned
	// end (r20).  The pairs may overlap, which is harmless because every
	// store writes the same pattern.  r24 is halved to decide how many pairs
	// are needed.
	st.q r25, 8, r3
	st.q r20, -8, r3
	shlri r24, 1, r24
	beqi/u r24, 1, tr0 // lastquad
	st.q r25, 16, r3
	st.q r20, -16, r3
	beqi/u r24, 2, tr0 // lastquad
	st.q r25, 24, r3
	st.q r20, -24, r3
lastquad:
	// Trailing (possibly partial) quadword, addressed through the last byte
	// of the region (r5 - 1), then return.
	sthi.q r5, -1, r3
	blink tr2,r63

loop:
	// Large fills: four quadwords (32 bytes) per iteration, starting one
	// quadword above r25 because the first quadword is already written.
	// alloco is presumably a cache block allocate hint for the block about
	// to be overwritten - confirm against the SH-5 manual.
	// The loop repeats while r8 (r20 - 56) is not below the advanced r25.
	alloco r25, 32
	st.q r25, 8, r3
	st.q r25, 16, r3
	st.q r25, 24, r3
	st.q r25, 32, r3
	addi r25, 32, r25
	bgeu/l r8, r25, tr1 // loop

	// Tail of the large case: unconditionally write the last five whole
	// quadwords below r20 (these may overlap what the loop already wrote),
	// then the trailing partial quadword, then return.
	st.q r20, -40, r3
	st.q r20, -32, r3
	st.q r20, -24, r3
	st.q r20, -16, r3
	st.q r20, -8, r3
	sthi.q r5, -1, r3
	blink tr2,r63
#else /* ! SHMEDIA, i.e. SH1 .. SH4 / SHcompact */
! Entry: r4: destination pointer
!        r5: fill value
!        r6: byte count
!
! Exit:  r0-r3: trashed
!

! This assumes that the first four bytes of the address space (0..3) are
! reserved - usually by the linker script.  Otherwise, we would have to check
! for the case of objects of the size 12..15 at address 0..3 .

#ifdef __SH5__
#define DST r2
#define VAL r3
#define CNT r4
#define TMP r5
#else
#define DST r4
#define VAL r5
#define CNT r6
#define TMP r2
#endif

	! SHcompact memset body.
	!
	! Register roles come from the preprocessor defines just above this code:
	! the destination, fill value, byte count and scratch register are always
	! referred to through those names so that both register assignments work.
	! r0 is the running store pointer.  The count register is converted into
	! the end address (destination + count) early on.
	!
	! Strategy:
	!   - fewer than 12 bytes: plain byte loop
	!   - otherwise: byte stores until r0 is longword aligned, then pairs of
	!     longword stores, then a byte loop for whatever is left

	mov	#12,r0	! Check for small number of bytes
	cmp/gt	CNT,r0
	mov	DST,r0
	! The slot instruction turns the count into the end address on both paths.
	SL(bt, L_store_byte_loop_check0, add DST,CNT)

	tst	#3,r0	! Align destination
	! The slot instruction truncates the fill value to 8 bits so that the
	! replication below yields a clean pattern.  It must operate on the fill
	! value register through its define: naming r5 directly is only right
	! for the non-SH5 register assignment, where the fill value lives in r5.
	SL(bt,	L_dup_bytes, extu.b VAL,VAL)
	.balignw 4,0x0009
L_align_loop:
	mov.b	VAL,@r0
	add	#1,r0
	tst	#3,r0
	bf	L_align_loop

L_dup_bytes:
	swap.b	VAL,TMP	! Duplicate bytes across longword
	or	TMP,VAL
	swap.w	VAL,TMP
	or	TMP,VAL

	! Bias the end address by -16 for the loop test below.  The compare
	! is done before the pointer is advanced, so the loop stops with at
	! most 8 bytes left for the byte loop.
	add	#-16,CNT

	.balignw 4,0x0009
L_store_long_loop:
	mov.l	VAL,@r0	! Store double longs to memory
	cmp/hs	CNT,r0
	mov.l	VAL,@(4,r0)
	SL(bf, L_store_long_loop, add #8,r0)

	add	#16,CNT	! Undo the bias: back to the real end address

L_store_byte_loop_check0:
	cmp/eq	CNT,r0	! Nothing left to store
	bt	L_exit
	.balignw 4,0x0009
L_store_byte_loop:
	mov.b	VAL,@r0	! Store bytes to memory
	add	#1,r0
	cmp/eq	CNT,r0
	bf	L_store_byte_loop

L_exit:
	rts
	! Delay slot: hand back the original destination pointer in r0.  The
	! destination register is never written by this code, so it still holds
	! the caller supplied pointer under either register assignment.
	mov	DST,r0
#endif /* ! SHMEDIA */