Welcome to mirror list, hosted at ThFree Co, Russian Federation.

memset.S « sh « machine « libc « newlib - cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 2b45aff368372378f080e721a41c99abd99f827b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
!
! Fast SH memset
!
! by Toshiyasu Morita (tm@netcom.com)
!
! SH5 code by Joern Rennecke (joern.rennecke@superh.com)
! Copyright 2002 SuperH Ltd.
!

#include "asm.h"

ENTRY(memset)
#if __SHMEDIA__
! SHmedia implementation of memset.
!
! Register usage, as read from the code below:
!   r2  = destination address (base register of the first stores)
!   r3  = fill value (the register that every store writes)
!   r4  = byte count (added to r2 to form the end address)
!   r18 = return address (loaded into tr2; every exit is blink tr2, r63)
! Scratch: r5 (r2 + r4, one past the last byte), r8, r9, r20, r24, r25,
!          tr0, tr1.
! NOTE(review): r2 is never written, so presumably it also carries the
! return value back to the caller - confirm against the SH5 ABI.
!
! Strategy: if the first and the last byte live in the same aligned
! quadword, use the short paths (byte stores for 0..3 bytes, two partial
! longword stores for 4..8 bytes).  Otherwise store the partial first
! quadword, whole quadwords in between, and the partial last quadword.
	pta/l multiquad, tr0
	ptabs r18, tr2

! r25 = quadword-aligned address of the first byte.
	andi r2, -8, r25
! r5 = one past the last byte to set.
	add r2, r4, r5
	addi r5, -1, r20    // calculate end address.
! r20 = quadword-aligned address of the last byte.
	andi r20, -8, r20
! NOTE(review): cmveq presumably copies r25 into r20 when r4 is zero, so
! that a zero count always takes the short path below - confirm.
	cmveq r4, r25, r20
	bne/u r25, r20, tr0 // multiquad

!	This sequence could clobber volatile objects that are in the same
!	quadword as a very short char array.
!	ldlo.q r2, 0, r7
!	shlli r4, 2, r4
!	movi -1, r8
!	SHHI r8, r4, r8
!	SHHI r8, r4, r8
!	mcmv r7, r8, r3
!	stlo.q r2, 0, r3

! Short path: everything lies inside one quadword.
! Counts of 4 or more go to setlongs; counts 0..3 are done with up to
! three single byte stores, leaving through endset as soon as the count
! is reached.
	pta/l setlongs, tr0
	movi 4, r8
	bgeu/u r4, r8, tr0
	pta/l endset, tr0
	beqi/u r4, 0, tr0
	st.b r2, 0, r3
	beqi/u r4, 1, tr0
	nop
	st.b r2, 1, r3
	beqi/l r4, 2, tr0
	st.b r2,2,r3
endset: blink tr2, r63
! 4..8 bytes inside one quadword: one partial longword store anchored at
! the first byte and one anchored at the last byte (r5 - 1).
! NOTE(review): relies on stlo.l / sthi.l writing only the bytes of a
! misaligned longword that fall on their side of the longword boundary -
! confirm against the SHmedia instruction set manual.
setlongs:
	mshflo.b r3, r3, r3
	mperm.w r3, r63, r3	// Fill pattern now in every byte of r3
	stlo.l r2, 0, r3
	nop
	nop
	sthi.l r5, -1, r3
	blink tr2, r63

! The range touches at least two quadwords.
multiquad:
	mshflo.b r3, r3, r3
	mperm.w r3, r63, r3	// Fill pattern now in every byte of r3
	pta/l lastquad, tr0
! Partial store for the (possibly misaligned) first quadword.
	stlo.q r2, 0, r3
! r24 = byte distance between the first and the last quadword; it is a
! multiple of 8 and at least 8 here.
	sub r20, r25, r24
	movi 64, r9
! Distance 8: only the first and the last quadword are involved.
	beqi/u r24, 8, tr0 // lastquad
	pta/l loop, tr1
	addi r20, -7*8, r8 // loop end address; This might overflow, so we need
	                   // to use a different test before we start the loop
! Distance of 64 or more: use the 32-bytes-per-iteration loop.
	bgeu/u r24, r9, tr1// loop
! Distance 16..56: fill the whole quadwords in between by storing from
! both ends towards the middle.  r24 >> 4 (1, 2 or 3) is the number of
! store pairs required; the two stores of a pair may hit the same address.
	st.q r25, 8, r3
	shlri r24, 4, r24
	st.q r20, -8, r3
	beqi/u r24, 1, tr0 // lastquad
	st.q r25, 16, r3
	st.q r20, -16, r3
	beqi/u r24, 2, tr0 // lastquad
	st.q r25, 24, r3
	st.q r20, -24, r3
! Partial store for the last quadword, anchored at the last byte (r5 - 1).
lastquad:
	sthi.q r5, -1, r3
	blink tr2,r63

! Bulk loop: four quadword stores (32 bytes) per iteration, starting at
! r25 + 8, repeated while r8 (= r20 - 56) >= r25 after the increment.
! NOTE(review): alloco is presumably a cache-line allocate hint for the
! block about to be overwritten - confirm against the ISA manual.
loop:
	alloco r25, 32
	st.q r25, 8, r3
	st.q r25, 16, r3
	st.q r25, 24, r3
	st.q r25, 32, r3
	addi r25, 32, r25
	bgeu/l r8, r25, tr1 // loop

! Finish with the five quadwords just below the last one (these may
! overlap what the loop already wrote), then the partial last quadword.
	st.q r20, -40, r3
	st.q r20, -32, r3
	st.q r20, -24, r3
	st.q r20, -16, r3
	st.q r20, -8, r3
	sthi.q r5, -1, r3
	blink tr2,r63
#else /* ! SHMEDIA, i.e. SH1 .. SH4 / SHcompact */
! SHcompact implementation of memset.
!
! Entry: r4: destination pointer
!        r5: fill value
!        r6: byte count
!        (when __SH5__ is defined the same three arguments are taken from
!         r2, r3 and r4 instead; see the register aliases below)
!
! Exit:  r0-r3: trashed
!        (r0 is loaded with the destination pointer in the return delay
!         slot; the value and count registers are modified as well)
!

! This assumes that the first four bytes of the address space (0..3) are
! reserved - usually by the linker script.  Otherwise, we would have to check
! for the case of objects of the size 12..15 at address 0..3 .

! Register aliases.  All code below must go through these names and never
! through raw register numbers, because the assignment differs for SH5.
#ifdef __SH5__
#define DST r2
#define VAL r3
#define CNT r4
#define TMP r5
#else
#define DST r4
#define VAL r5
#define CNT r6
#define TMP r2
#endif

! Fewer than 12 bytes (signed compare): skip alignment and long stores and
! use the plain byte loop.  r0 becomes the running store pointer, and the
! add in the branch slot turns the count register into the end address.
	mov	#12,r0	! Check for small number of bytes
	cmp/gt	CNT,r0
	mov	DST,r0
	SL(bt, L_store_byte_loop_check0, add DST,CNT)

! Reduce the fill value to its low 8 bits before it is replicated; any
! higher bits would otherwise be merged into the longword pattern.  This
! has to name the value register through its alias: for SH5 the value is
! not in r5.
	tst	#3,r0	! Align destination
	SL(bt,	L_dup_bytes, extu.b VAL,VAL)
! Loop entry points are padded to 4 bytes with 0x0009 (the SH nop encoding).
	.balignw 4,0x0009
! Store single bytes until the pointer is longword aligned (at most 3).
L_align_loop:
	mov.b	VAL,@r0
	add	#1,r0
	tst	#3,r0
	bf	L_align_loop

L_dup_bytes:
	swap.b	VAL,TMP	! Duplicate bytes across longword
	or	TMP,VAL
	swap.w	VAL,TMP
	or	TMP,VAL

! Bias the end address by -16 for the loop test below.  The test uses the
! pointer value from before the increment in the branch slot, so the loop
! repeats only while at least 16 bytes remained at the start of a pass.
	add	#-16,CNT

	.balignw 4,0x0009
! Two longword stores (8 bytes) per iteration.  The first pass is
! unconditional; at least 9 bytes remain here because the count was at
! least 12 and alignment consumed at most 3.
L_store_long_loop:
	mov.l	VAL,@r0	! Store double longs to memory
	cmp/hs	CNT,r0
	mov.l	VAL,@(4,r0)
	SL(bf, L_store_long_loop, add #8,r0)

! Undo the bias: the count register is the true end address again.
	add	#16,CNT

! Tail and small-count path: store single bytes until the end address is
! reached.  The check up front handles the case of nothing left to store.
L_store_byte_loop_check0:
	cmp/eq	CNT,r0
	bt	L_exit
	.balignw 4,0x0009
L_store_byte_loop:
	mov.b	VAL,@r0	! Store bytes to memory
	add	#1,r0
	cmp/eq	CNT,r0
	bf	L_store_byte_loop

! The destination register is never modified, so the move in the return
! delay slot yields the original destination pointer in r0.
L_exit:
	rts
	mov	DST,r0
#endif /* ! SHMEDIA */