Welcome to mirror list, hosted at ThFree Co, Russian Federation.

strcpy.S « hppa « machine « libc « newlib - cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 3068cd5e1fa10d37bfb0f488646645168f2b4628 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
/*
 *  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
 *
 *  To anyone who acknowledges that this file is provided "AS IS"
 *  without any express or implied warranty:
 *      permission to use, copy, modify, and distribute this file
 *  for any purpose is hereby granted without fee, provided that
 *  the above copyright notice and this notice appears in all
 *  copies, and that the name of Hewlett-Packard Company not be
 *  used in advertising or publicity pertaining to distribution
 *  of the software without specific, written prior permission.
 *  Hewlett-Packard Company makes no representations about the
 *  suitability of this software for any purpose.
 */

/*
	A faster strcpy.

	by

	Jerry Huck (aligned case)
	Daryl Odnert (equal-alignment case)
	Edgar Circenis (non-aligned case)
*/
/*
 * strcpy(s1, s2)
 *
 * Copy the null-terminated string s2 to s1, including the terminating
 * null byte.  s1 must be large enough to hold the result.
 * Returns s1.
 */

#include "DEFS.h"

/*
 * Register aliases used by strcpy below.
 *
 *   d_addr    destination pointer; copied to ret0 at entry, then advanced
 *             as bytes/words are stored.
 *   s_addr    source pointer; advanced as bytes/words are loaded.
 *   tmp6      alignment index at entry ((dst & 3) << 2 | (src & 3)),
 *             later reused as scratch / destination byte offset.
 *   tmp1, evenside   two names for the same register (r19).
 *   tmp2, oddside    two names for the same register (r20).
 *   tmp3      TGT-SRC byte shift on the unaligned paths, then the
 *             realigned output word.
 *   tmp4      shift amount in bits, later a single extracted byte.
 *   tmp5      not referenced by the code visible in this file.
 *   save      receives the result of the uxor null-byte tests (the word
 *             being examined) and holds masks on the unaligned paths.
 *
 * NOTE(review): r26/r25 are presumably the first two argument registers
 * of the PA-RISC calling convention -- confirm against DEFS.h / the
 * runtime architecture document.
 */
#define	d_addr		r26
#define	s_addr		r25
#define	tmp6		r24
#define	tmp1		r19
#define evenside	r19
#define	tmp2		r20
#define oddside		r20
#define	tmp3		r21
#define	tmp4		r22
#define	tmp5		arg3
#define	save		r1


/*
 * strcpy -- copy the null-terminated string at s_addr to d_addr.
 *
 * In:   d_addr = destination, s_addr = source.
 * Out:  ret0   = the original destination pointer (copied from d_addr
 *                before d_addr is modified).
 *
 * Outline:
 *   1. Build tmp6 = (dst & 3) << 2 | (src & 3).  If it is zero both
 *      pointers are word aligned and control falls into "bothaligned",
 *      a word-at-a-time loop handling two words per iteration.
 *   2. Otherwise "case_analysis" dispatches through a 16-entry table of
 *      two-instruction slots indexed by tmp6:
 *        - equal misalignment: copy 1-3 leading bytes, then join
 *          "bothaligned";
 *        - different misalignment: "pos_aligned_copy" / "neg_aligned_copy"
 *          read the source as aligned words and realign them with vshd.
 *   3. A word is tested for a null byte with uxor and the nbz/sbz
 *      (no-byte-zero / some-byte-zero) conditions.
 *
 * The code relies heavily on branch delay slots and on conditional
 * nullification of the *next executed* instruction, including across a
 * taken branch.  Instruction order must not be changed.
 */
ENTRY(strcpy)
/* Quick alignment check, with a fast path for the case where both
   pointers are word aligned. */
        extru,<>   s_addr,31,2,tmp6    /*Is source word aligned? */
        ldwm       4(0,s_addr),oddside /*Assume yes and guess that it
                                          is double-word aligned. */
/* The load above is skipped (nullified) when the source is not word
   aligned, so s_addr has been advanced by 4 only for an aligned source. */
        dep,=      d_addr,29,2,tmp6    /*Is target word aligned? */
/* tmp6 now holds dst-low-bits:src-low-bits.  The branch below is skipped
   only when tmp6 is zero; ret0 is set on both paths. */
        b          case_analysis
	copy       d_addr,ret0
/* Both are aligned.  First source word already loaded assuming that
   source was oddword aligned.  Fall through (therefore fastest) code
   shuffles the registers to join the main loop */
bothaligned:
	bb,>=    s_addr,29,twoatatime  /*Branch if source was odd aligned*/
/* Delay slot: test the loaded word for a null byte.  The nullification
   applies to whichever "b,n nullfound" executes next (at twoatatime if
   the branch is taken, or the one just below if it is not). */
	uxor,nbz oddside,r0,save

/* Even aligned source.  save holds that operand.
   Do one iteration of the main copy loop juggling the registers to avoid
   one copy. */
	b,n	 nullfound
	ldwm     4(s_addr),oddside
	stwm     save,4(d_addr)
	uxor,nbz oddside,r0,save
	b,n      nullfound
        ldwm     4(s_addr),evenside
        stwm     oddside,4(d_addr)
        uxor,nbz evenside,r0,save
        b,n      nullfound
        ldwm     4(s_addr),oddside

/* Main loop body.  Entry expects evenside still to be stored, oddside
   just loaded. */
loop:
        stwm     evenside,4(d_addr)
        uxor,nbz oddside,r0,save

/* mid loop entry */
twoatatime:
        b,n      nullfound
        ldwm     4(s_addr),evenside
        stwm     oddside,4(d_addr)
/* sbz: skip the loop branch when evenside contains a null byte. */
        uxor,sbz evenside,r0,save
        b        loop
        ldwm     4(s_addr),oddside

/* fall through when null found in evenside.  oddside actually loaded */
/* nullfound: save holds the word that contains the first null byte and
   d_addr is where that word belongs.  Only the bytes up to and including
   the null are stored; d_addr is bumped by the count so that stbys,e
   writes just those leading bytes.
   NOTE(review): the stbys,e stores assume d_addr is word aligned on
   entry -- confirm for the callers coming from the unaligned paths. */
nullfound:				/* adjust d_addr and store final word */

	extru,<>	save,7,8,r0         /* pick up leftmost byte */
	addib,tr,n	1,d_addr,store_final
	extru,<>	save,15,8,r0        /* second byte */
	addib,tr,n	2,d_addr,store_final
	extru,<> 	save,23,8,r0        /* third byte */
/* No ",n" here: the bv below runs as this branch's delay slot, and the
   store at store_final2 then runs as the bv's delay slot. */
	addib,tr	3,d_addr,store_final2
/* Null is in the last byte: store the whole word and return. */
	bv		0(rp)
	stw		save,0(d_addr)

store_final:
	bv		0(rp)
store_final2:
	stbys,e		save,0(d_addr) 	/* delay slot */
	
/* At least one pointer is not word aligned.  blr indexes the table of
   two-instruction entries that starts right after its delay-slot nop,
   using tmp6 = TGT low bits : SRC low bits as the entry number. */
case_analysis:

        blr         tmp6,r0
        nop

	/* NOTE: the delay slots for the non-aligned cases load a   */
	/* shift quantity which is TGT-SRC into tmp3.               */
        /* Note also, the case for both strings being word aligned  */
	/* is already checked before the BLR is executed, so that   */
	/* case can never occur.                                    */
	/* Entries with SRC == 00 go to pos_aligned_copy0, which    */
	/* first undoes the speculative ldwm done at entry.         */

                                       /* TGT SRC */
        nop                            /* 00  00  can't happen */
        nop
        b           neg_aligned_copy   /* 00  01  */
	ldi         -1,tmp3            /* load shift quantity. delay slot */
        b           neg_aligned_copy   /* 00  10  */
	ldi         -2,tmp3            /* load shift quantity. delay slot */
        b           neg_aligned_copy   /* 00  11  */
	ldi         -3,tmp3            /* load shift quantity. delay slot */
        b           pos_aligned_copy0  /* 01  00  */
	ldi         1,tmp3            /* load shift quantity. delay slot */
        b           equal_alignment_1  /* 01  01  */
        ldbs,ma     1(s_addr),tmp1
        b           neg_aligned_copy   /* 01  10  */
	ldi         -1,tmp3            /* load shift quantity. delay slot */
        b           neg_aligned_copy   /* 01  11  */
	ldi         -2,tmp3            /* load shift quantity. delay slot */
        b           pos_aligned_copy0  /* 10  00  */
	ldi         2,tmp3            /* load shift quantity. delay slot */
        b           pos_aligned_copy   /* 10  01  */
	ldi         1,tmp3            /* load shift quantity. delay slot */
        b           equal_alignment_2  /* 10  10  */
        ldhs,ma     2(s_addr),tmp1
        b           neg_aligned_copy   /* 10  11  */
	ldi         -1,tmp3            /* load shift quantity. delay slot */
        b           pos_aligned_copy0  /* 11  00  */
	ldi         3,tmp3            /* load shift quantity. delay slot */
        b           pos_aligned_copy   /* 11  01  */
	ldi         2,tmp3            /* load shift quantity. delay slot */
        b           pos_aligned_copy   /* 11  10  */
	ldi         1,tmp3            /* load shift quantity. delay slot */
/* Last table entry: both pointers are one byte short of a word boundary.
   Being last, it simply runs on into the code that follows. */
        ldbs,ma     1(s_addr),tmp1     /* 11  11  */
        comiclr,<>  r0,tmp1,r0
        bv          0(rp)              /* return if 1st byte was null */
        stbs,ma     tmp1,1(d_addr)     /* store a byte to dst string  */
        b           bothaligned       /* can now goto word_aligned   */
        ldwm        4(s_addr),oddside     /* load next word of source    */

/* Both pointers are 1 byte past a word boundary: copy one byte, then
   fall into the halfword case.  In each "bv / stbs,ma" pair the store is
   the bv's delay slot, so the byte (null included) is stored whether or
   not the return is taken. */
equal_alignment_1:
        comiclr,<>  r0,tmp1,r0      /* nullify next if tmp1 <> 0  */
        bv          0(rp)           /* return if null byte found  */
        stbs,ma     tmp1,1(d_addr)  /* store a byte to dst string */
        ldhs,ma     2(s_addr),tmp1  /* load next halfword         */
/* Both pointers are 2 bytes past a word boundary: tmp1 holds a halfword;
   copy its two bytes one at a time, then join the aligned loop. */
equal_alignment_2:
        extru,<>    tmp1,23,8,tmp6  /* look at left byte of halfword */
        bv          0(rp)           /* return if 1st byte was null */
        stbs,ma     tmp6,1(d_addr)
        extru,<>    tmp1,31,8,r0
        bv          0(rp)           /* return if 2nd byte was null */
        stbs,ma     tmp1,1(d_addr)
        b           bothaligned
        ldwm        4(s_addr),oddside  /* load next word              */

/* source and destination are not aligned, so we do it the hard way. */

/* target alignment is greater than source alignment */
/* pos_aligned_copy0 is used when the source is word aligned: the ldwm at
   entry was executed in that case, so back s_addr up by one word. */
pos_aligned_copy0:
	addi		-4,s_addr,s_addr
pos_aligned_copy:
        extru       d_addr,31,2,tmp6   /* Extract low 2 bits of the dest addr */
        extru       s_addr,31,2,tmp1   /* Extract low 2 bits of the src addr */
        dep         r0,31,2,s_addr     /* Compute word address of the source. */
        sh3add		tmp3,r0,tmp4        /* compute shift amt */
        ldwm        	4(0,s_addr),tmp2    /* get 1st source word */
/* Force the bytes of the first word that precede the start of the string
   to non-zero so they cannot be mistaken for the terminator. */
	sh3add		tmp1,r0,save  	    /* setup mask shift amount */
	mtctl		save,r11	    /* set-up cr11 for mask */
	zvdepi		-2,32,save	    /* create mask */
	or		save,tmp2,tmp2	    /* mask unused bytes in src */
	ldi		-1,tmp1		    /* load tmp1 with 0xffffffff */
        mtctl        	tmp4,r11            /* shift count -> shift count reg */
        vshd        	tmp1,tmp2,tmp3      /* position data ! */
/* Null already in the first realigned output word? */
	uxor,nbz	tmp3,r0,save
	b,n		first_null
/* Null in the rest of the first source word? */
	uxor,nbz	tmp2,r0,save
	b		nullfound1
        mtctl        	tmp4,r11            /* re-load shift cnt (delay slot) */
	b		loop_entry
        ldwm        	4(0,s_addr),tmp1    /* get next word. delay slot */

/* target alignment is less than source alignment (tmp3 is negative) */
neg_aligned_copy:
        extru       d_addr,31,2,tmp6   /* Extract low 2 bits of the dest addr */
	extru	    s_addr,31,2,tmp2   /* Extract low 2 bits of the src addr */
        dep         r0,31,2,s_addr     /* Compute word address of the source. */
        sh3add		tmp3,r0,tmp4        /* compute shift amt */
        ldwm         	4(0,s_addr),tmp1    /* load first word from source. */
/* check to see if next word can be read safely */
/* Mask the bytes before the start of the string, then test the first
   word; the second word is loaded only if the first holds no null. */
	sh3add		tmp2,r0,save
        mtctl        	save,r11            /* shift count -> shift count reg */
	zvdepi		-2,32,save
	or		save, tmp1, tmp1
	uxor,nbz	tmp1,r0,save	    /* any nulls in first word? */
	b		first_null0
	mtctl		tmp4,r11            /* delay slot: real shift count */
        ldwm        	4(0,s_addr),tmp2    /* load second word from source */
	combt,=		tmp6,r0,chunk1      /* don't mask if whole word valid */
        vshd        	tmp1,tmp2,tmp3      /* position data ! */
/* Destination is not word aligned: mask the leading bytes of the
   realigned word (indexed by the destination offset in tmp6) before
   testing it for a null. */
	sh3add		tmp6,r0,save  	    /* setup r1 */
	mtctl		save,r11	    /* set-up cr11 for mask */
	zvdepi		-2,32,save
	or		save, tmp3, tmp3
	uxor,nbz	tmp3,r0,save
	b,n		first_null
	uxor,nbz	tmp2,r0,save
	b		nullfound1
        mtctl        	tmp4,r11            /* re-load shift cnt (delay slot) */
	b		loop_entry
        ldwm        	4(0,s_addr),tmp1    /* get next word. delay slot */

/* Destination word aligned: no masking needed, just test the second
   source word and enter the loop. */
chunk1:
	uxor,nbz	tmp2,r0,save
	b		nullfound0
	vshd		tmp1,tmp2,tmp3
/* Unaligned main loop: tmp1 and tmp2 alternate as "previous" and "next"
   source word; each vshd produces one realigned output word in tmp3. */
did_mask:
        ldwm        	4(0,s_addr),tmp1    /* get next word !  */
loop_entry:
        stbys,b,m   	tmp3,4(0,d_addr)    /* store !  */

	uxor,nbz	tmp1, r0, save
	b		nullfound2
        vshd        	tmp2,tmp1,tmp3      /* position data !  */
	ldwm		4(s_addr),tmp2
	stwm		tmp3,4(d_addr)
	uxor,sbz	tmp2,r0,save
	b		did_mask
/* The vshd below is both the delay slot of "b did_mask" and the first
   instruction executed when a null was found in tmp2. */
nullfound0:
	vshd		tmp1,tmp2,tmp3	    /* delay slot */
/* If the null is already inside the realigned word, finish with it. */
	uxor,nbz	tmp3,r0,save
	b,n		nullfound
/* Otherwise store the realigned word, then finish with the remaining
   bytes of tmp2 shifted into place. */
nullfound1:
	stbys,b,m	tmp3,4(0,d_addr)
	b		nullfound
	vshd		tmp2,r0,save	    /* delay slot */

/* Null found in tmp1; tmp3 = realigned (tmp2:tmp1). */
nullfound2:
	uxor,nbz	tmp3,r0,save
	b,n		nullfound
	stwm		tmp3,4(d_addr)
	b		nullfound
	/* notice that delay slot is in next routine */

first_null0:	/* null found in first word of non-aligned (wrt d_addr) */
	vshd		tmp1,r0,save	    /* delay slot */
	combt,=		tmp6,r0,check4
	extru		save,7,8,tmp4       /* delay slot: leftmost byte */
/* first_null: the null lies in the first output word, whose leading tmp6
   bytes are filler.  tmp6 counts down to select how many trailing bytes
   must be examined; those bytes are stored one at a time. */
first_null:
	addibt,=	-1,tmp6,check3	/* check last 3 bytes of word */
	extru   	save,15,8,tmp4
	addibt,=,n	-1,tmp6,check2	/* check last 2 bytes */
	bv		0(rp)		/* null in last byte--store and exit */
	stbys,b		save, 0(d_addr)

/* check4/check3/check2: tmp4 holds the next byte to copy.  Each stbs,ma
   sits in a delay slot, so the byte is stored even when it is the null
   that ends the copy. */
check4:
	combt,=		tmp4,r0,done
	stbs,ma		tmp4,1(d_addr)
	extru,<>	save,15,8,tmp4
check3:
	combt,=		tmp4,r0,done
	stbs,ma		tmp4,1(d_addr)
check2:
	extru,<>	save,23,8,tmp4
	bv		0(rp)
	stbs,ma		tmp4,1(d_addr)
/* Third byte was not null, so the last byte of the word must be. */
	bv		0(rp)
	stbs		r0,0(d_addr)

/* NOTE(review): the return from here is presumably supplied by the EXIT
   macro from DEFS.h -- confirm. */
done:    
EXIT(strcpy)