Welcome to mirror list, hosted at ThFree Co, Russian Federation.

strncat.S « hppa « machine « libc « newlib - cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 04bd156c96c30b6b3db94ec2c525c262aad3faf9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
/*
 *  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
 *
 *  To anyone who acknowledges that this file is provided "AS IS"
 *  without any express or implied warranty:
 *      permission to use, copy, modify, and distribute this file
 *  for any purpose is hereby granted without fee, provided that
 *  the above copyright notice and this notice appears in all
 *  copies, and that the name of Hewlett-Packard Company not be
 *  used in advertising or publicity pertaining to distribution
 *  of the software without specific, written prior permission.
 *  Hewlett-Packard Company makes no representations about the
 *  suitability of this software for any purpose.
 */

/*HPUX_ID:	@(#) $Revision$	*/
/* strncat(s1,s2,n) : concatenate at most n characters from s2 onto s1 */

#include "DEFS.h"

/*
 * Register aliases used by strncat below.
 *
 *   d_addr  destination pointer (s1); advanced to the end of s1, then used
 *           as the store pointer while copying.
 *   s_addr  source pointer (s2).
 *   count   number of bytes still allowed to be copied (n).
 *   tmp1-3  data words being loaded, shifted and stored.
 *   tmp4    low two bits of the source address, later the shift count in bits.
 *   tmp5    low two bits of the destination address (cleared after the first
 *           word store in the not_aligned path).
 *   tmp6    copy of the original s_addr (aligned path) or the signed byte
 *           shift amount tmp5 - tmp4 (not_aligned path).
 *   save    scratch for masks and for the result of the null-byte check.
 *   tmp7    see the comment on its definition.
 *
 * NOTE(review): arg3 and ret1 are not defined in this file; presumably they
 * are register names supplied by DEFS.h -- confirm.
 */
#define	d_addr  r26
#define	s_addr  r25
#define	count   r24
#define	tmp1    r19
#define	tmp2    r20
#define	tmp3    r21
#define	tmp4    r22
#define	tmp5	arg3
#define tmp6	r31
#define	save	r1
#define tmp7	ret1	/* source offset-- reset to orig source addr if not aligned */


/*
 * strncat: append at most `count` bytes of the string at s_addr to the end
 * of the string at d_addr, then store a terminating null byte.
 *
 * In:   d_addr = s1, s_addr = s2, count = n (see the #defines above).
 * Out:  ret0   = the original value of d_addr.
 *       If s_addr is NULL the routine returns at once and s1 is not touched.
 *
 * Outline:
 *   1. Scan s1 one word at a time until a word containing a null byte is
 *      found, then point d_addr at that null byte (found_end).
 *   2. If count < 4, copy byte by byte (byteloop).
 *   3. If source and destination have the same byte offset within a word,
 *      copy word by word (aligned / chunks / back_porch).  Otherwise build
 *      each destination word from two source words with vshd (not_aligned).
 *   4. Whenever a word about to be stored contains a null byte, s_addr is
 *      backed up (null1, align_null1, null3*, null4) and the remainder is
 *      copied by byteloop, which stops at the null or when count runs out.
 *   5. done stores the terminating null in the delay slot of the return.
 *
 * Reading notes: the instruction after a branch is its delay slot; a ",n"
 * completer may nullify it.  "uxor,nbz" and "extru,<>" nullify the NEXT
 * instruction when their condition holds, so the instruction that follows
 * each of them is executed only when the condition is false.
 */
ENTRY(strncat)

	comb,=		r0,s_addr,quit	/* quit if s2=NULL */
        copy      d_addr,ret0          /* The return value is the value of d_addr. DELAY SLOT*/

/* First look for end of s1 (d_addr) */

        extru       d_addr,31,2,tmp1   /* Extract the low two bits of the dest address. */
	combt,=		tmp1,r0,dont_mask	/* offset 0: no mask needed for the first word */
	dep		0,31,2,d_addr	/*set word alignment (delay slot, always executed) */
	ldwm		4(d_addr),tmp2	/* tmp2 = first word of s1, d_addr += 4 */
	sh3add		tmp1,r0,save	/* build mask based on tmp1 (save = 8 * byte offset) */
	mtctl		save,11		/* cr11 = bit position used by zvdepi */
	zvdepi		-2,32,save
	or		save,tmp2,tmp2	/* or mask into the data; presumably so bytes before s1 cannot look like a null */
	uxor,nbz	tmp2,r0,save	/* save = tmp2; skip the branch below if no byte is zero */
search:
	b,n		found_end	/* nullified under uxor conditions above and below */
dont_mask:
	ldwm		4(d_addr),tmp2	/* tmp2 = next word of s1, d_addr += 4 */
	comib,tr	r0,r0,search	/* always taken */
	uxor,nbz	tmp2,r0,save	/* delay slot: nullifies the b,n at search if no byte is zero */

found_end:				/* at this point d_addr points to word */
	extru,<>	save,7,8,r0	/* following word with null */
	addib,tr,n	-4,d_addr,begin_copy	/*set d_addr to end of s1 (first byte of the word is the null) */
	extru,<>	save,15,8,r0	/* second byte zero ? */
	addib,tr,n	-3,d_addr,begin_copy
	extru,<>	save,23,8,r0	/* third byte zero ?  if not, skip the first addi */
	addi		-1,d_addr,d_addr	/* executed only when the third byte is the null */
	addi		-1,d_addr,d_addr	/* net: -2 if third byte is null, else -1 (fourth byte) */


begin_copy:
        addibt,<,n  -4,count,byteloop     /* count -= 4; if the original count was < 4 don't get fancy.*/

        extru       s_addr,31,2,tmp4   /* Extract the low two bits of the source address.*/
        extru       d_addr,31,2,tmp5   /* Extract the low two bits of the destination address.*/
        add         count,tmp5,count   /* pre increment the count by the dest byte offset (tmp5) */
	copy		s_addr,tmp6	/* save original s_addr in case we find null in first word */
	copy	     s_addr, tmp7	/* save s_addr in case we find null before first store */
        comb,<>       tmp5,tmp4,not_aligned /* branch if tmp5<>tmp4. */
        dep         0,31,2,s_addr      /* Compute the word address of the source.  DELAY SLOT.*/
/* aligned*/
	combt,=		tmp5,r0,skip_mask	/* offset 0: first word needs no mask */
        ldwm        4(0,s_addr),tmp1   /* tmp1 = *s_addr   s_addr += 4 (DELAY SLOT)*/
	sh3add		tmp5,r0,save	/* compute mask in save*/
	mtctl		save,11
	zvdepi		-2,32,save
	or		save,tmp1,tmp1  /* or mask with data*/
	uxor,nbz	tmp1,r0,save 	/* check for null*/
	b,n		null1		/* null in first word: restart byte-wise from the original s_addr */
	addibt,<	-4,count,back_porch	/* count -= 4; negative -> only a partial word remains */
        stbys,b,m   tmp1,4(0,d_addr)   /* store word (delay slot)*/

chunks:
	ldwm		4(0,s_addr),tmp1 /* get a word*/

skip_mask:
	uxor,nbz	tmp1,r0,save 	/* check for null*/
	b,n		align_null1	/* null found: back s_addr up one word and finish byte-wise */
	addibf,<	-4,count,chunks	/* count -= 4; loop while the result is not negative */
        stbys,b,m   tmp1,4(0,d_addr)   /* store word (delay slot)*/

back_porch:				   /* last word to store*/
         addibt,=,n  4,count,done       /* if count = 0 we're, of course, done !*/
         ldws        0(s_addr),tmp1     /* load up the back_porch*/
	 sh3add		count,r0, save	/* setup right mask based on count*/
	 mtctl		save,r11
	 zvdepi		-2,32,save	/*save now has left-hand mask*/
	 uaddcm		r0,save,save	/*form right hand mask */
	 or		tmp1,save,tmp1	/*and insert data*/
	 uxor,nbz	tmp1,r0,save	/* check for null*/
	 b,n 		null2		/* null among the bytes to copy: finish byte-wise */
         add         d_addr,count,d_addr/* final store address  is +1 too high !*/
	 b		done
	 stbys,e	tmp1,0(d_addr)	/* done (delay slot) */

/* Begin non_aligned code. */
not_aligned:
        sub,>=       tmp5,tmp4,tmp6     /* tmp6 = tmp5 - tmp4 = shift amt in bytes; skip load if tmp5 > tmp4.*/
        ldwm         4(0,s_addr),tmp1   /* load up the first word from the source. tmp1 = *s_addr++*/
        zdep         tmp6,28,29,tmp4    /* compute the number of bits to shift */
        mtctl        tmp4,11            /* load the shift count into cr11 = shift count register.*/
        addibt,<,n   -4,count,chkchnk2 /* first step in pre adjustment of count for looping.*/

        ldwm        4(0,s_addr),tmp2    /* get either first or second word from source. */
	combt,=		tmp5,r0,skip_mask4 /* don't mask if whole word is valid*/
        vshd        tmp1,tmp2,tmp3      /* position data !  (delay slot)*/
	sh3add		tmp5,r0,save	/* setup r1*/
	mtctl		save,r11	/* setup mask in save*/
	zvdepi		-2,32,save
	or		save, tmp3, tmp3
        mtctl           tmp4,11            /* re-load the shift count into cr11 */

skip_mask4:
	uxor,nbz	tmp3, r0, save	/* check for null in the word about to be stored */
	b,n		null4		/* special case for first word */
	copy 		r0, tmp5	/* zero out tmp5 so we don't try to mask again*/
	copy		r0, tmp7	/* zero out tmp7 so we don't try to use original s_addr anymore */
	b		continue
        stbys,b,m   tmp3,4(0,d_addr)    /* store ! (delay slot) */

chunk2:
        ldwm        4(0,s_addr),tmp2
        vshd        tmp1,tmp2,tmp3 

skip_mask2:
	uxor,nbz	tmp3, r0, save	/* check for null */
	b,n		null3
        stbys,b,m   tmp3,4(0,d_addr)    /* store ! */

continue:
        ldwm        4(0,s_addr),tmp1    /* get 2nd word ! */
        vshd        tmp2,tmp1,tmp3      /* position data ! */
	uxor,nbz	tmp3, r0, save	/* check for null */
	b,n		null3

        addibf,<    -8,count,chunk2    /* If count is still >= 8 do another loop.*/
        stbys,b,m   tmp3,4(0,d_addr)    /* store ! (delay slot) */

chkchnk2:
        addibt,<,n  4,count,bp_0       /* if we don't have 4 bytes left then do the back porch (bp_0)*/

subchnk2: /* we have less than 8 chars to copy*/

        ldwm        4(0,s_addr),tmp2    /* get next word !*/
	combt,=		tmp5,r0,skip_mask3	/* no mask needed when tmp5 is zero */
        vshd        tmp1,tmp2,tmp3      /* position data ! (delay slot) */
	sh3add		tmp5,r0,save	/* setup r1*/
	mtctl		save,r11	/* setup mask in save*/
	zvdepi		-2,32,save
	or		save, tmp3, tmp3
	mtctl		tmp4,11		/* restore shift value again */
skip_mask3:
	uxor,nbz	tmp3,r0,save	/* check for null */
	b,n		null3
	copy		r0,tmp5   /* zero out tmp5 so null3 does correct alignment */
	copy		r0,tmp7	  /* zero out tmp7 so we don't use original s_addr since no longer valid */
	b		bp_1 /* we now have less than 4 bytes to move*/
        stbys,b,m   tmp3,4(0,d_addr)    /* store ! (delay slot) */

bp_0:    
	copy		tmp1,tmp2	/* switch registers for shift process */
	addibt,<=,n  4,count,done        /* if count = -4 this implies that count = 0 -> done */

bp_1:
        ldwm        4(0,s_addr),tmp1    /* get final word !        */
        vshd        tmp2,tmp1,tmp3      /* position data !*/
	uxor,nbz	tmp3,r0,save	/* if no-byte-zero */
	b,n		bp_null		/* don't goto no_null-find which null instead */
no_null:
	add	    d_addr,count,d_addr	/* set up d_addr for stbys,e */
	b		done		/* were done*/
        stbys,e     tmp3,0(0,d_addr)    /* store the data ! (delay slot) */

/* here we do ye old byte-at-a-time moves.*/
align_null1:
	b	byteloop
	addi	-4,s_addr,s_addr	/* delay slot: back up to the word that held the null */
null1:
	copy		tmp6,s_addr		/* restore orig s_addr (aligned only) */
byteloop:
	addibt,=     4,count,done	/* undo the earlier count -= 4; nothing left -> done */
null2:
        ldbs,ma     1(s_addr),tmp1	/* tmp1 = *s_addr++ (also the delay slot of the addibt above) */
encore:
	combt,=,n	tmp1,r0, done	/* stop at the source null; done writes the terminator */
        stbs,ma     tmp1,1(d_addr) 
        addibf,=,n  -1,count,encore	/* count -= 1; loop while count is not zero */
        ldbs,ma     1(s_addr),tmp1	/* delay slot: next byte, nullified when the loop exits */
	b,n		done

bp_null:
	addi	-4,count,count		/* fudge count 'cause byteloop will re-increment */

null3:	/* not_aligned case reset s_addr and finish byte-wise */
	combt,=,n  r0,tmp7,null3a	/* if tmp7 is not valid address then branch below */
	b 	byteloop		  	/* otherwise reset s_addr to tmp7 and finish */
	copy 	tmp7, s_addr

null3a: /* right shift target */
	addibt,<,n  0,tmp6,null3b	/* if left shifting */
	sub	r0,tmp6,tmp6		/* do null3b code */
	addi	-4,tmp6,tmp6
	b	byteloop
	add	tmp6,s_addr,s_addr	/* reset s_addr by 4 + shift_amt */

null3b:
	subi	-8,tmp6,tmp6
	add	tmp5,tmp6,tmp6  /* adjust by the dest offset if this is our first store */
	b	byteloop
	add	tmp6,s_addr,s_addr	/* adjust s_addr by (8-shift_amt-dest_off) */

null4:
	add,>	tmp6,r0,tmp6		/* if left shift */
	b,n	null3			/* then do null3 */
	b	byteloop
	addi	-4,s_addr,s_addr	/* adj source only by 4 */
	
done:    
	bv		0(r2)		/* return to caller */
	stbs		r0,0(d_addr)	/* delay slot: write the terminating null */
quit:
EXIT(strncat)