Welcome to mirror list, hosted at ThFree Co, Russian Federation.

convert_a.asm « DSUtil « src - github.com/mpc-hc/mpc-hc.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 3cc9c7308b44a4a6ba49b8c1718de22a45a0cea2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
; Avisynth v2.5.  Copyright 2002 Ben Rudiak-Gould et al.
; http://www.avisynth.org
;
; This program is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
;
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, write to the Free Software
; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
; http://www.gnu.org/copyleft/gpl.html .
;
; Linking Avisynth statically or dynamically with other modules is making a
; combined work based on Avisynth.  Thus, the terms and conditions of the GNU
; General Public License cover the whole combination.
;
; As a special exception, the copyright holders of Avisynth give you
; permission to link Avisynth with independent modules that communicate with
; Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
; terms of these independent modules, and to copy and distribute the
; resulting combined work under terms of your choice, provided that
; every copy of the combined work is accompanied by a complete copy of
; the source code of Avisynth (the version of Avisynth used to produce the
; combined work), being distributed under the terms of the GNU General
; Public License plus this exception.  An independent module is a module
; which is not derived from or based on Avisynth, such as 3rd-party filters,
; import and export plugins, or graphical user interfaces.

	.586				; accept Pentium-class instructions
	.mmx				; accept MMX instructions (mm0-mm7)
	.model	flat			; 32-bit flat memory model

; alignment has to be 'page' so that I can use 'align 32' below
; (both the constant tables and the conversion code are placed in this segment)

_TEXT64	segment	page public use32 'CODE'

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

	align	8			; tables are read as 8-byte MMX memory operands

; Four conversion-constant tables, one per colour matrix.  Every table holds
; the same nine qwords in the same order; the code keeps the address of the
; selected table in edx and reaches each entry through the ofs_* equates
; defined after the tables.  Only the first table carries per-entry labels.
;
; Entry order (byte offset):
;    0  luma offset subtracted from Y (16, or 0 for the PC-range tables)
;    8  chroma offset subtracted from U/V (128)
;   16  mask keeping the low byte of every 16-bit word
;   24  rounding term added before the >>14 (0.5 in 14-bit fixed point)
;   32  0xFF in the top byte of each dword (alpha for RGB32 output)
;   40  cy       luma gain
;   48  crv      V -> R coefficient (stored in the high word of each dword)
;   56  cgu_cgv  U -> G coefficient in the low word, V -> G in the high word
;   64  cbu      U -> B coefficient (stored in the low word of each dword)

yuv2rgb_constants_rec601:

x0000_0000_0010_0010	dq	00000000000100010h		;    16
x0080_0080_0080_0080	dq	00080008000800080h		;   128
x00FF_00FF_00FF_00FF	dq	000FF00FF00FF00FFh		; low-byte mask per word
x00002000_00002000		dq	00000200000002000h		;  8192        = (0.5)<<14
xFF000000_FF000000		dq	0FF000000FF000000h		; alpha byte per dword
cy						dq	000004A8500004A85h		; 19077        = (255./219.)<<14+0.5
crv						dq	03313000033130000h		; 13075        = ((1-0.299)*255./112.)<<13+0.5
cgu_cgv					dq	0E5FCF377E5FCF377h		; -6660, -3209 = ((K-1)*K/0.587*255./112.)<<13-0.5, K=(0.299, 0.114)
cbu						dq	00000408D0000408Dh		;        16525 = ((1-0.114)*255./112.)<<13+0.5

yuv2rgb_constants_PC_601:

						dq	00000000000000000h		;     0
						dq	00080008000800080h		;   128
						dq	000FF00FF00FF00FFh		; low-byte mask per word
						dq	00000200000002000h		;  8192        = (0.5)<<14
						dq	0FF000000FF000000h		; alpha byte per dword
						dq	00000400000004000h		; 16384        = (1.)<<14+0.5
						dq	02D0B00002D0B0000h		; 11531        = ((1-0.299)*255./127.)<<13+0.5
										; NOTE(review): the formula evaluates to ~11530.4; the table holds 11531 - confirm intended rounding
						dq	0E90FF4F2E90FF4F2h		; -5873, -2830 = (((K-1)*K/0.587)*255./127.)<<13-0.5, K=(0.299, 0.114)
						dq	0000038ED000038EDh		;        14573 = ((1-0.114)*255./127.)<<13+0.5

yuv2rgb_constants_rec709:

						dq	00000000000100010h		;    16
						dq	00080008000800080h		;   128
						dq	000FF00FF00FF00FFh		; low-byte mask per word
						dq	00000200000002000h		;  8192        = (0.5)<<14
						dq	0FF000000FF000000h		; alpha byte per dword
						dq	000004A8500004A85h		; 19077        = (255./219.)<<14+0.5
						dq	0395E0000395E0000h		; 14686        = ((1-0.2126)*255./112.)<<13+0.5
						dq	0EEF2F92DEEF2F92Dh		; -4366, -1747 = ((K-1)*K/0.7152*255./112.)<<13-0.5, K=(0.2126, 0.0722)
						dq	00000439900004399h		;        17305 = ((1-0.0722)*255./112.)<<13+0.5

yuv2rgb_constants_PC_709:

						dq	00000000000000000h		;     0
						dq	00080008000800080h		;   128
						dq	000FF00FF00FF00FFh		; low-byte mask per word
						dq	00000200000002000h		;  8192        = (0.5)<<14
						dq	0FF000000FF000000h		; alpha byte per dword
						dq	00000400000004000h		; 16384        = (1.)<<14+0.5
						dq	03298000032980000h		; 12952        = ((1-0.2126)*255./127.)<<13+0.5
						dq	0F0F6F9FBF0F6F9FBh		; -3850, -1541 = (((K-1)*K/0.7152)*255./127.)<<13-0.5, K=(0.2126, 0.0722)
						dq	000003B9D00003B9Dh		;        15261 = ((1-0.0722)*255./127.)<<13+0.5

; Byte offsets of the entries inside any one of the tables above.
; They must stay in step with the order of the dq lines (8 bytes each).
ofs_x0000_0000_0010_0010 = 0
ofs_x0080_0080_0080_0080 = 8
ofs_x00FF_00FF_00FF_00FF = 16
ofs_x00002000_00002000 = 24
ofs_xFF000000_FF000000 = 32
ofs_cy = 40
ofs_crv = 48
ofs_cgu_cgv = 56
ofs_cbu = 64

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; GET_Y reg,uyvy
;; Reduces every 16-bit word of MMX register 'reg' to one byte value:
;;   uyvy = 0     : keep the low byte  (mask with the 00FF table entry at [edx])
;;   uyvy nonzero : keep the high byte (logical shift right by 8)
;; Needs edx = base of the active constant table when uyvy = 0.
GET_Y	MACRO	reg,uyvy
IFE &uyvy
	pand		reg,[edx+ofs_x00FF_00FF_00FF_00FF]
ELSE
	psrlw		reg,8
ENDIF
	ENDM

;; GET_UV reg,uyvy
;; Counterpart of GET_Y: selects the other byte of every 16-bit word of
;; 'reg' by testing the inverted flag (1-uyvy).
;;   1-uyvy nonzero : keep the high byte (logical shift right by 8)
;;   1-uyvy = 0     : keep the low byte  (mask with the 00FF table entry at [edx])
GET_UV	MACRO	reg,uyvy
IF 1-uyvy
	psrlw		reg,8
ELSE
	pand		reg,[edx+ofs_x00FF_00FF_00FF_00FF]
ENDIF
	ENDM

YUV2RGB_INNER_LOOP	MACRO	uyvy,rgb32,no_next_pixel

;; Converts one 8-byte group of packed 4:2:2 source (four pixels) at [esi]
;; into four RGB pixels stored just below the advanced edi.
;;
;; Macro arguments:
;;   uyvy          - byte-order flag forwarded to GET_Y / GET_UV
;;   rgb32         - 0: write 12 bytes (3 bytes per pixel)
;;                   1: write 16 bytes (4 bytes per pixel, top byte forced to FF)
;;   no_next_pixel - 0: also read the dword at [esi+8] for chroma averaging and
;;                      emit 'cmp esi,ecx' after advancing esi
;;                   1: read [esi+4] again instead of [esi+8] and emit no cmp
;;
;; Registers on entry:
;;   esi = source pointer, edi = destination pointer,
;;   edx = base of the active constant table (see the ofs_* equates),
;;   ecx = loop bound compared against esi (only used when no_next_pixel = 0)
;; On exit: esi += 8, edi += 12+4*rgb32, mm0-mm7 overwritten.  With
;;   no_next_pixel = 0 the flags of 'cmp esi,ecx' are still valid after the
;;   macro, because only MMX instructions follow the cmp.
;;
;; This YUV422->RGB conversion code uses only four MMX registers per
;; source dword, so I convert two dwords in parallel.  Lines corresponding
;; to the "second pipe" are indented an extra space.  There's almost no
;; overlap, except at the end and in the three lines marked ***.
;; revised 4july,2002 to properly set alpha in rgb32 to default "on" & other small memory optimizations

	movd		mm0, DWORD PTR [esi] ; DWORD PTR for compatibility with masm8
	 movd		 mm5, DWORD PTR [esi+4]
	movq		mm1,mm0
	GET_Y		mm0,&uyvy	; mm0 = __________Y1__Y0
	 movq		 mm4,mm5
	GET_UV		mm1,&uyvy	; mm1 = __________V0__U0
	 GET_Y		 mm4,&uyvy	; mm4 = __________Y3__Y2
	movq		mm2,mm5		; *** avoid reload from [esi+4]
	 GET_UV		 mm5,&uyvy	; mm5 = __________V2__U2
	psubw		mm0,[edx+ofs_x0000_0000_0010_0010]	; (Y-16)
	 movd		 mm6, DWORD PTR [esi+8-4*(no_next_pixel)]	; next group's first dword, or [esi+4] again when no_next_pixel = 1
	GET_UV		mm2,&uyvy	; mm2 = __________V2__U2
	 psubw		 mm4,[edx+ofs_x0000_0000_0010_0010]	; (Y-16)
	paddw		mm2,mm1		; 2*UV1=UV0+UV2
	 GET_UV		 mm6,&uyvy	; mm6 = __________V4__U4
	psubw		mm1,[edx+ofs_x0080_0080_0080_0080]	; (UV-128)
	 paddw		 mm6,mm5	; 2*UV3=UV2+UV4
	psllq		mm2,32		; move the summed chroma into the upper dword
	 psubw		 mm5,[edx+ofs_x0080_0080_0080_0080]	; (UV-128)
	punpcklwd	mm0,mm2		; mm0 = ______Y1______Y0
	 psllq		 mm6,32
	pmaddwd		mm0,[edx+ofs_cy]	; (Y-16)*(255./219.)<<14
	 punpcklwd	 mm4,mm6
	paddw		mm1,mm1		; 2*UV0=UV0+UV0
	 pmaddwd	 mm4,[edx+ofs_cy]
	 paddw		 mm5,mm5	; 2*UV2=UV2+UV2
	paddw		mm1,mm2		; mm1 = __V1__U1__V0__U0 * 2
	paddd		mm0,[edx+ofs_x00002000_00002000]	; +=0.5<<14
	 paddw		 mm5,mm6	; mm5 = __V3__U3__V2__U2 * 2
	movq		mm2,mm1		; copy of the chroma for the green term
	 paddd		 mm4,[edx+ofs_x00002000_00002000]	; +=0.5<<14
	movq		mm3,mm1		; copy of the chroma for the blue term
	 movq		 mm6,mm5
	pmaddwd		mm1,[edx+ofs_crv]
	 movq		 mm7,mm5
	paddd		mm1,mm0
	 pmaddwd	 mm5,[edx+ofs_crv]
	psrad		mm1,14		; mm1 = RRRRRRRRrrrrrrrr
	 paddd		 mm5,mm4
	pmaddwd		mm2,[edx+ofs_cgu_cgv]
	 psrad		 mm5,14
	paddd		mm2,mm0
	 pmaddwd	 mm6,[edx+ofs_cgu_cgv]
	psrad		mm2,14		; mm2 = GGGGGGGGgggggggg
	 paddd		 mm6,mm4
	pmaddwd		mm3,[edx+ofs_cbu]
	 psrad		 mm6,14
	paddd		mm3,mm0
	 pmaddwd	 mm7,[edx+ofs_cbu]
       add	       esi,8		; source advances by one 8-byte group
       add	       edi,12+4*rgb32	; destination advances by 12 or 16 bytes
IFE &no_next_pixel
       cmp	       esi,ecx		; flags are consumed by the caller's jb after the macro
ENDIF
	psrad		mm3,14		; mm3 = BBBBBBBBbbbbbbbb
	 paddd		 mm7,mm4
	pxor		mm0,mm0
	 psrad		 mm7,14
	packssdw	mm3,mm2	; mm3 = GGGGggggBBBBbbbb
	 packssdw	 mm7,mm6
	packssdw	mm1,mm0	; mm1 = ________RRRRrrrr
	 packssdw	 mm5,mm0	; *** avoid pxor mm4,mm4
	movq		mm2,mm3
	 movq		 mm6,mm7
	punpcklwd	mm2,mm1	; mm2 = RRRRBBBBrrrrbbbb
	 punpcklwd	 mm6,mm5
	punpckhwd	mm3,mm1	; mm3 = ____GGGG____gggg
	 punpckhwd	 mm7,mm5
	movq		mm0,mm2
	 movq		 mm4,mm6
	punpcklwd	mm0,mm3	; mm0 = ____rrrrggggbbbb
	 punpcklwd	 mm4,mm7
IFE &rgb32
	psllq		mm0,16		; 24-bit output only: pre-shift so the packed pixel lands one byte higher
	 psllq		 mm4,16
ENDIF
	punpckhwd	mm2,mm3	; mm2 = ____RRRRGGGGBBBB
	 punpckhwd	 mm6,mm7
	packuswb	mm0,mm2	; mm0 = __RRGGBB__rrggbb <- ta dah!
	 packuswb	 mm4,mm6

IF &rgb32
	por mm0, [edx+ofs_xFF000000_FF000000]	 ; set alpha channels "on"
	 por mm4, [edx+ofs_xFF000000_FF000000]
	movq	[edi-16],mm0	; store the quadwords independently
	 movq	 [edi-8],mm4
ELSE
	psrlq	mm0,8		; pack the two quadwords into 12 bytes
	psllq	mm4,8		; (note: the two shifts above leave
	movd	DWORD PTR [edi-12],mm0	; mm0,4 = __RRGGBBrrggbb__)
	psrlq	mm0,32
	por	mm4,mm0		; merge the tail of the first pair with the head of the second
	movd	DWORD PTR [edi-8],mm4
	psrlq	mm4,32
	movd	DWORD PTR [edi-4],mm4
ENDIF

	ENDM

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

YUV2RGB_PROC	MACRO	procname,uyvy,rgb32

	PUBLIC	C _&procname

;; Emits one conversion routine, _<procname>, built around
;; YUV2RGB_INNER_LOOP with the given uyvy / rgb32 flags.
;;
;;void __cdecl procname(
;;	[esp+ 4] const BYTE* src,
;;	[esp+ 8] BYTE* dst,
;;	[esp+12] const BYTE* src_end,
;;	[esp+16] int src_pitch,
;;	[esp+20] int row_size,
;;	[esp+24] rec709 matrix);  0=rec601, 1=rec709, 3=PC_601, 7=PC_709
;;
;; Source rows are read bottom-up, starting at src_end - src_pitch and moving
;; down by src_pitch until the row start is no longer above src.  The
;; destination is written forwards and contiguously (no destination pitch).
;;
;; Preconditions (not checked here):
;;   - row_size is a multiple of 8 and at least 8; every row is processed in
;;     8-byte groups and the last group is always converted.
;;   - at least one row lies between src and src_end.
;;
;; Register use: esi = source pointer, edi = destination pointer,
;;   eax = src_pitch, ebx = row_size, ecx = address of the row's last group,
;;   edx = selected constant table.  esi/edi/ebx are saved and restored;
;;   eax, ecx, edx, flags and mm0-mm7 are overwritten.  emms is issued
;;   before returning.

_&procname	PROC

	push	esi
	push	edi
	push	ebx

	;; three pushes above: every argument now sits 12 bytes further up
	mov	eax,[esp+16+12]		; src_pitch
	mov	esi,[esp+12+12]		; src_end - read source bottom-up
	mov	edi,[esp+8+12]		; dstp
	mov	ebx,[esp+20+12]		; row_size

	;; Matrix selection is a cascade on the low three bits of 'matrix':
	;; each further set bit moves on to the next table, the first clear
	;; bit stops the cascade (0 -> rec601, 1 -> rec709, 3 -> PC_601,
	;; 7 -> PC_709).
	mov	edx,offset yuv2rgb_constants_rec601
	test	byte ptr [esp+24+12],1
	jz	loop0
	mov	edx,offset yuv2rgb_constants_rec709

	test	byte ptr [esp+24+12],2
	jz	loop0
	mov	edx,offset yuv2rgb_constants_PC_601

	test	byte ptr [esp+24+12],4
	jz	loop0
	mov	edx,offset yuv2rgb_constants_PC_709

loop0:
	sub	esi,eax			; esi = start of the next row to convert
	lea	ecx,[esi+ebx-8]		; ecx = address of the row's final 8-byte group

	;; The loop below is do-while shaped: it converts a group first and
	;; tests afterwards.  A row holding a single group (row_size == 8)
	;; must therefore bypass it, otherwise 16 source bytes would be
	;; consumed and two output groups written for that row.
	cmp	esi,ecx
	jae	last_group

	align 32
loop1:
	YUV2RGB_INNER_LOOP	uyvy,rgb32,0
	jb	loop1			; flags come from 'cmp esi,ecx' inside the macro

last_group:
	;; final group of the row: must not read the dword past the row end
	YUV2RGB_INNER_LOOP	uyvy,rgb32,1

	sub	esi,ebx			; back to the start of the row just converted
	cmp	esi,[esp+4+12]		; src
	ja	loop0			; more rows remain above src

	emms				; leave the MMX state clean for FPU users
	pop	ebx
	pop	edi
	pop	esi
	retn

_&procname	ENDP

	ENDM

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Instantiate the two conversion routines.  Both pass uyvy=0; they differ
; only in the rgb32 flag: 0 emits 12 output bytes per 8 source bytes,
; 1 emits 16 with the top byte of every pixel forced to FF.
YUV2RGB_PROC	mmx_YUY2toRGB24,0,0
YUV2RGB_PROC	mmx_YUY2toRGB32,0,1

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

	END