Welcome to mirror list, hosted at ThFree Co, Russian Federation.

a_triblt_sse2.asm « source « Kasumi « VirtualDub « thirdparty « src - github.com/mpc-hc/mpc-hc.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 54514b31780b6ad393dc28621aeea0ba4a2aec3e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
		segment	.rdata, align=16

;Fixed-point bias constants. Each one is 16 bytes holding the same 32-bit
;value in four consecutive dword lanes; the segment alignment keeps every
;constant on a 16-byte boundary.
;
;	correct	(1 << 15)	added to the level-shifted u/v pair before the
;						filter index and texel coordinate are extracted
;	round	(1 << 13)	not referenced by the code in this file section
;	round1	(1 << 9)	added to the horizontal filter sums before >> 10
;	round2	(1 << 17)	added to the vertical filter sum before >> 18

correct		dd			00008000h, 00008000h, 00008000h, 00008000h
round		dd			00002000h, 00002000h, 00002000h, 00002000h
round1		dd			00000200h, 00000200h, 00000200h, 00000200h
round2		dd			00020000h, 00020000h, 00020000h, 00020000h

		segment	.text

		%include	"a_triblt.inc"

		extern		_kVDCubicInterpTableFX14_075_MMX

;--------------------------------------------------------------------------
; _vdasm_triblt_span_bicubic_mip_linear_sse2
;
; 32-bit x86 entry point taking a single stack argument: a pointer to a
; texinfo record (field offsets come from a_triblt.inc). For each of
; texinfo.w output pixels it takes a bicubic sample from two consecutive
; mipmap entries, blends the two samples and stores one dword to the
; destination.
;
;	in:			[esp+4] = texinfo pointer
;	preserved:	ebx, esi, edi, ebp (saved on entry, restored before ret)
;	clobbered:	eax, ecx, edx, xmm0-xmm7, flags
;	returns:	with a plain 'ret' (the argument is left on the stack)
;--------------------------------------------------------------------------
	global	_vdasm_triblt_span_bicubic_mip_linear_sse2
_vdasm_triblt_span_bicubic_mip_linear_sse2:

;stack argument, as an offset from ebp once the four registers are saved
%define .p_texinfo	20

;scratch frame, as offsets from the 16-byte aligned esp
%define .af_vtemp0	0
%define .af_mipbase	16
%define	.af_prevesp	20
%define .afsize		24

		;save the callee-preserved registers; ebp then marks the stack top
		;that the epilogue has to return to before popping them
		push		ebp
		push		edi
		push		esi
		push		ebx
		mov			ebp, esp

		;carve out the scratch frame and force 16-byte alignment
		sub			esp, .afsize
		and			esp, -16
		mov			[esp + .af_prevesp], ebp

		;the texinfo pointer doubles as the base for mipmap entry lookups
		mov			ebx, [ebp + .p_texinfo]
		mov			[esp + .af_mipbase], ebx
		mov			edi, [ebx + texinfo.src]

		;ebp = dst + 4*w (one past the last pixel), esi = -4*w, so that
		;[ebp+esi] walks the span as esi counts up towards zero
		mov			esi, [ebx + texinfo.w]
		mov			ebp, [ebx + texinfo.dst]
		lea			ebp, [ebp + esi*4]
		shl			esi, 2
		neg			esi

		;xmm7 stays zero for the byte->word unpacks
		pxor		xmm7, xmm7

.xloop:

		;register roles inside the loop:
		;	eax		address of the texel being fetched
		;	ebx		mipmap entry for the level being sampled
		;	ecx		horizontal filter table entry
		;	edx		vertical filter table entry
		;	esi		negative byte offset of the current pixel
		;	edi		current mipspan record
		;	ebp		end of the destination span

%macro .SETUPADDR 1
		;.SETUPADDR n -- prepare one mip level for sampling.
		;	in:		edi = mipspan record, [esp + .af_mipbase] = mipmap base,
		;			%1 = number of entries to step past the one selected by lambda
		;	out:	ebx = mipmap entry, ecx/edx = horizontal/vertical filter
		;			entries, eax = address of the first texel to fetch
		;	clobbers xmm0-xmm2 and flags

		;pick the mipmap entry: ebx = ((lambda >> 4) & ~15) + mipmap_size*%1.
		;The same value >> 4 is used as the UV shift count, which assumes
		;mipmap_size is 16 -- TODO confirm against a_triblt.inc.
		mov			ebx, [edi + mipspan.lambda]
		shr			ebx, 4
		and			ebx, byte -16
		add			ebx, mipmap_size*%1
		movd		xmm2, ebx
		psrlq		xmm2, 4
		add			ebx, [esp + .af_mipbase]

		;xmm0 = (u, v) shifted down to this level, plus the 'correct' bias
		movd		xmm0, [edi + mipspan.u]
		movd		xmm1, [edi + mipspan.v]
		punpckldq	xmm0, xmm1
		psrad		xmm0, xmm2
		paddd		xmm0, [correct]

		;horizontal filter: bits 8..15 of u select a 16-byte table entry
		movd		ecx, xmm0
		shr			ecx, 4
		and			ecx, 0ff0h
		add			ecx, _kVDCubicInterpTableFX14_075_MMX

		;vertical filter: same selection using v
		pshufd		xmm1, xmm0, 01010101b
		movd		edx, xmm1
		shr			edx, 4
		and			edx, 0ff0h
		add			edx, _kVDCubicInterpTableFX14_075_MMX

		;texel address: the (u >> 16, v >> 16) pair is packed to words and
		;multiplied by the two words of mipmap.uvmul, then offset by mipmap.bits
		psrld		xmm0, 16
		packssdw	xmm0, xmm0
		movd		xmm1, [ebx + mipmap.uvmul]
		pmaddwd		xmm0, xmm1
		movd		eax, xmm0
		add			eax, [ebx + mipmap.bits]
%endmacro
		
%macro .HCUBIC 4
		;.HCUBIC dst, t0, t1, t2 -- filter four horizontally adjacent texels.
		;	in:		eax = address of the first of four 4-byte texels,
		;			ecx = 16-byte aligned filter entry (eight words), xmm7 = 0
		;	out:	%1 = four dword sums, one per byte channel
		;	clobbers %2, %3, %4 (all four operands must be distinct registers)

		;texels 0 and 1: interleave their bytes, then widen to words
		movd		%1, dword [eax]
		movd		%3, dword [eax+4]
		punpcklbw	%1, %3
		punpcklbw	%1, xmm7

		;texels 2 and 3: same treatment
		movd		%2, dword [eax+8]
		movd		%4, dword [eax+12]
		punpcklbw	%2, %4
		punpcklbw	%2, xmm7

		;%3 = low half of the filter entry in both qwords (taps for texels 0/1),
		;%4 = high half in both qwords (taps for texels 2/3); presumably each
		;half repeats its tap pair so all channels see the same weights
		movdqa		%4, [ecx]
		pshufd		%3, %4, 01000100b
		pshufd		%4, %4, 11101110b

		;multiply-accumulate each pair, then combine the two partial sums
		pmaddwd		%1, %3
		pmaddwd		%2, %4
		paddd		%1, %2
%endmacro

;.VCUBIC rowstep -- bicubic-filter one output texel from a 4x4 neighbourhood.
;	in:		eax = address of the first texel of the first row,
;			ecx = horizontal filter entry, edx = vertical filter entry
;			(16-byte aligned), xmm7 = 0, %1 = operand added to eax to
;			move down one row
;	out:	xmm0 = four signed 16-bit channel values, repeated in both qwords
;	clobbers eax (left on the fourth row), xmm1-xmm6, flags
%macro	.VCUBIC		1
		;horizontal pass: one four-lane dword sum per row in xmm0..xmm3
		.HCUBIC		xmm0, xmm4, xmm5, xmm6
		add			eax, %1
		.HCUBIC		xmm1, xmm4, xmm5, xmm6
		add			eax, %1
		.HCUBIC		xmm2, xmm4, xmm5, xmm6
		add			eax, %1
		.HCUBIC		xmm3, xmm4, xmm5, xmm6

		;Round and drop 10 fraction bits from every row sum. The bias must be
		;loaded with a full 128-bit move: a 64-bit movq zeroes the upper half
		;of xmm4, which leaves dword lanes 2 and 3 truncated instead of rounded.
		movdqa		xmm4, [round1]
		paddd		xmm0, xmm4
		paddd		xmm1, xmm4
		paddd		xmm2, xmm4
		paddd		xmm3, xmm4
		psrad		xmm0, 10
		psrad		xmm1, 10
		psrad		xmm2, 10
		psrad		xmm3, 10

		;narrow to words and interleave rows 0/1 and rows 2/3 so that each
		;dword lane holds the row pair for one channel
		packssdw	xmm0, xmm0
		packssdw	xmm1, xmm1
		packssdw	xmm2, xmm2
		packssdw	xmm3, xmm3
		punpcklwd	xmm0, xmm1
		punpcklwd	xmm2, xmm3

		;vertical pass: low half of the filter entry weights rows 0/1,
		;high half weights rows 2/3
		movdqa		xmm5, [edx]
		pshufd		xmm4, xmm5, 01000100b
		pshufd		xmm5, xmm5, 11101110b
		pmaddwd		xmm0, xmm4
		pmaddwd		xmm2, xmm5
		paddd		xmm0, xmm2

		;round, drop the remaining 18 fraction bits and narrow to words
		paddd		xmm0, [round2]
		psrad		xmm0, 18
		packssdw	xmm0, xmm0
%endmacro

		;sample the mipmap entry selected by lambda and park the four
		;channel words in the scratch frame
		.SETUPADDR	0
		.VCUBIC		[ebx+mipmap.pitch]
		movq		[esp + .af_vtemp0], xmm0

		;sample the entry that follows it; result stays in xmm0
		.SETUPADDR	1
		.VCUBIC		[ebx+mipmap.pitch]

		;blend weight: low byte of lambda, replicated to four words and
		;scaled to (frac << 7) so it stays positive for the signed multiply
		movd		xmm3, [edi+mipspan.lambda]
		pshuflw		xmm3, xmm3, 0
		psllw		xmm3, 8
		psrlq		xmm3, 1

		;result = first + ((second - first) * frac) / 256
		;(the difference is doubled so the high word of the product by
		;frac << 7 comes out as difference * frac >> 8)
		movq		xmm1, [esp + .af_vtemp0]
		psubw		xmm0, xmm1
		paddw		xmm0, xmm0
		pmulhw		xmm0, xmm3
		paddw		xmm0, xmm1

		;saturate to bytes and write the pixel
		packuswb	xmm0, xmm0
		movd		dword [ebp+esi], xmm0

		;next mipspan record, next pixel; the add carries out exactly when
		;esi reaches zero, which ends the span.
		;NOTE(review): the count is only tested after the body has run, so
		;this relies on the caller passing a width greater than zero.
		add			edi, mipspan_size
		add			esi, 4
		jnc			.xloop

		;drop the aligned frame and restore the saved registers
		mov			esp, [esp + .af_prevesp]
		pop			ebx
		pop			esi
		pop			edi
		pop			ebp
		ret

		end