Welcome to mirror list, hosted at ThFree Co, Russian Federation.

a_spanutils_isse.asm « source « Kasumi « VirtualDub « thirdparty « src - github.com/mpc-hc/mpc-hc.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 3fe7cedbc59720f979bbb79fb34d0458962f47af (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
		section	.rdata, rdata, align=16

; 64-bit constants, each repeating one byte or word pattern across the qword.
; x0002000200020002 holds the value 2 in each of its four 16-bit lanes and is
; loaded by _vdasm_horiz_expand4x_coaligned_MMX as the rounding bias applied
; before its shift right by 2.
; NOTE(review): xfefefefefefefefe and xe0e0e0e0e0e0e0e0 are not referenced by
; any routine visible in this listing -- confirm they are still needed.
xfefefefefefefefe	dq	0fefefefefefefefeh
xe0e0e0e0e0e0e0e0	dq	0e0e0e0e0e0e0e0e0h
x0002000200020002	dq	00002000200020002h

		section	.text

;==============================================================================
; _vdasm_horiz_expand2x_coaligned_ISSE
;
; Doubles a row of bytes horizontally. Every source byte s[i] is written
; unchanged, followed by pavgb(s[i], s[i+1]) = (s[i] + s[i+1] + 1) >> 1.
;
; Stack arguments (read relative to esp on entry):
;	[esp+4]		destination pointer
;	[esp+8]		source pointer
;	[esp+12]	counter, reduced by 16 for every 16 bytes written
;
; Each pass reads 9 source bytes (8 at src plus 8 at src+1) and stores 16
; destination bytes. The loop leaves only when the counter hits exactly zero
; after a pass -- NOTE(review): this presumes a nonzero multiple of 16;
; confirm against callers.
;
; Modifies eax, ecx, edx, mm0-mm2 and flags. No emms is executed here.
;==============================================================================
		global _vdasm_horiz_expand2x_coaligned_ISSE
_vdasm_horiz_expand2x_coaligned_ISSE:
		mov			edx, [esp+4]			;edx = write cursor
		mov			ecx, [esp+8]			;ecx = read cursor
		mov			eax, [esp+12]			;eax = remaining count
.expand8:
		movq		mm1, [ecx]				;s[0..7]
		movq		mm0, [ecx+1]			;s[1..8], the right-hand neighbours
		add			ecx, 8
		pavgb		mm0, mm1				;rounded-up midpoints of each pair
		movq		mm2, mm1				;second copy of the source bytes

		punpcklbw	mm1, mm0				;s0 m0 s1 m1 s2 m2 s3 m3
		punpckhbw	mm2, mm0				;s4 m4 s5 m5 s6 m6 s7 m7
		movq		[edx], mm1
		movq		[edx+8], mm2
		add			edx, 16

		sub			eax, 16					;sets ZF for the branch below
		jne			.expand8
		ret

;==============================================================================
; _vdasm_vert_average_13_ISSE
;
; Blends two byte rows with weights 3:1, sixteen bytes per pass:
;	dst[i] = avgup(avgdown(p[i], q[i]), p[i]) = (3*p[i] + q[i] + 2) >> 2
; where avgup rounds the halved sum up (pavgb) and avgdown rounds it down
; (pavgb applied to the complemented inputs, then complemented again).
;
; Stack arguments as seen on entry (the +4 in the code accounts for the push):
;	[esp+4]		destination row
;	[esp+8]		row q (weight 1)
;	[esp+12]	row p (weight 3)
;	[esp+16]	byte count
;
; The loop ends only when the running offset lands exactly on zero --
; NOTE(review): this presumes the count is a nonzero multiple of 16; confirm
; against callers.
;
; Modifies eax, ecx, edx, mm0-mm5, mm7 and flags; ebx is saved and restored.
; No emms is executed here.
;==============================================================================
		global	_vdasm_vert_average_13_ISSE
_vdasm_vert_average_13_ISSE:
		push	ebx
		mov		ebx, [esp+12+4]			;ebx = row p
		mov		ecx, [esp+8+4]			;ecx = row q
		mov		edx, [esp+4+4]			;edx = destination row
		mov		eax, [esp+16+4]			;eax = byte count

		;Point all three registers at the end of their rows and negate the
		;count, so eax runs from -count up to zero and a single add both
		;advances the offset and sets the flags for the loop branch.
		add		ebx, eax
		add		ecx, eax
		add		edx, eax
		neg		eax

		pcmpeqb	mm7, mm7				;mm7 = all ones, XOR mask for bytewise NOT
.xloop:
		;Two independent 8-byte lanes (low: mm0-mm2, high: mm3-mm5) are
		;interleaved below.
		movq	mm0, [ebx+eax]			;p, bytes 0-7
		movq	mm1, [ecx+eax]			;q, bytes 0-7
		movq	mm2, mm0				;keep unmodified p (low lane)

		movq	mm3, [ebx+eax+8]		;p, bytes 8-15
		pxor	mm0, mm7				;~p (low)
		pxor	mm1, mm7				;~q (low)

		movq	mm4, [ecx+eax+8]		;q, bytes 8-15
		movq	mm5, mm3				;keep unmodified p (high lane)
		pxor	mm3, mm7				;~p (high)

		pxor	mm4, mm7				;~q (high)
		pavgb	mm0, mm1				;pavgb(~p, ~q) = ~avgdown(p, q) (low)
		pavgb	mm3, mm4				;pavgb(~p, ~q) = ~avgdown(p, q) (high)

		pxor	mm0, mm7				;avgdown(p, q) (low)
		pxor	mm3, mm7				;avgdown(p, q) (high)
		pavgb	mm0, mm2				;avgup(avgdown(p, q), p) (low)

		movq	[edx+eax], mm0
		pavgb	mm3, mm5				;avgup(avgdown(p, q), p) (high)

		movq	[edx+eax+8], mm3
		add		eax, 16					;next 16 bytes; ZF set once the end is reached
		jne		.xloop

		pop		ebx
		ret

;==============================================================================
; _vdasm_vert_average_17_ISSE
;
; Blends two byte rows with weights 7:1, eight bytes per pass:
;	dst[i] = (7*a[i] + b[i] + 4) >> 3
;
; Stack arguments as seen on entry (the +4 in the code accounts for the push):
;	[esp+4]		destination row
;	[esp+8]		row a (weight 7)
;	[esp+12]	row b (weight 1)
;	[esp+16]	byte count
;
; The loop ends only when the running offset lands exactly on zero --
; NOTE(review): this presumes the count is a nonzero multiple of 8; confirm
; against callers.
;
; Modifies eax, ecx, edx, mm0-mm2, mm7 and flags; ebx is saved and restored.
; No emms is executed here.
;==============================================================================
		global	_vdasm_vert_average_17_ISSE
_vdasm_vert_average_17_ISSE:
		push	ebx
		mov		edx, [esp+4+4]			;edx = destination row
		mov		ecx, [esp+8+4]			;ecx = row a
		mov		ebx, [esp+12+4]			;ebx = row b
		mov		eax, [esp+16+4]			;eax = byte count

		;Bias every pointer to the end of its row and count a negative offset
		;up to zero, so one add both steps the offset and drives the branch.
		add		edx, eax
		add		ecx, eax
		add		ebx, eax
		neg		eax

		;pavgb rounds up. A rounded-down average is obtained by averaging the
		;complements and complementing the result:
		;	avgdown(x, y) = ~pavgb(~x, ~y)
		;The blend is avgup(avgdown(avgdown(a, b), a), a).

		pcmpeqb		mm7, mm7			;all ones: XOR mask for bytewise NOT
.next8:
		movq		mm1, [ebx+eax]		;b
		movq		mm2, [ecx+eax]		;a, kept intact for the final step
		pxor		mm1, mm7			;~b
		movq		mm0, mm2
		pxor		mm0, mm7			;~a
		pavgb		mm1, mm0			;~avgdown(a, b)
		pavgb		mm1, mm0			;~avgdown(avgdown(a, b), a)
		pxor		mm1, mm7			;avgdown(avgdown(a, b), a) = (3*a + b) >> 2
		pavgb		mm1, mm2			;round up against a: (7*a + b + 4) >> 3
		movq		[edx+eax], mm1

		add		eax, 8
		jne		.next8

		pop		ebx
		ret

;==============================================================================
; _vdasm_vert_average_35_ISSE
;
; Blends two byte rows with weights 5:3, eight bytes per pass:
;	dst[i] = (5*a[i] + 3*b[i] + 4) >> 3
;
; Stack arguments as seen on entry (the +4 in the code accounts for the push):
;	[esp+4]		destination row
;	[esp+8]		row a (weight 5)
;	[esp+12]	row b (weight 3)
;	[esp+16]	byte count
;
; The loop ends only when the running offset lands exactly on zero --
; NOTE(review): this presumes the count is a nonzero multiple of 8; confirm
; against callers.
;
; Modifies eax, ecx, edx, mm0-mm2, mm7 and flags; ebx is saved and restored.
; No emms is executed here.
;==============================================================================
		global	_vdasm_vert_average_35_ISSE
_vdasm_vert_average_35_ISSE:
		push	ebx
		mov		edx, [esp+4+4]			;edx = destination row
		mov		ecx, [esp+8+4]			;ecx = row a
		mov		ebx, [esp+12+4]			;ebx = row b
		mov		eax, [esp+16+4]			;eax = byte count

		;Bias every pointer to the end of its row and count a negative offset
		;up to zero, so one add both steps the offset and drives the branch.
		add		edx, eax
		add		ecx, eax
		add		ebx, eax
		neg		eax

		;pavgb rounds up. A rounded-down average is obtained by averaging the
		;complements and complementing the result:
		;	avgdown(x, y) = ~pavgb(~x, ~y)
		;The blend is avgup(avgdown(avgdown(a, b), b), a).

		pcmpeqb		mm7, mm7			;all ones: XOR mask for bytewise NOT
.next8:
		movq		mm2, [ecx+eax]		;a, kept intact for the final step
		movq		mm1, [ebx+eax]		;b
		movq		mm0, mm2
		pxor		mm1, mm7			;~b
		pxor		mm0, mm7			;~a
		pavgb		mm0, mm1			;~avgdown(a, b)
		pavgb		mm0, mm1			;~avgdown(avgdown(a, b), b)
		pxor		mm0, mm7			;avgdown(avgdown(a, b), b) = (a + 3*b) >> 2
		pavgb		mm0, mm2			;round up against a: (5*a + 3*b + 4) >> 3
		movq		[edx+eax], mm0

		add		eax, 8
		jne		.next8

		pop		ebx
		ret

;==============================================================================
; _vdasm_horiz_expand4x_coaligned_MMX
;
; Expands a row of bytes 4x horizontally. For each source byte s[i], with
; d = s[i+1] - s[i] computed in 16-bit lanes, four bytes are written:
;	s[i]
;	s[i] + ((  d + 2) >> 2)
;	s[i] + ((2*d + 2) >> 2)
;	s[i] + ((3*d + 2) >> 2)
; The shifts are arithmetic (psraw) and the three computed values are packed
; back to bytes with unsigned saturation (packuswb).
;
; Stack arguments (read relative to esp on entry):
;	[esp+4]		destination pointer
;	[esp+8]		source pointer
;	[esp+12]	number of passes; each pass reads 5 source bytes, advances
;				the source by 4 and writes 16 destination bytes
;
; The loop ends only when the pass counter reaches exactly zero --
; NOTE(review): this presumes a nonzero count; confirm against callers.
;
; Modifies eax, ecx, edx, mm0-mm3, mm6, mm7 and flags. No emms is executed
; here.
;==============================================================================
		global	_vdasm_horiz_expand4x_coaligned_MMX
_vdasm_horiz_expand4x_coaligned_MMX:
		mov			edx, [esp+4]			;edx = destination pointer
		mov			ecx, [esp+8]			;ecx = source pointer
		mov			eax, [esp+12]			;eax = pass counter
		movq		mm6, qword [x0002000200020002]	;rounding bias: 2 in every word
		pxor		mm7, mm7				;zero, used to widen bytes to words
.xloop:
		movd		mm0, [ecx]				;s[0..3]
		movd		mm1, [ecx+1]			;s[1..4], the right-hand neighbours
		add			ecx, 4					;advance source now; [ecx-4] below is this pass's s[0..3]
		punpcklbw	mm0, mm7				;s as four words
		punpcklbw	mm1, mm7				;neighbours as four words
		psubw		mm1, mm0		;x1 = d = s[i+1] - s[i]
		movq		mm2, mm1				;mm2 = d
		paddw		mm1, mm6		;x1 + 2
		movq		mm3, mm1				;mm3 = d + 2
		paddw		mm2, mm2		;x2 = 2*d
		paddw		mm3, mm2		;x3 + 2 = 3*d + 2
		paddw		mm2, mm6		;x2 + 2
		psraw		mm1, 2			;x1/4, i.e. (d + 2) >> 2
		psraw		mm2, 2			;x2/4, i.e. (2*d + 2) >> 2
		psraw		mm3, 2			;x3/4, i.e. (3*d + 2) >> 2
		paddw		mm1, mm0				;s + 1/4 step
		paddw		mm2, mm0				;s + 2/4 step
		paddw		mm3, mm0				;s + 3/4 step
		movd		mm0, [ecx-4]			;reload the original s[0..3] as bytes
		packuswb	mm1, mm1				;quarter-step values back to bytes
		packuswb	mm2, mm2				;half-step values back to bytes
		packuswb	mm3, mm3				;three-quarter-step values back to bytes
		punpcklbw	mm0, mm1				;pairs (s, 1/4) for the four pixels
		punpcklbw	mm2, mm3				;pairs (2/4, 3/4) for the four pixels
		movq		mm1, mm0				;copy so both halves can be interleaved
		punpcklwd	mm0, mm2				;(s, 1/4, 2/4, 3/4) for pixels 0 and 1
		punpckhwd	mm1, mm2				;(s, 1/4, 2/4, 3/4) for pixels 2 and 3
		
		movq		[edx], mm0
		movq		[edx+8], mm1
		add			edx, 16
		sub			eax, 1					;one pass done; ZF set when none remain
		jne			.xloop
		
		ret