Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/neutrinolabs/librfxcodec.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author mirabilos <thorsten.glaser@teckids.org> 2017-03-25 23:24:29 +0300
committer mirabilos <thorsten.glaser@teckids.org> 2017-03-25 23:24:29 +0300
commit 4f940da706f2f0be9869022b128c23bea2dad645 (patch)
tree be4836979802a5ae6e6664bb5301c4c56f26dcb4
parent 7b6e65ab7a8ec9ec708eccf29998b2bc45276410 (diff)
lsym() for amd64
-rw-r--r-- src/amd64/rfxcodec_encode_dwt_shift_amd64_sse2.asm 148
-rw-r--r-- src/amd64/rfxcodec_encode_dwt_shift_amd64_sse41.asm 148
-rw-r--r-- src/common.asm 23
3 files changed, 161 insertions, 158 deletions
diff --git a/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse2.asm b/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse2.asm
index 8cf901e..c93add1 100644
--- a/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse2.asm
+++ b/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse2.asm
@@ -55,8 +55,8 @@ loop1a:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
pslld xmm1, 16
pslld xmm2, 16
psrad xmm1, 16
@@ -66,8 +66,8 @@ loop1a:
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
pslld xmm2, 16
pslld xmm3, 16
psrad xmm2, 16
@@ -85,8 +85,8 @@ loop1a:
psrldq xmm5, 12
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
pslld xmm3, 16
pslld xmm4, 16
psrad xmm3, 16
@@ -247,8 +247,8 @@ loop1c:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
pslld xmm1, 16
pslld xmm2, 16
psrad xmm1, 16
@@ -258,8 +258,8 @@ loop1c:
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
pslld xmm2, 16
pslld xmm3, 16
psrad xmm2, 16
@@ -277,8 +277,8 @@ loop1c:
movd xmm5, eax
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
pslld xmm3, 16
pslld xmm4, 16
psrad xmm3, 16
@@ -328,8 +328,8 @@ loop1c:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
pslld xmm1, 16
pslld xmm2, 16
psrad xmm1, 16
@@ -339,8 +339,8 @@ loop1c:
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
pslld xmm2, 16
pslld xmm3, 16
psrad xmm2, 16
@@ -358,8 +358,8 @@ loop1c:
psrldq xmm5, 12
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
pslld xmm3, 16
pslld xmm4, 16
psrad xmm3, 16
@@ -423,8 +423,8 @@ loop1c1:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
pslld xmm1, 16
pslld xmm2, 16
psrad xmm1, 16
@@ -434,8 +434,8 @@ loop1c1:
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
pslld xmm2, 16
pslld xmm3, 16
psrad xmm2, 16
@@ -453,8 +453,8 @@ loop1c1:
movd xmm5, eax
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
pslld xmm3, 16
pslld xmm4, 16
psrad xmm3, 16
@@ -501,8 +501,8 @@ loop1c1:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
pslld xmm1, 16
pslld xmm2, 16
psrad xmm1, 16
@@ -512,8 +512,8 @@ loop1c1:
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
pslld xmm2, 16
pslld xmm3, 16
psrad xmm2, 16
@@ -531,8 +531,8 @@ loop1c1:
psrldq xmm5, 12
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
pslld xmm3, 16
pslld xmm4, 16
psrad xmm3, 16
@@ -690,8 +690,8 @@ loop1e:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
pslld xmm1, 16
pslld xmm2, 16
psrad xmm1, 16
@@ -701,8 +701,8 @@ loop1e:
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
pslld xmm2, 16
pslld xmm3, 16
psrad xmm2, 16
@@ -720,8 +720,8 @@ loop1e:
movd xmm5, eax
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
pslld xmm3, 16
pslld xmm4, 16
psrad xmm3, 16
@@ -774,8 +774,8 @@ loop2e:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
pslld xmm1, 16
pslld xmm2, 16
psrad xmm1, 16
@@ -785,8 +785,8 @@ loop2e:
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
pslld xmm2, 16
pslld xmm3, 16
psrad xmm2, 16
@@ -804,8 +804,8 @@ loop2e:
movd xmm5, eax
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
pslld xmm3, 16
pslld xmm4, 16
psrad xmm3, 16
@@ -857,8 +857,8 @@ loop2e:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
pslld xmm1, 16
pslld xmm2, 16
psrad xmm1, 16
@@ -868,8 +868,8 @@ loop2e:
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
pslld xmm2, 16
pslld xmm3, 16
psrad xmm2, 16
@@ -887,8 +887,8 @@ loop2e:
psrldq xmm5, 12
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
pslld xmm3, 16
pslld xmm4, 16
psrad xmm3, 16
@@ -952,8 +952,8 @@ loop1e1:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
pslld xmm1, 16
pslld xmm2, 16
psrad xmm1, 16
@@ -963,8 +963,8 @@ loop1e1:
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
pslld xmm2, 16
pslld xmm3, 16
psrad xmm2, 16
@@ -982,8 +982,8 @@ loop1e1:
movd xmm5, eax
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
pslld xmm3, 16
pslld xmm4, 16
psrad xmm3, 16
@@ -1033,8 +1033,8 @@ loop2e1:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
pslld xmm1, 16
pslld xmm2, 16
psrad xmm1, 16
@@ -1044,8 +1044,8 @@ loop2e1:
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
pslld xmm2, 16
pslld xmm3, 16
psrad xmm2, 16
@@ -1063,8 +1063,8 @@ loop2e1:
movd xmm5, eax
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
pslld xmm3, 16
pslld xmm4, 16
psrad xmm3, 16
@@ -1113,8 +1113,8 @@ loop2e1:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
pslld xmm1, 16
pslld xmm2, 16
psrad xmm1, 16
@@ -1124,8 +1124,8 @@ loop2e1:
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
pslld xmm2, 16
pslld xmm3, 16
psrad xmm2, 16
@@ -1143,8 +1143,8 @@ loop2e1:
psrldq xmm5, 12
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
pslld xmm3, 16
pslld xmm4, 16
psrad xmm3, 16
@@ -1207,9 +1207,9 @@ loop1f:
punpcklbw xmm1, xmm0
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
- psubw xmm1, [rel cw128]
- psubw xmm2, [rel cw128]
- psubw xmm3, [rel cw128]
+ psubw xmm1, [lsym(cw128)]
+ psubw xmm2, [lsym(cw128)]
+ psubw xmm3, [lsym(cw128)]
psllw xmm1, 5
psllw xmm2, 5
psllw xmm3, 5
@@ -1241,8 +1241,8 @@ loop2f:
movq xmm3, [rsi + 64 * 1 * 2] ; src[2n + 2]
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
- psubw xmm2, [rel cw128]
- psubw xmm3, [rel cw128]
+ psubw xmm2, [lsym(cw128)]
+ psubw xmm3, [lsym(cw128)]
psllw xmm2, 5
psllw xmm3, 5
movdqa xmm4, xmm1
@@ -1274,7 +1274,7 @@ loop2f:
movdqa xmm1, xmm3 ; src[2n]
movq xmm2, [rsi + 64 * 1] ; src[2n + 1]
punpcklbw xmm2, xmm0
- psubw xmm2, [rel cw128]
+ psubw xmm2, [lsym(cw128)]
psllw xmm2, 5
movdqa xmm4, xmm1
movdqa xmm5, xmm2
@@ -1314,7 +1314,7 @@ set_quants_hi:
sub rax, 6 - 5
movd xmm9, eax
imul rax, 16
- lea rdx, [rel cwa0]
+ lea rdx, [lsym(cwa0)]
add rdx, rax
movdqa xmm8, [rdx]
ret
@@ -1323,7 +1323,7 @@ set_quants_lo:
sub rax, 6 - 5
movd xmm11, eax
imul rax, 16
- lea rdx, [rel cwa0]
+ lea rdx, [lsym(cwa0)]
add rdx, rax
movdqa xmm10, [rdx]
ret
diff --git a/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse41.asm b/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse41.asm
index 34d8e50..fca391a 100644
--- a/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse41.asm
+++ b/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse41.asm
@@ -55,15 +55,15 @@ loop1a:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
packusdw xmm1, xmm2
movdqa xmm2, xmm6 ; src[2n + 1]
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
packusdw xmm2, xmm3
movdqa xmm3, xmm6 ; src[2n + 2]
movdqa xmm4, xmm7
@@ -77,8 +77,8 @@ loop1a:
psrldq xmm5, 12
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
packusdw xmm3, xmm4
movdqa xmm4, xmm1
movdqa xmm5, xmm2
@@ -235,15 +235,15 @@ loop1c:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
packusdw xmm1, xmm2
movdqa xmm2, xmm6 ; src[2n + 1]
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
packusdw xmm2, xmm3
movdqa xmm3, xmm6 ; src[2n + 2]
movdqa xmm4, xmm7
@@ -257,8 +257,8 @@ loop1c:
movd xmm5, eax
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
packusdw xmm3, xmm4
movdqa xmm4, xmm1
movdqa xmm5, xmm2
@@ -304,15 +304,15 @@ loop1c:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
packusdw xmm1, xmm2
movdqa xmm2, xmm6 ; src[2n + 1]
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
packusdw xmm2, xmm3
movdqa xmm3, xmm6 ; src[2n + 2]
movdqa xmm4, xmm7
@@ -326,8 +326,8 @@ loop1c:
psrldq xmm5, 12
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
packusdw xmm3, xmm4
movdqa xmm4, xmm1
movdqa xmm5, xmm2
@@ -387,15 +387,15 @@ loop1c1:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
packusdw xmm1, xmm2
movdqa xmm2, xmm6 ; src[2n + 1]
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
packusdw xmm2, xmm3
movdqa xmm3, xmm6 ; src[2n + 2]
movdqa xmm4, xmm7
@@ -409,8 +409,8 @@ loop1c1:
movd xmm5, eax
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
packusdw xmm3, xmm4
movdqa xmm4, xmm1
movdqa xmm5, xmm2
@@ -453,15 +453,15 @@ loop1c1:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
packusdw xmm1, xmm2
movdqa xmm2, xmm6 ; src[2n + 1]
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
packusdw xmm2, xmm3
movdqa xmm3, xmm6 ; src[2n + 2]
movdqa xmm4, xmm7
@@ -475,8 +475,8 @@ loop1c1:
psrldq xmm5, 12
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
packusdw xmm3, xmm4
movdqa xmm4, xmm1
movdqa xmm5, xmm2
@@ -630,15 +630,15 @@ loop1e:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
packusdw xmm1, xmm2
movdqa xmm2, xmm6 ; src[2n + 1]
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
packusdw xmm2, xmm3
movdqa xmm3, xmm6 ; src[2n + 2]
movdqa xmm4, xmm7
@@ -652,8 +652,8 @@ loop1e:
movd xmm5, eax
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
packusdw xmm3, xmm4
movdqa xmm4, xmm1
movdqa xmm5, xmm2
@@ -702,15 +702,15 @@ loop2e:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
packusdw xmm1, xmm2
movdqa xmm2, xmm6 ; src[2n + 1]
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
packusdw xmm2, xmm3
movdqa xmm3, xmm6 ; src[2n + 2]
movdqa xmm4, xmm7
@@ -724,8 +724,8 @@ loop2e:
movd xmm5, eax
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
packusdw xmm3, xmm4
movdqa xmm4, xmm1
movdqa xmm5, xmm2
@@ -773,15 +773,15 @@ loop2e:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
packusdw xmm1, xmm2
movdqa xmm2, xmm6 ; src[2n + 1]
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
packusdw xmm2, xmm3
movdqa xmm3, xmm6 ; src[2n + 2]
movdqa xmm4, xmm7
@@ -795,8 +795,8 @@ loop2e:
psrldq xmm5, 12
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
packusdw xmm3, xmm4
movdqa xmm4, xmm1
movdqa xmm5, xmm2
@@ -856,15 +856,15 @@ loop1e1:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
packusdw xmm1, xmm2
movdqa xmm2, xmm6 ; src[2n + 1]
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
packusdw xmm2, xmm3
movdqa xmm3, xmm6 ; src[2n + 2]
movdqa xmm4, xmm7
@@ -878,8 +878,8 @@ loop1e1:
movd xmm5, eax
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
packusdw xmm3, xmm4
movdqa xmm4, xmm1
movdqa xmm5, xmm2
@@ -925,15 +925,15 @@ loop2e1:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
packusdw xmm1, xmm2
movdqa xmm2, xmm6 ; src[2n + 1]
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
packusdw xmm2, xmm3
movdqa xmm3, xmm6 ; src[2n + 2]
movdqa xmm4, xmm7
@@ -947,8 +947,8 @@ loop2e1:
movd xmm5, eax
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
packusdw xmm3, xmm4
movdqa xmm4, xmm1
movdqa xmm5, xmm2
@@ -993,15 +993,15 @@ loop2e1:
movdqa xmm2, [rsi + 16]
movdqa xmm6, xmm1
movdqa xmm7, xmm2
- pand xmm1, [rel cdFFFF]
- pand xmm2, [rel cdFFFF]
+ pand xmm1, [lsym(cdFFFF)]
+ pand xmm2, [lsym(cdFFFF)]
packusdw xmm1, xmm2
movdqa xmm2, xmm6 ; src[2n + 1]
movdqa xmm3, xmm7
psrldq xmm2, 2
psrldq xmm3, 2
- pand xmm2, [rel cdFFFF]
- pand xmm3, [rel cdFFFF]
+ pand xmm2, [lsym(cdFFFF)]
+ pand xmm3, [lsym(cdFFFF)]
packusdw xmm2, xmm3
movdqa xmm3, xmm6 ; src[2n + 2]
movdqa xmm4, xmm7
@@ -1015,8 +1015,8 @@ loop2e1:
psrldq xmm5, 12
pslldq xmm5, 12
por xmm4, xmm5
- pand xmm3, [rel cdFFFF]
- pand xmm4, [rel cdFFFF]
+ pand xmm3, [lsym(cdFFFF)]
+ pand xmm4, [lsym(cdFFFF)]
packusdw xmm3, xmm4
movdqa xmm4, xmm1
movdqa xmm5, xmm2
@@ -1075,9 +1075,9 @@ loop1f:
punpcklbw xmm1, xmm0
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
- psubw xmm1, [rel cw128]
- psubw xmm2, [rel cw128]
- psubw xmm3, [rel cw128]
+ psubw xmm1, [lsym(cw128)]
+ psubw xmm2, [lsym(cw128)]
+ psubw xmm3, [lsym(cw128)]
psllw xmm1, 5
psllw xmm2, 5
psllw xmm3, 5
@@ -1109,8 +1109,8 @@ loop2f:
movq xmm3, [rsi + 64 * 1 * 2] ; src[2n + 2]
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
- psubw xmm2, [rel cw128]
- psubw xmm3, [rel cw128]
+ psubw xmm2, [lsym(cw128)]
+ psubw xmm3, [lsym(cw128)]
psllw xmm2, 5
psllw xmm3, 5
movdqa xmm4, xmm1
@@ -1142,7 +1142,7 @@ loop2f:
movdqa xmm1, xmm3 ; src[2n]
movq xmm2, [rsi + 64 * 1] ; src[2n + 1]
punpcklbw xmm2, xmm0
- psubw xmm2, [rel cw128]
+ psubw xmm2, [lsym(cw128)]
psllw xmm2, 5
movdqa xmm4, xmm1
movdqa xmm5, xmm2
@@ -1182,7 +1182,7 @@ set_quants_hi:
sub rax, 6 - 5
movd xmm9, eax
imul rax, 16
- lea rdx, [rel cwa0]
+ lea rdx, [lsym(cwa0)]
add rdx, rax
movdqa xmm8, [rdx]
ret
@@ -1191,7 +1191,7 @@ set_quants_lo:
sub rax, 6 - 5
movd xmm11, eax
imul rax, 16
- lea rdx, [rel cwa0]
+ lea rdx, [lsym(cwa0)]
add rdx, rax
movdqa xmm10, [rdx]
ret
diff --git a/src/common.asm b/src/common.asm
index 62c6f5c..d498b9f 100644
--- a/src/common.asm
+++ b/src/common.asm
@@ -53,6 +53,14 @@ section .note.GNU-stack noalloc noexec nowrite progbits
; Macros for relative access to local data
%undef use_elf_pic
+%undef lsym
+
+%ifdef ASM_ARCH_AMD64
+%macro get_GOT 0
+%endmacro
+%define lsym(name) rel name
+%endif
+
%ifdef ASM_ARCH_I386
%ifdef is_elf
%ifdef PIC
@@ -65,23 +73,18 @@ section .note.GNU-stack noalloc noexec nowrite progbits
%endmacro
%define lsym(name) ebx + name wrt ..gotoff
%else
-; i386 ELF, not PIC
-%macro get_GOT 0
-%endmacro
-%define lsym(name) name
+; i386 ELF, not PIC, default case (see below)
%endif
%else
; i386 not ELF
%ifdef PIC
%error "Position-Independent Code is currently only supported for ELF"
%endif
-; i386 not ELF, not PIC
-%macro get_GOT 0
-%endmacro
-%define lsym(name) name
+; i386 not ELF, not PIC, default case (see below)
%endif
-%else
-; not i386
+%endif
+
+%ifndef lsym
%macro get_GOT 0
%endmacro
%define lsym(name) name