diff options
author | mirabilos <thorsten.glaser@teckids.org> | 2017-03-25 23:24:29 +0300 |
---|---|---|
committer | mirabilos <thorsten.glaser@teckids.org> | 2017-03-25 23:24:29 +0300 |
commit | 4f940da706f2f0be9869022b128c23bea2dad645 (patch) | |
tree | be4836979802a5ae6e6664bb5301c4c56f26dcb4 | |
parent | 7b6e65ab7a8ec9ec708eccf29998b2bc45276410 (diff) |
lsym() for amd64
-rw-r--r-- | src/amd64/rfxcodec_encode_dwt_shift_amd64_sse2.asm | 148 | ||||
-rw-r--r-- | src/amd64/rfxcodec_encode_dwt_shift_amd64_sse41.asm | 148 | ||||
-rw-r--r-- | src/common.asm | 23 |
3 files changed, 161 insertions, 158 deletions
diff --git a/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse2.asm b/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse2.asm index 8cf901e..c93add1 100644 --- a/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse2.asm +++ b/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse2.asm @@ -55,8 +55,8 @@ loop1a: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] pslld xmm1, 16 pslld xmm2, 16 psrad xmm1, 16 @@ -66,8 +66,8 @@ loop1a: movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] pslld xmm2, 16 pslld xmm3, 16 psrad xmm2, 16 @@ -85,8 +85,8 @@ loop1a: psrldq xmm5, 12 pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] pslld xmm3, 16 pslld xmm4, 16 psrad xmm3, 16 @@ -247,8 +247,8 @@ loop1c: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] pslld xmm1, 16 pslld xmm2, 16 psrad xmm1, 16 @@ -258,8 +258,8 @@ loop1c: movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] pslld xmm2, 16 pslld xmm3, 16 psrad xmm2, 16 @@ -277,8 +277,8 @@ loop1c: movd xmm5, eax pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] pslld xmm3, 16 pslld xmm4, 16 psrad xmm3, 16 @@ -328,8 +328,8 @@ loop1c: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] pslld xmm1, 16 pslld xmm2, 16 psrad xmm1, 16 @@ -339,8 +339,8 @@ loop1c: movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] pslld xmm2, 16 pslld xmm3, 16 psrad xmm2, 16 @@ -358,8 +358,8 @@ loop1c: psrldq xmm5, 12 pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] pslld xmm3, 16 pslld xmm4, 16 psrad xmm3, 16 @@ -423,8 +423,8 @@ loop1c1: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] pslld xmm1, 16 pslld xmm2, 16 psrad xmm1, 16 @@ -434,8 +434,8 @@ loop1c1: movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] pslld xmm2, 16 pslld xmm3, 16 psrad xmm2, 16 @@ -453,8 +453,8 @@ loop1c1: movd xmm5, eax pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] pslld xmm3, 16 pslld xmm4, 16 psrad xmm3, 16 @@ -501,8 +501,8 @@ loop1c1: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] pslld xmm1, 16 pslld xmm2, 16 psrad xmm1, 16 @@ -512,8 +512,8 @@ loop1c1: movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] pslld xmm2, 16 pslld xmm3, 16 psrad xmm2, 16 @@ -531,8 +531,8 @@ loop1c1: psrldq xmm5, 12 pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] pslld xmm3, 16 pslld xmm4, 16 psrad xmm3, 16 @@ -690,8 +690,8 @@ loop1e: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] pslld xmm1, 16 pslld xmm2, 16 psrad xmm1, 16 @@ -701,8 +701,8 @@ loop1e: movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] pslld xmm2, 16 pslld xmm3, 16 psrad xmm2, 16 @@ -720,8 +720,8 @@ loop1e: movd xmm5, eax pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] pslld xmm3, 16 pslld xmm4, 16 psrad xmm3, 16 @@ -774,8 +774,8 @@ loop2e: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] pslld xmm1, 16 pslld xmm2, 16 psrad xmm1, 16 @@ -785,8 +785,8 @@ loop2e: movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] pslld xmm2, 16 pslld xmm3, 16 psrad xmm2, 16 @@ -804,8 +804,8 @@ loop2e: movd xmm5, eax pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] pslld xmm3, 16 pslld xmm4, 16 psrad xmm3, 16 @@ -857,8 +857,8 @@ loop2e: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] pslld xmm1, 16 pslld xmm2, 16 psrad xmm1, 16 @@ -868,8 +868,8 @@ loop2e: movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] pslld xmm2, 16 pslld xmm3, 16 psrad xmm2, 16 @@ -887,8 +887,8 @@ loop2e: psrldq xmm5, 12 pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] pslld xmm3, 16 pslld xmm4, 16 psrad xmm3, 16 @@ -952,8 +952,8 @@ loop1e1: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] pslld xmm1, 16 pslld xmm2, 16 psrad xmm1, 16 @@ -963,8 +963,8 @@ loop1e1: movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] pslld xmm2, 16 pslld xmm3, 16 psrad xmm2, 16 @@ -982,8 +982,8 @@ loop1e1: movd xmm5, eax pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] pslld xmm3, 16 pslld xmm4, 16 psrad xmm3, 16 @@ -1033,8 +1033,8 @@ loop2e1: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] pslld xmm1, 16 pslld xmm2, 16 psrad xmm1, 16 @@ -1044,8 +1044,8 @@ loop2e1: movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] pslld xmm2, 16 pslld xmm3, 16 psrad xmm2, 16 @@ -1063,8 +1063,8 @@ loop2e1: movd xmm5, eax pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] pslld xmm3, 16 pslld xmm4, 16 psrad xmm3, 16 @@ -1113,8 +1113,8 @@ loop2e1: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] pslld xmm1, 16 pslld xmm2, 16 psrad xmm1, 16 @@ -1124,8 +1124,8 @@ loop2e1: movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] pslld xmm2, 16 pslld xmm3, 16 psrad xmm2, 16 @@ -1143,8 +1143,8 @@ loop2e1: psrldq xmm5, 12 pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] pslld xmm3, 16 pslld xmm4, 16 psrad xmm3, 16 @@ -1207,9 +1207,9 @@ loop1f: punpcklbw xmm1, xmm0 punpcklbw xmm2, xmm0 punpcklbw xmm3, xmm0 - psubw xmm1, [rel cw128] - psubw xmm2, [rel cw128] - psubw xmm3, [rel cw128] + psubw xmm1, [lsym(cw128)] + psubw xmm2, [lsym(cw128)] + psubw xmm3, [lsym(cw128)] psllw xmm1, 5 psllw xmm2, 5 psllw xmm3, 5 @@ -1241,8 +1241,8 @@ loop2f: movq xmm3, [rsi + 64 * 1 * 2] ; src[2n + 2] punpcklbw xmm2, xmm0 punpcklbw xmm3, xmm0 - psubw xmm2, [rel cw128] - psubw xmm3, [rel cw128] + psubw xmm2, [lsym(cw128)] + psubw xmm3, [lsym(cw128)] psllw xmm2, 5 psllw xmm3, 5 movdqa xmm4, xmm1 @@ -1274,7 +1274,7 @@ loop2f: movdqa xmm1, xmm3 ; src[2n] movq xmm2, [rsi + 64 * 1] ; src[2n + 1] punpcklbw xmm2, xmm0 - psubw xmm2, [rel cw128] + psubw xmm2, [lsym(cw128)] psllw xmm2, 5 movdqa xmm4, xmm1 movdqa xmm5, xmm2 @@ -1314,7 +1314,7 @@ set_quants_hi: sub rax, 6 - 5 movd xmm9, eax imul rax, 16 - lea rdx, [rel cwa0] + lea rdx, [lsym(cwa0)] add rdx, rax movdqa xmm8, [rdx] ret @@ -1323,7 +1323,7 @@ set_quants_lo: sub rax, 6 - 5 movd xmm11, eax imul rax, 16 - lea rdx, [rel cwa0] + lea rdx, [lsym(cwa0)] add rdx, rax movdqa xmm10, [rdx] ret diff --git a/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse41.asm b/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse41.asm index 34d8e50..fca391a 100644 --- a/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse41.asm +++ b/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse41.asm @@ -55,15 +55,15 @@ loop1a: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] packusdw xmm1, xmm2 movdqa xmm2, xmm6 ; src[2n + 1] movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] packusdw xmm2, xmm3 movdqa xmm3, xmm6 ; src[2n + 2] movdqa xmm4, xmm7 @@ -77,8 +77,8 @@ loop1a: psrldq xmm5, 12 pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] packusdw xmm3, xmm4 movdqa xmm4, xmm1 movdqa xmm5, xmm2 @@ -235,15 +235,15 @@ loop1c: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] packusdw xmm1, xmm2 movdqa xmm2, xmm6 ; src[2n + 1] movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] packusdw xmm2, xmm3 movdqa xmm3, xmm6 ; src[2n + 2] movdqa xmm4, xmm7 @@ -257,8 +257,8 @@ loop1c: movd xmm5, eax pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] packusdw xmm3, xmm4 movdqa xmm4, xmm1 movdqa xmm5, xmm2 @@ -304,15 +304,15 @@ loop1c: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] packusdw xmm1, xmm2 movdqa xmm2, xmm6 ; src[2n + 1] movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] packusdw xmm2, xmm3 movdqa xmm3, xmm6 ; src[2n + 2] movdqa xmm4, xmm7 @@ -326,8 +326,8 @@ loop1c: psrldq xmm5, 12 pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] packusdw xmm3, xmm4 movdqa xmm4, xmm1 movdqa xmm5, xmm2 @@ -387,15 +387,15 @@ loop1c1: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] packusdw xmm1, xmm2 movdqa xmm2, xmm6 ; src[2n + 1] movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] packusdw xmm2, xmm3 movdqa xmm3, xmm6 ; src[2n + 2] movdqa xmm4, xmm7 @@ -409,8 +409,8 @@ loop1c1: movd xmm5, eax pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] packusdw xmm3, xmm4 movdqa xmm4, xmm1 movdqa xmm5, xmm2 @@ -453,15 +453,15 @@ loop1c1: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] packusdw xmm1, xmm2 movdqa xmm2, xmm6 ; src[2n + 1] movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] packusdw xmm2, xmm3 movdqa xmm3, xmm6 ; src[2n + 2] movdqa xmm4, xmm7 @@ -475,8 +475,8 @@ loop1c1: psrldq xmm5, 12 pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] packusdw xmm3, xmm4 movdqa xmm4, xmm1 movdqa xmm5, xmm2 @@ -630,15 +630,15 @@ loop1e: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] packusdw xmm1, xmm2 movdqa xmm2, xmm6 ; src[2n + 1] movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] packusdw xmm2, xmm3 movdqa xmm3, xmm6 ; src[2n + 2] movdqa xmm4, xmm7 @@ -652,8 +652,8 @@ loop1e: movd xmm5, eax pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] packusdw xmm3, xmm4 movdqa xmm4, xmm1 movdqa xmm5, xmm2 @@ -702,15 +702,15 @@ loop2e: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] packusdw xmm1, xmm2 movdqa xmm2, xmm6 ; src[2n + 1] movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] packusdw xmm2, xmm3 movdqa xmm3, xmm6 ; src[2n + 2] movdqa xmm4, xmm7 @@ -724,8 +724,8 @@ loop2e: movd xmm5, eax pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] packusdw xmm3, xmm4 movdqa xmm4, xmm1 movdqa xmm5, xmm2 @@ -773,15 +773,15 @@ loop2e: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] packusdw xmm1, xmm2 movdqa xmm2, xmm6 ; src[2n + 1] movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] packusdw xmm2, xmm3 movdqa xmm3, xmm6 ; src[2n + 2] movdqa xmm4, xmm7 @@ -795,8 +795,8 @@ loop2e: psrldq xmm5, 12 pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] packusdw xmm3, xmm4 movdqa xmm4, xmm1 movdqa xmm5, xmm2 @@ -856,15 +856,15 @@ loop1e1: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] packusdw xmm1, xmm2 movdqa xmm2, xmm6 ; src[2n + 1] movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] packusdw xmm2, xmm3 movdqa xmm3, xmm6 ; src[2n + 2] movdqa xmm4, xmm7 @@ -878,8 +878,8 @@ loop1e1: movd xmm5, eax pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] packusdw xmm3, xmm4 movdqa xmm4, xmm1 movdqa xmm5, xmm2 @@ -925,15 +925,15 @@ loop2e1: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] packusdw xmm1, xmm2 movdqa xmm2, xmm6 ; src[2n + 1] movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] packusdw xmm2, xmm3 movdqa xmm3, xmm6 ; src[2n + 2] movdqa xmm4, xmm7 @@ -947,8 +947,8 @@ loop2e1: movd xmm5, eax pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] packusdw xmm3, xmm4 movdqa xmm4, xmm1 movdqa xmm5, xmm2 @@ -993,15 +993,15 @@ loop2e1: movdqa xmm2, [rsi + 16] movdqa xmm6, xmm1 movdqa xmm7, xmm2 - pand xmm1, [rel cdFFFF] - pand xmm2, [rel cdFFFF] + pand xmm1, [lsym(cdFFFF)] + pand xmm2, [lsym(cdFFFF)] packusdw xmm1, xmm2 movdqa xmm2, xmm6 ; src[2n + 1] movdqa xmm3, xmm7 psrldq xmm2, 2 psrldq xmm3, 2 - pand xmm2, [rel cdFFFF] - pand xmm3, [rel cdFFFF] + pand xmm2, [lsym(cdFFFF)] + pand xmm3, [lsym(cdFFFF)] packusdw xmm2, xmm3 movdqa xmm3, xmm6 ; src[2n + 2] movdqa xmm4, xmm7 @@ -1015,8 +1015,8 @@ loop2e1: psrldq xmm5, 12 pslldq xmm5, 12 por xmm4, xmm5 - pand xmm3, [rel cdFFFF] - pand xmm4, [rel cdFFFF] + pand xmm3, [lsym(cdFFFF)] + pand xmm4, [lsym(cdFFFF)] packusdw xmm3, xmm4 movdqa xmm4, xmm1 movdqa xmm5, xmm2 @@ -1075,9 +1075,9 @@ loop1f: punpcklbw xmm1, xmm0 punpcklbw xmm2, xmm0 punpcklbw xmm3, xmm0 - psubw xmm1, [rel cw128] - psubw xmm2, [rel cw128] - psubw xmm3, [rel cw128] + psubw xmm1, [lsym(cw128)] + psubw xmm2, [lsym(cw128)] + psubw xmm3, [lsym(cw128)] psllw xmm1, 5 psllw xmm2, 5 psllw xmm3, 5 @@ -1109,8 +1109,8 @@ loop2f: movq xmm3, [rsi + 64 * 1 * 2] ; src[2n + 2] punpcklbw xmm2, xmm0 punpcklbw xmm3, xmm0 - psubw xmm2, [rel cw128] - psubw xmm3, [rel cw128] + psubw xmm2, [lsym(cw128)] + psubw xmm3, [lsym(cw128)] psllw xmm2, 5 psllw xmm3, 5 movdqa xmm4, xmm1 @@ -1142,7 +1142,7 @@ loop2f: movdqa xmm1, xmm3 ; src[2n] movq xmm2, [rsi + 64 * 1] ; src[2n + 1] punpcklbw xmm2, xmm0 - psubw xmm2, [rel cw128] + psubw xmm2, [lsym(cw128)] psllw xmm2, 5 movdqa xmm4, xmm1 movdqa xmm5, xmm2 @@ -1182,7 +1182,7 @@ set_quants_hi: sub rax, 6 - 5 movd xmm9, eax imul rax, 16 - lea rdx, [rel cwa0] + lea rdx, [lsym(cwa0)] add rdx, rax movdqa xmm8, [rdx] ret @@ -1191,7 +1191,7 @@ set_quants_lo: sub rax, 6 - 5 movd xmm11, eax imul rax, 16 - lea rdx, [rel cwa0] + lea rdx, [lsym(cwa0)] add rdx, rax movdqa xmm10, [rdx] ret diff --git a/src/common.asm b/src/common.asm index 62c6f5c..d498b9f 100644 --- a/src/common.asm +++ b/src/common.asm @@ -53,6 +53,14 @@ section .note.GNU-stack noalloc noexec nowrite progbits ; Macros for relative access to local data %undef use_elf_pic +%undef lsym + +%ifdef ASM_ARCH_AMD64 +%macro get_GOT 0 +%endmacro +%define lsym(name) rel name +%endif + %ifdef ASM_ARCH_I386 %ifdef is_elf %ifdef PIC @@ -65,23 +73,18 @@ section .note.GNU-stack noalloc noexec nowrite progbits %endmacro %define lsym(name) ebx + name wrt ..gotoff %else -; i386 ELF, not PIC -%macro get_GOT 0 -%endmacro -%define lsym(name) name +; i386 ELF, not PIC, default case (see below) %endif %else ; i386 not ELF %ifdef PIC %error "Position-Independent Code is currently only supported for ELF" %endif -; i386 not ELF, not PIC -%macro get_GOT 0 -%endmacro -%define lsym(name) name +; i386 not ELF, not PIC, default case (see below) %endif -%else -; not i386 +%endif + +%ifndef lsym %macro get_GOT 0 %endmacro %define lsym(name) name |