Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/GStreamer/orc.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/orc
diff options
context:
space:
mode:
author: David Schleef <ds@schleef.org>, 2010-08-28 02:26:19 +0400
committer: David Schleef <ds@schleef.org>, 2010-08-28 02:26:19 +0400
commit: 9d286ebc21f784d14edde720f50bb7537c42e72c (patch)
tree: 7630598bf78eb06fd8f635112be3c537a9ff6223 /orc
parent: ca4bbaaf9067981dc74c5c53c81718e0a1736279 (diff)
Change constant values to orc_union64 internally
Also update the MMX backend. This implements much of the infrastructure for 64-bit constants, but parsing of 64-bit integer constants is broken (no portable strtoll implementation), and the backends probably don't load 64-bit constants correctly.
Diffstat (limited to 'orc')
-rw-r--r-- orc/orcexecutor.c 2
-rw-r--r-- orc/orcparse.c 38
-rw-r--r-- orc/orcprogram-c.c 8
-rw-r--r-- orc/orcprogram-c64x-c.c 2
-rw-r--r-- orc/orcprogram-mmx.c 2
-rw-r--r-- orc/orcprogram.c 52
-rw-r--r-- orc/orcprogram.h 46
-rw-r--r-- orc/orcrules-altivec.c 6
-rw-r--r-- orc/orcrules-arm.c 2
-rw-r--r-- orc/orcrules-mmx.c 304
-rw-r--r-- orc/orcrules-neon.c 16
-rw-r--r-- orc/orcrules-sse.c 31
12 files changed, 401 insertions, 108 deletions
diff --git a/orc/orcexecutor.c b/orc/orcexecutor.c
index 3184b4a..e67dd3b 100644
--- a/orc/orcexecutor.c
+++ b/orc/orcexecutor.c
@@ -265,7 +265,7 @@ orc_executor_emulate (OrcExecutor *ex)
opcode_ex[j].src_ptrs[k] = tmpspace[insn->src_args[k]];
/* FIXME hack */
load_constant (tmpspace[insn->src_args[k]], 4,
- var->value);
+ var->value.i);
} else if (var->vartype == ORC_VAR_TYPE_PARAM) {
opcode_ex[j].src_ptrs[k] = tmpspace[insn->src_args[k]];
/* FIXME hack */
diff --git a/orc/orcparse.c b/orc/orcparse.c
index 92bb59d..88bb712 100644
--- a/orc/orcparse.c
+++ b/orc/orcparse.c
@@ -186,16 +186,8 @@ orc_parse_full (const char *code, OrcProgram ***programs, char **log)
orc_program_add_parameter (parser->program, size, token[2]);
} else if (strcmp (token[0], ".const") == 0) {
int size = strtol (token[1], NULL, 0);
- char *end, *endf;
- int value;
- double valuef;
- value = strtol (token[3], &end, 0);
- valuef = strtod (token[3], &endf);
- if (endf > end) {
- orc_program_add_constant_float (parser->program, size, valuef, token[2]);
- } else {
- orc_program_add_constant (parser->program, size, value, token[2]);
- }
+
+ orc_program_add_constant_str (parser->program, size, token[3], token[2]);
} else if (strcmp (token[0], ".floatparam") == 0) {
int size = strtol (token[1], NULL, 0);
orc_program_add_parameter_float (parser->program, size, token[2]);
@@ -220,7 +212,6 @@ orc_parse_full (const char *code, OrcProgram ***programs, char **log)
if (o) {
int n_args = opcode_n_args (o);
- char const_regs[10][10];
int i;
if (n_tokens != 1 + offset + n_args) {
@@ -231,22 +222,11 @@ orc_parse_full (const char *code, OrcProgram ***programs, char **log)
for(i=offset+1;i<n_tokens;i++){
char *end;
- char *endf;
- int imm;
- double immf;
- imm = strtol (token[i], &end, 0);
- immf = strtod (token[i], &endf);
- if ((end != token[i]) || (endf != token[i])) {
- sprintf(const_regs[i], "c%d", parser->creg_index);
- parser->creg_index++;
- if (end >= endf) {
- orc_program_add_constant (parser->program, 2, imm,
- const_regs[i]);
- } else {
- orc_program_add_constant_float (parser->program, 2, immf,
- const_regs[i]);
- }
- token[i] = const_regs[i];
+ double d;
+ d = strtod (token[i], &end);
+ if (end != token[i]) {
+ orc_program_add_constant_str (parser->program, 4, token[i],
+ token[i]);
}
}
@@ -322,7 +302,7 @@ orc_parse_log_valist (OrcParser *parser, const char *format, va_list args)
sprintf(s, "In function %s:\n", parser->program->name);
len = strlen(s);
- if (parser->log_size + len > parser->log_alloc) {
+ if (parser->log_size + len + 1 >= parser->log_alloc) {
parser->log_alloc += 100;
parser->log = realloc (parser->log, parser->log_alloc);
}
@@ -335,7 +315,7 @@ orc_parse_log_valist (OrcParser *parser, const char *format, va_list args)
vsprintf(s, format, args);
len = strlen(s);
- if (parser->log_size + len > parser->log_alloc) {
+ if (parser->log_size + len + 1 >= parser->log_alloc) {
parser->log_alloc += 100;
parser->log = realloc (parser->log, parser->log_alloc);
}
diff --git a/orc/orcprogram-c.c b/orc/orcprogram-c.c
index 690219e..7ac4b42 100644
--- a/orc/orcprogram-c.c
+++ b/orc/orcprogram-c.c
@@ -442,10 +442,14 @@ c_get_name_int (char *name, OrcCompiler *p, OrcInstruction *insn, int var)
}
}
} else if (p->vars[var].vartype == ORC_VAR_TYPE_CONST) {
- if (p->vars[var].value == 0x80000000) {
+ if (p->vars[var].value.i == 0x80000000) {
sprintf(name,"0x80000000");
} else {
- sprintf(name, "%d", p->vars[var].value);
+ if (p->vars[var].value.i == (int)p->vars[var].value.i) {
+ sprintf(name, "%d", (int)p->vars[var].value.i);
+ } else {
+ ORC_ASSERT(0);
+ }
}
} else {
if (insn && (insn->flags & ORC_INSTRUCTION_FLAG_X2)) {
diff --git a/orc/orcprogram-c64x-c.c b/orc/orcprogram-c64x-c.c
index 3f92ca9..2728203 100644
--- a/orc/orcprogram-c64x-c.c
+++ b/orc/orcprogram-c64x-c.c
@@ -262,7 +262,7 @@ orc_compiler_c64x_c_assemble (OrcCompiler *compiler)
switch (var->vartype) {
case ORC_VAR_TYPE_CONST:
{
- int value = var->value;
+ int value = var->value.i;
if (var->size == 1) {
value = (value&0xff);
diff --git a/orc/orcprogram-mmx.c b/orc/orcprogram-mmx.c
index 1d80297..872f108 100644
--- a/orc/orcprogram-mmx.c
+++ b/orc/orcprogram-mmx.c
@@ -673,6 +673,8 @@ orc_compiler_mmx_assemble (OrcCompiler *compiler)
compiler->n_fixups = 0;
}
+ if (compiler->error) return;
+
orc_x86_emit_prologue (compiler);
#ifndef MMX
diff --git a/orc/orcprogram.c b/orc/orcprogram.c
index 643e60e..aa9ffcd 100644
--- a/orc/orcprogram.c
+++ b/orc/orcprogram.c
@@ -350,7 +350,7 @@ orc_program_add_constant (OrcProgram *program, int size, int value, const char *
program->vars[i].vartype = ORC_VAR_TYPE_CONST;
program->vars[i].size = size;
- program->vars[i].value = value;
+ program->vars[i].value.i = value;
program->vars[i].name = strdup(name);
program->n_const_vars++;
@@ -361,7 +361,17 @@ int
orc_program_add_constant_int64 (OrcProgram *program, int size,
orc_int64 value, const char *name)
{
- ORC_ASSERT(0);
+ int i;
+
+ i = ORC_VAR_C1 + program->n_const_vars;
+
+ program->vars[i].vartype = ORC_VAR_TYPE_CONST;
+ program->vars[i].size = size;
+ program->vars[i].value.i = value;
+ program->vars[i].name = strdup(name);
+ program->n_const_vars++;
+
+ return i;
}
int
@@ -382,6 +392,44 @@ orc_program_add_constant_double (OrcProgram *program, int size,
return orc_program_add_constant_int64 (program, size, u.i, name);
}
+int
+orc_program_add_constant_str (OrcProgram *program, int size,
+ const char *value, const char *name)
+{
+ int i;
+ char *end;
+ int val_i;
+ double val_d;
+
+ i = ORC_VAR_C1 + program->n_const_vars;
+
+ val_i = strtol (value, &end, 0);
+ if (end[0] == 0) {
+ program->vars[i].value.i = val_i;
+ } else if ((end[0] == 'l' || end[0] == 'L') && end[1] == 0) {
+ program->vars[i].value.i = val_i;
+ } else {
+ val_d = strtod (value, &end);
+
+ if (end[0] == 0) {
+ orc_union32 u;
+ u.f = val_d;
+ program->vars[i].value.i = u.i;
+ } else if ((end[0] == 'l' || end[0] == 'L') && end[1] == 0) {
+ program->vars[i].value.f = val_d;
+ } else {
+ return -1;
+ }
+ }
+
+ program->vars[i].vartype = ORC_VAR_TYPE_CONST;
+ program->vars[i].size = size;
+ program->vars[i].name = strdup(name);
+ program->n_const_vars++;
+
+ return i;
+}
+
/**
* orc_program_add_parameter:
* @program: a pointer to an OrcProgram structure
diff --git a/orc/orcprogram.h b/orc/orcprogram.h
index 05fd592..de448d1 100644
--- a/orc/orcprogram.h
+++ b/orc/orcprogram.h
@@ -223,7 +223,7 @@ struct _OrcVariable {
int is_aligned;
int is_uncached;
- int value;
+ orc_union64 value;
int ptr_register;
int ptr_offset;
@@ -355,7 +355,34 @@ struct _OrcProgram {
} _unused[ORC_N_INSNS]; /* needed for ABI compatibility */
int n_insns;
- OrcVariable vars[ORC_N_VARIABLES];
+ struct {
+ char *name;
+ char *type_name;
+
+ int size;
+ OrcVarType vartype;
+
+ int used;
+ int first_use;
+ int last_use;
+ int replaced;
+ int replacement;
+
+ int alloc;
+ int is_chained;
+ int is_aligned;
+ int is_uncached;
+
+ int value;
+
+ int ptr_register;
+ int ptr_offset;
+ int mask_alloc;
+ int aligned_data;
+ int param_type;
+ int load_dest;
+ } _unused3[ORC_N_VARIABLES]; /* needed for ABI compatibility */
+
int n_src_vars;
int n_dest_vars;
int n_param_vars;
@@ -371,6 +398,7 @@ struct _OrcProgram {
void *code_exec;
OrcInstruction insns[ORC_N_INSNS];
+ OrcVariable vars[ORC_N_VARIABLES];
void *backup_func;
int is_2d;
@@ -455,8 +483,8 @@ struct _OrcCompiler {
#define ORC_DEST_ARG(p,i,n) ((p)->vars[(i)->dest_args[(n)]].alloc)
#define ORC_SRC_TYPE(p,i,n) ((p)->vars[(i)->src_args[(n)]].vartype)
#define ORC_DEST_TYPE(p,i,n) ((p)->vars[(i)->dest_args[(n)]].vartype)
-#define ORC_SRC_VAL(p,i,n) ((p)->vars[(i)->src_args[(n)]].value)
-#define ORC_DEST_VAL(p,i,n) ((p)->vars[(i)->dest_args[(n)]].value)
+#define ORC_SRC_VAL(p,insn,n) ((p)->vars[(insn)->src_args[(n)]].value.i)
+#define ORC_DEST_VAL(p,insn,n) ((p)->vars[(insn)->dest_args[(n)]].value.i)
/**
* OrcOpcodeExecutor:
@@ -496,7 +524,7 @@ struct _OrcExecutor {
/* m is stored in params[ORC_VAR_A1] */
/* m_index is stored in params[ORC_VAR_A2] */
/* elapsed time is stored in params[ORC_VAR_A3] */
- /* source resampling parameters are in params[ORC_VAR_C1..C8] */
+ /* high half of params is stored in params[ORC_VAR_T1..] */
};
/* the alternate view of OrcExecutor */
@@ -516,9 +544,10 @@ struct _OrcExecutorAlt {
int m_index;
int time;
int unused2;
- int src_resample[8];
+ int unused4[8];
int params[ORC_VAR_T1-ORC_VAR_P1];
- int unused3[ORC_N_VARIABLES - ORC_VAR_T1];
+ int params_hi[ORC_VAR_T1-ORC_VAR_P1];
+ int unused3[ORC_N_VARIABLES - ORC_VAR_T9];
int accumulators[4];
};
#define ORC_EXECUTOR_EXEC(ex) ((OrcExecutorFunc)((ex)->arrays[ORC_VAR_A1]))
@@ -531,7 +560,7 @@ struct _OrcCodeVariable {
/*< private >*/
int vartype;
int size;
- int value;
+ orc_union64 value;
};
struct _OrcCode {
@@ -636,6 +665,7 @@ int orc_program_add_constant (OrcProgram *program, int size, int value, const ch
int orc_program_add_constant_int64 (OrcProgram *program, int size, orc_int64 value, const char *name);
int orc_program_add_constant_float (OrcProgram *program, int size, float value, const char *name);
int orc_program_add_constant_double (OrcProgram *program, int size, double value, const char *name);
+int orc_program_add_constant_str (OrcProgram *program, int size, const char *value, const char *name);
int orc_program_add_parameter (OrcProgram *program, int size, const char *name);
int orc_program_add_parameter_float (OrcProgram *program, int size, const char *name);
int orc_program_add_parameter_double (OrcProgram *program, int size, const char *name);
diff --git a/orc/orcrules-altivec.c b/orc/orcrules-altivec.c
index 11a4679..e86802c 100644
--- a/orc/orcrules-altivec.c
+++ b/orc/orcrules-altivec.c
@@ -66,7 +66,7 @@ powerpc_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
break;
}
} else {
- int value = src->value;
+ int value = src->value.i;
switch (size) {
case 1:
@@ -253,9 +253,9 @@ powerpc_rule_ ## name (OrcCompiler *p, void *user, OrcInstruction *insn) \
int dest = ORC_DEST_ARG (p, insn, 0); \
if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { \
ORC_ASM_CODE(p," vspltisb %s, %d\n", \
- powerpc_get_regname(p->tmpreg), p->vars[insn->src_args[1]].value); \
+ powerpc_get_regname(p->tmpreg), (int)p->vars[insn->src_args[1]].value.i); \
powerpc_emit_VX(p, 0x1000030c, \
- powerpc_regnum(p->tmpreg), p->vars[insn->src_args[1]].value, 0); \
+ powerpc_regnum(p->tmpreg), (int)p->vars[insn->src_args[1]].value.i, 0); \
powerpc_emit_VX_2 (p, opcode, code , dest, src1, p->tmpreg);\
} else if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) { \
ORC_COMPILER_ERROR(p,"rule only works with constants"); \
diff --git a/orc/orcrules-arm.c b/orc/orcrules-arm.c
index fcc8353..dacd8dc 100644
--- a/orc/orcrules-arm.c
+++ b/orc/orcrules-arm.c
@@ -83,7 +83,7 @@ arm_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
{
if (compiler->vars[insn->src_args[0]].vartype == ORC_VAR_TYPE_CONST) {
orc_arm_emit_load_imm (compiler, compiler->vars[insn->dest_args[0]].alloc,
- (int)compiler->vars[insn->src_args[0]].value);
+ (int)compiler->vars[insn->src_args[0]].value.i);
} else {
orc_arm_loadw (compiler, compiler->vars[insn->dest_args[0]].alloc,
compiler->exec_reg,
diff --git a/orc/orcrules-mmx.c b/orc/orcrules-mmx.c
index 9debb67..e41908f 100644
--- a/orc/orcrules-mmx.c
+++ b/orc/orcrules-mmx.c
@@ -46,7 +46,7 @@ mmx_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
}
#endif
} else if (src->vartype == ORC_VAR_TYPE_CONST) {
- mmx_load_constant (compiler, dest->alloc, size, src->value);
+ mmx_load_constant (compiler, dest->alloc, size, src->value.i);
} else {
ORC_ASSERT(0);
}
@@ -112,7 +112,7 @@ mmx_rule_loadoffX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
return;
}
- offset = (compiler->offset + compiler->vars[insn->src_args[1]].value) *
+ offset = (compiler->offset + compiler->vars[insn->src_args[1]].value.i) *
src->size;
if (src->ptr_register == 0) {
int i = insn->src_args[0];
@@ -610,7 +610,7 @@ mmx_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn)
if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) {
orc_mmx_emit_shiftimm (p, code[type], imm_code1[type], imm_code2[type],
- p->vars[insn->src_args[1]].value,
+ p->vars[insn->src_args[1]].value.i,
p->vars[insn->dest_args[0]].alloc);
} else if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) {
int tmp = orc_compiler_get_temp_reg (p);
@@ -636,9 +636,9 @@ mmx_rule_shlb (OrcCompiler *p, void *user, OrcInstruction *insn)
int tmp;
if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) {
- orc_mmx_emit_psllw (p, p->vars[insn->src_args[1]].value, dest);
+ orc_mmx_emit_psllw (p, p->vars[insn->src_args[1]].value.i, dest);
tmp = orc_compiler_get_constant (p, 1,
- 0xff&(0xff<<p->vars[insn->src_args[1]].value));
+ 0xff&(0xff<<p->vars[insn->src_args[1]].value.i));
orc_mmx_emit_pand (p, tmp, dest);
} else {
ORC_COMPILER_ERROR(p,"rule only works with constants");
@@ -656,10 +656,10 @@ mmx_rule_shrsb (OrcCompiler *p, void *user, OrcInstruction *insn)
if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) {
orc_mmx_emit_movq (p, src, tmp);
orc_mmx_emit_psllw (p, 8, tmp);
- orc_mmx_emit_psraw (p, p->vars[insn->src_args[1]].value, tmp);
+ orc_mmx_emit_psraw (p, p->vars[insn->src_args[1]].value.i, tmp);
orc_mmx_emit_psrlw (p, 8, tmp);
- orc_mmx_emit_psraw (p, 8 + p->vars[insn->src_args[1]].value, dest);
+ orc_mmx_emit_psraw (p, 8 + p->vars[insn->src_args[1]].value.i, dest);
orc_mmx_emit_psllw (p, 8, dest);
orc_mmx_emit_por (p, tmp, dest);
@@ -676,9 +676,9 @@ mmx_rule_shrub (OrcCompiler *p, void *user, OrcInstruction *insn)
int tmp;
if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) {
- orc_mmx_emit_psrlw (p, p->vars[insn->src_args[1]].value, dest);
+ orc_mmx_emit_psrlw (p, p->vars[insn->src_args[1]].value.i, dest);
tmp = orc_compiler_get_constant (p, 1,
- (0xff>>p->vars[insn->src_args[1]].value));
+ (0xff>>p->vars[insn->src_args[1]].value.i));
orc_mmx_emit_pand (p, tmp, dest);
} else {
ORC_COMPILER_ERROR(p,"rule only works with constants");
@@ -1219,6 +1219,25 @@ mmx_rule_mulll_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
FALSE);
}
+#ifndef MMX
+static void
+mmx_rule_mulhsl (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ int src = p->vars[insn->src_args[1]].alloc;
+ int dest = p->vars[insn->dest_args[0]].alloc;
+ int tmp = orc_compiler_get_temp_reg (p);
+ int tmp2 = orc_compiler_get_temp_reg (p);
+
+ orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,3,0,1), dest, tmp);
+ orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,3,0,1), src, tmp2);
+ orc_mmx_emit_pmuldq (p, src, dest);
+ orc_mmx_emit_pmuldq (p, tmp, tmp2);
+ orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,0,3,1), dest, dest);
+ orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,0,3,1), tmp2, tmp2);
+ orc_mmx_emit_punpckldq (p, tmp2, dest);
+}
+#endif
+
static void
mmx_rule_mulhsl_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
{
@@ -1255,41 +1274,24 @@ mmx_rule_mulhsl_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
orc_x86_emit_add_imm_reg (p, regsize, stackframe, X86_ESP, FALSE);
}
+#ifndef MMX
static void
-mmx_rule_mulhul_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
+mmx_rule_mulhul (OrcCompiler *p, void *user, OrcInstruction *insn)
{
- int i;
- int regsize = p->is_64bit ? 8 : 4;
- int stackframe;
-
- stackframe = 32 + 2*regsize;
- stackframe = (stackframe + 0xf) & (~0xf);
-
- orc_x86_emit_add_imm_reg (p, regsize, -stackframe, X86_ESP, FALSE);
- orc_x86_emit_mov_mmx_memoffset (p, 16, p->vars[insn->src_args[0]].alloc,
- 0, X86_ESP, FALSE, FALSE);
- orc_x86_emit_mov_mmx_memoffset (p, 16, p->vars[insn->src_args[1]].alloc,
- 16, X86_ESP, FALSE, FALSE);
- orc_x86_emit_mov_reg_memoffset (p, 4, X86_EAX, 32, X86_ESP);
- orc_x86_emit_mov_reg_memoffset (p, 4, X86_EDX, 32 + regsize, X86_ESP);
-
- for(i=0;i<(1<<p->loop_shift);i++) {
- orc_x86_emit_mov_memoffset_reg (p, 4, 4*i, X86_ESP, X86_EAX);
- ORC_ASM_CODE(p," mull %d(%%%s)\n", 16+4*i,
- orc_x86_get_regname_ptr(p, X86_ESP));
- orc_x86_emit_rex(p, 4, 0, 0, X86_ESP);
- *p->codeptr++ = 0xf7;
- orc_x86_emit_modrm_memoffset (p, 4, 16+4*i, X86_ESP);
- orc_x86_emit_mov_reg_memoffset (p, 4, X86_EDX, 4*i, X86_ESP);
- }
-
- orc_x86_emit_mov_memoffset_mmx (p, 16, 0, X86_ESP,
- p->vars[insn->dest_args[0]].alloc, FALSE);
- orc_x86_emit_mov_memoffset_reg (p, 4, 32, X86_ESP, X86_EAX);
- orc_x86_emit_mov_memoffset_reg (p, 4, 32 + regsize, X86_ESP, X86_EDX);
+ int src = p->vars[insn->src_args[1]].alloc;
+ int dest = p->vars[insn->dest_args[0]].alloc;
+ int tmp = orc_compiler_get_temp_reg (p);
+ int tmp2 = orc_compiler_get_temp_reg (p);
- orc_x86_emit_add_imm_reg (p, regsize, stackframe, X86_ESP, FALSE);
+ orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,3,0,1), dest, tmp);
+ orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,3,0,1), src, tmp2);
+ orc_mmx_emit_pmuludq (p, src, dest);
+ orc_mmx_emit_pmuludq (p, tmp, tmp2);
+ orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,0,3,1), dest, dest);
+ orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,0,3,1), tmp2, tmp2);
+ orc_mmx_emit_punpckldq (p, tmp2, dest);
}
+#endif
static void
mmx_rule_select0lw (OrcCompiler *p, void *user, OrcInstruction *insn)
@@ -1344,6 +1346,22 @@ mmx_rule_select1wb (OrcCompiler *p, void *user, OrcInstruction *insn)
}
static void
+mmx_rule_splitql (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ int src = p->vars[insn->src_args[0]].alloc;
+ int dest1 = p->vars[insn->dest_args[0]].alloc;
+ int dest2 = p->vars[insn->dest_args[1]].alloc;
+
+#ifndef MMX
+ orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,0,2,0), src, dest2);
+ orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(3,1,3,1), src, dest1);
+#else
+ orc_mmx_emit_movq (p, src, dest2);
+ orc_mmx_emit_pshufw (p, ORC_MMX_SHUF(3,2,3,2), src, dest1);
+#endif
+}
+
+static void
mmx_rule_splitlw (OrcCompiler *p, void *user, OrcInstruction *insn)
{
int src = p->vars[insn->src_args[0]].alloc;
@@ -1439,6 +1457,27 @@ mmx_rule_swapl (OrcCompiler *p, void *user, OrcInstruction *insn)
orc_mmx_emit_por (p, tmp, dest);
}
+static void
+mmx_rule_swapq (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ int src = p->vars[insn->src_args[0]].alloc;
+ int dest = p->vars[insn->dest_args[0]].alloc;
+ int tmp = orc_compiler_get_temp_reg (p);
+
+ orc_mmx_emit_movq (p, src, tmp);
+ orc_mmx_emit_psllq (p, 32, tmp);
+ orc_mmx_emit_psrlq (p, 32, dest);
+ orc_mmx_emit_por (p, tmp, dest);
+ orc_mmx_emit_movq (p, dest, tmp);
+ orc_mmx_emit_pslld (p, 16, tmp);
+ orc_mmx_emit_psrld (p, 16, dest);
+ orc_mmx_emit_por (p, tmp, dest);
+ orc_mmx_emit_movq (p, dest, tmp);
+ orc_mmx_emit_psllw (p, 8, tmp);
+ orc_mmx_emit_psrlw (p, 8, dest);
+ orc_mmx_emit_por (p, tmp, dest);
+}
+
#define LOAD_MASK_IS_SLOW
#ifndef LOAD_MASK_IS_SLOW
static void
@@ -1947,6 +1986,30 @@ BINARY_F(mulf, "mulps", 0x59)
BINARY_F(divf, "divps", 0x5e)
UNARY_F(sqrtf, "sqrtps", 0x51)
+#define UNARY_D(opcode,insn_name,code) \
+static void \
+mmx_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
+{ \
+ orc_mmx_emit_660f (p, insn_name, code, \
+ p->vars[insn->src_args[0]].alloc, \
+ p->vars[insn->dest_args[0]].alloc); \
+}
+
+#define BINARY_D(opcode,insn_name,code) \
+static void \
+mmx_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
+{ \
+ orc_mmx_emit_660f (p, insn_name, code, \
+ p->vars[insn->src_args[1]].alloc, \
+ p->vars[insn->dest_args[0]].alloc); \
+}
+
+BINARY_D(addd, "addpd", 0x58)
+BINARY_D(subd, "subpd", 0x5c)
+BINARY_D(muld, "mulpd", 0x59)
+BINARY_D(divd, "divpd", 0x5e)
+UNARY_D(sqrtd, "sqrtpd", 0x51)
+
static void
mmx_rule_minf (OrcCompiler *p, void *user, OrcInstruction *insn)
{
@@ -1972,6 +2035,30 @@ mmx_rule_minf (OrcCompiler *p, void *user, OrcInstruction *insn)
}
static void
+mmx_rule_mind (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ if (p->target_flags & ORC_TARGET_FAST_NAN) {
+ orc_mmx_emit_660f (p, "minpd", 0x5d,
+ p->vars[insn->src_args[1]].alloc,
+ p->vars[insn->dest_args[0]].alloc);
+ } else {
+ int tmp = orc_compiler_get_temp_reg (p);
+ orc_mmx_emit_movq (p,
+ p->vars[insn->src_args[1]].alloc,
+ tmp);
+ orc_mmx_emit_660f (p, "minpd", 0x5d,
+ p->vars[insn->dest_args[0]].alloc,
+ tmp);
+ orc_mmx_emit_660f (p, "minpd", 0x5d,
+ p->vars[insn->src_args[1]].alloc,
+ p->vars[insn->dest_args[0]].alloc);
+ orc_mmx_emit_por (p,
+ tmp,
+ p->vars[insn->dest_args[0]].alloc);
+ }
+}
+
+static void
mmx_rule_maxf (OrcCompiler *p, void *user, OrcInstruction *insn)
{
if (p->target_flags & ORC_TARGET_FAST_NAN) {
@@ -1996,6 +2083,30 @@ mmx_rule_maxf (OrcCompiler *p, void *user, OrcInstruction *insn)
}
static void
+mmx_rule_maxd (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ if (p->target_flags & ORC_TARGET_FAST_NAN) {
+ orc_mmx_emit_660f (p, "maxpd", 0x5f,
+ p->vars[insn->src_args[1]].alloc,
+ p->vars[insn->dest_args[0]].alloc);
+ } else {
+ int tmp = orc_compiler_get_temp_reg (p);
+ orc_mmx_emit_movq (p,
+ p->vars[insn->src_args[1]].alloc,
+ tmp);
+ orc_mmx_emit_660f (p, "maxpd", 0x5f,
+ p->vars[insn->dest_args[0]].alloc,
+ tmp);
+ orc_mmx_emit_660f (p, "maxpd", 0x5f,
+ p->vars[insn->src_args[1]].alloc,
+ p->vars[insn->dest_args[0]].alloc);
+ orc_mmx_emit_por (p,
+ tmp,
+ p->vars[insn->dest_args[0]].alloc);
+ }
+}
+
+static void
mmx_rule_cmpeqf (OrcCompiler *p, void *user, OrcInstruction *insn)
{
orc_mmx_emit_0f (p, "cmpeqps", 0xc2,
@@ -2005,6 +2116,16 @@ mmx_rule_cmpeqf (OrcCompiler *p, void *user, OrcInstruction *insn)
}
static void
+mmx_rule_cmpeqd (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ orc_mmx_emit_660f (p, "cmpeqpd", 0xc2,
+ p->vars[insn->src_args[1]].alloc,
+ p->vars[insn->dest_args[0]].alloc);
+ *p->codeptr++ = 0x00;
+}
+
+
+static void
mmx_rule_cmpltf (OrcCompiler *p, void *user, OrcInstruction *insn)
{
orc_mmx_emit_0f (p, "cmpltps", 0xc2,
@@ -2014,6 +2135,16 @@ mmx_rule_cmpltf (OrcCompiler *p, void *user, OrcInstruction *insn)
}
static void
+mmx_rule_cmpltd (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ orc_mmx_emit_660f (p, "cmpltpd", 0xc2,
+ p->vars[insn->src_args[1]].alloc,
+ p->vars[insn->dest_args[0]].alloc);
+ *p->codeptr++ = 0x01;
+}
+
+
+static void
mmx_rule_cmplef (OrcCompiler *p, void *user, OrcInstruction *insn)
{
orc_mmx_emit_0f (p, "cmpleps", 0xc2,
@@ -2023,11 +2154,48 @@ mmx_rule_cmplef (OrcCompiler *p, void *user, OrcInstruction *insn)
}
static void
-mmx_rule_convfl (OrcCompiler *p, void *user, OrcInstruction *insn)
+mmx_rule_cmpled (OrcCompiler *p, void *user, OrcInstruction *insn)
{
- orc_mmx_emit_f30f (p, "cvttps2dq", 0x5b,
- p->vars[insn->src_args[0]].alloc,
+ orc_mmx_emit_660f (p, "cmplepd", 0xc2,
+ p->vars[insn->src_args[1]].alloc,
p->vars[insn->dest_args[0]].alloc);
+ *p->codeptr++ = 0x02;
+}
+
+
+static void
+mmx_rule_convfl (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ int src = p->vars[insn->src_args[0]].alloc;
+ int dest = p->vars[insn->dest_args[0]].alloc;
+ int tmpc;
+ int tmp = orc_compiler_get_temp_reg (p);
+
+ tmpc = orc_compiler_get_temp_constant (p, 4, 0x80000000);
+ orc_mmx_emit_movq (p, src, tmp);
+ orc_mmx_emit_f30f (p, "cvttps2dq", 0x5b, src, dest);
+ orc_mmx_emit_psrad (p, 31, tmp);
+ orc_mmx_emit_pcmpeqd (p, dest, tmpc);
+ orc_mmx_emit_pandn (p, tmpc, tmp);
+ orc_mmx_emit_paddd (p, tmp, dest);
+
+}
+
+static void
+mmx_rule_convdl (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ int src = p->vars[insn->src_args[0]].alloc;
+ int dest = p->vars[insn->dest_args[0]].alloc;
+ int tmpc;
+ int tmp = orc_compiler_get_temp_reg (p);
+
+ tmpc = orc_compiler_get_temp_constant (p, 4, 0x80000000);
+ orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(3,1,3,1), src, tmp);
+ orc_mmx_emit_660f (p, "cvttpd2dq", 0xe6, src, dest);
+ orc_mmx_emit_psrad (p, 31, tmp);
+ orc_mmx_emit_pcmpeqd (p, dest, tmpc);
+ orc_mmx_emit_pandn (p, tmpc, tmp);
+ orc_mmx_emit_paddd (p, tmp, dest);
}
static void
@@ -2037,6 +2205,30 @@ mmx_rule_convlf (OrcCompiler *p, void *user, OrcInstruction *insn)
p->vars[insn->src_args[0]].alloc,
p->vars[insn->dest_args[0]].alloc);
}
+
+static void
+mmx_rule_convld (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ orc_mmx_emit_f30f (p, "cvtdq2pd", 0xe6,
+ p->vars[insn->src_args[0]].alloc,
+ p->vars[insn->dest_args[0]].alloc);
+}
+
+static void
+mmx_rule_convfd (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ orc_mmx_emit_0f (p, "cvtps2pd", 0x5a,
+ p->vars[insn->src_args[0]].alloc,
+ p->vars[insn->dest_args[0]].alloc);
+}
+
+static void
+mmx_rule_convdf (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ orc_mmx_emit_660f (p, "cvtpd2ps", 0x5a,
+ p->vars[insn->src_args[0]].alloc,
+ p->vars[insn->dest_args[0]].alloc);
+}
#endif
void
@@ -2129,6 +2321,7 @@ orc_compiler_mmx_register_rules (OrcTarget *target)
orc_rule_register (rule_set, "copyb", mmx_rule_copyx, NULL);
orc_rule_register (rule_set, "copyw", mmx_rule_copyx, NULL);
orc_rule_register (rule_set, "copyl", mmx_rule_copyx, NULL);
+ orc_rule_register (rule_set, "copyq", mmx_rule_copyx, NULL);
orc_rule_register (rule_set, "shlw", mmx_rule_shift, (void *)0);
orc_rule_register (rule_set, "shruw", mmx_rule_shift, (void *)1);
@@ -2175,6 +2368,22 @@ orc_compiler_mmx_register_rules (OrcTarget *target)
orc_rule_register (rule_set, "cmplef", mmx_rule_cmplef, NULL);
orc_rule_register (rule_set, "convfl", mmx_rule_convfl, NULL);
orc_rule_register (rule_set, "convlf", mmx_rule_convlf, NULL);
+
+ orc_rule_register (rule_set, "addd", mmx_rule_addd, NULL);
+ orc_rule_register (rule_set, "subd", mmx_rule_subd, NULL);
+ orc_rule_register (rule_set, "muld", mmx_rule_muld, NULL);
+ orc_rule_register (rule_set, "divd", mmx_rule_divd, NULL);
+ orc_rule_register (rule_set, "mind", mmx_rule_mind, NULL);
+ orc_rule_register (rule_set, "maxd", mmx_rule_maxd, NULL);
+ orc_rule_register (rule_set, "sqrtd", mmx_rule_sqrtd, NULL);
+ orc_rule_register (rule_set, "cmpeqd", mmx_rule_cmpeqd, NULL);
+ orc_rule_register (rule_set, "cmpltd", mmx_rule_cmpltd, NULL);
+ orc_rule_register (rule_set, "cmpled", mmx_rule_cmpled, NULL);
+ orc_rule_register (rule_set, "convdl", mmx_rule_convdl, NULL);
+ orc_rule_register (rule_set, "convld", mmx_rule_convld, NULL);
+
+ orc_rule_register (rule_set, "convfd", mmx_rule_convfd, NULL);
+ orc_rule_register (rule_set, "convdf", mmx_rule_convdf, NULL);
#endif
/* slow rules */
@@ -2195,6 +2404,8 @@ orc_compiler_mmx_register_rules (OrcTarget *target)
orc_rule_register (rule_set, "absl", mmx_rule_absl_slow, NULL);
orc_rule_register (rule_set, "swapw", mmx_rule_swapw, NULL);
orc_rule_register (rule_set, "swapl", mmx_rule_swapl, NULL);
+ orc_rule_register (rule_set, "swapq", mmx_rule_swapq, NULL);
+ orc_rule_register (rule_set, "splitql", mmx_rule_splitql, NULL);
orc_rule_register (rule_set, "splitlw", mmx_rule_splitlw, NULL);
orc_rule_register (rule_set, "splitwb", mmx_rule_splitwb, NULL);
orc_rule_register (rule_set, "avgsl", mmx_rule_avgsl, NULL);
@@ -2204,7 +2415,9 @@ orc_compiler_mmx_register_rules (OrcTarget *target)
orc_rule_register (rule_set, "shrub", mmx_rule_shrub, NULL);
orc_rule_register (rule_set, "mulll", mmx_rule_mulll_slow, NULL);
orc_rule_register (rule_set, "mulhsl", mmx_rule_mulhsl_slow, NULL);
- orc_rule_register (rule_set, "mulhul", mmx_rule_mulhul_slow, NULL);
+#ifndef MMX
+ orc_rule_register (rule_set, "mulhul", mmx_rule_mulhul, NULL);
+#endif
orc_rule_register (rule_set, "mullb", mmx_rule_mullb, NULL);
orc_rule_register (rule_set, "mulhsb", mmx_rule_mulhsb, NULL);
orc_rule_register (rule_set, "mulhub", mmx_rule_mulhub, NULL);
@@ -2255,6 +2468,9 @@ orc_compiler_mmx_register_rules (OrcTarget *target)
REG(minul);
REG(mulll);
orc_rule_register (rule_set, "convsuslw", mmx_rule_convsuslw, NULL);
+#ifndef MMX
+ orc_rule_register (rule_set, "mulhsl", mmx_rule_mulhsl, NULL);
+#endif
/* SSE 4.2 -- no rules */
diff --git a/orc/orcrules-neon.c b/orc/orcrules-neon.c
index 03a1cad..a14ef65 100644
--- a/orc/orcrules-neon.c
+++ b/orc/orcrules-neon.c
@@ -784,13 +784,13 @@ neon_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
if (src->vartype == ORC_VAR_TYPE_CONST) {
if (size == 1) {
- orc_neon_emit_loadib (compiler, dest->alloc, src->value);
+ orc_neon_emit_loadib (compiler, dest->alloc, src->value.i);
} else if (size == 2) {
- orc_neon_emit_loadiw (compiler, dest->alloc, src->value);
+ orc_neon_emit_loadiw (compiler, dest->alloc, src->value.i);
} else if (size == 4) {
- orc_neon_emit_loadil (compiler, dest->alloc, src->value);
+ orc_neon_emit_loadil (compiler, dest->alloc, src->value.i);
} else if (size == 8) {
- orc_neon_emit_loadiq (compiler, dest->alloc, src->value);
+ orc_neon_emit_loadiq (compiler, dest->alloc, src->value.i);
} else {
ORC_PROGRAM_ERROR(compiler,"unimplemented");
}
@@ -838,7 +838,7 @@ neon_rule_loadX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
ptr_register = compiler->gp_tmpreg;
orc_arm_emit_add_imm (compiler, ptr_register,
src->ptr_register,
- compiler->vars[insn->src_args[1]].value * src->size);
+ compiler->vars[insn->src_args[1]].value.i * src->size);
update = FALSE;
is_aligned = FALSE;
@@ -1454,7 +1454,7 @@ orc_neon_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn)
orc_uint32 code;
if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) {
- int shift = p->vars[insn->src_args[1]].value;
+ int shift = p->vars[insn->src_args[1]].value.i;
if (shift < 0) {
ORC_COMPILER_ERROR(p, "shift negative");
return;
@@ -1469,13 +1469,13 @@ orc_neon_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn)
immshift_info[type].name,
orc_neon_reg_name (p->vars[insn->dest_args[0]].alloc),
orc_neon_reg_name (p->vars[insn->src_args[0]].alloc),
- p->vars[insn->src_args[1]].value);
+ (int)p->vars[insn->src_args[1]].value.i);
} else {
ORC_ASM_CODE(p," %s %s, %s, #%d\n",
immshift_info[type].name,
orc_neon_reg_name_quad (p->vars[insn->dest_args[0]].alloc),
orc_neon_reg_name_quad (p->vars[insn->src_args[0]].alloc),
- p->vars[insn->src_args[1]].value);
+ (int)p->vars[insn->src_args[1]].value.i);
code |= 0x40;
}
code |= (p->vars[insn->dest_args[0]].alloc&0xf)<<12;
diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c
index ad66827..d42bc32 100644
--- a/orc/orcrules-sse.c
+++ b/orc/orcrules-sse.c
@@ -46,7 +46,7 @@ sse_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
}
#endif
} else if (src->vartype == ORC_VAR_TYPE_CONST) {
- sse_load_constant (compiler, dest->alloc, size, src->value);
+ sse_load_constant (compiler, dest->alloc, size, src->value.i);
} else {
ORC_ASSERT(0);
}
@@ -112,7 +112,7 @@ sse_rule_loadoffX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
return;
}
- offset = (compiler->offset + compiler->vars[insn->src_args[1]].value) *
+ offset = (compiler->offset + compiler->vars[insn->src_args[1]].value.i) *
src->size;
if (src->ptr_register == 0) {
int i = insn->src_args[0];
@@ -610,7 +610,7 @@ sse_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn)
if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) {
orc_sse_emit_shiftimm (p, code[type], imm_code1[type], imm_code2[type],
- p->vars[insn->src_args[1]].value,
+ p->vars[insn->src_args[1]].value.i,
p->vars[insn->dest_args[0]].alloc);
} else if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) {
int tmp = orc_compiler_get_temp_reg (p);
@@ -636,9 +636,9 @@ sse_rule_shlb (OrcCompiler *p, void *user, OrcInstruction *insn)
int tmp;
if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) {
- orc_sse_emit_psllw (p, p->vars[insn->src_args[1]].value, dest);
+ orc_sse_emit_psllw (p, p->vars[insn->src_args[1]].value.i, dest);
tmp = orc_compiler_get_constant (p, 1,
- 0xff&(0xff<<p->vars[insn->src_args[1]].value));
+ 0xff&(0xff<<p->vars[insn->src_args[1]].value.i));
orc_sse_emit_pand (p, tmp, dest);
} else {
ORC_COMPILER_ERROR(p,"rule only works with constants");
@@ -656,10 +656,10 @@ sse_rule_shrsb (OrcCompiler *p, void *user, OrcInstruction *insn)
if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) {
orc_sse_emit_movdqa (p, src, tmp);
orc_sse_emit_psllw (p, 8, tmp);
- orc_sse_emit_psraw (p, p->vars[insn->src_args[1]].value, tmp);
+ orc_sse_emit_psraw (p, p->vars[insn->src_args[1]].value.i, tmp);
orc_sse_emit_psrlw (p, 8, tmp);
- orc_sse_emit_psraw (p, 8 + p->vars[insn->src_args[1]].value, dest);
+ orc_sse_emit_psraw (p, 8 + p->vars[insn->src_args[1]].value.i, dest);
orc_sse_emit_psllw (p, 8, dest);
orc_sse_emit_por (p, tmp, dest);
@@ -676,9 +676,9 @@ sse_rule_shrub (OrcCompiler *p, void *user, OrcInstruction *insn)
int tmp;
if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) {
- orc_sse_emit_psrlw (p, p->vars[insn->src_args[1]].value, dest);
+ orc_sse_emit_psrlw (p, p->vars[insn->src_args[1]].value.i, dest);
tmp = orc_compiler_get_constant (p, 1,
- (0xff>>p->vars[insn->src_args[1]].value));
+ (0xff>>p->vars[insn->src_args[1]].value.i));
orc_sse_emit_pand (p, tmp, dest);
} else {
ORC_COMPILER_ERROR(p,"rule only works with constants");
@@ -1219,6 +1219,7 @@ sse_rule_mulll_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
FALSE);
}
+#ifndef MMX
static void
sse_rule_mulhsl (OrcCompiler *p, void *user, OrcInstruction *insn)
{
@@ -1235,6 +1236,7 @@ sse_rule_mulhsl (OrcCompiler *p, void *user, OrcInstruction *insn)
orc_sse_emit_pshufd (p, ORC_SSE_SHUF(2,0,3,1), tmp2, tmp2);
orc_sse_emit_punpckldq (p, tmp2, dest);
}
+#endif
static void
sse_rule_mulhsl_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
@@ -1272,6 +1274,7 @@ sse_rule_mulhsl_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
orc_x86_emit_add_imm_reg (p, regsize, stackframe, X86_ESP, FALSE);
}
+#ifndef MMX
static void
sse_rule_mulhul (OrcCompiler *p, void *user, OrcInstruction *insn)
{
@@ -1288,6 +1291,7 @@ sse_rule_mulhul (OrcCompiler *p, void *user, OrcInstruction *insn)
orc_sse_emit_pshufd (p, ORC_SSE_SHUF(2,0,3,1), tmp2, tmp2);
orc_sse_emit_punpckldq (p, tmp2, dest);
}
+#endif
static void
sse_rule_select0lw (OrcCompiler *p, void *user, OrcInstruction *insn)
@@ -1348,8 +1352,13 @@ sse_rule_splitql (OrcCompiler *p, void *user, OrcInstruction *insn)
int dest1 = p->vars[insn->dest_args[0]].alloc;
int dest2 = p->vars[insn->dest_args[1]].alloc;
+#ifndef MMX
orc_sse_emit_pshufd (p, ORC_SSE_SHUF(2,0,2,0), src, dest2);
orc_sse_emit_pshufd (p, ORC_SSE_SHUF(3,1,3,1), src, dest1);
+#else
+ orc_sse_emit_movdqa (p, src, dest2);
+ orc_sse_emit_pshufw (p, ORC_SSE_SHUF(3,2,3,2), src, dest1);
+#endif
}
static void
@@ -2406,7 +2415,9 @@ orc_compiler_sse_register_rules (OrcTarget *target)
orc_rule_register (rule_set, "shrub", sse_rule_shrub, NULL);
orc_rule_register (rule_set, "mulll", sse_rule_mulll_slow, NULL);
orc_rule_register (rule_set, "mulhsl", sse_rule_mulhsl_slow, NULL);
+#ifndef MMX
orc_rule_register (rule_set, "mulhul", sse_rule_mulhul, NULL);
+#endif
orc_rule_register (rule_set, "mullb", sse_rule_mullb, NULL);
orc_rule_register (rule_set, "mulhsb", sse_rule_mulhsb, NULL);
orc_rule_register (rule_set, "mulhub", sse_rule_mulhub, NULL);
@@ -2457,7 +2468,9 @@ orc_compiler_sse_register_rules (OrcTarget *target)
REG(minul);
REG(mulll);
orc_rule_register (rule_set, "convsuslw", sse_rule_convsuslw, NULL);
+#ifndef MMX
orc_rule_register (rule_set, "mulhsl", sse_rule_mulhsl, NULL);
+#endif
/* SSE 4.2 -- no rules */