diff options
author | David Schleef <ds@schleef.org> | 2010-08-28 02:26:19 +0400 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2010-08-28 02:26:19 +0400 |
commit | 9d286ebc21f784d14edde720f50bb7537c42e72c (patch) | |
tree | 7630598bf78eb06fd8f635112be3c537a9ff6223 /orc | |
parent | ca4bbaaf9067981dc74c5c53c81718e0a1736279 (diff) |
Change constant values to orc_union64 internally

Also update the MMX backend. This implements much of the
infrastructure for 64-bit constants, but parsing of 64-bit
integer constants is broken (no portable strtoll implementation),
and the backends probably don't load 64-bit constants correctly.
Diffstat (limited to 'orc')
-rw-r--r-- | orc/orcexecutor.c | 2 | ||||
-rw-r--r-- | orc/orcparse.c | 38 | ||||
-rw-r--r-- | orc/orcprogram-c.c | 8 | ||||
-rw-r--r-- | orc/orcprogram-c64x-c.c | 2 | ||||
-rw-r--r-- | orc/orcprogram-mmx.c | 2 | ||||
-rw-r--r-- | orc/orcprogram.c | 52 | ||||
-rw-r--r-- | orc/orcprogram.h | 46 | ||||
-rw-r--r-- | orc/orcrules-altivec.c | 6 | ||||
-rw-r--r-- | orc/orcrules-arm.c | 2 | ||||
-rw-r--r-- | orc/orcrules-mmx.c | 304 | ||||
-rw-r--r-- | orc/orcrules-neon.c | 16 | ||||
-rw-r--r-- | orc/orcrules-sse.c | 31 |
12 files changed, 401 insertions, 108 deletions
diff --git a/orc/orcexecutor.c b/orc/orcexecutor.c index 3184b4a..e67dd3b 100644 --- a/orc/orcexecutor.c +++ b/orc/orcexecutor.c @@ -265,7 +265,7 @@ orc_executor_emulate (OrcExecutor *ex) opcode_ex[j].src_ptrs[k] = tmpspace[insn->src_args[k]]; /* FIXME hack */ load_constant (tmpspace[insn->src_args[k]], 4, - var->value); + var->value.i); } else if (var->vartype == ORC_VAR_TYPE_PARAM) { opcode_ex[j].src_ptrs[k] = tmpspace[insn->src_args[k]]; /* FIXME hack */ diff --git a/orc/orcparse.c b/orc/orcparse.c index 92bb59d..88bb712 100644 --- a/orc/orcparse.c +++ b/orc/orcparse.c @@ -186,16 +186,8 @@ orc_parse_full (const char *code, OrcProgram ***programs, char **log) orc_program_add_parameter (parser->program, size, token[2]); } else if (strcmp (token[0], ".const") == 0) { int size = strtol (token[1], NULL, 0); - char *end, *endf; - int value; - double valuef; - value = strtol (token[3], &end, 0); - valuef = strtod (token[3], &endf); - if (endf > end) { - orc_program_add_constant_float (parser->program, size, valuef, token[2]); - } else { - orc_program_add_constant (parser->program, size, value, token[2]); - } + + orc_program_add_constant_str (parser->program, size, token[3], token[2]); } else if (strcmp (token[0], ".floatparam") == 0) { int size = strtol (token[1], NULL, 0); orc_program_add_parameter_float (parser->program, size, token[2]); @@ -220,7 +212,6 @@ orc_parse_full (const char *code, OrcProgram ***programs, char **log) if (o) { int n_args = opcode_n_args (o); - char const_regs[10][10]; int i; if (n_tokens != 1 + offset + n_args) { @@ -231,22 +222,11 @@ orc_parse_full (const char *code, OrcProgram ***programs, char **log) for(i=offset+1;i<n_tokens;i++){ char *end; - char *endf; - int imm; - double immf; - imm = strtol (token[i], &end, 0); - immf = strtod (token[i], &endf); - if ((end != token[i]) || (endf != token[i])) { - sprintf(const_regs[i], "c%d", parser->creg_index); - parser->creg_index++; - if (end >= endf) { - orc_program_add_constant (parser->program, 
2, imm, - const_regs[i]); - } else { - orc_program_add_constant_float (parser->program, 2, immf, - const_regs[i]); - } - token[i] = const_regs[i]; + double d; + d = strtod (token[i], &end); + if (end != token[i]) { + orc_program_add_constant_str (parser->program, 4, token[i], + token[i]); } } @@ -322,7 +302,7 @@ orc_parse_log_valist (OrcParser *parser, const char *format, va_list args) sprintf(s, "In function %s:\n", parser->program->name); len = strlen(s); - if (parser->log_size + len > parser->log_alloc) { + if (parser->log_size + len + 1 >= parser->log_alloc) { parser->log_alloc += 100; parser->log = realloc (parser->log, parser->log_alloc); } @@ -335,7 +315,7 @@ orc_parse_log_valist (OrcParser *parser, const char *format, va_list args) vsprintf(s, format, args); len = strlen(s); - if (parser->log_size + len > parser->log_alloc) { + if (parser->log_size + len + 1 >= parser->log_alloc) { parser->log_alloc += 100; parser->log = realloc (parser->log, parser->log_alloc); } diff --git a/orc/orcprogram-c.c b/orc/orcprogram-c.c index 690219e..7ac4b42 100644 --- a/orc/orcprogram-c.c +++ b/orc/orcprogram-c.c @@ -442,10 +442,14 @@ c_get_name_int (char *name, OrcCompiler *p, OrcInstruction *insn, int var) } } } else if (p->vars[var].vartype == ORC_VAR_TYPE_CONST) { - if (p->vars[var].value == 0x80000000) { + if (p->vars[var].value.i == 0x80000000) { sprintf(name,"0x80000000"); } else { - sprintf(name, "%d", p->vars[var].value); + if (p->vars[var].value.i == (int)p->vars[var].value.i) { + sprintf(name, "%d", (int)p->vars[var].value.i); + } else { + ORC_ASSERT(0); + } } } else { if (insn && (insn->flags & ORC_INSTRUCTION_FLAG_X2)) { diff --git a/orc/orcprogram-c64x-c.c b/orc/orcprogram-c64x-c.c index 3f92ca9..2728203 100644 --- a/orc/orcprogram-c64x-c.c +++ b/orc/orcprogram-c64x-c.c @@ -262,7 +262,7 @@ orc_compiler_c64x_c_assemble (OrcCompiler *compiler) switch (var->vartype) { case ORC_VAR_TYPE_CONST: { - int value = var->value; + int value = var->value.i; if (var->size == 
1) { value = (value&0xff); diff --git a/orc/orcprogram-mmx.c b/orc/orcprogram-mmx.c index 1d80297..872f108 100644 --- a/orc/orcprogram-mmx.c +++ b/orc/orcprogram-mmx.c @@ -673,6 +673,8 @@ orc_compiler_mmx_assemble (OrcCompiler *compiler) compiler->n_fixups = 0; } + if (compiler->error) return; + orc_x86_emit_prologue (compiler); #ifndef MMX diff --git a/orc/orcprogram.c b/orc/orcprogram.c index 643e60e..aa9ffcd 100644 --- a/orc/orcprogram.c +++ b/orc/orcprogram.c @@ -350,7 +350,7 @@ orc_program_add_constant (OrcProgram *program, int size, int value, const char * program->vars[i].vartype = ORC_VAR_TYPE_CONST; program->vars[i].size = size; - program->vars[i].value = value; + program->vars[i].value.i = value; program->vars[i].name = strdup(name); program->n_const_vars++; @@ -361,7 +361,17 @@ int orc_program_add_constant_int64 (OrcProgram *program, int size, orc_int64 value, const char *name) { - ORC_ASSERT(0); + int i; + + i = ORC_VAR_C1 + program->n_const_vars; + + program->vars[i].vartype = ORC_VAR_TYPE_CONST; + program->vars[i].size = size; + program->vars[i].value.i = value; + program->vars[i].name = strdup(name); + program->n_const_vars++; + + return i; } int @@ -382,6 +392,44 @@ orc_program_add_constant_double (OrcProgram *program, int size, return orc_program_add_constant_int64 (program, size, u.i, name); } +int +orc_program_add_constant_str (OrcProgram *program, int size, + const char *value, const char *name) +{ + int i; + char *end; + int val_i; + double val_d; + + i = ORC_VAR_C1 + program->n_const_vars; + + val_i = strtol (value, &end, 0); + if (end[0] == 0) { + program->vars[i].value.i = val_i; + } else if ((end[0] == 'l' || end[0] == 'L') && end[1] == 0) { + program->vars[i].value.i = val_i; + } else { + val_d = strtod (value, &end); + + if (end[0] == 0) { + orc_union32 u; + u.f = val_d; + program->vars[i].value.i = u.i; + } else if ((end[0] == 'l' || end[0] == 'L') && end[1] == 0) { + program->vars[i].value.f = val_d; + } else { + return -1; + } + } + + 
program->vars[i].vartype = ORC_VAR_TYPE_CONST; + program->vars[i].size = size; + program->vars[i].name = strdup(name); + program->n_const_vars++; + + return i; +} + /** * orc_program_add_parameter: * @program: a pointer to an OrcProgram structure diff --git a/orc/orcprogram.h b/orc/orcprogram.h index 05fd592..de448d1 100644 --- a/orc/orcprogram.h +++ b/orc/orcprogram.h @@ -223,7 +223,7 @@ struct _OrcVariable { int is_aligned; int is_uncached; - int value; + orc_union64 value; int ptr_register; int ptr_offset; @@ -355,7 +355,34 @@ struct _OrcProgram { } _unused[ORC_N_INSNS]; /* needed for ABI compatibility */ int n_insns; - OrcVariable vars[ORC_N_VARIABLES]; + struct { + char *name; + char *type_name; + + int size; + OrcVarType vartype; + + int used; + int first_use; + int last_use; + int replaced; + int replacement; + + int alloc; + int is_chained; + int is_aligned; + int is_uncached; + + int value; + + int ptr_register; + int ptr_offset; + int mask_alloc; + int aligned_data; + int param_type; + int load_dest; + } _unused3[ORC_N_VARIABLES]; /* needed for ABI compatibility */ + int n_src_vars; int n_dest_vars; int n_param_vars; @@ -371,6 +398,7 @@ struct _OrcProgram { void *code_exec; OrcInstruction insns[ORC_N_INSNS]; + OrcVariable vars[ORC_N_VARIABLES]; void *backup_func; int is_2d; @@ -455,8 +483,8 @@ struct _OrcCompiler { #define ORC_DEST_ARG(p,i,n) ((p)->vars[(i)->dest_args[(n)]].alloc) #define ORC_SRC_TYPE(p,i,n) ((p)->vars[(i)->src_args[(n)]].vartype) #define ORC_DEST_TYPE(p,i,n) ((p)->vars[(i)->dest_args[(n)]].vartype) -#define ORC_SRC_VAL(p,i,n) ((p)->vars[(i)->src_args[(n)]].value) -#define ORC_DEST_VAL(p,i,n) ((p)->vars[(i)->dest_args[(n)]].value) +#define ORC_SRC_VAL(p,insn,n) ((p)->vars[(insn)->src_args[(n)]].value.i) +#define ORC_DEST_VAL(p,insn,n) ((p)->vars[(insn)->dest_args[(n)]].value.i) /** * OrcOpcodeExecutor: @@ -496,7 +524,7 @@ struct _OrcExecutor { /* m is stored in params[ORC_VAR_A1] */ /* m_index is stored in params[ORC_VAR_A2] */ /* elapsed 
time is stored in params[ORC_VAR_A3] */ - /* source resampling parameters are in params[ORC_VAR_C1..C8] */ + /* high half of params is stored in params[ORC_VAR_T1..] */ }; /* the alternate view of OrcExecutor */ @@ -516,9 +544,10 @@ struct _OrcExecutorAlt { int m_index; int time; int unused2; - int src_resample[8]; + int unused4[8]; int params[ORC_VAR_T1-ORC_VAR_P1]; - int unused3[ORC_N_VARIABLES - ORC_VAR_T1]; + int params_hi[ORC_VAR_T1-ORC_VAR_P1]; + int unused3[ORC_N_VARIABLES - ORC_VAR_T9]; int accumulators[4]; }; #define ORC_EXECUTOR_EXEC(ex) ((OrcExecutorFunc)((ex)->arrays[ORC_VAR_A1])) @@ -531,7 +560,7 @@ struct _OrcCodeVariable { /*< private >*/ int vartype; int size; - int value; + orc_union64 value; }; struct _OrcCode { @@ -636,6 +665,7 @@ int orc_program_add_constant (OrcProgram *program, int size, int value, const ch int orc_program_add_constant_int64 (OrcProgram *program, int size, orc_int64 value, const char *name); int orc_program_add_constant_float (OrcProgram *program, int size, float value, const char *name); int orc_program_add_constant_double (OrcProgram *program, int size, double value, const char *name); +int orc_program_add_constant_str (OrcProgram *program, int size, const char *value, const char *name); int orc_program_add_parameter (OrcProgram *program, int size, const char *name); int orc_program_add_parameter_float (OrcProgram *program, int size, const char *name); int orc_program_add_parameter_double (OrcProgram *program, int size, const char *name); diff --git a/orc/orcrules-altivec.c b/orc/orcrules-altivec.c index 11a4679..e86802c 100644 --- a/orc/orcrules-altivec.c +++ b/orc/orcrules-altivec.c @@ -66,7 +66,7 @@ powerpc_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn) break; } } else { - int value = src->value; + int value = src->value.i; switch (size) { case 1: @@ -253,9 +253,9 @@ powerpc_rule_ ## name (OrcCompiler *p, void *user, OrcInstruction *insn) \ int dest = ORC_DEST_ARG (p, insn, 0); \ if 
(p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { \ ORC_ASM_CODE(p," vspltisb %s, %d\n", \ - powerpc_get_regname(p->tmpreg), p->vars[insn->src_args[1]].value); \ + powerpc_get_regname(p->tmpreg), (int)p->vars[insn->src_args[1]].value.i); \ powerpc_emit_VX(p, 0x1000030c, \ - powerpc_regnum(p->tmpreg), p->vars[insn->src_args[1]].value, 0); \ + powerpc_regnum(p->tmpreg), (int)p->vars[insn->src_args[1]].value.i, 0); \ powerpc_emit_VX_2 (p, opcode, code , dest, src1, p->tmpreg);\ } else if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) { \ ORC_COMPILER_ERROR(p,"rule only works with constants"); \ diff --git a/orc/orcrules-arm.c b/orc/orcrules-arm.c index fcc8353..dacd8dc 100644 --- a/orc/orcrules-arm.c +++ b/orc/orcrules-arm.c @@ -83,7 +83,7 @@ arm_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn) { if (compiler->vars[insn->src_args[0]].vartype == ORC_VAR_TYPE_CONST) { orc_arm_emit_load_imm (compiler, compiler->vars[insn->dest_args[0]].alloc, - (int)compiler->vars[insn->src_args[0]].value); + (int)compiler->vars[insn->src_args[0]].value.i); } else { orc_arm_loadw (compiler, compiler->vars[insn->dest_args[0]].alloc, compiler->exec_reg, diff --git a/orc/orcrules-mmx.c b/orc/orcrules-mmx.c index 9debb67..e41908f 100644 --- a/orc/orcrules-mmx.c +++ b/orc/orcrules-mmx.c @@ -46,7 +46,7 @@ mmx_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn) } #endif } else if (src->vartype == ORC_VAR_TYPE_CONST) { - mmx_load_constant (compiler, dest->alloc, size, src->value); + mmx_load_constant (compiler, dest->alloc, size, src->value.i); } else { ORC_ASSERT(0); } @@ -112,7 +112,7 @@ mmx_rule_loadoffX (OrcCompiler *compiler, void *user, OrcInstruction *insn) return; } - offset = (compiler->offset + compiler->vars[insn->src_args[1]].value) * + offset = (compiler->offset + compiler->vars[insn->src_args[1]].value.i) * src->size; if (src->ptr_register == 0) { int i = insn->src_args[0]; @@ -610,7 +610,7 @@ mmx_rule_shift (OrcCompiler 
*p, void *user, OrcInstruction *insn) if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { orc_mmx_emit_shiftimm (p, code[type], imm_code1[type], imm_code2[type], - p->vars[insn->src_args[1]].value, + p->vars[insn->src_args[1]].value.i, p->vars[insn->dest_args[0]].alloc); } else if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) { int tmp = orc_compiler_get_temp_reg (p); @@ -636,9 +636,9 @@ mmx_rule_shlb (OrcCompiler *p, void *user, OrcInstruction *insn) int tmp; if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { - orc_mmx_emit_psllw (p, p->vars[insn->src_args[1]].value, dest); + orc_mmx_emit_psllw (p, p->vars[insn->src_args[1]].value.i, dest); tmp = orc_compiler_get_constant (p, 1, - 0xff&(0xff<<p->vars[insn->src_args[1]].value)); + 0xff&(0xff<<p->vars[insn->src_args[1]].value.i)); orc_mmx_emit_pand (p, tmp, dest); } else { ORC_COMPILER_ERROR(p,"rule only works with constants"); @@ -656,10 +656,10 @@ mmx_rule_shrsb (OrcCompiler *p, void *user, OrcInstruction *insn) if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { orc_mmx_emit_movq (p, src, tmp); orc_mmx_emit_psllw (p, 8, tmp); - orc_mmx_emit_psraw (p, p->vars[insn->src_args[1]].value, tmp); + orc_mmx_emit_psraw (p, p->vars[insn->src_args[1]].value.i, tmp); orc_mmx_emit_psrlw (p, 8, tmp); - orc_mmx_emit_psraw (p, 8 + p->vars[insn->src_args[1]].value, dest); + orc_mmx_emit_psraw (p, 8 + p->vars[insn->src_args[1]].value.i, dest); orc_mmx_emit_psllw (p, 8, dest); orc_mmx_emit_por (p, tmp, dest); @@ -676,9 +676,9 @@ mmx_rule_shrub (OrcCompiler *p, void *user, OrcInstruction *insn) int tmp; if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { - orc_mmx_emit_psrlw (p, p->vars[insn->src_args[1]].value, dest); + orc_mmx_emit_psrlw (p, p->vars[insn->src_args[1]].value.i, dest); tmp = orc_compiler_get_constant (p, 1, - (0xff>>p->vars[insn->src_args[1]].value)); + (0xff>>p->vars[insn->src_args[1]].value.i)); orc_mmx_emit_pand (p, tmp, dest); } else { 
ORC_COMPILER_ERROR(p,"rule only works with constants"); @@ -1219,6 +1219,25 @@ mmx_rule_mulll_slow (OrcCompiler *p, void *user, OrcInstruction *insn) FALSE); } +#ifndef MMX +static void +mmx_rule_mulhsl (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[1]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int tmp = orc_compiler_get_temp_reg (p); + int tmp2 = orc_compiler_get_temp_reg (p); + + orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,3,0,1), dest, tmp); + orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,3,0,1), src, tmp2); + orc_mmx_emit_pmuldq (p, src, dest); + orc_mmx_emit_pmuldq (p, tmp, tmp2); + orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,0,3,1), dest, dest); + orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,0,3,1), tmp2, tmp2); + orc_mmx_emit_punpckldq (p, tmp2, dest); +} +#endif + static void mmx_rule_mulhsl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) { @@ -1255,41 +1274,24 @@ mmx_rule_mulhsl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) orc_x86_emit_add_imm_reg (p, regsize, stackframe, X86_ESP, FALSE); } +#ifndef MMX static void -mmx_rule_mulhul_slow (OrcCompiler *p, void *user, OrcInstruction *insn) +mmx_rule_mulhul (OrcCompiler *p, void *user, OrcInstruction *insn) { - int i; - int regsize = p->is_64bit ? 
8 : 4; - int stackframe; - - stackframe = 32 + 2*regsize; - stackframe = (stackframe + 0xf) & (~0xf); - - orc_x86_emit_add_imm_reg (p, regsize, -stackframe, X86_ESP, FALSE); - orc_x86_emit_mov_mmx_memoffset (p, 16, p->vars[insn->src_args[0]].alloc, - 0, X86_ESP, FALSE, FALSE); - orc_x86_emit_mov_mmx_memoffset (p, 16, p->vars[insn->src_args[1]].alloc, - 16, X86_ESP, FALSE, FALSE); - orc_x86_emit_mov_reg_memoffset (p, 4, X86_EAX, 32, X86_ESP); - orc_x86_emit_mov_reg_memoffset (p, 4, X86_EDX, 32 + regsize, X86_ESP); - - for(i=0;i<(1<<p->loop_shift);i++) { - orc_x86_emit_mov_memoffset_reg (p, 4, 4*i, X86_ESP, X86_EAX); - ORC_ASM_CODE(p," mull %d(%%%s)\n", 16+4*i, - orc_x86_get_regname_ptr(p, X86_ESP)); - orc_x86_emit_rex(p, 4, 0, 0, X86_ESP); - *p->codeptr++ = 0xf7; - orc_x86_emit_modrm_memoffset (p, 4, 16+4*i, X86_ESP); - orc_x86_emit_mov_reg_memoffset (p, 4, X86_EDX, 4*i, X86_ESP); - } - - orc_x86_emit_mov_memoffset_mmx (p, 16, 0, X86_ESP, - p->vars[insn->dest_args[0]].alloc, FALSE); - orc_x86_emit_mov_memoffset_reg (p, 4, 32, X86_ESP, X86_EAX); - orc_x86_emit_mov_memoffset_reg (p, 4, 32 + regsize, X86_ESP, X86_EDX); + int src = p->vars[insn->src_args[1]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int tmp = orc_compiler_get_temp_reg (p); + int tmp2 = orc_compiler_get_temp_reg (p); - orc_x86_emit_add_imm_reg (p, regsize, stackframe, X86_ESP, FALSE); + orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,3,0,1), dest, tmp); + orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,3,0,1), src, tmp2); + orc_mmx_emit_pmuludq (p, src, dest); + orc_mmx_emit_pmuludq (p, tmp, tmp2); + orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,0,3,1), dest, dest); + orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,0,3,1), tmp2, tmp2); + orc_mmx_emit_punpckldq (p, tmp2, dest); } +#endif static void mmx_rule_select0lw (OrcCompiler *p, void *user, OrcInstruction *insn) @@ -1344,6 +1346,22 @@ mmx_rule_select1wb (OrcCompiler *p, void *user, OrcInstruction *insn) } static void +mmx_rule_splitql (OrcCompiler *p, void *user, 
OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[0]].alloc; + int dest1 = p->vars[insn->dest_args[0]].alloc; + int dest2 = p->vars[insn->dest_args[1]].alloc; + +#ifndef MMX + orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(2,0,2,0), src, dest2); + orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(3,1,3,1), src, dest1); +#else + orc_mmx_emit_movq (p, src, dest2); + orc_mmx_emit_pshufw (p, ORC_MMX_SHUF(3,2,3,2), src, dest1); +#endif +} + +static void mmx_rule_splitlw (OrcCompiler *p, void *user, OrcInstruction *insn) { int src = p->vars[insn->src_args[0]].alloc; @@ -1439,6 +1457,27 @@ mmx_rule_swapl (OrcCompiler *p, void *user, OrcInstruction *insn) orc_mmx_emit_por (p, tmp, dest); } +static void +mmx_rule_swapq (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[0]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int tmp = orc_compiler_get_temp_reg (p); + + orc_mmx_emit_movq (p, src, tmp); + orc_mmx_emit_psllq (p, 32, tmp); + orc_mmx_emit_psrlq (p, 32, dest); + orc_mmx_emit_por (p, tmp, dest); + orc_mmx_emit_movq (p, dest, tmp); + orc_mmx_emit_pslld (p, 16, tmp); + orc_mmx_emit_psrld (p, 16, dest); + orc_mmx_emit_por (p, tmp, dest); + orc_mmx_emit_movq (p, dest, tmp); + orc_mmx_emit_psllw (p, 8, tmp); + orc_mmx_emit_psrlw (p, 8, dest); + orc_mmx_emit_por (p, tmp, dest); +} + #define LOAD_MASK_IS_SLOW #ifndef LOAD_MASK_IS_SLOW static void @@ -1947,6 +1986,30 @@ BINARY_F(mulf, "mulps", 0x59) BINARY_F(divf, "divps", 0x5e) UNARY_F(sqrtf, "sqrtps", 0x51) +#define UNARY_D(opcode,insn_name,code) \ +static void \ +mmx_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ +{ \ + orc_mmx_emit_660f (p, insn_name, code, \ + p->vars[insn->src_args[0]].alloc, \ + p->vars[insn->dest_args[0]].alloc); \ +} + +#define BINARY_D(opcode,insn_name,code) \ +static void \ +mmx_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ +{ \ + orc_mmx_emit_660f (p, insn_name, code, \ + p->vars[insn->src_args[1]].alloc, \ + 
p->vars[insn->dest_args[0]].alloc); \ +} + +BINARY_D(addd, "addpd", 0x58) +BINARY_D(subd, "subpd", 0x5c) +BINARY_D(muld, "mulpd", 0x59) +BINARY_D(divd, "divpd", 0x5e) +UNARY_D(sqrtd, "sqrtpd", 0x51) + static void mmx_rule_minf (OrcCompiler *p, void *user, OrcInstruction *insn) { @@ -1972,6 +2035,30 @@ mmx_rule_minf (OrcCompiler *p, void *user, OrcInstruction *insn) } static void +mmx_rule_mind (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + if (p->target_flags & ORC_TARGET_FAST_NAN) { + orc_mmx_emit_660f (p, "minpd", 0x5d, + p->vars[insn->src_args[1]].alloc, + p->vars[insn->dest_args[0]].alloc); + } else { + int tmp = orc_compiler_get_temp_reg (p); + orc_mmx_emit_movq (p, + p->vars[insn->src_args[1]].alloc, + tmp); + orc_mmx_emit_660f (p, "minpd", 0x5d, + p->vars[insn->dest_args[0]].alloc, + tmp); + orc_mmx_emit_660f (p, "minpd", 0x5d, + p->vars[insn->src_args[1]].alloc, + p->vars[insn->dest_args[0]].alloc); + orc_mmx_emit_por (p, + tmp, + p->vars[insn->dest_args[0]].alloc); + } +} + +static void mmx_rule_maxf (OrcCompiler *p, void *user, OrcInstruction *insn) { if (p->target_flags & ORC_TARGET_FAST_NAN) { @@ -1996,6 +2083,30 @@ mmx_rule_maxf (OrcCompiler *p, void *user, OrcInstruction *insn) } static void +mmx_rule_maxd (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + if (p->target_flags & ORC_TARGET_FAST_NAN) { + orc_mmx_emit_660f (p, "maxpd", 0x5f, + p->vars[insn->src_args[1]].alloc, + p->vars[insn->dest_args[0]].alloc); + } else { + int tmp = orc_compiler_get_temp_reg (p); + orc_mmx_emit_movq (p, + p->vars[insn->src_args[1]].alloc, + tmp); + orc_mmx_emit_660f (p, "maxpd", 0x5f, + p->vars[insn->dest_args[0]].alloc, + tmp); + orc_mmx_emit_660f (p, "maxpd", 0x5f, + p->vars[insn->src_args[1]].alloc, + p->vars[insn->dest_args[0]].alloc); + orc_mmx_emit_por (p, + tmp, + p->vars[insn->dest_args[0]].alloc); + } +} + +static void mmx_rule_cmpeqf (OrcCompiler *p, void *user, OrcInstruction *insn) { orc_mmx_emit_0f (p, "cmpeqps", 0xc2, @@ -2005,6 +2116,16 
@@ mmx_rule_cmpeqf (OrcCompiler *p, void *user, OrcInstruction *insn) } static void +mmx_rule_cmpeqd (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + orc_mmx_emit_660f (p, "cmpeqpd", 0xc2, + p->vars[insn->src_args[1]].alloc, + p->vars[insn->dest_args[0]].alloc); + *p->codeptr++ = 0x00; +} + + +static void mmx_rule_cmpltf (OrcCompiler *p, void *user, OrcInstruction *insn) { orc_mmx_emit_0f (p, "cmpltps", 0xc2, @@ -2014,6 +2135,16 @@ mmx_rule_cmpltf (OrcCompiler *p, void *user, OrcInstruction *insn) } static void +mmx_rule_cmpltd (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + orc_mmx_emit_660f (p, "cmpltpd", 0xc2, + p->vars[insn->src_args[1]].alloc, + p->vars[insn->dest_args[0]].alloc); + *p->codeptr++ = 0x01; +} + + +static void mmx_rule_cmplef (OrcCompiler *p, void *user, OrcInstruction *insn) { orc_mmx_emit_0f (p, "cmpleps", 0xc2, @@ -2023,11 +2154,48 @@ mmx_rule_cmplef (OrcCompiler *p, void *user, OrcInstruction *insn) } static void -mmx_rule_convfl (OrcCompiler *p, void *user, OrcInstruction *insn) +mmx_rule_cmpled (OrcCompiler *p, void *user, OrcInstruction *insn) { - orc_mmx_emit_f30f (p, "cvttps2dq", 0x5b, - p->vars[insn->src_args[0]].alloc, + orc_mmx_emit_660f (p, "cmplepd", 0xc2, + p->vars[insn->src_args[1]].alloc, p->vars[insn->dest_args[0]].alloc); + *p->codeptr++ = 0x02; +} + + +static void +mmx_rule_convfl (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[0]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int tmpc; + int tmp = orc_compiler_get_temp_reg (p); + + tmpc = orc_compiler_get_temp_constant (p, 4, 0x80000000); + orc_mmx_emit_movq (p, src, tmp); + orc_mmx_emit_f30f (p, "cvttps2dq", 0x5b, src, dest); + orc_mmx_emit_psrad (p, 31, tmp); + orc_mmx_emit_pcmpeqd (p, dest, tmpc); + orc_mmx_emit_pandn (p, tmpc, tmp); + orc_mmx_emit_paddd (p, tmp, dest); + +} + +static void +mmx_rule_convdl (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[0]].alloc; + 
int dest = p->vars[insn->dest_args[0]].alloc; + int tmpc; + int tmp = orc_compiler_get_temp_reg (p); + + tmpc = orc_compiler_get_temp_constant (p, 4, 0x80000000); + orc_mmx_emit_pshufd (p, ORC_MMX_SHUF(3,1,3,1), src, tmp); + orc_mmx_emit_660f (p, "cvttpd2dq", 0xe6, src, dest); + orc_mmx_emit_psrad (p, 31, tmp); + orc_mmx_emit_pcmpeqd (p, dest, tmpc); + orc_mmx_emit_pandn (p, tmpc, tmp); + orc_mmx_emit_paddd (p, tmp, dest); } static void @@ -2037,6 +2205,30 @@ mmx_rule_convlf (OrcCompiler *p, void *user, OrcInstruction *insn) p->vars[insn->src_args[0]].alloc, p->vars[insn->dest_args[0]].alloc); } + +static void +mmx_rule_convld (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + orc_mmx_emit_f30f (p, "cvtdq2pd", 0xe6, + p->vars[insn->src_args[0]].alloc, + p->vars[insn->dest_args[0]].alloc); +} + +static void +mmx_rule_convfd (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + orc_mmx_emit_0f (p, "cvtps2pd", 0x5a, + p->vars[insn->src_args[0]].alloc, + p->vars[insn->dest_args[0]].alloc); +} + +static void +mmx_rule_convdf (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + orc_mmx_emit_660f (p, "cvtpd2ps", 0x5a, + p->vars[insn->src_args[0]].alloc, + p->vars[insn->dest_args[0]].alloc); +} #endif void @@ -2129,6 +2321,7 @@ orc_compiler_mmx_register_rules (OrcTarget *target) orc_rule_register (rule_set, "copyb", mmx_rule_copyx, NULL); orc_rule_register (rule_set, "copyw", mmx_rule_copyx, NULL); orc_rule_register (rule_set, "copyl", mmx_rule_copyx, NULL); + orc_rule_register (rule_set, "copyq", mmx_rule_copyx, NULL); orc_rule_register (rule_set, "shlw", mmx_rule_shift, (void *)0); orc_rule_register (rule_set, "shruw", mmx_rule_shift, (void *)1); @@ -2175,6 +2368,22 @@ orc_compiler_mmx_register_rules (OrcTarget *target) orc_rule_register (rule_set, "cmplef", mmx_rule_cmplef, NULL); orc_rule_register (rule_set, "convfl", mmx_rule_convfl, NULL); orc_rule_register (rule_set, "convlf", mmx_rule_convlf, NULL); + + orc_rule_register (rule_set, "addd", mmx_rule_addd, 
NULL); + orc_rule_register (rule_set, "subd", mmx_rule_subd, NULL); + orc_rule_register (rule_set, "muld", mmx_rule_muld, NULL); + orc_rule_register (rule_set, "divd", mmx_rule_divd, NULL); + orc_rule_register (rule_set, "mind", mmx_rule_mind, NULL); + orc_rule_register (rule_set, "maxd", mmx_rule_maxd, NULL); + orc_rule_register (rule_set, "sqrtd", mmx_rule_sqrtd, NULL); + orc_rule_register (rule_set, "cmpeqd", mmx_rule_cmpeqd, NULL); + orc_rule_register (rule_set, "cmpltd", mmx_rule_cmpltd, NULL); + orc_rule_register (rule_set, "cmpled", mmx_rule_cmpled, NULL); + orc_rule_register (rule_set, "convdl", mmx_rule_convdl, NULL); + orc_rule_register (rule_set, "convld", mmx_rule_convld, NULL); + + orc_rule_register (rule_set, "convfd", mmx_rule_convfd, NULL); + orc_rule_register (rule_set, "convdf", mmx_rule_convdf, NULL); #endif /* slow rules */ @@ -2195,6 +2404,8 @@ orc_compiler_mmx_register_rules (OrcTarget *target) orc_rule_register (rule_set, "absl", mmx_rule_absl_slow, NULL); orc_rule_register (rule_set, "swapw", mmx_rule_swapw, NULL); orc_rule_register (rule_set, "swapl", mmx_rule_swapl, NULL); + orc_rule_register (rule_set, "swapq", mmx_rule_swapq, NULL); + orc_rule_register (rule_set, "splitql", mmx_rule_splitql, NULL); orc_rule_register (rule_set, "splitlw", mmx_rule_splitlw, NULL); orc_rule_register (rule_set, "splitwb", mmx_rule_splitwb, NULL); orc_rule_register (rule_set, "avgsl", mmx_rule_avgsl, NULL); @@ -2204,7 +2415,9 @@ orc_compiler_mmx_register_rules (OrcTarget *target) orc_rule_register (rule_set, "shrub", mmx_rule_shrub, NULL); orc_rule_register (rule_set, "mulll", mmx_rule_mulll_slow, NULL); orc_rule_register (rule_set, "mulhsl", mmx_rule_mulhsl_slow, NULL); - orc_rule_register (rule_set, "mulhul", mmx_rule_mulhul_slow, NULL); +#ifndef MMX + orc_rule_register (rule_set, "mulhul", mmx_rule_mulhul, NULL); +#endif orc_rule_register (rule_set, "mullb", mmx_rule_mullb, NULL); orc_rule_register (rule_set, "mulhsb", mmx_rule_mulhsb, NULL); 
orc_rule_register (rule_set, "mulhub", mmx_rule_mulhub, NULL); @@ -2255,6 +2468,9 @@ orc_compiler_mmx_register_rules (OrcTarget *target) REG(minul); REG(mulll); orc_rule_register (rule_set, "convsuslw", mmx_rule_convsuslw, NULL); +#ifndef MMX + orc_rule_register (rule_set, "mulhsl", mmx_rule_mulhsl, NULL); +#endif /* SSE 4.2 -- no rules */ diff --git a/orc/orcrules-neon.c b/orc/orcrules-neon.c index 03a1cad..a14ef65 100644 --- a/orc/orcrules-neon.c +++ b/orc/orcrules-neon.c @@ -784,13 +784,13 @@ neon_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn) if (src->vartype == ORC_VAR_TYPE_CONST) { if (size == 1) { - orc_neon_emit_loadib (compiler, dest->alloc, src->value); + orc_neon_emit_loadib (compiler, dest->alloc, src->value.i); } else if (size == 2) { - orc_neon_emit_loadiw (compiler, dest->alloc, src->value); + orc_neon_emit_loadiw (compiler, dest->alloc, src->value.i); } else if (size == 4) { - orc_neon_emit_loadil (compiler, dest->alloc, src->value); + orc_neon_emit_loadil (compiler, dest->alloc, src->value.i); } else if (size == 8) { - orc_neon_emit_loadiq (compiler, dest->alloc, src->value); + orc_neon_emit_loadiq (compiler, dest->alloc, src->value.i); } else { ORC_PROGRAM_ERROR(compiler,"unimplemented"); } @@ -838,7 +838,7 @@ neon_rule_loadX (OrcCompiler *compiler, void *user, OrcInstruction *insn) ptr_register = compiler->gp_tmpreg; orc_arm_emit_add_imm (compiler, ptr_register, src->ptr_register, - compiler->vars[insn->src_args[1]].value * src->size); + compiler->vars[insn->src_args[1]].value.i * src->size); update = FALSE; is_aligned = FALSE; @@ -1454,7 +1454,7 @@ orc_neon_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn) orc_uint32 code; if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { - int shift = p->vars[insn->src_args[1]].value; + int shift = p->vars[insn->src_args[1]].value.i; if (shift < 0) { ORC_COMPILER_ERROR(p, "shift negative"); return; @@ -1469,13 +1469,13 @@ orc_neon_rule_shift (OrcCompiler *p, void 
*user, OrcInstruction *insn) immshift_info[type].name, orc_neon_reg_name (p->vars[insn->dest_args[0]].alloc), orc_neon_reg_name (p->vars[insn->src_args[0]].alloc), - p->vars[insn->src_args[1]].value); + (int)p->vars[insn->src_args[1]].value.i); } else { ORC_ASM_CODE(p," %s %s, %s, #%d\n", immshift_info[type].name, orc_neon_reg_name_quad (p->vars[insn->dest_args[0]].alloc), orc_neon_reg_name_quad (p->vars[insn->src_args[0]].alloc), - p->vars[insn->src_args[1]].value); + (int)p->vars[insn->src_args[1]].value.i); code |= 0x40; } code |= (p->vars[insn->dest_args[0]].alloc&0xf)<<12; diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c index ad66827..d42bc32 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -46,7 +46,7 @@ sse_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn) } #endif } else if (src->vartype == ORC_VAR_TYPE_CONST) { - sse_load_constant (compiler, dest->alloc, size, src->value); + sse_load_constant (compiler, dest->alloc, size, src->value.i); } else { ORC_ASSERT(0); } @@ -112,7 +112,7 @@ sse_rule_loadoffX (OrcCompiler *compiler, void *user, OrcInstruction *insn) return; } - offset = (compiler->offset + compiler->vars[insn->src_args[1]].value) * + offset = (compiler->offset + compiler->vars[insn->src_args[1]].value.i) * src->size; if (src->ptr_register == 0) { int i = insn->src_args[0]; @@ -610,7 +610,7 @@ sse_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn) if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { orc_sse_emit_shiftimm (p, code[type], imm_code1[type], imm_code2[type], - p->vars[insn->src_args[1]].value, + p->vars[insn->src_args[1]].value.i, p->vars[insn->dest_args[0]].alloc); } else if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) { int tmp = orc_compiler_get_temp_reg (p); @@ -636,9 +636,9 @@ sse_rule_shlb (OrcCompiler *p, void *user, OrcInstruction *insn) int tmp; if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { - orc_sse_emit_psllw (p, 
p->vars[insn->src_args[1]].value, dest); + orc_sse_emit_psllw (p, p->vars[insn->src_args[1]].value.i, dest); tmp = orc_compiler_get_constant (p, 1, - 0xff&(0xff<<p->vars[insn->src_args[1]].value)); + 0xff&(0xff<<p->vars[insn->src_args[1]].value.i)); orc_sse_emit_pand (p, tmp, dest); } else { ORC_COMPILER_ERROR(p,"rule only works with constants"); @@ -656,10 +656,10 @@ sse_rule_shrsb (OrcCompiler *p, void *user, OrcInstruction *insn) if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { orc_sse_emit_movdqa (p, src, tmp); orc_sse_emit_psllw (p, 8, tmp); - orc_sse_emit_psraw (p, p->vars[insn->src_args[1]].value, tmp); + orc_sse_emit_psraw (p, p->vars[insn->src_args[1]].value.i, tmp); orc_sse_emit_psrlw (p, 8, tmp); - orc_sse_emit_psraw (p, 8 + p->vars[insn->src_args[1]].value, dest); + orc_sse_emit_psraw (p, 8 + p->vars[insn->src_args[1]].value.i, dest); orc_sse_emit_psllw (p, 8, dest); orc_sse_emit_por (p, tmp, dest); @@ -676,9 +676,9 @@ sse_rule_shrub (OrcCompiler *p, void *user, OrcInstruction *insn) int tmp; if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { - orc_sse_emit_psrlw (p, p->vars[insn->src_args[1]].value, dest); + orc_sse_emit_psrlw (p, p->vars[insn->src_args[1]].value.i, dest); tmp = orc_compiler_get_constant (p, 1, - (0xff>>p->vars[insn->src_args[1]].value)); + (0xff>>p->vars[insn->src_args[1]].value.i)); orc_sse_emit_pand (p, tmp, dest); } else { ORC_COMPILER_ERROR(p,"rule only works with constants"); @@ -1219,6 +1219,7 @@ sse_rule_mulll_slow (OrcCompiler *p, void *user, OrcInstruction *insn) FALSE); } +#ifndef MMX static void sse_rule_mulhsl (OrcCompiler *p, void *user, OrcInstruction *insn) { @@ -1235,6 +1236,7 @@ sse_rule_mulhsl (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_pshufd (p, ORC_SSE_SHUF(2,0,3,1), tmp2, tmp2); orc_sse_emit_punpckldq (p, tmp2, dest); } +#endif static void sse_rule_mulhsl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) @@ -1272,6 +1274,7 @@ sse_rule_mulhsl_slow (OrcCompiler 
*p, void *user, OrcInstruction *insn) orc_x86_emit_add_imm_reg (p, regsize, stackframe, X86_ESP, FALSE); } +#ifndef MMX static void sse_rule_mulhul (OrcCompiler *p, void *user, OrcInstruction *insn) { @@ -1288,6 +1291,7 @@ sse_rule_mulhul (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_pshufd (p, ORC_SSE_SHUF(2,0,3,1), tmp2, tmp2); orc_sse_emit_punpckldq (p, tmp2, dest); } +#endif static void sse_rule_select0lw (OrcCompiler *p, void *user, OrcInstruction *insn) @@ -1348,8 +1352,13 @@ sse_rule_splitql (OrcCompiler *p, void *user, OrcInstruction *insn) int dest1 = p->vars[insn->dest_args[0]].alloc; int dest2 = p->vars[insn->dest_args[1]].alloc; +#ifndef MMX orc_sse_emit_pshufd (p, ORC_SSE_SHUF(2,0,2,0), src, dest2); orc_sse_emit_pshufd (p, ORC_SSE_SHUF(3,1,3,1), src, dest1); +#else + orc_sse_emit_movdqa (p, src, dest2); + orc_sse_emit_pshufw (p, ORC_SSE_SHUF(3,2,3,2), src, dest1); +#endif } static void @@ -2406,7 +2415,9 @@ orc_compiler_sse_register_rules (OrcTarget *target) orc_rule_register (rule_set, "shrub", sse_rule_shrub, NULL); orc_rule_register (rule_set, "mulll", sse_rule_mulll_slow, NULL); orc_rule_register (rule_set, "mulhsl", sse_rule_mulhsl_slow, NULL); +#ifndef MMX orc_rule_register (rule_set, "mulhul", sse_rule_mulhul, NULL); +#endif orc_rule_register (rule_set, "mullb", sse_rule_mullb, NULL); orc_rule_register (rule_set, "mulhsb", sse_rule_mulhsb, NULL); orc_rule_register (rule_set, "mulhub", sse_rule_mulhub, NULL); @@ -2457,7 +2468,9 @@ orc_compiler_sse_register_rules (OrcTarget *target) REG(minul); REG(mulll); orc_rule_register (rule_set, "convsuslw", sse_rule_convsuslw, NULL); +#ifndef MMX orc_rule_register (rule_set, "mulhsl", sse_rule_mulhsl, NULL); +#endif /* SSE 4.2 -- no rules */ |