Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/dosbox-staging/dosbox-staging.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjmarsh <jmarsh@vogons.org>2019-10-06 20:41:00 +0300
committerPatryk Obara <dreamer.tan@gmail.com>2020-02-24 16:47:47 +0300
commit26ecc855c10b7b2b3b7eaee421e9a2c5d67c8ade (patch)
tree0d4594302bd8ca688657f2cb1bc0a43c716bdfde
parent57bf045cc84de2f12dbea071fe8e9cba89f9fe31 (diff)
Add PowerPC dynamic recompilervogons/ppc-dynrec-r4330
This patch adds a dynamic recompiler for 32-bit PowerPC, based on the existing dynrec framework. I've only tested it on a Wii, but there should be no reason for it not to work on PowerPC-based Macs. As far as performance goes, with core=normal I get 0.7fps from PCPBENCH; with core=dynamic I get 3.1fps. There are some other big-endian improvements that can be made that get it up to 4.0fps, but I haven't included them here as they aren't related to dynrec. I haven't touched any of the autoconfigure scripts; config.h needs the following settings: The compiler needs to support gcc inline assembly (checked via defined(__GNUC__)) for dcache flushing/icache invalidation. There doesn't seem to be a portable way to achieve this, but they're not supervisor-level instructions, so they should be fine for any userspace program to use. Some comments on the changes: - I had to name the FPU_Rec struct so it could be forward-declared in risc_ppc.h (having a dedicated register pointing to it helps FPU-heavy code). - Removed some unneeded WORDS_BIGENDIAN guards in the self-modifying code detection; they weren't needed as the additions aren't meant to overflow between bytes. - Made dyn_run_code() get called before dyn_return(BR_Link1/BR_Link2) and shuffled their locations a bit. The reason for this is that the PPC dynrec generates its epilog once in gen_run_code() and then puts a jump to it whenever gen_return_function() is called, rather than emitting a full epilog every time. If dyn_return() was called before dyn_run_code(), the address of the epilog would be unknown. - Added the cache_block_before_close()/cache_block_closing() calls that were missing for those blocks. - The dynrec decoder wasn't differentiating between little-endian (host) memory access and regular memory access. I added new functions where necessary (hopefully caught them all) and aliased them to the regular functions when WORDS_BIGENDIAN is not defined. 
- dyn_ret_near() was bugged, it tried to write a dword to &reg_ip which overran on big-endian. Imported-from: https://www.vogons.org/viewtopic.php?p=731185#p731185
-rw-r--r--include/fpu.h2
-rw-r--r--src/cpu/core_dynrec.cpp8
-rw-r--r--src/cpu/core_dynrec/Makefile.am3
-rw-r--r--src/cpu/core_dynrec/cache.h61
-rw-r--r--src/cpu/core_dynrec/decoder_basic.h17
-rw-r--r--src/cpu/core_dynrec/decoder_opcodes.h17
-rw-r--r--src/cpu/core_dynrec/risc_ppc.h901
7 files changed, 963 insertions, 46 deletions
diff --git a/include/fpu.h b/include/fpu.h
index 44acd31bc..2b7cee5ec 100644
--- a/include/fpu.h
+++ b/include/fpu.h
@@ -85,7 +85,7 @@ enum FPU_Round {
ROUND_Chop = 3
};
-typedef struct {
+typedef struct FPU_rec {
FPU_Reg regs[9];
FPU_P_Reg p_regs[9];
FPU_Tag tags[9];
diff --git a/src/cpu/core_dynrec.cpp b/src/cpu/core_dynrec.cpp
index 9e232916a..b84cb4288 100644
--- a/src/cpu/core_dynrec.cpp
+++ b/src/cpu/core_dynrec.cpp
@@ -138,6 +138,7 @@ static struct {
#define MIPSEL 0x03
#define ARMV4LE 0x04
#define ARMV7LE 0x05
+#define POWERPC 0x06
#define ARMV8LE 0x07
#if C_TARGETCPU == X86_64
@@ -148,10 +149,17 @@ static struct {
#include "core_dynrec/risc_mipsel32.h"
#elif (C_TARGETCPU == ARMV4LE) || (C_TARGETCPU == ARMV7LE)
#include "core_dynrec/risc_armv4le.h"
+#elif C_TARGETCPU == POWERPC
+#include "core_dynrec/risc_ppc.h"
#elif C_TARGETCPU == ARMV8LE
#include "core_dynrec/risc_armv8le.h"
#endif
+#if !defined(WORDS_BIGENDIAN)
+#define gen_add_LE gen_add
+#define gen_mov_LE_word_to_reg gen_mov_word_to_reg
+#endif
+
#include "core_dynrec/decoder.h"
CacheBlockDynRec * LinkBlocks(BlockReturn ret) {
diff --git a/src/cpu/core_dynrec/Makefile.am b/src/cpu/core_dynrec/Makefile.am
index f135543e8..57e7258ee 100644
--- a/src/cpu/core_dynrec/Makefile.am
+++ b/src/cpu/core_dynrec/Makefile.am
@@ -2,4 +2,5 @@ noinst_HEADERS = cache.h decoder.h decoder_basic.h decoder_opcodes.h \
dyn_fpu.h operators.h risc_x64.h risc_x86.h risc_mipsel32.h \
risc_armv4le.h risc_armv4le-common.h \
risc_armv4le-o3.h risc_armv4le-thumb.h \
- risc_armv4le-thumb-iw.h risc_armv4le-thumb-niw.h risc_armv8le.h
+ risc_armv4le-thumb-iw.h risc_armv4le-thumb-niw.h risc_armv8le.h \
+ risc_ppc.h
diff --git a/src/cpu/core_dynrec/cache.h b/src/cpu/core_dynrec/cache.h
index 9ae81eb3f..faee31674 100644
--- a/src/cpu/core_dynrec/cache.h
+++ b/src/cpu/core_dynrec/cache.h
@@ -145,7 +145,7 @@ public:
if (host_readb(hostmem+addr)==(Bit8u)val) return;
host_writeb(hostmem+addr,val);
// see if there's code where we are writing to
- if (!host_readb(&write_map[addr])) {
+ if (!write_map[addr]) {
if (active_blocks) return; // still some blocks in this page
active_count--;
if (!active_count) Release(); // delay page releasing until active_count is zero
@@ -162,7 +162,7 @@ public:
if (host_readw(hostmem+addr)==(Bit16u)val) return;
host_writew(hostmem+addr,val);
// see if there's code where we are writing to
- if (!host_readw(&write_map[addr])) {
+ if (!*(Bit16u*)&write_map[addr]) {
if (active_blocks) return; // still some blocks in this page
active_count--;
if (!active_count) Release(); // delay page releasing until active_count is zero
@@ -171,7 +171,7 @@ public:
invalidation_map=(Bit8u*)malloc(4096);
memset(invalidation_map,0,4096);
}
-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+#if !defined(C_UNALIGNED_MEMORY)
host_writew(&invalidation_map[addr],
host_readw(&invalidation_map[addr])+0x101);
#else
@@ -184,7 +184,7 @@ public:
if (host_readd(hostmem+addr)==(Bit32u)val) return;
host_writed(hostmem+addr,val);
// see if there's code where we are writing to
- if (!host_readd(&write_map[addr])) {
+ if (!*(Bit32u*)&write_map[addr]) {
if (active_blocks) return; // still some blocks in this page
active_count--;
if (!active_count) Release(); // delay page releasing until active_count is zero
@@ -193,7 +193,7 @@ public:
invalidation_map=(Bit8u*)malloc(4096);
memset(invalidation_map,0,4096);
}
-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+#if !defined(C_UNALIGNED_MEMORY)
host_writed(&invalidation_map[addr],
host_readd(&invalidation_map[addr])+0x1010101);
#else
@@ -229,7 +229,7 @@ public:
addr&=4095;
if (host_readw(hostmem+addr)==(Bit16u)val) return false;
// see if there's code where we are writing to
- if (!host_readw(&write_map[addr])) {
+ if (!*(Bit16u*)&write_map[addr]) {
if (!active_blocks) {
// no blocks left in this page, still delay the page releasing a bit
active_count--;
@@ -240,7 +240,7 @@ public:
invalidation_map=(Bit8u*)malloc(4096);
memset(invalidation_map,0,4096);
}
-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+#if !defined(C_UNALIGNED_MEMORY)
host_writew(&invalidation_map[addr],
host_readw(&invalidation_map[addr])+0x101);
#else
@@ -258,7 +258,7 @@ public:
addr&=4095;
if (host_readd(hostmem+addr)==(Bit32u)val) return false;
// see if there's code where we are writing to
- if (!host_readd(&write_map[addr])) {
+ if (!*(Bit32u*)&write_map[addr]) {
if (!active_blocks) {
// no blocks left in this page, still delay the page releasing a bit
active_count--;
@@ -269,7 +269,7 @@ public:
invalidation_map=(Bit8u*)malloc(4096);
memset(invalidation_map,0,4096);
}
-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+#if !defined(C_UNALIGNED_MEMORY)
host_writed(&invalidation_map[addr],
host_readd(&invalidation_map[addr])+0x1010101);
#else
@@ -372,11 +372,11 @@ public:
return 0; // none found
}
- HostPt GetHostReadPt(Bitu phys_page) {
+ HostPt GetHostReadPt(Bitu phys_page) {
hostmem=old_pagehandler->GetHostReadPt(phys_page);
return hostmem;
}
- HostPt GetHostWritePt(Bitu phys_page) {
+ HostPt GetHostWritePt(Bitu phys_page) {
return GetHostReadPt( phys_page );
}
public:
@@ -392,7 +392,7 @@ private:
Bitu active_blocks; // the number of cache blocks in this page
Bitu active_count; // delaying parameter to not immediately release a page
- HostPt hostmem;
+ HostPt hostmem;
Bitu phys_page;
};
@@ -433,13 +433,13 @@ void CacheBlockDynRec::Clear(void) {
wherelink = &(*wherelink)->link[ind].next;
}
// now remove the link
- if(*wherelink)
+ if(*wherelink)
*wherelink = (*wherelink)->link[ind].next;
else {
LOG(LOG_CPU,LOG_ERROR)("Cache anomaly. please investigate");
}
}
- } else
+ } else
cache_addunusedblock(this);
if (crossblock) {
// clear out the crossblock (in the page before) as well
@@ -464,7 +464,7 @@ static CacheBlockDynRec * cache_openblock(void) {
// check for enough space in this block
Bitu size=block->cache.size;
CacheBlockDynRec * nextblock=block->cache.next;
- if (block->page.handler)
+ if (block->page.handler)
block->Clear();
// block size must be at least CACHE_MAXSIZE
while (size<CACHE_MAXSIZE) {
@@ -473,7 +473,7 @@ static CacheBlockDynRec * cache_openblock(void) {
// merge blocks
size+=nextblock->cache.size;
CacheBlockDynRec * tempblock=nextblock->cache.next;
- if (nextblock->page.handler)
+ if (nextblock->page.handler)
nextblock->Clear();
// block is free now
cache_addunusedblock(nextblock);
@@ -500,8 +500,8 @@ static void cache_closeblock(void) {
Bitu written=(Bitu)(cache.pos-block->cache.start);
if (written>block->cache.size) {
if (!block->cache.next) {
- if (written>block->cache.size+CACHE_MAXSIZE) E_Exit("CacheBlock overrun 1 %d",written-block->cache.size);
- } else E_Exit("CacheBlock overrun 2 written %d size %d",written,block->cache.size);
+ if (written>block->cache.size+CACHE_MAXSIZE) E_Exit("CacheBlock overrun 1 %d",written-block->cache.size);
+ } else E_Exit("CacheBlock overrun 2 written %d size %d",written,block->cache.size);
} else {
Bitu new_size;
Bitu left=block->cache.size-written;
@@ -553,12 +553,14 @@ static INLINE void cache_addq(Bit64u val) {
static void dyn_return(BlockReturn retcode,bool ret_exception);
static void dyn_run_code(void);
+static void cache_block_before_close(void);
+static void cache_block_closing(Bit8u* block_start,Bitu block_size);
/* Define temporary pagesize so the MPROTECT case and the regular case share as much code as possible */
#if (C_HAVE_MPROTECT)
#define PAGESIZE_TEMP PAGESIZE
-#else
+#else
#define PAGESIZE_TEMP 4096
#endif
@@ -614,18 +616,27 @@ static void cache_init(bool enable) {
}
// setup the default blocks for block linkage returns
cache.pos=&cache_code_link_blocks[0];
+ core_dynrec.runcode=(BlockReturn (*)(Bit8u*))cache.pos;
+ // can use up to PAGESIZE_TEMP-64 bytes
+ dyn_run_code();
+ cache_block_before_close();
+ cache_block_closing(cache_code_link_blocks, cache.pos-cache_code_link_blocks);
+
+ cache.pos=&cache_code_link_blocks[PAGESIZE_TEMP-64];
link_blocks[0].cache.start=cache.pos;
// link code that returns with a special return code
+ // must be less than 32 bytes
dyn_return(BR_Link1,false);
- cache.pos=&cache_code_link_blocks[32];
+ cache_block_before_close();
+ cache_block_closing(link_blocks[0].cache.start, cache.pos-link_blocks[0].cache.start);
+
+ cache.pos=&cache_code_link_blocks[PAGESIZE_TEMP-32];
link_blocks[1].cache.start=cache.pos;
// link code that returns with a special return code
+ // must be less than 32 bytes
dyn_return(BR_Link2,false);
-
- cache.pos=&cache_code_link_blocks[64];
- core_dynrec.runcode=(BlockReturn (*)(Bit8u*))cache.pos;
-// link_blocks[1].cache.start=cache.pos;
- dyn_run_code();
+ cache_block_before_close();
+ cache_block_closing(link_blocks[1].cache.start, cache.pos-link_blocks[1].cache.start);
cache.free_pages=0;
cache.last_page=0;
diff --git a/src/cpu/core_dynrec/decoder_basic.h b/src/cpu/core_dynrec/decoder_basic.h
index c8e2a8ef2..3352c5fcb 100644
--- a/src/cpu/core_dynrec/decoder_basic.h
+++ b/src/cpu/core_dynrec/decoder_basic.h
@@ -502,7 +502,6 @@ static INLINE void dyn_set_eip_end(HostReg reg,Bit32u imm=0) {
gen_mov_word_to_reg(reg,&reg_eip,true); //get_extend_word will mask off the upper bits
//gen_mov_word_to_reg(reg,&reg_eip,decode.big_op);
gen_add_imm(reg,(Bit32u)(decode.code-decode.code_start+imm));
- if (!decode.big_op) gen_extend_word(false,reg);
}
@@ -995,10 +994,10 @@ skip_extend_word:
// succeeded, use the pointer to avoid code invalidation
if (!addseg) {
if (!scaled_reg_used) {
- gen_mov_word_to_reg(ea_reg,(void*)val,true);
+ gen_mov_LE_word_to_reg(ea_reg,(void*)val,true);
} else {
DYN_LEA_MEM_REG_VAL(ea_reg,NULL,scaled_reg,scale,0);
- gen_add(ea_reg,(void*)val);
+ gen_add_LE(ea_reg,(void*)val);
}
} else {
if (!scaled_reg_used) {
@@ -1006,7 +1005,7 @@ skip_extend_word:
} else {
DYN_LEA_SEG_PHYS_REG_VAL(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base),scaled_reg,scale,0);
}
- gen_add(ea_reg,(void*)val);
+ gen_add_LE(ea_reg,(void*)val);
}
return;
}
@@ -1047,10 +1046,10 @@ skip_extend_word:
if (!addseg) {
if (!scaled_reg_used) {
MOV_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
- gen_add(ea_reg,(void*)val);
+ gen_add_LE(ea_reg,(void*)val);
} else {
DYN_LEA_REG_VAL_REG_VAL(ea_reg,base_reg,scaled_reg,scale,0);
- gen_add(ea_reg,(void*)val);
+ gen_add_LE(ea_reg,(void*)val);
}
} else {
if (!scaled_reg_used) {
@@ -1059,7 +1058,7 @@ skip_extend_word:
DYN_LEA_SEG_PHYS_REG_VAL(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base),scaled_reg,scale,0);
}
ADD_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
- gen_add(ea_reg,(void*)val);
+ gen_add_LE(ea_reg,(void*)val);
}
return;
}
@@ -1124,11 +1123,11 @@ skip_extend_word:
// succeeded, use the pointer to avoid code invalidation
if (!addseg) {
MOV_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
- gen_add(ea_reg,(void*)val);
+ gen_add_LE(ea_reg,(void*)val);
} else {
MOV_SEG_PHYS_TO_HOST_REG(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base));
ADD_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
- gen_add(ea_reg,(void*)val);
+ gen_add_LE(ea_reg,(void*)val);
}
return;
}
diff --git a/src/cpu/core_dynrec/decoder_opcodes.h b/src/cpu/core_dynrec/decoder_opcodes.h
index 67eaee2c9..82bbbbb9d 100644
--- a/src/cpu/core_dynrec/decoder_opcodes.h
+++ b/src/cpu/core_dynrec/decoder_opcodes.h
@@ -250,12 +250,12 @@ static void dyn_prep_word_imm(Bit8u reg) {
Bitu val;
if (decode.big_op) {
if (decode_fetchd_imm(val)) {
- gen_mov_word_to_reg(FC_OP2,(void*)val,true);
+ gen_mov_LE_word_to_reg(FC_OP2,(void*)val,true);
return;
}
} else {
if (decode_fetchw_imm(val)) {
- gen_mov_word_to_reg(FC_OP2,(void*)val,false);
+ gen_mov_LE_word_to_reg(FC_OP2,(void*)val,false);
return;
}
}
@@ -287,13 +287,13 @@ static void dyn_mov_word_imm(Bit8u reg) {
Bitu val;
if (decode.big_op) {
if (decode_fetchd_imm(val)) {
- gen_mov_word_to_reg(FC_OP1,(void*)val,true);
+ gen_mov_LE_word_to_reg(FC_OP1,(void*)val,true);
MOV_REG_WORD32_FROM_HOST_REG(FC_OP1,reg);
return;
}
} else {
if (decode_fetchw_imm(val)) {
- gen_mov_word_to_reg(FC_OP1,(void*)val,false);
+ gen_mov_LE_word_to_reg(FC_OP1,(void*)val,false);
MOV_REG_WORD16_FROM_HOST_REG(FC_OP1,reg);
return;
}
@@ -330,7 +330,7 @@ static void dyn_mov_byte_direct_al() {
if (decode.big_addr) {
Bitu val;
if (decode_fetchd_imm(val)) {
- gen_add(FC_ADDR,(void*)val);
+ gen_add_LE(FC_ADDR,(void*)val);
} else {
gen_add_imm(FC_ADDR,(Bit32u)val);
}
@@ -1184,11 +1184,8 @@ static void dyn_ret_near(Bitu bytes) {
dyn_reduce_cycles();
if (decode.big_op) gen_call_function_raw((void*)&dynrec_pop_dword);
- else {
- gen_call_function_raw((void*)&dynrec_pop_word);
- gen_extend_word(false,FC_RETOP);
- }
- gen_mov_word_from_reg(FC_RETOP,decode.big_op?(void*)(&reg_eip):(void*)(&reg_ip),true);
+ else gen_call_function_raw((void*)&dynrec_pop_word);
+ gen_mov_word_from_reg(FC_RETOP,decode.big_op?(void*)(&reg_eip):(void*)(&reg_ip),decode.big_op);
if (bytes) gen_add_direct_word(&reg_esp,bytes,true);
dyn_return(BR_Normal);
diff --git a/src/cpu/core_dynrec/risc_ppc.h b/src/cpu/core_dynrec/risc_ppc.h
new file mode 100644
index 000000000..d424ff5c8
--- /dev/null
+++ b/src/cpu/core_dynrec/risc_ppc.h
@@ -0,0 +1,901 @@
+/*
+ * Copyright (C) 2002-2019 The DOSBox Team
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+// some configuring defines that specify the capabilities of this architecture
+// or aspects of the recompiling
+
+// protect FC_ADDR over function calls if necessary
+//#define DRC_PROTECT_ADDR_REG
+
+// try to use non-flags generating functions if possible
+#define DRC_FLAGS_INVALIDATION
+// try to replace _simple functions by code
+#define DRC_FLAGS_INVALIDATION_DCODE
+
+// type with the same size as a pointer
+#define DRC_PTR_SIZE_IM Bit32u
+
+// calling convention modifier
+#define DRC_FC /* nothing */
+#define DRC_CALL_CONV /* nothing */
+
+#define DRC_USE_REGS_ADDR
+#define DRC_USE_SEGS_ADDR
+
+#if defined(_CALL_SYSV)
+// disable if your toolchain doesn't provide a _SDA_BASE_ symbol (r13 constant value)
+#define USE_SDA_BASE
+#endif
+
+// register mapping
+// The enumerators are numbered sequentially starting at HOST_R0=0, so HOST_Rn
+// has the numeric value n and can be shifted directly into the register
+// fields of the IMM/EXT/RLW instruction builders defined further down.
+// HOST_NONE follows HOST_R31 and therefore has the value 32.
+enum HostReg {
+ HOST_R0=0,
+ HOST_R1,
+ HOST_R2,
+ HOST_R3,
+ HOST_R4,
+ HOST_R5,
+ HOST_R6,
+ HOST_R7,
+ HOST_R8,
+ HOST_R9,
+ HOST_R10,
+ HOST_R11,
+ HOST_R12,
+ HOST_R13,
+ HOST_R14,
+ HOST_R15,
+ HOST_R16,
+ HOST_R17,
+ HOST_R18,
+ HOST_R19,
+ HOST_R20,
+ HOST_R21,
+ HOST_R22,
+ HOST_R23,
+ HOST_R24,
+ HOST_R25,
+ HOST_R26, // generic non-volatile (used for inline adc/sbb)
+ HOST_R27, // points to current CacheBlockDynRec (decode.block)
+ HOST_R28, // points to fpu
+ HOST_R29, // FC_ADDR
+ HOST_R30, // points to Segs
+ HOST_R31, // points to cpu_regs
+
+ HOST_NONE
+};
+
+// Registers that receive function parameters, in parameter order:
+// RegParams[n] is the register loaded by the gen_load_param_* helpers for
+// the n'th parameter (r3..r10).
+static const HostReg RegParams[] = {
+ HOST_R3, HOST_R4, HOST_R5, HOST_R6,
+ HOST_R7, HOST_R8, HOST_R9, HOST_R10
+};
+
+#if C_FPU
+#include "fpu.h"
+extern struct FPU_rec fpu;
+#endif
+
+#if defined(USE_SDA_BASE)
+extern Bit32u _SDA_BASE_[];
+#endif
+
+// register that holds function return values
+#define FC_RETOP HOST_R3
+
+// register used for address calculations, if the ABI does not
+// state that this register is preserved across function calls
+// then define DRC_PROTECT_ADDR_REG above
+#define FC_ADDR HOST_R29
+
+// register that points to Segs[]
+#define FC_SEGS_ADDR HOST_R30
+// register that points to cpu_regs[]
+#define FC_REGS_ADDR HOST_R31
+
+// register that holds the first parameter
+#define FC_OP1 RegParams[0]
+
+// register that holds the second parameter
+#define FC_OP2 RegParams[1]
+
+// special register that holds the third parameter for _R3 calls (byte accessible)
+#define FC_OP3 RegParams[2]
+
+// register that holds byte-accessible temporary values
+#define FC_TMP_BA1 FC_OP2
+
+// register that holds byte-accessible temporary values
+#define FC_TMP_BA2 FC_OP1
+
+// temporary register for LEA
+#define TEMP_REG_DRC HOST_R10
+
+// Instruction word builders; each packs its fields into one 32-bit word:
+//   IMM: primary opcode (bit 26 up), two register fields (bit 21 up, bit 16 up)
+//        and an immediate truncated to its low 16 bits
+//   EXT: primary opcode 31, three register fields (bit 21, 16 and 11 up),
+//        an extended opcode (bit 1 up) and the rc flag in bit 0
+//   RLW: primary opcode, two register fields (bit 21 up, bit 16 up), then
+//        sh/mb/me (bit 11, 6 and 1 up) and the rc flag in bit 0
+#define IMM(op, regsd, rega, imm) (((op)<<26)|((regsd)<<21)|((rega)<<16)| (((Bit32u)(imm))&0xFFFF))
+#define EXT(regsd, rega, regb, op, rc) ( (31<<26)|((regsd)<<21)|((rega)<<16)|((regb)<<11)| ((op)<<1)|(rc))
+#define RLW(op, regs, rega, sh, mb, me, rc) (((op)<<26)|((regs) <<21)|((rega)<<16)| ((sh)<<11)|((mb)<<6)|((me)<<1)|(rc))
+
+// The *_OP variants build the word and append it to the code cache via cache_addd()
+#define IMM_OP(op, regsd, rega, imm) cache_addd(IMM(op, regsd, rega, imm))
+#define EXT_OP(regsd, rega, regb, op, rc) cache_addd(EXT(regsd, rega, regb, op, rc))
+#define RLW_OP(op, regs, rega, sh, mb, me, rc) cache_addd(RLW(op, regs, rega, sh, mb, me, rc))
+
+// copy the complete contents of reg_src into reg_dst
+// nothing is emitted when both names refer to the same register
+static void gen_mov_regs(HostReg reg_dst,HostReg reg_src)
+{
+	if (reg_dst == reg_src)
+		return;
+
+	cache_addd(EXT(reg_src,reg_dst,reg_src,444,0)); // mr dst,src (encoded as or dst,src,src)
+}
+
+// load a 16bit constant into dest_reg
+// only the low 16 bits are meaningful afterwards, the upper half may be clobbered
+static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm)
+{
+	cache_addd(IMM(14, dest_reg, 0, imm)); // li dest,imm
+}
+
+DRC_PTR_SIZE_IM block_ptr;
+
+// Helper for loading addresses
+// Chooses a base register from which the absolute address 'addr' can be
+// reached with a signed 16-bit displacement, and rewrites 'addr' (passed by
+// reference) to that displacement. The return value is the base register the
+// caller must put into the instruction it emits next.
+// Candidates are tried in this order, the first one in range wins:
+//   HOST_R0        - addr itself already fits in 16 bits (addr left unchanged)
+//   FC_SEGS_ADDR   - relative to Segs
+//   FC_REGS_ADDR   - relative to cpu_regs
+//   HOST_R27       - relative to block_ptr
+//   HOST_R28       - relative to fpu (only with C_FPU)
+//   HOST_R13       - relative to _SDA_BASE_ (only with USE_SDA_BASE)
+// If none is in range, a "lis dest,addr@ha" is emitted, addr is reduced to
+// its sign-extended low half and 'dest' is returned as the base.
+static HostReg INLINE gen_addr(Bit32s &addr, HostReg dest)
+{
+ Bit32s off;
+
+ if ((Bit16s)addr == addr)
+ return HOST_R0;
+
+ off = addr - (Bit32s)&Segs;
+ if ((Bit16s)off == off)
+ {
+ addr = off;
+ return FC_SEGS_ADDR;
+ }
+
+ off = addr - (Bit32s)&cpu_regs;
+ if ((Bit16s)off == off)
+ {
+ addr = off;
+ return FC_REGS_ADDR;
+ }
+
+ off = addr - (Bit32s)block_ptr;
+ if ((Bit16s)off == off)
+ {
+ addr = off;
+ return HOST_R27;
+ }
+
+#if C_FPU
+ off = addr - (Bit32s)&fpu;
+ if ((Bit16s)off == off)
+ {
+ addr = off;
+ return HOST_R28;
+ }
+#endif
+
+#if defined(USE_SDA_BASE)
+ off = addr - (Bit32s)_SDA_BASE_;
+ if ((Bit16s)off == off)
+ {
+ addr = off;
+ return HOST_R13;
+ }
+#endif
+
+ // the +0x8000 rounds the high half up when the low half will be treated as negative
+ IMM_OP(15, dest, 0, (addr+0x8000)>>16); // lis dest, addr@ha
+ addr = (Bit16s)addr;
+ return dest;
+}
+
+// load a full 32bit constant into dest_reg
+static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm)
+{
+	// gen_addr() reduces imm to a 16bit offset and names the register to add it to
+	const HostReg base = gen_addr((Bit32s&)imm, dest_reg);
+
+	// nothing left to add when the base already is dest_reg and the offset is zero
+	if (base == dest_reg && !imm)
+		return;
+
+	IMM_OP(14, dest_reg, base, imm); // addi dest_reg, base, imm@l
+}
+
+// load a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
+// after a 16bit load the upper 16bit of dest_reg must not be relied upon
+static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword)
+{
+	Bit32s offset = (Bit32s)data;
+	const HostReg base = gen_addr(offset, dest_reg);
+
+	if (dword)
+		IMM_OP(32, dest_reg, base, offset); // lwz dest, offset(base)
+	else
+		IMM_OP(40, dest_reg, base, offset); // lhz dest, offset(base)
+}
+
+// load a 32bit (dword==true) or 16bit (dword==false) value from host memory
+// into dest_reg using a byte-reversed load
+static void gen_mov_LE_word_to_reg(HostReg dest_reg,void* data, bool dword) {
+	// dest_reg first holds the source address and is then overwritten by the load
+	gen_mov_dword_to_reg_imm(dest_reg, (Bit32u)data);
+
+	if (dword)
+		EXT_OP(dest_reg, 0, dest_reg, 534, 0); // lwbrx dest, 0, dest
+	else
+		EXT_OP(dest_reg, 0, dest_reg, 790, 0); // lhbrx dest, 0, dest
+}
+
+// load an 8bit constant into dest_reg
+// the upper 24bit of the destination register can be destroyed
+// this variant is the one that does not get FC_OP1/FC_OP2 as dest_reg, since
+// those might not be directly byte-accessible on some architectures
+static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
+	IMM_OP(14, dest_reg, 0, imm); // li dest,imm
+}
+
+// load an 8bit constant into dest_reg
+// the upper 24bit of the destination register can be destroyed
+// this variant may be given FC_OP1/FC_OP2 as dest_reg, which are
+// not directly byte-accessible on some architectures
+static void gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
+	cache_addd(IMM(14, dest_reg, 0, imm)); // li dest,imm
+}
+
+// store 32bit (dword==true) or the low 16bit (dword==false) of src_reg to memory
+static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword)
+{
+	Bit32s offset = (Bit32s)dest;
+	// r8 is handed to gen_addr() as the scratch register for building a base
+	const HostReg base = gen_addr(offset, HOST_R8);
+
+	if (dword)
+		IMM_OP(36, src_reg, base, offset); // stw src, offset(base)
+	else
+		IMM_OP(44, src_reg, base, offset); // sth src, offset(base)
+}
+
+// load an 8bit value from memory into dest_reg
+// the upper 24bit of the destination register can be destroyed
+// this variant is the one that does not get FC_OP1/FC_OP2 as dest_reg, since
+// those might not be directly byte-accessible on some architectures
+static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data)
+{
+	Bit32s offset = (Bit32s)data;
+	const HostReg base = gen_addr(offset, dest_reg);
+
+	cache_addd(IMM(34, dest_reg, base, offset)); // lbz dest, offset(base)
+}
+
+// load an 8bit value from memory into dest_reg
+// the upper 24bit of the destination register can be destroyed
+// this variant may be given FC_OP1/FC_OP2 as dest_reg, which are
+// not directly byte-accessible on some architectures
+static void gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
+	// same code as gen_mov_byte_to_reg_low()
+	Bit32s offset = (Bit32s)data;
+	const HostReg base = gen_addr(offset, dest_reg);
+	IMM_OP(34, dest_reg, base, offset); // lbz dest, offset(base)
+}
+
+// store the lowest 8bit of src_reg to memory
+static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest)
+{
+	Bit32s offset = (Bit32s)dest;
+	// r8 is handed to gen_addr() as the scratch register for building a base
+	const HostReg base = gen_addr(offset, HOST_R8);
+
+	cache_addd(IMM(38, src_reg, base, offset)); // stb src_reg, offset(base)
+}
+
+// widen the low 8bit of reg to a full 32bit value in place
+// sign==true sign-extends, sign==false zero-extends
+static void gen_extend_byte(bool sign,HostReg reg)
+{
+	if (!sign)
+	{
+		RLW_OP(21, reg, reg, 0, 24, 31, 0); // rlwinm reg, reg, 0, 24, 31
+		return;
+	}
+
+	EXT_OP(reg, reg, 0, 954, 0); // extsb reg, reg
+}
+
+// widen the low 16bit of reg to a full 32bit value in place
+// sign==true sign-extends, sign==false zero-extends
+static void gen_extend_word(bool sign,HostReg reg)
+{
+	if (!sign)
+	{
+		RLW_OP(21, reg, reg, 0, 16, 31, 0); // rlwinm reg, reg, 0, 16, 31
+		return;
+	}
+
+	EXT_OP(reg, reg, 0, 922, 0); // extsh reg, reg
+}
+
+// add a 32bit value read from memory to a full register (r8 is used as scratch)
+static void gen_add(HostReg reg,void* op)
+{
+	// r8 = *(Bit32u*)op
+	Bit32s offset = (Bit32s)op;
+	const HostReg base = gen_addr(offset, HOST_R8);
+	IMM_OP(32, HOST_R8, base, offset); // lwz r8, offset(base)
+
+	EXT_OP(reg,reg,HOST_R8,266,0); // add reg,reg,r8
+}
+
+// add a 32bit value read from host memory to a full register (r8 is used as scratch)
+static void gen_add_LE(HostReg reg,void* op)
+{
+	// r8 = op[0]|(op[1]<<8)|(op[2]<<16)|(op[3]<<24)
+	gen_mov_dword_to_reg_imm(HOST_R8, (Bit32u)op);
+	EXT_OP(HOST_R8, 0, HOST_R8, 534, 0); // lwbrx r8, 0, r8
+
+	EXT_OP(reg,reg,HOST_R8,266,0); // add reg,reg,r8
+}
+
+// add a 32bit constant to a full register
+// emits up to two instructions; none at all when imm is zero
+static void gen_add_imm(HostReg reg,Bit32u imm)
+{
+	const bool fits_16bit = ((Bit16s)imm == (Bit32s)imm);
+	const bool has_low_part = ((Bit16s)imm != 0);
+
+	if (!fits_16bit)
+		IMM_OP(15, reg, reg, (imm+0x8000)>>16); // addis reg,reg,imm@ha
+	if (has_low_part)
+		IMM_OP(14, reg, reg, imm); // addi reg, reg, imm@l
+}
+
+// and a 32bit constant value with a full register
+// The cheapest encoding that can express the mask is chosen:
+//   - all ones: nothing is emitted
+//   - zero: the register is simply loaded with 0
+//   - one contiguous run of 1 bits: a single rlwinm
+//   - mask confined to the low or the high halfword: andi. / andis.
+//   - 1 bits only at both ends (run wrapping around bit 0): a single rlwinm
+//   - anything else: andi. into r0, andis. into reg, then or the two halves
+static void gen_and_imm(HostReg reg,Bit32u imm) {
+	Bits sbit,ebit,tbit,bbit,abit,i;
+
+	// sbit = number of leading 0 bits
+	// ebit = number of trailing 0 bits
+	// tbit = number of total 0 bits
+	// bbit = number of leading 1 bits
+	// abit = number of trailing 1 bits
+
+	if (imm == 0xFFFFFFFF)
+		return;
+
+	if (!imm)
+	{
+		gen_mov_word_to_reg_imm(reg, 0);
+		return;
+	}
+
+	// scan from the most significant bit (i==0) down to the least significant
+	sbit = ebit = tbit = bbit = abit = 0;
+	for (i=0; i < 32; i++)
+	{
+		// unsigned constant: shifting a signed 1 into bit 31 is not portable
+		if (!(imm & ((Bit32u)1<<(31-i))))
+		{
+			abit = 0;
+			tbit++;
+			if (sbit == i)
+				sbit++;
+			ebit++;
+		}
+		else
+		{
+			ebit = 0;
+			if (bbit == i)
+				bbit++;
+			abit++;
+		}
+	}
+
+	// every 0 bit is leading or trailing: the 1 bits form one contiguous run
+	if (sbit + ebit == tbit)
+	{
+		RLW_OP(21,reg,reg,0,sbit,31-ebit,0); // rlwinm reg,reg,0,sbit,31-ebit
+		return;
+	}
+
+	if (sbit >= 16)
+	{
+		IMM_OP(28,reg,reg,imm); // andi. reg,reg,imm
+		return;
+	}
+	if (ebit >= 16)
+	{
+		IMM_OP(29,reg,reg,imm>>16); // andis. reg,reg,(imm>>16)
+		return;
+	}
+
+	// every 1 bit is leading or trailing: a mask that wraps around
+	if (bbit + abit == (32 - tbit))
+	{
+		RLW_OP(21,reg,reg,0,32-abit,bbit-1,0); // rlwinm reg,reg,0,32-abit,bbit-1
+		return;
+	}
+
+	// general case: mask both halves separately and merge them.
+	// The high half must be imm>>16 (a shift); the previous "imm>16" was a
+	// comparison yielding 0 or 1 and therefore produced a wrong mask.
+	IMM_OP(28, reg, HOST_R0, imm); // andi. r0, reg, imm@l
+	IMM_OP(29, reg, reg, imm>>16); // andis. reg, reg, imm@h
+	EXT_OP(reg, reg, HOST_R0, 444, 0); // or reg, reg, r0
+}
+
+// store a 32bit constant to memory, going through r9 as scratch register
+static void gen_mov_direct_dword(void* dest,Bit32u imm) {
+	const HostReg scratch = HOST_R9;
+
+	gen_mov_dword_to_reg_imm(scratch, imm);
+	gen_mov_word_from_reg(scratch, dest, true);
+}
+
+// move an address into memory (assumes address != NULL)
+// The address is loaded into r27 and also recorded in block_ptr, so that
+// later gen_addr() calls can address memory relative to r27.
+static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm)
+{
+ // clear block_ptr first so gen_addr() cannot pick r27-relative addressing
+ // while r27 itself is being (re)loaded
+ block_ptr = 0;
+ gen_mov_dword_to_reg_imm(HOST_R27, imm);
+ // this will be used to look-up the linked blocks
+ block_ptr = imm;
+ gen_mov_word_from_reg(HOST_R27, dest, 1);
+}
+
+// add a 32bit (dword==true) or 16bit (dword==false) constant value to a 32bit memory value
+// Emits a load / add / store sequence using r9 for the value and r8 as the
+// scratch register for address generation. Nothing is emitted when the
+// (possibly truncated) constant is zero.
+static void gen_add_direct_word(void* dest,Bit32u imm,bool dword)
+{
+ HostReg ld;
+ Bit32s addr = (Bit32s)dest;
+
+ if (!dword)
+ {
+ // 16bit case: only the low 16 bits of the constant are used and the
+ // halfword at byte offset 2 of the 32bit value is modified
+ imm &= 0xFFFF;
+ addr += 2;
+ }
+
+ if (!imm)
+ return;
+
+ ld = gen_addr(addr, HOST_R8);
+ IMM_OP(dword ? 32 : 40, HOST_R9, ld, addr); // lwz/lhz r9, addr@l(ld)
+ // the high half only needs adding for 32bit constants that do not fit in 16 bits
+ if (dword && (Bit16s)imm != (Bit32s)imm)
+ IMM_OP(15, HOST_R9, HOST_R9, (imm+0x8000)>>16); // addis r9,r9,imm@ha
+ if (!dword || (Bit16s)imm)
+ IMM_OP(14, HOST_R9, HOST_R9, imm); // addi r9,r9,imm@l
+ IMM_OP(dword ? 36 : 44, HOST_R9, ld, addr); // stw/sth r9, addr@l(ld)
+}
+
+// subtract a 32bit (dword==true) or 16bit (dword==false) constant from a 32bit memory value
+// implemented as an addition of the negated constant
+static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
+	const Bit32u negated = -(Bit32s)imm;
+
+	gen_add_direct_word(dest, negated, dword);
+}
+
+// effective address calculation, destination is dest_reg
+// scale_reg is scaled by scale (scale_reg*(2^scale)) and
+// added to dest_reg, then the immediate value is added
+// The scaled value is built in r8, so scale_reg itself is left unmodified.
+static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm)
+{
+ if (scale)
+ {
+ // RLW_OP takes the source register first and the target second
+ RLW_OP(21, scale_reg, HOST_R8, scale, 0, 31-scale, 0); // slwi r8,scale_reg,scale
+ scale_reg = HOST_R8;
+ }
+
+ gen_add_imm(dest_reg, imm);
+ EXT_OP(dest_reg, dest_reg, scale_reg, 266, 0); // add dest,dest,scaled
+}
+
+// effective address calculation done in place on dest_reg:
+// dest_reg = dest_reg*(2^scale) + imm
+static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm)
+{
+	// a scale of zero needs no shift instruction
+	if (scale != 0)
+		RLW_OP(21, dest_reg, dest_reg, scale, 0, 31-scale, 0); // slwi dest,dest,scale
+
+	gen_add_imm(dest_reg, imm);
+}
+
+// helper function to choose direct or indirect call
+// Writes the call sequence straight to 'pos' (cache.pos is not touched here)
+// and returns the number of bytes written:
+//   - target within relative branch range: a single "bl" (4 bytes); with
+//     pad==true it is followed by a branch over two nops, 16 bytes in total
+//   - otherwise: lis/ori/mtctr/bctrl through r8 and CTR, always 16 bytes
+static int INLINE do_gen_call(void *func, Bit32u *pos, bool pad)
+{
+ Bit32s f = (Bit32s)func;
+ Bit32s off = f - (Bit32s)pos;
+
+ // relative branches are limited to +/- ~32MB
+ if (off < 0x02000000 && off >= -0x02000000)
+ {
+ pos[0] = 0x48000001 | (off & 0x03FFFFFC); // bl func
+ if (pad)
+ {
+ // skip the two filler words so execution resumes at pos[4]
+ pos[1] = 0x4800000C; // b 12+
+ pos[2] = pos[3] = IMM(24, 0, 0, 0); // nop
+ return 16;
+ }
+ return 4;
+ }
+
+ pos[0] = IMM(15, HOST_R8, 0, f>>16); // lis r8,imm@h
+ pos[1] = IMM(24, HOST_R8, HOST_R8, f); // ori r8,r8,imm@l
+ pos[2] = EXT(HOST_R8, 9, 0, 467, 0); // mtctr r8
+ pos[3] = IMM(19, 0x14, 0, (528<<1)|1); // bctrl
+ return 16;
+}
+
+// emit a call to a parameterless function at the current cache position
+// 'fastcall' is forwarded to do_gen_call() as its 'pad' argument
+static void INLINE gen_call_function_raw(void * func,bool fastcall=true)
+{
+	const int emitted = do_gen_call(func, (Bit32u*)cache.pos, fastcall);
+
+	// step over the bytes that were just written
+	cache.pos += emitted;
+}
+
+// emit a call to a function taking paramcount parameters and return the
+// address at which the call sequence starts
+// note: the parameters are loaded in the architecture specific way
+// using the gen_load_param_ functions below; paramcount is not used here
+static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false)
+{
+	const Bit32u call_site = (Bit32u)cache.pos;
+
+	gen_call_function_raw(func,fastcall);
+	return call_site;
+}
+
+// place an immediate value in the register of the param'th function parameter
+static void INLINE gen_load_param_imm(Bitu imm,Bitu param) {
+	const HostReg target = RegParams[param];
+
+	gen_mov_dword_to_reg_imm(target, imm);
+}
+
+// place an address in the register of the param'th function parameter
+// (an address is loaded exactly like an immediate value)
+static void INLINE gen_load_param_addr(Bitu addr,Bitu param) {
+	gen_mov_dword_to_reg_imm(RegParams[param], addr);
+}
+
+// copy a host register into the register of the param'th function parameter
+static void INLINE gen_load_param_reg(Bitu reg,Bitu param) {
+	const HostReg source = (HostReg)reg;
+	const HostReg target = RegParams[param];
+
+	gen_mov_regs(target, source);
+}
+
+// load a 32bit value from memory into the register of the param'th function parameter
+static void INLINE gen_load_param_mem(Bitu mem,Bitu param) {
+	const HostReg target = RegParams[param];
+
+	gen_mov_word_to_reg(target, (void*)mem, true);
+}
+
+// jump to the address stored at (*ptr + imm)
+// ptr is read first, then the jump target is loaded from imm bytes past the
+// pointer that was read; r8 and CTR are used for the jump
+static void gen_jmp_ptr(void * ptr,Bits imm=0) {
+	const bool needs_high_part = ((Bit16s)imm != (Bit32s)imm);
+
+	gen_mov_word_to_reg(HOST_R8,ptr,true); // r8 = *(Bit32u*)ptr
+
+	if (needs_high_part)
+		IMM_OP(15, HOST_R8, HOST_R8, (imm + 0x8000)>>16); // addis r8, r8, imm@ha
+
+	IMM_OP(32, HOST_R8, HOST_R8, imm); // lwz r8, imm@l(r8)
+	EXT_OP(HOST_R8, 9, 0, 467, 0); // mtctr r8
+	IMM_OP(19, 0x14, 0, 528<<1); // bctr
+}
+
+// conditional jump taken if the register is zero
+// dword==true tests all 32 bits, dword==false only the low 16 bits
+// the branch is emitted with a zero displacement; gen_fill_branch() patches
+// the 16bit displacement later. Returns the address of the branch instruction.
+static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword)
+{
+	if (dword)
+		IMM_OP(11, 0, reg, 0); // cmpwi cr0, reg, 0
+	else
+		IMM_OP(28,reg,HOST_R0,0xFFFF); // andi. r0,reg,0xFFFF
+
+	IMM_OP(16, 0x0C, 2, 0); // bc 12,CR0[Z] (beq)
+
+	const Bit32u branch_pos = (Bit32u)cache.pos-4;
+	return branch_pos;
+}
+
+// Emit a conditional branch that is taken when reg is nonzero and return the
+// address of the branch instruction. For dword==true all 32 bits of reg are
+// tested, otherwise only the low 16 bits (r0 is overwritten in that case).
+// The branch displacement is left at zero; gen_fill_branch() sets it later.
+static Bit32u gen_create_branch_on_nonzero(HostReg reg,bool dword)
+{
+    if (dword) {
+        IMM_OP(11, 0, reg, 0);             // cmpwi cr0, reg, 0
+    } else {
+        IMM_OP(28,reg,HOST_R0,0xFFFF);     // andi. r0,reg,0xFFFF
+    }
+
+    IMM_OP(16, 0x04, 2, 0);                // bc 4,CR0[Z] (bne)
+
+    // the branch is the instruction just emitted, 4 bytes back
+    const Bit32u branch_at = (Bit32u)cache.pos-4;
+    return branch_at;
+}
+
+// Resolve a branch created earlier: compute the distance from the branch
+// instruction at data to the current cache position and store it, masked
+// with 0xFFFC, into the second halfword of that instruction. Debug builds
+// log a message when the distance reaches 0x8000 bytes in either direction.
+static void gen_fill_branch(DRC_PTR_SIZE_IM data)
+{
+#if C_DEBUG
+    const Bits distance = (Bit32u)cache.pos-data;
+    const Bits magnitude = (distance < 0) ? -distance : distance;
+    if (magnitude >= 0x8000) LOG_MSG("Big jump %d",magnitude);
+#endif
+
+    Bit16u *halfwords = (Bit16u*)data;   // the instruction viewed as two 16-bit halves
+    halfwords[1] = ((Bit32u)cache.pos-data) & 0xFFFC;
+}
+
+
+// Emit a conditional branch that is taken when reg is nonzero and return the
+// address of the branch instruction.
+// for dword==true all 32 bits of the register are tested
+// for dword==false only the lowest 8 bits are tested (r0 is overwritten)
+static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool dword)
+{
+    if (dword) {
+        IMM_OP(11, 0, reg, 0);           // cmpwi cr0, reg, 0
+    } else {
+        IMM_OP(28,reg,HOST_R0,0xFF);     // andi. r0,reg,0xFF
+    }
+
+    IMM_OP(16, 0x04, 2, 0);              // bne
+
+    // the branch is the instruction just emitted, 4 bytes back
+    const Bit32u branch_at = (Bit32u)cache.pos-4;
+    return branch_at;
+}
+
+// Compare the full 32-bit register against zero (signed) and emit a branch
+// that is taken when the value is less than or equal to zero. Returns the
+// address of the branch instruction so its target can be filled in later.
+static Bit32u gen_create_branch_long_leqzero(HostReg reg)
+{
+    IMM_OP(11, 0, reg, 0);       // cmpwi cr0, reg, 0
+    IMM_OP(16, 0x04, 1, 0);      // ble
+
+    // the branch is the instruction just emitted, 4 bytes back
+    const Bit32u branch_at = (Bit32u)cache.pos-4;
+    return branch_at;
+}
+
+// Resolve a branch created by one of the gen_create_branch_long_* functions.
+// This simply forwards to gen_fill_branch(), so the same displacement
+// encoding is used for both kinds of branch.
+static void gen_fill_branch_long(Bit32u data)
+{
+    gen_fill_branch((DRC_PTR_SIZE_IM)data);
+}
+
+// Make the code that was just written to [block_start, block_start+block_size)
+// visible to instruction fetch.
+//
+// The block is walked in 32-byte steps starting at block_start rounded down
+// to a 32-byte boundary (32 bytes is the cache line size assumed by this
+// code). The sequence follows the documented PowerPC recipe for modified
+// code:
+//   1. dcbst every line     - write modified data cache lines back to memory
+//   2. sync                 - wait until those write-backs have completed
+//   3. icbi every line      - discard stale instruction cache lines
+//   4. sync; isync          - wait for the invalidations and drop prefetched
+//                             instructions
+// Issuing dcbst and icbi back to back without the intermediate sync allows
+// the instruction line to be invalidated (and refetched) before the new code
+// has reached memory. The "memory" clobbers keep the compiler from moving
+// stores into the block past the flush.
+static void cache_block_closing(Bit8u* block_start,Bitu block_size)
+{
+#if defined(__GNUC__)
+    Bit8u* const first_line = (Bit8u*)((Bit32u)block_start & -32);
+    Bit8u* const block_end = block_start + block_size;
+    Bit8u* line;
+
+    // step 1: push the freshly written code out of the data cache
+    for (line = first_line; line < block_end; line += 32)
+        asm volatile("dcbst %y0" :: "Z"(*line) : "memory");
+
+    // step 2: the write-backs must be complete before invalidating
+    asm volatile("sync" ::: "memory");
+
+    // step 3: throw away whatever the instruction cache held for these lines
+    for (line = first_line; line < block_end; line += 32)
+        asm volatile("icbi %y0" :: "Z"(*line) : "memory");
+
+    // step 4: complete the invalidations and discard prefetched instructions
+    asm volatile("sync\n\t isync" ::: "memory");
+#else
+    #error "Don't know how to flush/invalidate CacheBlock with this compiler"
+#endif
+}
+
+// Hook invoked before a cache block is closed; intentionally does nothing here.
+static void cache_block_before_close(void)
+{
+}
+
+// Emit an unconditional jump (no link) to func at the current cache position.
+// A single relative branch is used when func is within reach, otherwise the
+// absolute address is loaded into r8 and the jump goes through CTR
+// (r8 and CTR are overwritten in that case).
+static void gen_function(void* func)
+{
+    // The distance is computed in unsigned arithmetic and only then viewed as
+    // signed: subtracting two Bit32s values overflows (undefined behaviour)
+    // when the two addresses are more than 2GB apart, which could invalidate
+    // the range check below.
+    Bit32s off = (Bit32s)((Bit32u)func - (Bit32u)cache.pos);
+
+    // relative branches are limited to +/- 32MB
+    if (off < 0x02000000 && off >= -0x02000000) {
+        cache_addd(0x48000000 | (off & 0x03FFFFFC));        // b func
+        return;
+    }
+
+    gen_mov_dword_to_reg_imm(HOST_R8, (Bit32u)func);        // r8 = func
+    EXT_OP(HOST_R8, 9, 0, 467, 0);                          // mtctr r8
+    IMM_OP(19, 0x14, 0, 528<<1);                            // bctr
+}
+
+// gen_run_code is assumed to be called exactly once, gen_return_function() jumps back to it; it emits three pieces into the cache: the entry prolog, the shared epilog, and a jump stub to get_CF()
+static void* epilog_addr; // cache address of the shared epilog, set by gen_run_code() and used by gen_return_function()
+static Bit8u *getCF_glue; // cache address of the jump to get_CF(), set by gen_run_code() and used as "bl" target by the ADC/SBB patches in gen_fill_function_ptr()
+static void gen_run_code(void)
+{
+ // prolog: open a 256-byte stack frame, save LR and r26-r31, load the fixed pointer registers, then jump to the address passed in FC_OP1
+ IMM_OP(37, HOST_R1, HOST_R1, -256); // stwu sp,-256(sp)
+ EXT_OP(FC_OP1, 9, 0, 467, 0); // mtctr FC_OP1 (jump target for the bctr that ends the prolog)
+ EXT_OP(HOST_R0, 8, 0, 339, 0); // mflr r0
+
+ IMM_OP(47, HOST_R26, HOST_R1, 128); // stmw r26, 128(sp) (saves r26..r31)
+
+ IMM_OP(15, FC_SEGS_ADDR, 0, ((Bit32u)&Segs)>>16); // lis FC_SEGS_ADDR, Segs@h
+ IMM_OP(24, FC_SEGS_ADDR, FC_SEGS_ADDR, &Segs); // ori FC_SEGS_ADDR, FC_SEGS_ADDR, Segs@l
+
+ IMM_OP(15, FC_REGS_ADDR, 0, ((Bit32u)&cpu_regs)>>16); // lis FC_REGS_ADDR, cpu_regs@h
+ IMM_OP(24, FC_REGS_ADDR, FC_REGS_ADDR, &cpu_regs); // ori FC_REGS_ADDR, FC_REGS_ADDR, cpu_regs@l
+
+#if C_FPU
+ IMM_OP(15, HOST_R28, 0, ((Bit32u)&fpu)>>16); // lis r28, fpu@h
+ IMM_OP(24, HOST_R28, HOST_R28, &fpu); // ori r28, r28, fpu@l
+#endif
+
+ IMM_OP(36, HOST_R0, HOST_R1, 256+4); // stw r0,256+4(sp) (return address goes 4 bytes above the new frame)
+ IMM_OP(19, 0x14, 0, 528<<1); // bctr
+
+ // epilog: mirror image of the prolog; reached through the jump emitted by gen_return_function()
+ epilog_addr = cache.pos;
+ IMM_OP(32, HOST_R0, HOST_R1, 256+4); // lwz r0,256+4(sp)
+ IMM_OP(46, HOST_R26, HOST_R1, 128); // lmw r26, 128(sp) (restores r26..r31)
+ EXT_OP(HOST_R0, 8, 0, 467, 0); // mtlr r0
+ IMM_OP(14, HOST_R1, HOST_R1, 256); // addi sp, sp, 256
+ IMM_OP(19, 0x14, 0, 16<<1); // blr
+
+ // trampoline to call get_CF(): a fixed location inside the cache that jumps on to get_CF via gen_function()
+ getCF_glue = cache.pos;
+ gen_function((void*)get_CF);
+}
+
+// Return from generated code: rather than emitting a full epilog, emit a
+// jump to the one whose address was recorded in epilog_addr.
+static void gen_return_function(void)
+{
+    void *shared_epilog = epilog_addr;
+    gen_function(shared_epilog);
+}
+
+// called when a call to a function can be replaced by a call to a simpler function: rewrites the 4-instruction slot starting at pos in place
+// with DRC_FLAGS_INVALIDATION_DCODE the known flags_type values get inline PowerPC code followed by a branch to the end of the slot; every other value gets a call to fct_ptr via do_gen_call(). NOTE(review): no cache flush is issued here for the patched words - presumably the caller takes care of it, confirm
+static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type)
+{
+ Bit32u *op = (Bit32u*)pos; // write cursor into the slot being patched
+ Bit32u *end = op+4; // one past the slot (4 instructions)
+
+ switch (flags_type) {
+#if defined(DRC_FLAGS_INVALIDATION_DCODE)
+ // try to avoid function calls but rather directly fill in code
+ case t_ADDb:
+ case t_ADDw:
+ case t_ADDd:
+ *op++ = EXT(FC_RETOP, FC_OP1, FC_OP2, 266, 0); // add FC_RETOP, FC_OP1, FC_OP2
+ break;
+ case t_ORb:
+ case t_ORw:
+ case t_ORd:
+ *op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_OP1, FC_OP2
+ break;
+ case t_ADCb:
+ case t_ADCw:
+ case t_ADCd:
+ op[0] = EXT(HOST_R26, FC_OP1, FC_OP2, 266, 0); // r26 = FC_OP1 + FC_OP2
+ op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF (relative to this instruction at pos+4, through the getCF_glue stub)
+ op[2] = IMM(12, HOST_R0, FC_RETOP, -1); // addic r0, FC_RETOP, 0xFFFFFFFF (XER[CA] = !!CF)
+ op[3] = EXT(FC_RETOP, HOST_R26, 0, 202, 0); // addze; FC_RETOP = r26 + !!CF
+ return; // all 4 words of the slot are used, no trailing branch
+ case t_SBBb:
+ case t_SBBw:
+ case t_SBBd:
+ op[0] = EXT(HOST_R26, FC_OP2, FC_OP1, 40, 0); // r26 = FC_OP1 - FC_OP2
+ op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF (relative to this instruction at pos+4, through the getCF_glue stub)
+ op[2] = IMM(8, HOST_R0, FC_RETOP, 0); // subfic r0, FC_RETOP, 0 (XER[CA] = !CF)
+ op[3] = EXT(FC_RETOP, HOST_R26, 0, 234, 0); // addme; FC_RETOP = r26 - 1 + !CF
+ return; // all 4 words of the slot are used, no trailing branch
+ case t_ANDb:
+ case t_ANDw:
+ case t_ANDd:
+ *op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 28, 0); // and FC_RETOP, FC_OP1, FC_OP2
+ break;
+ case t_SUBb:
+ case t_SUBw:
+ case t_SUBd:
+ *op++ = EXT(FC_RETOP, FC_OP2, FC_OP1, 40, 0); // subf FC_RETOP, FC_OP2, FC_OP1
+ break;
+ case t_XORb:
+ case t_XORw:
+ case t_XORd:
+ *op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 316, 0); // xor FC_RETOP, FC_OP1, FC_OP2
+ break;
+ case t_CMPb:
+ case t_CMPw:
+ case t_CMPd:
+ case t_TESTb:
+ case t_TESTw:
+ case t_TESTd:
+ break; // nothing is computed: the slot becomes just the branch to its end
+ case t_INCb:
+ case t_INCw:
+ case t_INCd:
+ *op++ = IMM(14, FC_RETOP, FC_OP1, 1); // addi FC_RETOP, FC_OP1, #1
+ break;
+ case t_DECb:
+ case t_DECw:
+ case t_DECd:
+ *op++ = IMM(14, FC_RETOP, FC_OP1, -1); // addi FC_RETOP, FC_OP1, #-1
+ break;
+ case t_NEGb:
+ case t_NEGw:
+ case t_NEGd:
+ *op++ = EXT(FC_RETOP, FC_OP1, 0, 104, 0); // neg FC_RETOP, FC_OP1
+ break;
+ case t_SHLb:
+ case t_SHLw:
+ case t_SHLd:
+ *op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP2
+ break;
+ case t_SHRb:
+ case t_SHRw:
+ case t_SHRd:
+ *op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP2
+ break;
+ case t_SARb:
+ *op++ = EXT(FC_OP1, FC_RETOP, 0, 954, 0); // extsb FC_RETOP, FC_OP1; intentional fall-through to the sraw below. NOTE(review): the sraw reads FC_OP1, so the sign extension only takes effect if FC_RETOP and FC_OP1 name the same host register - confirm
+ case t_SARw:
+ if (flags_type == t_SARw) // guard needed because t_SARb falls through to here
+ *op++ = EXT(FC_OP1, FC_RETOP, 0, 922, 0); // extsh FC_RETOP, FC_OP1; intentional fall-through (same NOTE(review) as for extsb)
+ case t_SARd:
+ *op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 792, 0); // sraw FC_RETOP, FC_OP1, FC_OP2
+ break;
+
+ case t_ROLb:
+ *op++ = RLW(20, FC_OP1, FC_OP1, 24, 0, 7, 0); // rlwimi FC_OP1, FC_OP1, 24, 0, 7 (copy the low byte into the top byte); intentional fall-through
+ case t_ROLw:
+ if (flags_type == t_ROLw) // guard needed because t_ROLb falls through to here
+ *op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15 (copy the low half into the high half); intentional fall-through
+ case t_ROLd:
+ *op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2
+ break;
+
+ case t_RORb:
+ *op++ = RLW(20, FC_OP1, FC_OP1, 8, 16, 23, 0); // rlwimi FC_OP1, FC_OP1, 8, 16, 23 (copy the low byte into the byte above it); intentional fall-through
+ case t_RORw:
+ if (flags_type == t_RORw) // guard needed because t_RORb falls through to here
+ *op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15 (copy the low half into the high half); intentional fall-through
+ case t_RORd:
+ *op++ = IMM(8, FC_OP2, FC_OP2, 32); // subfic FC_OP2, FC_OP2, 32 (FC_OP2 = 32 - FC_OP2)
+ *op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2 (rotate right done as rotate left by 32 - count)
+ break;
+
+ case t_DSHLw: // technically not correct for FC_OP3 > 16
+ *op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15
+ *op++ = RLW(23, FC_RETOP, FC_RETOP, FC_OP3, 0, 31, 0); // rotlw FC_RETOP, FC_RETOP, FC_OP3
+ break;
+ case t_DSHLd:
+ op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP3
+ op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3)
+ op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 536, 0); // srw FC_OP2, FC_OP2, FC_OP3
+ op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
+ return; // all 4 words of the slot are used, no trailing branch
+ case t_DSHRw: // technically not correct for FC_OP3 > 16
+ *op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15
+ *op++ = EXT(FC_RETOP, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_RETOP, FC_OP3
+ break;
+ case t_DSHRd:
+ op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP3
+ op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3)
+ op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 24, 0); // slw FC_OP2, FC_OP2, FC_OP3
+ op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
+ return; // all 4 words of the slot are used, no trailing branch
+#endif
+ default:
+ do_gen_call(fct_ptr, op, true); // no inline replacement: write a call to fct_ptr at the start of the slot
+ return;
+ }
+
+ *op = 0x48000000 + 4*(end-op); // b end (skips the unused remainder of the slot)
+}
+
+// Load the 16-bit value found index bytes into Segs into dest_reg
+// (index modulo 2 must be zero).
+// 16bit moves may destroy the upper 16bit of the destination register
+static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index)
+{
+    Bit8u *field = (Bit8u*)&Segs + index;
+    gen_mov_word_to_reg(dest_reg, field, false);   // false: 16-bit access
+}
+
+// Load the 32-bit value found index bytes into Segs into dest_reg
+// (index modulo 4 must be zero).
+static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index)
+{
+    Bit8u *field = (Bit8u*)&Segs + index;
+    gen_mov_word_to_reg(dest_reg, field, true);   // true: 32-bit access
+}
+
+// Add the 32-bit value found index bytes into Segs to the full register reg
+// (index modulo 4 must be zero).
+static void gen_add_seg32_to_reg(HostReg reg,Bitu index)
+{
+    Bit8u *addend = (Bit8u*)&Segs + index;
+    gen_add(reg, addend);
+}
+
+// Load the 16-bit value found index bytes into cpu_regs into dest_reg
+// (index modulo 2 must be zero).
+// 16bit moves may destroy the upper 16bit of the destination register
+static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index)
+{
+    Bit8u *field = (Bit8u*)&cpu_regs + index;
+    gen_mov_word_to_reg(dest_reg, field, false);   // false: 16-bit access
+}
+
+// Load the 32-bit value found index bytes into cpu_regs into dest_reg
+// (index modulo 4 must be zero).
+static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index)
+{
+    Bit8u *field = (Bit8u*)&cpu_regs + index;
+    gen_mov_word_to_reg(dest_reg, field, true);   // true: 32-bit access
+}
+
+// Load the 8-bit value found index bytes into cpu_regs into dest_reg.
+// The upper 24bit of the destination register can be destroyed.
+// This variant is not meant to be used with FC_OP1/FC_OP2 as dest_reg, as
+// these registers might not be directly byte-accessible on some architectures.
+static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index)
+{
+    Bit8u *field = (Bit8u*)&cpu_regs + index;
+    gen_mov_byte_to_reg_low(dest_reg, field);
+}
+
+// Load the 8-bit value found index bytes into cpu_regs into dest_reg.
+// The upper 24bit of the destination register can be destroyed.
+// This variant may be used with FC_OP1/FC_OP2 as dest_reg, which are not
+// directly byte-accessible on some architectures.
+static void INLINE gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index)
+{
+    Bit8u *field = (Bit8u*)&cpu_regs + index;
+    gen_mov_byte_to_reg_low_canuseword(dest_reg, field);
+}
+
+// Store 16 bits of src_reg at index bytes into cpu_regs
+// (index modulo 2 must be zero).
+static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index)
+{
+    Bit8u *field = (Bit8u*)&cpu_regs + index;
+    gen_mov_word_from_reg(src_reg, field, false);   // false: 16-bit access
+}
+
+// Store all 32 bits of src_reg at index bytes into cpu_regs
+// (index modulo 4 must be zero).
+static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index)
+{
+    Bit8u *field = (Bit8u*)&cpu_regs + index;
+    gen_mov_word_from_reg(src_reg, field, true);   // true: 32-bit access
+}
+
+// Store the lowest 8 bits of src_reg at index bytes into cpu_regs.
+static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index)
+{
+    Bit8u *field = (Bit8u*)&cpu_regs + index;
+    gen_mov_byte_from_reg_low(src_reg, field);
+}
+
+// Add the 32-bit value found index bytes into cpu_regs to the full register
+// reg (index modulo 4 must be zero).
+static void gen_add_regval32_to_reg(HostReg reg,Bitu index)
+{
+    Bit8u *addend = (Bit8u*)&cpu_regs + index;
+    gen_add(reg, addend);
+}
+
+// Store 32 bits (dword==true) or 16 bits (dword==false) of src_reg at index
+// bytes into cpu_regs.
+// if dword==true index modulo 4 must be zero
+// if dword==false index modulo 2 must be zero
+static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword)
+{
+    // the 16-bit and 32-bit store helpers differ only in the width flag
+    // handed to gen_mov_word_from_reg, so dword is passed straight through
+    gen_mov_word_from_reg(src_reg, (Bit8u*)&cpu_regs + index, dword);
+}
+
+// Load a 32-bit (dword==true) or 16-bit (dword==false) value found index
+// bytes into cpu_regs into dest_reg.
+// if dword==true index modulo 4 must be zero
+// if dword==false index modulo 2 must be zero
+// 16bit moves may destroy the upper 16bit of the destination register
+static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword)
+{
+    // the 16-bit and 32-bit load helpers differ only in the width flag
+    // handed to gen_mov_word_to_reg, so dword is passed straight through
+    gen_mov_word_to_reg(dest_reg, (Bit8u*)&cpu_regs + index, dword);
+}