1 |
/* |
2 |
* compiler/compemu_support.cpp - Core dynamic translation engine |
3 |
* |
4 |
* Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer |
5 |
* |
6 |
* Adaptation for Basilisk II and improvements, copyright 2000-2005 |
7 |
* Gwenole Beauchesne |
8 |
* |
9 |
* Basilisk II (C) 1997-2008 Christian Bauer |
10 |
* |
11 |
* This program is free software; you can redistribute it and/or modify |
12 |
* it under the terms of the GNU General Public License as published by |
13 |
* the Free Software Foundation; either version 2 of the License, or |
14 |
* (at your option) any later version. |
15 |
* |
16 |
* This program is distributed in the hope that it will be useful, |
17 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19 |
* GNU General Public License for more details. |
20 |
* |
21 |
* You should have received a copy of the GNU General Public License |
22 |
* along with this program; if not, write to the Free Software |
23 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
24 |
*/ |
25 |
|
26 |
#if !REAL_ADDRESSING && !DIRECT_ADDRESSING |
27 |
#error "Only Real or Direct Addressing is supported with the JIT Compiler" |
28 |
#endif |
29 |
|
30 |
#if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE |
31 |
#error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler" |
32 |
#endif |
33 |
|
34 |
/* NOTE: support for AMD64 assumes translation cache and other code |
35 |
* buffers are allocated into a 32-bit address space because (i) B2/JIT |
36 |
* code is not 64-bit clean and (ii) it's faster to resolve branches |
37 |
* that way. |
38 |
*/ |
39 |
#if !defined(__i386__) && !defined(__x86_64__) |
40 |
#error "Only IA-32 and X86-64 targets are supported with the JIT Compiler" |
41 |
#endif |
42 |
|
43 |
#define USE_MATCH 0 |
44 |
|
45 |
/* kludge for Brian, so he can compile under MSVC++ */ |
46 |
#define USE_NORMAL_CALLING_CONVENTION 0 |
47 |
|
48 |
#ifndef WIN32 |
49 |
#include <unistd.h> |
50 |
#include <sys/types.h> |
51 |
#include <sys/mman.h> |
52 |
#endif |
53 |
|
54 |
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
57 |
|
58 |
#include "sysdeps.h" |
59 |
#include "cpu_emulation.h" |
60 |
#include "main.h" |
61 |
#include "prefs.h" |
62 |
#include "user_strings.h" |
63 |
#include "vm_alloc.h" |
64 |
|
65 |
#include "m68k.h" |
66 |
#include "memory.h" |
67 |
#include "readcpu.h" |
68 |
#include "newcpu.h" |
69 |
#include "comptbl.h" |
70 |
#include "compiler/compemu.h" |
71 |
#include "fpu/fpu.h" |
72 |
#include "fpu/flags.h" |
73 |
|
74 |
#define DEBUG 1 |
75 |
#include "debug.h" |
76 |
|
77 |
#ifdef ENABLE_MON |
78 |
#include "mon.h" |
79 |
#endif |
80 |
|
81 |
#ifndef WIN32 |
82 |
#define PROFILE_COMPILE_TIME 1 |
83 |
#define PROFILE_UNTRANSLATED_INSNS 1 |
84 |
#endif |
85 |
|
86 |
#if defined(__x86_64__) && 0 |
87 |
#define RECORD_REGISTER_USAGE 1 |
88 |
#endif |
89 |
|
90 |
#ifdef WIN32 |
91 |
#undef write_log |
92 |
#define write_log dummy_write_log |
93 |
static void dummy_write_log(const char *, ...) { } |
94 |
#endif |
95 |
|
96 |
#if JIT_DEBUG |
97 |
#undef abort |
98 |
#define abort() do { \ |
99 |
fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \ |
100 |
exit(EXIT_FAILURE); \ |
101 |
} while (0) |
102 |
#endif |
103 |
|
104 |
#if RECORD_REGISTER_USAGE |
105 |
static uint64 reg_count[16]; |
106 |
static int reg_count_local[16]; |
107 |
|
108 |
static int reg_count_compare(const void *ap, const void *bp) |
109 |
{ |
110 |
const int a = *((int *)ap); |
111 |
const int b = *((int *)bp); |
112 |
return reg_count[b] - reg_count[a]; |
113 |
} |
114 |
#endif |
115 |
|
116 |
#if PROFILE_COMPILE_TIME |
117 |
#include <time.h> |
118 |
static uae_u32 compile_count = 0; |
119 |
static clock_t compile_time = 0; |
120 |
static clock_t emul_start_time = 0; |
121 |
static clock_t emul_end_time = 0; |
122 |
#endif |
123 |
|
124 |
#if PROFILE_UNTRANSLATED_INSNS |
125 |
const int untranslated_top_ten = 20; |
126 |
static uae_u32 raw_cputbl_count[65536] = { 0, }; |
127 |
static uae_u16 opcode_nums[65536]; |
128 |
|
129 |
static int untranslated_compfn(const void *e1, const void *e2) |
130 |
{ |
131 |
return raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2]; |
132 |
} |
133 |
#endif |
134 |
|
135 |
static compop_func *compfunctbl[65536]; |
136 |
static compop_func *nfcompfunctbl[65536]; |
137 |
static cpuop_func *nfcpufunctbl[65536]; |
138 |
uae_u8* comp_pc_p; |
139 |
|
140 |
// From newcpu.cpp |
141 |
extern bool quit_program; |
142 |
|
143 |
// gb-- Extra data for Basilisk II/JIT |
144 |
#if JIT_DEBUG |
145 |
static bool JITDebug = false; // Enable runtime disassemblers through mon? |
146 |
#else |
147 |
const bool JITDebug = false; // Don't use JIT debug mode at all |
148 |
#endif |
149 |
#if USE_INLINING |
150 |
static bool follow_const_jumps = true; // Flag: translation through constant jumps |
151 |
#else |
152 |
const bool follow_const_jumps = false; |
153 |
#endif |
154 |
|
155 |
const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB) |
156 |
static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks |
157 |
static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already |
158 |
static bool lazy_flush = true; // Flag: lazy translation cache invalidation |
159 |
static bool avoid_fpu = true; // Flag: compile FPU instructions ? |
160 |
static bool have_cmov = false; // target has CMOV instructions ? |
161 |
static bool have_lahf_lm = true; // target has LAHF supported in long mode ? |
162 |
static bool have_rat_stall = true; // target has partial register stalls ? |
163 |
const bool tune_alignment = true; // Tune code alignments for running CPU ? |
164 |
const bool tune_nop_fillers = true; // Tune no-op fillers for architecture |
165 |
static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly? |
166 |
static int align_loops = 32; // Align the start of loops |
167 |
static int align_jumps = 32; // Align the start of jumps |
168 |
static int optcount[10] = { |
169 |
10, // How often a block has to be executed before it is translated |
170 |
0, // How often to use naive translation |
171 |
0, 0, 0, 0, |
172 |
-1, -1, -1, -1 |
173 |
}; |
174 |
|
175 |
struct op_properties { |
176 |
uae_u8 use_flags; |
177 |
uae_u8 set_flags; |
178 |
uae_u8 is_addx; |
179 |
uae_u8 cflow; |
180 |
}; |
181 |
static op_properties prop[65536]; |
182 |
|
183 |
/* Return the fl_end_block bit of the control-flow properties recorded
 * for `opcode` in prop[]; the result is non-zero when the bit is set. */
static inline int end_block(uae_u32 opcode)
{
	const int cflow = prop[opcode].cflow;
	return cflow & fl_end_block;
}
187 |
|
188 |
/* True when the control-flow properties recorded for `opcode` are
 * exactly fl_const_jump (an equality test, not a bit test). */
static inline bool is_const_jump(uae_u32 opcode)
{
	const int cflow = prop[opcode].cflow;
	return cflow == fl_const_jump;
}
192 |
|
193 |
/* True when the fl_trap bit is set in the control-flow properties
 * recorded for `opcode`. */
static inline bool may_trap(uae_u32 opcode)
{
	return (prop[opcode].cflow & fl_trap) != 0;
}
197 |
|
198 |
/* Map an opcode value to the index used for table lookups.
 *
 * When HAVE_GET_WORD_UNSWAPPED is defined, the two low-order bytes of
 * `f` are exchanged (any higher bits are dropped); otherwise `f` is
 * returned unchanged.
 */
static inline unsigned int cft_map (unsigned int f)
{
#ifdef HAVE_GET_WORD_UNSWAPPED
	const unsigned int high_byte = (f >> 8) & 255;
	const unsigned int low_byte = f & 255;
	return high_byte | (low_byte << 8);
#else
	return f;
#endif
}
206 |
|
207 |
uae_u8* start_pc_p; |
208 |
uae_u32 start_pc; |
209 |
uae_u32 current_block_pc_p; |
210 |
static uintptr current_block_start_target; |
211 |
uae_u32 needed_flags; |
212 |
static uintptr next_pc_p; |
213 |
static uintptr taken_pc_p; |
214 |
static int branch_cc; |
215 |
static int redo_current_block; |
216 |
|
217 |
int segvcount=0; |
218 |
int soft_flush_count=0; |
219 |
int hard_flush_count=0; |
220 |
int checksum_count=0; |
221 |
static uae_u8* current_compile_p=NULL; |
222 |
static uae_u8* max_compile_start; |
223 |
static uae_u8* compiled_code=NULL; |
224 |
static uae_s32 reg_alloc_run; |
225 |
const int POPALLSPACE_SIZE = 1024; /* That should be enough space */ |
226 |
static uae_u8* popallspace=NULL; |
227 |
|
228 |
void* pushall_call_handler=NULL; |
229 |
static void* popall_do_nothing=NULL; |
230 |
static void* popall_exec_nostats=NULL; |
231 |
static void* popall_execute_normal=NULL; |
232 |
static void* popall_cache_miss=NULL; |
233 |
static void* popall_recompile_block=NULL; |
234 |
static void* popall_check_checksum=NULL; |
235 |
|
236 |
/* The 68k only ever executes from even addresses. So right now, we |
237 |
* waste half the entries in this array |
238 |
* UPDATE: We now use those entries to store the start of the linked |
239 |
* lists that we maintain for each hash result. |
240 |
*/ |
241 |
cacheline cache_tags[TAGSIZE]; |
242 |
int letit=0; |
243 |
blockinfo* hold_bi[MAX_HOLD_BI]; |
244 |
blockinfo* active; |
245 |
blockinfo* dormant; |
246 |
|
247 |
/* 68040 */ |
248 |
extern struct cputbl op_smalltbl_0_nf[]; |
249 |
extern struct comptbl op_smalltbl_0_comp_nf[]; |
250 |
extern struct comptbl op_smalltbl_0_comp_ff[]; |
251 |
|
252 |
/* 68020 + 68881 */ |
253 |
extern struct cputbl op_smalltbl_1_nf[]; |
254 |
|
255 |
/* 68020 */ |
256 |
extern struct cputbl op_smalltbl_2_nf[]; |
257 |
|
258 |
/* 68010 */ |
259 |
extern struct cputbl op_smalltbl_3_nf[]; |
260 |
|
261 |
/* 68000 */ |
262 |
extern struct cputbl op_smalltbl_4_nf[]; |
263 |
|
264 |
/* 68000 slow but compatible. */ |
265 |
extern struct cputbl op_smalltbl_5_nf[]; |
266 |
|
267 |
static void flush_icache_hard(int n); |
268 |
static void flush_icache_lazy(int n); |
269 |
static void flush_icache_none(int n); |
270 |
void (*flush_icache)(int n) = flush_icache_none; |
271 |
|
272 |
|
273 |
|
274 |
bigstate live; |
275 |
smallstate empty_ss; |
276 |
smallstate default_ss; |
277 |
static int optlev; |
278 |
|
279 |
static int writereg(int r, int size); |
280 |
static void unlock2(int r); |
281 |
static void setlock(int r); |
282 |
static int readreg_specific(int r, int size, int spec); |
283 |
static int writereg_specific(int r, int size, int spec); |
284 |
static void prepare_for_call_1(void); |
285 |
static void prepare_for_call_2(void); |
286 |
static void align_target(uae_u32 a); |
287 |
|
288 |
static uae_s32 nextused[VREGS]; |
289 |
|
290 |
uae_u32 m68k_pc_offset; |
291 |
|
292 |
/* Some arithmetic operations can be optimized away if the operands
293 |
* are known to be constant. But that's only a good idea when the |
294 |
* side effects they would have on the flags are not important. This |
295 |
* variable indicates whether we need the side effects or not |
296 |
*/ |
297 |
uae_u32 needflags=0; |
298 |
|
299 |
/* Flag handling is complicated. |
300 |
* |
301 |
* x86 instructions create flags, which quite often are exactly what we |
302 |
* want. So at times, the "68k" flags are actually in the x86 flags. |
303 |
* |
304 |
* Then again, sometimes we do x86 instructions that clobber the x86 |
305 |
* flags, but don't represent a corresponding m68k instruction. In that |
306 |
* case, we have to save them. |
307 |
* |
308 |
* We used to save them to the stack, but now store them back directly |
309 |
* into the regflags.cznv of the traditional emulation. Thus some odd |
310 |
* names. |
311 |
* |
312 |
* So flags can be in either of two places (used to be three; boy were |
313 |
* things complicated back then!); And either place can contain either |
314 |
* valid flags or invalid trash (and on the stack, there was also the |
315 |
* option of "nothing at all", now gone). A couple of variables keep |
316 |
* track of the respective states. |
317 |
* |
318 |
* To make things worse, we might or might not be interested in the flags. |
319 |
* by default, we are, but a call to dont_care_flags can change that |
320 |
* until the next call to live_flags. If we are not, pretty much whatever |
321 |
* is in the register and/or the native flags is seen as valid. |
322 |
*/ |
323 |
|
324 |
/* Return the head of the blockinfo chain for cache line `cl`.
 * The chain head lives in the entry following the line's own entry,
 * i.e. cache_tags[cl+1].bi. */
static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
{
	blockinfo* const head = cache_tags[cl + 1].bi;
	return head;
}
328 |
|
329 |
/* Look up the blockinfo whose pc_p equals `addr`.
 *
 * Walks the next_same_cl chain of the cache line that `addr` hashes to.
 * Returns the matching blockinfo, or NULL when the chain has no match.
 */
static __inline__ blockinfo* get_blockinfo_addr(void* addr)
{
	for (blockinfo* it = get_blockinfo(cacheline(addr)); it; it = it->next_same_cl) {
		if (it->pc_p == addr)
			return it;
	}
	return NULL;
}
340 |
|
341 |
|
342 |
/******************************************************************* |
343 |
* All sorts of list related functions for all of the lists * |
344 |
*******************************************************************/ |
345 |
|
346 |
/* Unlink `bi` from the same-cache-line chain it is on and refresh the
 * line's handler: the handler_to_use of the new chain head, or
 * popall_execute_normal when the chain is now empty. */
static __inline__ void remove_from_cl_list(blockinfo* bi)
{
	const uae_u32 cl = cacheline(bi->pc_p);
	blockinfo* const succ = bi->next_same_cl;

	/* Splice bi out of the chain */
	if (bi->prev_same_cl_p)
		*(bi->prev_same_cl_p) = succ;
	if (succ)
		succ->prev_same_cl_p = bi->prev_same_cl_p;

	blockinfo* const head = cache_tags[cl + 1].bi;
	if (!head)
		cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
	else
		cache_tags[cl].handler = head->handler_to_use;
}
359 |
|
360 |
/* Unlink `bi` from the next/prev_p list it is currently on.
 * The fields of `bi` itself are left untouched. */
static __inline__ void remove_from_list(blockinfo* bi)
{
	blockinfo* const succ = bi->next;

	if (bi->prev_p)
		*(bi->prev_p) = succ;
	if (succ)
		succ->prev_p = bi->prev_p;
}
367 |
|
368 |
/* Unlink `bi` from both lists it belongs to: first the next/prev_p
 * list, then the same-cache-line chain. */
static __inline__ void remove_from_lists(blockinfo* bi)
{
	remove_from_list(bi);		/* next/prev_p list */
	remove_from_cl_list(bi);	/* same-cache-line chain */
}
373 |
|
374 |
/* Insert `bi` at the head of the same-cache-line chain selected by
 * bi->pc_p and make bi->handler_to_use the handler for that line. */
static __inline__ void add_to_cl_list(blockinfo* bi)
{
	const uae_u32 cl = cacheline(bi->pc_p);
	blockinfo* const old_head = cache_tags[cl + 1].bi;

	/* Link the former head behind bi */
	if (old_head)
		old_head->prev_same_cl_p = &(bi->next_same_cl);
	bi->next_same_cl = old_head;

	/* Make bi the new head */
	cache_tags[cl + 1].bi = bi;
	bi->prev_same_cl_p = &(cache_tags[cl + 1].bi);

	cache_tags[cl].handler = bi->handler_to_use;
}
387 |
|
388 |
/* Move `bi` to the head of its same-cache-line chain by unlinking it
 * and inserting it again. */
static __inline__ void raise_in_cl_list(blockinfo* bi)
{
	remove_from_cl_list(bi);	/* unlink from the current position */
	add_to_cl_list(bi);			/* insert again at the head */
}
393 |
|
394 |
/* Push `bi` onto the front of the global `active` list. */
static __inline__ void add_to_active(blockinfo* bi)
{
	blockinfo* const old_head = active;

	if (old_head)
		old_head->prev_p = &(bi->next);
	bi->next = old_head;

	bi->prev_p = &active;
	active = bi;
}
403 |
|
404 |
/* Push `bi` onto the front of the global `dormant` list. */
static __inline__ void add_to_dormant(blockinfo* bi)
{
	blockinfo* const old_head = dormant;

	if (old_head)
		old_head->prev_p = &(bi->next);
	bi->next = old_head;

	bi->prev_p = &dormant;
	dormant = bi;
}
413 |
|
414 |
/* Unlink dependency `d` from whatever list it is on and clear its own
 * link fields.  Safe to call on a dependency that is not linked
 * (prev_p and next both NULL). */
static __inline__ void remove_dep(dependency* d)
{
	dependency* const succ = d->next;

	if (d->prev_p)
		*(d->prev_p) = succ;
	if (succ)
		succ->prev_p = d->prev_p;

	d->next = NULL;
	d->prev_p = NULL;
}
423 |
|
424 |
/* This block's code is about to be thrown away, so it no longer |
425 |
depends on anything else */ |
426 |
static __inline__ void remove_deps(blockinfo* bi) |
427 |
{ |
428 |
remove_dep(&(bi->dep[0])); |
429 |
remove_dep(&(bi->dep[1])); |
430 |
} |
431 |
|
432 |
/* Re-point the jump recorded in dependency `d` at handler `a`.
 *
 * The value stored through d->jmp_off is the distance from the address
 * 4 bytes past jmp_off to `a`.
 */
static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
{
	const uintptr after_slot = (uintptr)d->jmp_off + 4;
	*(d->jmp_off) = (uintptr)a - after_slot;
}
436 |
|
437 |
/******************************************************************** |
438 |
* Soft flush handling support functions * |
439 |
********************************************************************/ |
440 |
|
441 |
/* Set bi->direct_handler_to_use to `dh`.
 *
 * When the handler actually changes, every dependency on bi->deplist
 * that has a recorded jump slot (jmp_off) is re-pointed at `dh` first.
 * Nothing happens when `dh` is already the handler in use.
 */
static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
{
	if (dh == bi->direct_handler_to_use)
		return;

	for (dependency* dep = bi->deplist; dep; dep = dep->next) {
		if (dep->jmp_off)
			adjust_jmpdep(dep, dh);
	}
	bi->direct_handler_to_use = dh;
}
460 |
|
461 |
/* Reset `bi` to the BI_INVALID state.
 *
 * The optimisation level and handlers are cleared, the execution
 * counter is reloaded from optcount[0], callers are redirected to
 * bi->direct_pen, and both outgoing dependencies are cleared and
 * unlinked.
 */
static __inline__ void invalidate_block(blockinfo* bi)
{
	bi->optlevel = 0;
	bi->count = optcount[0] - 1;

	bi->handler = NULL;
	bi->handler_to_use = (cpuop_func *)popall_execute_normal;
	bi->direct_handler = NULL;
	set_dhtu(bi, bi->direct_pen);

	bi->needed_flags = 0xff;
	bi->status = BI_INVALID;

	/* Forget the jump slots and targets before unlinking the records */
	for (int k = 0; k < 2; ++k) {
		bi->dep[k].jmp_off = NULL;
		bi->dep[k].target = NULL;
	}
	remove_deps(bi);
}
479 |
|
480 |
/* Record that slot `i` of block `bi` jumps directly to the block that
 * starts at `target`.
 *
 * bi:      source block owning the dependency record dep[i]
 * i:       index of the dependency record to fill in
 * jmpaddr: location of the jump offset to patch when the target's
 *          handler changes
 * target:  address identifying the target block; its blockinfo must
 *          already exist
 *
 * The record is pushed onto the front of the target's deplist.  The
 * target lookup is checked unconditionally (previously only under the
 * Dif debug macro) because the result is dereferenced right afterwards.
 */
static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
{
	blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);

	if (!tbi) {
		write_log("Could not create jmpdep!\n");
		abort();
	}

	bi->dep[i].jmp_off=jmpaddr;
	bi->dep[i].source=bi;
	bi->dep[i].target=tbi;

	/* Insert at the head of the target's dependency list */
	bi->dep[i].next=tbi->deplist;
	if (bi->dep[i].next)
		bi->dep[i].next->prev_p=&(bi->dep[i].next);
	bi->dep[i].prev_p=&(tbi->deplist);
	tbi->deplist=&(bi->dep[i]);
}
497 |
|
498 |
/* Put `bi` into the BI_NEED_RECOMP state.
 *
 * Direct callers are redirected to bi->direct_pen, the block's handlers
 * are reset to popall_execute_normal, and the cache line's handler is
 * reset too when `bi` is the head of its same-cache-line chain.
 */
static __inline__ void block_need_recompile(blockinfo * bi)
{
	const uae_u32 cl = cacheline(bi->pc_p);
	cpuop_func * const exec_normal = (cpuop_func *)popall_execute_normal;

	set_dhtu(bi, bi->direct_pen);
	bi->direct_handler = bi->direct_pen;

	bi->handler_to_use = exec_normal;
	bi->handler = exec_normal;
	if (cache_tags[cl + 1].bi == bi)
		cache_tags[cl].handler = exec_normal;

	bi->status = BI_NEED_RECOMP;
}
511 |
|
512 |
/* Mark every block that jumps directly into `bi` for recompilation,
 * recursing into the callers of those blocks.
 *
 * Only dependencies with a recorded jump slot are considered:
 *  - BI_ACTIVE / BI_NEED_CHECK sources are marked and recursed into;
 *  - a BI_COMPILING source sets redo_current_block;
 *  - sources in any other state (including BI_NEED_RECOMP and
 *    BI_INVALID) are left alone.
 */
static __inline__ void mark_callers_recompile(blockinfo * bi)
{
	dependency *next;

	for (dependency *x = bi->deplist; x; x = next) {
		/* Fetch the successor first: x->next disappears when we mark
		 * for recompilation and thus remove the blocks from the lists */
		next = x->next;

		if (!x->jmp_off)
			continue;

		blockinfo *cbi = x->source;
		if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
			block_need_recompile(cbi);
			mark_callers_recompile(cbi);
		}
		else if (cbi->status == BI_COMPILING) {
			redo_current_block = 1;
		}
	}
}
544 |
|
545 |
/* Return the blockinfo for `addr`, creating one if necessary.
 *
 * An existing blockinfo is returned as is.  Otherwise the first
 * non-NULL entry of hold_bi[] is taken out of the array, bound to
 * `addr`, invalidated, and linked into the active list and the
 * cache-line chain.  Aborts when no spare blockinfo is available.
 *
 * `setstate` is accepted for interface compatibility but not used here.
 * (The previously computed but unused cache-line local was removed.)
 */
static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
{
	blockinfo* bi=get_blockinfo_addr(addr);
	int i;

	if (bi)
		return bi;

	for (i=0;i<MAX_HOLD_BI;i++) {
		if (!hold_bi[i])
			continue;

		bi=hold_bi[i];
		hold_bi[i]=NULL;
		bi->pc_p=(uae_u8 *)addr;
		invalidate_block(bi);
		add_to_active(bi);
		add_to_cl_list(bi);
		return bi;
	}

	write_log("Looking for blockinfo, can't find free one\n");
	abort();
	return NULL;
}
571 |
|
572 |
static void prepare_block(blockinfo* bi); |
573 |
|
574 |
/* Management of blockinfos.
575 |
|
576 |
A blockinfo struct is allocated whenever a new block has to be |
577 |
compiled. If the list of free blockinfos is empty, we allocate a new |
578 |
pool of blockinfos and link the newly created blockinfos altogether |
579 |
into the list of free blockinfos. Otherwise, we simply pop a structure |
580 |
off the free list. |
581 |
|
582 |
Blockinfo are lazily deallocated, i.e. chained altogether in the |
583 |
list of free blockinfos whenever a translation cache flush (hard or
584 |
soft) request occurs. |
585 |
*/ |
586 |
|
587 |
/* Pool allocator handing out fixed-size chunks of type T.
 *
 * T must provide a `T *next` member; it is used to thread unused chunks
 * onto a free list.  Pools are obtained with malloc() and only returned
 * to the system when the allocator itself is destroyed; release() merely
 * pushes a chunk back onto the free list.
 */
template< class T >
class LazyBlockAllocator
{
	enum {
		kPoolSize = 1 + 4096 / sizeof(T)	/* chunks per pool */
	};
	struct Pool {
		T chunk[kPoolSize];
		Pool * next;
	};
	Pool * mPools;		/* every pool allocated so far */
	T * mChunks;		/* head of the free-chunk list */
public:
	LazyBlockAllocator() : mPools(0), mChunks(0) { }
	~LazyBlockAllocator();
	T * acquire();
	void release(T * const);
};

/* Free every pool; all chunks handed out become invalid. */
template< class T >
LazyBlockAllocator<T>::~LazyBlockAllocator()
{
	Pool * currentPool = mPools;
	while (currentPool) {
		Pool * deadPool = currentPool;
		currentPool = currentPool->next;
		free(deadPool);
	}
}

/* Pop a chunk off the free list, allocating a new pool first when the
 * list is empty.  Aborts if the pool cannot be allocated, so the result
 * is never NULL.  Only the chunk's `next` member has been written. */
template< class T >
T * LazyBlockAllocator<T>::acquire()
{
	if (!mChunks) {
		// There is no chunk left, allocate a new pool and link the
		// chunks into the free list
		Pool * newPool = (Pool *)malloc(sizeof(Pool));
		if (!newPool) {
			// Out of memory: there is no way to report failure to
			// callers, which all dereference the result immediately
			abort();
		}
		for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
			chunk->next = mChunks;
			mChunks = chunk;
		}
		newPool->next = mPools;
		mPools = newPool;
	}
	T * chunk = mChunks;
	mChunks = chunk->next;
	return chunk;
}

/* Push `chunk` back onto the free list; it becomes the next chunk
 * returned by acquire(). */
template< class T >
void LazyBlockAllocator<T>::release(T * const chunk)
{
	chunk->next = mChunks;
	mChunks = chunk;
}
642 |
|
643 |
template< class T > |
644 |
class HardBlockAllocator |
645 |
{ |
646 |
public: |
647 |
T * acquire() { |
648 |
T * data = (T *)current_compile_p; |
649 |
current_compile_p += sizeof(T); |
650 |
return data; |
651 |
} |
652 |
|
653 |
void release(T * const chunk) { |
654 |
// Deallocated on invalidation |
655 |
} |
656 |
}; |
657 |
|
658 |
#if USE_SEPARATE_BIA |
659 |
static LazyBlockAllocator<blockinfo> BlockInfoAllocator; |
660 |
static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator; |
661 |
#else |
662 |
static HardBlockAllocator<blockinfo> BlockInfoAllocator; |
663 |
static HardBlockAllocator<checksum_info> ChecksumInfoAllocator; |
664 |
#endif |
665 |
|
666 |
/* Obtain a checksum_info from ChecksumInfoAllocator with its `next`
 * link cleared; the remaining fields are not initialised here. */
static __inline__ checksum_info *alloc_checksum_info(void)
{
	checksum_info * const fresh = ChecksumInfoAllocator.acquire();
	fresh->next = NULL;
	return fresh;
}
672 |
|
673 |
/* Hand a single checksum_info back to ChecksumInfoAllocator after
 * clearing its `next` link.  Does not follow the chain. */
static __inline__ void free_checksum_info(checksum_info *csi)
{
	csi->next = NULL;	/* detach from any chain before releasing */
	ChecksumInfoAllocator.release(csi);
}
678 |
|
679 |
/* Release every checksum_info on the chain starting at `csi`.
 * A NULL chain is accepted and does nothing. */
static __inline__ void free_checksum_info_chain(checksum_info *csi)
{
	checksum_info *following;

	for (; csi != NULL; csi = following) {
		/* Read the link first: freeing clears csi->next */
		following = csi->next;
		free_checksum_info(csi);
	}
}
687 |
|
688 |
/* Obtain a blockinfo from BlockInfoAllocator.  With USE_CHECKSUM_INFO
 * its checksum chain pointer is cleared; no other field is initialised
 * here. */
static __inline__ blockinfo *alloc_blockinfo(void)
{
	blockinfo * const fresh = BlockInfoAllocator.acquire();
#if USE_CHECKSUM_INFO
	fresh->csi = NULL;
#endif
	return fresh;
}
696 |
|
697 |
/* Hand `bi` back to BlockInfoAllocator.  With USE_CHECKSUM_INFO the
 * block's checksum chain is released first and the pointer cleared. */
static __inline__ void free_blockinfo(blockinfo *bi)
{
#if USE_CHECKSUM_INFO
	checksum_info * const chain = bi->csi;
	free_checksum_info_chain(chain);
	bi->csi = NULL;
#endif
	BlockInfoAllocator.release(bi);
}
705 |
|
706 |
static __inline__ void alloc_blockinfos(void) |
707 |
{ |
708 |
int i; |
709 |
blockinfo* bi; |
710 |
|
711 |
for (i=0;i<MAX_HOLD_BI;i++) { |
712 |
if (hold_bi[i]) |
713 |
return; |
714 |
bi=hold_bi[i]=alloc_blockinfo(); |
715 |
prepare_block(bi); |
716 |
} |
717 |
} |
718 |
|
719 |
/******************************************************************** |
720 |
* Functions to emit data into memory, and other general support * |
721 |
********************************************************************/ |
722 |
|
723 |
static uae_u8* target; |
724 |
|
725 |
/* Initialisation hook for the emitter; there is currently nothing to
 * set up. */
static void emit_init(void)
{
	/* intentionally empty */
}
728 |
|
729 |
/* Store one byte at the emit pointer `target` and advance it by 1. */
static __inline__ void emit_byte(uae_u8 x)
{
	*target = x;
	++target;
}
733 |
|
734 |
/* Store a 16-bit value, in host byte order, at the emit pointer
 * `target` and advance it by 2.
 *
 * The emit pointer can sit at any byte offset, so the value is copied
 * with memcpy() rather than stored through a cast uae_u16 pointer,
 * which would be a misaligned, type-punned access.
 */
static __inline__ void emit_word(uae_u16 x)
{
	memcpy(target, &x, sizeof(x));
	target += sizeof(x);
}
739 |
|
740 |
/* Store a 32-bit value, in host byte order, at the emit pointer
 * `target` and advance it by 4.
 *
 * The emit pointer can sit at any byte offset, so the value is copied
 * with memcpy() rather than stored through a cast uae_u32 pointer,
 * which would be a misaligned, type-punned access.
 */
static __inline__ void emit_long(uae_u32 x)
{
	memcpy(target, &x, sizeof(x));
	target += sizeof(x);
}
745 |
|
746 |
/* Store a 64-bit value, in host byte order, at the emit pointer
 * `target` and advance it by 8.
 *
 * The emit pointer can sit at any byte offset, so the value is copied
 * with memcpy() rather than stored through a cast uae_u64 pointer,
 * which would be a misaligned, type-punned access.
 */
static __inline__ void emit_quad(uae_u64 x)
{
	memcpy(target, &x, sizeof(x));
	target += sizeof(x);
}
751 |
|
752 |
static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen) |
753 |
{ |
754 |
memcpy((uae_u8 *)target,block,blocklen); |
755 |
target+=blocklen; |
756 |
} |
757 |
|
758 |
/* Return `v` with its four bytes in reverse order.
 * gb-- We have specialized byteswapping functions, just use them */
static __inline__ uae_u32 reverse32(uae_u32 v)
{
	const uae_u32 swapped = do_byteswap_32(v);
	return swapped;
}
767 |
|
768 |
/******************************************************************** |
769 |
* Getting the information about the target CPU * |
770 |
********************************************************************/ |
771 |
|
772 |
#include "codegen_x86.cpp" |
773 |
|
774 |
/* Point the emitter at `t`: subsequent emit_*() calls write there. */
void set_target(uae_u8* t)
{
	target = t;
}
778 |
|
779 |
/* Return the current emit pointer without doing anything else. */
static __inline__ uae_u8* get_target_noopt(void)
{
	uae_u8* const here = target;
	return here;
}
783 |
|
784 |
/* Return the current emit pointer; currently identical to
 * get_target_noopt(). */
__inline__ uae_u8* get_target(void)
{
	uae_u8* const here = get_target_noopt();
	return here;
}
788 |
|
789 |
|
790 |
/******************************************************************** |
791 |
* Flags status handling. EMIT TIME! * |
792 |
********************************************************************/ |
793 |
|
794 |
static void bt_l_ri_noclobber(R4 r, IMM i); |
795 |
|
796 |
static void make_flags_live_internal(void) |
797 |
{ |
798 |
if (live.flags_in_flags==VALID) |
799 |
return; |
800 |
Dif (live.flags_on_stack==TRASH) { |
801 |
write_log("Want flags, got something on stack, but it is TRASH\n"); |
802 |
abort(); |
803 |
} |
804 |
if (live.flags_on_stack==VALID) { |
805 |
int tmp; |
806 |
tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2); |
807 |
raw_reg_to_flags(tmp); |
808 |
unlock2(tmp); |
809 |
|
810 |
live.flags_in_flags=VALID; |
811 |
return; |
812 |
} |
813 |
write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n", |
814 |
live.flags_in_flags,live.flags_on_stack); |
815 |
abort(); |
816 |
} |
817 |
|
818 |
static void flags_to_stack(void) |
819 |
{ |
820 |
if (live.flags_on_stack==VALID) |
821 |
return; |
822 |
if (!live.flags_are_important) { |
823 |
live.flags_on_stack=VALID; |
824 |
return; |
825 |
} |
826 |
Dif (live.flags_in_flags!=VALID) |
827 |
abort(); |
828 |
else { |
829 |
int tmp; |
830 |
tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1); |
831 |
raw_flags_to_reg(tmp); |
832 |
unlock2(tmp); |
833 |
} |
834 |
live.flags_on_stack=VALID; |
835 |
} |
836 |
|
837 |
static __inline__ void clobber_flags(void) |
838 |
{ |
839 |
if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID) |
840 |
flags_to_stack(); |
841 |
live.flags_in_flags=TRASH; |
842 |
} |
843 |
|
844 |
/* Prepare for leaving the compiled stuff */ |
845 |
static __inline__ void flush_flags(void) |
846 |
{ |
847 |
flags_to_stack(); |
848 |
return; |
849 |
} |
850 |
|
851 |
int touchcnt; |
852 |
|
853 |
/******************************************************************** |
854 |
* Partial register flushing for optimized calls * |
855 |
********************************************************************/ |
856 |
|
857 |
struct regusage { |
858 |
uae_u16 rmask; |
859 |
uae_u16 wmask; |
860 |
}; |
861 |
|
862 |
/* Set bit `reg` in the register mask `*mask`.
 * Does nothing unless USE_OPTIMIZED_CALLS is enabled. */
static inline void ru_set(uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
	*mask = *mask | (1 << reg);
#endif
}
868 |
|
869 |
/* Test bit `reg` in the register mask `*mask`.
 * Without USE_OPTIMIZED_CALLS every register is reported as used. */
static inline bool ru_get(const uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
	return (*mask & (1 << reg)) != 0;
#else
	/* Default: instruction reads from and writes to the register */
	return true;
#endif
}
878 |
|
879 |
/* Record in `ru` that register `reg` is read. */
static inline void ru_set_read(regusage *ru, int reg)
{
	uae_u16 * const read_mask = &ru->rmask;
	ru_set(read_mask, reg);
}
883 |
|
884 |
/* Record in `ru` that register `reg` is written. */
static inline void ru_set_write(regusage *ru, int reg)
{
	uae_u16 * const write_mask = &ru->wmask;
	ru_set(write_mask, reg);
}
888 |
|
889 |
static inline bool ru_read_p(const regusage *ru, int reg) |
890 |
{ |
891 |
return ru_get(&ru->rmask, reg); |
892 |
} |
893 |
|
894 |
static inline bool ru_write_p(const regusage *ru, int reg) |
895 |
{ |
896 |
return ru_get(&ru->wmask, reg); |
897 |
} |
898 |
|
899 |
/* Account for one effective-address operand in the register usage `ru`
 * and advance m68k_pc_offset past the extension words it occupies.
 *
 * ru:         register usage masks to update
 * reg:        register field of the operand (address registers are
 *             recorded as reg+8)
 * mode:       addressing mode of the operand
 * size:       operand size; only matters for the `immi` mode
 * write_mode: non-zero when a Dreg/Areg operand is a destination
 *
 * Registers used to form a memory address are always recorded as read.
 */
static void ru_fill_ea(regusage *ru, int reg, amodes mode,
					   wordsizes size, int write_mode)
{
	switch (mode) {
	case Areg:
		reg += 8;
		/* fall through */
	case Dreg:
		ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
		break;
	case Ad16:
		/* skip displacement */
		m68k_pc_offset += 2;
		/* fall through */
	case Aind:
	case Aipi:
	case Apdi:
		ru_set_read(ru, reg+8);
		break;
	case Ad8r:
		ru_set_read(ru, reg+8);
		/* fall through */
	case PC8r: {
		uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
		/* Bits 15-12 of the extension word select the index register */
		reg = (dp >> 12) & 15;
		ru_set_read(ru, reg);
		if (dp & 0x100) {
			/* Full extension word: skip the base displacement
			 * (BD SIZE, bits 5-4: 2 = word, 3 = long, otherwise none) */
			switch (dp & 0x30) {
			case 0x20: m68k_pc_offset += 2; break;
			case 0x30: m68k_pc_offset += 4; break;
			default: break;
			}
			/* ... and the outer displacement
			 * (I/IS low bits: 2 = word, 3 = long, otherwise none) */
			switch (dp & 3) {
			case 2: m68k_pc_offset += 2; break;
			case 3: m68k_pc_offset += 4; break;
			default: break;
			}
		}
		break;
	}
	case PC16:
	case absw:
	case imm0:
	case imm1:
		m68k_pc_offset += 2;
		break;
	case absl:
	case imm2:
		m68k_pc_offset += 4;
		break;
	case immi:
		m68k_pc_offset += (size == sz_long) ? 4 : 2;
		break;
	default:
		/* remaining modes use no register and no extension word */
		break;
	}
}
943 |
|
944 |
/* TODO: split into a static initialization part and a dynamic one |
945 |
(instructions depending on extension words) */ |
946 |
static void ru_fill(regusage *ru, uae_u32 opcode) |
947 |
{ |
948 |
m68k_pc_offset += 2; |
949 |
|
950 |
/* Default: no register is used or written to */ |
951 |
ru->rmask = 0; |
952 |
ru->wmask = 0; |
953 |
|
954 |
uae_u32 real_opcode = cft_map(opcode); |
955 |
struct instr *dp = &table68k[real_opcode]; |
956 |
|
957 |
bool rw_dest = true; |
958 |
bool handled = false; |
959 |
|
960 |
/* Handle some instructions specifically */ |
961 |
uae_u16 reg, ext; |
962 |
switch (dp->mnemo) { |
963 |
case i_BFCHG: |
964 |
case i_BFCLR: |
965 |
case i_BFEXTS: |
966 |
case i_BFEXTU: |
967 |
case i_BFFFO: |
968 |
case i_BFINS: |
969 |
case i_BFSET: |
970 |
case i_BFTST: |
971 |
ext = comp_get_iword((m68k_pc_offset+=2)-2); |
972 |
if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7); |
973 |
if (ext & 0x020) ru_set_read(ru, ext & 7); |
974 |
ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1); |
975 |
if (dp->dmode == Dreg) |
976 |
ru_set_read(ru, dp->dreg); |
977 |
switch (dp->mnemo) { |
978 |
case i_BFEXTS: |
979 |
case i_BFEXTU: |
980 |
case i_BFFFO: |
981 |
ru_set_write(ru, (ext >> 12) & 7); |
982 |
break; |
983 |
case i_BFINS: |
984 |
ru_set_read(ru, (ext >> 12) & 7); |
985 |
/* fall through */ |
986 |
case i_BFCHG: |
987 |
case i_BFCLR: |
988 |
case i_BSET: |
989 |
if (dp->dmode == Dreg) |
990 |
ru_set_write(ru, dp->dreg); |
991 |
break; |
992 |
} |
993 |
handled = true; |
994 |
rw_dest = false; |
995 |
break; |
996 |
|
997 |
case i_BTST: |
998 |
rw_dest = false; |
999 |
break; |
1000 |
|
1001 |
case i_CAS: |
1002 |
{ |
1003 |
ext = comp_get_iword((m68k_pc_offset+=2)-2); |
1004 |
int Du = ext & 7; |
1005 |
ru_set_read(ru, Du); |
1006 |
int Dc = (ext >> 6) & 7; |
1007 |
ru_set_read(ru, Dc); |
1008 |
ru_set_write(ru, Dc); |
1009 |
break; |
1010 |
} |
1011 |
case i_CAS2: |
1012 |
{ |
1013 |
int Dc1, Dc2, Du1, Du2, Rn1, Rn2; |
1014 |
ext = comp_get_iword((m68k_pc_offset+=2)-2); |
1015 |
Rn1 = (ext >> 12) & 15; |
1016 |
Du1 = (ext >> 6) & 7; |
1017 |
Dc1 = ext & 7; |
1018 |
ru_set_read(ru, Rn1); |
1019 |
ru_set_read(ru, Du1); |
1020 |
ru_set_read(ru, Dc1); |
1021 |
ru_set_write(ru, Dc1); |
1022 |
ext = comp_get_iword((m68k_pc_offset+=2)-2); |
1023 |
Rn2 = (ext >> 12) & 15; |
1024 |
Du2 = (ext >> 6) & 7; |
1025 |
Dc2 = ext & 7; |
1026 |
ru_set_read(ru, Rn2); |
1027 |
ru_set_read(ru, Du2); |
1028 |
ru_set_write(ru, Dc2); |
1029 |
break; |
1030 |
} |
1031 |
case i_DIVL: case i_MULL: |
1032 |
m68k_pc_offset += 2; |
1033 |
break; |
1034 |
case i_LEA: |
1035 |
case i_MOVE: case i_MOVEA: case i_MOVE16: |
1036 |
rw_dest = false; |
1037 |
break; |
1038 |
case i_PACK: case i_UNPK: |
1039 |
rw_dest = false; |
1040 |
m68k_pc_offset += 2; |
1041 |
break; |
1042 |
case i_TRAPcc: |
1043 |
m68k_pc_offset += (dp->size == sz_long) ? 4 : 2; |
1044 |
break; |
1045 |
case i_RTR: |
1046 |
/* do nothing, just for coverage debugging */ |
1047 |
break; |
1048 |
/* TODO: handle EXG instruction */ |
1049 |
} |
1050 |
|
1051 |
/* Handle A-Traps better */ |
1052 |
if ((real_opcode & 0xf000) == 0xa000) { |
1053 |
handled = true; |
1054 |
} |
1055 |
|
1056 |
/* Handle EmulOps better */ |
1057 |
if ((real_opcode & 0xff00) == 0x7100) { |
1058 |
handled = true; |
1059 |
ru->rmask = 0xffff; |
1060 |
ru->wmask = 0; |
1061 |
} |
1062 |
|
1063 |
if (dp->suse && !handled) |
1064 |
ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0); |
1065 |
|
1066 |
if (dp->duse && !handled) |
1067 |
ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1); |
1068 |
|
1069 |
if (rw_dest) |
1070 |
ru->rmask |= ru->wmask; |
1071 |
|
1072 |
handled = handled || dp->suse || dp->duse; |
1073 |
|
1074 |
/* Mark all registers as used/written if the instruction may trap */ |
1075 |
if (may_trap(opcode)) { |
1076 |
handled = true; |
1077 |
ru->rmask = 0xffff; |
1078 |
ru->wmask = 0xffff; |
1079 |
} |
1080 |
|
1081 |
if (!handled) { |
1082 |
write_log("ru_fill: %04x = { %04x, %04x }\n", |
1083 |
real_opcode, ru->rmask, ru->wmask); |
1084 |
abort(); |
1085 |
} |
1086 |
} |
1087 |
|
1088 |
/******************************************************************** |
1089 |
* register allocation per block logging * |
1090 |
********************************************************************/ |
1091 |
|
1092 |
/* Per-block allocation log (reset by log_startblock()):
 *   vstate[]   - L_* state recorded for each virtual register
 *   vwritten[] - set to 1 by log_vwrite() for a virtual register
 *   nstate[]   - L_* state of each native register, or the index (>= 0)
 *                of the virtual register recorded for it by log_isreg()
 */
static uae_s8 vstate[VREGS];
static uae_s8 vwritten[VREGS];
static uae_s8 nstate[N_REGS];

/* Log states; all negative so they never collide with a register index */
#define L_UNKNOWN   (-127)
#define L_UNAVAIL   (-1)
#define L_NEEDED    (-2)
#define L_UNNEEDED  (-3)
1100 |
|
1101 |
/* Copy the current log (vstate[] / nstate[]) into the smallstate *s.
 * The bigstate argument b is not consulted. */
static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
{
    int v;
    int n;

    for (v = 0; v < VREGS; v++) {
        s->virt[v] = vstate[v];
    }
    for (n = 0; n < N_REGS; n++) {
        s->nat[n] = nstate[n];
    }
}
1110 |
|
1111 |
/* Compare the current log against the smallstate *s.
 *
 * Returns 1 as soon as
 *  - a virtual register is L_UNNEEDED in *s but not in the log, or
 *  - a native register has a non-negative log state that differs from *s.
 * Differences in the opposite direction are only counted; two or more of
 * them also yield 1, but only when USE_MATCH is enabled.  Otherwise 0.
 * The bigstate argument b is not consulted.
 */
static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
{
    int opposite = 0;
    int idx;

    for (idx = 0; idx < VREGS; idx++) {
        const int log_unneeded = (vstate[idx] == L_UNNEEDED);
        const int s_unneeded = (s->virt[idx] == L_UNNEEDED);

        if (!log_unneeded && s_unneeded)
            return 1;
        if (log_unneeded && !s_unneeded)
            opposite++;
    }

    for (idx = 0; idx < N_REGS; idx++) {
        if (nstate[idx] >= 0) {
            if (nstate[idx] != s->nat[idx])
                return 1;
        }
        else if (s->nat[idx] >= 0) {
            opposite++;
        }
    }

    /* In this case, it might be worth recompiling the callers */
    if (opposite >= 2 && USE_MATCH)
        return 1;
    return 0;
}
1133 |
|
1134 |
static __inline__ void log_startblock(void) |
1135 |
{ |
1136 |
int i; |
1137 |
|
1138 |
for (i = 0; i < VREGS; i++) { |
1139 |
vstate[i] = L_UNKNOWN; |
1140 |
vwritten[i] = 0; |
1141 |
} |
1142 |
for (i = 0; i < N_REGS; i++) |
1143 |
nstate[i] = L_UNKNOWN; |
1144 |
} |
1145 |
|
1146 |
/* Using an n-reg for a temp variable */ |
1147 |
static __inline__ void log_isused(int n) |
1148 |
{ |
1149 |
if (nstate[n] == L_UNKNOWN) |
1150 |
nstate[n] = L_UNAVAIL; |
1151 |
} |
1152 |
|
1153 |
static __inline__ void log_visused(int r) |
1154 |
{ |
1155 |
if (vstate[r] == L_UNKNOWN) |
1156 |
vstate[r] = L_NEEDED; |
1157 |
} |
1158 |
|
1159 |
static __inline__ void do_load_reg(int n, int r) |
1160 |
{ |
1161 |
if (r == FLAGTMP) |
1162 |
raw_load_flagreg(n, r); |
1163 |
else if (r == FLAGX) |
1164 |
raw_load_flagx(n, r); |
1165 |
else |
1166 |
raw_mov_l_rm(n, (uintptr) live.state[r].mem); |
1167 |
} |
1168 |
|
1169 |
static __inline__ void check_load_reg(int n, int r) |
1170 |
{ |
1171 |
raw_mov_l_rm(n, (uintptr) live.state[r].mem); |
1172 |
} |
1173 |
|
1174 |
static __inline__ void log_vwrite(int r) |
1175 |
{ |
1176 |
vwritten[r] = 1; |
1177 |
} |
1178 |
|
1179 |
/* Using an n-reg to hold a v-reg */ |
1180 |
static __inline__ void log_isreg(int n, int r) |
1181 |
{ |
1182 |
static int count = 0; |
1183 |
|
1184 |
if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH) |
1185 |
nstate[n] = r; |
1186 |
else { |
1187 |
do_load_reg(n, r); |
1188 |
if (nstate[n] == L_UNKNOWN) |
1189 |
nstate[n] = L_UNAVAIL; |
1190 |
} |
1191 |
if (vstate[r] == L_UNKNOWN) |
1192 |
vstate[r] = L_NEEDED; |
1193 |
} |
1194 |
|
1195 |
static __inline__ void log_clobberreg(int r) |
1196 |
{ |
1197 |
if (vstate[r] == L_UNKNOWN) |
1198 |
vstate[r] = L_UNNEEDED; |
1199 |
} |
1200 |
|
1201 |
/* This ends all possibility of clever register allocation */ |
1202 |
|
1203 |
static __inline__ void log_flush(void) |
1204 |
{ |
1205 |
int i; |
1206 |
|
1207 |
for (i = 0; i < VREGS; i++) |
1208 |
if (vstate[i] == L_UNKNOWN) |
1209 |
vstate[i] = L_NEEDED; |
1210 |
for (i = 0; i < N_REGS; i++) |
1211 |
if (nstate[i] == L_UNKNOWN) |
1212 |
nstate[i] = L_UNAVAIL; |
1213 |
} |
1214 |
|
1215 |
static __inline__ void log_dump(void) |
1216 |
{ |
1217 |
int i; |
1218 |
|
1219 |
return; |
1220 |
|
1221 |
write_log("----------------------\n"); |
1222 |
for (i = 0; i < N_REGS; i++) { |
1223 |
switch (nstate[i]) { |
1224 |
case L_UNKNOWN: |
1225 |
write_log("Nat %d : UNKNOWN\n", i); |
1226 |
break; |
1227 |
case L_UNAVAIL: |
1228 |
write_log("Nat %d : UNAVAIL\n", i); |
1229 |
break; |
1230 |
default: |
1231 |
write_log("Nat %d : %d\n", i, nstate[i]); |
1232 |
break; |
1233 |
} |
1234 |
} |
1235 |
for (i = 0; i < VREGS; i++) { |
1236 |
if (vstate[i] == L_UNNEEDED) |
1237 |
write_log("Virt %d: UNNEEDED\n", i); |
1238 |
} |
1239 |
} |
1240 |
|
1241 |
/******************************************************************** |
1242 |
* register status handling. EMIT TIME! * |
1243 |
********************************************************************/ |
1244 |
|
1245 |
static __inline__ void set_status(int r, int status) |
1246 |
{ |
1247 |
if (status == ISCONST) |
1248 |
log_clobberreg(r); |
1249 |
live.state[r].status=status; |
1250 |
} |
1251 |
|
1252 |
static __inline__ int isinreg(int r) |
1253 |
{ |
1254 |
return live.state[r].status==CLEAN || live.state[r].status==DIRTY; |
1255 |
} |
1256 |
|
1257 |
static __inline__ void adjust_nreg(int r, uae_u32 val) |
1258 |
{ |
1259 |
if (!val) |
1260 |
return; |
1261 |
raw_lea_l_brr(r,r,val); |
1262 |
} |
1263 |
|
1264 |
static void tomem(int r) |
1265 |
{ |
1266 |
int rr=live.state[r].realreg; |
1267 |
|
1268 |
if (isinreg(r)) { |
1269 |
if (live.state[r].val && live.nat[rr].nholds==1 |
1270 |
&& !live.nat[rr].locked) { |
1271 |
// write_log("RemovingA offset %x from reg %d (%d) at %p\n", |
1272 |
// live.state[r].val,r,rr,target); |
1273 |
adjust_nreg(rr,live.state[r].val); |
1274 |
live.state[r].val=0; |
1275 |
live.state[r].dirtysize=4; |
1276 |
set_status(r,DIRTY); |
1277 |
} |
1278 |
} |
1279 |
|
1280 |
if (live.state[r].status==DIRTY) { |
1281 |
switch (live.state[r].dirtysize) { |
1282 |
case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break; |
1283 |
case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break; |
1284 |
case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break; |
1285 |
default: abort(); |
1286 |
} |
1287 |
log_vwrite(r); |
1288 |
set_status(r,CLEAN); |
1289 |
live.state[r].dirtysize=0; |
1290 |
} |
1291 |
} |
1292 |
|
1293 |
static __inline__ int isconst(int r) |
1294 |
{ |
1295 |
return live.state[r].status==ISCONST; |
1296 |
} |
1297 |
|
1298 |
int is_const(int r) |
1299 |
{ |
1300 |
return isconst(r); |
1301 |
} |
1302 |
|
1303 |
static __inline__ void writeback_const(int r) |
1304 |
{ |
1305 |
if (!isconst(r)) |
1306 |
return; |
1307 |
Dif (live.state[r].needflush==NF_HANDLER) { |
1308 |
write_log("Trying to write back constant NF_HANDLER!\n"); |
1309 |
abort(); |
1310 |
} |
1311 |
|
1312 |
raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val); |
1313 |
log_vwrite(r); |
1314 |
live.state[r].val=0; |
1315 |
set_status(r,INMEM); |
1316 |
} |
1317 |
|
1318 |
static __inline__ void tomem_c(int r) |
1319 |
{ |
1320 |
if (isconst(r)) { |
1321 |
writeback_const(r); |
1322 |
} |
1323 |
else |
1324 |
tomem(r); |
1325 |
} |
1326 |
|
1327 |
static void evict(int r) |
1328 |
{ |
1329 |
int rr; |
1330 |
|
1331 |
if (!isinreg(r)) |
1332 |
return; |
1333 |
tomem(r); |
1334 |
rr=live.state[r].realreg; |
1335 |
|
1336 |
Dif (live.nat[rr].locked && |
1337 |
live.nat[rr].nholds==1) { |
1338 |
write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg); |
1339 |
abort(); |
1340 |
} |
1341 |
|
1342 |
live.nat[rr].nholds--; |
1343 |
if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */ |
1344 |
int topreg=live.nat[rr].holds[live.nat[rr].nholds]; |
1345 |
int thisind=live.state[r].realind; |
1346 |
|
1347 |
live.nat[rr].holds[thisind]=topreg; |
1348 |
live.state[topreg].realind=thisind; |
1349 |
} |
1350 |
live.state[r].realreg=-1; |
1351 |
set_status(r,INMEM); |
1352 |
} |
1353 |
|
1354 |
static __inline__ void free_nreg(int r) |
1355 |
{ |
1356 |
int i=live.nat[r].nholds; |
1357 |
|
1358 |
while (i) { |
1359 |
int vr; |
1360 |
|
1361 |
--i; |
1362 |
vr=live.nat[r].holds[i]; |
1363 |
evict(vr); |
1364 |
} |
1365 |
Dif (live.nat[r].nholds!=0) { |
1366 |
write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds); |
1367 |
abort(); |
1368 |
} |
1369 |
} |
1370 |
|
1371 |
/* Use with care! */ |
1372 |
static __inline__ void isclean(int r) |
1373 |
{ |
1374 |
if (!isinreg(r)) |
1375 |
return; |
1376 |
live.state[r].validsize=4; |
1377 |
live.state[r].dirtysize=0; |
1378 |
live.state[r].val=0; |
1379 |
set_status(r,CLEAN); |
1380 |
} |
1381 |
|
1382 |
static __inline__ void disassociate(int r) |
1383 |
{ |
1384 |
isclean(r); |
1385 |
evict(r); |
1386 |
} |
1387 |
|
1388 |
static __inline__ void set_const(int r, uae_u32 val) |
1389 |
{ |
1390 |
disassociate(r); |
1391 |
live.state[r].val=val; |
1392 |
set_status(r,ISCONST); |
1393 |
} |
1394 |
|
1395 |
/* Return the val field recorded for virtual register r. */
static __inline__ uae_u32 get_offset(int r)
{
    const uae_u32 offset = live.state[r].val;

    return offset;
}
1399 |
|
1400 |
static int alloc_reg_hinted(int r, int size, int willclobber, int hint) |
1401 |
{ |
1402 |
int bestreg; |
1403 |
uae_s32 when; |
1404 |
int i; |
1405 |
uae_s32 badness=0; /* to shut up gcc */ |
1406 |
bestreg=-1; |
1407 |
when=2000000000; |
1408 |
|
1409 |
/* XXX use a regalloc_order table? */ |
1410 |
for (i=0;i<N_REGS;i++) { |
1411 |
badness=live.nat[i].touched; |
1412 |
if (live.nat[i].nholds==0) |
1413 |
badness=0; |
1414 |
if (i==hint) |
1415 |
badness-=200000000; |
1416 |
if (!live.nat[i].locked && badness<when) { |
1417 |
if ((size==1 && live.nat[i].canbyte) || |
1418 |
(size==2 && live.nat[i].canword) || |
1419 |
(size==4)) { |
1420 |
bestreg=i; |
1421 |
when=badness; |
1422 |
if (live.nat[i].nholds==0 && hint<0) |
1423 |
break; |
1424 |
if (i==hint) |
1425 |
break; |
1426 |
} |
1427 |
} |
1428 |
} |
1429 |
Dif (bestreg==-1) |
1430 |
abort(); |
1431 |
|
1432 |
if (live.nat[bestreg].nholds>0) { |
1433 |
free_nreg(bestreg); |
1434 |
} |
1435 |
if (isinreg(r)) { |
1436 |
int rr=live.state[r].realreg; |
1437 |
/* This will happen if we read a partially dirty register at a |
1438 |
bigger size */ |
1439 |
Dif (willclobber || live.state[r].validsize>=size) |
1440 |
abort(); |
1441 |
Dif (live.nat[rr].nholds!=1) |
1442 |
abort(); |
1443 |
if (size==4 && live.state[r].validsize==2) { |
1444 |
log_isused(bestreg); |
1445 |
log_visused(r); |
1446 |
raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem); |
1447 |
raw_bswap_32(bestreg); |
1448 |
raw_zero_extend_16_rr(rr,rr); |
1449 |
raw_zero_extend_16_rr(bestreg,bestreg); |
1450 |
raw_bswap_32(bestreg); |
1451 |
raw_lea_l_brr_indexed(rr,rr,bestreg,1,0); |
1452 |
live.state[r].validsize=4; |
1453 |
live.nat[rr].touched=touchcnt++; |
1454 |
return rr; |
1455 |
} |
1456 |
if (live.state[r].validsize==1) { |
1457 |
/* Nothing yet */ |
1458 |
} |
1459 |
evict(r); |
1460 |
} |
1461 |
|
1462 |
if (!willclobber) { |
1463 |
if (live.state[r].status!=UNDEF) { |
1464 |
if (isconst(r)) { |
1465 |
raw_mov_l_ri(bestreg,live.state[r].val); |
1466 |
live.state[r].val=0; |
1467 |
live.state[r].dirtysize=4; |
1468 |
set_status(r,DIRTY); |
1469 |
log_isused(bestreg); |
1470 |
} |
1471 |
else { |
1472 |
log_isreg(bestreg, r); /* This will also load it! */ |
1473 |
live.state[r].dirtysize=0; |
1474 |
set_status(r,CLEAN); |
1475 |
} |
1476 |
} |
1477 |
else { |
1478 |
live.state[r].val=0; |
1479 |
live.state[r].dirtysize=0; |
1480 |
set_status(r,CLEAN); |
1481 |
log_isused(bestreg); |
1482 |
} |
1483 |
live.state[r].validsize=4; |
1484 |
} |
1485 |
else { /* this is the easiest way, but not optimal. FIXME! */ |
1486 |
/* Now it's trickier, but hopefully still OK */ |
1487 |
if (!isconst(r) || size==4) { |
1488 |
live.state[r].validsize=size; |
1489 |
live.state[r].dirtysize=size; |
1490 |
live.state[r].val=0; |
1491 |
set_status(r,DIRTY); |
1492 |
if (size == 4) { |
1493 |
log_clobberreg(r); |
1494 |
log_isused(bestreg); |
1495 |
} |
1496 |
else { |
1497 |
log_visused(r); |
1498 |
log_isused(bestreg); |
1499 |
} |
1500 |
} |
1501 |
else { |
1502 |
if (live.state[r].status!=UNDEF) |
1503 |
raw_mov_l_ri(bestreg,live.state[r].val); |
1504 |
live.state[r].val=0; |
1505 |
live.state[r].validsize=4; |
1506 |
live.state[r].dirtysize=4; |
1507 |
set_status(r,DIRTY); |
1508 |
log_isused(bestreg); |
1509 |
} |
1510 |
} |
1511 |
live.state[r].realreg=bestreg; |
1512 |
live.state[r].realind=live.nat[bestreg].nholds; |
1513 |
live.nat[bestreg].touched=touchcnt++; |
1514 |
live.nat[bestreg].holds[live.nat[bestreg].nholds]=r; |
1515 |
live.nat[bestreg].nholds++; |
1516 |
|
1517 |
return bestreg; |
1518 |
} |
1519 |
|
1520 |
static int alloc_reg(int r, int size, int willclobber) |
1521 |
{ |
1522 |
return alloc_reg_hinted(r,size,willclobber,-1); |
1523 |
} |
1524 |
|
1525 |
static void unlock2(int r) |
1526 |
{ |
1527 |
Dif (!live.nat[r].locked) |
1528 |
abort(); |
1529 |
live.nat[r].locked--; |
1530 |
} |
1531 |
|
1532 |
static void setlock(int r) |
1533 |
{ |
1534 |
live.nat[r].locked++; |
1535 |
} |
1536 |
|
1537 |
|
1538 |
static void mov_nregs(int d, int s) |
1539 |
{ |
1540 |
int ns=live.nat[s].nholds; |
1541 |
int nd=live.nat[d].nholds; |
1542 |
int i; |
1543 |
|
1544 |
if (s==d) |
1545 |
return; |
1546 |
|
1547 |
if (nd>0) |
1548 |
free_nreg(d); |
1549 |
|
1550 |
log_isused(d); |
1551 |
raw_mov_l_rr(d,s); |
1552 |
|
1553 |
for (i=0;i<live.nat[s].nholds;i++) { |
1554 |
int vs=live.nat[s].holds[i]; |
1555 |
|
1556 |
live.state[vs].realreg=d; |
1557 |
live.state[vs].realind=i; |
1558 |
live.nat[d].holds[i]=vs; |
1559 |
} |
1560 |
live.nat[d].nholds=live.nat[s].nholds; |
1561 |
|
1562 |
live.nat[s].nholds=0; |
1563 |
} |
1564 |
|
1565 |
|
1566 |
static __inline__ void make_exclusive(int r, int size, int spec) |
1567 |
{ |
1568 |
int clobber; |
1569 |
reg_status oldstate; |
1570 |
int rr=live.state[r].realreg; |
1571 |
int nr; |
1572 |
int nind; |
1573 |
int ndirt=0; |
1574 |
int i; |
1575 |
|
1576 |
if (!isinreg(r)) |
1577 |
return; |
1578 |
if (live.nat[rr].nholds==1) |
1579 |
return; |
1580 |
for (i=0;i<live.nat[rr].nholds;i++) { |
1581 |
int vr=live.nat[rr].holds[i]; |
1582 |
if (vr!=r && |
1583 |
(live.state[vr].status==DIRTY || live.state[vr].val)) |
1584 |
ndirt++; |
1585 |
} |
1586 |
if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) { |
1587 |
/* Everything else is clean, so let's keep this register */ |
1588 |
for (i=0;i<live.nat[rr].nholds;i++) { |
1589 |
int vr=live.nat[rr].holds[i]; |
1590 |
if (vr!=r) { |
1591 |
evict(vr); |
1592 |
i--; /* Try that index again! */ |
1593 |
} |
1594 |
} |
1595 |
Dif (live.nat[rr].nholds!=1) { |
1596 |
write_log("natreg %d holds %d vregs, %d not exclusive\n", |
1597 |
rr,live.nat[rr].nholds,r); |
1598 |
abort(); |
1599 |
} |
1600 |
return; |
1601 |
} |
1602 |
|
1603 |
/* We have to split the register */ |
1604 |
oldstate=live.state[r]; |
1605 |
|
1606 |
setlock(rr); /* Make sure this doesn't go away */ |
1607 |
/* Forget about r being in the register rr */ |
1608 |
disassociate(r); |
1609 |
/* Get a new register, that we will clobber completely */ |
1610 |
if (oldstate.status==DIRTY) { |
1611 |
/* If dirtysize is <4, we need a register that can handle the |
1612 |
eventual smaller memory store! Thanks to Quake68k for exposing |
1613 |
this detail ;-) */ |
1614 |
nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec); |
1615 |
} |
1616 |
else { |
1617 |
nr=alloc_reg_hinted(r,4,1,spec); |
1618 |
} |
1619 |
nind=live.state[r].realind; |
1620 |
live.state[r]=oldstate; /* Keep all the old state info */ |
1621 |
live.state[r].realreg=nr; |
1622 |
live.state[r].realind=nind; |
1623 |
|
1624 |
if (size<live.state[r].validsize) { |
1625 |
if (live.state[r].val) { |
1626 |
/* Might as well compensate for the offset now */ |
1627 |
raw_lea_l_brr(nr,rr,oldstate.val); |
1628 |
live.state[r].val=0; |
1629 |
live.state[r].dirtysize=4; |
1630 |
set_status(r,DIRTY); |
1631 |
} |
1632 |
else |
1633 |
raw_mov_l_rr(nr,rr); /* Make another copy */ |
1634 |
} |
1635 |
unlock2(rr); |
1636 |
} |
1637 |
|
1638 |
static __inline__ void add_offset(int r, uae_u32 off) |
1639 |
{ |
1640 |
live.state[r].val+=off; |
1641 |
} |
1642 |
|
1643 |
static __inline__ void remove_offset(int r, int spec) |
1644 |
{ |
1645 |
reg_status oldstate; |
1646 |
int rr; |
1647 |
|
1648 |
if (isconst(r)) |
1649 |
return; |
1650 |
if (live.state[r].val==0) |
1651 |
return; |
1652 |
if (isinreg(r) && live.state[r].validsize<4) |
1653 |
evict(r); |
1654 |
|
1655 |
if (!isinreg(r)) |
1656 |
alloc_reg_hinted(r,4,0,spec); |
1657 |
|
1658 |
Dif (live.state[r].validsize!=4) { |
1659 |
write_log("Validsize=%d in remove_offset\n",live.state[r].validsize); |
1660 |
abort(); |
1661 |
} |
1662 |
make_exclusive(r,0,-1); |
1663 |
/* make_exclusive might have done the job already */ |
1664 |
if (live.state[r].val==0) |
1665 |
return; |
1666 |
|
1667 |
rr=live.state[r].realreg; |
1668 |
|
1669 |
if (live.nat[rr].nholds==1) { |
1670 |
//write_log("RemovingB offset %x from reg %d (%d) at %p\n", |
1671 |
// live.state[r].val,r,rr,target); |
1672 |
adjust_nreg(rr,live.state[r].val); |
1673 |
live.state[r].dirtysize=4; |
1674 |
live.state[r].val=0; |
1675 |
set_status(r,DIRTY); |
1676 |
return; |
1677 |
} |
1678 |
write_log("Failed in remove_offset\n"); |
1679 |
abort(); |
1680 |
} |
1681 |
|
1682 |
static __inline__ void remove_all_offsets(void) |
1683 |
{ |
1684 |
int i; |
1685 |
|
1686 |
for (i=0;i<VREGS;i++) |
1687 |
remove_offset(i,-1); |
1688 |
} |
1689 |
|
1690 |
/* With RECORD_REGISTER_USAGE enabled: for each of the 16 registers whose
 * local count is non-zero, emit an ADDQim of that count to the entry at
 * reg_count + 8*r.  Compiles to nothing otherwise. */
static inline void flush_reg_count(void)
{
#if RECORD_REGISTER_USAGE
    for (int r = 0; r < 16; r++) {
        if (!reg_count_local[r])
            continue;
        ADDQim(reg_count_local[r], ((uintptr)reg_count) + (8 * r), X86_NOREG, X86_NOREG, 1);
    }
#endif
}
1698 |
|
1699 |
/* With RECORD_REGISTER_USAGE enabled: bump the local usage count of
 * register r when r is below 16.  Compiles to nothing otherwise. */
static inline void record_register(int r)
{
#if RECORD_REGISTER_USAGE
    if (r >= 16)
        return;
    reg_count_local[r]++;
#endif
}
1706 |
|
1707 |
static __inline__ int readreg_general(int r, int size, int spec, int can_offset) |
1708 |
{ |
1709 |
int n; |
1710 |
int answer=-1; |
1711 |
|
1712 |
record_register(r); |
1713 |
if (live.state[r].status==UNDEF) { |
1714 |
write_log("WARNING: Unexpected read of undefined register %d\n",r); |
1715 |
} |
1716 |
if (!can_offset) |
1717 |
remove_offset(r,spec); |
1718 |
|
1719 |
if (isinreg(r) && live.state[r].validsize>=size) { |
1720 |
n=live.state[r].realreg; |
1721 |
switch(size) { |
1722 |
case 1: |
1723 |
if (live.nat[n].canbyte || spec>=0) { |
1724 |
answer=n; |
1725 |
} |
1726 |
break; |
1727 |
case 2: |
1728 |
if (live.nat[n].canword || spec>=0) { |
1729 |
answer=n; |
1730 |
} |
1731 |
break; |
1732 |
case 4: |
1733 |
answer=n; |
1734 |
break; |
1735 |
default: abort(); |
1736 |
} |
1737 |
if (answer<0) |
1738 |
evict(r); |
1739 |
} |
1740 |
/* either the value was in memory to start with, or it was evicted and |
1741 |
is in memory now */ |
1742 |
if (answer<0) { |
1743 |
answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec); |
1744 |
} |
1745 |
|
1746 |
if (spec>=0 && spec!=answer) { |
1747 |
/* Too bad */ |
1748 |
mov_nregs(spec,answer); |
1749 |
answer=spec; |
1750 |
} |
1751 |
live.nat[answer].locked++; |
1752 |
live.nat[answer].touched=touchcnt++; |
1753 |
return answer; |
1754 |
} |
1755 |
|
1756 |
|
1757 |
|
1758 |
static int readreg(int r, int size) |
1759 |
{ |
1760 |
return readreg_general(r,size,-1,0); |
1761 |
} |
1762 |
|
1763 |
static int readreg_specific(int r, int size, int spec) |
1764 |
{ |
1765 |
return readreg_general(r,size,spec,0); |
1766 |
} |
1767 |
|
1768 |
static int readreg_offset(int r, int size) |
1769 |
{ |
1770 |
return readreg_general(r,size,-1,1); |
1771 |
} |
1772 |
|
1773 |
/* writereg_general(r, size, spec) |
1774 |
* |
1775 |
* INPUT |
1776 |
* - r : mid-layer register |
1777 |
* - size : requested size (1/2/4) |
1778 |
* - spec : -1 if find or make a register free, otherwise specifies |
1779 |
* the physical register to use in any case |
1780 |
* |
1781 |
* OUTPUT |
1782 |
* - hard (physical, x86 here) register allocated to virtual register r |
1783 |
*/ |
1784 |
static __inline__ int writereg_general(int r, int size, int spec) |
1785 |
{ |
1786 |
int n; |
1787 |
int answer=-1; |
1788 |
|
1789 |
record_register(r); |
1790 |
if (size<4) { |
1791 |
remove_offset(r,spec); |
1792 |
} |
1793 |
|
1794 |
make_exclusive(r,size,spec); |
1795 |
if (isinreg(r)) { |
1796 |
int nvsize=size>live.state[r].validsize?size:live.state[r].validsize; |
1797 |
int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize; |
1798 |
n=live.state[r].realreg; |
1799 |
|
1800 |
Dif (live.nat[n].nholds!=1) |
1801 |
abort(); |
1802 |
switch(size) { |
1803 |
case 1: |
1804 |
if (live.nat[n].canbyte || spec>=0) { |
1805 |
live.state[r].dirtysize=ndsize; |
1806 |
live.state[r].validsize=nvsize; |
1807 |
answer=n; |
1808 |
} |
1809 |
break; |
1810 |
case 2: |
1811 |
if (live.nat[n].canword || spec>=0) { |
1812 |
live.state[r].dirtysize=ndsize; |
1813 |
live.state[r].validsize=nvsize; |
1814 |
answer=n; |
1815 |
} |
1816 |
break; |
1817 |
case 4: |
1818 |
live.state[r].dirtysize=ndsize; |
1819 |
live.state[r].validsize=nvsize; |
1820 |
answer=n; |
1821 |
break; |
1822 |
default: abort(); |
1823 |
} |
1824 |
if (answer<0) |
1825 |
evict(r); |
1826 |
} |
1827 |
/* either the value was in memory to start with, or it was evicted and |
1828 |
is in memory now */ |
1829 |
if (answer<0) { |
1830 |
answer=alloc_reg_hinted(r,size,1,spec); |
1831 |
} |
1832 |
if (spec>=0 && spec!=answer) { |
1833 |
mov_nregs(spec,answer); |
1834 |
answer=spec; |
1835 |
} |
1836 |
if (live.state[r].status==UNDEF) |
1837 |
live.state[r].validsize=4; |
1838 |
live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize; |
1839 |
live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize; |
1840 |
|
1841 |
live.nat[answer].locked++; |
1842 |
live.nat[answer].touched=touchcnt++; |
1843 |
if (size==4) { |
1844 |
live.state[r].val=0; |
1845 |
} |
1846 |
else { |
1847 |
Dif (live.state[r].val) { |
1848 |
write_log("Problem with val\n"); |
1849 |
abort(); |
1850 |
} |
1851 |
} |
1852 |
set_status(r,DIRTY); |
1853 |
return answer; |
1854 |
} |
1855 |
|
1856 |
static int writereg(int r, int size) |
1857 |
{ |
1858 |
return writereg_general(r,size,-1); |
1859 |
} |
1860 |
|
1861 |
static int writereg_specific(int r, int size, int spec) |
1862 |
{ |
1863 |
return writereg_general(r,size,spec); |
1864 |
} |
1865 |
|
1866 |
static __inline__ int rmw_general(int r, int wsize, int rsize, int spec) |
1867 |
{ |
1868 |
int n; |
1869 |
int answer=-1; |
1870 |
|
1871 |
record_register(r); |
1872 |
if (live.state[r].status==UNDEF) { |
1873 |
write_log("WARNING: Unexpected read of undefined register %d\n",r); |
1874 |
} |
1875 |
remove_offset(r,spec); |
1876 |
make_exclusive(r,0,spec); |
1877 |
|
1878 |
Dif (wsize<rsize) { |
1879 |
write_log("Cannot handle wsize<rsize in rmw_general()\n"); |
1880 |
abort(); |
1881 |
} |
1882 |
if (isinreg(r) && live.state[r].validsize>=rsize) { |
1883 |
n=live.state[r].realreg; |
1884 |
Dif (live.nat[n].nholds!=1) |
1885 |
abort(); |
1886 |
|
1887 |
switch(rsize) { |
1888 |
case 1: |
1889 |
if (live.nat[n].canbyte || spec>=0) { |
1890 |
answer=n; |
1891 |
} |
1892 |
break; |
1893 |
case 2: |
1894 |
if (live.nat[n].canword || spec>=0) { |
1895 |
answer=n; |
1896 |
} |
1897 |
break; |
1898 |
case 4: |
1899 |
answer=n; |
1900 |
break; |
1901 |
default: abort(); |
1902 |
} |
1903 |
if (answer<0) |
1904 |
evict(r); |
1905 |
} |
1906 |
/* either the value was in memory to start with, or it was evicted and |
1907 |
is in memory now */ |
1908 |
if (answer<0) { |
1909 |
answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec); |
1910 |
} |
1911 |
|
1912 |
if (spec>=0 && spec!=answer) { |
1913 |
/* Too bad */ |
1914 |
mov_nregs(spec,answer); |
1915 |
answer=spec; |
1916 |
} |
1917 |
if (wsize>live.state[r].dirtysize) |
1918 |
live.state[r].dirtysize=wsize; |
1919 |
if (wsize>live.state[r].validsize) |
1920 |
live.state[r].validsize=wsize; |
1921 |
set_status(r,DIRTY); |
1922 |
|
1923 |
live.nat[answer].locked++; |
1924 |
live.nat[answer].touched=touchcnt++; |
1925 |
|
1926 |
Dif (live.state[r].val) { |
1927 |
write_log("Problem with val(rmw)\n"); |
1928 |
abort(); |
1929 |
} |
1930 |
return answer; |
1931 |
} |
1932 |
|
1933 |
static int rmw(int r, int wsize, int rsize) |
1934 |
{ |
1935 |
return rmw_general(r,wsize,rsize,-1); |
1936 |
} |
1937 |
|
1938 |
static int rmw_specific(int r, int wsize, int rsize, int spec) |
1939 |
{ |
1940 |
return rmw_general(r,wsize,rsize,spec); |
1941 |
} |
1942 |
|
1943 |
|
1944 |
/* needed for restoring the carry flag on non-P6 cores */ |
1945 |
static void bt_l_ri_noclobber(R4 r, IMM i) |
1946 |
{ |
1947 |
int size=4; |
1948 |
if (i<16) |
1949 |
size=2; |
1950 |
r=readreg(r,size); |
1951 |
raw_bt_l_ri(r,i); |
1952 |
unlock2(r); |
1953 |
} |
1954 |
|
1955 |
/******************************************************************** |
1956 |
* FPU register status handling. EMIT TIME! * |
1957 |
********************************************************************/ |
1958 |
|
1959 |
static void f_tomem(int r) |
1960 |
{ |
1961 |
if (live.fate[r].status==DIRTY) { |
1962 |
#if USE_LONG_DOUBLE |
1963 |
raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg); |
1964 |
#else |
1965 |
raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg); |
1966 |
#endif |
1967 |
live.fate[r].status=CLEAN; |
1968 |
} |
1969 |
} |
1970 |
|
1971 |
static void f_tomem_drop(int r) |
1972 |
{ |
1973 |
if (live.fate[r].status==DIRTY) { |
1974 |
#if USE_LONG_DOUBLE |
1975 |
raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg); |
1976 |
#else |
1977 |
raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg); |
1978 |
#endif |
1979 |
live.fate[r].status=INMEM; |
1980 |
} |
1981 |
} |
1982 |
|
1983 |
|
1984 |
static __inline__ int f_isinreg(int r) |
1985 |
{ |
1986 |
return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY; |
1987 |
} |
1988 |
|
1989 |
static void f_evict(int r) |
1990 |
{ |
1991 |
int rr; |
1992 |
|
1993 |
if (!f_isinreg(r)) |
1994 |
return; |
1995 |
rr=live.fate[r].realreg; |
1996 |
if (live.fat[rr].nholds==1) |
1997 |
f_tomem_drop(r); |
1998 |
else |
1999 |
f_tomem(r); |
2000 |
|
2001 |
Dif (live.fat[rr].locked && |
2002 |
live.fat[rr].nholds==1) { |
2003 |
write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg); |
2004 |
abort(); |
2005 |
} |
2006 |
|
2007 |
live.fat[rr].nholds--; |
2008 |
if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */ |
2009 |
int topreg=live.fat[rr].holds[live.fat[rr].nholds]; |
2010 |
int thisind=live.fate[r].realind; |
2011 |
live.fat[rr].holds[thisind]=topreg; |
2012 |
live.fate[topreg].realind=thisind; |
2013 |
} |
2014 |
live.fate[r].status=INMEM; |
2015 |
live.fate[r].realreg=-1; |
2016 |
} |
2017 |
|
2018 |
static __inline__ void f_free_nreg(int r) |
2019 |
{ |
2020 |
int i=live.fat[r].nholds; |
2021 |
|
2022 |
while (i) { |
2023 |
int vr; |
2024 |
|
2025 |
--i; |
2026 |
vr=live.fat[r].holds[i]; |
2027 |
f_evict(vr); |
2028 |
} |
2029 |
Dif (live.fat[r].nholds!=0) { |
2030 |
write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds); |
2031 |
abort(); |
2032 |
} |
2033 |
} |
2034 |
|
2035 |
|
2036 |
/* Use with care! */ |
2037 |
static __inline__ void f_isclean(int r) |
2038 |
{ |
2039 |
if (!f_isinreg(r)) |
2040 |
return; |
2041 |
live.fate[r].status=CLEAN; |
2042 |
} |
2043 |
|
2044 |
static __inline__ void f_disassociate(int r) |
2045 |
{ |
2046 |
f_isclean(r); |
2047 |
f_evict(r); |
2048 |
} |
2049 |
|
2050 |
|
2051 |
|
2052 |
static int f_alloc_reg(int r, int willclobber) |
2053 |
{ |
2054 |
int bestreg; |
2055 |
uae_s32 when; |
2056 |
int i; |
2057 |
uae_s32 badness; |
2058 |
bestreg=-1; |
2059 |
when=2000000000; |
2060 |
for (i=N_FREGS;i--;) { |
2061 |
badness=live.fat[i].touched; |
2062 |
if (live.fat[i].nholds==0) |
2063 |
badness=0; |
2064 |
|
2065 |
if (!live.fat[i].locked && badness<when) { |
2066 |
bestreg=i; |
2067 |
when=badness; |
2068 |
if (live.fat[i].nholds==0) |
2069 |
break; |
2070 |
} |
2071 |
} |
2072 |
Dif (bestreg==-1) |
2073 |
abort(); |
2074 |
|
2075 |
if (live.fat[bestreg].nholds>0) { |
2076 |
f_free_nreg(bestreg); |
2077 |
} |
2078 |
if (f_isinreg(r)) { |
2079 |
f_evict(r); |
2080 |
} |
2081 |
|
2082 |
if (!willclobber) { |
2083 |
if (live.fate[r].status!=UNDEF) { |
2084 |
#if USE_LONG_DOUBLE |
2085 |
raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem); |
2086 |
#else |
2087 |
raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem); |
2088 |
#endif |
2089 |
} |
2090 |
live.fate[r].status=CLEAN; |
2091 |
} |
2092 |
else { |
2093 |
live.fate[r].status=DIRTY; |
2094 |
} |
2095 |
live.fate[r].realreg=bestreg; |
2096 |
live.fate[r].realind=live.fat[bestreg].nholds; |
2097 |
live.fat[bestreg].touched=touchcnt++; |
2098 |
live.fat[bestreg].holds[live.fat[bestreg].nholds]=r; |
2099 |
live.fat[bestreg].nholds++; |
2100 |
|
2101 |
return bestreg; |
2102 |
} |
2103 |
|
2104 |
static void f_unlock(int r) |
2105 |
{ |
2106 |
Dif (!live.fat[r].locked) |
2107 |
abort(); |
2108 |
live.fat[r].locked--; |
2109 |
} |
2110 |
|
2111 |
static void f_setlock(int r) |
2112 |
{ |
2113 |
live.fat[r].locked++; |
2114 |
} |
2115 |
|
2116 |
static __inline__ int f_readreg(int r) |
2117 |
{ |
2118 |
int n; |
2119 |
int answer=-1; |
2120 |
|
2121 |
if (f_isinreg(r)) { |
2122 |
n=live.fate[r].realreg; |
2123 |
answer=n; |
2124 |
} |
2125 |
/* either the value was in memory to start with, or it was evicted and |
2126 |
is in memory now */ |
2127 |
if (answer<0) |
2128 |
answer=f_alloc_reg(r,0); |
2129 |
|
2130 |
live.fat[answer].locked++; |
2131 |
live.fat[answer].touched=touchcnt++; |
2132 |
return answer; |
2133 |
} |
2134 |
|
2135 |
static __inline__ void f_make_exclusive(int r, int clobber) |
2136 |
{ |
2137 |
freg_status oldstate; |
2138 |
int rr=live.fate[r].realreg; |
2139 |
int nr; |
2140 |
int nind; |
2141 |
int ndirt=0; |
2142 |
int i; |
2143 |
|
2144 |
if (!f_isinreg(r)) |
2145 |
return; |
2146 |
if (live.fat[rr].nholds==1) |
2147 |
return; |
2148 |
for (i=0;i<live.fat[rr].nholds;i++) { |
2149 |
int vr=live.fat[rr].holds[i]; |
2150 |
if (vr!=r && live.fate[vr].status==DIRTY) |
2151 |
ndirt++; |
2152 |
} |
2153 |
if (!ndirt && !live.fat[rr].locked) { |
2154 |
/* Everything else is clean, so let's keep this register */ |
2155 |
for (i=0;i<live.fat[rr].nholds;i++) { |
2156 |
int vr=live.fat[rr].holds[i]; |
2157 |
if (vr!=r) { |
2158 |
f_evict(vr); |
2159 |
i--; /* Try that index again! */ |
2160 |
} |
2161 |
} |
2162 |
Dif (live.fat[rr].nholds!=1) { |
2163 |
write_log("realreg %d holds %d (",rr,live.fat[rr].nholds); |
2164 |
for (i=0;i<live.fat[rr].nholds;i++) { |
2165 |
write_log(" %d(%d,%d)",live.fat[rr].holds[i], |
2166 |
live.fate[live.fat[rr].holds[i]].realreg, |
2167 |
live.fate[live.fat[rr].holds[i]].realind); |
2168 |
} |
2169 |
write_log("\n"); |
2170 |
abort(); |
2171 |
} |
2172 |
return; |
2173 |
} |
2174 |
|
2175 |
/* We have to split the register */ |
2176 |
oldstate=live.fate[r]; |
2177 |
|
2178 |
f_setlock(rr); /* Make sure this doesn't go away */ |
2179 |
/* Forget about r being in the register rr */ |
2180 |
f_disassociate(r); |
2181 |
/* Get a new register, that we will clobber completely */ |
2182 |
nr=f_alloc_reg(r,1); |
2183 |
nind=live.fate[r].realind; |
2184 |
if (!clobber) |
2185 |
raw_fmov_rr(nr,rr); /* Make another copy */ |
2186 |
live.fate[r]=oldstate; /* Keep all the old state info */ |
2187 |
live.fate[r].realreg=nr; |
2188 |
live.fate[r].realind=nind; |
2189 |
f_unlock(rr); |
2190 |
} |
2191 |
|
2192 |
|
2193 |
static __inline__ int f_writereg(int r) |
2194 |
{ |
2195 |
int n; |
2196 |
int answer=-1; |
2197 |
|
2198 |
f_make_exclusive(r,1); |
2199 |
if (f_isinreg(r)) { |
2200 |
n=live.fate[r].realreg; |
2201 |
answer=n; |
2202 |
} |
2203 |
if (answer<0) { |
2204 |
answer=f_alloc_reg(r,1); |
2205 |
} |
2206 |
live.fate[r].status=DIRTY; |
2207 |
live.fat[answer].locked++; |
2208 |
live.fat[answer].touched=touchcnt++; |
2209 |
return answer; |
2210 |
} |
2211 |
|
2212 |
static int f_rmw(int r) |
2213 |
{ |
2214 |
int n; |
2215 |
|
2216 |
f_make_exclusive(r,0); |
2217 |
if (f_isinreg(r)) { |
2218 |
n=live.fate[r].realreg; |
2219 |
} |
2220 |
else |
2221 |
n=f_alloc_reg(r,0); |
2222 |
live.fate[r].status=DIRTY; |
2223 |
live.fat[n].locked++; |
2224 |
live.fat[n].touched=touchcnt++; |
2225 |
return n; |
2226 |
} |
2227 |
|
2228 |
/* Transfer the condition of FP_RESULT into the native flags.
 *
 * tmp - virtual scratch register; only used when
 *       FFLAG_NREG_CLOBBER_CONDITION holds, in which case it is bound to
 *       FFLAG_NREG around the raw operation and forgotten afterwards.
 *
 * The flags are declared clobbered before the operation and live after.
 */
static void fflags_into_flags_internal(uae_u32 tmp)
{
	int r;

	clobber_flags();
	r = f_readreg(FP_RESULT);
	if (!(FFLAG_NREG_CLOBBER_CONDITION)) {
		raw_fflags_into_flags(r);
	}
	else {
		const int scratch_vreg = tmp;	/* virtual register, before remapping */

		tmp = writereg_specific(tmp,4,FFLAG_NREG);
		raw_fflags_into_flags(r);
		unlock2(tmp);
		forget_about(scratch_vreg);
	}
	f_unlock(r);
	live_flags();
}
2246 |
|
2247 |
|
2248 |
|
2249 |
|
2250 |
/******************************************************************** |
2251 |
* CPU functions exposed to gencomp. Both CREATE and EMIT time * |
2252 |
********************************************************************/ |
2253 |
|
2254 |
/* |
2255 |
* RULES FOR HANDLING REGISTERS: |
2256 |
* |
2257 |
* * In the function headers, order the parameters |
2258 |
* - 1st registers written to |
2259 |
* - 2nd read/modify/write registers |
2260 |
* - 3rd registers read from |
2261 |
* * Before calling raw_*, you must call readreg, writereg or rmw for |
2262 |
* each register |
2263 |
* * The order for this is |
2264 |
* - 1st call remove_offset for all registers written to with size<4 |
2265 |
* - 2nd call readreg for all registers read without offset |
2266 |
* - 3rd call rmw for all rmw registers |
2267 |
* - 4th call readreg_offset for all registers that can handle offsets |
2268 |
* - 5th call get_offset for all the registers from the previous step |
2269 |
* - 6th call writereg for all written-to registers |
2270 |
* - 7th call raw_* |
2271 |
* - 8th unlock2 all registers that were locked |
2272 |
*/ |
2273 |
|
2274 |
/* Record that the native flags register holds the valid flags, that the
   copy on the stack is stale, and that the flags are important. */
MIDFUNC(0,live_flags,(void))
{
	live.flags_are_important = 1;
	live.flags_in_flags = VALID;
	live.flags_on_stack = TRASH;
}
MENDFUNC(0,live_flags,(void))
2281 |
|
2282 |
/* Record that the current flag values are not important. */
MIDFUNC(0,dont_care_flags,(void))
{
	live.flags_are_important = 0;
}
MENDFUNC(0,dont_care_flags,(void))
2287 |
|
2288 |
|
2289 |
/* Store the current carry condition into FLAGX's memory slot.
 *
 * FLAGX is evicted first, the flags are made live, and then a setcc
 * with condition code 2 (presumably "carry set" -- confirm against
 * raw_setcc_m) writes straight to memory.  The write is logged with
 * log_vwrite().
 */
MIDFUNC(0,duplicate_carry,(void))
{
	evict(FLAGX);
	make_flags_live_internal();
	{
		const uintptr flagx_mem = (uintptr)live.state[FLAGX].mem;
		COMPCALL(setcc_m)(flagx_mem,2);
	}
	log_vwrite(FLAGX);
}
MENDFUNC(0,duplicate_carry,(void))
2297 |
|
2298 |
/* Load the native carry flag from bit 0 of FLAGX.
 *
 * Two strategies, selected by have_rat_stall:
 *  - no partial-register stalls: a bit test of bit 0;
 *  - otherwise: rotate the byte by 8.  A byte rotated by 8 keeps its
 *    value, so FLAGX is declared clean again afterwards with isclean().
 *
 * The COMPCALL invocation is written as COMPCALL(name)(args), the same
 * form used by every other COMPCALL in this file.
 */
MIDFUNC(0,restore_carry,(void))
{
	if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
		bt_l_ri_noclobber(FLAGX,0);
	}
	else { /* Avoid the stall the above creates.
		  This is slow on non-P6, though.
	       */
		COMPCALL(rol_b_ri)(FLAGX,8);
		isclean(FLAGX);
	}
}
MENDFUNC(0,restore_carry,(void))
2311 |
|
2312 |
/* Raise the global needflags switch; while it is set, the shift/rotate
   helpers no longer skip zero-count operations. */
MIDFUNC(0,start_needflags,(void))
{
	needflags = 1;
}
MENDFUNC(0,start_needflags,(void))
2317 |
|
2318 |
/* Clear the global needflags switch set by start_needflags. */
MIDFUNC(0,end_needflags,(void))
{
	needflags = 0;
}
MENDFUNC(0,end_needflags,(void))
2323 |
|
2324 |
/* Exported wrapper around make_flags_live_internal(). */
MIDFUNC(0,make_flags_live,(void))
{
	make_flags_live_internal();
}
MENDFUNC(0,make_flags_live,(void))
2329 |
|
2330 |
/* Exported entry point: declare the flags clobbered, then move the FP
   condition into the native flags using tmp as scratch register.
   (fflags_into_flags_internal clobbers the flags once more itself.) */
MIDFUNC(1,fflags_into_flags,(W2 tmp))
{
	clobber_flags();
	fflags_into_flags_internal(tmp);
}
MENDFUNC(1,fflags_into_flags,(W2 tmp))
2336 |
|
2337 |
|
2338 |
/* Bit test of bit i in register r.  A 2-byte read is requested when
   i is below 16, a 4-byte read otherwise. */
MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
{
	const int size = (i<16) ? 2 : 4;

	CLOBBER_BT;
	const int nr = readreg(r,size);
	raw_bt_l_ri(nr,i);
	unlock2(nr);
}
MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2349 |
|
2350 |
/* Bit test of register r, with the bit number taken from register b. */
MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
{
	CLOBBER_BT;
	const int nr = readreg(r,4);
	const int nb = readreg(b,4);
	raw_bt_l_rr(nr,nb);
	unlock2(nr);
	unlock2(nb);
}
MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2360 |
|
2361 |
/* btc on bit i of register r (read-modify-write).  The register is
   acquired with 2-byte sizes when i is below 16, 4-byte sizes otherwise. */
MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
{
	const int size = (i<16) ? 2 : 4;

	CLOBBER_BT;
	const int nr = rmw(r,size,size);
	raw_btc_l_ri(nr,i);
	unlock2(nr);
}
MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2372 |
|
2373 |
/* btc on register r (read-modify-write), bit number taken from
   register b.  b is acquired for reading before r is acquired. */
MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
{
	CLOBBER_BT;
	const int nb = readreg(b,4);
	const int nr = rmw(r,4,4);
	raw_btc_l_rr(nr,nb);
	unlock2(nr);
	unlock2(nb);
}
MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2383 |
|
2384 |
|
2385 |
/* btr on bit i of register r (read-modify-write).  The register is
   acquired with 2-byte sizes when i is below 16, 4-byte sizes otherwise. */
MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
{
	const int size = (i<16) ? 2 : 4;

	CLOBBER_BT;
	const int nr = rmw(r,size,size);
	raw_btr_l_ri(nr,i);
	unlock2(nr);
}
MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2396 |
|
2397 |
/* btr on register r (read-modify-write), bit number taken from
   register b.  b is acquired for reading before r is acquired. */
MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
{
	CLOBBER_BT;
	const int nb = readreg(b,4);
	const int nr = rmw(r,4,4);
	raw_btr_l_rr(nr,nb);
	unlock2(nr);
	unlock2(nb);
}
MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2407 |
|
2408 |
|
2409 |
/* bts on bit i of register r (read-modify-write).  The register is
   acquired with 2-byte sizes when i is below 16, 4-byte sizes otherwise. */
MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
{
	const int size = (i<16) ? 2 : 4;

	CLOBBER_BT;
	const int nr = rmw(r,size,size);
	raw_bts_l_ri(nr,i);
	unlock2(nr);
}
MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2420 |
|
2421 |
/* bts on register r (read-modify-write), bit number taken from
   register b.  b is acquired for reading before r is acquired. */
MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
{
	CLOBBER_BT;
	const int nb = readreg(b,4);
	const int nr = rmw(r,4,4);
	raw_bts_l_rr(nr,nb);
	unlock2(nr);
	unlock2(nb);
}
MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2431 |
|
2432 |
/* Load a long from the absolute address s into register d. */
MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
{
	CLOBBER_MOV;
	const int nd = writereg(d,4);
	raw_mov_l_rm(nd,s);
	unlock2(nd);
}
MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2440 |
|
2441 |
|
2442 |
/* Emit a call through the address held in register r. */
MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
{
	const int target = readreg(r,4);

	raw_call_r(target);
	unlock2(target);
}
MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2449 |
|
2450 |
/* Subtract the immediate s from the long at the absolute address d.
   No virtual registers are involved. */
MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
{
	CLOBBER_SUB;
	raw_sub_l_mi(d,s);
}
MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
2456 |
|
2457 |
/* Store the immediate long s at the absolute address d.
   No virtual registers are involved. */
MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
{
	CLOBBER_MOV;
	raw_mov_l_mi(d,s);
}
MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
2463 |
|
2464 |
/* Store the immediate word s at the absolute address d.
   No virtual registers are involved. */
MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
{
	CLOBBER_MOV;
	raw_mov_w_mi(d,s);
}
MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
2470 |
|
2471 |
/* Store the immediate byte s at the absolute address d.
   No virtual registers are involved. */
MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
{
	CLOBBER_MOV;
	raw_mov_b_mi(d,s);
}
MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2477 |
|
2478 |
/* Rotate the byte in r left by the immediate i.  A zero count is
   skipped entirely unless needflags is set. */
MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
{
	if (!(i || needflags))
		return;

	CLOBBER_ROL;
	const int nr = rmw(r,1,1);
	raw_rol_b_ri(nr,i);
	unlock2(nr);
}
MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2488 |
|
2489 |
/* Rotate the word in r left by the immediate i.  A zero count is
   skipped entirely unless needflags is set. */
MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
{
	if (!(i || needflags))
		return;

	CLOBBER_ROL;
	const int nr = rmw(r,2,2);
	raw_rol_w_ri(nr,i);
	unlock2(nr);
}
MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2499 |
|
2500 |
/* Rotate the long in r left by the immediate i.  A zero count is
   skipped entirely unless needflags is set. */
MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
{
	if (!(i || needflags))
		return;

	CLOBBER_ROL;
	const int nr = rmw(r,4,4);
	raw_rol_l_ri(nr,i);
	unlock2(nr);
}
MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2510 |
|
2511 |
/* Rotate the long in d left by the count held in register r.
 *
 * A constant count is forwarded to rol_l_ri (truncated to 8 bits).
 * Otherwise r is forced into SHIFTCOUNT_NREG and d is acquired
 * read-modify-write.  The Dif sanity check verifies that the count
 * really ended up in native register 1; its message now names this
 * operation instead of the byte variant it was copied from.
 */
MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
{
	if (isconst(r)) {
		COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
		return;
	}
	CLOBBER_ROL;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,4,4);
	Dif (r!=1) {
		write_log("Illegal register %d in raw_rol_l\n",r);
		abort();
	}
	raw_rol_l_rr(d,r);
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2529 |
|
2530 |
/* Rotate the word in d left by the count held in register r.
 *
 * A constant count is forwarded to rol_w_ri (truncated to 8 bits).
 * Otherwise r is forced into SHIFTCOUNT_NREG and d is acquired
 * read-modify-write.  The Dif sanity-check message now names this
 * operation instead of the byte variant it was copied from.
 */
MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

	if (isconst(r)) {
		COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
		return;
	}
	CLOBBER_ROL;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,2,2);
	Dif (r!=1) {
		write_log("Illegal register %d in raw_rol_w\n",r);
		abort();
	}
	raw_rol_w_rr(d,r);
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2549 |
|
2550 |
/* Rotate the byte in d left by the count held in register r.
 *
 * A constant count is forwarded to rol_b_ri (truncated to 8 bits).
 * Otherwise r is forced into SHIFTCOUNT_NREG and d is acquired
 * read-modify-write; the Dif sanity check verifies that the count
 * ended up in native register 1.
 */
MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */
	if (isconst(r)) {
		COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
		return;
	}

	CLOBBER_ROL;
	const int count = readreg_specific(r,1,SHIFTCOUNT_NREG);
	const int dest = rmw(d,1,1);
	Dif (count!=1) {
		write_log("Illegal register %d in raw_rol_b\n",count);
		abort();
	}
	raw_rol_b_rr(dest,count);
	unlock2(count);
	unlock2(dest);
}
MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2570 |
|
2571 |
|
2572 |
/* Shift the long in d left by the count held in register r.
 *
 * A constant count is forwarded to shll_l_ri (truncated to 8 bits).
 * Otherwise r is forced into SHIFTCOUNT_NREG and d is acquired
 * read-modify-write.  The Dif sanity-check message now names this
 * operation; it used to say raw_rol_b.
 */
MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
{
	if (isconst(r)) {
		COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
		return;
	}
	CLOBBER_SHLL;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,4,4);
	Dif (r!=1) {
		write_log("Illegal register %d in raw_shll_l\n",r);
		abort();
	}
	raw_shll_l_rr(d,r);
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2590 |
|
2591 |
/* Shift the word in d left by the count held in register r.
 *
 * A constant count is forwarded to shll_w_ri (truncated to 8 bits).
 * Otherwise r is forced into SHIFTCOUNT_NREG and d is acquired
 * read-modify-write.  The Dif sanity-check message now names this
 * operation; it used to say raw_shll_b.
 */
MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

	if (isconst(r)) {
		COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
		return;
	}
	CLOBBER_SHLL;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,2,2);
	Dif (r!=1) {
		write_log("Illegal register %d in raw_shll_w\n",r);
		abort();
	}
	raw_shll_w_rr(d,r);
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2610 |
|
2611 |
/* Shift the byte in d left by the count held in register r.
 *
 * A constant count is forwarded to shll_b_ri (truncated to 8 bits).
 * Otherwise r is forced into SHIFTCOUNT_NREG and d is acquired
 * read-modify-write; the Dif sanity check verifies that the count
 * ended up in native register 1.
 */
MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */
	if (isconst(r)) {
		COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
		return;
	}

	CLOBBER_SHLL;
	const int count = readreg_specific(r,1,SHIFTCOUNT_NREG);
	const int dest = rmw(d,1,1);
	Dif (count!=1) {
		write_log("Illegal register %d in raw_shll_b\n",count);
		abort();
	}
	raw_shll_b_rr(dest,count);
	unlock2(count);
	unlock2(dest);
}
MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2631 |
|
2632 |
|
2633 |
/* Rotate the byte in r right by the immediate i.  A zero count is
   skipped entirely unless needflags is set.
   NOTE(review): r is annotated R1 but is acquired with rmw(); the
   rol_* siblings use RW1 -- confirm which annotation is intended. */
MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
{
	if (!(i || needflags))
		return;

	CLOBBER_ROR;
	const int nr = rmw(r,1,1);
	raw_ror_b_ri(nr,i);
	unlock2(nr);
}
MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2643 |
|
2644 |
/* Rotate the word in r right by the immediate i.  A zero count is
   skipped entirely unless needflags is set.
   NOTE(review): r is annotated R2 but is acquired with rmw(); the
   rol_* siblings use RW2 -- confirm which annotation is intended. */
MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
{
	if (!(i || needflags))
		return;

	CLOBBER_ROR;
	const int nr = rmw(r,2,2);
	raw_ror_w_ri(nr,i);
	unlock2(nr);
}
MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2654 |
|
2655 |
/* Rotate the long in r right by the immediate i.  A zero count is
   skipped entirely unless needflags is set.
   NOTE(review): r is annotated R4 but is acquired with rmw(); the
   rol_* siblings use RW4 -- confirm which annotation is intended. */
MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
{
	if (!(i || needflags))
		return;

	CLOBBER_ROR;
	const int nr = rmw(r,4,4);
	raw_ror_l_ri(nr,i);
	unlock2(nr);
}
MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2665 |
|
2666 |
/* Rotate the long in d right by the count held in register r.
   A constant count is forwarded to ror_l_ri (truncated to 8 bits);
   otherwise r is forced into SHIFTCOUNT_NREG and d is acquired
   read-modify-write. */
MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
{
	if (isconst(r)) {
		COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
		return;
	}

	CLOBBER_ROR;
	const int count = readreg_specific(r,1,SHIFTCOUNT_NREG);
	const int dest = rmw(d,4,4);
	raw_ror_l_rr(dest,count);
	unlock2(count);
	unlock2(dest);
}
MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2680 |
|
2681 |
/* Rotate the word in d right by the count held in register r.
   A constant count is forwarded to ror_w_ri (truncated to 8 bits);
   otherwise r is forced into SHIFTCOUNT_NREG and d is acquired
   read-modify-write. */
MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
{
	if (isconst(r)) {
		COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
		return;
	}

	CLOBBER_ROR;
	const int count = readreg_specific(r,1,SHIFTCOUNT_NREG);
	const int dest = rmw(d,2,2);
	raw_ror_w_rr(dest,count);
	unlock2(count);
	unlock2(dest);
}
MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2695 |
|
2696 |
/* Rotate the byte in d right by the count held in register r.
   A constant count is forwarded to ror_b_ri (truncated to 8 bits);
   otherwise r is forced into SHIFTCOUNT_NREG and d is acquired
   read-modify-write. */
MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
{
	if (isconst(r)) {
		COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
		return;
	}

	CLOBBER_ROR;
	const int count = readreg_specific(r,1,SHIFTCOUNT_NREG);
	const int dest = rmw(d,1,1);
	raw_ror_b_rr(dest,count);
	unlock2(count);
	unlock2(dest);
}
MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2711 |
|
2712 |
/* Logical right shift of the long in d by the count held in register r.
 *
 * A constant count is forwarded to shrl_l_ri (truncated to 8 bits).
 * Otherwise r is forced into SHIFTCOUNT_NREG and d is acquired
 * read-modify-write.  The Dif sanity-check message now names this
 * operation; it used to say raw_rol_b.
 */
MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
{
	if (isconst(r)) {
		COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
		return;
	}
	CLOBBER_SHRL;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,4,4);
	Dif (r!=1) {
		write_log("Illegal register %d in raw_shrl_l\n",r);
		abort();
	}
	raw_shrl_l_rr(d,r);
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2730 |
|
2731 |
/* Logical right shift of the word in d by the count held in register r.
 *
 * A constant count is forwarded to shrl_w_ri (truncated to 8 bits).
 * Otherwise r is forced into SHIFTCOUNT_NREG and d is acquired
 * read-modify-write.  The Dif sanity-check message now names this
 * operation; it used to say raw_shrl_b.
 */
MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

	if (isconst(r)) {
		COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
		return;
	}
	CLOBBER_SHRL;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,2,2);
	Dif (r!=1) {
		write_log("Illegal register %d in raw_shrl_w\n",r);
		abort();
	}
	raw_shrl_w_rr(d,r);
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2750 |
|
2751 |
/* Logical right shift of the byte in d by the count held in register r.
 *
 * A constant count is forwarded to shrl_b_ri (truncated to 8 bits).
 * Otherwise r is forced into SHIFTCOUNT_NREG and d is acquired
 * read-modify-write; the Dif sanity check verifies that the count
 * ended up in native register 1.
 */
MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */
	if (isconst(r)) {
		COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
		return;
	}

	CLOBBER_SHRL;
	const int count = readreg_specific(r,1,SHIFTCOUNT_NREG);
	const int dest = rmw(d,1,1);
	Dif (count!=1) {
		write_log("Illegal register %d in raw_shrl_b\n",count);
		abort();
	}
	raw_shrl_b_rr(dest,count);
	unlock2(count);
	unlock2(dest);
}
MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2771 |
|
2772 |
|
2773 |
|
2774 |
/* Shift the long in r left by the immediate i.
 *
 * - A zero count is skipped entirely unless needflags is set.
 * - If r is a known constant and no flags are needed, the shift is
 *   folded into the constant at translation time.  The count is masked
 *   to 0..31 so the C shift is always defined; shll_l_rr can pass counts
 *   up to 255 here.  NOTE(review): the mask assumes live.state[].val is
 *   32 bits wide and that the emitted shift uses only the low 5 bits of
 *   its count, as x86 shifts do -- confirm.
 * - Otherwise r is acquired read-modify-write and the shift is emitted.
 */
MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
{
	if (!i && !needflags)
		return;
	if (isconst(r) && !needflags) {
		live.state[r].val<<=(i&31);
		return;
	}
	CLOBBER_SHLL;
	r=rmw(r,4,4);
	raw_shll_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2788 |
|
2789 |
/* Shift the word in r left by the immediate i.  A zero count is
   skipped entirely unless needflags is set. */
MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
{
	if (!(i || needflags))
		return;

	CLOBBER_SHLL;
	const int nr = rmw(r,2,2);
	raw_shll_w_ri(nr,i);
	unlock2(nr);
}
MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2799 |
|
2800 |
/* Shift the byte in r left by the immediate i.  A zero count is
   skipped entirely unless needflags is set. */
MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
{
	if (!(i || needflags))
		return;

	CLOBBER_SHLL;
	const int nr = rmw(r,1,1);
	raw_shll_b_ri(nr,i);
	unlock2(nr);
}
MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2810 |
|
2811 |
/* Logical right shift of the long in r by the immediate i.
 *
 * - A zero count is skipped entirely unless needflags is set.
 * - If r is a known constant and no flags are needed, the shift is
 *   folded into the constant at translation time.  The count is masked
 *   to 0..31 so the C shift is always defined; shrl_l_rr can pass counts
 *   up to 255 here.  NOTE(review): the mask assumes live.state[].val is
 *   an unsigned 32-bit value and that the emitted shift uses only the
 *   low 5 bits of its count, as x86 shifts do -- confirm.
 * - Otherwise r is acquired read-modify-write and the shift is emitted.
 */
MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
{
	if (!i && !needflags)
		return;
	if (isconst(r) && !needflags) {
		live.state[r].val>>=(i&31);
		return;
	}
	CLOBBER_SHRL;
	r=rmw(r,4,4);
	raw_shrl_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2825 |
|
2826 |
/* Logical right shift of the word in r by the immediate i.  A zero
   count is skipped entirely unless needflags is set. */
MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
{
	if (!(i || needflags))
		return;

	CLOBBER_SHRL;
	const int nr = rmw(r,2,2);
	raw_shrl_w_ri(nr,i);
	unlock2(nr);
}
MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2836 |
|
2837 |
/* Logical right shift of the byte in r by the immediate i.  A zero
   count is skipped entirely unless needflags is set. */
MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
{
	if (!(i || needflags))
		return;

	CLOBBER_SHRL;
	const int nr = rmw(r,1,1);
	raw_shrl_b_ri(nr,i);
	unlock2(nr);
}
MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2847 |
|
2848 |
/* Arithmetic right shift of the long in r by the immediate i.  A zero
   count is skipped entirely unless needflags is set. */
MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
{
	if (!(i || needflags))
		return;

	CLOBBER_SHRA;
	const int nr = rmw(r,4,4);
	raw_shra_l_ri(nr,i);
	unlock2(nr);
}
MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2858 |
|
2859 |
/* Arithmetic right shift of the word in r by the immediate i.  A zero
   count is skipped entirely unless needflags is set. */
MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
{
	if (!(i || needflags))
		return;

	CLOBBER_SHRA;
	const int nr = rmw(r,2,2);
	raw_shra_w_ri(nr,i);
	unlock2(nr);
}
MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2869 |
|
2870 |
/* Arithmetic right shift of the byte in r by the immediate i.  A zero
   count is skipped entirely unless needflags is set. */
MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
{
	if (!(i || needflags))
		return;

	CLOBBER_SHRA;
	const int nr = rmw(r,1,1);
	raw_shra_b_ri(nr,i);
	unlock2(nr);
}
MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2880 |
|
2881 |
/* Arithmetic right shift of the long in d by the count held in r.
 *
 * A constant count is forwarded to shra_l_ri (truncated to 8 bits).
 * Otherwise r is forced into SHIFTCOUNT_NREG and d is acquired
 * read-modify-write.  The Dif sanity-check message now names this
 * operation; it used to say raw_rol_b.
 */
MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
{
	if (isconst(r)) {
		COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
		return;
	}
	CLOBBER_SHRA;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,4,4);
	Dif (r!=1) {
		write_log("Illegal register %d in raw_shra_l\n",r);
		abort();
	}
	raw_shra_l_rr(d,r);
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2899 |
|
2900 |
/* Arithmetic right shift of the word in d by the count held in r.
 *
 * A constant count is forwarded to shra_w_ri (truncated to 8 bits).
 * Otherwise r is forced into SHIFTCOUNT_NREG and d is acquired
 * read-modify-write.  The Dif sanity-check message now names this
 * operation; it used to say raw_shra_b.
 */
MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

	if (isconst(r)) {
		COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
		return;
	}
	CLOBBER_SHRA;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,2,2);
	Dif (r!=1) {
		write_log("Illegal register %d in raw_shra_w\n",r);
		abort();
	}
	raw_shra_w_rr(d,r);
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2919 |
|
2920 |
/* Arithmetic right shift of the byte in d by the count held in r.
 *
 * A constant count is forwarded to shra_b_ri (truncated to 8 bits).
 * Otherwise r is forced into SHIFTCOUNT_NREG and d is acquired
 * read-modify-write; the Dif sanity check verifies that the count
 * ended up in native register 1.
 */
MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */
	if (isconst(r)) {
		COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
		return;
	}

	CLOBBER_SHRA;
	const int count = readreg_specific(r,1,SHIFTCOUNT_NREG);
	const int dest = rmw(d,1,1);
	Dif (count!=1) {
		write_log("Illegal register %d in raw_shra_b\n",count);
		abort();
	}
	raw_shra_b_rr(dest,count);
	unlock2(count);
	unlock2(dest);
}
MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2940 |
|
2941 |
|
2942 |
/* Set the byte register d according to condition code cc. */
MIDFUNC(2,setcc,(W1 d, IMM cc))
{
	CLOBBER_SETCC;
	const int nd = writereg(d,1);
	raw_setcc(nd,cc);
	unlock2(nd);
}
MENDFUNC(2,setcc,(W1 d, IMM cc))
2950 |
|
2951 |
/* Set the byte at the absolute address d according to condition code
   cc.  No virtual registers are involved. */
MIDFUNC(2,setcc_m,(IMM d, IMM cc))
{
	CLOBBER_SETCC;
	raw_setcc_m(d,cc);
}
MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2957 |
|
2958 |
/* Conditional byte move of s into d under condition cc.  Nothing is
   emitted when source and destination are the same register. */
MIDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc))
{
	if (d!=s) {
		CLOBBER_CMOV;
		const int src = readreg(s,1);
		const int dst = rmw(d,1,1);
		raw_cmov_b_rr(dst,src,cc);
		unlock2(src);
		unlock2(dst);
	}
}
MENDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc))
2970 |
|
2971 |
/* Conditional word move of s into d under condition cc.  Nothing is
   emitted when source and destination are the same register. */
MIDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc))
{
	if (d!=s) {
		CLOBBER_CMOV;
		const int src = readreg(s,2);
		const int dst = rmw(d,2,2);
		raw_cmov_w_rr(dst,src,cc);
		unlock2(src);
		unlock2(dst);
	}
}
MENDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc))
2983 |
|
2984 |
/* Conditional long move of s into d under condition cc.  Nothing is
   emitted when source and destination are the same register. */
MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
{
	if (d!=s) {
		CLOBBER_CMOV;
		const int src = readreg(s,4);
		const int dst = rmw(d,4,4);
		raw_cmov_l_rr(dst,src,cc);
		unlock2(src);
		unlock2(dst);
	}
}
MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2996 |
|
2997 |
/* Conditional long move from the absolute address s into d under
   condition cc. */
MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
{
	CLOBBER_CMOV;
	const int dst = rmw(d,4,4);
	raw_cmov_l_rm(dst,s,cc);
	unlock2(dst);
}
MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
3005 |
|
3006 |
/* Bit scan forward: scan s, write the result to d.
   NOTE(review): s is annotated W4 but is only read here (readreg) --
   confirm whether R4 was intended. */
MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
{
	CLOBBER_BSF;
	const int src = readreg(s,4);
	const int dst = writereg(d,4);
	raw_bsf_l_rr(dst,src);
	unlock2(src);
	unlock2(dst);
}
MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))
3016 |
|
3017 |
/* Set the Z flag depending on the value in s. Note that the |
3018 |
value has to be 0 or -1 (or, more precisely, for non-zero |
3019 |
values, bit 14 must be set)! */ |
3020 |
/* Derive the Z flag from s via raw_flags_set_zero().  s is bound
   read-modify-write to FLAG_NREG3; tmp is a scratch register that is
   overwritten. */
MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
{
	CLOBBER_BSF;
	const int src = rmw_specific(s,4,4,FLAG_NREG3);
	const int scratch = writereg(tmp,4);
	raw_flags_set_zero(src,scratch);
	unlock2(scratch);
	unlock2(src);
}
MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
3030 |
|
3031 |
/* Signed multiply: d is updated in place from d and s via
   raw_imul_32_32(). */
MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
{
	CLOBBER_MUL;
	const int src = readreg(s,4);
	const int dst = rmw(d,4,4);
	raw_imul_32_32(dst,src);
	unlock2(src);
	unlock2(dst);
}
MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
3041 |
|
3042 |
/* Signed widening multiply via raw_imul_64_32().  Both operands are
   modified: s is bound to MUL_NREG2 and d to MUL_NREG1. */
MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
{
	CLOBBER_MUL;
	const int op2 = rmw_specific(s,4,4,MUL_NREG2);
	const int op1 = rmw_specific(d,4,4,MUL_NREG1);
	raw_imul_64_32(op1,op2);
	unlock2(op2);
	unlock2(op1);
}
MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3052 |
|
3053 |
/* Unsigned widening multiply via raw_mul_64_32().  Both operands are
   modified: s is bound to MUL_NREG2 and d to MUL_NREG1. */
MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
{
	CLOBBER_MUL;
	const int op2 = rmw_specific(s,4,4,MUL_NREG2);
	const int op1 = rmw_specific(d,4,4,MUL_NREG1);
	raw_mul_64_32(op1,op2);
	unlock2(op2);
	unlock2(op1);
}
MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3063 |
|
3064 |
/* Multiply: d is updated in place from d and s via raw_mul_32_32(). */
MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
{
	CLOBBER_MUL;
	const int src = readreg(s,4);
	const int dst = rmw(d,4,4);
	raw_mul_32_32(dst,src);
	unlock2(src);
	unlock2(dst);
}
MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
3074 |
|
3075 |
#if SIZEOF_VOID_P == 8
/* Sign-extend the 32-bit value in s into d (64-bit hosts only).
 *
 * A constant source is folded into a constant destination.  When source
 * and destination are the same virtual register it is acquired once,
 * read-modify-write, instead of being locked twice.
 * NOTE(review): s is annotated R2 although 4 bytes are read -- confirm.
 */
MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
{
	if (isconst(s)) {
		set_const(d,(uae_s32)live.state[s].val);
		return;
	}

	CLOBBER_SE32;
	if (s==d) {
		/* Locking one register twice would cause trouble, so use a
		   single read-modify-write acquisition. */
		const int reg = rmw(s,4,4);
		raw_sign_extend_32_rr(reg,reg);
		unlock2(reg);
	}
	else {
		const int src = readreg(s,4);
		const int dst = writereg(d,4);
		raw_sign_extend_32_rr(dst,src);
		unlock2(dst);
		unlock2(src);
	}
}
MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
#endif
3106 |
|
3107 |
/* Sign-extend the word in s into the long d.
 *
 * A constant source is folded into a constant destination.  When source
 * and destination are the same virtual register it is acquired once,
 * read-modify-write (4 bytes written, 2 read), instead of being locked
 * twice with different sizes.
 */
MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
{
	if (isconst(s)) {
		set_const(d,(uae_s32)(uae_s16)live.state[s].val);
		return;
	}

	CLOBBER_SE16;
	if (s==d) {
		/* Locking one register twice with different sizes would
		   cause trouble, so use a single acquisition. */
		const int reg = rmw(s,4,2);
		raw_sign_extend_16_rr(reg,reg);
		unlock2(reg);
	}
	else {
		const int src = readreg(s,2);
		const int dst = writereg(d,4);
		raw_sign_extend_16_rr(dst,src);
		unlock2(dst);
		unlock2(src);
	}
}
MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3136 |
|
3137 |
/* Sign-extend the byte in s into the long d.
 *
 * A constant source is folded into a constant destination.  When source
 * and destination are the same virtual register it is acquired once,
 * read-modify-write (4 bytes written, 1 read), instead of being locked
 * twice with different sizes.
 */
MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
{
	if (isconst(s)) {
		set_const(d,(uae_s32)(uae_s8)live.state[s].val);
		return;
	}

	CLOBBER_SE8;
	if (s==d) {
		/* Locking one register twice with different sizes would
		   cause trouble, so use a single acquisition. */
		const int reg = rmw(s,4,1);
		raw_sign_extend_8_rr(reg,reg);
		unlock2(reg);
	}
	else {
		const int src = readreg(s,1);
		const int dst = writereg(d,4);
		raw_sign_extend_8_rr(dst,src);
		unlock2(dst);
		unlock2(src);
	}
}
MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3168 |
|
3169 |
|
3170 |
/* Zero-extend the word in s into the long d.
 *
 * A constant source is folded into a constant destination.  When source
 * and destination are the same virtual register it is acquired once,
 * read-modify-write (4 bytes written, 2 read), instead of being locked
 * twice with different sizes.
 */
MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
{
	if (isconst(s)) {
		set_const(d,(uae_u32)(uae_u16)live.state[s].val);
		return;
	}

	CLOBBER_ZE16;
	if (s==d) {
		/* Locking one register twice with different sizes would
		   cause trouble, so use a single acquisition. */
		const int reg = rmw(s,4,2);
		raw_zero_extend_16_rr(reg,reg);
		unlock2(reg);
	}
	else {
		const int src = readreg(s,2);
		const int dst = writereg(d,4);
		raw_zero_extend_16_rr(dst,src);
		unlock2(dst);
		unlock2(src);
	}
}
MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3199 |
|
3200 |
/* Zero-extend the byte in s into the long d.
 *
 * A constant source is folded into a constant destination.  When source
 * and destination are the same virtual register it is acquired once,
 * read-modify-write (4 bytes written, 1 read), instead of being locked
 * twice with different sizes.
 */
MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
{
	if (isconst(s)) {
		set_const(d,(uae_u32)(uae_u8)live.state[s].val);
		return;
	}

	CLOBBER_ZE8;
	if (s==d) {
		/* Locking one register twice with different sizes would
		   cause trouble, so use a single acquisition. */
		const int reg = rmw(s,4,1);
		raw_zero_extend_8_rr(reg,reg);
		unlock2(reg);
	}
	else {
		const int src = readreg(s,1);
		const int dst = writereg(d,4);
		raw_zero_extend_8_rr(dst,src);
		unlock2(dst);
		unlock2(src);
	}
}
MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3230 |
|
3231 |
/* Copy the byte in s to d.  A self-move emits nothing; a constant
   source is forwarded to mov_b_ri. */
MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
{
	if (d==s)
		return;
	if (isconst(s)) {
		COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
		return;
	}

	CLOBBER_MOV;
	const int src = readreg(s,1);
	const int dst = writereg(d,1);
	raw_mov_b_rr(dst,src);
	unlock2(dst);
	unlock2(src);
}
MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
3248 |
|
3249 |
/* Copy the word in s to d.  A self-move emits nothing; a constant
   source is forwarded to mov_w_ri. */
MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
{
	if (d==s)
		return;
	if (isconst(s)) {
		COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
		return;
	}

	CLOBBER_MOV;
	const int src = readreg(s,2);
	const int dst = writereg(d,2);
	raw_mov_w_rr(dst,src);
	unlock2(dst);
	unlock2(src);
}
MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3266 |
|
3267 |
|
3268 |
/* Load a long into d using the indexed operand formed from baser,
   index and factor (presumably baser+factor*index -- see
   raw_mov_l_rrm_indexed). */
MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
{
	CLOBBER_MOV;
	const int nbase = readreg(baser,4);
	const int nindex = readreg(index,4);
	const int dst = writereg(d,4);

	raw_mov_l_rrm_indexed(dst,nbase,nindex,factor);

	unlock2(dst);
	unlock2(nbase);
	unlock2(nindex);
}
MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3281 |
|
3282 |
/* Load a word into d using the indexed operand formed from baser,
   index and factor (presumably baser+factor*index -- see
   raw_mov_w_rrm_indexed). */
MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
{
	CLOBBER_MOV;
	const int nbase = readreg(baser,4);
	const int nindex = readreg(index,4);
	const int dst = writereg(d,2);

	raw_mov_w_rrm_indexed(dst,nbase,nindex,factor);

	unlock2(dst);
	unlock2(nbase);
	unlock2(nindex);
}
MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3295 |
|
3296 |
/* Load a byte into d using the indexed operand formed from baser,
   index and factor (presumably baser+factor*index -- see
   raw_mov_b_rrm_indexed). */
MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
{
	CLOBBER_MOV;
	const int nbase = readreg(baser,4);
	const int nindex = readreg(index,4);
	const int dst = writereg(d,1);

	raw_mov_b_rrm_indexed(dst,nbase,nindex,factor);

	unlock2(dst);
	unlock2(nbase);
	unlock2(nindex);
}
MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3310 |
|
3311 |
|
3312 |
/* Store the long in s using the indexed operand formed from baser,
   index and factor.  The Dif sanity check aborts when s was given the
   same native register as baser or index. */
MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
{
	CLOBBER_MOV;
	const int nbase = readreg(baser,4);
	const int nindex = readreg(index,4);
	const int src = readreg(s,4);

	Dif (nbase==src || nindex==src) {
		abort();
	}

	raw_mov_l_mrr_indexed(nbase,nindex,factor,src);

	unlock2(src);
	unlock2(nbase);
	unlock2(nindex);
}
MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3329 |
|
3330 |
/* Store the word in s using the indexed operand formed from baser,
   index and factor. */
MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
{
	CLOBBER_MOV;
	const int nbase = readreg(baser,4);
	const int nindex = readreg(index,4);
	const int src = readreg(s,2);

	raw_mov_w_mrr_indexed(nbase,nindex,factor,src);

	unlock2(src);
	unlock2(nbase);
	unlock2(nindex);
}
MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3343 |
|
3344 |
/* Store the byte in s using the indexed operand formed from baser,
   index and factor.  Unlike the word and long variants, s is acquired
   before the address registers. */
MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
{
	CLOBBER_MOV;
	const int src = readreg(s,1);
	const int nbase = readreg(baser,4);
	const int nindex = readreg(index,4);

	raw_mov_b_mrr_indexed(nbase,nindex,factor,src);

	unlock2(src);
	unlock2(nbase);
	unlock2(nindex);
}
MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3357 |
|
3358 |
|
3359 |
/* Store the long in s using the operand formed from the displacement
 * base plus baser, index and factor.
 *
 * baser and index are acquired with readreg_offset(); the offsets
 * reported by get_offset() for them are added to the displacement (the
 * index offset multiplied by factor).
 */
MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
{
	CLOBBER_MOV;
	const int src = readreg(s,4);
	const int nbase = readreg_offset(baser,4);
	const int nindex = readreg_offset(index,4);

	/* baser/index still name the virtual registers here. */
	base+=get_offset(baser);
	base+=factor*get_offset(index);

	raw_mov_l_bmrr_indexed(base,nbase,nindex,factor,src);

	unlock2(src);
	unlock2(nbase);
	unlock2(nindex);
}
MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3378 |
|
3379 |
/* Mid-layer wrapper for raw_mov_w_bmrr_indexed(base, baser, index, factor, s).
 * Same scheme as the long variant, with s mapped for a 2-byte read. */
MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
{
    /* Original register numbers, needed for get_offset() after remapping. */
    int base_vreg = baser;
    int index_vreg = index;

    CLOBBER_MOV;
    s = readreg(s, 2);
    baser = readreg_offset(baser, 4);
    index = readreg_offset(index, 4);

    base += get_offset(base_vreg);
    base += factor * get_offset(index_vreg);

    raw_mov_w_bmrr_indexed(base, baser, index, factor, s);

    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3398 |
|
3399 |
/* Mid-layer wrapper for raw_mov_b_bmrr_indexed(base, baser, index, factor, s).
 * Same scheme as the long variant, with s mapped for a 1-byte read. */
MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
{
    /* Original register numbers, needed for get_offset() after remapping. */
    int base_vreg = baser;
    int index_vreg = index;

    CLOBBER_MOV;
    s = readreg(s, 1);
    baser = readreg_offset(baser, 4);
    index = readreg_offset(index, 4);

    base += get_offset(base_vreg);
    base += factor * get_offset(index_vreg);

    raw_mov_b_bmrr_indexed(base, baser, index, factor, s);

    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3418 |
|
3419 |
|
3420 |
|
3421 |
/* Read a long from base+baser+factor*index */ |
3422 |
MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) |
3423 |
{ |
3424 |
int basereg=baser; |
3425 |
int indexreg=index; |
3426 |
|
3427 |
CLOBBER_MOV; |
3428 |
baser=readreg_offset(baser,4); |
3429 |
index=readreg_offset(index,4); |
3430 |
base+=get_offset(basereg); |
3431 |
base+=factor*get_offset(indexreg); |
3432 |
d=writereg(d,4); |
3433 |
raw_mov_l_brrm_indexed(d,base,baser,index,factor); |
3434 |
unlock2(d); |
3435 |
unlock2(baser); |
3436 |
unlock2(index); |
3437 |
} |
3438 |
MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) |
3439 |
|
3440 |
|
3441 |
/* Word variant of mov_l_brrm_indexed: emits raw_mov_w_brrm_indexed with d
 * mapped for a 2-byte write. remove_offset(d,-1) is called on d first. */
MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
{
    /* Original register numbers, needed for get_offset() after remapping. */
    int base_vreg = baser;
    int index_vreg = index;

    CLOBBER_MOV;
    remove_offset(d, -1);
    baser = readreg_offset(baser, 4);
    index = readreg_offset(index, 4);
    base += get_offset(base_vreg);
    base += factor * get_offset(index_vreg);
    d = writereg(d, 2);

    raw_mov_w_brrm_indexed(d, base, baser, index, factor);

    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3459 |
|
3460 |
|
3461 |
/* Byte variant of mov_l_brrm_indexed: emits raw_mov_b_brrm_indexed with d
 * mapped for a 1-byte write. remove_offset(d,-1) is called on d first. */
MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
{
    /* Original register numbers, needed for get_offset() after remapping. */
    int base_vreg = baser;
    int index_vreg = index;

    CLOBBER_MOV;
    remove_offset(d, -1);
    baser = readreg_offset(baser, 4);
    index = readreg_offset(index, 4);
    base += get_offset(base_vreg);
    base += factor * get_offset(index_vreg);
    d = writereg(d, 1);

    raw_mov_b_brrm_indexed(d, base, baser, index, factor);

    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3479 |
|
3480 |
/* Read a long from base+factor*index */ |
3481 |
MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) |
3482 |
{ |
3483 |
int indexreg=index; |
3484 |
|
3485 |
if (isconst(index)) { |
3486 |
COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val); |
3487 |
return; |
3488 |
} |
3489 |
|
3490 |
CLOBBER_MOV; |
3491 |
index=readreg_offset(index,4); |
3492 |
base+=get_offset(indexreg)*factor; |
3493 |
d=writereg(d,4); |
3494 |
|
3495 |
raw_mov_l_rm_indexed(d,base,index,factor); |
3496 |
unlock2(index); |
3497 |
unlock2(d); |
3498 |
} |
3499 |
MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) |
3500 |
|
3501 |
|
3502 |
/* read the long at the address contained in s+offset and store in d */ |
3503 |
MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset)) |
3504 |
{ |
3505 |
if (isconst(s)) { |
3506 |
COMPCALL(mov_l_rm)(d,live.state[s].val+offset); |
3507 |
return; |
3508 |
} |
3509 |
CLOBBER_MOV; |
3510 |
s=readreg(s,4); |
3511 |
d=writereg(d,4); |
3512 |
|
3513 |
raw_mov_l_rR(d,s,offset); |
3514 |
unlock2(d); |
3515 |
unlock2(s); |
3516 |
} |
3517 |
MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset)) |
3518 |
|
3519 |
/* read the word at the address contained in s+offset and store in d */ |
3520 |
MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset)) |
3521 |
{ |
3522 |
if (isconst(s)) { |
3523 |
COMPCALL(mov_w_rm)(d,live.state[s].val+offset); |
3524 |
return; |
3525 |
} |
3526 |
CLOBBER_MOV; |
3527 |
s=readreg(s,4); |
3528 |
d=writereg(d,2); |
3529 |
|
3530 |
raw_mov_w_rR(d,s,offset); |
3531 |
unlock2(d); |
3532 |
unlock2(s); |
3533 |
} |
3534 |
MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset)) |
3535 |
|
3536 |
/* read the word at the address contained in s+offset and store in d */ |
3537 |
MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset)) |
3538 |
{ |
3539 |
if (isconst(s)) { |
3540 |
COMPCALL(mov_b_rm)(d,live.state[s].val+offset); |
3541 |
return; |
3542 |
} |
3543 |
CLOBBER_MOV; |
3544 |
s=readreg(s,4); |
3545 |
d=writereg(d,1); |
3546 |
|
3547 |
raw_mov_b_rR(d,s,offset); |
3548 |
unlock2(d); |
3549 |
unlock2(s); |
3550 |
} |
3551 |
MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset)) |
3552 |
|
3553 |
/* read the long at the address contained in s+offset and store in d */ |
3554 |
MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset)) |
3555 |
{ |
3556 |
int sreg=s; |
3557 |
if (isconst(s)) { |
3558 |
COMPCALL(mov_l_rm)(d,live.state[s].val+offset); |
3559 |
return; |
3560 |
} |
3561 |
CLOBBER_MOV; |
3562 |
s=readreg_offset(s,4); |
3563 |
offset+=get_offset(sreg); |
3564 |
d=writereg(d,4); |
3565 |
|
3566 |
raw_mov_l_brR(d,s,offset); |
3567 |
unlock2(d); |
3568 |
unlock2(s); |
3569 |
} |
3570 |
MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset)) |
3571 |
|
3572 |
/* read the word at the address contained in s+offset and store in d */ |
3573 |
MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset)) |
3574 |
{ |
3575 |
int sreg=s; |
3576 |
if (isconst(s)) { |
3577 |
COMPCALL(mov_w_rm)(d,live.state[s].val+offset); |
3578 |
return; |
3579 |
} |
3580 |
CLOBBER_MOV; |
3581 |
remove_offset(d,-1); |
3582 |
s=readreg_offset(s,4); |
3583 |
offset+=get_offset(sreg); |
3584 |
d=writereg(d,2); |
3585 |
|
3586 |
raw_mov_w_brR(d,s,offset); |
3587 |
unlock2(d); |
3588 |
unlock2(s); |
3589 |
} |
3590 |
MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset)) |
3591 |
|
3592 |
/* read the word at the address contained in s+offset and store in d */ |
3593 |
MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset)) |
3594 |
{ |
3595 |
int sreg=s; |
3596 |
if (isconst(s)) { |
3597 |
COMPCALL(mov_b_rm)(d,live.state[s].val+offset); |
3598 |
return; |
3599 |
} |
3600 |
CLOBBER_MOV; |
3601 |
remove_offset(d,-1); |
3602 |
s=readreg_offset(s,4); |
3603 |
offset+=get_offset(sreg); |
3604 |
d=writereg(d,1); |
3605 |
|
3606 |
raw_mov_b_brR(d,s,offset); |
3607 |
unlock2(d); |
3608 |
unlock2(s); |
3609 |
} |
3610 |
MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset)) |
3611 |
|
3612 |
/* Emits raw_mov_l_Ri(d, i, offset) with d used as an address register.
 * A constant d turns the access into an absolute mov_l_mi at d.val+offset. */
MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
{
    /* Original register number, needed for get_offset() after remapping. */
    int dst_vreg = d;

    if (isconst(d)) {
        COMPCALL(mov_l_mi)(live.state[d].val + offset, i);
        return;
    }

    CLOBBER_MOV;
    d = readreg_offset(d, 4);
    offset += get_offset(dst_vreg);

    raw_mov_l_Ri(d, i, offset);

    unlock2(d);
}
MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3627 |
|
3628 |
/* Emits raw_mov_w_Ri(d, i, offset) with d used as an address register.
 * A constant d turns the access into an absolute mov_w_mi at d.val+offset. */
MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
{
    /* Original register number, needed for get_offset() after remapping. */
    int dst_vreg = d;

    if (isconst(d)) {
        COMPCALL(mov_w_mi)(live.state[d].val + offset, i);
        return;
    }

    CLOBBER_MOV;
    d = readreg_offset(d, 4);
    offset += get_offset(dst_vreg);

    raw_mov_w_Ri(d, i, offset);

    unlock2(d);
}
MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3643 |
|
3644 |
/* Emits raw_mov_b_Ri(d, i, offset) with d used as an address register.
 * A constant d turns the access into an absolute mov_b_mi at d.val+offset. */
MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
{
    /* Original register number, needed for get_offset() after remapping. */
    int dst_vreg = d;

    if (isconst(d)) {
        COMPCALL(mov_b_mi)(live.state[d].val + offset, i);
        return;
    }

    CLOBBER_MOV;
    d = readreg_offset(d, 4);
    offset += get_offset(dst_vreg);

    raw_mov_b_Ri(d, i, offset);

    unlock2(d);
}
MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3659 |
|
3660 |
/* Warning! OFFSET is byte sized only! */ |
3661 |
MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset)) |
3662 |
{ |
3663 |
if (isconst(d)) { |
3664 |
COMPCALL(mov_l_mr)(live.state[d].val+offset,s); |
3665 |
return; |
3666 |
} |
3667 |
if (isconst(s)) { |
3668 |
COMPCALL(mov_l_Ri)(d,live.state[s].val,offset); |
3669 |
return; |
3670 |
} |
3671 |
|
3672 |
CLOBBER_MOV; |
3673 |
s=readreg(s,4); |
3674 |
d=readreg(d,4); |
3675 |
|
3676 |
raw_mov_l_Rr(d,s,offset); |
3677 |
unlock2(d); |
3678 |
unlock2(s); |
3679 |
} |
3680 |
MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset)) |
3681 |
|
3682 |
/* Emits raw_mov_w_Rr(d, s, offset). A constant d is forwarded to mov_w_mr
 * (absolute address d.val+offset); a constant s is forwarded to mov_w_Ri
 * with its value truncated to 16 bits. */
MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
{
    if (isconst(d)) {
        COMPCALL(mov_w_mr)(live.state[d].val + offset, s);
        return;
    }
    if (isconst(s)) {
        COMPCALL(mov_w_Ri)(d, (uae_u16)live.state[s].val, offset);
        return;
    }

    CLOBBER_MOV;
    s = readreg(s, 2);
    d = readreg(d, 4);

    raw_mov_w_Rr(d, s, offset);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3701 |
|
3702 |
/* Emits raw_mov_b_Rr(d, s, offset). A constant d is forwarded to mov_b_mr
 * (absolute address d.val+offset); a constant s is forwarded to mov_b_Ri
 * with its value truncated to 8 bits. */
MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
{
    if (isconst(d)) {
        COMPCALL(mov_b_mr)(live.state[d].val + offset, s);
        return;
    }
    if (isconst(s)) {
        COMPCALL(mov_b_Ri)(d, (uae_u8)live.state[s].val, offset);
        return;
    }

    CLOBBER_MOV;
    s = readreg(s, 1);
    d = readreg(d, 4);

    raw_mov_b_Rr(d, s, offset);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3721 |
|
3722 |
/* Emits raw_lea_l_brr(d, s, offset). Shortcuts taken before emitting code:
 *  - constant s: d simply becomes the constant s.val+offset (mov_l_ri);
 *  - USE_OFFSET and d==s: the offset is only recorded via add_offset(). */
MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
{
    if (isconst(s)) {
        COMPCALL(mov_l_ri)(d, live.state[s].val + offset);
        return;
    }
#if USE_OFFSET
    if (d == s) {
        add_offset(d, offset);
        return;
    }
#endif

    CLOBBER_LEA;
    s = readreg(s, 4);
    d = writereg(d, 4);

    raw_lea_l_brr(d, s, offset);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3742 |
|
3743 |
/* Emits raw_lea_l_brr_indexed(d, s, index, factor, offset).
 * A zero offset is delegated to lea_l_rr_indexed. */
MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
{
    if (offset == 0) {
        COMPCALL(lea_l_rr_indexed)(d, s, index, factor);
        return;
    }

    CLOBBER_LEA;
    s = readreg(s, 4);
    index = readreg(index, 4);
    d = writereg(d, 4);

    raw_lea_l_brr_indexed(d, s, index, factor, offset);

    unlock2(d);
    unlock2(index);
    unlock2(s);
}
MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3760 |
|
3761 |
/* Emits raw_lea_l_rr_indexed(d, s, index, factor): s and index are mapped
 * for a 4-byte read, d for a 4-byte write. */
MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
{
    CLOBBER_LEA;
    s = readreg(s, 4);
    index = readreg(index, 4);
    d = writereg(d, 4);

    raw_lea_l_rr_indexed(d, s, index, factor);

    unlock2(d);
    unlock2(index);
    unlock2(s);
}
MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3774 |
|
3775 |
/* write d to the long at the address contained in s+offset */ |
3776 |
MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset)) |
3777 |
{ |
3778 |
int dreg=d; |
3779 |
if (isconst(d)) { |
3780 |
COMPCALL(mov_l_mr)(live.state[d].val+offset,s); |
3781 |
return; |
3782 |
} |
3783 |
|
3784 |
CLOBBER_MOV; |
3785 |
s=readreg(s,4); |
3786 |
d=readreg_offset(d,4); |
3787 |
offset+=get_offset(dreg); |
3788 |
|
3789 |
raw_mov_l_bRr(d,s,offset); |
3790 |
unlock2(d); |
3791 |
unlock2(s); |
3792 |
} |
3793 |
MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset)) |
3794 |
|
3795 |
/* write the word at the address contained in s+offset and store in d */ |
3796 |
MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset)) |
3797 |
{ |
3798 |
int dreg=d; |
3799 |
|
3800 |
if (isconst(d)) { |
3801 |
COMPCALL(mov_w_mr)(live.state[d].val+offset,s); |
3802 |
return; |
3803 |
} |
3804 |
|
3805 |
CLOBBER_MOV; |
3806 |
s=readreg(s,2); |
3807 |
d=readreg_offset(d,4); |
3808 |
offset+=get_offset(dreg); |
3809 |
raw_mov_w_bRr(d,s,offset); |
3810 |
unlock2(d); |
3811 |
unlock2(s); |
3812 |
} |
3813 |
MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset)) |
3814 |
|
3815 |
/* Write the byte in s to the address contained in d+offset.
 * d is mapped with readreg_offset(); its get_offset() value is added to
 * the immediate offset. A constant d becomes an absolute mov_b_mr. */
MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
{
    /* Original register number, needed for get_offset() after remapping. */
    int dst_vreg = d;

    if (isconst(d)) {
        COMPCALL(mov_b_mr)(live.state[d].val + offset, s);
        return;
    }

    CLOBBER_MOV;
    s = readreg(s, 1);
    d = readreg_offset(d, 4);
    offset += get_offset(dst_vreg);

    raw_mov_b_bRr(d, s, offset);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3832 |
|
3833 |
/* 32-bit byte swap of r.
 * If r currently holds a known constant, the swap is done at translation
 * time with reverse32() and no code is emitted; otherwise r is mapped via
 * rmw(r,4,4) and raw_bswap_32 is emitted.
 * (The previously declared, never-used local copy of r has been removed.) */
MIDFUNC(1,bswap_32,(RW4 r))
{
    if (isconst(r)) {
        uae_u32 oldv = live.state[r].val;
        live.state[r].val = reverse32(oldv);
        return;
    }

    CLOBBER_SW32;
    r = rmw(r, 4, 4);
    raw_bswap_32(r);
    unlock2(r);
}
MENDFUNC(1,bswap_32,(RW4 r))
3849 |
|
3850 |
/* Swap the two low bytes of r, leaving the upper 16 bits untouched.
 * A known constant is rewritten in place at translation time; otherwise
 * r is mapped via rmw(r,2,2) and raw_bswap_16 is emitted. */
MIDFUNC(1,bswap_16,(RW2 r))
{
    if (isconst(r)) {
        uae_u32 oldv = live.state[r].val;
        uae_u32 swapped_low = ((oldv >> 8) & 0xff) | ((oldv << 8) & 0xff00);

        live.state[r].val = swapped_low | (oldv & 0xffff0000);
        return;
    }

    CLOBBER_SW16;
    r = rmw(r, 2, 2);

    raw_bswap_16(r);
    unlock2(r);
}
MENDFUNC(1,bswap_16,(RW2 r))
3866 |
|
3867 |
|
3868 |
|
3869 |
/* Register-to-register long move that emits no raw instruction here.
 * After detaching d, d is recorded as an additional holder of the native
 * register returned for s (live.nat[s].holds[]) and marked DIRTY with 4
 * valid/dirty bytes. A constant s is handled by mov_l_ri instead.
 * The statement order below is significant and mirrors the original. */
MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
{
    if (d == s) {
        /* How pointless! */
        return;
    }
    if (isconst(s)) {
        COMPCALL(mov_l_ri)(d, live.state[s].val);
        return;
    }

    /* s is overwritten by readreg_offset(); keep the original number so
     * its .val can be copied to d afterwards. */
    int src_vreg = s;

    disassociate(d);
    s = readreg_offset(s, 4);

    live.state[d].realreg = s;
    live.state[d].realind = live.nat[s].nholds;
    live.state[d].val = live.state[src_vreg].val;
    live.state[d].validsize = 4;
    live.state[d].dirtysize = 4;
    set_status(d, DIRTY);

    /* Append d to the list of registers held by s. */
    live.nat[s].holds[live.nat[s].nholds] = d;
    live.nat[s].nholds++;
    log_clobberreg(d);
    /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
       d,s,live.state[d].realind,live.nat[s].nholds); */
    unlock2(s);
}
MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3898 |
|
3899 |
/* Emits raw_mov_l_mr(d, s) with d an immediate and s mapped for a 4-byte
 * read. A constant s is forwarded to mov_l_mi. */
MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
{
    if (isconst(s)) {
        COMPCALL(mov_l_mi)(d, live.state[s].val);
        return;
    }

    CLOBBER_MOV;
    s = readreg(s, 4);

    raw_mov_l_mr(d, s);
    unlock2(s);
}
MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
3912 |
|
3913 |
|
3914 |
/* Emits raw_mov_w_mr(d, s) with d an immediate and s mapped for a 2-byte
 * read. A constant s is forwarded to mov_w_mi, truncated to 16 bits. */
MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
{
    if (isconst(s)) {
        COMPCALL(mov_w_mi)(d, (uae_u16)live.state[s].val);
        return;
    }

    CLOBBER_MOV;
    s = readreg(s, 2);

    raw_mov_w_mr(d, s);
    unlock2(s);
}
MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
3927 |
|
3928 |
/* Emits raw_mov_w_rm(d, s): d is mapped for a 2-byte write, s is an
 * immediate. */
MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
{
    CLOBBER_MOV;
    d = writereg(d, 2);

    raw_mov_w_rm(d, s);
    unlock2(d);
}
MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
3937 |
|
3938 |
/* Emits raw_mov_b_mr(d, s) with d an immediate and s mapped for a 1-byte
 * read. A constant s is forwarded to mov_b_mi, truncated to 8 bits. */
MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
{
    if (isconst(s)) {
        COMPCALL(mov_b_mi)(d, (uae_u8)live.state[s].val);
        return;
    }

    CLOBBER_MOV;
    s = readreg(s, 1);

    raw_mov_b_mr(d, s);
    unlock2(s);
}
MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
3952 |
|
3953 |
/* Emits raw_mov_b_rm(d, s): d is mapped for a 1-byte write, s is an
 * immediate. */
MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
{
    CLOBBER_MOV;
    d = writereg(d, 1);

    raw_mov_b_rm(d, s);
    unlock2(d);
}
MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3962 |
|
3963 |
/* Long immediate move: emits nothing itself, it only hands d and the
 * immediate s to set_const(). */
MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
{
    set_const(d, s);
}
MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
3969 |
|
3970 |
/* Emits raw_mov_w_ri(d, s): d is mapped for a 2-byte write, s is an
 * immediate. */
MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
{
    CLOBBER_MOV;
    d = writereg(d, 2);

    raw_mov_w_ri(d, s);
    unlock2(d);
}
MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
3979 |
|
3980 |
/* Emits raw_mov_b_ri(d, s): d is mapped for a 1-byte write, s is an
 * immediate. */
MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
{
    CLOBBER_MOV;
    d = writereg(d, 1);

    raw_mov_b_ri(d, s);
    unlock2(d);
}
MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3989 |
|
3990 |
|
3991 |
/* Emits raw_add_l_mi(d, s); both operands are immediates, so no register
 * mapping is involved. */
MIDFUNC(2,add_l_mi,(IMM d, IMM s))
{
    CLOBBER_ADD;
    raw_add_l_mi(d, s);
}
MENDFUNC(2,add_l_mi,(IMM d, IMM s))
3997 |
|
3998 |
/* Emits raw_add_w_mi(d, s); both operands are immediates, so no register
 * mapping is involved. */
MIDFUNC(2,add_w_mi,(IMM d, IMM s))
{
    CLOBBER_ADD;
    raw_add_w_mi(d, s);
}
MENDFUNC(2,add_w_mi,(IMM d, IMM s))
4004 |
|
4005 |
/* Emits raw_add_b_mi(d, s); both operands are immediates, so no register
 * mapping is involved. */
MIDFUNC(2,add_b_mi,(IMM d, IMM s))
{
    CLOBBER_ADD;
    raw_add_b_mi(d, s);
}
MENDFUNC(2,add_b_mi,(IMM d, IMM s))
4011 |
|
4012 |
|
4013 |
/* Emits raw_test_l_ri(d, i) with d mapped for a 4-byte read. */
MIDFUNC(2,test_l_ri,(R4 d, IMM i))
{
    CLOBBER_TEST;
    d = readreg(d, 4);

    raw_test_l_ri(d, i);
    unlock2(d);
}
MENDFUNC(2,test_l_ri,(R4 d, IMM i))
4022 |
|
4023 |
/* Emits raw_test_l_rr(d, s) with both registers mapped for a 4-byte read. */
MIDFUNC(2,test_l_rr,(R4 d, R4 s))
{
    CLOBBER_TEST;
    d = readreg(d, 4);
    s = readreg(s, 4);

    raw_test_l_rr(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,test_l_rr,(R4 d, R4 s))
4034 |
|
4035 |
/* Emits raw_test_w_rr(d, s) with both registers mapped for a 2-byte read. */
MIDFUNC(2,test_w_rr,(R2 d, R2 s))
{
    CLOBBER_TEST;
    d = readreg(d, 2);
    s = readreg(s, 2);

    raw_test_w_rr(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,test_w_rr,(R2 d, R2 s))
4046 |
|
4047 |
/* Emits raw_test_b_rr(d, s) with both registers mapped for a 1-byte read. */
MIDFUNC(2,test_b_rr,(R1 d, R1 s))
{
    CLOBBER_TEST;
    d = readreg(d, 1);
    s = readreg(s, 1);

    raw_test_b_rr(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,test_b_rr,(R1 d, R1 s))
4058 |
|
4059 |
|
4060 |
/* 32-bit AND of d with the immediate i.
 * When d is a known constant and needflags is not set, the constant is
 * updated in place and nothing is emitted; otherwise raw_and_l_ri is
 * emitted on d mapped via rmw(d,4,4). */
MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
{
    if (isconst(d) && !needflags) {
        live.state[d].val &= i;
        return;
    }

    CLOBBER_AND;
    d = rmw(d, 4, 4);

    raw_and_l_ri(d, i);
    unlock2(d);
}
MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
4074 |
|
4075 |
/* Emits raw_and_l(d, s): s mapped via readreg(s,4), d via rmw(d,4,4). */
MIDFUNC(2,and_l,(RW4 d, R4 s))
{
    CLOBBER_AND;
    s = readreg(s, 4);
    d = rmw(d, 4, 4);

    raw_and_l(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_l,(RW4 d, R4 s))
4086 |
|
4087 |
/* Emits raw_and_w(d, s): s mapped via readreg(s,2), d via rmw(d,2,2). */
MIDFUNC(2,and_w,(RW2 d, R2 s))
{
    CLOBBER_AND;
    s = readreg(s, 2);
    d = rmw(d, 2, 2);

    raw_and_w(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_w,(RW2 d, R2 s))
4098 |
|
4099 |
/* Emits raw_and_b(d, s): s mapped via readreg(s,1), d via rmw(d,1,1). */
MIDFUNC(2,and_b,(RW1 d, R1 s))
{
    CLOBBER_AND;
    s = readreg(s, 1);
    d = rmw(d, 1, 1);

    raw_and_b(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_b,(RW1 d, R1 s))
4110 |
|
4111 |
// gb-- used for making an fpcr value in compemu_fpp.cpp |
4112 |
MIDFUNC(2,or_l_rm,(RW4 d, IMM s)) |
4113 |
{ |
4114 |
CLOBBER_OR; |
4115 |
d=rmw(d,4,4); |
4116 |
|
4117 |
raw_or_l_rm(d,s); |
4118 |
unlock2(d); |
4119 |
} |
4120 |
MENDFUNC(2,or_l_rm,(RW4 d, IMM s)) |
4121 |
|
4122 |
/* 32-bit OR of d with the immediate i.
 * When d is a known constant and needflags is not set, the constant is
 * updated in place and nothing is emitted; otherwise raw_or_l_ri is
 * emitted on d mapped via rmw(d,4,4). */
MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
{
    if (isconst(d) && !needflags) {
        live.state[d].val |= i;
        return;
    }

    CLOBBER_OR;
    d = rmw(d, 4, 4);

    raw_or_l_ri(d, i);
    unlock2(d);
}
MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
4135 |
|
4136 |
/* 32-bit OR of s into d.
 * When both are known constants and needflags is not set, d's constant is
 * updated in place; otherwise raw_or_l is emitted with s mapped via
 * readreg(s,4) and d via rmw(d,4,4). */
MIDFUNC(2,or_l,(RW4 d, R4 s))
{
    if (isconst(d) && isconst(s) && !needflags) {
        live.state[d].val |= live.state[s].val;
        return;
    }

    CLOBBER_OR;
    s = readreg(s, 4);
    d = rmw(d, 4, 4);

    raw_or_l(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_l,(RW4 d, R4 s))
4151 |
|
4152 |
/* Emits raw_or_w(d, s): s mapped via readreg(s,2), d via rmw(d,2,2). */
MIDFUNC(2,or_w,(RW2 d, R2 s))
{
    CLOBBER_OR;
    s = readreg(s, 2);
    d = rmw(d, 2, 2);

    raw_or_w(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_w,(RW2 d, R2 s))
4163 |
|
4164 |
/* Emits raw_or_b(d, s): s mapped via readreg(s,1), d via rmw(d,1,1). */
MIDFUNC(2,or_b,(RW1 d, R1 s))
{
    CLOBBER_OR;
    s = readreg(s, 1);
    d = rmw(d, 1, 1);

    raw_or_b(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_b,(RW1 d, R1 s))
4175 |
|
4176 |
/* Emits raw_adc_l(d, s): s mapped via readreg(s,4), d via rmw(d,4,4). */
MIDFUNC(2,adc_l,(RW4 d, R4 s))
{
    CLOBBER_ADC;
    s = readreg(s, 4);
    d = rmw(d, 4, 4);

    raw_adc_l(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_l,(RW4 d, R4 s))
4188 |
|
4189 |
/* Emits raw_adc_w(d, s): s mapped via readreg(s,2), d via rmw(d,2,2). */
MIDFUNC(2,adc_w,(RW2 d, R2 s))
{
    CLOBBER_ADC;
    s = readreg(s, 2);
    d = rmw(d, 2, 2);

    raw_adc_w(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_w,(RW2 d, R2 s))
4200 |
|
4201 |
/* Emits raw_adc_b(d, s): s mapped via readreg(s,1), d via rmw(d,1,1). */
MIDFUNC(2,adc_b,(RW1 d, R1 s))
{
    CLOBBER_ADC;
    s = readreg(s, 1);
    d = rmw(d, 1, 1);

    raw_adc_b(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_b,(RW1 d, R1 s))
4212 |
|
4213 |
/* 32-bit add of s into d. A constant s is delegated to add_l_ri;
 * otherwise raw_add_l is emitted with s mapped via readreg(s,4) and d via
 * rmw(d,4,4). */
MIDFUNC(2,add_l,(RW4 d, R4 s))
{
    if (isconst(s)) {
        COMPCALL(add_l_ri)(d, live.state[s].val);
        return;
    }

    CLOBBER_ADD;
    s = readreg(s, 4);
    d = rmw(d, 4, 4);

    raw_add_l(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_l,(RW4 d, R4 s))
4230 |
|
4231 |
/* 16-bit add of s into d. A constant s is delegated to add_w_ri with its
 * value truncated to 16 bits; otherwise raw_add_w is emitted with s mapped
 * via readreg(s,2) and d via rmw(d,2,2). */
MIDFUNC(2,add_w,(RW2 d, R2 s))
{
    if (isconst(s)) {
        COMPCALL(add_w_ri)(d, (uae_u16)live.state[s].val);
        return;
    }

    CLOBBER_ADD;
    s = readreg(s, 2);
    d = rmw(d, 2, 2);

    raw_add_w(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_w,(RW2 d, R2 s))
4247 |
|
4248 |
/* 8-bit add of s into d. A constant s is delegated to add_b_ri with its
 * value truncated to 8 bits; otherwise raw_add_b is emitted with s mapped
 * via readreg(s,1) and d via rmw(d,1,1). */
MIDFUNC(2,add_b,(RW1 d, R1 s))
{
    if (isconst(s)) {
        COMPCALL(add_b_ri)(d, (uae_u8)live.state[s].val);
        return;
    }

    CLOBBER_ADD;
    s = readreg(s, 1);
    d = rmw(d, 1, 1);

    raw_add_b(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_b,(RW1 d, R1 s))
4264 |
|
4265 |
/* Subtract the immediate i from the long in d.
 * Cases that emit no code, all requiring needflags to be clear:
 *  - i is zero: nothing to do;
 *  - d is a known constant: the constant is adjusted in place;
 *  - USE_OFFSET: the change is recorded with add_offset(d,-i).
 * Otherwise raw_sub_l_ri is emitted on d mapped via rmw(d,4,4). */
MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
{
    if (!i && !needflags) {
        return;
    }
    if (isconst(d) && !needflags) {
        live.state[d].val -= i;
        return;
    }
#if USE_OFFSET
    if (!needflags) {
        add_offset(d, -i);
        return;
    }
#endif

    CLOBBER_SUB;
    d = rmw(d, 4, 4);

    raw_sub_l_ri(d, i);
    unlock2(d);
}
MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4287 |
|
4288 |
/* Subtract the immediate i from the word in d. Nothing is emitted when i
 * is zero and needflags is clear; otherwise raw_sub_w_ri is emitted on d
 * mapped via rmw(d,2,2). */
MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
{
    if (!i && !needflags) {
        return;
    }

    CLOBBER_SUB;
    d = rmw(d, 2, 2);

    raw_sub_w_ri(d, i);
    unlock2(d);
}
MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4300 |
|
4301 |
/* Subtract the immediate i from the byte in d. Nothing is emitted when i
 * is zero and needflags is clear; otherwise raw_sub_b_ri is emitted on d
 * mapped via rmw(d,1,1). */
MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
{
    if (!i && !needflags) {
        return;
    }

    CLOBBER_SUB;
    d = rmw(d, 1, 1);

    raw_sub_b_ri(d, i);
    unlock2(d);
}
MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4314 |
|
4315 |
/* Add the immediate i to the long in d.
 * Cases that emit no code, all requiring needflags to be clear:
 *  - i is zero: nothing to do;
 *  - d is a known constant: the constant is adjusted in place;
 *  - USE_OFFSET: the change is recorded with add_offset(d,i).
 * Otherwise raw_add_l_ri is emitted on d mapped via rmw(d,4,4). */
MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
{
    if (!i && !needflags) {
        return;
    }
    if (isconst(d) && !needflags) {
        live.state[d].val += i;
        return;
    }
#if USE_OFFSET
    if (!needflags) {
        add_offset(d, i);
        return;
    }
#endif

    CLOBBER_ADD;
    d = rmw(d, 4, 4);

    raw_add_l_ri(d, i);
    unlock2(d);
}
MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
4335 |
|
4336 |
/* Add the immediate i to the word in d. Nothing is emitted when i is zero
 * and needflags is clear; otherwise raw_add_w_ri is emitted on d mapped
 * via rmw(d,2,2). */
MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
{
    if (!i && !needflags) {
        return;
    }

    CLOBBER_ADD;
    d = rmw(d, 2, 2);

    raw_add_w_ri(d, i);
    unlock2(d);
}
MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
4348 |
|
4349 |
/* Add the immediate i to the byte in d. Nothing is emitted when i is zero
 * and needflags is clear; otherwise raw_add_b_ri is emitted on d mapped
 * via rmw(d,1,1). */
MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
{
    if (!i && !needflags) {
        return;
    }

    CLOBBER_ADD;
    d = rmw(d, 1, 1);

    raw_add_b_ri(d, i);
    unlock2(d);
}
MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4362 |
|
4363 |
/* Emits raw_sbb_l(d, s): s mapped via readreg(s,4), d via rmw(d,4,4). */
MIDFUNC(2,sbb_l,(RW4 d, R4 s))
{
    CLOBBER_SBB;
    s = readreg(s, 4);
    d = rmw(d, 4, 4);

    raw_sbb_l(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_l,(RW4 d, R4 s))
4374 |
|
4375 |
/* Emits raw_sbb_w(d, s): s mapped via readreg(s,2), d via rmw(d,2,2). */
MIDFUNC(2,sbb_w,(RW2 d, R2 s))
{
    CLOBBER_SBB;
    s = readreg(s, 2);
    d = rmw(d, 2, 2);

    raw_sbb_w(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_w,(RW2 d, R2 s))
4386 |
|
4387 |
/* Emits raw_sbb_b(d, s): s mapped via readreg(s,1), d via rmw(d,1,1). */
MIDFUNC(2,sbb_b,(RW1 d, R1 s))
{
    CLOBBER_SBB;
    s = readreg(s, 1);
    d = rmw(d, 1, 1);

    raw_sbb_b(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4398 |
|
4399 |
/* 32-bit subtract of s from d. A constant s is delegated to sub_l_ri;
 * otherwise raw_sub_l is emitted with s mapped via readreg(s,4) and d via
 * rmw(d,4,4). */
MIDFUNC(2,sub_l,(RW4 d, R4 s))
{
    if (isconst(s)) {
        COMPCALL(sub_l_ri)(d, live.state[s].val);
        return;
    }

    CLOBBER_SUB;
    s = readreg(s, 4);
    d = rmw(d, 4, 4);

    raw_sub_l(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_l,(RW4 d, R4 s))
4415 |
|
4416 |
/* 16-bit subtract of s from d. A constant s is delegated to sub_w_ri with
 * its value truncated to 16 bits; otherwise raw_sub_w is emitted with s
 * mapped via readreg(s,2) and d via rmw(d,2,2). */
MIDFUNC(2,sub_w,(RW2 d, R2 s))
{
    if (isconst(s)) {
        COMPCALL(sub_w_ri)(d, (uae_u16)live.state[s].val);
        return;
    }

    CLOBBER_SUB;
    s = readreg(s, 2);
    d = rmw(d, 2, 2);

    raw_sub_w(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_w,(RW2 d, R2 s))
4432 |
|
4433 |
/* 8-bit subtract of s from d. A constant s is delegated to sub_b_ri with
 * its value truncated to 8 bits; otherwise raw_sub_b is emitted with s
 * mapped via readreg(s,1) and d via rmw(d,1,1). */
MIDFUNC(2,sub_b,(RW1 d, R1 s))
{
    if (isconst(s)) {
        COMPCALL(sub_b_ri)(d, (uae_u8)live.state[s].val);
        return;
    }

    CLOBBER_SUB;
    s = readreg(s, 1);
    d = rmw(d, 1, 1);

    raw_sub_b(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_b,(RW1 d, R1 s))
4449 |
|
4450 |
/* Emits raw_cmp_l(d, s); both registers are only read (4 bytes each). */
MIDFUNC(2,cmp_l,(R4 d, R4 s))
{
    CLOBBER_CMP;
    s = readreg(s, 4);
    d = readreg(d, 4);

    raw_cmp_l(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_l,(R4 d, R4 s))
4461 |
|
4462 |
/* Emits raw_cmp_l_ri(r, i) with r mapped for a 4-byte read. */
MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
{
    CLOBBER_CMP;
    r = readreg(r, 4);

    raw_cmp_l_ri(r, i);
    unlock2(r);
}
MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4471 |
|
4472 |
/* Emits raw_cmp_w(d, s); both registers are only read (2 bytes each). */
MIDFUNC(2,cmp_w,(R2 d, R2 s))
{
    CLOBBER_CMP;
    s = readreg(s, 2);
    d = readreg(d, 2);

    raw_cmp_w(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_w,(R2 d, R2 s))
4483 |
|
4484 |
/* Emits raw_cmp_b(d, s); both registers are only read (1 byte each). */
MIDFUNC(2,cmp_b,(R1 d, R1 s))
{
    CLOBBER_CMP;
    s = readreg(s, 1);
    d = readreg(d, 1);

    raw_cmp_b(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_b,(R1 d, R1 s))
4495 |
|
4496 |
|
4497 |
/* Emits raw_xor_l(d, s): s mapped via readreg(s,4), d via rmw(d,4,4). */
MIDFUNC(2,xor_l,(RW4 d, R4 s))
{
    CLOBBER_XOR;
    s = readreg(s, 4);
    d = rmw(d, 4, 4);

    raw_xor_l(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_l,(RW4 d, R4 s))
4508 |
|
4509 |
/* Emits raw_xor_w(d, s): s mapped via readreg(s,2), d via rmw(d,2,2). */
MIDFUNC(2,xor_w,(RW2 d, R2 s))
{
    CLOBBER_XOR;
    s = readreg(s, 2);
    d = rmw(d, 2, 2);

    raw_xor_w(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_w,(RW2 d, R2 s))
4520 |
|
4521 |
/* Emits raw_xor_b(d, s): s mapped via readreg(s,1), d via rmw(d,1,1). */
MIDFUNC(2,xor_b,(RW1 d, R1 s))
{
    CLOBBER_XOR;
    s = readreg(s, 1);
    d = rmw(d, 1, 1);

    raw_xor_b(d, s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_b,(RW1 d, R1 s))
4532 |
|
4533 |
/* Emit a call through register r with one input (in1, isize bytes, placed
 * in REG_PAR1) and one output (out1, osize bytes). After the call, out1 is
 * recorded as the sole register held by REG_RESULT and marked DIRTY.
 * The order of the allocator calls below is significant and is kept
 * exactly as in the original. */
MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
{
    clobber_flags();
    remove_all_offsets();

    if (osize != 4) {
        /* Only part of out1 is written, so tomem_c() is applied to it first. */
        tomem_c(out1);
    }
    else if (out1 != in1 && out1 != r) {
        /* Full-width result in a register that is not also an input. */
        COMPCALL(forget_about)(out1);
    }

    in1 = readreg_specific(in1, isize, REG_PAR1);
    r = readreg(r, 4);
    prepare_for_call_1();  /* This should ensure that there won't be
                              any need for swapping nregs in prepare_for_call_2
                           */
#if USE_NORMAL_CALLING_CONVENTION
    raw_push_l_r(in1);
#endif
    unlock2(in1);
    unlock2(r);

    prepare_for_call_2();
    raw_call_r(r);

#if USE_NORMAL_CALLING_CONVENTION
    raw_inc_sp(4);
#endif

    /* Bind out1 to REG_RESULT as its only holder. */
    live.nat[REG_RESULT].holds[0] = out1;
    live.nat[REG_RESULT].nholds = 1;
    live.nat[REG_RESULT].touched = touchcnt++;

    live.state[out1].realreg = REG_RESULT;
    live.state[out1].realind = 0;
    live.state[out1].val = 0;
    live.state[out1].validsize = osize;
    live.state[out1].dirtysize = osize;
    set_status(out1, DIRTY);
}
MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4577 |
|
4578 |
/* Emit a call through register r with two inputs and no output: in1
 * (isize1 bytes) is placed in REG_PAR1 and in2 (isize2 bytes) in REG_PAR2.
 * The order of the allocator calls below is kept exactly as in the
 * original. */
MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
{
    clobber_flags();
    remove_all_offsets();

    in1 = readreg_specific(in1, isize1, REG_PAR1);
    in2 = readreg_specific(in2, isize2, REG_PAR2);
    r = readreg(r, 4);
    prepare_for_call_1();  /* This should ensure that there won't be
                              any need for swapping nregs in prepare_for_call_2
                           */
#if USE_NORMAL_CALLING_CONVENTION
    raw_push_l_r(in2);
    raw_push_l_r(in1);
#endif
    unlock2(r);
    unlock2(in1);
    unlock2(in2);

    prepare_for_call_2();
    raw_call_r(r);

#if USE_NORMAL_CALLING_CONVENTION
    raw_inc_sp(8);
#endif
}
MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4602 |
|
4603 |
/* forget_about() takes a mid-layer register */ |
4604 |
MIDFUNC(1,forget_about,(W4 r)) |
4605 |
{ |
4606 |
if (isinreg(r)) |
4607 |
disassociate(r); |
4608 |
live.state[r].val=0; |
4609 |
set_status(r,UNDEF); |
4610 |
} |
4611 |
MENDFUNC(1,forget_about,(W4 r)) |
4612 |
|
4613 |
MIDFUNC(0,nop,(void))
{
	/* Forwards straight to the low-level emitter; no register state is touched. */
	raw_nop();
}
MENDFUNC(0,nop,(void))
4618 |
|
4619 |
|
4620 |
MIDFUNC(1,f_forget_about,(FW r))
{
	/* FP counterpart of forget_about(): detach r if it is in a native
	 * FP register, then mark it UNDEF. */
	if (f_isinreg(r)) {
		f_disassociate(r);
	}
	live.fate[r].status=UNDEF;
}
MENDFUNC(1,f_forget_about,(FW r))
4627 |
|
4628 |
MIDFUNC(1,fmov_pi,(FW r))
{
	/* Get a native FP register for r via f_writereg(), let
	 * raw_fmov_pi() emit into it, then f_unlock() it. */
	const FW freg = f_writereg(r);

	raw_fmov_pi(freg);
	f_unlock(freg);
}
MENDFUNC(1,fmov_pi,(FW r))
4635 |
|
4636 |
MIDFUNC(1,fmov_log10_2,(FW r))
{
	/* Same shape as the other constant loaders: f_writereg(),
	 * raw_fmov_log10_2(), f_unlock(). */
	const FW freg = f_writereg(r);

	raw_fmov_log10_2(freg);
	f_unlock(freg);
}
MENDFUNC(1,fmov_log10_2,(FW r))
4643 |
|
4644 |
MIDFUNC(1,fmov_log2_e,(FW r))
{
	/* f_writereg() for the destination, raw_fmov_log2_e() to emit,
	 * f_unlock() to finish. */
	const FW freg = f_writereg(r);

	raw_fmov_log2_e(freg);
	f_unlock(freg);
}
MENDFUNC(1,fmov_log2_e,(FW r))
4651 |
|
4652 |
MIDFUNC(1,fmov_loge_2,(FW r))
{
	/* f_writereg() for the destination, raw_fmov_loge_2() to emit,
	 * f_unlock() to finish. */
	const FW freg = f_writereg(r);

	raw_fmov_loge_2(freg);
	f_unlock(freg);
}
MENDFUNC(1,fmov_loge_2,(FW r))
4659 |
|
4660 |
MIDFUNC(1,fmov_1,(FW r))
{
	/* f_writereg() for the destination, raw_fmov_1() to emit,
	 * f_unlock() to finish. */
	const FW freg = f_writereg(r);

	raw_fmov_1(freg);
	f_unlock(freg);
}
MENDFUNC(1,fmov_1,(FW r))
4667 |
|
4668 |
MIDFUNC(1,fmov_0,(FW r))
{
	/* f_writereg() for the destination, raw_fmov_0() to emit,
	 * f_unlock() to finish. */
	const FW freg = f_writereg(r);

	raw_fmov_0(freg);
	f_unlock(freg);
}
MENDFUNC(1,fmov_0,(FW r))
4675 |
|
4676 |
MIDFUNC(2,fmov_rm,(FW r, MEMR m))
{
	/* Memory -> FP register: destination obtained with f_writereg(),
	 * transfer emitted by raw_fmov_rm(). */
	const FW freg = f_writereg(r);

	raw_fmov_rm(freg,m);
	f_unlock(freg);
}
MENDFUNC(2,fmov_rm,(FW r, MEMR m))
4683 |
|
4684 |
MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
{
	/* Memory -> FP register through raw_fmovi_rm(); destination
	 * obtained with f_writereg(). */
	const FW freg = f_writereg(r);

	raw_fmovi_rm(freg,m);
	f_unlock(freg);
}
MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
4691 |
|
4692 |
MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
{
	/* FP register -> memory through raw_fmovi_mr(); source obtained
	 * with f_readreg(). */
	const FR freg = f_readreg(r);

	raw_fmovi_mr(m,freg);
	f_unlock(freg);
}
MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
4699 |
|
4700 |
MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
{
	/* Memory -> FP register through raw_fmovs_rm(); destination
	 * obtained with f_writereg(). */
	const FW freg = f_writereg(r);

	raw_fmovs_rm(freg,m);
	f_unlock(freg);
}
MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
4707 |
|
4708 |
MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
{
	/* FP register -> memory through raw_fmovs_mr(); source obtained
	 * with f_readreg(). */
	const FR freg = f_readreg(r);

	raw_fmovs_mr(m,freg);
	f_unlock(freg);
}
MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
4715 |
|
4716 |
MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
{
	/* FP register -> memory through raw_fmov_ext_mr(); source obtained
	 * with f_readreg(). */
	const FR freg = f_readreg(r);

	raw_fmov_ext_mr(m,freg);
	f_unlock(freg);
}
MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4723 |
|
4724 |
MIDFUNC(2,fmov_mr,(MEMW m, FR r))
{
	/* FP register -> memory through raw_fmov_mr(); source obtained
	 * with f_readreg(). */
	const FR freg = f_readreg(r);

	raw_fmov_mr(m,freg);
	f_unlock(freg);
}
MENDFUNC(2,fmov_mr,(MEMW m, FR r))
4731 |
|
4732 |
MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
{
	/* Memory -> FP register through raw_fmov_ext_rm(); destination
	 * obtained with f_writereg(). */
	const FW freg = f_writereg(r);

	raw_fmov_ext_rm(freg,m);
	f_unlock(freg);
}
MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4739 |
|
4740 |
MIDFUNC(2,fmov_rr,(FW d, FR s))
{
	/* Copy FP virtual register s into d. With USE_F_ALIAS no code is
	 * emitted here: d is simply recorded as an additional holder of
	 * the native register that contains s. */
	if (d==s) {
		/* How pointless! */
		return;
	}
#if USE_F_ALIAS
	f_disassociate(d);
	const FR src = f_readreg(s);
	live.fate[d].realreg=src;
	live.fate[d].realind=live.fat[src].nholds;
	live.fate[d].status=DIRTY;
	live.fat[src].holds[live.fat[src].nholds]=d;
	live.fat[src].nholds++;
	f_unlock(src);
#else
	const FR src = f_readreg(s);
	const FW dst = f_writereg(d);
	raw_fmov_rr(dst,src);
	f_unlock(src);
	f_unlock(dst);
#endif
}
MENDFUNC(2,fmov_rr,(FW d, FR s))
4763 |
|
4764 |
MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
{
	/* Read the 32-bit index register, hand it with base to
	 * raw_fldcw_m_indexed(), then unlock it. */
	const R4 idx = readreg(index,4);

	raw_fldcw_m_indexed(idx,base);
	unlock2(idx);
}
MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4772 |
|
4773 |
MIDFUNC(1,ftst_r,(FR r))
{
	/* Source obtained with f_readreg(), test emitted by raw_ftst_r(). */
	const FR freg = f_readreg(r);

	raw_ftst_r(freg);
	f_unlock(freg);
}
MENDFUNC(1,ftst_r,(FR r))
4780 |
|
4781 |
MIDFUNC(0,dont_care_fflags,(void))
{
	/* Detach FP_RESULT from whatever native FP register holds it. */
	f_disassociate(FP_RESULT);
}
MENDFUNC(0,dont_care_fflags,(void))
4786 |
|
4787 |
MIDFUNC(2,fsqrt_rr,(FW d, FR s))
{
	/* Unary FP op: source read first, destination write-only. */
	const FR src = f_readreg(s);
	const FW dst = f_writereg(d);

	raw_fsqrt_rr(dst,src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fsqrt_rr,(FW d, FR s))
4796 |
|
4797 |
MIDFUNC(2,fabs_rr,(FW d, FR s))
{
	/* Unary FP op: source read first, destination write-only. */
	const FR src = f_readreg(s);
	const FW dst = f_writereg(d);

	raw_fabs_rr(dst,src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fabs_rr,(FW d, FR s))
4806 |
|
4807 |
MIDFUNC(2,fsin_rr,(FW d, FR s))
{
	/* Unary FP op: source read first, destination write-only. */
	const FR src = f_readreg(s);
	const FW dst = f_writereg(d);

	raw_fsin_rr(dst,src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fsin_rr,(FW d, FR s))
4816 |
|
4817 |
MIDFUNC(2,fcos_rr,(FW d, FR s))
{
	/* Unary FP op: source read first, destination write-only. */
	const FR src = f_readreg(s);
	const FW dst = f_writereg(d);

	raw_fcos_rr(dst,src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fcos_rr,(FW d, FR s))
4826 |
|
4827 |
MIDFUNC(2,ftwotox_rr,(FW d, FR s))
{
	/* Unary FP op: source read first, destination write-only. */
	const FR src = f_readreg(s);
	const FW dst = f_writereg(d);

	raw_ftwotox_rr(dst,src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,ftwotox_rr,(FW d, FR s))
4836 |
|
4837 |
MIDFUNC(2,fetox_rr,(FW d, FR s))
{
	/* Unary FP op: source read first, destination write-only. */
	const FR src = f_readreg(s);
	const FW dst = f_writereg(d);

	raw_fetox_rr(dst,src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fetox_rr,(FW d, FR s))
4846 |
|
4847 |
MIDFUNC(2,frndint_rr,(FW d, FR s))
{
	/* Unary FP op: source read first, destination write-only. */
	const FR src = f_readreg(s);
	const FW dst = f_writereg(d);

	raw_frndint_rr(dst,src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,frndint_rr,(FW d, FR s))
4856 |
|
4857 |
MIDFUNC(2,flog2_rr,(FW d, FR s))
{
	/* Unary FP op: source read first, destination write-only. */
	const FR src = f_readreg(s);
	const FW dst = f_writereg(d);

	raw_flog2_rr(dst,src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,flog2_rr,(FW d, FR s))
4866 |
|
4867 |
MIDFUNC(2,fneg_rr,(FW d, FR s))
{
	/* Unary FP op: source read first, destination write-only. */
	const FR src = f_readreg(s);
	const FW dst = f_writereg(d);

	raw_fneg_rr(dst,src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fneg_rr,(FW d, FR s))
4876 |
|
4877 |
MIDFUNC(2,fadd_rr,(FRW d, FR s))
{
	/* Binary FP op: d is both operand and result, hence f_rmw(). */
	const FR src = f_readreg(s);
	const FRW dst = f_rmw(d);

	raw_fadd_rr(dst,src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fadd_rr,(FRW d, FR s))
4886 |
|
4887 |
MIDFUNC(2,fsub_rr,(FRW d, FR s))
{
	/* Binary FP op: d is both operand and result, hence f_rmw(). */
	const FR src = f_readreg(s);
	const FRW dst = f_rmw(d);

	raw_fsub_rr(dst,src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fsub_rr,(FRW d, FR s))
4896 |
|
4897 |
MIDFUNC(2,fcmp_rr,(FR d, FR s))
{
	/* Comparison: both operands are only read (d first, then s). */
	const FR lhs = f_readreg(d);
	const FR rhs = f_readreg(s);

	raw_fcmp_rr(lhs,rhs);
	f_unlock(rhs);
	f_unlock(lhs);
}
MENDFUNC(2,fcmp_rr,(FR d, FR s))
4906 |
|
4907 |
MIDFUNC(2,fdiv_rr,(FRW d, FR s))
{
	/* Binary FP op: d is both operand and result, hence f_rmw(). */
	const FR src = f_readreg(s);
	const FRW dst = f_rmw(d);

	raw_fdiv_rr(dst,src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fdiv_rr,(FRW d, FR s))
4916 |
|
4917 |
MIDFUNC(2,frem_rr,(FRW d, FR s))
{
	/* Binary FP op: d is both operand and result, hence f_rmw(). */
	const FR src = f_readreg(s);
	const FRW dst = f_rmw(d);

	raw_frem_rr(dst,src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,frem_rr,(FRW d, FR s))
4926 |
|
4927 |
MIDFUNC(2,frem1_rr,(FRW d, FR s))
{
	/* Binary FP op: d is both operand and result, hence f_rmw(). */
	const FR src = f_readreg(s);
	const FRW dst = f_rmw(d);

	raw_frem1_rr(dst,src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,frem1_rr,(FRW d, FR s))
4936 |
|
4937 |
MIDFUNC(2,fmul_rr,(FRW d, FR s))
{
	/* Binary FP op: d is both operand and result, hence f_rmw(). */
	const FR src = f_readreg(s);
	const FRW dst = f_rmw(d);

	raw_fmul_rr(dst,src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fmul_rr,(FRW d, FR s))
4946 |
|
4947 |
/******************************************************************** |
4948 |
* Support functions exposed to gencomp. CREATE time * |
4949 |
********************************************************************/ |
4950 |
|
4951 |
void set_zero(int r, int tmp) |
4952 |
{ |
4953 |
if (setzflg_uses_bsf) |
4954 |
bsf_l_rr(r,r); |
4955 |
else |
4956 |
simulate_bsf(tmp,r); |
4957 |
} |
4958 |
|
4959 |
int kill_rodent(int r) |
4960 |
{ |
4961 |
return KILLTHERAT && |
4962 |
have_rat_stall && |
4963 |
(live.state[r].status==INMEM || |
4964 |
live.state[r].status==CLEAN || |
4965 |
live.state[r].status==ISCONST || |
4966 |
live.state[r].dirtysize==4); |
4967 |
} |
4968 |
|
4969 |
/* Returns the value recorded for virtual register r. Under the Dif
 * check, a register that is not constant is logged and abort() is called. */
uae_u32 get_const(int r)
{
	Dif (!isconst(r)) {
		write_log("Register %d should be constant, but isn't\n",r);
		abort();
	}

	return live.state[r].val;
}
4977 |
|
4978 |
void sync_m68k_pc(void) |
4979 |
{ |
4980 |
if (m68k_pc_offset) { |
4981 |
add_l_ri(PC_P,m68k_pc_offset); |
4982 |
comp_pc_p+=m68k_pc_offset; |
4983 |
m68k_pc_offset=0; |
4984 |
} |
4985 |
} |
4986 |
|
4987 |
/******************************************************************** |
4988 |
* Scratch registers management * |
4989 |
********************************************************************/ |
4990 |
|
4991 |
struct scratch_t { |
4992 |
uae_u32 regs[VREGS]; |
4993 |
fpu_register fregs[VFREGS]; |
4994 |
}; |
4995 |
|
4996 |
static scratch_t scratch; |
4997 |
|
4998 |
/******************************************************************** |
4999 |
* Support functions exposed to newcpu * |
5000 |
********************************************************************/ |
5001 |
|
5002 |
/* Maps a boolean onto the literal "on" / "off" for log output. */
static inline const char *str_on_off(bool b)
{
	if (b)
		return "on";
	return "off";
}
5006 |
|
5007 |
void compiler_init(void) |
5008 |
{ |
5009 |
static bool initialized = false; |
5010 |
if (initialized) |
5011 |
return; |
5012 |
|
5013 |
#if JIT_DEBUG |
5014 |
// JIT debug mode ? |
5015 |
JITDebug = PrefsFindBool("jitdebug"); |
5016 |
#endif |
5017 |
write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no"); |
5018 |
|
5019 |
#ifdef USE_JIT_FPU |
5020 |
// Use JIT compiler for FPU instructions ? |
5021 |
avoid_fpu = !PrefsFindBool("jitfpu"); |
5022 |
#else |
5023 |
// JIT FPU is always disabled |
5024 |
avoid_fpu = true; |
5025 |
#endif |
5026 |
write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no"); |
5027 |
|
5028 |
// Get size of the translation cache (in KB) |
5029 |
cache_size = PrefsFindInt32("jitcachesize"); |
5030 |
write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size); |
5031 |
|
5032 |
// Initialize target CPU (check for features, e.g. CMOV, rat stalls) |
5033 |
raw_init_cpu(); |
5034 |
setzflg_uses_bsf = target_check_bsf(); |
5035 |
write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no"); |
5036 |
write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no"); |
5037 |
write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps); |
5038 |
|
5039 |
// Translation cache flush mechanism |
5040 |
lazy_flush = PrefsFindBool("jitlazyflush"); |
5041 |
write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush)); |
5042 |
flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard; |
5043 |
|
5044 |
// Compiler features |
5045 |
write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1)); |
5046 |
write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS)); |
5047 |
write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET)); |
5048 |
#if USE_INLINING |
5049 |
follow_const_jumps = PrefsFindBool("jitinline"); |
5050 |
#endif |
5051 |
write_log("<JIT compiler> : translate through constant jumps : %s\n", str_on_off(follow_const_jumps)); |
5052 |
write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA)); |
5053 |
|
5054 |
// Build compiler tables |
5055 |
build_comp(); |
5056 |
|
5057 |
initialized = true; |
5058 |
|
5059 |
#if PROFILE_UNTRANSLATED_INSNS |
5060 |
write_log("<JIT compiler> : gather statistics on untranslated insns count\n"); |
5061 |
#endif |
5062 |
|
5063 |
#if PROFILE_COMPILE_TIME |
5064 |
write_log("<JIT compiler> : gather statistics on translation time\n"); |
5065 |
emul_start_time = clock(); |
5066 |
#endif |
5067 |
} |
5068 |
|
5069 |
void compiler_exit(void) |
5070 |
{ |
5071 |
#if PROFILE_COMPILE_TIME |
5072 |
emul_end_time = clock(); |
5073 |
#endif |
5074 |
|
5075 |
// Deallocate translation cache |
5076 |
if (compiled_code) { |
5077 |
vm_release(compiled_code, cache_size * 1024); |
5078 |
compiled_code = 0; |
5079 |
} |
5080 |
|
5081 |
// Deallocate popallspace |
5082 |
if (popallspace) { |
5083 |
vm_release(popallspace, POPALLSPACE_SIZE); |
5084 |
popallspace = 0; |
5085 |
} |
5086 |
|
5087 |
#if PROFILE_COMPILE_TIME |
5088 |
write_log("### Compile Block statistics\n"); |
5089 |
write_log("Number of calls to compile_block : %d\n", compile_count); |
5090 |
uae_u32 emul_time = emul_end_time - emul_start_time; |
5091 |
write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC)); |
5092 |
write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC), |
5093 |
100.0*double(compile_time)/double(emul_time)); |
5094 |
write_log("\n"); |
5095 |
#endif |
5096 |
|
5097 |
#if PROFILE_UNTRANSLATED_INSNS |
5098 |
uae_u64 untranslated_count = 0; |
5099 |
for (int i = 0; i < 65536; i++) { |
5100 |
opcode_nums[i] = i; |
5101 |
untranslated_count += raw_cputbl_count[i]; |
5102 |
} |
5103 |
write_log("Sorting out untranslated instructions count...\n"); |
5104 |
qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn); |
5105 |
write_log("\nRank Opc Count Name\n"); |
5106 |
for (int i = 0; i < untranslated_top_ten; i++) { |
5107 |
uae_u32 count = raw_cputbl_count[opcode_nums[i]]; |
5108 |
struct instr *dp; |
5109 |
struct mnemolookup *lookup; |
5110 |
if (!count) |
5111 |
break; |
5112 |
dp = table68k + opcode_nums[i]; |
5113 |
for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++) |
5114 |
; |
5115 |
write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name); |
5116 |
} |
5117 |
#endif |
5118 |
|
5119 |
#if RECORD_REGISTER_USAGE |
5120 |
int reg_count_ids[16]; |
5121 |
uint64 tot_reg_count = 0; |
5122 |
for (int i = 0; i < 16; i++) { |
5123 |
reg_count_ids[i] = i; |
5124 |
tot_reg_count += reg_count[i]; |
5125 |
} |
5126 |
qsort(reg_count_ids, 16, sizeof(int), reg_count_compare); |
5127 |
uint64 cum_reg_count = 0; |
5128 |
for (int i = 0; i < 16; i++) { |
5129 |
int r = reg_count_ids[i]; |
5130 |
cum_reg_count += reg_count[r]; |
5131 |
printf("%c%d : %16ld %2.1f%% [%2.1f]\n", r < 8 ? 'D' : 'A', r % 8, |
5132 |
reg_count[r], |
5133 |
100.0*double(reg_count[r])/double(tot_reg_count), |
5134 |
100.0*double(cum_reg_count)/double(tot_reg_count)); |
5135 |
} |
5136 |
#endif |
5137 |
} |
5138 |
|
5139 |
bool compiler_use_jit(void) |
5140 |
{ |
5141 |
// Check for the "jit" prefs item |
5142 |
if (!PrefsFindBool("jit")) |
5143 |
return false; |
5144 |
|
5145 |
// Don't use JIT if translation cache size is less then MIN_CACHE_SIZE KB |
5146 |
if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) { |
5147 |
write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE); |
5148 |
return false; |
5149 |
} |
5150 |
|
5151 |
// Enable JIT for 68020+ emulation only |
5152 |
if (CPUType < 2) { |
5153 |
write_log("<JIT compiler> : JIT is not supported in 680%d0 emulation mode, disabling.\n", CPUType); |
5154 |
return false; |
5155 |
} |
5156 |
|
5157 |
return true; |
5158 |
} |
5159 |
|
5160 |
void init_comp(void) |
5161 |
{ |
5162 |
int i; |
5163 |
uae_s8* cb=can_byte; |
5164 |
uae_s8* cw=can_word; |
5165 |
uae_s8* au=always_used; |
5166 |
|
5167 |
#if RECORD_REGISTER_USAGE |
5168 |
for (i=0;i<16;i++) |
5169 |
reg_count_local[i] = 0; |
5170 |
#endif |
5171 |
|
5172 |
for (i=0;i<VREGS;i++) { |
5173 |
live.state[i].realreg=-1; |
5174 |
live.state[i].needflush=NF_SCRATCH; |
5175 |
live.state[i].val=0; |
5176 |
set_status(i,UNDEF); |
5177 |
} |
5178 |
|
5179 |
for (i=0;i<VFREGS;i++) { |
5180 |
live.fate[i].status=UNDEF; |
5181 |
live.fate[i].realreg=-1; |
5182 |
live.fate[i].needflush=NF_SCRATCH; |
5183 |
} |
5184 |
|
5185 |
for (i=0;i<VREGS;i++) { |
5186 |
if (i<16) { /* First 16 registers map to 68k registers */ |
5187 |
live.state[i].mem=((uae_u32*)®s)+i; |
5188 |
live.state[i].needflush=NF_TOMEM; |
5189 |
set_status(i,INMEM); |
5190 |
} |
5191 |
else |
5192 |
live.state[i].mem=scratch.regs+i; |
5193 |
} |
5194 |
live.state[PC_P].mem=(uae_u32*)&(regs.pc_p); |
5195 |
live.state[PC_P].needflush=NF_TOMEM; |
5196 |
set_const(PC_P,(uintptr)comp_pc_p); |
5197 |
|
5198 |
live.state[FLAGX].mem=(uae_u32*)&(regflags.x); |
5199 |
live.state[FLAGX].needflush=NF_TOMEM; |
5200 |
set_status(FLAGX,INMEM); |
5201 |
|
5202 |
live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv); |
5203 |
live.state[FLAGTMP].needflush=NF_TOMEM; |
5204 |
set_status(FLAGTMP,INMEM); |
5205 |
|
5206 |
live.state[NEXT_HANDLER].needflush=NF_HANDLER; |
5207 |
set_status(NEXT_HANDLER,UNDEF); |
5208 |
|
5209 |
for (i=0;i<VFREGS;i++) { |
5210 |
if (i<8) { /* First 8 registers map to 68k FPU registers */ |
5211 |
live.fate[i].mem=(uae_u32*)fpu_register_address(i); |
5212 |
live.fate[i].needflush=NF_TOMEM; |
5213 |
live.fate[i].status=INMEM; |
5214 |
} |
5215 |
else if (i==FP_RESULT) { |
5216 |
live.fate[i].mem=(uae_u32*)(&fpu.result); |
5217 |
live.fate[i].needflush=NF_TOMEM; |
5218 |
live.fate[i].status=INMEM; |
5219 |
} |
5220 |
else |
5221 |
live.fate[i].mem=(uae_u32*)(&scratch.fregs[i]); |
5222 |
} |
5223 |
|
5224 |
|
5225 |
for (i=0;i<N_REGS;i++) { |
5226 |
live.nat[i].touched=0; |
5227 |
live.nat[i].nholds=0; |
5228 |
live.nat[i].locked=0; |
5229 |
if (*cb==i) { |
5230 |
live.nat[i].canbyte=1; cb++; |
5231 |
} else live.nat[i].canbyte=0; |
5232 |
if (*cw==i) { |
5233 |
live.nat[i].canword=1; cw++; |
5234 |
} else live.nat[i].canword=0; |
5235 |
if (*au==i) { |
5236 |
live.nat[i].locked=1; au++; |
5237 |
} |
5238 |
} |
5239 |
|
5240 |
for (i=0;i<N_FREGS;i++) { |
5241 |
live.fat[i].touched=0; |
5242 |
live.fat[i].nholds=0; |
5243 |
live.fat[i].locked=0; |
5244 |
} |
5245 |
|
5246 |
touchcnt=1; |
5247 |
m68k_pc_offset=0; |
5248 |
live.flags_in_flags=TRASH; |
5249 |
live.flags_on_stack=VALID; |
5250 |
live.flags_are_important=1; |
5251 |
|
5252 |
raw_fp_init(); |
5253 |
} |
5254 |
|
5255 |
/* Only do this if you really mean it! The next call should be to init!*/ |
5256 |
void flush(int save_regs) |
5257 |
{ |
5258 |
int fi,i; |
5259 |
|
5260 |
log_flush(); |
5261 |
flush_flags(); /* low level */ |
5262 |
sync_m68k_pc(); /* mid level */ |
5263 |
|
5264 |
if (save_regs) { |
5265 |
for (i=0;i<VFREGS;i++) { |
5266 |
if (live.fate[i].needflush==NF_SCRATCH || |
5267 |
live.fate[i].status==CLEAN) { |
5268 |
f_disassociate(i); |
5269 |
} |
5270 |
} |
5271 |
for (i=0;i<VREGS;i++) { |
5272 |
if (live.state[i].needflush==NF_TOMEM) { |
5273 |
switch(live.state[i].status) { |
5274 |
case INMEM: |
5275 |
if (live.state[i].val) { |
5276 |
raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val); |
5277 |
log_vwrite(i); |
5278 |
live.state[i].val=0; |
5279 |
} |
5280 |
break; |
5281 |
case CLEAN: |
5282 |
case DIRTY: |
5283 |
remove_offset(i,-1); tomem(i); break; |
5284 |
case ISCONST: |
5285 |
if (i!=PC_P) |
5286 |
writeback_const(i); |
5287 |
break; |
5288 |
default: break; |
5289 |
} |
5290 |
Dif (live.state[i].val && i!=PC_P) { |
5291 |
write_log("Register %d still has val %x\n", |
5292 |
i,live.state[i].val); |
5293 |
} |
5294 |
} |
5295 |
} |
5296 |
for (i=0;i<VFREGS;i++) { |
5297 |
if (live.fate[i].needflush==NF_TOMEM && |
5298 |
live.fate[i].status==DIRTY) { |
5299 |
f_evict(i); |
5300 |
} |
5301 |
} |
5302 |
raw_fp_cleanup_drop(); |
5303 |
} |
5304 |
if (needflags) { |
5305 |
write_log("Warning! flush with needflags=1!\n"); |
5306 |
} |
5307 |
} |
5308 |
|
5309 |
static void flush_keepflags(void) |
5310 |
{ |
5311 |
int fi,i; |
5312 |
|
5313 |
for (i=0;i<VFREGS;i++) { |
5314 |
if (live.fate[i].needflush==NF_SCRATCH || |
5315 |
live.fate[i].status==CLEAN) { |
5316 |
f_disassociate(i); |
5317 |
} |
5318 |
} |
5319 |
for (i=0;i<VREGS;i++) { |
5320 |
if (live.state[i].needflush==NF_TOMEM) { |
5321 |
switch(live.state[i].status) { |
5322 |
case INMEM: |
5323 |
/* Can't adjust the offset here --- that needs "add" */ |
5324 |
break; |
5325 |
case CLEAN: |
5326 |
case DIRTY: |
5327 |
remove_offset(i,-1); tomem(i); break; |
5328 |
case ISCONST: |
5329 |
if (i!=PC_P) |
5330 |
writeback_const(i); |
5331 |
break; |
5332 |
default: break; |
5333 |
} |
5334 |
} |
5335 |
} |
5336 |
for (i=0;i<VFREGS;i++) { |
5337 |
if (live.fate[i].needflush==NF_TOMEM && |
5338 |
live.fate[i].status==DIRTY) { |
5339 |
f_evict(i); |
5340 |
} |
5341 |
} |
5342 |
raw_fp_cleanup_drop(); |
5343 |
} |
5344 |
|
5345 |
void freescratch(void) |
5346 |
{ |
5347 |
int i; |
5348 |
for (i=0;i<N_REGS;i++) |
5349 |
if (live.nat[i].locked && i!=4) |
5350 |
write_log("Warning! %d is locked\n",i); |
5351 |
|
5352 |
for (i=0;i<VREGS;i++) |
5353 |
if (live.state[i].needflush==NF_SCRATCH) { |
5354 |
forget_about(i); |
5355 |
} |
5356 |
|
5357 |
for (i=0;i<VFREGS;i++) |
5358 |
if (live.fate[i].needflush==NF_SCRATCH) { |
5359 |
f_forget_about(i); |
5360 |
} |
5361 |
} |
5362 |
|
5363 |
/******************************************************************** |
5364 |
* Support functions, internal * |
5365 |
********************************************************************/ |
5366 |
|
5367 |
|
5368 |
/* Pads the code output pointer `target` up to a multiple of a.
 * a == 0 means "no alignment requested". The mask arithmetic assumes
 * a is a power of two.
 *
 * Both padding strategies now emit nothing when target is already
 * aligned; previously the nop-filler path emitted a full a bytes in
 * that case while the 0x90 path emitted none. */
static void align_target(uae_u32 a)
{
	if (!a)
		return;

	if (tune_nop_fillers) {
		const uae_u32 misalign = (uae_u32)(((uintptr)target) & (a - 1));
		if (misalign)
			raw_emit_nop_filler(a - misalign);
	}
	else {
		/* Fill with NOPs --- makes debugging with gdb easier */
		while ((uintptr)target&(a-1))
			*target++=0x90;
	}
}
5381 |
|
5382 |
/* Non-zero when the host address lies in [ROMBaseHost, ROMBaseHost + ROMSize). */
static __inline__ int isinrom(uintptr addr)
{
	const uintptr rom_begin = (uintptr)ROMBaseHost;

	return (addr >= rom_begin) && (addr < rom_begin + ROMSize);
}
5386 |
|
5387 |
static void flush_all(void) |
5388 |
{ |
5389 |
int i; |
5390 |
|
5391 |
log_flush(); |
5392 |
for (i=0;i<VREGS;i++) |
5393 |
if (live.state[i].status==DIRTY) { |
5394 |
if (!call_saved[live.state[i].realreg]) { |
5395 |
tomem(i); |
5396 |
} |
5397 |
} |
5398 |
for (i=0;i<VFREGS;i++) |
5399 |
if (f_isinreg(i)) |
5400 |
f_evict(i); |
5401 |
raw_fp_cleanup_drop(); |
5402 |
} |
5403 |
|
5404 |
/* Make sure all registers that will get clobbered by a call are |
5405 |
save and sound in memory */ |
5406 |
static void prepare_for_call_1(void) |
5407 |
{ |
5408 |
flush_all(); /* If there are registers that don't get clobbered, |
5409 |
* we should be a bit more selective here */ |
5410 |
} |
5411 |
|
5412 |
/* We will call a C routine in a moment. That will clobber all registers, |
5413 |
so we need to disassociate everything */ |
5414 |
static void prepare_for_call_2(void) |
5415 |
{ |
5416 |
int i; |
5417 |
for (i=0;i<N_REGS;i++) |
5418 |
if (!call_saved[i] && live.nat[i].nholds>0) |
5419 |
free_nreg(i); |
5420 |
|
5421 |
for (i=0;i<N_FREGS;i++) |
5422 |
if (live.fat[i].nholds>0) |
5423 |
f_free_nreg(i); |
5424 |
|
5425 |
live.flags_in_flags=TRASH; /* Note: We assume we already rescued the |
5426 |
flags at the very start of the call_r |
5427 |
functions! */ |
5428 |
} |
5429 |
|
5430 |
/******************************************************************** |
5431 |
* Memory access and related functions, CREATE time * |
5432 |
********************************************************************/ |
5433 |
|
5434 |
/* Records the two successor addresses and the condition code of the
 * branch in next_pc_p, taken_pc_p and branch_cc. */
void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
{
	branch_cc=cond;
	taken_pc_p=taken;
	next_pc_p=not_taken;
}
5440 |
|
5441 |
|
5442 |
/* Returns the address of the direct_handler_to_use field in the
 * blockinfo obtained for addr, narrowed to 32 bits. */
static uae_u32 get_handler_address(uae_u32 addr)
{
	uae_u32 cl=cacheline(addr);	/* computed but not used below */
	blockinfo* info=get_blockinfo_addr_new((void*)(uintptr)addr,0);

	return (uintptr)&(info->direct_handler_to_use);
}
5448 |
|
5449 |
/* Returns the current direct_handler_to_use value of the blockinfo
 * obtained for addr, narrowed to 32 bits. */
static uae_u32 get_handler(uae_u32 addr)
{
	uae_u32 cl=cacheline(addr);	/* computed but not used below */
	blockinfo* info=get_blockinfo_addr_new((void*)(uintptr)addr,0);

	return (uintptr)info->direct_handler_to_use;
}
5455 |
|
5456 |
static void load_handler(int reg, uae_u32 addr) |
5457 |
{ |
5458 |
mov_l_rm(reg,get_handler_address(addr)); |
5459 |
} |
5460 |
|
5461 |
/* This version assumes that it is writing *real* memory, and *will* fail |
5462 |
* if that assumption is wrong! No branches, no second chances, just |
5463 |
* straight go-for-it attitude */ |
5464 |
|
5465 |
static void writemem_real(int address, int source, int size, int tmp, int clobber) |
5466 |
{ |
5467 |
int f=tmp; |
5468 |
|
5469 |
if (clobber) |
5470 |
f=source; |
5471 |
|
5472 |
switch(size) { |
5473 |
case 1: mov_b_bRr(address,source,MEMBaseDiff); break; |
5474 |
case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break; |
5475 |
case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break; |
5476 |
} |
5477 |
forget_about(tmp); |
5478 |
forget_about(f); |
5479 |
} |
5480 |
|
5481 |
void writebyte(int address, int source, int tmp) |
5482 |
{ |
5483 |
writemem_real(address,source,1,tmp,0); |
5484 |
} |
5485 |
|
5486 |
static __inline__ void writeword_general(int address, int source, int tmp, |
5487 |
int clobber) |
5488 |
{ |
5489 |
writemem_real(address,source,2,tmp,clobber); |
5490 |
} |
5491 |
|
5492 |
void writeword_clobber(int address, int source, int tmp) |
5493 |
{ |
5494 |
writeword_general(address,source,tmp,1); |
5495 |
} |
5496 |
|
5497 |
void writeword(int address, int source, int tmp) |
5498 |
{ |
5499 |
writeword_general(address,source,tmp,0); |
5500 |
} |
5501 |
|
5502 |
static __inline__ void writelong_general(int address, int source, int tmp, |
5503 |
int clobber) |
5504 |
{ |
5505 |
writemem_real(address,source,4,tmp,clobber); |
5506 |
} |
5507 |
|
5508 |
void writelong_clobber(int address, int source, int tmp) |
5509 |
{ |
5510 |
writelong_general(address,source,tmp,1); |
5511 |
} |
5512 |
|
5513 |
void writelong(int address, int source, int tmp) |
5514 |
{ |
5515 |
writelong_general(address,source,tmp,0); |
5516 |
} |
5517 |
|
5518 |
|
5519 |
|
5520 |
/* This version assumes that it is reading *real* memory, and *will* fail |
5521 |
* if that assumption is wrong! No branches, no second chances, just |
5522 |
* straight go-for-it attitude */ |
5523 |
|
5524 |
static void readmem_real(int address, int dest, int size, int tmp) |
5525 |
{ |
5526 |
int f=tmp; |
5527 |
|
5528 |
if (size==4 && address!=dest) |
5529 |
f=dest; |
5530 |
|
5531 |
switch(size) { |
5532 |
case 1: mov_b_brR(dest,address,MEMBaseDiff); break; |
5533 |
case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break; |
5534 |
case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break; |
5535 |
} |
5536 |
forget_about(tmp); |
5537 |
} |
5538 |
|
5539 |
void readbyte(int address, int dest, int tmp) |
5540 |
{ |
5541 |
readmem_real(address,dest,1,tmp); |
5542 |
} |
5543 |
|
5544 |
void readword(int address, int dest, int tmp) |
5545 |
{ |
5546 |
readmem_real(address,dest,2,tmp); |
5547 |
} |
5548 |
|
5549 |
void readlong(int address, int dest, int tmp) |
5550 |
{ |
5551 |
readmem_real(address,dest,4,tmp); |
5552 |
} |
5553 |
|
5554 |
void get_n_addr(int address, int dest, int tmp) |
5555 |
{ |
5556 |
// a is the register containing the virtual address |
5557 |
// after the offset had been fetched |
5558 |
int a=tmp; |
5559 |
|
5560 |
// f is the register that will contain the offset |
5561 |
int f=tmp; |
5562 |
|
5563 |
// a == f == tmp if (address == dest) |
5564 |
if (address!=dest) { |
5565 |
a=address; |
5566 |
f=dest; |
5567 |
} |
5568 |
|
5569 |
#if REAL_ADDRESSING |
5570 |
mov_l_rr(dest, address); |
5571 |
#elif DIRECT_ADDRESSING |
5572 |
lea_l_brr(dest,address,MEMBaseDiff); |
5573 |
#endif |
5574 |
forget_about(tmp); |
5575 |
} |
5576 |
|
5577 |
void get_n_addr_jmp(int address, int dest, int tmp) |
5578 |
{ |
5579 |
/* For this, we need to get the same address as the rest of UAE |
5580 |
would --- otherwise we end up translating everything twice */ |
5581 |
get_n_addr(address,dest,tmp); |
5582 |
} |
5583 |
|
5584 |
|
5585 |
/* base is a register, but dp is an actual value. |
5586 |
target is a register, as is tmp */ |
5587 |
void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp) |
5588 |
{ |
5589 |
int reg = (dp >> 12) & 15; |
5590 |
int regd_shift=(dp >> 9) & 3; |
5591 |
|
5592 |
if (dp & 0x100) { |
5593 |
int ignorebase=(dp&0x80); |
5594 |
int ignorereg=(dp&0x40); |
5595 |
int addbase=0; |
5596 |
int outer=0; |
5597 |
|
5598 |
if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); |
5599 |
if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4); |
5600 |
|
5601 |
if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); |
5602 |
if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4); |
5603 |
|
5604 |
if ((dp & 0x4) == 0) { /* add regd *before* the get_long */ |
5605 |
if (!ignorereg) { |
5606 |
if ((dp & 0x800) == 0) |
5607 |
sign_extend_16_rr(target,reg); |
5608 |
else |
5609 |
mov_l_rr(target,reg); |
5610 |
shll_l_ri(target,regd_shift); |
5611 |
} |
5612 |
else |
5613 |
mov_l_ri(target,0); |
5614 |
|
5615 |
/* target is now regd */ |
5616 |
if (!ignorebase) |
5617 |
add_l(target,base); |
5618 |
add_l_ri(target,addbase); |
5619 |
if (dp&0x03) readlong(target,target,tmp); |
5620 |
} else { /* do the getlong first, then add regd */ |
5621 |
if (!ignorebase) { |
5622 |
mov_l_rr(target,base); |
5623 |
add_l_ri(target,addbase); |
5624 |
} |
5625 |
else |
5626 |
mov_l_ri(target,addbase); |
5627 |
if (dp&0x03) readlong(target,target,tmp); |
5628 |
|
5629 |
if (!ignorereg) { |
5630 |
if ((dp & 0x800) == 0) |
5631 |
sign_extend_16_rr(tmp,reg); |
5632 |
else |
5633 |
mov_l_rr(tmp,reg); |
5634 |
shll_l_ri(tmp,regd_shift); |
5635 |
/* tmp is now regd */ |
5636 |
add_l(target,tmp); |
5637 |
} |
5638 |
} |
5639 |
add_l_ri(target,outer); |
5640 |
} |
5641 |
else { /* 68000 version */ |
5642 |
if ((dp & 0x800) == 0) { /* Sign extend */ |
5643 |
sign_extend_16_rr(target,reg); |
5644 |
lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp)); |
5645 |
} |
5646 |
else { |
5647 |
lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp)); |
5648 |
} |
5649 |
} |
5650 |
forget_about(tmp); |
5651 |
} |
5652 |
|
5653 |
|
5654 |
|
5655 |
|
5656 |
|
5657 |
void set_cache_state(int enabled) |
5658 |
{ |
5659 |
if (enabled!=letit) |
5660 |
flush_icache_hard(77); |
5661 |
letit=enabled; |
5662 |
} |
5663 |
|
5664 |
int get_cache_state(void) |
5665 |
{ |
5666 |
return letit; |
5667 |
} |
5668 |
|
5669 |
/* Distance between compiled_code and current_compile_p, or 0 when no
 * translation cache is allocated. */
uae_u32 get_jitted_size(void)
{
	if (!compiled_code)
		return 0;

	return current_compile_p-compiled_code;
}
5675 |
|
5676 |
/* Tuning knobs for do_alloc_code(): how many placements to try and the
 * step between candidate addresses. */
const int CODE_ALLOC_MAX_ATTEMPTS = 10;
const int CODE_ALLOC_BOUNDARIES = 128 * 1024;	/* 128 KB */
5678 |
|
5679 |
/* Allocates size bytes for generated code and returns NULL on failure.
 * The Linux-specific placement hack is compiled out (`&& 0`), so the
 * active path is a plain vm_acquire(); depth is used only by the
 * disabled path. */
static uint8 *do_alloc_code(uint32 size, int depth)
{
#if defined(__linux__) && 0
	/*
	  This is a really awful hack that is known to work on Linux at
	  least.

	  The trick here is to make sure the allocated cache is nearby
	  code segment, and more precisely in the positive half of a
	  32-bit address space. i.e. addr < 0x80000000. Actually, it
	  turned out that a 32-bit binary run on AMD64 yields a cache
	  allocated around 0xa0000000, thus causing some troubles when
	  translating addresses from m68k to x86.
	*/
	static uint8 * code_base = NULL;
	if (code_base == NULL) {
		uintptr page_size = getpagesize();
		uintptr boundaries = CODE_ALLOC_BOUNDARIES;
		if (boundaries < page_size)
			boundaries = page_size;
		code_base = (uint8 *)sbrk(0);
		for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
			if (vm_acquire_fixed(code_base, size) == 0) {
				uint8 *code = code_base;
				code_base += size;
				return code;
			}
			code_base += boundaries;
		}
		return NULL;
	}

	if (vm_acquire_fixed(code_base, size) == 0) {
		uint8 *code = code_base;
		code_base += size;
		return code;
	}

	if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
		return NULL;

	return do_alloc_code(size, depth + 1);
#else
	uint8 *code = (uint8 *)vm_acquire(size);
	if (code == VM_MAP_FAILED)
		return NULL;
	return code;
#endif
}
5726 |
|
5727 |
/*
 * Allocate SIZE bytes of code space through do_alloc_code().
 *
 * Asserts that the returned address is representable in 32 bits.
 * Returns NULL on failure.
 */
static inline uint8 *alloc_code(uint32 size)
{
	uint8 * const code = do_alloc_code(size, 0);

	/* allocated code must fit in 32-bit boundaries */
	assert((uintptr)code <= 0xffffffff);

	return code;
}
5734 |
|
5735 |
void alloc_cache(void) |
5736 |
{ |
5737 |
if (compiled_code) { |
5738 |
flush_icache_hard(6); |
5739 |
vm_release(compiled_code, cache_size * 1024); |
5740 |
compiled_code = 0; |
5741 |
} |
5742 |
|
5743 |
if (cache_size == 0) |
5744 |
return; |
5745 |
|
5746 |
while (!compiled_code && cache_size) { |
5747 |
if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) { |
5748 |
compiled_code = 0; |
5749 |
cache_size /= 2; |
5750 |
} |
5751 |
} |
5752 |
vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE); |
5753 |
|
5754 |
if (compiled_code) { |
5755 |
write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code); |
5756 |
max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST; |
5757 |
current_compile_p = compiled_code; |
5758 |
current_cache_size = 0; |
5759 |
} |
5760 |
} |
5761 |
|
5762 |
|
5763 |
|
5764 |
extern void op_illg_1 (uae_u32 opcode) REGPARAM; |
5765 |
|
5766 |
/*
 * Compute the two checksums of the 68k code a block was translated from.
 *
 * *c1 receives the 32-bit sum and *c2 the 32-bit XOR of all words in the
 * covered range(s).  With USE_CHECKSUM_INFO every entry of bi->csi is
 * accumulated; otherwise the single range bi->min_pcp / bi->len is used.
 * Each range is widened downwards to a 4-byte boundary.  A range whose
 * length is negative or exceeds MAX_CHECKSUM_LEN contributes nothing.
 */
static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
{
	uae_u32 sum = 0;
	uae_u32 parity = 0;

#if USE_CHECKSUM_INFO
	checksum_info *csi = bi->csi;
	Dif(!csi) abort();
	for (; csi; csi = csi->next) {
		uae_s32 len = csi->length;
		uintptr addr = (uintptr)csi->start_p;
#else
	{
		uae_s32 len = bi->len;
		uintptr addr = (uintptr)bi->min_pcp;
#endif
		/* Extend the range backwards to the enclosing aligned word. */
		len += (addr & 3);
		addr &= ~((uintptr)3);

		if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
			uae_u32 *word = (uae_u32 *)addr;

			for (; len > 0; len -= 4, word++) {
				sum += *word;
				parity ^= *word;
			}
		}
	}

	*c1 = sum;
	*c2 = parity;
}
5804 |
|
5805 |
#if 0
/*
 * Debug helper (compiled out): log every 32-bit word of the range
 * described by CSI, using the same alignment and length limits as
 * calc_checksum().  The address is kept in a uintptr so it is not
 * truncated where pointers are wider than 32 bits.
 */
static void show_checksum(CSI_TYPE* csi)
{
	uae_s32 len=CSI_LENGTH(csi);
	uintptr tmp=(uintptr)CSI_START_P(csi);
	uae_u32* pos;

	len+=(tmp&3);
	tmp&=~((uintptr)3);
	pos=(uae_u32*)tmp;

	if (len<0 || len>MAX_CHECKSUM_LEN) {
		return;
	}
	else {
		while (len>0) {
			write_log("%08x ",*pos);
			pos++;
			len-=4;
		}
		write_log(" bla\n");
	}
}
#endif
5831 |
|
5832 |
|
5833 |
int check_for_cache_miss(void) |
5834 |
{ |
5835 |
blockinfo* bi=get_blockinfo_addr(regs.pc_p); |
5836 |
|
5837 |
if (bi) { |
5838 |
int cl=cacheline(regs.pc_p); |
5839 |
if (bi!=cache_tags[cl+1].bi) { |
5840 |
raise_in_cl_list(bi); |
5841 |
return 1; |
5842 |
} |
5843 |
} |
5844 |
return 0; |
5845 |
} |
5846 |
|
5847 |
|
5848 |
static void recompile_block(void) |
5849 |
{ |
5850 |
/* An existing block's countdown code has expired. We need to make |
5851 |
sure that execute_normal doesn't refuse to recompile due to a |
5852 |
perceived cache miss... */ |
5853 |
blockinfo* bi=get_blockinfo_addr(regs.pc_p); |
5854 |
|
5855 |
Dif (!bi) |
5856 |
abort(); |
5857 |
raise_in_cl_list(bi); |
5858 |
execute_normal(); |
5859 |
return; |
5860 |
} |
5861 |
static void cache_miss(void) |
5862 |
{ |
5863 |
blockinfo* bi=get_blockinfo_addr(regs.pc_p); |
5864 |
uae_u32 cl=cacheline(regs.pc_p); |
5865 |
blockinfo* bi2=get_blockinfo(cl); |
5866 |
|
5867 |
if (!bi) { |
5868 |
execute_normal(); /* Compile this block now */ |
5869 |
return; |
5870 |
} |
5871 |
Dif (!bi2 || bi==bi2) { |
5872 |
write_log("Unexplained cache miss %p %p\n",bi,bi2); |
5873 |
abort(); |
5874 |
} |
5875 |
raise_in_cl_list(bi); |
5876 |
return; |
5877 |
} |
5878 |
|
5879 |
static int called_check_checksum(blockinfo* bi); |
5880 |
|
5881 |
/*
 * Re-validate a block that is in the BI_NEED_CHECK state.
 *
 * Returns 1 when the block is usable: either it was not awaiting a check,
 * or its stored checksums still match and the blocks it depends on
 * (checked through called_check_checksum()) are good as well; the block
 * is then moved to the active list.  Returns 0 when the block had to be
 * invalidated.
 */
static inline int block_check_checksum(blockinfo* bi)
{
	/* Start with values that cannot equal a stored 0/0 pair, so a block
	   without a recorded checksum never matches. */
	uae_u32 sum = 1;
	uae_u32 parity = 1;
	bool still_valid;

	if (bi->status != BI_NEED_CHECK)
		return 1;	/* This block is in a checked state */

	checksum_count++;

	if (bi->c1 || bi->c2)
		calc_checksum(bi, &sum, &parity);

	still_valid = (sum == bi->c1) && (parity == bi->c2);

	if (still_valid) {
		/* This block is still OK. So we reactivate. Of course, that
		   means we have to move it into the needs-to-be-flushed list */
		bi->handler_to_use = bi->handler;
		set_dhtu(bi, bi->direct_handler);
		bi->status = BI_CHECKING;
		still_valid = called_check_checksum(bi);
	}

	if (!still_valid) {
		/* This block actually changed. We need to invalidate it,
		   and set it up to be recompiled */
		invalidate_block(bi);
		raise_in_cl_list(bi);
		return still_valid;
	}

	remove_from_list(bi);
	add_to_active(bi);
	raise_in_cl_list(bi);
	bi->status = BI_ACTIVE;
	return still_valid;
}
5925 |
|
5926 |
/*
 * Check the (up to two) blocks BI jumps to directly.
 *
 * For every dependency slot with a non-zero jmp_off the target block is
 * validated with block_check_checksum().  Stops at the first failure.
 * Returns 1 when all checked targets are good, 0 otherwise.
 */
static int called_check_checksum(blockinfo* bi)
{
	for (int slot = 0; slot < 2; slot++) {
		if (!bi->dep[slot].jmp_off)
			continue;
		if (!block_check_checksum(bi->dep[slot].target))
			return 0;
	}
	return 1;
}
5939 |
|
5940 |
static void check_checksum(void) |
5941 |
{ |
5942 |
blockinfo* bi=get_blockinfo_addr(regs.pc_p); |
5943 |
uae_u32 cl=cacheline(regs.pc_p); |
5944 |
blockinfo* bi2=get_blockinfo(cl); |
5945 |
|
5946 |
/* These are not the droids you are looking for... */ |
5947 |
if (!bi) { |
5948 |
/* Whoever is the primary target is in a dormant state, but |
5949 |
calling it was accidental, and we should just compile this |
5950 |
new block */ |
5951 |
execute_normal(); |
5952 |
return; |
5953 |
} |
5954 |
if (bi!=bi2) { |
5955 |
/* The block was hit accidentally, but it does exist. Cache miss */ |
5956 |
cache_miss(); |
5957 |
return; |
5958 |
} |
5959 |
|
5960 |
if (!block_check_checksum(bi)) |
5961 |
execute_normal(); |
5962 |
} |
5963 |
|
5964 |
/*
 * Bring the current register-allocation state in line with the entry
 * state (bi->env) recorded for block BI.
 *
 * A block awaiting a checksum check is validated first.  For an active
 * or finalizing block, virtual registers 0..15 marked L_UNNEEDED are
 * forgotten.  Everything is then flushed, each native register the block
 * expects preloaded is loaded with readreg_specific(), and those
 * registers are unlocked again.
 */
static __inline__ void match_states(blockinfo* bi)
{
	smallstate * const wanted = &(bi->env);
	int reg;

	if (bi->status == BI_NEED_CHECK)
		block_check_checksum(bi);

	/* Deal with the *promises* the block makes (about not using
	   certain vregs) */
	const bool promises_apply =
		(bi->status == BI_ACTIVE || bi->status == BI_FINALIZING);
	if (promises_apply) {
		for (reg = 0; reg < 16; reg++) {
			if (wanted->virt[reg] != L_UNNEEDED)
				continue;
			// write_log("unneeded reg %d at %p\n",i,target);
			COMPCALL(forget_about)(reg); // FIXME
		}
	}
	flush(1);

	/* And now deal with the *demands* the block makes */
	for (reg = 0; reg < N_REGS; reg++) {
		const int vreg = wanted->nat[reg];
		if (vreg >= 0)
			readreg_specific(vreg, 4, reg);
	}
	for (reg = 0; reg < N_REGS; reg++) {
		if (wanted->nat[reg] >= 0)
			unlock2(reg);
	}
}
6002 |
|
6003 |
static __inline__ void create_popalls(void) |
6004 |
{ |
6005 |
int i,r; |
6006 |
|
6007 |
if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) { |
6008 |
write_log("FATAL: Could not allocate popallspace!\n"); |
6009 |
abort(); |
6010 |
} |
6011 |
vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE); |
6012 |
|
6013 |
int stack_space = STACK_OFFSET; |
6014 |
for (i=0;i<N_REGS;i++) { |
6015 |
if (need_to_preserve[i]) |
6016 |
stack_space += sizeof(void *); |
6017 |
} |
6018 |
stack_space %= STACK_ALIGN; |
6019 |
if (stack_space) |
6020 |
stack_space = STACK_ALIGN - stack_space; |
6021 |
|
6022 |
current_compile_p=popallspace; |
6023 |
set_target(current_compile_p); |
6024 |
|
6025 |
/* We need to guarantee 16-byte stack alignment on x86 at any point |
6026 |
within the JIT generated code. We have multiple exit points |
6027 |
possible but a single entry. A "jmp" is used so that we don't |
6028 |
have to generate stack alignment in generated code that has to |
6029 |
call external functions (e.g. a generic instruction handler). |
6030 |
|
6031 |
In summary, JIT generated code is not leaf so we have to deal |
6032 |
with it here to maintain correct stack alignment. */ |
6033 |
align_target(align_jumps); |
6034 |
current_compile_p=get_target(); |
6035 |
pushall_call_handler=get_target(); |
6036 |
for (i=N_REGS;i--;) { |
6037 |
if (need_to_preserve[i]) |
6038 |
raw_push_l_r(i); |
6039 |
} |
6040 |
raw_dec_sp(stack_space); |
6041 |
r=REG_PC_TMP; |
6042 |
raw_mov_l_rm(r,(uintptr)®s.pc_p); |
6043 |
raw_and_l_ri(r,TAGMASK); |
6044 |
raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P); |
6045 |
|
6046 |
/* now the exit points */ |
6047 |
align_target(align_jumps); |
6048 |
popall_do_nothing=get_target(); |
6049 |
raw_inc_sp(stack_space); |
6050 |
for (i=0;i<N_REGS;i++) { |
6051 |
if (need_to_preserve[i]) |
6052 |
raw_pop_l_r(i); |
6053 |
} |
6054 |
raw_jmp((uintptr)do_nothing); |
6055 |
|
6056 |
align_target(align_jumps); |
6057 |
popall_execute_normal=get_target(); |
6058 |
raw_inc_sp(stack_space); |
6059 |
for (i=0;i<N_REGS;i++) { |
6060 |
if (need_to_preserve[i]) |
6061 |
raw_pop_l_r(i); |
6062 |
} |
6063 |
raw_jmp((uintptr)execute_normal); |
6064 |
|
6065 |
align_target(align_jumps); |
6066 |
popall_cache_miss=get_target(); |
6067 |
raw_inc_sp(stack_space); |
6068 |
for (i=0;i<N_REGS;i++) { |
6069 |
if (need_to_preserve[i]) |
6070 |
raw_pop_l_r(i); |
6071 |
} |
6072 |
raw_jmp((uintptr)cache_miss); |
6073 |
|
6074 |
align_target(align_jumps); |
6075 |
popall_recompile_block=get_target(); |
6076 |
raw_inc_sp(stack_space); |
6077 |
for (i=0;i<N_REGS;i++) { |
6078 |
if (need_to_preserve[i]) |
6079 |
raw_pop_l_r(i); |
6080 |
} |
6081 |
raw_jmp((uintptr)recompile_block); |
6082 |
|
6083 |
align_target(align_jumps); |
6084 |
popall_exec_nostats=get_target(); |
6085 |
raw_inc_sp(stack_space); |
6086 |
for (i=0;i<N_REGS;i++) { |
6087 |
if (need_to_preserve[i]) |
6088 |
raw_pop_l_r(i); |
6089 |
} |
6090 |
raw_jmp((uintptr)exec_nostats); |
6091 |
|
6092 |
align_target(align_jumps); |
6093 |
popall_check_checksum=get_target(); |
6094 |
raw_inc_sp(stack_space); |
6095 |
for (i=0;i<N_REGS;i++) { |
6096 |
if (need_to_preserve[i]) |
6097 |
raw_pop_l_r(i); |
6098 |
} |
6099 |
raw_jmp((uintptr)check_checksum); |
6100 |
|
6101 |
// no need to further write into popallspace |
6102 |
vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE); |
6103 |
} |
6104 |
|
6105 |
static __inline__ void reset_lists(void) |
6106 |
{ |
6107 |
int i; |
6108 |
|
6109 |
for (i=0;i<MAX_HOLD_BI;i++) |
6110 |
hold_bi[i]=NULL; |
6111 |
active=NULL; |
6112 |
dormant=NULL; |
6113 |
} |
6114 |
|
6115 |
/*
 * Initialise a blockinfo and emit its two small entry stubs at
 * current_compile_p.
 *
 * Both stubs copy bi->pc_p into regs.pc_p through native register 0.
 * direct_pen then jumps to popall_execute_normal, direct_pcc to
 * popall_check_checksum.  Afterwards the dependency slots are cleared,
 * the entry state is set to default_ss and the block is marked
 * BI_INVALID with no saved state.
 */
static void prepare_block(blockinfo* bi)
{
	int i;

	set_target(current_compile_p);
	align_target(align_jumps);
	bi->direct_pen=(cpuop_func *)get_target();
	raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
	raw_mov_l_mr((uintptr)&regs.pc_p,0);
	raw_jmp((uintptr)popall_execute_normal);

	align_target(align_jumps);
	bi->direct_pcc=(cpuop_func *)get_target();
	raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
	raw_mov_l_mr((uintptr)&regs.pc_p,0);
	raw_jmp((uintptr)popall_check_checksum);
	current_compile_p=get_target();

	bi->deplist=NULL;
	for (i=0;i<2;i++) {
		bi->dep[i].prev_p=NULL;
		bi->dep[i].next=NULL;
	}
	bi->env=default_ss;
	bi->status=BI_INVALID;
	bi->havestate=0;
	//bi->env=empty_ss;
}
6143 |
|
6144 |
// OPCODE is in big endian format, use cft_map() beforehand, if needed. |
6145 |
static inline void reset_compop(int opcode) |
6146 |
{ |
6147 |
compfunctbl[opcode] = NULL; |
6148 |
nfcompfunctbl[opcode] = NULL; |
6149 |
} |
6150 |
|
6151 |
/*
 * Parse exactly four hexadecimal digits (either case) starting at P.
 *
 * Returns the 16-bit value, or -1 as soon as a non-hex character is met;
 * this includes the terminating NUL of a string that is too short, so
 * nothing past the terminator is read.
 */
static int read_opcode(const char *p)
{
	int value = 0;

	for (int pos = 0; pos < 4; pos++) {
		const int ch = p[pos];
		int digit;

		if (ch >= '0' && ch <= '9')
			digit = ch - '0';
		else if (ch >= 'a' && ch <= 'f')
			digit = (ch - 'a') + 10;
		else if (ch >= 'A' && ch <= 'F')
			digit = (ch - 'A') + 10;
		else
			return -1;

		value = (value << 4) | digit;
	}
	return value;
}
6173 |
|
6174 |
static bool merge_blacklist() |
6175 |
{ |
6176 |
const char *blacklist = PrefsFindString("jitblacklist"); |
6177 |
if (blacklist) { |
6178 |
const char *p = blacklist; |
6179 |
for (;;) { |
6180 |
if (*p == 0) |
6181 |
return true; |
6182 |
|
6183 |
int opcode1 = read_opcode(p); |
6184 |
if (opcode1 < 0) |
6185 |
return false; |
6186 |
p += 4; |
6187 |
|
6188 |
int opcode2 = opcode1; |
6189 |
if (*p == '-') { |
6190 |
p++; |
6191 |
opcode2 = read_opcode(p); |
6192 |
if (opcode2 < 0) |
6193 |
return false; |
6194 |
p += 4; |
6195 |
} |
6196 |
|
6197 |
if (*p == 0 || *p == ',' || *p == ';') { |
6198 |
write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2); |
6199 |
for (int opcode = opcode1; opcode <= opcode2; opcode++) |
6200 |
reset_compop(cft_map(opcode)); |
6201 |
|
6202 |
if (*p == ',' || *p++ == ';') |
6203 |
continue; |
6204 |
|
6205 |
return true; |
6206 |
} |
6207 |
|
6208 |
return false; |
6209 |
} |
6210 |
} |
6211 |
return true; |
6212 |
} |
6213 |
|
6214 |
void build_comp(void) |
6215 |
{ |
6216 |
int i; |
6217 |
int jumpcount=0; |
6218 |
unsigned long opcode; |
6219 |
struct comptbl* tbl=op_smalltbl_0_comp_ff; |
6220 |
struct comptbl* nftbl=op_smalltbl_0_comp_nf; |
6221 |
int count; |
6222 |
int cpu_level = 0; // 68000 (default) |
6223 |
if (CPUType == 4) |
6224 |
cpu_level = 4; // 68040 with FPU |
6225 |
else { |
6226 |
if (FPUType) |
6227 |
cpu_level = 3; // 68020 with FPU |
6228 |
else if (CPUType >= 2) |
6229 |
cpu_level = 2; // 68020 |
6230 |
else if (CPUType == 1) |
6231 |
cpu_level = 1; |
6232 |
} |
6233 |
struct cputbl *nfctbl = ( |
6234 |
cpu_level == 4 ? op_smalltbl_0_nf |
6235 |
: cpu_level == 3 ? op_smalltbl_1_nf |
6236 |
: cpu_level == 2 ? op_smalltbl_2_nf |
6237 |
: cpu_level == 1 ? op_smalltbl_3_nf |
6238 |
: op_smalltbl_4_nf); |
6239 |
|
6240 |
write_log ("<JIT compiler> : building compiler function tables\n"); |
6241 |
|
6242 |
for (opcode = 0; opcode < 65536; opcode++) { |
6243 |
reset_compop(opcode); |
6244 |
nfcpufunctbl[opcode] = op_illg_1; |
6245 |
prop[opcode].use_flags = 0x1f; |
6246 |
prop[opcode].set_flags = 0x1f; |
6247 |
prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap |
6248 |
} |
6249 |
|
6250 |
for (i = 0; tbl[i].opcode < 65536; i++) { |
6251 |
int cflow = table68k[tbl[i].opcode].cflow; |
6252 |
if (follow_const_jumps && (tbl[i].specific & 16)) |
6253 |
cflow = fl_const_jump; |
6254 |
else |
6255 |
cflow &= ~fl_const_jump; |
6256 |
prop[cft_map(tbl[i].opcode)].cflow = cflow; |
6257 |
|
6258 |
int uses_fpu = tbl[i].specific & 32; |
6259 |
if (uses_fpu && avoid_fpu) |
6260 |
compfunctbl[cft_map(tbl[i].opcode)] = NULL; |
6261 |
else |
6262 |
compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler; |
6263 |
} |
6264 |
|
6265 |
for (i = 0; nftbl[i].opcode < 65536; i++) { |
6266 |
int uses_fpu = tbl[i].specific & 32; |
6267 |
if (uses_fpu && avoid_fpu) |
6268 |
nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL; |
6269 |
else |
6270 |
nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler; |
6271 |
|
6272 |
nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler; |
6273 |
} |
6274 |
|
6275 |
for (i = 0; nfctbl[i].handler; i++) { |
6276 |
nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler; |
6277 |
} |
6278 |
|
6279 |
for (opcode = 0; opcode < 65536; opcode++) { |
6280 |
compop_func *f; |
6281 |
compop_func *nff; |
6282 |
cpuop_func *nfcf; |
6283 |
int isaddx,cflow; |
6284 |
|
6285 |
if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level) |
6286 |
continue; |
6287 |
|
6288 |
if (table68k[opcode].handler != -1) { |
6289 |
f = compfunctbl[cft_map(table68k[opcode].handler)]; |
6290 |
nff = nfcompfunctbl[cft_map(table68k[opcode].handler)]; |
6291 |
nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)]; |
6292 |
cflow = prop[cft_map(table68k[opcode].handler)].cflow; |
6293 |
isaddx = prop[cft_map(table68k[opcode].handler)].is_addx; |
6294 |
prop[cft_map(opcode)].cflow = cflow; |
6295 |
prop[cft_map(opcode)].is_addx = isaddx; |
6296 |
compfunctbl[cft_map(opcode)] = f; |
6297 |
nfcompfunctbl[cft_map(opcode)] = nff; |
6298 |
Dif (nfcf == op_illg_1) |
6299 |
abort(); |
6300 |
nfcpufunctbl[cft_map(opcode)] = nfcf; |
6301 |
} |
6302 |
prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead; |
6303 |
prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive; |
6304 |
/* Unconditional jumps don't evaluate condition codes, so they |
6305 |
* don't actually use any flags themselves */ |
6306 |
if (prop[cft_map(opcode)].cflow & fl_const_jump) |
6307 |
prop[cft_map(opcode)].use_flags = 0; |
6308 |
} |
6309 |
for (i = 0; nfctbl[i].handler != NULL; i++) { |
6310 |
if (nfctbl[i].specific) |
6311 |
nfcpufunctbl[cft_map(tbl[i].opcode)] = nfctbl[i].handler; |
6312 |
} |
6313 |
|
6314 |
/* Merge in blacklist */ |
6315 |
if (!merge_blacklist()) |
6316 |
write_log("<JIT compiler> : blacklist merge failure!\n"); |
6317 |
|
6318 |
count=0; |
6319 |
for (opcode = 0; opcode < 65536; opcode++) { |
6320 |
if (compfunctbl[cft_map(opcode)]) |
6321 |
count++; |
6322 |
} |
6323 |
write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count); |
6324 |
|
6325 |
/* Initialise state */ |
6326 |
create_popalls(); |
6327 |
alloc_cache(); |
6328 |
reset_lists(); |
6329 |
|
6330 |
for (i=0;i<TAGSIZE;i+=2) { |
6331 |
cache_tags[i].handler=(cpuop_func *)popall_execute_normal; |
6332 |
cache_tags[i+1].bi=NULL; |
6333 |
} |
6334 |
|
6335 |
#if 0 |
6336 |
for (i=0;i<N_REGS;i++) { |
6337 |
empty_ss.nat[i].holds=-1; |
6338 |
empty_ss.nat[i].validsize=0; |
6339 |
empty_ss.nat[i].dirtysize=0; |
6340 |
} |
6341 |
#endif |
6342 |
for (i=0;i<VREGS;i++) { |
6343 |
empty_ss.virt[i]=L_NEEDED; |
6344 |
} |
6345 |
for (i=0;i<N_REGS;i++) { |
6346 |
empty_ss.nat[i]=L_UNKNOWN; |
6347 |
} |
6348 |
default_ss=empty_ss; |
6349 |
} |
6350 |
|
6351 |
|
6352 |
/* Flush policy that deliberately does nothing; N is ignored. */
static void flush_icache_none(int n)
{
	(void)n;
}
6356 |
|
6357 |
static void flush_icache_hard(int n) |
6358 |
{ |
6359 |
uae_u32 i; |
6360 |
blockinfo* bi, *dbi; |
6361 |
|
6362 |
hard_flush_count++; |
6363 |
#if 0 |
6364 |
write_log("Flush Icache_hard(%d/%x/%p), %u KB\n", |
6365 |
n,regs.pc,regs.pc_p,current_cache_size/1024); |
6366 |
current_cache_size = 0; |
6367 |
#endif |
6368 |
bi=active; |
6369 |
while(bi) { |
6370 |
cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal; |
6371 |
cache_tags[cacheline(bi->pc_p)+1].bi=NULL; |
6372 |
dbi=bi; bi=bi->next; |
6373 |
free_blockinfo(dbi); |
6374 |
} |
6375 |
bi=dormant; |
6376 |
while(bi) { |
6377 |
cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal; |
6378 |
cache_tags[cacheline(bi->pc_p)+1].bi=NULL; |
6379 |
dbi=bi; bi=bi->next; |
6380 |
free_blockinfo(dbi); |
6381 |
} |
6382 |
|
6383 |
reset_lists(); |
6384 |
if (!compiled_code) |
6385 |
return; |
6386 |
current_compile_p=compiled_code; |
6387 |
SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */ |
6388 |
} |
6389 |
|
6390 |
|
6391 |
/* "Soft flushing" --- instead of actually throwing everything away, |
6392 |
we simply mark everything as "needs to be checked". |
6393 |
*/ |
6394 |
|
6395 |
static inline void flush_icache_lazy(int n) |
6396 |
{ |
6397 |
uae_u32 i; |
6398 |
blockinfo* bi; |
6399 |
blockinfo* bi2; |
6400 |
|
6401 |
soft_flush_count++; |
6402 |
if (!active) |
6403 |
return; |
6404 |
|
6405 |
bi=active; |
6406 |
while (bi) { |
6407 |
uae_u32 cl=cacheline(bi->pc_p); |
6408 |
if (bi->status==BI_INVALID || |
6409 |
bi->status==BI_NEED_RECOMP) { |
6410 |
if (bi==cache_tags[cl+1].bi) |
6411 |
cache_tags[cl].handler=(cpuop_func *)popall_execute_normal; |
6412 |
bi->handler_to_use=(cpuop_func *)popall_execute_normal; |
6413 |
set_dhtu(bi,bi->direct_pen); |
6414 |
bi->status=BI_INVALID; |
6415 |
} |
6416 |
else { |
6417 |
if (bi==cache_tags[cl+1].bi) |
6418 |
cache_tags[cl].handler=(cpuop_func *)popall_check_checksum; |
6419 |
bi->handler_to_use=(cpuop_func *)popall_check_checksum; |
6420 |
set_dhtu(bi,bi->direct_pcc); |
6421 |
bi->status=BI_NEED_CHECK; |
6422 |
} |
6423 |
bi2=bi; |
6424 |
bi=bi->next; |
6425 |
} |
6426 |
/* bi2 is now the last entry in the active list */ |
6427 |
bi2->next=dormant; |
6428 |
if (dormant) |
6429 |
dormant->prev_p=&(bi2->next); |
6430 |
|
6431 |
dormant=active; |
6432 |
active->prev_p=&dormant; |
6433 |
active=NULL; |
6434 |
} |
6435 |
|
6436 |
/*
 * Invalidate translated code for the host range [start_p, start_p+length).
 *
 * With LAZY_FLUSH_ICACHE_RANGE only the active blocks that overlap the
 * range are touched.  They are redirected to popall_execute_normal (when
 * BI_INVALID / BI_NEED_RECOMP) or to popall_check_checksum (otherwise)
 * and moved to the dormant list.  Without it, the whole cache is flushed
 * through flush_icache(-1).
 *
 * The overlap tests use the single-comparison form
 * "(a - b) < len", which is only correct on unsigned values.  The
 * differences are therefore computed on uintptr; a signed pointer
 * difference wider than the length type makes every block located below
 * start_p match.
 */
void flush_icache_range(uae_u8 *start_p, uae_u32 length)
{
	if (!active)
		return;

#if LAZY_FLUSH_ICACHE_RANGE
	const uintptr range_start = (uintptr)start_p;
	blockinfo *bi = active;
	while (bi) {
#if USE_CHECKSUM_INFO
		bool candidate = false;
		for (checksum_info *csi = bi->csi; csi; csi = csi->next) {
			const uintptr csi_start = (uintptr)csi->start_p;
			/* range start inside the checksummed span, or span start
			   inside the flushed range */
			if (((range_start - csi_start) < (uintptr)csi->length) ||
				((csi_start - range_start) < (uintptr)length)) {
				candidate = true;
				break;
			}
		}
#else
		// Assume system is consistent and would invalidate the right range
		const bool candidate = ((uintptr)bi->pc_p - range_start) < (uintptr)length;
#endif
		/* Advance first: dbi may be unlinked from the active list below. */
		blockinfo *dbi = bi;
		bi = bi->next;
		if (candidate) {
			uae_u32 cl = cacheline(dbi->pc_p);
			if (dbi->status == BI_INVALID || dbi->status == BI_NEED_RECOMP) {
				if (dbi == cache_tags[cl+1].bi)
					cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
				dbi->handler_to_use = (cpuop_func *)popall_execute_normal;
				set_dhtu(dbi, dbi->direct_pen);
				dbi->status = BI_INVALID;
			}
			else {
				if (dbi == cache_tags[cl+1].bi)
					cache_tags[cl].handler = (cpuop_func *)popall_check_checksum;
				dbi->handler_to_use = (cpuop_func *)popall_check_checksum;
				set_dhtu(dbi, dbi->direct_pcc);
				dbi->status = BI_NEED_CHECK;
			}
			remove_from_list(dbi);
			add_to_dormant(dbi);
		}
	}
#else
	flush_icache(-1);
#endif
}
6483 |
|
6484 |
/* Unrecoverable condition: terminate the process through abort(). */
static void catastrophe(void)
{
	abort();
}
6488 |
|
6489 |
int failure; |
6490 |
|
6491 |
#define TARGET_M68K 0 |
6492 |
#define TARGET_POWERPC 1 |
6493 |
#define TARGET_X86 2 |
6494 |
#define TARGET_X86_64 3 |
6495 |
#if defined(i386) || defined(__i386__) |
6496 |
#define TARGET_NATIVE TARGET_X86 |
6497 |
#endif |
6498 |
#if defined(powerpc) || defined(__powerpc__) |
6499 |
#define TARGET_NATIVE TARGET_POWERPC |
6500 |
#endif |
6501 |
#if defined(x86_64) || defined(__x86_64__) |
6502 |
#define TARGET_NATIVE TARGET_X86_64 |
6503 |
#endif |
6504 |
|
6505 |
#ifdef ENABLE_MON |
6506 |
/* Monitor read hook: ADDR is used directly as a host address and the
   byte stored there is returned. */
static uae_u32 mon_read_byte_jit(uintptr addr)
{
	const uae_u8 * const host = (uae_u8 *)addr;

	return (uintptr)(*host);
}
6511 |
|
6512 |
/* Monitor write hook: ADDR is used directly as a host address and B is
   stored there, truncated to one byte. */
static void mon_write_byte_jit(uintptr addr, uae_u32 b)
{
	uae_u8 * const host = (uae_u8 *)addr;

	*host = b;
}
6517 |
#endif |
6518 |
|
6519 |
void disasm_block(int target, uint8 * start, size_t length) |
6520 |
{ |
6521 |
if (!JITDebug) |
6522 |
return; |
6523 |
|
6524 |
#if defined(JIT_DEBUG) && defined(ENABLE_MON) |
6525 |
char disasm_str[200]; |
6526 |
sprintf(disasm_str, "%s $%x $%x", |
6527 |
target == TARGET_M68K ? "d68" : |
6528 |
target == TARGET_X86 ? "d86" : |
6529 |
target == TARGET_X86_64 ? "d8664" : |
6530 |
target == TARGET_POWERPC ? "d" : "x", |
6531 |
start, start + length - 1); |
6532 |
|
6533 |
uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte; |
6534 |
void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte; |
6535 |
|
6536 |
mon_read_byte = mon_read_byte_jit; |
6537 |
mon_write_byte = mon_write_byte_jit; |
6538 |
|
6539 |
char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL}; |
6540 |
mon(4, arg); |
6541 |
|
6542 |
mon_read_byte = old_mon_read_byte; |
6543 |
mon_write_byte = old_mon_write_byte; |
6544 |
#endif |
6545 |
} |
6546 |
|
6547 |
/* Disassemble host code: disasm_block() with the TARGET_NATIVE selector. */
static void disasm_native_block(uint8 *start, size_t length)
{
	const int target = TARGET_NATIVE;

	disasm_block(target, start, length);
}
6551 |
|
6552 |
/* Disassemble 68k code: disasm_block() with the TARGET_M68K selector. */
static void disasm_m68k_block(uint8 *start, size_t length)
{
	const int target = TARGET_M68K;

	disasm_block(target, start, length);
}
6556 |
|
6557 |
#ifdef HAVE_GET_WORD_UNSWAPPED |
6558 |
# define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a))) |
6559 |
#else |
6560 |
# define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a))) |
6561 |
#endif |
6562 |
|
6563 |
#if JIT_DEBUG |
6564 |
static uae_u8 *last_regs_pc_p = 0; |
6565 |
static uae_u8 *last_compiled_block_addr = 0; |
6566 |
|
6567 |
void compiler_dumpstate(void) |
6568 |
{ |
6569 |
if (!JITDebug) |
6570 |
return; |
6571 |
|
6572 |
write_log("### Host addresses\n"); |
6573 |
write_log("MEM_BASE : %x\n", MEMBaseDiff); |
6574 |
write_log("PC_P : %p\n", ®s.pc_p); |
6575 |
write_log("SPCFLAGS : %p\n", ®s.spcflags); |
6576 |
write_log("D0-D7 : %p-%p\n", ®s.regs[0], ®s.regs[7]); |
6577 |
write_log("A0-A7 : %p-%p\n", ®s.regs[8], ®s.regs[15]); |
6578 |
write_log("\n"); |
6579 |
|
6580 |
write_log("### M68k processor state\n"); |
6581 |
m68k_dumpstate(0); |
6582 |
write_log("\n"); |
6583 |
|
6584 |
write_log("### Block in Mac address space\n"); |
6585 |
write_log("M68K block : %p\n", |
6586 |
(void *)(uintptr)get_virtual_address(last_regs_pc_p)); |
6587 |
write_log("Native block : %p (%d bytes)\n", |
6588 |
(void *)(uintptr)get_virtual_address(last_compiled_block_addr), |
6589 |
get_blockinfo_addr(last_regs_pc_p)->direct_handler_size); |
6590 |
write_log("\n"); |
6591 |
} |
6592 |
#endif |
6593 |
|
6594 |
static void compile_block(cpu_history* pc_hist, int blocklen) |
6595 |
{ |
6596 |
if (letit && compiled_code) { |
6597 |
#if PROFILE_COMPILE_TIME |
6598 |
compile_count++; |
6599 |
clock_t start_time = clock(); |
6600 |
#endif |
6601 |
#if JIT_DEBUG |
6602 |
bool disasm_block = false; |
6603 |
#endif |
6604 |
|
6605 |
/* OK, here we need to 'compile' a block */ |
6606 |
int i; |
6607 |
int r; |
6608 |
int was_comp=0; |
6609 |
uae_u8 liveflags[MAXRUN+1]; |
6610 |
#if USE_CHECKSUM_INFO |
6611 |
bool trace_in_rom = isinrom((uintptr)pc_hist[0].location); |
6612 |
uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location; |
6613 |
uintptr min_pcp=max_pcp; |
6614 |
#else |
6615 |
uintptr max_pcp=(uintptr)pc_hist[0].location; |
6616 |
uintptr min_pcp=max_pcp; |
6617 |
#endif |
6618 |
uae_u32 cl=cacheline(pc_hist[0].location); |
6619 |
void* specflags=(void*)®s.spcflags; |
6620 |
blockinfo* bi=NULL; |
6621 |
blockinfo* bi2; |
6622 |
int extra_len=0; |
6623 |
|
6624 |
redo_current_block=0; |
6625 |
if (current_compile_p>=max_compile_start) |
6626 |
flush_icache_hard(7); |
6627 |
|
6628 |
alloc_blockinfos(); |
6629 |
|
6630 |
bi=get_blockinfo_addr_new(pc_hist[0].location,0); |
6631 |
bi2=get_blockinfo(cl); |
6632 |
|
6633 |
optlev=bi->optlevel; |
6634 |
if (bi->status!=BI_INVALID) { |
6635 |
Dif (bi!=bi2) { |
6636 |
/* I don't think it can happen anymore. Shouldn't, in |
6637 |
any case. So let's make sure... */ |
6638 |
write_log("WOOOWOO count=%d, ol=%d %p %p\n", |
6639 |
bi->count,bi->optlevel,bi->handler_to_use, |
6640 |
cache_tags[cl].handler); |
6641 |
abort(); |
6642 |
} |
6643 |
|
6644 |
Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) { |
6645 |
write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status); |
6646 |
/* What the heck? We are not supposed to be here! */ |
6647 |
abort(); |
6648 |
} |
6649 |
} |
6650 |
if (bi->count==-1) { |
6651 |
optlev++; |
6652 |
while (!optcount[optlev]) |
6653 |
optlev++; |
6654 |
bi->count=optcount[optlev]-1; |
6655 |
} |
6656 |
current_block_pc_p=(uintptr)pc_hist[0].location; |
6657 |
|
6658 |
remove_deps(bi); /* We are about to create new code */ |
6659 |
bi->optlevel=optlev; |
6660 |
bi->pc_p=(uae_u8*)pc_hist[0].location; |
6661 |
#if USE_CHECKSUM_INFO |
6662 |
free_checksum_info_chain(bi->csi); |
6663 |
bi->csi = NULL; |
6664 |
#endif |
6665 |
|
6666 |
liveflags[blocklen]=0x1f; /* All flags needed afterwards */ |
6667 |
i=blocklen; |
6668 |
while (i--) { |
6669 |
uae_u16* currpcp=pc_hist[i].location; |
6670 |
uae_u32 op=DO_GET_OPCODE(currpcp); |
6671 |
|
6672 |
#if USE_CHECKSUM_INFO |
6673 |
trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp); |
6674 |
if (follow_const_jumps && is_const_jump(op)) { |
6675 |
checksum_info *csi = alloc_checksum_info(); |
6676 |
csi->start_p = (uae_u8 *)min_pcp; |
6677 |
csi->length = max_pcp - min_pcp + LONGEST_68K_INST; |
6678 |
csi->next = bi->csi; |
6679 |
bi->csi = csi; |
6680 |
max_pcp = (uintptr)currpcp; |
6681 |
} |
6682 |
min_pcp = (uintptr)currpcp; |
6683 |
#else |
6684 |
if ((uintptr)currpcp<min_pcp) |
6685 |
min_pcp=(uintptr)currpcp; |
6686 |
if ((uintptr)currpcp>max_pcp) |
6687 |
max_pcp=(uintptr)currpcp; |
6688 |
#endif |
6689 |
|
6690 |
liveflags[i]=((liveflags[i+1]& |
6691 |
(~prop[op].set_flags))| |
6692 |
prop[op].use_flags); |
6693 |
if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0) |
6694 |
liveflags[i]&= ~FLAG_Z; |
6695 |
} |
6696 |
|
6697 |
#if USE_CHECKSUM_INFO |
6698 |
checksum_info *csi = alloc_checksum_info(); |
6699 |
csi->start_p = (uae_u8 *)min_pcp; |
6700 |
csi->length = max_pcp - min_pcp + LONGEST_68K_INST; |
6701 |
csi->next = bi->csi; |
6702 |
bi->csi = csi; |
6703 |
#endif |
6704 |
|
6705 |
bi->needed_flags=liveflags[0]; |
6706 |
|
6707 |
align_target(align_loops); |
6708 |
was_comp=0; |
6709 |
|
6710 |
bi->direct_handler=(cpuop_func *)get_target(); |
6711 |
set_dhtu(bi,bi->direct_handler); |
6712 |
bi->status=BI_COMPILING; |
6713 |
current_block_start_target=(uintptr)get_target(); |
6714 |
|
6715 |
log_startblock(); |
6716 |
|
6717 |
if (bi->count>=0) { /* Need to generate countdown code */ |
6718 |
raw_mov_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location); |
6719 |
raw_sub_l_mi((uintptr)&(bi->count),1); |
6720 |
raw_jl((uintptr)popall_recompile_block); |
6721 |
} |
6722 |
if (optlev==0) { /* No need to actually translate */ |
6723 |
/* Execute normally without keeping stats */ |
6724 |
raw_mov_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location); |
6725 |
raw_jmp((uintptr)popall_exec_nostats); |
6726 |
} |
6727 |
else { |
6728 |
reg_alloc_run=0; |
6729 |
next_pc_p=0; |
6730 |
taken_pc_p=0; |
6731 |
branch_cc=0; |
6732 |
|
6733 |
comp_pc_p=(uae_u8*)pc_hist[0].location; |
6734 |
init_comp(); |
6735 |
was_comp=1; |
6736 |
|
6737 |
#ifdef USE_CPU_EMUL_SERVICES |
6738 |
raw_sub_l_mi((uintptr)&emulated_ticks,blocklen); |
6739 |
raw_jcc_b_oponly(NATIVE_CC_GT); |
6740 |
uae_s8 *branchadd=(uae_s8*)get_target(); |
6741 |
emit_byte(0); |
6742 |
raw_call((uintptr)cpu_do_check_ticks); |
6743 |
*branchadd=(uintptr)get_target()-((uintptr)branchadd+1); |
6744 |
#endif |
6745 |
|
6746 |
#if JIT_DEBUG |
6747 |
if (JITDebug) { |
6748 |
raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location); |
6749 |
raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target); |
6750 |
} |
6751 |
#endif |
6752 |
|
6753 |
for (i=0;i<blocklen && |
6754 |
get_target_noopt()<max_compile_start;i++) { |
6755 |
cpuop_func **cputbl; |
6756 |
compop_func **comptbl; |
6757 |
uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location); |
6758 |
needed_flags=(liveflags[i+1] & prop[opcode].set_flags); |
6759 |
if (!needed_flags) { |
6760 |
cputbl=nfcpufunctbl; |
6761 |
comptbl=nfcompfunctbl; |
6762 |
} |
6763 |
else { |
6764 |
cputbl=cpufunctbl; |
6765 |
comptbl=compfunctbl; |
6766 |
} |
6767 |
|
6768 |
#if FLIGHT_RECORDER |
6769 |
{ |
6770 |
mov_l_ri(S1, get_virtual_address((uae_u8 *)(pc_hist[i].location)) | 1); |
6771 |
clobber_flags(); |
6772 |
remove_all_offsets(); |
6773 |
int arg = readreg_specific(S1,4,REG_PAR1); |
6774 |
prepare_for_call_1(); |
6775 |
unlock2(arg); |
6776 |
prepare_for_call_2(); |
6777 |
raw_call((uintptr)m68k_record_step); |
6778 |
} |
6779 |
#endif |
6780 |
|
6781 |
failure = 1; // gb-- defaults to failure state |
6782 |
if (comptbl[opcode] && optlev>1) { |
6783 |
failure=0; |
6784 |
if (!was_comp) { |
6785 |
comp_pc_p=(uae_u8*)pc_hist[i].location; |
6786 |
init_comp(); |
6787 |
} |
6788 |
was_comp=1; |
6789 |
|
6790 |
comptbl[opcode](opcode); |
6791 |
freescratch(); |
6792 |
if (!(liveflags[i+1] & FLAG_CZNV)) { |
6793 |
/* We can forget about flags */ |
6794 |
dont_care_flags(); |
6795 |
} |
6796 |
#if INDIVIDUAL_INST |
6797 |
flush(1); |
6798 |
nop(); |
6799 |
flush(1); |
6800 |
was_comp=0; |
6801 |
#endif |
6802 |
} |
6803 |
|
6804 |
if (failure) { |
6805 |
if (was_comp) { |
6806 |
flush(1); |
6807 |
was_comp=0; |
6808 |
} |
6809 |
raw_mov_l_ri(REG_PAR1,(uae_u32)opcode); |
6810 |
#if USE_NORMAL_CALLING_CONVENTION |
6811 |
raw_push_l_r(REG_PAR1); |
6812 |
#endif |
6813 |
raw_mov_l_mi((uintptr)®s.pc_p, |
6814 |
(uintptr)pc_hist[i].location); |
6815 |
raw_call((uintptr)cputbl[opcode]); |
6816 |
#if PROFILE_UNTRANSLATED_INSNS |
6817 |
// raw_cputbl_count[] is indexed with plain opcode (in m68k order) |
6818 |
raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1); |
6819 |
#endif |
6820 |
#if USE_NORMAL_CALLING_CONVENTION |
6821 |
raw_inc_sp(4); |
6822 |
#endif |
6823 |
|
6824 |
if (i < blocklen - 1) { |
6825 |
uae_s8* branchadd; |
6826 |
|
6827 |
raw_mov_l_rm(0,(uintptr)specflags); |
6828 |
raw_test_l_rr(0,0); |
6829 |
raw_jz_b_oponly(); |
6830 |
branchadd=(uae_s8 *)get_target(); |
6831 |
emit_byte(0); |
6832 |
raw_jmp((uintptr)popall_do_nothing); |
6833 |
*branchadd=(uintptr)get_target()-(uintptr)branchadd-1; |
6834 |
} |
6835 |
} |
6836 |
} |
6837 |
#if 1 /* This isn't completely kosher yet; It really needs to be |
6838 |
integrated into a general inter-block-dependency scheme */ |
6839 |
if (next_pc_p && taken_pc_p && |
6840 |
was_comp && taken_pc_p==current_block_pc_p) { |
6841 |
blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0); |
6842 |
blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0); |
6843 |
uae_u8 x=bi1->needed_flags; |
6844 |
|
6845 |
if (x==0xff || 1) { /* To be on the safe side */ |
6846 |
uae_u16* next=(uae_u16*)next_pc_p; |
6847 |
uae_u32 op=DO_GET_OPCODE(next); |
6848 |
|
6849 |
x=0x1f; |
6850 |
x&=(~prop[op].set_flags); |
6851 |
x|=prop[op].use_flags; |
6852 |
} |
6853 |
|
6854 |
x|=bi2->needed_flags; |
6855 |
if (!(x & FLAG_CZNV)) { |
6856 |
/* We can forget about flags */ |
6857 |
dont_care_flags(); |
6858 |
extra_len+=2; /* The next instruction now is part of this |
6859 |
block */ |
6860 |
} |
6861 |
|
6862 |
} |
6863 |
#endif |
6864 |
log_flush(); |
6865 |
|
6866 |
if (next_pc_p) { /* A branch was registered */ |
6867 |
uintptr t1=next_pc_p; |
6868 |
uintptr t2=taken_pc_p; |
6869 |
int cc=branch_cc; |
6870 |
|
6871 |
uae_u32* branchadd; |
6872 |
uae_u32* tba; |
6873 |
bigstate tmp; |
6874 |
blockinfo* tbi; |
6875 |
|
6876 |
if (taken_pc_p<next_pc_p) { |
6877 |
/* backward branch. Optimize for the "taken" case --- |
6878 |
which means the raw_jcc should fall through when |
6879 |
the 68k branch is taken. */ |
6880 |
t1=taken_pc_p; |
6881 |
t2=next_pc_p; |
6882 |
cc=branch_cc^1; |
6883 |
} |
6884 |
|
6885 |
tmp=live; /* ouch! This is big... */ |
6886 |
raw_jcc_l_oponly(cc); |
6887 |
branchadd=(uae_u32*)get_target(); |
6888 |
emit_long(0); |
6889 |
|
6890 |
/* predicted outcome */ |
6891 |
tbi=get_blockinfo_addr_new((void*)t1,1); |
6892 |
match_states(tbi); |
6893 |
raw_cmp_l_mi((uintptr)specflags,0); |
6894 |
raw_jcc_l_oponly(4); |
6895 |
tba=(uae_u32*)get_target(); |
6896 |
emit_long(get_handler(t1)-((uintptr)tba+4)); |
6897 |
raw_mov_l_mi((uintptr)®s.pc_p,t1); |
6898 |
flush_reg_count(); |
6899 |
raw_jmp((uintptr)popall_do_nothing); |
6900 |
create_jmpdep(bi,0,tba,t1); |
6901 |
|
6902 |
align_target(align_jumps); |
6903 |
/* not-predicted outcome */ |
6904 |
*branchadd=(uintptr)get_target()-((uintptr)branchadd+4); |
6905 |
live=tmp; /* Ouch again */ |
6906 |
tbi=get_blockinfo_addr_new((void*)t2,1); |
6907 |
match_states(tbi); |
6908 |
|
6909 |
//flush(1); /* Can only get here if was_comp==1 */ |
6910 |
raw_cmp_l_mi((uintptr)specflags,0); |
6911 |
raw_jcc_l_oponly(4); |
6912 |
tba=(uae_u32*)get_target(); |
6913 |
emit_long(get_handler(t2)-((uintptr)tba+4)); |
6914 |
raw_mov_l_mi((uintptr)®s.pc_p,t2); |
6915 |
flush_reg_count(); |
6916 |
raw_jmp((uintptr)popall_do_nothing); |
6917 |
create_jmpdep(bi,1,tba,t2); |
6918 |
} |
6919 |
else |
6920 |
{ |
6921 |
if (was_comp) { |
6922 |
flush(1); |
6923 |
} |
6924 |
flush_reg_count(); |
6925 |
|
6926 |
/* Let's find out where next_handler is... */ |
6927 |
if (was_comp && isinreg(PC_P)) { |
6928 |
r=live.state[PC_P].realreg; |
6929 |
raw_and_l_ri(r,TAGMASK); |
6930 |
int r2 = (r==0) ? 1 : 0; |
6931 |
raw_mov_l_ri(r2,(uintptr)popall_do_nothing); |
6932 |
raw_cmp_l_mi((uintptr)specflags,0); |
6933 |
raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ); |
6934 |
raw_jmp_r(r2); |
6935 |
} |
6936 |
else if (was_comp && isconst(PC_P)) { |
6937 |
uae_u32 v=live.state[PC_P].val; |
6938 |
uae_u32* tba; |
6939 |
blockinfo* tbi; |
6940 |
|
6941 |
tbi=get_blockinfo_addr_new((void*)(uintptr)v,1); |
6942 |
match_states(tbi); |
6943 |
|
6944 |
raw_cmp_l_mi((uintptr)specflags,0); |
6945 |
raw_jcc_l_oponly(4); |
6946 |
tba=(uae_u32*)get_target(); |
6947 |
emit_long(get_handler(v)-((uintptr)tba+4)); |
6948 |
raw_mov_l_mi((uintptr)®s.pc_p,v); |
6949 |
raw_jmp((uintptr)popall_do_nothing); |
6950 |
create_jmpdep(bi,0,tba,v); |
6951 |
} |
6952 |
else { |
6953 |
r=REG_PC_TMP; |
6954 |
raw_mov_l_rm(r,(uintptr)®s.pc_p); |
6955 |
raw_and_l_ri(r,TAGMASK); |
6956 |
int r2 = (r==0) ? 1 : 0; |
6957 |
raw_mov_l_ri(r2,(uintptr)popall_do_nothing); |
6958 |
raw_cmp_l_mi((uintptr)specflags,0); |
6959 |
raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ); |
6960 |
raw_jmp_r(r2); |
6961 |
} |
6962 |
} |
6963 |
} |
6964 |
|
6965 |
#if USE_MATCH |
6966 |
if (callers_need_recompile(&live,&(bi->env))) { |
6967 |
mark_callers_recompile(bi); |
6968 |
} |
6969 |
|
6970 |
big_to_small_state(&live,&(bi->env)); |
6971 |
#endif |
6972 |
|
6973 |
#if USE_CHECKSUM_INFO |
6974 |
remove_from_list(bi); |
6975 |
if (trace_in_rom) { |
6976 |
// No need to checksum that block trace on cache invalidation |
6977 |
free_checksum_info_chain(bi->csi); |
6978 |
bi->csi = NULL; |
6979 |
add_to_dormant(bi); |
6980 |
} |
6981 |
else { |
6982 |
calc_checksum(bi,&(bi->c1),&(bi->c2)); |
6983 |
add_to_active(bi); |
6984 |
} |
6985 |
#else |
6986 |
if (next_pc_p+extra_len>=max_pcp && |
6987 |
next_pc_p+extra_len<max_pcp+LONGEST_68K_INST) |
6988 |
max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */ |
6989 |
else |
6990 |
max_pcp+=LONGEST_68K_INST; |
6991 |
|
6992 |
bi->len=max_pcp-min_pcp; |
6993 |
bi->min_pcp=min_pcp; |
6994 |
|
6995 |
remove_from_list(bi); |
6996 |
if (isinrom(min_pcp) && isinrom(max_pcp)) { |
6997 |
add_to_dormant(bi); /* No need to checksum it on cache flush. |
6998 |
Please don't start changing ROMs in |
6999 |
flight! */ |
7000 |
} |
7001 |
else { |
7002 |
calc_checksum(bi,&(bi->c1),&(bi->c2)); |
7003 |
add_to_active(bi); |
7004 |
} |
7005 |
#endif |
7006 |
|
7007 |
current_cache_size += get_target() - (uae_u8 *)current_compile_p; |
7008 |
|
7009 |
#if JIT_DEBUG |
7010 |
if (JITDebug) |
7011 |
bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target; |
7012 |
|
7013 |
if (JITDebug && disasm_block) { |
7014 |
uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p); |
7015 |
D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen)); |
7016 |
uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1; |
7017 |
disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size); |
7018 |
D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location)); |
7019 |
disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size); |
7020 |
getchar(); |
7021 |
} |
7022 |
#endif |
7023 |
|
7024 |
log_dump(); |
7025 |
align_target(align_jumps); |
7026 |
|
7027 |
/* This is the non-direct handler */ |
7028 |
bi->handler= |
7029 |
bi->handler_to_use=(cpuop_func *)get_target(); |
7030 |
raw_cmp_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location); |
7031 |
raw_jnz((uintptr)popall_cache_miss); |
7032 |
comp_pc_p=(uae_u8*)pc_hist[0].location; |
7033 |
|
7034 |
bi->status=BI_FINALIZING; |
7035 |
init_comp(); |
7036 |
match_states(bi); |
7037 |
flush(1); |
7038 |
|
7039 |
raw_jmp((uintptr)bi->direct_handler); |
7040 |
|
7041 |
current_compile_p=get_target(); |
7042 |
raise_in_cl_list(bi); |
7043 |
|
7044 |
/* We will flush soon, anyway, so let's do it now */ |
7045 |
if (current_compile_p>=max_compile_start) |
7046 |
flush_icache_hard(7); |
7047 |
|
7048 |
bi->status=BI_ACTIVE; |
7049 |
if (redo_current_block) |
7050 |
block_need_recompile(bi); |
7051 |
|
7052 |
#if PROFILE_COMPILE_TIME |
7053 |
compile_time += (clock() - start_time); |
7054 |
#endif |
7055 |
} |
7056 |
|
7057 |
/* Account for compilation time */ |
7058 |
cpu_do_check_ticks(); |
7059 |
} |
7060 |
|
7061 |
void do_nothing(void)
{
	/* Deliberately empty: a call to this function has no effect. */
	return;
}
7065 |
|
7066 |
void exec_nostats(void) |
7067 |
{ |
7068 |
for (;;) { |
7069 |
uae_u32 opcode = GET_OPCODE; |
7070 |
#if FLIGHT_RECORDER |
7071 |
m68k_record_step(m68k_getpc()); |
7072 |
#endif |
7073 |
(*cpufunctbl[opcode])(opcode); |
7074 |
cpu_check_ticks(); |
7075 |
if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) { |
7076 |
return; /* We will deal with the spcflags in the caller */ |
7077 |
} |
7078 |
} |
7079 |
} |
7080 |
|
7081 |
void execute_normal(void) |
7082 |
{ |
7083 |
if (!check_for_cache_miss()) { |
7084 |
cpu_history pc_hist[MAXRUN]; |
7085 |
int blocklen = 0; |
7086 |
#if REAL_ADDRESSING || DIRECT_ADDRESSING |
7087 |
start_pc_p = regs.pc_p; |
7088 |
start_pc = get_virtual_address(regs.pc_p); |
7089 |
#else |
7090 |
start_pc_p = regs.pc_oldp; |
7091 |
start_pc = regs.pc; |
7092 |
#endif |
7093 |
for (;;) { /* Take note: This is the do-it-normal loop */ |
7094 |
pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p; |
7095 |
uae_u32 opcode = GET_OPCODE; |
7096 |
#if FLIGHT_RECORDER |
7097 |
m68k_record_step(m68k_getpc()); |
7098 |
#endif |
7099 |
(*cpufunctbl[opcode])(opcode); |
7100 |
cpu_check_ticks(); |
7101 |
if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) { |
7102 |
compile_block(pc_hist, blocklen); |
7103 |
return; /* We will deal with the spcflags in the caller */ |
7104 |
} |
7105 |
/* No need to check regs.spcflags, because if they were set, |
7106 |
we'd have ended up inside that "if" */ |
7107 |
} |
7108 |
} |
7109 |
} |
7110 |
|
7111 |
typedef void (*compiled_handler)(void); |
7112 |
|
7113 |
static void m68k_do_compile_execute(void) |
7114 |
{ |
7115 |
for (;;) { |
7116 |
((compiled_handler)(pushall_call_handler))(); |
7117 |
/* Whenever we return from that, we should check spcflags */ |
7118 |
if (SPCFLAGS_TEST(SPCFLAG_ALL)) { |
7119 |
if (m68k_do_specialties ()) |
7120 |
return; |
7121 |
} |
7122 |
} |
7123 |
} |
7124 |
|
7125 |
void m68k_compile_execute (void) |
7126 |
{ |
7127 |
for (;;) { |
7128 |
if (quit_program) |
7129 |
break; |
7130 |
m68k_do_compile_execute(); |
7131 |
} |
7132 |
} |